Rizin
unix-like reverse engineering framework and cli tools
|
#include <rz_diff.h>
#include <rz_util.h>
#include <ht_pp.h>
#include <ht_uu.h>
#include "bytes_diff.c"
#include "lines_diff.c"
#include "unified_diff.c"
Go to the source code of this file.
Classes | |
struct | block_t |
struct | methods_internal_t |
struct | rz_diff_t |
Macros | |
#define | NUM2PTR(x) ((void *)(intptr_t)(x)) |
#define | PTR2NUM(x) ((intptr_t)(void *)(x)) |
Typedefs | |
typedef struct block_t | Block |
typedef void(* | RzDiffMethodFree) (const void *array) |
typedef struct methods_internal_t | MethodsInternal |
Functions | |
RZ_LIB_VERSION (rz_diff) | |
RZ_API ut32 | rz_diff_hash_data (RZ_NULLABLE const ut8 *buffer, ut32 size) |
Calculates the hash of any given data. More... | |
static ut32 | default_ksize (const void *a) |
static bool | fake_ignore (const void *value) |
static bool | set_a (RzDiff *diff, const void *a, ut32 a_size) |
static void | free_hits (HtPPKv *kv) |
static bool | set_b (RzDiff *diff, const void *b, ut32 b_size) |
RZ_API RZ_OWN RzDiff * | rz_diff_bytes_new (RZ_BORROW const ut8 *a, ut32 a_size, RZ_BORROW const ut8 *b, ut32 b_size, RZ_NULLABLE RzDiffIgnoreByte ignore) |
Returns the structure needed to diff buffers of ut8. More... | |
RZ_API RZ_OWN RzDiff * | rz_diff_lines_new (RZ_BORROW const char *a, RZ_BORROW const char *b, RZ_NULLABLE RzDiffIgnoreLine ignore) |
Returns the structure needed to diff lines. More... | |
RZ_API RZ_OWN RzDiff * | rz_diff_generic_new (RZ_BORROW const void *a, ut32 a_size, RZ_BORROW const void *b, ut32 b_size, RZ_NONNULL RzDiffMethods *methods) |
Returns the structure needed to diff arrays of user defined types. More... | |
RZ_API void | rz_diff_free (RZ_NULLABLE RzDiff *diff) |
frees the diff structure More... | |
RZ_API RZ_BORROW const void * | rz_diff_get_a (RZ_NONNULL RzDiff *diff) |
Returns the pointer to the A array that was passed to rz_diff_XXX_new() More... | |
RZ_API RZ_BORROW const void * | rz_diff_get_b (RZ_NONNULL RzDiff *diff) |
Returns the pointer to the B array that was passed to rz_diff_XXX_new() More... | |
static bool | stack_append_block (RzList *stack, ut32 a_low, ut32 a_hi, ut32 b_low, ut32 b_hi) |
static RzDiffMatch * | match_new (ut32 a, ut32 b, ut32 size) |
static RzDiffMatch * | find_longest_match (RzDiff *diff, Block *block) |
static int | cmp_matches (RzDiffMatch *m0, RzDiffMatch *m1) |
RZ_API RZ_OWN RzList * | rz_diff_matches_new (RZ_NONNULL RzDiff *diff) |
generates a list of matching blocks More... | |
static RzDiffOp * | opcode_new (RzDiffOpType type, st32 a_beg, st32 a_end, st32 b_beg, st32 b_end) |
static void | opcode_set (RzDiffOp *op, RzDiffOpType type, st32 a_beg, st32 a_end, st32 b_beg, st32 b_end) |
RZ_API RZ_OWN RzList * | rz_diff_opcodes_new (RZ_NONNULL RzDiff *diff) |
Generates a list of steps needed to go from A to B. More... | |
static void | group_op_free (RzList *ops) |
RZ_API RZ_OWN RzList * | rz_diff_opcodes_grouped_new (RZ_NONNULL RzDiff *diff, ut32 n_groups) |
Generates groups of opcodes needed to go from A to B. More... | |
RZ_API bool | rz_diff_ratio (RZ_NONNULL RzDiff *diff, RZ_NONNULL double *result) |
Calculates the similarity ratio between A and B. More... | |
RZ_API bool | rz_diff_sizes_ratio (RZ_NONNULL RzDiff *diff, RZ_NONNULL double *result) |
Calculates the size ratio between A and B. More... | |
Ratcliff/Obershelp Pattern Recognition: the Ratcliff/Obershelp Pattern Recognition algorithm applied to generic data.
The code for diffing is quite simple: given two arrays containing data, you calculate the longest sequences of data that match between the two inputs. To do that, you need to create a map in which you store all the hits found within an array:
Once this map is created, you need to find the longest subsequence that occurs in both arrays by using the hit-map. Then you remove that subsequence from the search area and search again for the second-longest subsequence (excluding the area of the first subsequence). You keep doing this until all areas and longest matches have been found.
Now that you know all the matching areas, you can generate a series of steps/operations that transform the first array into the second one, by removing the non-matching areas from the first array and inserting the missing areas from the second array.
Example: array_a = [A,B,C,D,E,F,G,H,I] array_b = [Y,Z,B,C,D,L,Z,N,H,I]
1: create map of hits and their positions:
2: find all matching areas using the hit-map:
3: create the steps to convert array_a into array_b
Definition in file diff.c.
typedef struct methods_internal_t MethodsInternal |
|
static |
Definition at line 474 of file diff.c.
References match_p_t::a, match_p_t::b, and match_p_t::size.
Referenced by rz_diff_matches_new().
Definition at line 118 of file diff.c.
Referenced by rz_diff_generic_new().
|
static |
Definition at line 356 of file diff.c.
References rz_diff_t::a, a, block_t::a_hi, block_t::a_low, rz_diff_t::b, b, block_t::b_hi, rz_diff_t::b_hits, block_t::b_low, methods_internal_t::compare, compare(), methods_internal_t::elem_at, methods_internal_t::ignore, len, list(), match, match_new(), rz_diff_t::methods, NULL, PTR2NUM, RZ_LOG_ERROR, rz_return_val_if_fail, autogen_x86imm::tmp, and ut64().
Referenced by rz_diff_matches_new().
|
static |
Definition at line 134 of file diff.c.
References rz_list_free().
Referenced by set_b().
|
static |
Definition at line 701 of file diff.c.
References ops, and rz_list_free().
Referenced by rz_diff_opcodes_grouped_new().
|
static |
Definition at line 344 of file diff.c.
References a, b, match, NULL, and RZ_NEW0.
Referenced by find_longest_match(), and rz_diff_matches_new().
|
static |
Definition at line 605 of file diff.c.
References NULL, op, RZ_NEW0, and type.
Referenced by rz_diff_opcodes_grouped_new(), and rz_diff_opcodes_new().
RZ_API RZ_OWN RzDiff* rz_diff_bytes_new | ( | RZ_BORROW const ut8 * | a, |
ut32 | a_size, | ||
RZ_BORROW const ut8 * | b, | ||
ut32 | b_size, | ||
RZ_NULLABLE RzDiffIgnoreByte | ignore | ||
) |
Returns the structure needed to diff buffers of ut8.
Allocates the internal structure needed to diff buffers by using the methods defined in methods_bytes. Allows defining a callback function to ignore bytes.
Definition at line 188 of file diff.c.
References a, b, methods_internal_t::ignore, rz_diff_t::methods, methods_bytes, NULL, rz_diff_free(), RZ_NEW0, rz_return_val_if_fail, set_a(), and set_b().
Referenced by rz_diff_unified_files().
RZ_API void rz_diff_free | ( | RZ_NULLABLE RzDiff * | diff | ) |
Frees the diff structure
Frees any internal structures and the diff structure itself.
Definition at line 295 of file diff.c.
References free().
Referenced by core_analysis_graph_construct_nodes(), graph_construct_nodes(), print_diff(), rz_cmd_debug(), rz_diff_bytes_new(), rz_diff_generic_new(), rz_diff_lines_new(), and rz_diff_unified_files().
RZ_API RZ_OWN RzDiff* rz_diff_generic_new | ( | RZ_BORROW const void * | a, |
ut32 | a_size, | ||
RZ_BORROW const void * | b, | ||
ut32 | b_size, | ||
RZ_NONNULL RzDiffMethods * | methods | ||
) |
Returns the structure needed to diff arrays of user defined types.
Allocates the internal structure needed to diff any user defined array of any types by using the methods provided by the user calling this C api.
Definition at line 259 of file diff.c.
References a, b, methods_internal_t::compare, methods_internal_t::elem_at, methods_internal_t::elem_hash, fake_ignore(), methods_internal_t::free, methods_internal_t::ignore, rz_diff_t::methods, NULL, rz_diff_free(), RZ_NEW0, rz_return_val_if_fail, set_a(), set_b(), and methods_internal_t::stringify.
Referenced by rz_diff_classes_new(), rz_diff_entries_new(), rz_diff_fields_new(), rz_diff_imports_new(), rz_diff_libraries_new(), rz_diff_sections_new(), rz_diff_strings_new(), and rz_diff_symbols_new().
RZ_API RZ_BORROW const void* rz_diff_get_a | ( | RZ_NONNULL RzDiff * | diff | ) |
Returns the pointer to the A array that was passed to rz_diff_XXX_new()
Returns the pointer to the A array that was passed to rz_diff_XXX_new()
Definition at line 312 of file diff.c.
References NULL, and rz_return_val_if_fail.
RZ_API RZ_BORROW const void* rz_diff_get_b | ( | RZ_NONNULL RzDiff * | diff | ) |
Returns the pointer to the B array that was passed to rz_diff_XXX_new()
Returns the pointer to the B array that was passed to rz_diff_XXX_new()
Definition at line 322 of file diff.c.
References NULL, and rz_return_val_if_fail.
Calculates the hash of any given data.
Calculates the hash of any given data with a user defined size.
Definition at line 103 of file diff.c.
Referenced by class_hash(), class_hash_addr(), entry_hash(), field_hash(), field_hash_addr(), import_hash(), libs_hash(), line_hash(), section_hash(), section_hash_addr(), string_hash(), string_hash_addr(), symbol_hash(), and symbol_hash_addr().
RZ_API RZ_OWN RzDiff* rz_diff_lines_new | ( | RZ_BORROW const char * | a, |
RZ_BORROW const char * | b, | ||
RZ_NULLABLE RzDiffIgnoreLine | ignore | ||
) |
Returns the structure needed to diff lines.
Allocates the internal structure needed to diff strings with new lines using the methods defined in methods_lines. Allows defining a callback function to ignore lines.
Definition at line 219 of file diff.c.
References a, b, free(), methods_internal_t::ignore, rz_diff_t::methods, methods_lines, NULL, rz_diff_free(), rz_list_free(), rz_list_length(), RZ_NEW0, rz_return_val_if_fail, set_a(), set_b(), and tokenize_lines().
Referenced by core_analysis_graph_construct_nodes(), graph_construct_nodes(), print_diff(), rz_cmd_debug(), rz_diff_command_new(), and rz_diff_unified_files().
RZ_API RZ_OWN RzList* rz_diff_matches_new | ( | RZ_NONNULL RzDiff * | diff | ) |
generates a list of matching blocks
Generates a list of matching blocks that are found in both inputs. If none are found, it returns a match result with a size of 0.
Definition at line 497 of file diff.c.
References block_t::a_hi, block_t::a_low, block_t::b_hi, block_t::b_low, cmp_matches(), find_longest_match(), free(), regress::m, match_new(), NULL, rz_list_append(), rz_list_free(), rz_list_length(), rz_list_newf(), rz_list_pop(), rz_list_sort(), RZ_LOG_ERROR, rz_return_val_if_fail, and stack_append_block().
Referenced by rz_diff_opcodes_new(), and rz_diff_ratio().
Generates groups of opcodes needed to go from A to B.
Generates groups of opcodes needed to go from A to B, where each group ends with N common EQUAL ops (if possible). The default is 3 EQUAL ops before splitting the group.
Definition at line 712 of file diff.c.
References free(), group_op_free(), make_dist_html::groups, NULL, opcode_new(), opcode_set(), opcodes, RZ_DIFF_OP_EQUAL, rz_diff_opcodes_new(), rz_list_append(), rz_list_first(), rz_list_free(), rz_list_last(), rz_list_length(), rz_list_newf(), RZ_LOG_ERROR, RZ_MAX, RZ_MIN, rz_return_val_if_fail, and st32.
Referenced by rz_diff_unified_json(), and rz_diff_unified_text().
RZ_API RZ_OWN RzList* rz_diff_opcodes_new | ( | RZ_NONNULL RzDiff * | diff | ) |
Generates a list of steps needed to go from A to B.
Generates a list of opcodes that are needed to convert A to B.
Definition at line 631 of file diff.c.
References a, b, free(), NULL, opcode_new(), opcodes, rz_diff_matches_new(), RZ_DIFF_OP_DELETE, RZ_DIFF_OP_EQUAL, RZ_DIFF_OP_INSERT, RZ_DIFF_OP_INVALID, RZ_DIFF_OP_REPLACE, rz_list_append(), rz_list_free(), rz_list_newf(), RZ_LOG_ERROR, rz_return_val_if_fail, and type.
Referenced by rz_diff_opcodes_grouped_new().
RZ_API bool rz_diff_ratio | ( | RZ_NONNULL RzDiff * | diff, |
RZ_NONNULL double * | result | ||
) |
Calculates the similarity ratio between A and B.
Calculates the similarity ratio between A and B. Returns a number between 0 and 1; the closer the result is to 1, the more similar the two arrays are (1 meaning identical).
Definition at line 831 of file diff.c.
References NULL, rz_diff_matches_new(), rz_list_free(), and rz_return_val_if_fail.
RZ_API bool rz_diff_sizes_ratio | ( | RZ_NONNULL RzDiff * | diff, |
RZ_NONNULL double * | result | ||
) |
Calculates the size ratio between A and B.
Works like rz_diff_ratio, but checks only how similar the sizes of the two arrays are. Returns a number between 0 and 1, like above.
Definition at line 865 of file diff.c.
References RZ_MIN, and rz_return_val_if_fail.
RZ_LIB_VERSION | ( | rz_diff | ) |
Definition at line 126 of file diff.c.
References rz_diff_t::a, a, rz_diff_t::a_size, and rz_return_val_if_fail.
Referenced by rz_diff_bytes_new(), rz_diff_generic_new(), and rz_diff_lines_new().
Definition at line 138 of file diff.c.
References rz_diff_t::b, b, rz_diff_t::b_hits, rz_diff_t::b_size, methods_internal_t::compare, default_ksize(), methods_internal_t::elem_at, methods_internal_t::elem_hash, free_hits(), i, methods_internal_t::ignore, list(), rz_diff_t::methods, NULL, NUM2PTR, rz_list_append(), rz_list_newf(), RZ_LOG_ERROR, rz_return_val_if_fail, and ut64().
Referenced by rz_diff_bytes_new(), rz_diff_generic_new(), and rz_diff_lines_new().
|
inlinestatic |
Definition at line 327 of file diff.c.
References block_t::a_hi, block_t::a_low, block_t::b_hi, block_t::b_low, free(), rz_list_append(), and RZ_NEW0.
Referenced by rz_diff_matches_new().