Rizin
unix-like reverse engineering framework and cli tools
diff.c File Reference
#include <rz_diff.h>
#include <rz_util.h>
#include <ht_pp.h>
#include <ht_uu.h>
#include "bytes_diff.c"
#include "lines_diff.c"
#include "unified_diff.c"

Go to the source code of this file.

Classes

struct  block_t
 
struct  methods_internal_t
 
struct  rz_diff_t
 

Macros

#define NUM2PTR(x)   ((void *)(intptr_t)(x))
 
#define PTR2NUM(x)   ((intptr_t)(void *)(x))
 

Typedefs

typedef struct block_t Block
 
typedef void(* RzDiffMethodFree) (const void *array)
 
typedef struct methods_internal_t MethodsInternal
 

Functions

 RZ_LIB_VERSION (rz_diff)
 
RZ_API ut32 rz_diff_hash_data (RZ_NULLABLE const ut8 *buffer, ut32 size)
 Calculates the hash of any given data. More...
 
static ut32 default_ksize (const void *a)
 
static bool fake_ignore (const void *value)
 
static bool set_a (RzDiff *diff, const void *a, ut32 a_size)
 
static void free_hits (HtPPKv *kv)
 
static bool set_b (RzDiff *diff, const void *b, ut32 b_size)
 
RZ_API RZ_OWN RzDiffrz_diff_bytes_new (RZ_BORROW const ut8 *a, ut32 a_size, RZ_BORROW const ut8 *b, ut32 b_size, RZ_NULLABLE RzDiffIgnoreByte ignore)
 Returns the structure needed to diff buffers of ut8. More...
 
RZ_API RZ_OWN RzDiffrz_diff_lines_new (RZ_BORROW const char *a, RZ_BORROW const char *b, RZ_NULLABLE RzDiffIgnoreLine ignore)
 Returns the structure needed to diff lines. More...
 
RZ_API RZ_OWN RzDiffrz_diff_generic_new (RZ_BORROW const void *a, ut32 a_size, RZ_BORROW const void *b, ut32 b_size, RZ_NONNULL RzDiffMethods *methods)
 Returns the structure needed to diff arrays of user defined types. More...
 
RZ_API void rz_diff_free (RZ_NULLABLE RzDiff *diff)
 frees the diff structure More...
 
RZ_API RZ_BORROW const void * rz_diff_get_a (RZ_NONNULL RzDiff *diff)
 returns the pointer of the A array that passed to rz_diff_XXX_new() More...
 
RZ_API RZ_BORROW const void * rz_diff_get_b (RZ_NONNULL RzDiff *diff)
 returns the pointer of the B array that passed to rz_diff_XXX_new() More...
 
static bool stack_append_block (RzList *stack, ut32 a_low, ut32 a_hi, ut32 b_low, ut32 b_hi)
 
static RzDiffMatchmatch_new (ut32 a, ut32 b, ut32 size)
 
static RzDiffMatchfind_longest_match (RzDiff *diff, Block *block)
 
static int cmp_matches (RzDiffMatch *m0, RzDiffMatch *m1)
 
RZ_API RZ_OWN RzListrz_diff_matches_new (RZ_NONNULL RzDiff *diff)
 generates a list of matching blocks More...
 
static RzDiffOpopcode_new (RzDiffOpType type, st32 a_beg, st32 a_end, st32 b_beg, st32 b_end)
 
static void opcode_set (RzDiffOp *op, RzDiffOpType type, st32 a_beg, st32 a_end, st32 b_beg, st32 b_end)
 
RZ_API RZ_OWN RzListrz_diff_opcodes_new (RZ_NONNULL RzDiff *diff)
 Generates a list of steps needed to go from A to B. More...
 
static void group_op_free (RzList *ops)
 
RZ_API RZ_OWN RzListrz_diff_opcodes_grouped_new (RZ_NONNULL RzDiff *diff, ut32 n_groups)
 Generates groups of opcodes needed to go from A to B. More...
 
RZ_API bool rz_diff_ratio (RZ_NONNULL RzDiff *diff, RZ_NONNULL double *result)
 Calculates the similarity ratio between A and B. More...
 
RZ_API bool rz_diff_sizes_ratio (RZ_NONNULL RzDiff *diff, RZ_NONNULL double *result)
 Calculates the size ratio between A and B. More...
 

Detailed Description

Ratcliff/Obershelp Pattern Recognition Ratcliff/Obershelp Pattern Recognition algorithm applied to generic data.

The code for diffing is quite simple, given 2 arrays containing data, you calculate the longest sequences of data that matches between the two inputs; to do that you need to create a map in which you will store all the hits found within an array:

  • as key, each single element of one of the arrays.
  • as value, a list of all the locations of which each element appears within the array itself.

Once this map is created, you will need to find the longest subsequence that can be found in both arrays by using the hit-map. then you remove that subsequence from the area of search, and search again for the 2nd longest subsequence (excluding the area of the first subsequence). Then you keep doing this, till all areas and longest matches have been found.

Now that you know all the matching areas, you can generate a series of steps/operations which can transform the first array into the second one, by removing the non matching areas in the 1st array and inserting the missing areas from the 2nd array.

Example: array_a = [A,B,C,D,E,F,G,H,I] array_b = [Y,Z,B,C,D,L,Z,N,H,I]

1: create map of hits and their positions:

  • hit_map(array_b) = { B: [2] C: [3] D: [4] H: [8] I: [9] L: [5] N: [7] Y: [0] Z: [1,6] }

2: find all matching areas using the hit-map:

  • match_0 = [B,C,D] from array_a[1] to array_a[3] and from array_b[3] to array_b[4]
  • match_1 = [H,I] from array_a[1] to array_a[8] and from array_b[8] to array_b[9]

3: create the steps to convert array_a in array_b

  • remove [A] at 0
  • insert [Y,Z] at 0
  • keep [B,C,D] at 1
  • remove [E,F,G] at 4
  • insert [L,Z,N] at 4
  • keep [H,I] at 8

Definition in file diff.c.

Macro Definition Documentation

◆ NUM2PTR

#define NUM2PTR (   x)    ((void *)(intptr_t)(x))

Definition at line 66 of file diff.c.

◆ PTR2NUM

#define PTR2NUM (   x)    ((intptr_t)(void *)(x))

Definition at line 67 of file diff.c.

Typedef Documentation

◆ Block

typedef struct block_t Block

◆ MethodsInternal

◆ RzDiffMethodFree

typedef void(* RzDiffMethodFree) (const void *array)

Definition at line 78 of file diff.c.

Function Documentation

◆ cmp_matches()

static int cmp_matches ( RzDiffMatch m0,
RzDiffMatch m1 
)
static

Definition at line 474 of file diff.c.

474  {
475  if (m0->a > m1->a) {
476  return 1;
477  } else if (m0->a < m1->a) {
478  return -1;
479  } else if (m0->b > m1->b) {
480  return 1;
481  } else if (m0->b < m1->b) {
482  return -1;
483  } else if (m0->size > m1->size) {
484  return 1;
485  } else if (m0->size < m1->size) {
486  return -1;
487  }
488  return 0;
489 }
ut32 b
Definition: rz_diff.h:63
ut32 size
Definition: rz_diff.h:64
ut32 a
Definition: rz_diff.h:62

References match_p_t::a, match_p_t::b, and match_p_t::size.

Referenced by rz_diff_matches_new().

◆ default_ksize()

static ut32 default_ksize ( const void *  a)
static

Definition at line 114 of file diff.c.

114  {
115  return sizeof(ut32);
116 }
uint32_t ut32

Referenced by set_b().

◆ fake_ignore()

static bool fake_ignore ( const void *  value)
static

Definition at line 118 of file diff.c.

118  {
119  return false;
120 }

Referenced by rz_diff_generic_new().

◆ find_longest_match()

static RzDiffMatch* find_longest_match ( RzDiff diff,
Block block 
)
static

Definition at line 356 of file diff.c.

356  {
357  rz_return_val_if_fail(diff && diff->methods.elem_at && diff->methods.compare && diff->methods.ignore, false);
358  RzList *list = NULL;
359  RzListIter *it = NULL;
361  HtUU *tmp = NULL;
362  HtUU *len_map = NULL;
363  void *pnum = NULL;
364  const ut8 *a = diff->a;
365  const ut8 *b = diff->b;
366  const void *elem_a = NULL;
367  const void *elem_b = NULL;
368  RzDiffMethodIgnore ignore = diff->methods.ignore;
369  RzDiffMethodElemAt elem_at = diff->methods.elem_at;
371 
372  ut32 a_low = block->a_low;
373  ut32 a_hi = block->a_hi;
374  ut32 b_low = block->b_low;
375  ut32 b_hi = block->b_hi;
376 
377  ut32 hit_a = a_low;
378  ut32 hit_b = b_low;
379  ut32 hit_size = 0;
380 
381  len_map = ht_uu_new0();
382  if (!len_map) {
383  RZ_LOG_ERROR("find_longest_match: cannot allocate len_map\n");
384  goto find_longest_match_fail;
385  }
386 
387  for (ut32 a_pos = a_low; a_pos < a_hi; ++a_pos) {
388  elem_a = elem_at(a, a_pos);
389  tmp = ht_uu_new0();
390  if (!tmp) {
391  RZ_LOG_ERROR("find_longest_match: cannot allocate tmp\n");
392  goto find_longest_match_fail;
393  }
394 
395  list = ht_pp_find(diff->b_hits, elem_a, NULL);
396  rz_list_foreach (list, it, pnum) {
397  ut64 b_pos = PTR2NUM(pnum);
398  if (b_pos < b_low) {
399  continue;
400  } else if (b_pos >= b_hi) {
401  break;
402  }
403  ut32 len = ht_uu_find(len_map, b_pos - 1, NULL) + 1;
404  ht_uu_insert(tmp, b_pos, len);
405  if (len > hit_size) {
406  hit_a = a_pos - len + 1;
407  hit_b = b_pos - len + 1;
408  hit_size = len;
409  }
410  }
411 
412  ht_uu_free(len_map);
413  len_map = tmp;
414  tmp = NULL;
415  }
416 
417  // Now let's handle the without the ignored chars.
418  while (hit_a > a_low && hit_b > b_low) {
419  elem_a = elem_at(a, hit_a - 1);
420  elem_b = elem_at(b, hit_b - 1);
421  if (ignore(elem_b) || compare(elem_a, elem_b)) {
422  break;
423  }
424  hit_a--;
425  hit_b--;
426  hit_size++;
427  }
428 
429  while (hit_a + hit_size < a_hi && hit_b + hit_size < b_hi) {
430  elem_a = elem_at(a, hit_a + hit_size);
431  elem_b = elem_at(b, hit_b + hit_size);
432  if (ignore(elem_b) || compare(elem_a, elem_b)) {
433  break;
434  }
435  hit_size++;
436  }
437 
438  // Now let's handle the ignored chars.
439  while (hit_a > a_low && hit_b > b_low) {
440  elem_a = elem_at(a, hit_a - 1);
441  elem_b = elem_at(b, hit_b - 1);
442  if (!ignore(elem_b) || compare(elem_a, elem_b)) {
443  break;
444  }
445  hit_a--;
446  hit_b--;
447  hit_size++;
448  }
449 
450  while (hit_a + hit_size < a_hi && hit_b + hit_size < b_hi) {
451  elem_a = elem_at(a, hit_a + hit_size);
452  elem_b = elem_at(b, hit_b + hit_size);
453  if (!ignore(elem_b) || compare(elem_a, elem_b)) {
454  break;
455  }
456  hit_size++;
457  }
458 
459  match = match_new(hit_a, hit_b, hit_size);
460  if (!match) {
461  RZ_LOG_ERROR("find_longest_match: cannot allocate RzDiffMatch\n");
462  goto find_longest_match_fail;
463  }
464 
465  ht_uu_free(len_map);
466  return match;
467 
468 find_longest_match_fail:
469  ht_uu_free(tmp);
470  ht_uu_free(len_map);
471  return NULL;
472 }
size_t len
Definition: 6502dis.c:15
#define NULL
Definition: cris-opc.c:27
static RzDiffMatch * match_new(ut32 a, ut32 b, ut32 size)
Definition: diff.c:344
#define PTR2NUM(x)
Definition: diff.c:67
unsigned char match[65280+2]
Definition: gun.c:165
uint8_t ut8
Definition: lh5801.h:11
static int compare(const char *s1, const char *s2, int l1, int l2)
Definition: chmd.c:864
static void list(RzEgg *egg)
Definition: rz-gg.c:52
#define rz_return_val_if_fail(expr, val)
Definition: rz_assert.h:108
const void *(* RzDiffMethodElemAt)(RZ_BORROW const void *array, ut32 index)
Definition: rz_diff.h:36
bool(* RzDiffMethodIgnore)(RZ_BORROW const void *elem)
Definition: rz_diff.h:39
int(* RzDiffMethodCompare)(RZ_BORROW const void *a_elem, RZ_BORROW const void *b_elem)
Definition: rz_diff.h:38
#define RZ_LOG_ERROR(fmtstr,...)
Definition: rz_log.h:58
#define b(i)
Definition: sha256.c:42
#define a(i)
Definition: sha256.c:41
ut32 b_low
Definition: diff.c:74
ut32 b_hi
Definition: diff.c:75
ut32 a_low
Definition: diff.c:72
ut32 a_hi
Definition: diff.c:73
Definition: engine.c:71
RzDiffMethodIgnore ignore
Definition: diff.c:84
RzDiffMethodElemAt elem_at
Definition: diff.c:81
RzDiffMethodCompare compare
Definition: diff.c:83
MethodsInternal methods
Definition: diff.c:95
HtPP * b_hits
Definition: diff.c:94
const void * a
Definition: diff.c:90
const void * b
Definition: diff.c:91
ut64(WINAPI *w32_GetEnabledXStateFeatures)()

References rz_diff_t::a, a, block_t::a_hi, block_t::a_low, rz_diff_t::b, b, block_t::b_hi, rz_diff_t::b_hits, block_t::b_low, methods_internal_t::compare, compare(), methods_internal_t::elem_at, methods_internal_t::ignore, len, list(), match, match_new(), rz_diff_t::methods, NULL, PTR2NUM, RZ_LOG_ERROR, rz_return_val_if_fail, autogen_x86imm::tmp, and ut64().

Referenced by rz_diff_matches_new().

◆ free_hits()

static void free_hits ( HtPPKv *  kv)
static

Definition at line 134 of file diff.c.

134  {
135  rz_list_free(kv->value);
136 }
RZ_API void rz_list_free(RZ_NONNULL RzList *list)
Empties the list and frees the list pointer.
Definition: list.c:137

References rz_list_free().

Referenced by set_b().

◆ group_op_free()

static void group_op_free ( RzList ops)
static

Definition at line 701 of file diff.c.

701  {
702  rz_list_free(ops);
703 }
static struct @29 ops[]

References ops, and rz_list_free().

Referenced by rz_diff_opcodes_grouped_new().

◆ match_new()

static RzDiffMatch* match_new ( ut32  a,
ut32  b,
ut32  size 
)
static

Definition at line 344 of file diff.c.

344  {
346  if (!match) {
347  return NULL;
348  }
349 
350  match->a = a;
351  match->b = b;
352  match->size = size;
353  return match;
354 }
voidpf void uLong size
Definition: ioapi.h:138
#define RZ_NEW0(x)
Definition: rz_types.h:284

References a, b, match, NULL, and RZ_NEW0.

Referenced by find_longest_match(), and rz_diff_matches_new().

◆ opcode_new()

static RzDiffOp* opcode_new ( RzDiffOpType  type,
st32  a_beg,
st32  a_end,
st32  b_beg,
st32  b_end 
)
static

Definition at line 605 of file diff.c.

605  {
607  if (!op) {
608  return NULL;
609  }
610  op->type = type;
611  op->a_beg = a_beg;
612  op->a_end = a_end;
613  op->b_beg = b_beg;
614  op->b_end = b_end;
615  return op;
616 }
ut8 op
Definition: 6502dis.c:13
int type
Definition: mipsasm.c:17
Definition: dis.c:32

References NULL, op, RZ_NEW0, and type.

Referenced by rz_diff_opcodes_grouped_new(), and rz_diff_opcodes_new().

◆ opcode_set()

static void opcode_set ( RzDiffOp op,
RzDiffOpType  type,
st32  a_beg,
st32  a_end,
st32  b_beg,
st32  b_end 
)
static

Definition at line 618 of file diff.c.

618  {
619  op->type = type;
620  op->a_beg = a_beg;
621  op->a_end = a_end;
622  op->b_beg = b_beg;
623  op->b_end = b_end;
624 }

References type.

Referenced by rz_diff_opcodes_grouped_new().

◆ rz_diff_bytes_new()

RZ_API RZ_OWN RzDiff* rz_diff_bytes_new ( RZ_BORROW const ut8 a,
ut32  a_size,
RZ_BORROW const ut8 b,
ut32  b_size,
RZ_NULLABLE RzDiffIgnoreByte  ignore 
)

Returns the structure needed to diff buffers of ut8.

Allocates the internal structure needed to diff buffers by using the methods defined in methods_bytes. Allows to define an callback function to ignore bytes.

Definition at line 188 of file diff.c.

188  {
190 
191  RzDiff *diff = RZ_NEW0(RzDiff);
192  if (!diff) {
193  return NULL;
194  }
195 
196  diff->methods = methods_bytes;
197  if (ignore) {
198  diff->methods.ignore = (RzDiffMethodIgnore)ignore;
199  }
200 
201  if (!set_a(diff, a, a_size)) {
202  rz_diff_free(diff);
203  return NULL;
204  }
205  if (!set_b(diff, b, b_size)) {
206  rz_diff_free(diff);
207  return NULL;
208  }
209  return diff;
210 }
static const MethodsInternal methods_bytes
Definition: bytes_diff.c:24
static bool set_b(RzDiff *diff, const void *b, ut32 b_size)
Definition: diff.c:138
static bool set_a(RzDiff *diff, const void *a, ut32 a_size)
Definition: diff.c:126
RZ_API void rz_diff_free(RZ_NULLABLE RzDiff *diff)
frees the diff structure
Definition: diff.c:295
Definition: diff.c:89

References a, b, methods_internal_t::ignore, rz_diff_t::methods, methods_bytes, NULL, rz_diff_free(), RZ_NEW0, rz_return_val_if_fail, set_a(), and set_b().

Referenced by rz_diff_unified_files().

◆ rz_diff_free()

RZ_API void rz_diff_free ( RZ_NULLABLE RzDiff diff)

frees the diff structure

frees any internal structure and the diff structure.

Definition at line 295 of file diff.c.

295  {
296  if (!diff) {
297  return;
298  }
299  if (diff->methods.free) {
300  diff->methods.free(diff->a);
301  diff->methods.free(diff->b);
302  }
303  ht_pp_free(diff->b_hits);
304  free(diff);
305 }
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130

References free().

Referenced by core_analysis_graph_construct_nodes(), graph_construct_nodes(), print_diff(), rz_cmd_debug(), rz_diff_bytes_new(), rz_diff_generic_new(), rz_diff_lines_new(), and rz_diff_unified_files().

◆ rz_diff_generic_new()

RZ_API RZ_OWN RzDiff* rz_diff_generic_new ( RZ_BORROW const void *  a,
ut32  a_size,
RZ_BORROW const void *  b,
ut32  b_size,
RZ_NONNULL RzDiffMethods methods 
)

Returns the structure needed to diff arrays of user defined types.

Allocates the internal structure needed to diff any user defined array of any types by using the methods provided by the user calling this C api.

Definition at line 259 of file diff.c.

259  {
260  rz_return_val_if_fail(a && b && methods && methods->elem_at && methods->elem_hash && methods->compare && methods->stringify, NULL);
261 
262  RzDiff *diff = RZ_NEW0(RzDiff);
263  if (!diff) {
264  return NULL;
265  }
266 
267  diff->methods.free = NULL;
268  diff->methods.elem_at = methods->elem_at;
269  diff->methods.elem_hash = methods->elem_hash;
270  diff->methods.compare = methods->compare;
271  diff->methods.stringify = methods->stringify;
272 
273  if (methods->ignore) {
274  diff->methods.ignore = methods->ignore;
275  } else {
276  diff->methods.ignore = fake_ignore;
277  }
278 
279  if (!set_a(diff, a, a_size)) {
280  rz_diff_free(diff);
281  return NULL;
282  }
283  if (!set_b(diff, b, b_size)) {
284  rz_diff_free(diff);
285  return NULL;
286  }
287  return diff;
288 }
static bool fake_ignore(const void *value)
Definition: diff.c:118
RzDiffMethodStringify stringify
Definition: diff.c:85
RzDiffMethodElemHash elem_hash
Definition: diff.c:82
RzDiffMethodFree free
Definition: diff.c:86

References a, b, methods_internal_t::compare, methods_internal_t::elem_at, methods_internal_t::elem_hash, fake_ignore(), methods_internal_t::free, methods_internal_t::ignore, rz_diff_t::methods, NULL, rz_diff_free(), RZ_NEW0, rz_return_val_if_fail, set_a(), set_b(), and methods_internal_t::stringify.

Referenced by rz_diff_classes_new(), rz_diff_entries_new(), rz_diff_fields_new(), rz_diff_imports_new(), rz_diff_libraries_new(), rz_diff_sections_new(), rz_diff_strings_new(), and rz_diff_symbols_new().

◆ rz_diff_get_a()

RZ_API RZ_BORROW const void* rz_diff_get_a ( RZ_NONNULL RzDiff diff)

returns the pointer of the A array that passed to rz_diff_XXX_new()

returns the pointer of the A array that passed to rz_diff_XXX_new()

Definition at line 312 of file diff.c.

312  {
314  return diff->a;
315 }

References NULL, and rz_return_val_if_fail.

◆ rz_diff_get_b()

RZ_API RZ_BORROW const void* rz_diff_get_b ( RZ_NONNULL RzDiff diff)

returns the pointer of the B array that passed to rz_diff_XXX_new()

returns the pointer of the B array that passed to rz_diff_XXX_new()

Definition at line 322 of file diff.c.

322  {
324  return diff->b;
325 }

References NULL, and rz_return_val_if_fail.

◆ rz_diff_hash_data()

RZ_API ut32 rz_diff_hash_data ( RZ_NULLABLE const ut8 buffer,
ut32  size 
)

Calculates the hash of any given data.

Calculates the hash of any given data with a user defined size.

Definition at line 103 of file diff.c.

103  {
104  ut32 h = 5381;
105  if (!buffer || !size) {
106  return h;
107  }
108  for (ut32 i = 0; i < size; ++i) {
109  h = (h + (h << 5)) ^ buffer[i];
110  }
111  return h;
112 }
lzma_index ** i
Definition: index.h:629
#define h(i)
Definition: sha256.c:48
Definition: buffer.h:15

References h, and i.

Referenced by class_hash(), class_hash_addr(), entry_hash(), field_hash(), field_hash_addr(), import_hash(), libs_hash(), line_hash(), section_hash(), section_hash_addr(), string_hash(), string_hash_addr(), symbol_hash(), and symbol_hash_addr().

◆ rz_diff_lines_new()

RZ_API RZ_OWN RzDiff* rz_diff_lines_new ( RZ_BORROW const char *  a,
RZ_BORROW const char *  b,
RZ_NULLABLE RzDiffIgnoreLine  ignore 
)

Returns the structure needed to diff lines.

Allocates the internal structure needed to diff strings with new lines using the methods defined in methods_lines. Allows to define an callback function to ignore lines.

Definition at line 219 of file diff.c.

219  {
221 
222  RzDiff *diff = RZ_NEW0(RzDiff);
223  if (!diff) {
224  return NULL;
225  }
226 
227  RzList *a_lines = tokenize_lines(a);
228  RzList *b_lines = tokenize_lines(b);
229  if (!a_lines || !b_lines) {
230  rz_list_free(a_lines);
231  rz_list_free(b_lines);
232  free(diff);
233  return NULL;
234  }
235 
236  diff->methods = methods_lines;
237 
238  if (ignore) {
239  diff->methods.ignore = (RzDiffMethodIgnore)ignore;
240  }
241 
242  if (!set_a(diff, a_lines, rz_list_length(a_lines))) {
243  rz_diff_free(diff);
244  return NULL;
245  }
246  if (!set_b(diff, b_lines, rz_list_length(b_lines))) {
247  rz_diff_free(diff);
248  return NULL;
249  }
250  return diff;
251 }
RZ_API ut32 rz_list_length(RZ_NONNULL const RzList *list)
Returns the length of the list.
Definition: list.c:109
static const MethodsInternal methods_lines
Definition: lines_diff.c:70
static RzList * tokenize_lines(const char *string)
Definition: lines_diff.c:8

References a, b, free(), methods_internal_t::ignore, rz_diff_t::methods, methods_lines, NULL, rz_diff_free(), rz_list_free(), rz_list_length(), RZ_NEW0, rz_return_val_if_fail, set_a(), set_b(), and tokenize_lines().

Referenced by core_analysis_graph_construct_nodes(), graph_construct_nodes(), print_diff(), rz_cmd_debug(), rz_diff_command_new(), and rz_diff_unified_files().

◆ rz_diff_matches_new()

RZ_API RZ_OWN RzList* rz_diff_matches_new ( RZ_NONNULL RzDiff diff)

generates a list of matching blocks

Generates a list of matching blocks that are found in both inputs. If non are found it returns a match result with size of 0

Definition at line 497 of file diff.c.

497  {
499  RzList *stack = NULL;
500  RzList *matches = NULL;
501  RzList *non_adjacent = NULL;
502  RzListIter *it = NULL;
503  Block *block = NULL;
505  ut32 adj_a = 0, adj_b = 0, adj_size = 0;
506 
507  matches = rz_list_newf((RzListFree)free);
508  if (!matches) {
509  RZ_LOG_ERROR("rz_diff_matches_new: cannot allocate matches\n");
510  goto rz_diff_matches_new_fail;
511  }
512  non_adjacent = rz_list_newf((RzListFree)free);
513  if (!matches) {
514  RZ_LOG_ERROR("rz_diff_matches_new: cannot allocate non_adjacent\n");
515  goto rz_diff_matches_new_fail;
516  }
517 
519  if (!stack) {
520  RZ_LOG_ERROR("rz_diff_matches_new: cannot allocate stack\n");
521  goto rz_diff_matches_new_fail;
522  }
523 
524  if (!stack_append_block(stack, 0, diff->a_size, 0, diff->b_size)) {
525  RZ_LOG_ERROR("rz_diff_matches_new: cannot append initial block "
526  "into stack\n");
527  goto rz_diff_matches_new_fail;
528  }
529 
530  while (rz_list_length(stack) > 0) {
531  block = (Block *)rz_list_pop(stack);
532  match = find_longest_match(diff, block);
533  if (!match) {
534  continue;
535  }
536 
537  if (match->size > 0) {
538  if (!rz_list_append(matches, match)) {
539  RZ_LOG_ERROR("rz_diff_matches_new: cannot append match into matches\n");
540  free(match);
541  goto rz_diff_matches_new_fail;
542  }
543  if (block->a_low < match->a && block->b_low < match->b) {
544  if (!stack_append_block(stack, block->a_low, match->a, block->b_low, match->b)) {
545  RZ_LOG_ERROR("rz_diff_matches_new: cannot append low block into stack\n");
546  goto rz_diff_matches_new_fail;
547  }
548  }
549  if (match->a + match->size < block->a_hi && match->b + match->size < block->b_hi) {
550  if (!stack_append_block(stack, match->a + match->size, block->a_hi, match->b + match->size, block->b_hi)) {
551  RZ_LOG_ERROR("rz_diff_matches_new: cannot append high block into stack\n");
552  goto rz_diff_matches_new_fail;
553  }
554  }
555  } else {
556  free(match);
557  }
558  free(block);
559  }
561 
562  adj_a = 0;
563  adj_b = 0;
564  adj_size = 0;
565  rz_list_foreach (matches, it, match) {
566  if ((adj_a + adj_size) == match->a && (adj_b + adj_size) == match->b) {
567  adj_size += match->size;
568  } else {
569  RzDiffMatch *m = adj_size ? match_new(adj_a, adj_b, adj_size) : NULL;
570  if (adj_size && (!m || !rz_list_append(non_adjacent, m))) {
571  RZ_LOG_ERROR("rz_diff_matches_new: cannot append match into non_adjacent\n");
572  free(m);
573  goto rz_diff_matches_new_fail;
574  }
575  adj_a = match->a;
576  adj_b = match->b;
577  adj_size = match->size;
578  }
579  }
580  match = adj_size ? match_new(adj_a, adj_b, adj_size) : NULL;
581  if (adj_size && (!match || !rz_list_append(non_adjacent, match))) {
582  RZ_LOG_ERROR("rz_diff_matches_new: cannot append match into non_adjacent\n");
583  free(match);
584  goto rz_diff_matches_new_fail;
585  }
586 
587  match = match_new(diff->a_size, diff->b_size, 0);
588  if (!match || !rz_list_append(non_adjacent, match)) {
589  RZ_LOG_ERROR("rz_diff_matches_new: cannot append match into non_adjacent\n");
590  free(match);
591  goto rz_diff_matches_new_fail;
592  }
593 
594  rz_list_free(matches);
596  return non_adjacent;
597 
598 rz_diff_matches_new_fail:
599  rz_list_free(non_adjacent);
600  rz_list_free(matches);
602  return NULL;
603 }
static int cmp_matches(RzDiffMatch *m0, RzDiffMatch *m1)
Definition: diff.c:474
static bool stack_append_block(RzList *stack, ut32 a_low, ut32 a_hi, ut32 b_low, ut32 b_hi)
Definition: diff.c:327
static RzDiffMatch * find_longest_match(RzDiff *diff, Block *block)
Definition: diff.c:356
RZ_API RZ_OWN RzList * rz_list_newf(RzListFree f)
Returns a new initialized RzList pointer and sets the free method.
Definition: list.c:248
RZ_API RZ_OWN void * rz_list_pop(RZ_NONNULL RzList *list)
Removes and returns the last element of the list.
Definition: list.c:376
RZ_API void rz_list_sort(RZ_NONNULL RzList *list, RZ_NONNULL RzListComparator cmp)
Sorts via merge sort or via insertion sort a list.
Definition: list.c:743
RZ_API RZ_BORROW RzListIter * rz_list_append(RZ_NONNULL RzList *list, void *data)
Appends at the end of the list a new element.
Definition: list.c:288
void(* RzListFree)(void *ptr)
Definition: rz_list.h:11
int(* RzListComparator)(const void *value, const void *list_data)
Definition: rz_list.h:33
Definition: diff.c:71
Definition: z80asm.h:140

References block_t::a_hi, block_t::a_low, block_t::b_hi, block_t::b_low, cmp_matches(), find_longest_match(), free(), regress::m, match_new(), NULL, rz_list_append(), rz_list_free(), rz_list_length(), rz_list_newf(), rz_list_pop(), rz_list_sort(), RZ_LOG_ERROR, rz_return_val_if_fail, and stack_append_block().

Referenced by rz_diff_opcodes_new(), and rz_diff_ratio().

◆ rz_diff_opcodes_grouped_new()

RZ_API RZ_OWN RzList* rz_diff_opcodes_grouped_new ( RZ_NONNULL RzDiff diff,
ut32  n_groups 
)

Generates groups of opcodes needed to go from A to B.

Generates groups of opcodes needed to go from A to B, but each group will end with N common EQUAL ops (if possible). default is 3 equals ops before splitting the group.

Definition at line 712 of file diff.c.

712  {
713  rz_return_val_if_fail(diff && n_groups > 1, NULL);
714  RzDiffOp *op = NULL;
715  RzListIter *it = NULL;
716  RzList *group = NULL;
717  RzList *groups = NULL;
718  RzList *opcodes = NULL;
719  st32 a_beg = 0, b_beg = 0, max_groups = 0;
720 
721  max_groups = n_groups << 1;
722 
724  if (!groups) {
725  RZ_LOG_ERROR("rz_diff_opcodes_grouped_new: cannot allocate groups\n");
726  goto rz_diff_opcodes_grouped_new_fail;
727  }
728 
730  if (!opcodes) {
731  goto rz_diff_opcodes_grouped_new_fail;
732  }
733 
734  if (rz_list_length(opcodes) < 1) {
735  op = opcode_new(RZ_DIFF_OP_EQUAL, 0, 1, 0, 1);
736  if (!op) {
737  RZ_LOG_ERROR("rz_diff_opcodes_grouped_new: cannot allocate op for opcodes\n");
738  goto rz_diff_opcodes_grouped_new_fail;
739  } else if (!rz_list_append(opcodes, op)) {
740  RZ_LOG_ERROR("rz_diff_opcodes_grouped_new: cannot append op into opcodes\n");
741  free(op);
742  goto rz_diff_opcodes_grouped_new_fail;
743  }
744  }
745 
747  if (op->type == RZ_DIFF_OP_EQUAL) {
748  opcode_set(op, op->type, RZ_MAX(op->a_beg, op->a_end - n_groups), op->a_end, RZ_MAX(op->b_beg, op->b_end - n_groups), op->b_end);
749  }
750 
752  if (op->type == RZ_DIFF_OP_EQUAL) {
753  opcode_set(op, op->type, op->a_beg, RZ_MIN(op->a_end, op->a_beg + n_groups), op->b_beg, RZ_MIN(op->b_end, op->b_beg + n_groups));
754  }
755 
756  group = rz_list_newf((RzListFree)free);
757  if (!group) {
758  RZ_LOG_ERROR("rz_diff_opcodes_grouped_new: cannot allocate group\n");
759  goto rz_diff_opcodes_grouped_new_fail;
760  }
761 
762  rz_list_foreach (opcodes, it, op) {
763  a_beg = op->a_beg;
764  b_beg = op->b_beg;
765 
766  if (op->type == RZ_DIFF_OP_EQUAL && (op->a_end - a_beg) > max_groups) {
767  // append the last op of the group, append group to groups and create a new group.
768  RzDiffOp *op2 = opcode_new(RZ_DIFF_OP_EQUAL, a_beg, RZ_MIN(op->a_end, a_beg + n_groups), b_beg, RZ_MIN(op->b_end, b_beg + n_groups));
769  if (!op2) {
770  RZ_LOG_ERROR("rz_diff_opcodes_grouped_new: cannot allocate op for group\n");
771  goto rz_diff_opcodes_grouped_new_fail;
772  } else if (!rz_list_append(group, op2)) {
773  RZ_LOG_ERROR("rz_diff_opcodes_grouped_new: cannot append op into group\n");
774  free(op2);
775  goto rz_diff_opcodes_grouped_new_fail;
776  } else if (!rz_list_append(groups, group)) {
777  RZ_LOG_ERROR("rz_diff_opcodes_grouped_new: cannot append group into groups\n");
778  rz_list_free(group);
779  goto rz_diff_opcodes_grouped_new_fail;
780  }
781 
782  group = rz_list_newf((RzListFree)free);
783  if (!group) {
784  RZ_LOG_ERROR("rz_diff_opcodes_grouped_new: cannot allocate new group\n");
785  goto rz_diff_opcodes_grouped_new_fail;
786  }
787  a_beg = RZ_MAX(a_beg, op->a_end - n_groups);
788  b_beg = RZ_MAX(b_beg, op->b_end - n_groups);
789  }
790 
791  op = opcode_new(op->type, a_beg, op->a_end, b_beg, op->b_end);
792  if (!op) {
793  rz_list_free(group);
794  RZ_LOG_ERROR("rz_diff_opcodes_grouped_new: cannot allocate op for group\n");
795  goto rz_diff_opcodes_grouped_new_fail;
796  } else if (!rz_list_append(group, op)) {
797  RZ_LOG_ERROR("rz_diff_opcodes_grouped_new: cannot append op into group\n");
798  free(op);
799  rz_list_free(group);
800  goto rz_diff_opcodes_grouped_new_fail;
801  }
802  }
803 
805  if (!(rz_list_length(opcodes) == 1 && op->type == RZ_DIFF_OP_EQUAL)) {
806  if (!rz_list_append(groups, group)) {
807  RZ_LOG_ERROR("rz_diff_opcodes_grouped_new: cannot append group into groups\n");
808  rz_list_free(group);
809  goto rz_diff_opcodes_grouped_new_fail;
810  }
811  } else {
812  rz_list_free(group);
813  }
814 
816  return groups;
817 
818 rz_diff_opcodes_grouped_new_fail:
821  return NULL;
822 }
OPCODE_DESC opcodes[]
Definition: avr_esil.c:1270
static void opcode_set(RzDiffOp *op, RzDiffOpType type, st32 a_beg, st32 a_end, st32 b_beg, st32 b_end)
Definition: diff.c:618
RZ_API RZ_OWN RzList * rz_diff_opcodes_new(RZ_NONNULL RzDiff *diff)
Generates a list of steps needed to go from A to B.
Definition: diff.c:631
static void group_op_free(RzList *ops)
Definition: diff.c:701
static RzDiffOp * opcode_new(RzDiffOpType type, st32 a_beg, st32 a_end, st32 b_beg, st32 b_end)
Definition: diff.c:605
RZ_API RZ_BORROW void * rz_list_first(RZ_NONNULL const RzList *list)
Returns the first element of the list.
Definition: list.c:77
RZ_API RZ_BORROW void * rz_list_last(RZ_NONNULL const RzList *list)
Returns the last element of the list.
Definition: list.c:86
@ RZ_DIFF_OP_EQUAL
Definition: rz_diff.h:22
#define RZ_MIN(x, y)
#define RZ_MAX(x, y)
#define st32
Definition: rz_types_base.h:12

References free(), group_op_free(), make_dist_html::groups, NULL, opcode_new(), opcode_set(), opcodes, RZ_DIFF_OP_EQUAL, rz_diff_opcodes_new(), rz_list_append(), rz_list_first(), rz_list_free(), rz_list_last(), rz_list_length(), rz_list_newf(), RZ_LOG_ERROR, RZ_MAX, RZ_MIN, rz_return_val_if_fail, and st32.

Referenced by rz_diff_unified_json(), and rz_diff_unified_text().

◆ rz_diff_opcodes_new()

RZ_API RZ_OWN RzList* rz_diff_opcodes_new ( RZ_NONNULL RzDiff diff)

Generates a list of steps needed to go from A to B.

Generates a list of opcodes that are needed to convert A to B.

Definition at line 631 of file diff.c.

631  {
633  ut32 a = 0, b = 0;
635  RzDiffOp *op = NULL;
637  RzListIter *it = NULL;
638  RzList *matches = NULL;
639  RzList *opcodes = NULL;
640 
641  matches = rz_diff_matches_new(diff);
642  if (!matches) {
643  goto rz_diff_opcodes_new_fail;
644  }
645 
647  if (!opcodes) {
648  RZ_LOG_ERROR("rz_diff_opcodes_new: cannot allocate opcodes\n");
649  goto rz_diff_opcodes_new_fail;
650  }
651 
652  a = 0;
653  b = 0;
654  rz_list_foreach (matches, it, match) {
656 
657  if (a < match->a && b < match->b) {
659  } else if (a < match->a) {
661  } else if (b < match->b) {
663  }
664 
665  if (type != RZ_DIFF_OP_INVALID) {
666  op = opcode_new(type, a, match->a, b, match->b);
667  if (!op) {
668  RZ_LOG_ERROR("rz_diff_opcodes_new: cannot allocate op\n");
669  goto rz_diff_opcodes_new_fail;
670  } else if (!rz_list_append(opcodes, op)) {
671  RZ_LOG_ERROR("rz_diff_opcodes_new: cannot append op into opcodes\n");
672  free(op);
673  goto rz_diff_opcodes_new_fail;
674  }
675  }
676  a = match->a + match->size;
677  b = match->b + match->size;
678 
679  if (match->size > 0) {
680  op = opcode_new(RZ_DIFF_OP_EQUAL, match->a, a, match->b, b);
681  if (!op) {
682  RZ_LOG_ERROR("rz_diff_opcodes_new: cannot allocate op\n");
683  goto rz_diff_opcodes_new_fail;
684  } else if (!rz_list_append(opcodes, op)) {
685  RZ_LOG_ERROR("rz_diff_opcodes_new: cannot append op into opcodes\n");
686  free(op);
687  goto rz_diff_opcodes_new_fail;
688  }
689  }
690  }
691 
692  rz_list_free(matches);
693  return opcodes;
694 
695 rz_diff_opcodes_new_fail:
696  rz_list_free(matches);
698  return NULL;
699 }
RZ_API RZ_OWN RzList * rz_diff_matches_new(RZ_NONNULL RzDiff *diff)
generates a list of matching blocks
Definition: diff.c:497
@ RZ_DIFF_OP_INVALID
Definition: rz_diff.h:20
@ RZ_DIFF_OP_REPLACE
Definition: rz_diff.h:24
@ RZ_DIFF_OP_DELETE
Definition: rz_diff.h:21
@ RZ_DIFF_OP_INSERT
Definition: rz_diff.h:23
enum rz_diff_op_type_t RzDiffOpType

References a, b, free(), NULL, opcode_new(), opcodes, rz_diff_matches_new(), RZ_DIFF_OP_DELETE, RZ_DIFF_OP_EQUAL, RZ_DIFF_OP_INSERT, RZ_DIFF_OP_INVALID, RZ_DIFF_OP_REPLACE, rz_list_append(), rz_list_free(), rz_list_newf(), RZ_LOG_ERROR, rz_return_val_if_fail, and type.

Referenced by rz_diff_opcodes_grouped_new().

◆ rz_diff_ratio()

RZ_API bool rz_diff_ratio ( RZ_NONNULL RzDiff diff,
RZ_NONNULL double *  result 
)

Calculates the similarity ratio between A and B.

Calculates the similarity ratio between A and B. Returns a number between 0 and 1; closer to 1 the result is more similar/identical the 2 arrays are.

Definition at line 831 of file diff.c.

831  {
832  rz_return_val_if_fail(diff && result, false);
833  RzList *matches = NULL;
835  RzListIter *it = NULL;
836  ut32 hits = 0;
837 
838  matches = rz_diff_matches_new(diff);
839  if (!matches) {
840  return false;
841  }
842  rz_list_foreach (matches, it, match) {
843  hits += match->size;
844  }
845  rz_list_free(matches);
846 
847  /* simple cast to avoid math issues */
848  double d_hits = hits;
849  double d_size = diff->a_size + diff->b_size;
850  if (d_size > 0.0) {
851  *result = (2.0 * d_hits) / d_size;
852  } else {
853  *result = 1.0;
854  }
855  return true;
856 }

References NULL, rz_diff_matches_new(), rz_list_free(), and rz_return_val_if_fail.

◆ rz_diff_sizes_ratio()

RZ_API bool rz_diff_sizes_ratio ( RZ_NONNULL RzDiff diff,
RZ_NONNULL double *  result 
)

Calculates the size ratio between A and B.

Works like the rz_diff_ratio, but this checks only how similar are the sizes between the two arrays. Returns a number between 0 and 1, like above.

Definition at line 865 of file diff.c.

865  {
866  rz_return_val_if_fail(diff && result, false);
867 
868  /* simple cast to avoid math issues */
869  double d_hits = RZ_MIN(diff->a_size, diff->b_size);
870  double d_size = diff->a_size + diff->b_size;
871  if (d_size > 0.0) {
872  *result = (2.0 * d_hits) / d_size;
873  } else {
874  *result = 1.0;
875  }
876  return true;
877 }

References RZ_MIN, and rz_return_val_if_fail.

◆ RZ_LIB_VERSION()

RZ_LIB_VERSION ( rz_diff  )

◆ set_a()

static bool set_a ( RzDiff diff,
const void *  a,
ut32  a_size 
)
static

Definition at line 126 of file diff.c.

126  {
127  rz_return_val_if_fail(a, false);
128 
129  diff->a = a;
130  diff->a_size = a_size;
131  return true;
132 }
ut32 a_size
Definition: diff.c:92

References rz_diff_t::a, a, rz_diff_t::a_size, and rz_return_val_if_fail.

Referenced by rz_diff_bytes_new(), rz_diff_generic_new(), and rz_diff_lines_new().

◆ set_b()

static bool set_b ( RzDiff diff,
const void *  b,
ut32  b_size 
)
static

Definition at line 138 of file diff.c.

138  {
139  rz_return_val_if_fail(b && diff->methods.elem_at && diff->methods.elem_hash && diff->methods.compare && diff->methods.ignore, false);
140 
141  diff->b = b;
142  diff->b_size = b_size;
143 
144  RzList *list = NULL;
145  RzDiffMethodElemAt elem_at = diff->methods.elem_at;
146  RzDiffMethodIgnore ignore = diff->methods.ignore;
147 
148  /* we need to generate the hits list for B */
149  ht_pp_free(diff->b_hits);
150  diff->b_hits = ht_pp_new(NULL, free_hits, NULL);
151  diff->b_hits->opt.cmp /* */ = diff->methods.compare;
152  diff->b_hits->opt.calcsizeK = default_ksize;
153  diff->b_hits->opt.dupkey /* */ = NULL; // avoid to duplicate key
154  diff->b_hits->opt.hashfn /* */ = diff->methods.elem_hash;
155 
156  for (ut64 i = 0; i < diff->b_size; ++i) {
157  const void *elem = elem_at(diff->b, i);
158  if (ignore && ignore(elem)) {
159  continue;
160  }
161 
162  list = ht_pp_find(diff->b_hits, elem, NULL);
163  if (!list) {
165  if (!list) {
166  RZ_LOG_ERROR("rz_diff_set_b: cannot allocate list\n");
167  return false;
168  }
169  ht_pp_insert(diff->b_hits, elem, list);
170  }
171 
172  if (!rz_list_append(list, NUM2PTR(i))) {
173  RZ_LOG_ERROR("rz_diff_set_b: cannot append index to list\n");
174  return false;
175  }
176  }
177 
178  return true;
179 }
#define NUM2PTR(x)
Definition: diff.c:66
static ut32 default_ksize(const void *a)
Definition: diff.c:114
static void free_hits(HtPPKv *kv)
Definition: diff.c:134
ut32 b_size
Definition: diff.c:93

References rz_diff_t::b, b, rz_diff_t::b_hits, rz_diff_t::b_size, methods_internal_t::compare, default_ksize(), methods_internal_t::elem_at, methods_internal_t::elem_hash, free_hits(), i, methods_internal_t::ignore, list(), rz_diff_t::methods, NULL, NUM2PTR, rz_list_append(), rz_list_newf(), RZ_LOG_ERROR, rz_return_val_if_fail, and ut64().

Referenced by rz_diff_bytes_new(), rz_diff_generic_new(), and rz_diff_lines_new().

◆ stack_append_block()

static bool stack_append_block ( RzList stack,
ut32  a_low,
ut32  a_hi,
ut32  b_low,
ut32  b_hi 
)
inlinestatic

Definition at line 327 of file diff.c.

327  {
328  Block *block = RZ_NEW0(Block);
329  if (!block) {
330  return false;
331  }
332 
333  block->a_low = a_low;
334  block->a_hi = a_hi;
335  block->b_low = b_low;
336  block->b_hi = b_hi;
337  if (!rz_list_append(stack, block)) {
338  free(block);
339  return false;
340  }
341  return true;
342 }

References block_t::a_hi, block_t::a_low, block_t::b_hi, block_t::b_low, free(), rz_list_append(), and RZ_NEW0.

Referenced by rz_diff_matches_new().