Rizin
unix-like reverse engineering framework and cli tools
distance.c File Reference
#include <rz_diff.h>
#include <rz_util/rz_assert.h>

Go to the source code of this file.

Functions

RZ_API bool rz_diff_myers_distance (RZ_NONNULL const ut8 *a, ut32 la, RZ_NONNULL const ut8 *b, ut32 lb, RZ_NULLABLE ut32 *distance, RZ_NULLABLE double *similarity)
 Calculates the distance between two buffers using the Myers algorithm. More...
 
RZ_API bool rz_diff_levenstein_distance (RZ_NONNULL const ut8 *a, ut32 la, RZ_NONNULL const ut8 *b, ut32 lb, RZ_NULLABLE ut32 *distance, RZ_NULLABLE double *similarity)
 Calculates the distance between two buffers using the Levenshtein algorithm. More...
 

Function Documentation

◆ rz_diff_levenstein_distance()

RZ_API bool rz_diff_levenstein_distance ( RZ_NONNULL const ut8 a,
ut32  la,
RZ_NONNULL const ut8 b,
ut32  lb,
RZ_NULLABLE ut32 distance,
RZ_NULLABLE double *  similarity 
)

Calculates the distance between two buffers using the Levenshtein algorithm.

Calculates the distance between two buffers using the Levenshtein distance algorithm.

  • distance: is the minimum number of edits needed to transform A into B
  • similarity: is a number that defines how similar/identical the 2 buffers are.

Definition at line 68 of file distance.c.

68  {
69  rz_return_val_if_fail(a && b, false);
70 
71  const ut32 length = RZ_MAX(la, lb);
72  const ut8 *ea = a + la, *eb = b + lb, *t;
73  ut32 *d, i, j;
74 
75  for (; a < ea && b < eb && *a == *b; a++, b++) {
76  }
77  for (; a < ea && b < eb && ea[-1] == eb[-1]; ea--, eb--) {
78  }
79  la = ea - a;
80  lb = eb - b;
81  if (la < lb) {
82  i = la;
83  la = lb;
84  lb = i;
85  t = a;
86  a = b;
87  b = t;
88  }
89 
90  if (sizeof(ut32) > SIZE_MAX / (lb + 1) || !(d = malloc((lb + 1) * sizeof(ut32)))) {
91  return false;
92  }
93  for (i = 0; i <= lb; i++) {
94  d[i] = i;
95  }
96  for (i = 0; i < la; i++) {
97  ut32 ul = d[0];
98  d[0] = i + 1;
99  for (j = 0; j < lb; j++) {
100  ut32 u = d[j + 1];
101  d[j + 1] = a[i] == b[j] ? ul : RZ_MIN(ul, RZ_MIN(d[j], u)) + 1;
102  ul = u;
103  }
104  }
105 
106  if (distance) {
107  *distance = d[lb];
108  }
109  if (similarity) {
110  *similarity = length ? 1.0 - (double)d[lb] / length : 1.0;
111  }
112  free(d);
113  return true;
114 }
lzma_index ** i
Definition: index.h:629
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void length
Definition: sflib.h:133
uint32_t ut32
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
uint8_t ut8
Definition: lh5801.h:11
void * malloc(size_t size)
Definition: malloc.c:123
#define rz_return_val_if_fail(expr, val)
Definition: rz_assert.h:108
#define RZ_MIN(x, y)
#define RZ_MAX(x, y)
#define d(i)
Definition: sha256.c:44
#define b(i)
Definition: sha256.c:42
#define a(i)
Definition: sha256.c:41
#define SIZE_MAX

References a, b, d, free(), i, length, malloc(), RZ_MAX, RZ_MIN, rz_return_val_if_fail, and SIZE_MAX.

Referenced by rz_analysis_diff_bb(), rz_analysis_diff_fcn(), and rz_diff_calculate_distance().

◆ rz_diff_myers_distance()

RZ_API bool rz_diff_myers_distance ( RZ_NONNULL const ut8 a,
ut32  la,
RZ_NONNULL const ut8 b,
ut32  lb,
RZ_NULLABLE ut32 distance,
RZ_NULLABLE double *  similarity 
)

Calculates the distance between two buffers using the Myers algorithm.

Calculates the distance between two buffers using the Eugene W. Myers' O(ND) diff algorithm.

  • distance: is the minimum number of edits needed to transform A into B
  • similarity: is a number that defines how similar/identical the 2 buffers are.

Definition at line 14 of file distance.c.

14  {
15  rz_return_val_if_fail(a && b, false);
16 
17  const ut32 length = la + lb;
18  const ut8 *ea = a + la, *eb = b + lb;
19 
20  for (; a < ea && b < eb && *a == *b; a++, b++) {
21  }
22  for (; a < ea && b < eb && ea[-1] == eb[-1]; ea--, eb--) {
23  }
24  la = ea - a;
25  lb = eb - b;
26  ut32 *v0, *v;
27  st64 m = (st64)la + lb, di = 0, low, high, i, x, y;
28  if (m + 2 > SIZE_MAX / sizeof(st64) || !(v0 = malloc((m + 2) * sizeof(ut32)))) {
29  return false;
30  }
31  v = v0 + lb;
32  v[1] = 0;
33  for (di = 0; di <= m; di++) {
34  low = -di + 2 * RZ_MAX(0, di - (st64)lb);
35  high = di - 2 * RZ_MAX(0, di - (st64)la);
36  for (i = low; i <= high; i += 2) {
37  x = i == -di || (i != di && v[i - 1] < v[i + 1]) ? v[i + 1] : v[i - 1] + 1;
38  y = x - i;
39  while (x < la && y < lb && a[x] == b[y]) {
40  x++;
41  y++;
42  }
43  v[i] = x;
44  if (x == la && y == lb) {
45  goto out;
46  }
47  }
48  }
49 
50 out:
51  free(v0);
52  if (distance) {
53  *distance = di;
54  }
55  if (similarity) {
56  *similarity = length ? 1.0 - (double)di / length : 1.0;
57  }
58  return true;
59 }
const lzma_allocator const uint8_t size_t uint8_t * out
Definition: block.h:528
const char * v
Definition: dsignal.c:12
@ v0
Definition: lanai.h:84
int x
Definition: mipsasm.c:20
#define st64
Definition: rz_types_base.h:10

References a, b, free(), i, length, regress::m, malloc(), out, RZ_MAX, rz_return_val_if_fail, SIZE_MAX, st64, v, v0, and x.

Referenced by rz_diff_calculate_distance().