Rizin
unix-like reverse engineering framework and cli tools
distance.c
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: 2017 Fangrui Song <i@maskray.me>
2 // SPDX-FileCopyrightText: 2016 NikolaiHampton <nikolaih@3583bytesready.net>
3 // SPDX-License-Identifier: LGPL-3.0-only
4 #include <rz_diff.h>
5 #include <rz_util/rz_assert.h>
6 
14 RZ_API bool rz_diff_myers_distance(RZ_NONNULL const ut8 *a, ut32 la, RZ_NONNULL const ut8 *b, ut32 lb, RZ_NULLABLE ut32 *distance, RZ_NULLABLE double *similarity) {
15  rz_return_val_if_fail(a && b, false);
16 
17  const ut32 length = la + lb;
18  const ut8 *ea = a + la, *eb = b + lb;
19 
20  for (; a < ea && b < eb && *a == *b; a++, b++) {
21  }
22  for (; a < ea && b < eb && ea[-1] == eb[-1]; ea--, eb--) {
23  }
24  la = ea - a;
25  lb = eb - b;
26  ut32 *v0, *v;
27  st64 m = (st64)la + lb, di = 0, low, high, i, x, y;
28  if (m + 2 > SIZE_MAX / sizeof(st64) || !(v0 = malloc((m + 2) * sizeof(ut32)))) {
29  return false;
30  }
31  v = v0 + lb;
32  v[1] = 0;
33  for (di = 0; di <= m; di++) {
34  low = -di + 2 * RZ_MAX(0, di - (st64)lb);
35  high = di - 2 * RZ_MAX(0, di - (st64)la);
36  for (i = low; i <= high; i += 2) {
37  x = i == -di || (i != di && v[i - 1] < v[i + 1]) ? v[i + 1] : v[i - 1] + 1;
38  y = x - i;
39  while (x < la && y < lb && a[x] == b[y]) {
40  x++;
41  y++;
42  }
43  v[i] = x;
44  if (x == la && y == lb) {
45  goto out;
46  }
47  }
48  }
49 
50 out:
51  free(v0);
52  if (distance) {
53  *distance = di;
54  }
55  if (similarity) {
56  *similarity = length ? 1.0 - (double)di / length : 1.0;
57  }
58  return true;
59 }
60 
68 RZ_API bool rz_diff_levenstein_distance(RZ_NONNULL const ut8 *a, ut32 la, RZ_NONNULL const ut8 *b, ut32 lb, RZ_NULLABLE ut32 *distance, RZ_NULLABLE double *similarity) {
69  rz_return_val_if_fail(a && b, false);
70 
71  const ut32 length = RZ_MAX(la, lb);
72  const ut8 *ea = a + la, *eb = b + lb, *t;
73  ut32 *d, i, j;
74 
75  for (; a < ea && b < eb && *a == *b; a++, b++) {
76  }
77  for (; a < ea && b < eb && ea[-1] == eb[-1]; ea--, eb--) {
78  }
79  la = ea - a;
80  lb = eb - b;
81  if (la < lb) {
82  i = la;
83  la = lb;
84  lb = i;
85  t = a;
86  a = b;
87  b = t;
88  }
89 
90  if (sizeof(ut32) > SIZE_MAX / (lb + 1) || !(d = malloc((lb + 1) * sizeof(ut32)))) {
91  return false;
92  }
93  for (i = 0; i <= lb; i++) {
94  d[i] = i;
95  }
96  for (i = 0; i < la; i++) {
97  ut32 ul = d[0];
98  d[0] = i + 1;
99  for (j = 0; j < lb; j++) {
100  ut32 u = d[j + 1];
101  d[j + 1] = a[i] == b[j] ? ul : RZ_MIN(ul, RZ_MIN(d[j], u)) + 1;
102  ul = u;
103  }
104  }
105 
106  if (distance) {
107  *distance = d[lb];
108  }
109  if (similarity) {
110  *similarity = length ? 1.0 - (double)d[lb] / length : 1.0;
111  }
112  free(d);
113  return true;
114 }
lzma_index ** i
Definition: index.h:629
const lzma_allocator const uint8_t size_t uint8_t * out
Definition: block.h:528
#define RZ_API
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void length
Definition: sflib.h:133
uint32_t ut32
RZ_API bool rz_diff_myers_distance(RZ_NONNULL const ut8 *a, ut32 la, RZ_NONNULL const ut8 *b, ut32 lb, RZ_NULLABLE ut32 *distance, RZ_NULLABLE double *similarity)
Calculates the distance between two buffers using the Myers algorithm.
Definition: distance.c:14
RZ_API bool rz_diff_levenstein_distance(RZ_NONNULL const ut8 *a, ut32 la, RZ_NONNULL const ut8 *b, ut32 lb, RZ_NULLABLE ut32 *distance, RZ_NULLABLE double *similarity)
Calculates the distance between two buffers using the Levenshtein algorithm.
Definition: distance.c:68
const char * v
Definition: dsignal.c:12
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
@ v0
Definition: lanai.h:84
uint8_t ut8
Definition: lh5801.h:11
void * malloc(size_t size)
Definition: malloc.c:123
int x
Definition: mipsasm.c:20
#define rz_return_val_if_fail(expr, val)
Definition: rz_assert.h:108
#define RZ_NULLABLE
Definition: rz_types.h:65
#define RZ_NONNULL
Definition: rz_types.h:64
#define RZ_MIN(x, y)
#define st64
Definition: rz_types_base.h:10
#define RZ_MAX(x, y)
#define d(i)
Definition: sha256.c:44
#define b(i)
Definition: sha256.c:42
#define a(i)
Definition: sha256.c:41
#define SIZE_MAX