Rizin
unix-like reverse engineering framework and cli tools
zip_hash.c
Go to the documentation of this file.
1 /*
2  zip_hash.c -- hash table string -> uint64
3  Copyright (C) 2015-2021 Dieter Baron and Thomas Klausner
4 
5  This file is part of libzip, a library to manipulate ZIP archives.
6  The authors can be contacted at <info@libzip.org>
7 
8  Redistribution and use in source and binary forms, with or without
9  modification, are permitted provided that the following conditions
10  are met:
11  1. Redistributions of source code must retain the above copyright
12  notice, this list of conditions and the following disclaimer.
13  2. Redistributions in binary form must reproduce the above copyright
14  notice, this list of conditions and the following disclaimer in
15  the documentation and/or other materials provided with the
16  distribution.
17  3. The names of the authors may not be used to endorse or promote
18  products derived from this software without specific prior
19  written permission.
20 
21  THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS
22  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
25  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
27  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
29  IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
30  OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
31  IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33 
34 #include "zipint.h"
35 #include <stdlib.h>
36 #include <string.h>
37 
38 /* parameter for the string hash function */
39 #define HASH_MULTIPLIER 33
40 #define HASH_START 5381
41 
42 /* hash table's fill ratio is kept between these by doubling/halfing its size as necessary */
43 #define HASH_MAX_FILL .75
44 #define HASH_MIN_FILL .01
45 
46 /* but hash table size is kept between these */
47 #define HASH_MIN_SIZE 256
48 #define HASH_MAX_SIZE 0x80000000ul
49 
51  const zip_uint8_t *name;
56 };
57 typedef struct zip_hash_entry zip_hash_entry_t;
58 
59 struct zip_hash {
63 };
64 
65 
66 /* free list of entries */
67 static void
69  while (entry != NULL) {
70  zip_hash_entry_t *next = entry->next;
71  free(entry);
72  entry = next;
73  }
74 }
75 
76 
77 /* compute hash of string, full 32 bit value */
78 static zip_uint32_t
81 
82  if (name == NULL) {
83  return 0;
84  }
85 
86  while (*name != 0) {
87  value = (zip_uint64_t)(((value * HASH_MULTIPLIER) + (zip_uint8_t)*name) % 0x100000000ul);
88  name++;
89  }
90 
91  return (zip_uint32_t)value;
92 }
93 
94 
95 /* resize hash table; new_size must be a power of 2, can be larger or smaller than current size */
96 static bool
98  zip_hash_entry_t **new_table;
99 
100  if (new_size == hash->table_size) {
101  return true;
102  }
103 
104  if ((new_table = (zip_hash_entry_t **)calloc(new_size, sizeof(zip_hash_entry_t *))) == NULL) {
106  return false;
107  }
108 
109  if (hash->nentries > 0) {
110  zip_uint32_t i;
111 
112  for (i = 0; i < hash->table_size; i++) {
113  zip_hash_entry_t *entry = hash->table[i];
114  while (entry) {
115  zip_hash_entry_t *next = entry->next;
116 
117  zip_uint32_t new_index = entry->hash_value % new_size;
118 
119  entry->next = new_table[new_index];
120  new_table[new_index] = entry;
121 
122  entry = next;
123  }
124  }
125  }
126 
127  free(hash->table);
128  hash->table = new_table;
129  hash->table_size = new_size;
130 
131  return true;
132 }
133 
134 
135 static zip_uint32_t
137  double needed_size = capacity / HASH_MAX_FILL;
138  zip_uint32_t v;
139 
140  if (needed_size > ZIP_UINT32_MAX) {
141  v = ZIP_UINT32_MAX;
142  }
143  else {
144  v = (zip_uint32_t)needed_size;
145  }
146 
147  if (v > HASH_MAX_SIZE) {
148  return HASH_MAX_SIZE;
149  }
150 
151  /* From Bit Twiddling Hacks by Sean Eron Anderson <seander@cs.stanford.edu>
152  (http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2). */
153 
154  v--;
155  v |= v >> 1;
156  v |= v >> 2;
157  v |= v >> 4;
158  v |= v >> 8;
159  v |= v >> 16;
160  v++;
161 
162  return v;
163 }
164 
165 
166 zip_hash_t *
168  zip_hash_t *hash;
169 
170  if ((hash = (zip_hash_t *)malloc(sizeof(zip_hash_t))) == NULL) {
172  return NULL;
173  }
174 
175  hash->table_size = 0;
176  hash->nentries = 0;
177  hash->table = NULL;
178 
179  return hash;
180 }
181 
182 
183 void
185  zip_uint32_t i;
186 
187  if (hash == NULL) {
188  return;
189  }
190 
191  if (hash->table != NULL) {
192  for (i = 0; i < hash->table_size; i++) {
193  if (hash->table[i] != NULL) {
194  free_list(hash->table[i]);
195  }
196  }
197  free(hash->table);
198  }
199  free(hash);
200 }
201 
202 
203 /* insert into hash, return error on existence or memory issues */
204 bool
206  zip_uint32_t hash_value, table_index;
208 
209  if (hash == NULL || name == NULL || index > ZIP_INT64_MAX) {
211  return false;
212  }
213 
214  if (hash->table_size == 0) {
215  if (!hash_resize(hash, HASH_MIN_SIZE, error)) {
216  return false;
217  }
218  }
219 
220  hash_value = hash_string(name);
221  table_index = hash_value % hash->table_size;
222 
223  for (entry = hash->table[table_index]; entry != NULL; entry = entry->next) {
224  if (entry->hash_value == hash_value && strcmp((const char *)name, (const char *)entry->name) == 0) {
225  if (((flags & ZIP_FL_UNCHANGED) && entry->orig_index != -1) || entry->current_index != -1) {
227  return false;
228  }
229  else {
230  break;
231  }
232  }
233  }
234 
235  if (entry == NULL) {
236  if ((entry = (zip_hash_entry_t *)malloc(sizeof(zip_hash_entry_t))) == NULL) {
238  return false;
239  }
240  entry->name = name;
241  entry->next = hash->table[table_index];
242  hash->table[table_index] = entry;
243  entry->hash_value = hash_value;
244  entry->orig_index = -1;
245  hash->nentries++;
246  if (hash->nentries > hash->table_size * HASH_MAX_FILL && hash->table_size < HASH_MAX_SIZE) {
247  if (!hash_resize(hash, hash->table_size * 2, error)) {
248  return false;
249  }
250  }
251  }
252 
253  if (flags & ZIP_FL_UNCHANGED) {
254  entry->orig_index = (zip_int64_t)index;
255  }
256  entry->current_index = (zip_int64_t)index;
257 
258  return true;
259 }
260 
261 
262 /* remove entry from hash, error if not found */
263 bool
265  zip_uint32_t hash_value, index;
266  zip_hash_entry_t *entry, *previous;
267 
268  if (hash == NULL || name == NULL) {
270  return false;
271  }
272 
273  if (hash->nentries > 0) {
274  hash_value = hash_string(name);
275  index = hash_value % hash->table_size;
276  previous = NULL;
277  entry = hash->table[index];
278  while (entry) {
279  if (entry->hash_value == hash_value && strcmp((const char *)name, (const char *)entry->name) == 0) {
280  if (entry->orig_index == -1) {
281  if (previous) {
282  previous->next = entry->next;
283  }
284  else {
285  hash->table[index] = entry->next;
286  }
287  free(entry);
288  hash->nentries--;
289  if (hash->nentries < hash->table_size * HASH_MIN_FILL && hash->table_size > HASH_MIN_SIZE) {
290  if (!hash_resize(hash, hash->table_size / 2, error)) {
291  return false;
292  }
293  }
294  }
295  else {
296  entry->current_index = -1;
297  }
298  return true;
299  }
300  previous = entry;
301  entry = entry->next;
302  }
303  }
304 
306  return false;
307 }
308 
309 
310 /* find value for entry in hash, -1 if not found */
313  zip_uint32_t hash_value, index;
315 
316  if (hash == NULL || name == NULL) {
318  return -1;
319  }
320 
321  if (hash->nentries > 0) {
322  hash_value = hash_string(name);
323  index = hash_value % hash->table_size;
324  for (entry = hash->table[index]; entry != NULL; entry = entry->next) {
325  if (strcmp((const char *)name, (const char *)entry->name) == 0) {
326  if (flags & ZIP_FL_UNCHANGED) {
327  if (entry->orig_index != -1) {
328  return entry->orig_index;
329  }
330  }
331  else {
332  if (entry->current_index != -1) {
333  return entry->current_index;
334  }
335  }
336  break;
337  }
338  }
339  }
340 
342  return -1;
343 }
344 
345 
346 bool
348  zip_uint32_t new_size;
349 
350  if (capacity == 0) {
351  return true;
352  }
353 
354  new_size = size_for_capacity(capacity);
355 
356  if (new_size <= hash->table_size) {
357  return true;
358  }
359 
360  if (!hash_resize(hash, new_size, error)) {
361  return false;
362  }
363 
364  return true;
365 }
366 
367 
368 bool
370  zip_uint32_t i;
371  zip_hash_entry_t *entry, *previous;
372 
373  for (i = 0; i < hash->table_size; i++) {
374  previous = NULL;
375  entry = hash->table[i];
376  while (entry) {
377  if (entry->orig_index == -1) {
379  if (previous) {
380  previous->next = entry->next;
381  }
382  else {
383  hash->table[i] = entry->next;
384  }
385  p = entry;
386  entry = entry->next;
387  /* previous does not change */
388  free(p);
389  hash->nentries--;
390  }
391  else {
392  entry->current_index = entry->orig_index;
393  previous = entry;
394  entry = entry->next;
395  }
396  }
397  }
398 
399  if (hash->nentries < hash->table_size * HASH_MIN_FILL && hash->table_size > HASH_MIN_SIZE) {
400  zip_uint32_t new_size = hash->table_size / 2;
401  while (hash->nentries < new_size * HASH_MIN_FILL && new_size > HASH_MIN_SIZE) {
402  new_size /= 2;
403  }
404  if (!hash_resize(hash, new_size, error)) {
405  return false;
406  }
407  }
408 
409  return true;
410 }
lzma_index ** i
Definition: index.h:629
static int value
Definition: cmd_api.c:93
#define NULL
Definition: cris-opc.c:27
const char * v
Definition: dsignal.c:12
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
void * p
Definition: libc.cpp:67
ZIP_EXTERN void zip_error_set(zip_error_t *_Nullable, int, int)
Definition: zip_error.c:126
#define ZIP_ER_MEMORY
Definition: zip.h:119
#define ZIP_ER_EXISTS
Definition: zip.h:115
#define ZIP_ER_NOENT
Definition: zip.h:114
#define ZIP_ER_INVAL
Definition: zip.h:123
zip_uint32_t zip_flags_t
Definition: zip.h:347
#define ZIP_FL_UNCHANGED
Definition: zip.h:79
void * malloc(size_t size)
Definition: malloc.c:123
void * calloc(size_t number, size_t size)
Definition: malloc.c:102
const char * name
Definition: op.c:541
static struct sockaddr static addrlen static backlog const void static flags void flags
Definition: sfsocketcall.h:123
Definition: zipcmp.c:77
char * name
Definition: zipcmp.c:78
Definition: z80asm.h:102
Definition: zip.h:284
Definition: zip_hash.c:50
const zip_uint8_t * name
Definition: zip_hash.c:51
zip_uint32_t hash_value
Definition: zip_hash.c:55
zip_int64_t orig_index
Definition: zip_hash.c:52
zip_int64_t current_index
Definition: zip_hash.c:53
struct zip_hash_entry * next
Definition: zip_hash.c:54
zip_hash_entry_t ** table
Definition: zip_hash.c:62
zip_uint32_t table_size
Definition: zip_hash.c:60
zip_uint64_t nentries
Definition: zip_hash.c:61
void error(const char *msg)
Definition: untgz.c:593
bool _zip_hash_revert(zip_hash_t *hash, zip_error_t *error)
Definition: zip_hash.c:369
static zip_uint32_t hash_string(const zip_uint8_t *name)
Definition: zip_hash.c:79
bool _zip_hash_delete(zip_hash_t *hash, const zip_uint8_t *name, zip_error_t *error)
Definition: zip_hash.c:264
zip_int64_t _zip_hash_lookup(zip_hash_t *hash, const zip_uint8_t *name, zip_flags_t flags, zip_error_t *error)
Definition: zip_hash.c:312
#define HASH_MAX_FILL
Definition: zip_hash.c:43
#define HASH_MIN_FILL
Definition: zip_hash.c:44
#define HASH_MIN_SIZE
Definition: zip_hash.c:47
static bool hash_resize(zip_hash_t *hash, zip_uint32_t new_size, zip_error_t *error)
Definition: zip_hash.c:97
#define HASH_MULTIPLIER
Definition: zip_hash.c:39
#define HASH_START
Definition: zip_hash.c:40
static void free_list(zip_hash_entry_t *entry)
Definition: zip_hash.c:68
#define HASH_MAX_SIZE
Definition: zip_hash.c:48
bool _zip_hash_reserve_capacity(zip_hash_t *hash, zip_uint64_t capacity, zip_error_t *error)
Definition: zip_hash.c:347
bool _zip_hash_add(zip_hash_t *hash, const zip_uint8_t *name, zip_uint64_t index, zip_flags_t flags, zip_error_t *error)
Definition: zip_hash.c:205
zip_hash_t * _zip_hash_new(zip_error_t *error)
Definition: zip_hash.c:167
static zip_uint32_t size_for_capacity(zip_uint64_t capacity)
Definition: zip_hash.c:136
void _zip_hash_free(zip_hash_t *hash)
Definition: zip_hash.c:184
uint64_t zip_uint64_t
Definition: zipconf.h:39
#define ZIP_UINT32_MAX
Definition: zipconf.h:51
uint32_t zip_uint32_t
Definition: zipconf.h:37
uint8_t zip_uint8_t
Definition: zipconf.h:33
#define ZIP_INT64_MAX
Definition: zipconf.h:54
int64_t zip_int64_t
Definition: zipconf.h:38