Rizin
unix-like reverse engineering framework and cli tools
lz_decoder.h
Go to the documentation of this file.
1 //
6 // Authors: Igor Pavlov
7 // Lasse Collin
8 //
9 // This file has been put into the public domain.
10 // You can do whatever you want with this file.
11 //
13 
14 #ifndef LZMA_LZ_DECODER_H
15 #define LZMA_LZ_DECODER_H
16 
17 #include "common.h"
18 
19 
20 typedef struct {
25 
28  size_t pos;
29 
33  size_t full;
34 
36  size_t limit;
37 
39  size_t size;
40 
42  bool need_reset;
43 
44 } lzma_dict;
45 
46 
47 typedef struct {
48  size_t dict_size;
52 
53 
54 typedef struct {
56  void *coder;
57 
59  lzma_ret (*code)(void *coder,
60  lzma_dict *restrict dict, const uint8_t *restrict in,
61  size_t *restrict in_pos, size_t in_size);
62 
63  void (*reset)(void *coder, const void *options);
64 
66  void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size);
67 
69  void (*end)(void *coder, const lzma_allocator *allocator);
70 
72 
73 
74 #define LZMA_LZ_DECODER_INIT \
75  (lzma_lz_decoder){ \
76  .coder = NULL, \
77  .code = NULL, \
78  .reset = NULL, \
79  .set_uncompressed = NULL, \
80  .end = NULL, \
81  }
82 
83 
87  lzma_ret (*lz_init)(lzma_lz_decoder *lz,
88  const lzma_allocator *allocator, const void *options,
89  lzma_lz_options *lz_options));
90 
91 extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
92 
94  void *coder, lzma_vli uncompressed_size);
95 
96 
98 // Inline functions //
100 
102 static inline uint8_t
103 dict_get(const lzma_dict *const dict, const uint32_t distance)
104 {
105  return dict->buf[dict->pos - distance - 1
106  + (distance < dict->pos ? 0 : dict->size)];
107 }
108 
109 
111 static inline bool
112 dict_is_empty(const lzma_dict *const dict)
113 {
114  return dict->full == 0;
115 }
116 
117 
119 static inline bool
120 dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
121 {
122  return dict->full > distance;
123 }
124 
125 
127 static inline bool
129 {
130  // Don't write past the end of the dictionary.
131  const size_t dict_avail = dict->limit - dict->pos;
132  uint32_t left = my_min(dict_avail, *len);
133  *len -= left;
134 
135  // Repeat a block of data from the history. Because memcpy() is faster
136  // than copying byte by byte in a loop, the copying process gets split
137  // into three cases.
138  if (distance < left) {
139  // Source and target areas overlap, thus we can't use
140  // memcpy() nor even memmove() safely.
141  do {
142  dict->buf[dict->pos] = dict_get(dict, distance);
143  ++dict->pos;
144  } while (--left > 0);
145 
146  } else if (distance < dict->pos) {
147  // The easiest and fastest case
148  memcpy(dict->buf + dict->pos,
149  dict->buf + dict->pos - distance - 1,
150  left);
151  dict->pos += left;
152 
153  } else {
154  // The bigger the dictionary, the more rare this
155  // case occurs. We need to "wrap" the dict, thus
156  // we might need two memcpy() to copy all the data.
157  assert(dict->full == dict->size);
158  const uint32_t copy_pos
159  = dict->pos - distance - 1 + dict->size;
160  uint32_t copy_size = dict->size - copy_pos;
161 
162  if (copy_size < left) {
163  memmove(dict->buf + dict->pos, dict->buf + copy_pos,
164  copy_size);
165  dict->pos += copy_size;
166  copy_size = left - copy_size;
167  memcpy(dict->buf + dict->pos, dict->buf, copy_size);
168  dict->pos += copy_size;
169  } else {
170  memmove(dict->buf + dict->pos, dict->buf + copy_pos,
171  left);
172  dict->pos += left;
173  }
174  }
175 
176  // Update how full the dictionary is.
177  if (dict->full < dict->pos)
178  dict->full = dict->pos;
179 
180  return unlikely(*len != 0);
181 }
182 
183 
186 static inline bool
188 {
189  if (unlikely(dict->pos == dict->limit))
190  return true;
191 
192  dict->buf[dict->pos++] = byte;
193 
194  if (dict->pos > dict->full)
195  dict->full = dict->pos;
196 
197  return false;
198 }
199 
200 
202 static inline void
204  size_t *restrict in_pos, size_t in_size,
205  size_t *restrict left)
206 {
207  // NOTE: If we are being given more data than the size of the
208  // dictionary, it could be possible to optimize the LZ decoder
209  // so that not everything needs to go through the dictionary.
210  // This shouldn't be very common thing in practice though, and
211  // the slowdown of one extra memcpy() isn't bad compared to how
212  // much time it would have taken if the data were compressed.
213 
214  if (in_size - *in_pos > *left)
215  in_size = *in_pos + *left;
216 
217  *left -= lzma_bufcpy(in, in_pos, in_size,
218  dict->buf, &dict->pos, dict->limit);
219 
220  if (dict->pos > dict->full)
221  dict->full = dict->pos;
222 
223  return;
224 }
225 
226 
227 static inline void
229 {
230  dict->need_reset = true;
231  return;
232 }
233 
234 #endif
size_t len
Definition: 6502dis.c:15
const lzma_allocator const uint8_t size_t * in_pos
Definition: block.h:579
const lzma_allocator const uint8_t size_t in_size
Definition: block.h:527
const lzma_allocator * allocator
Definition: block.h:377
const lzma_allocator const uint8_t * in
Definition: block.h:527
const lzma_filter * filters
Definition: container.h:315
#define restrict
memcpy(mem, inblock.get(), min(CONTAINING_RECORD(inblock.get(), MEMBLOCK, data) ->size, size))
static const char struct stat static buf struct stat static buf static vhangup int options
Definition: sflib.h:145
#define unlikely(expr)
Definition: lz4.c:177
lzma_ret lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters, lzma_ret(*lz_init)(lzma_lz_decoder *lz, const lzma_allocator *allocator, const void *options, lzma_lz_options *lz_options))
Definition: lz_decoder.c:212
static bool dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
Repeat *len bytes at distance.
Definition: lz_decoder.h:128
static bool dict_put(lzma_dict *dict, uint8_t byte)
Definition: lz_decoder.h:187
static uint8_t dict_get(const lzma_dict *const dict, const uint32_t distance)
Get a byte from the history buffer.
Definition: lz_decoder.h:103
static bool dict_is_empty(const lzma_dict *const dict)
Test if dictionary is empty.
Definition: lz_decoder.h:112
static bool dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
Validate the match distance.
Definition: lz_decoder.h:120
static void dict_reset(lzma_dict *dict)
Definition: lz_decoder.h:228
uint64_t lzma_lz_decoder_memusage(size_t dictionary_size)
Definition: lz_decoder.c:300
void lzma_lz_decoder_uncompressed(void *coder, lzma_vli uncompressed_size)
Definition: lz_decoder.c:307
static void dict_write(lzma_dict *restrict dict, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, size_t *restrict left)
Copies arbitrary amount of data into the dictionary.
Definition: lz_decoder.h:203
assert(limit<=UINT32_MAX/2)
unsigned int uint32_t
Definition: sftypes.h:29
unsigned long uint64_t
Definition: sftypes.h:28
unsigned char uint8_t
Definition: sftypes.h:31
Definition: inftree9.h:24
Custom functions for memory handling.
Definition: base.h:372
bool need_reset
True when dictionary should be reset before decoding more data.
Definition: lz_decoder.h:42
size_t size
Size of the dictionary.
Definition: lz_decoder.h:39
size_t limit
Write limit.
Definition: lz_decoder.h:36
uint8_t * buf
Definition: lz_decoder.h:24
size_t full
Definition: lz_decoder.h:33
size_t pos
Definition: lz_decoder.h:28
void * coder
Data specific to the LZ-based decoder.
Definition: lz_decoder.h:56
const uint8_t * preset_dict
TODO: Comment.
Definition: lz_decoder.h:49
size_t preset_dict_size
Definition: lz_decoder.h:50
size_t dict_size
Size of the history buffer.
Definition: lz_decoder.h:48
Hold data and function pointers of the next filter in the chain.
Definition: common.h:135
int pos
Definition: main.c:11
uint64_t uncompressed_size
Definition: list.c:106
#define my_min(x, y)
Definition: sysdefs.h:185
uint64_t lzma_vli
Variable-length integer type.
Definition: vli.h:63
lzma_ret
Return values used by several functions in liblzma.
Definition: base.h:57
size_t lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size)
Definition: common.c:94