Rizin
unix-like reverse engineering framework and cli tools
lz_encoder.h
Go to the documentation of this file.
1 //
6 // Authors: Igor Pavlov
7 // Lasse Collin
8 //
9 // This file has been put into the public domain.
10 // You can do whatever you want with this file.
11 //
13 
14 #ifndef LZMA_LZ_ENCODER_H
15 #define LZMA_LZ_ENCODER_H
16 
17 #include "common.h"
18 
19 
22 typedef struct {
25 } lzma_match;
26 
27 
28 typedef struct lzma_mf_s lzma_mf;
29 struct lzma_mf_s {
31  // In Window //
33 
36 
40 
46 
52 
59 
64 
68 
76 
81 
85 
87  // Match Finder //
89 
92  uint32_t (*find)(lzma_mf *mf, lzma_match *matches);
93 
97  void (*skip)(lzma_mf *mf, uint32_t num);
98 
102  uint32_t cyclic_size; // Must be dictionary size + 1.
104 
107 
110 
115 
120 
123 
126 };
127 
128 
129 typedef struct {
132  size_t before_size;
133 
135  size_t dict_size;
136 
139  size_t after_size;
140 
145 
148  size_t nice_len;
149 
152 
155 
157  const uint8_t *preset_dict;
158 
160 
162 
163 
164 // The total usable buffer space at any moment outside the match finder:
165 // before_size + dict_size + after_size + match_len_max
166 //
167 // In reality, there's some extra space allocated to prevent the number of
168 // memmove() calls reasonable. The bigger the dict_size is, the bigger
169 // this extra buffer will be since with bigger dictionaries memmove() would
170 // also take longer.
171 //
172 // A single encoder loop in the LZ-based encoder may call the match finder
173 // (mf_find() or mf_skip()) at most after_size times. In other words,
174 // a single encoder loop may increment lzma_mf.read_pos at most after_size
175 // times. Since matches are looked up to
176 // lzma_mf.buffer[lzma_mf.read_pos + match_len_max - 1], the total
177 // amount of extra buffer needed after dict_size becomes
178 // after_size + match_len_max.
179 //
180 // before_size has two uses. The first one is to keep literals available
181 // in cases when the LZ-based encoder has made some read ahead.
182 // TODO: Maybe this could be changed by making the LZ-based encoders to
183 // store the actual literals as they do with length-distance pairs.
184 //
185 // Algorithms such as LZMA2 first try to compress a chunk, and then check
186 // if the encoded result is smaller than the uncompressed one. If the chunk
187 // was uncompressible, it is better to store it in uncompressed form in
188 // the output stream. To do this, the whole uncompressed chunk has to be
189 // still available in the history buffer. before_size achieves that.
190 
191 
192 typedef struct {
194  void *coder;
195 
197  lzma_ret (*code)(void *coder,
199  size_t *restrict out_pos, size_t out_size);
200 
202  void (*end)(void *coder, const lzma_allocator *allocator);
203 
205  lzma_ret (*options_update)(void *coder, const lzma_filter *filter);
206 
208 
209 
210 // Basic steps:
211 // 1. Input gets copied into the dictionary.
212 // 2. Data in dictionary gets run through the match finder byte by byte.
213 // 3. The literals and matches are encoded using e.g. LZMA.
214 //
215 // The bytes that have been ran through the match finder, but not encoded yet,
216 // are called `read ahead'.
217 
218 
220 static inline const uint8_t *
221 mf_ptr(const lzma_mf *mf)
222 {
223  return mf->buffer + mf->read_pos;
224 }
225 
226 
228 static inline uint32_t
229 mf_avail(const lzma_mf *mf)
230 {
231  return mf->write_pos - mf->read_pos;
232 }
233 
234 
237 static inline uint32_t
239 {
240  return mf->write_pos - mf->read_pos + mf->read_ahead;
241 }
242 
243 
251 static inline uint32_t
253 {
254  return mf->read_pos - mf->read_ahead;
255 }
256 
257 
259 #define mf_find lzma_mf_find
260 
261 
266 static inline void
268 {
269  if (amount != 0) {
270  mf->skip(mf, amount);
271  mf->read_ahead += amount;
272  }
273 }
274 
275 
278 static inline void
279 mf_read(lzma_mf *mf, uint8_t *out, size_t *out_pos, size_t out_size,
280  size_t *left)
281 {
282  const size_t out_avail = out_size - *out_pos;
283  const size_t copy_size = my_min(out_avail, *left);
284 
285  assert(mf->read_ahead == 0);
286  assert(mf->read_pos >= *left);
287 
288  memcpy(out + *out_pos, mf->buffer + mf->read_pos - *left,
289  copy_size);
290 
291  *out_pos += copy_size;
292  *left -= copy_size;
293  return;
294 }
295 
296 
299  const lzma_filter_info *filters,
300  lzma_ret (*lz_init)(lzma_lz_encoder *lz,
301  const lzma_allocator *allocator, const void *options,
302  lzma_lz_options *lz_options));
303 
304 
305 extern uint64_t lzma_lz_encoder_memusage(const lzma_lz_options *lz_options);
306 
307 
308 // These are only for LZ encoder's internal use.
309 extern uint32_t lzma_mf_find(
310  lzma_mf *mf, uint32_t *count, lzma_match *matches);
311 
312 extern uint32_t lzma_mf_hc3_find(lzma_mf *dict, lzma_match *matches);
313 extern void lzma_mf_hc3_skip(lzma_mf *dict, uint32_t amount);
314 
315 extern uint32_t lzma_mf_hc4_find(lzma_mf *dict, lzma_match *matches);
316 extern void lzma_mf_hc4_skip(lzma_mf *dict, uint32_t amount);
317 
318 extern uint32_t lzma_mf_bt2_find(lzma_mf *dict, lzma_match *matches);
319 extern void lzma_mf_bt2_skip(lzma_mf *dict, uint32_t amount);
320 
321 extern uint32_t lzma_mf_bt3_find(lzma_mf *dict, lzma_match *matches);
322 extern void lzma_mf_bt3_skip(lzma_mf *dict, uint32_t amount);
323 
324 extern uint32_t lzma_mf_bt4_find(lzma_mf *dict, lzma_match *matches);
325 extern void lzma_mf_bt4_skip(lzma_mf *dict, uint32_t amount);
326 
327 #endif
const lzma_allocator const uint8_t size_t uint8_t size_t * out_pos
Definition: block.h:528
const lzma_allocator * allocator
Definition: block.h:377
const lzma_allocator const uint8_t size_t uint8_t * out
Definition: block.h:528
const lzma_filter * filters
Definition: container.h:315
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void count
Definition: sflib.h:98
#define restrict
memcpy(mem, inblock.get(), min(CONTAINING_RECORD(inblock.get(), MEMBLOCK, data) ->size, size))
static static fork const void static count static fd const char const char static newpath char char char static envp time_t static t const char static mode static whence const char static dir time_t static t unsigned static seconds const char struct utimbuf static buf static inc static sig const char static mode static oldfd struct tms static buf static getgid static geteuid const char static filename static arg static mask struct ustat static ubuf static getppid static setsid static egid sigset_t static set struct timeval struct timezone static tz fd_set fd_set fd_set struct timeval static timeout const char char static bufsiz const char static swapflags void static offset const char static length static mode static who const char struct statfs static buf unsigned unsigned num
Definition: sflib.h:126
static const char struct stat static buf struct stat static buf static vhangup int options
Definition: sflib.h:145
static void mf_read(lzma_mf *mf, uint8_t *out, size_t *out_pos, size_t out_size, size_t *left)
Definition: lz_encoder.h:279
void lzma_mf_hc4_skip(lzma_mf *dict, uint32_t amount)
uint32_t lzma_mf_hc3_find(lzma_mf *dict, lzma_match *matches)
uint32_t lzma_mf_bt2_find(lzma_mf *dict, lzma_match *matches)
static uint32_t mf_avail(const lzma_mf *mf)
Get the number of bytes that haven't been ran through the match finder yet.
Definition: lz_encoder.h:229
uint32_t lzma_mf_bt3_find(lzma_mf *dict, lzma_match *matches)
static uint32_t mf_unencoded(const lzma_mf *mf)
Definition: lz_encoder.h:238
static const uint8_t * mf_ptr(const lzma_mf *mf)
Get pointer to the first byte not ran through the match finder.
Definition: lz_encoder.h:221
uint32_t lzma_mf_find(lzma_mf *mf, uint32_t *count, lzma_match *matches)
Find matches starting from the current byte.
Definition: lz_encoder_mf.c:23
uint32_t lzma_mf_bt4_find(lzma_mf *dict, lzma_match *matches)
void lzma_mf_bt2_skip(lzma_mf *dict, uint32_t amount)
uint64_t lzma_lz_encoder_memusage(const lzma_lz_options *lz_options)
Definition: lz_encoder.c:464
static uint32_t mf_position(const lzma_mf *mf)
Definition: lz_encoder.h:252
void lzma_mf_bt3_skip(lzma_mf *dict, uint32_t amount)
static void mf_skip(lzma_mf *mf, uint32_t amount)
Definition: lz_encoder.h:267
void lzma_mf_hc3_skip(lzma_mf *dict, uint32_t amount)
lzma_ret lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters, lzma_ret(*lz_init)(lzma_lz_encoder *lz, const lzma_allocator *allocator, const void *options, lzma_lz_options *lz_options))
Definition: lz_encoder.c:525
void lzma_mf_bt4_skip(lzma_mf *dict, uint32_t amount)
uint32_t lzma_mf_hc4_find(lzma_mf *dict, lzma_match *matches)
lzma_match_finder
Match finders.
Definition: lzma12.h:58
assert(limit<=UINT32_MAX/2)
static bool filter(RzParse *p, ut64 addr, RzFlag *f, RzAnalysisHint *hint, char *data, char *str, int len, bool big_endian)
Definition: filter.c:185
unsigned int uint32_t
Definition: sftypes.h:29
unsigned long uint64_t
Definition: sftypes.h:28
unsigned char uint8_t
Definition: sftypes.h:31
Definition: inftree9.h:24
Custom functions for memory handling.
Definition: base.h:372
Filter options.
Definition: filter.h:43
void * coder
Data specific to the LZ-based encoder.
Definition: lz_encoder.h:194
size_t after_size
Definition: lz_encoder.h:139
size_t match_len_max
Definition: lz_encoder.h:144
uint32_t depth
Maximum search depth.
Definition: lz_encoder.h:154
size_t before_size
Definition: lz_encoder.h:132
uint32_t preset_dict_size
Definition: lz_encoder.h:159
lzma_match_finder match_finder
Type of the match finder to use.
Definition: lz_encoder.h:151
uint32_t len
Definition: lz_encoder.h:23
uint32_t dist
Definition: lz_encoder.h:24
uint32_t hash_count
Number of elements in hash[].
Definition: lz_encoder.h:122
uint32_t read_pos
Definition: lz_encoder.h:63
uint32_t offset
Definition: lz_encoder.h:58
uint32_t cyclic_size
Definition: lz_encoder.h:102
uint8_t * buffer
Pointer to buffer with data to be compressed.
Definition: lz_encoder.h:35
uint32_t hash_mask
Definition: lz_encoder.h:103
lzma_action action
Definition: lz_encoder.h:119
uint32_t * son
Definition: lz_encoder.h:100
uint32_t depth
Maximum number of loops in the match finder.
Definition: lz_encoder.h:106
uint32_t match_len_max
Definition: lz_encoder.h:114
uint32_t cyclic_pos
Definition: lz_encoder.h:101
uint32_t keep_size_after
Definition: lz_encoder.h:51
uint32_t(* find)(lzma_mf *mf, lzma_match *matches)
Definition: lz_encoder.h:92
uint32_t read_limit
Definition: lz_encoder.h:75
uint32_t nice_len
Maximum length of a match that the match finder will try to find.
Definition: lz_encoder.h:109
uint32_t * hash
Definition: lz_encoder.h:99
uint32_t read_ahead
Definition: lz_encoder.h:67
uint32_t sons_count
Number of elements in son[].
Definition: lz_encoder.h:125
void(* skip)(lzma_mf *mf, uint32_t num)
Definition: lz_encoder.h:97
uint32_t keep_size_before
Definition: lz_encoder.h:45
uint32_t pending
Definition: lz_encoder.h:84
uint32_t write_pos
Definition: lz_encoder.h:80
uint32_t size
Definition: lz_encoder.h:39
Hold data and function pointers of the next filter in the chain.
Definition: common.h:135
#define my_min(x, y)
Definition: sysdefs.h:185
lzma_ret
Return values used by several functions in liblzma.
Definition: base.h:57
lzma_action
The ‘action’ argument for lzma_code()
Definition: base.h:250