Rizin
unix-like reverse engineering framework and cli tools
lz_encoder.h File Reference

LZ in window and match finder API. More...

#include "common.h"

Go to the source code of this file.

Classes

struct  lzma_match
 
struct  lzma_mf_s
 
struct  lzma_lz_options
 
struct  lzma_lz_encoder
 

Macros

#define mf_find   lzma_mf_find
 Since everything else begins with mf_, use it also for lzma_mf_find(). More...
 

Typedefs

typedef struct lzma_mf_s lzma_mf
 

Functions

static const uint8_tmf_ptr (const lzma_mf *mf)
 Get pointer to the first byte not ran through the match finder. More...
 
static uint32_t mf_avail (const lzma_mf *mf)
 Get the number of bytes that haven't been ran through the match finder yet. More...
 
static uint32_t mf_unencoded (const lzma_mf *mf)
 
static uint32_t mf_position (const lzma_mf *mf)
 
static void mf_skip (lzma_mf *mf, uint32_t amount)
 
static void mf_read (lzma_mf *mf, uint8_t *out, size_t *out_pos, size_t out_size, size_t *left)
 
lzma_ret lzma_lz_encoder_init (lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters, lzma_ret(*lz_init)(lzma_lz_encoder *lz, const lzma_allocator *allocator, const void *options, lzma_lz_options *lz_options))
 
uint64_t lzma_lz_encoder_memusage (const lzma_lz_options *lz_options)
 
uint32_t lzma_mf_find (lzma_mf *mf, uint32_t *count, lzma_match *matches)
 Find matches starting from the current byte. More...
 
uint32_t lzma_mf_hc3_find (lzma_mf *dict, lzma_match *matches)
 
void lzma_mf_hc3_skip (lzma_mf *dict, uint32_t amount)
 
uint32_t lzma_mf_hc4_find (lzma_mf *dict, lzma_match *matches)
 
void lzma_mf_hc4_skip (lzma_mf *dict, uint32_t amount)
 
uint32_t lzma_mf_bt2_find (lzma_mf *dict, lzma_match *matches)
 
void lzma_mf_bt2_skip (lzma_mf *dict, uint32_t amount)
 
uint32_t lzma_mf_bt3_find (lzma_mf *dict, lzma_match *matches)
 
void lzma_mf_bt3_skip (lzma_mf *dict, uint32_t amount)
 
uint32_t lzma_mf_bt4_find (lzma_mf *dict, lzma_match *matches)
 
void lzma_mf_bt4_skip (lzma_mf *dict, uint32_t amount)
 

Detailed Description

LZ in window and match finder API.

Definition in file lz_encoder.h.

Macro Definition Documentation

◆ mf_find

#define mf_find   lzma_mf_find

Since everything else begins with mf_, use it also for lzma_mf_find().

Definition at line 259 of file lz_encoder.h.

Typedef Documentation

◆ lzma_mf

typedef struct lzma_mf_s lzma_mf

Definition at line 1 of file lz_encoder.h.

Function Documentation

◆ lzma_lz_encoder_init()

lzma_ret lzma_lz_encoder_init ( lzma_next_coder next,
const lzma_allocator allocator,
const lzma_filter_info filters,
lzma_ret(*)(lzma_lz_encoder *lz, const lzma_allocator *allocator, const void *options, lzma_lz_options *lz_options)  lz_init 
)

Definition at line 525 of file lz_encoder.c.

530 {
531 #ifdef HAVE_SMALL
532  // We need that the CRC32 table has been initialized.
533  lzma_crc32_init();
534 #endif
535 
536  // Allocate and initialize the base data structure.
537  lzma_coder *coder = next->coder;
538  if (coder == NULL) {
539  coder = lzma_alloc(sizeof(lzma_coder), allocator);
540  if (coder == NULL)
541  return LZMA_MEM_ERROR;
542 
543  next->coder = coder;
544  next->code = &lz_encode;
545  next->end = &lz_encoder_end;
546  next->update = &lz_encoder_update;
547 
548  coder->lz.coder = NULL;
549  coder->lz.code = NULL;
550  coder->lz.end = NULL;
551 
552  // mf.size is initialized to silence Valgrind
553  // when used on optimized binaries (GCC may reorder
554  // code in a way that Valgrind gets unhappy).
555  coder->mf.buffer = NULL;
556  coder->mf.size = 0;
557  coder->mf.hash = NULL;
558  coder->mf.son = NULL;
559  coder->mf.hash_count = 0;
560  coder->mf.sons_count = 0;
561 
562  coder->next = LZMA_NEXT_CODER_INIT;
563  }
564 
565  // Initialize the LZ-based encoder.
566  lzma_lz_options lz_options;
567  return_if_error(lz_init(&coder->lz, allocator,
568  filters[0].options, &lz_options));
569 
570  // Setup the size information into coder->mf and deallocate
571  // old buffers if they have wrong size.
572  if (lz_encoder_prepare(&coder->mf, allocator, &lz_options))
573  return LZMA_OPTIONS_ERROR;
574 
575  // Allocate new buffers if needed, and do the rest of
576  // the initialization.
577  if (lz_encoder_init(&coder->mf, allocator, &lz_options))
578  return LZMA_MEM_ERROR;
579 
580  // Initialize the next filter in the chain, if any.
581  return lzma_next_filter_init(&coder->next, allocator, filters + 1);
582 }
const lzma_allocator * allocator
Definition: block.h:377
const lzma_filter * filters
Definition: container.h:315
void lzma_crc32_init(void)
Definition: crc32_small.c:41
#define NULL
Definition: cris-opc.c:27
static lzma_ret lz_encoder_update(void *coder_ptr, const lzma_allocator *allocator, const lzma_filter *filters_null lzma_attribute((__unused__)), const lzma_filter *reversed_filters)
Definition: lz_encoder.c:507
static lzma_ret lz_encode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action)
Definition: lz_encoder.c:160
static void lz_encoder_end(void *coder_ptr, const lzma_allocator *allocator)
Definition: lz_encoder.c:486
static bool lz_encoder_init(lzma_mf *mf, const lzma_allocator *allocator, const lzma_lz_options *lz_options)
Definition: lz_encoder.c:371
static bool lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator, const lzma_lz_options *lz_options)
Definition: lz_encoder.c:193
lzma_next_coder next
Next coder in the chain.
Definition: lz_decoder.c:33
lzma_lz_decoder lz
The actual LZ-based decoder e.g. LZMA.
Definition: lz_decoder.c:28
lzma_mf mf
History buffer and match finder.
Definition: lz_encoder.c:31
void * options
Pointer to filter-specific options structure.
Definition: filter.h:63
void * coder
Data specific to the LZ-based decoder.
Definition: lz_decoder.h:56
void(* end)(void *coder, const lzma_allocator *allocator)
Free allocated resources.
Definition: lz_decoder.h:69
lzma_ret(* code)(void *coder, lzma_dict *restrict dict, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size)
Function to decode from in[] to *dict.
Definition: lz_decoder.h:59
uint32_t hash_count
Number of elements in hash[].
Definition: lz_encoder.h:122
uint8_t * buffer
Pointer to buffer with data to be compressed.
Definition: lz_encoder.h:35
uint32_t * son
Definition: lz_encoder.h:100
uint32_t * hash
Definition: lz_encoder.h:99
uint32_t sons_count
Number of elements in son[].
Definition: lz_encoder.h:125
uint32_t size
Definition: lz_encoder.h:39
lzma_code_function code
Pointer to function to do the actual coding.
Definition: common.h:150
void * coder
Pointer to coder-specific data.
Definition: common.h:137
lzma_end_function end
Definition: common.h:155
lzma_ret(* update)(void *coder, const lzma_allocator *allocator, const lzma_filter *filters, const lzma_filter *reversed_filters)
Definition: common.h:173
#define LZMA_NEXT_CODER_INIT
Macro to initialize lzma_next_coder structure.
Definition: common.h:180
#define return_if_error(expr)
Return if expression doesn't evaluate to LZMA_OK.
Definition: common.h:278
void * lzma_alloc(size_t size, const lzma_allocator *allocator) lzma_attribute((__malloc__)) lzma_attr_alloc_size(1)
Allocates memory.
@ LZMA_MEM_ERROR
Cannot allocate memory.
Definition: base.h:128
@ LZMA_OPTIONS_ERROR
Invalid or unsupported options.
Definition: base.h:160
lzma_ret lzma_next_filter_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters)
Definition: common.c:116

References allocator, lzma_mf_s::buffer, lzma_next_coder_s::code, lzma_lz_decoder::code, lzma_next_coder_s::coder, lzma_lz_decoder::coder, lzma_next_coder_s::end, lzma_lz_decoder::end, filters, lzma_mf_s::hash, lzma_mf_s::hash_count, lzma_coder::lz, lz_encode(), lz_encoder_end(), lz_encoder_init(), lz_encoder_prepare(), lz_encoder_update(), lzma_alloc(), lzma_crc32_init(), LZMA_MEM_ERROR, LZMA_NEXT_CODER_INIT, lzma_next_filter_init(), LZMA_OPTIONS_ERROR, lzma_coder::mf, lzma_coder::next, NULL, lzma_filter::options, return_if_error, lzma_mf_s::size, lzma_mf_s::son, lzma_mf_s::sons_count, and lzma_next_coder_s::update.

Referenced by lzma_lzma2_encoder_init(), and lzma_lzma_encoder_init().

◆ lzma_lz_encoder_memusage()

uint64_t lzma_lz_encoder_memusage ( const lzma_lz_options lz_options)

Definition at line 464 of file lz_encoder.c.

465 {
466  // Old buffers must not exist when calling lz_encoder_prepare().
467  lzma_mf mf = {
468  .buffer = NULL,
469  .hash = NULL,
470  .son = NULL,
471  .hash_count = 0,
472  .sons_count = 0,
473  };
474 
475  // Setup the size information into mf.
476  if (lz_encoder_prepare(&mf, NULL, lz_options))
477  return UINT64_MAX;
478 
479  // Calculate the memory usage.
480  return ((uint64_t)(mf.hash_count) + mf.sons_count) * sizeof(uint32_t)
481  + mf.size + sizeof(lzma_coder);
482 }
unsigned int uint32_t
Definition: sftypes.h:29
unsigned long uint64_t
Definition: sftypes.h:28
#define UINT64_MAX

References lzma_mf_s::buffer, lzma_mf_s::hash_count, lz_encoder_prepare(), NULL, lzma_mf_s::size, lzma_mf_s::sons_count, and UINT64_MAX.

Referenced by lzma_lzma_encoder_memusage().

◆ lzma_mf_bt2_find()

uint32_t lzma_mf_bt2_find ( lzma_mf dict,
lzma_match matches 
)

Referenced by lz_encoder_prepare().

◆ lzma_mf_bt2_skip()

void lzma_mf_bt2_skip ( lzma_mf dict,
uint32_t  amount 
)

Referenced by lz_encoder_prepare().

◆ lzma_mf_bt3_find()

uint32_t lzma_mf_bt3_find ( lzma_mf dict,
lzma_match matches 
)

Referenced by lz_encoder_prepare().

◆ lzma_mf_bt3_skip()

void lzma_mf_bt3_skip ( lzma_mf dict,
uint32_t  amount 
)

Referenced by lz_encoder_prepare().

◆ lzma_mf_bt4_find()

uint32_t lzma_mf_bt4_find ( lzma_mf dict,
lzma_match matches 
)

Referenced by lz_encoder_prepare().

◆ lzma_mf_bt4_skip()

void lzma_mf_bt4_skip ( lzma_mf dict,
uint32_t  amount 
)

Referenced by lz_encoder_prepare().

◆ lzma_mf_find()

uint32_t lzma_mf_find ( lzma_mf mf,
uint32_t count_ptr,
lzma_match matches 
)

Find matches starting from the current byte.

Returns
The length of the longest match found

Definition at line 23 of file lz_encoder_mf.c.

24 {
25  // Call the match finder. It returns the number of length-distance
26  // pairs found.
27  // FIXME: Minimum count is zero, what _exactly_ is the maximum?
28  const uint32_t count = mf->find(mf, matches);
29 
30  // Length of the longest match; assume that no matches were found
31  // and thus the maximum length is zero.
32  uint32_t len_best = 0;
33 
34  if (count > 0) {
35 #ifndef NDEBUG
36  // Validate the matches.
37  for (uint32_t i = 0; i < count; ++i) {
38  assert(matches[i].len <= mf->nice_len);
39  assert(matches[i].dist < mf->read_pos);
40  assert(memcmp(mf_ptr(mf) - 1,
41  mf_ptr(mf) - matches[i].dist - 2,
42  matches[i].len) == 0);
43  }
44 #endif
45 
46  // The last used element in the array contains
47  // the longest match.
48  len_best = matches[count - 1].len;
49 
50  // If a match of maximum search length was found, try to
51  // extend the match to maximum possible length.
52  if (len_best == mf->nice_len) {
53  // The limit for the match length is either the
54  // maximum match length supported by the LZ-based
55  // encoder or the number of bytes left in the
56  // dictionary, whichever is smaller.
57  uint32_t limit = mf_avail(mf) + 1;
58  if (limit > mf->match_len_max)
59  limit = mf->match_len_max;
60 
61  // Pointer to the byte we just ran through
62  // the match finder.
63  const uint8_t *p1 = mf_ptr(mf) - 1;
64 
65  // Pointer to the beginning of the match. We need -1
66  // here because the match distances are zero based.
67  const uint8_t *p2 = p1 - matches[count - 1].dist - 1;
68 
69  len_best = lzma_memcmplen(p1, p2, len_best, limit);
70  }
71  }
72 
73  *count_ptr = count;
74 
75  // Finally update the read position to indicate that match finder was
76  // run for this dictionary offset.
77  ++mf->read_ahead;
78 
79  return len_best;
80 }
size_t len
Definition: 6502dis.c:15
lzma_index ** i
Definition: index.h:629
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void count
Definition: sflib.h:98
static uint32_t mf_avail(const lzma_mf *mf)
Get the number of bytes that haven't been ran through the match finder yet.
Definition: lz_encoder.h:229
static const uint8_t * mf_ptr(const lzma_mf *mf)
Get pointer to the first byte not ran through the match finder.
Definition: lz_encoder.h:221
assert(limit<=UINT32_MAX/2)
static uint32_t const uint8_t uint32_t uint32_t limit
Definition: memcmplen.h:45
unsigned char uint8_t
Definition: sftypes.h:31
uint32_t len
Definition: lz_encoder.h:23
uint32_t dist
Definition: lz_encoder.h:24
uint32_t match_len_max
Definition: lz_encoder.h:114
uint32_t(* find)(lzma_mf *mf, lzma_match *matches)
Definition: lz_encoder.h:92
uint32_t nice_len
Maximum length of a match that the match finder will try to find.
Definition: lz_encoder.h:109
uint32_t read_ahead
Definition: lz_encoder.h:67

References assert(), count, lzma_match::dist, lzma_mf_s::find, i, len, lzma_match::len, limit, lzma_mf_s::match_len_max, mf_avail(), mf_ptr(), lzma_mf_s::nice_len, and lzma_mf_s::read_ahead.

◆ lzma_mf_hc3_find()

uint32_t lzma_mf_hc3_find ( lzma_mf dict,
lzma_match matches 
)

Referenced by lz_encoder_prepare().

◆ lzma_mf_hc3_skip()

void lzma_mf_hc3_skip ( lzma_mf dict,
uint32_t  amount 
)

Referenced by lz_encoder_prepare().

◆ lzma_mf_hc4_find()

uint32_t lzma_mf_hc4_find ( lzma_mf dict,
lzma_match matches 
)

Referenced by lz_encoder_prepare().

◆ lzma_mf_hc4_skip()

void lzma_mf_hc4_skip ( lzma_mf dict,
uint32_t  amount 
)

Referenced by lz_encoder_prepare().

◆ mf_avail()

static uint32_t mf_avail ( const lzma_mf mf)
inlinestatic

Get the number of bytes that haven't been ran through the match finder yet.

Definition at line 229 of file lz_encoder.h.

230 {
231  return mf->write_pos - mf->read_pos;
232 }
uint32_t read_pos
Definition: lz_encoder.h:63
uint32_t write_pos
Definition: lz_encoder.h:80

References lzma_mf_s::read_pos, and lzma_mf_s::write_pos.

Referenced by helper1(), lzma_lzma_optimum_fast(), lzma_lzma_optimum_normal(), and lzma_mf_find().

◆ mf_position()

static uint32_t mf_position ( const lzma_mf mf)
inlinestatic

Calculate the absolute offset from the beginning of the most recent dictionary reset. Only the lowest four bits are important, so there's no problem that we don't know the 64-bit size of the data encoded so far.

NOTE: When moving the input window, we need to do it so that the lowest bits of dict->read_pos are not modified to keep this macro working as intended.

Definition at line 252 of file lz_encoder.h.

253 {
254  return mf->read_pos - mf->read_ahead;
255 }

References lzma_mf_s::read_ahead, and lzma_mf_s::read_pos.

Referenced by encode_init(), and lzma_lzma_encode().

◆ mf_ptr()

static const uint8_t* mf_ptr ( const lzma_mf mf)
inlinestatic

Get pointer to the first byte not ran through the match finder.

Definition at line 221 of file lz_encoder.h.

222 {
223  return mf->buffer + mf->read_pos;
224 }

References lzma_mf_s::buffer, and lzma_mf_s::read_pos.

Referenced by helper1(), lzma_lzma_optimum_fast(), lzma_lzma_optimum_normal(), and lzma_mf_find().

◆ mf_read()

static void mf_read ( lzma_mf mf,
uint8_t out,
size_t out_pos,
size_t  out_size,
size_t left 
)
inlinestatic

Copies at most *left number of bytes from the history buffer to out[]. This is needed by LZMA2 to encode uncompressed chunks.

Definition at line 279 of file lz_encoder.h.

281 {
282  const size_t out_avail = out_size - *out_pos;
283  const size_t copy_size = my_min(out_avail, *left);
284 
285  assert(mf->read_ahead == 0);
286  assert(mf->read_pos >= *left);
287 
288  memcpy(out + *out_pos, mf->buffer + mf->read_pos - *left,
289  copy_size);
290 
291  *out_pos += copy_size;
292  *left -= copy_size;
293  return;
294 }
const lzma_allocator const uint8_t size_t uint8_t size_t * out_pos
Definition: block.h:528
const lzma_allocator const uint8_t size_t uint8_t * out
Definition: block.h:528
memcpy(mem, inblock.get(), min(CONTAINING_RECORD(inblock.get(), MEMBLOCK, data) ->size, size))
#define my_min(x, y)
Definition: sysdefs.h:185

References assert(), lzma_mf_s::buffer, memcpy(), my_min, out, out_pos, lzma_mf_s::read_ahead, and lzma_mf_s::read_pos.

Referenced by lzma2_encode().

◆ mf_skip()

static void mf_skip ( lzma_mf mf,
uint32_t  amount 
)
inlinestatic

Skip the given number of bytes. This is used when a good match was found. For example, if mf_find() finds a match of 200 bytes long, the first byte of that match was already consumed by mf_find(), and the rest 199 bytes have to be skipped with mf_skip(mf, 199).

Definition at line 267 of file lz_encoder.h.

268 {
269  if (amount != 0) {
270  mf->skip(mf, amount);
271  mf->read_ahead += amount;
272  }
273 }
void(* skip)(lzma_mf *mf, uint32_t num)
Definition: lz_encoder.h:97

References lzma_mf_s::read_ahead, and lzma_mf_s::skip.

Referenced by encode_init(), helper1(), and lzma_lzma_optimum_fast().

◆ mf_unencoded()

static uint32_t mf_unencoded ( const lzma_mf mf)
inlinestatic

Get the number of bytes that haven't been encoded yet (some of these bytes may have been ran through the match finder though).

Definition at line 238 of file lz_encoder.h.

239 {
240  return mf->write_pos - mf->read_pos + mf->read_ahead;
241 }

References lzma_mf_s::read_ahead, lzma_mf_s::read_pos, and lzma_mf_s::write_pos.

Referenced by lzma2_encode().