Rizin
unix-like reverse engineering framework and cli tools
lzma_encoder_private.h File Reference

Private definitions for LZMA encoder. More...

#include "lz_encoder.h"
#include "range_encoder.h"
#include "lzma_common.h"
#include "lzma_encoder.h"

Go to the source code of this file.

Classes

struct  lzma_length_encoder
 
struct  lzma_optimal
 
struct  lzma_lzma1_encoder_s
 

Macros

#define not_equal_16(a, b)    ((a)[0] != (b)[0] || (a)[1] != (b)[1])
 
#define OPTS   (1 << 12)
 

Functions

void lzma_lzma_optimum_fast (lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf, uint32_t *restrict back_res, uint32_t *restrict len_res)
 
void lzma_lzma_optimum_normal (lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf, uint32_t *restrict back_res, uint32_t *restrict len_res, uint32_t position)
 

Detailed Description

Private definitions for LZMA encoder.

Definition in file lzma_encoder_private.h.

Macro Definition Documentation

◆ not_equal_16

#define not_equal_16 (   a,
  b 
)     ((a)[0] != (b)[0] || (a)[1] != (b)[1])

Definition at line 30 of file lzma_encoder_private.h.

◆ OPTS

#define OPTS   (1 << 12)

Definition at line 36 of file lzma_encoder_private.h.

Function Documentation

◆ lzma_lzma_optimum_fast()

void lzma_lzma_optimum_fast ( lzma_lzma1_encoder *restrict  coder,
lzma_mf *restrict  mf,
uint32_t *restrict  back_res,
uint32_t *restrict  len_res 
)

Definition at line 21 of file lzma_encoder_optimum_fast.c.

24 {
25  const uint32_t nice_len = mf->nice_len;
26 
27  uint32_t len_main;
28  uint32_t matches_count;
29  if (mf->read_ahead == 0) {
30  len_main = mf_find(mf, &matches_count, coder->matches);
31  } else {
32  assert(mf->read_ahead == 1);
33  len_main = coder->longest_match_length;
34  matches_count = coder->matches_count;
35  }
36 
37  const uint8_t *buf = mf_ptr(mf) - 1;
38  const uint32_t buf_avail = my_min(mf_avail(mf) + 1, MATCH_LEN_MAX);
39 
40  if (buf_avail < 2) {
41  // There's not enough input left to encode a match.
42  *back_res = UINT32_MAX;
43  *len_res = 1;
44  return;
45  }
46 
47  // Look for repeated matches; scan the previous four match distances
48  uint32_t rep_len = 0;
49  uint32_t rep_index = 0;
50 
51  for (uint32_t i = 0; i < REPS; ++i) {
52  // Pointer to the beginning of the match candidate
53  const uint8_t *const buf_back = buf - coder->reps[i] - 1;
54 
55  // If the first two bytes (2 == MATCH_LEN_MIN) do not match,
56  // this rep is not useful.
57  if (not_equal_16(buf, buf_back))
58  continue;
59 
60  // The first two bytes matched.
61  // Calculate the length of the match.
62  const uint32_t len = lzma_memcmplen(
63  buf, buf_back, 2, buf_avail);
64 
65  // If we have found a repeated match that is at least
66  // nice_len long, return it immediately.
67  if (len >= nice_len) {
68  *back_res = i;
69  *len_res = len;
70  mf_skip(mf, len - 1);
71  return;
72  }
73 
74  if (len > rep_len) {
75  rep_index = i;
76  rep_len = len;
77  }
78  }
79 
80  // We didn't find a long enough repeated match. Encode it as a normal
81  // match if the match length is at least nice_len.
82  if (len_main >= nice_len) {
83  *back_res = coder->matches[matches_count - 1].dist + REPS;
84  *len_res = len_main;
85  mf_skip(mf, len_main - 1);
86  return;
87  }
88 
89  uint32_t back_main = 0;
90  if (len_main >= 2) {
91  back_main = coder->matches[matches_count - 1].dist;
92 
93  while (matches_count > 1 && len_main ==
94  coder->matches[matches_count - 2].len + 1) {
95  if (!change_pair(coder->matches[
96  matches_count - 2].dist,
97  back_main))
98  break;
99 
100  --matches_count;
101  len_main = coder->matches[matches_count - 1].len;
102  back_main = coder->matches[matches_count - 1].dist;
103  }
104 
105  if (len_main == 2 && back_main >= 0x80)
106  len_main = 1;
107  }
108 
109  if (rep_len >= 2) {
110  if (rep_len + 1 >= len_main
111  || (rep_len + 2 >= len_main
112  && back_main > (UINT32_C(1) << 9))
113  || (rep_len + 3 >= len_main
114  && back_main > (UINT32_C(1) << 15))) {
115  *back_res = rep_index;
116  *len_res = rep_len;
117  mf_skip(mf, rep_len - 1);
118  return;
119  }
120  }
121 
122  if (len_main < 2 || buf_avail <= 2) {
123  *back_res = UINT32_MAX;
124  *len_res = 1;
125  return;
126  }
127 
128  // Get the matches for the next byte. If we find a better match,
129  // the current byte is encoded as a literal.
130  coder->longest_match_length = mf_find(mf,
131  &coder->matches_count, coder->matches);
132 
133  if (coder->longest_match_length >= 2) {
134  const uint32_t new_dist = coder->matches[
135  coder->matches_count - 1].dist;
136 
137  if ((coder->longest_match_length >= len_main
138  && new_dist < back_main)
139  || (coder->longest_match_length == len_main + 1
140  && !change_pair(back_main, new_dist))
141  || (coder->longest_match_length > len_main + 1)
142  || (coder->longest_match_length + 1 >= len_main
143  && len_main >= 3
144  && change_pair(new_dist, back_main))) {
145  *back_res = UINT32_MAX;
146  *len_res = 1;
147  return;
148  }
149  }
150 
151  // In contrast to LZMA SDK, dictionary could not have been moved
152  // between mf_find() calls, thus it is safe to just increment
153  // the old buf pointer instead of recalculating it with mf_ptr().
154  ++buf;
155 
156  const uint32_t limit = my_max(2, len_main - 1);
157 
158  for (uint32_t i = 0; i < REPS; ++i) {
159  if (memcmp(buf, buf - coder->reps[i] - 1, limit) == 0) {
160  *back_res = UINT32_MAX;
161  *len_res = 1;
162  return;
163  }
164  }
165 
166  *back_res = back_main + REPS;
167  *len_res = len_main;
168  mf_skip(mf, len_main - 2);
169  return;
170 }
size_t len
Definition: 6502dis.c:15
lzma_index ** i
Definition: index.h:629
voidpf void * buf
Definition: ioapi.h:138
static uint32_t mf_avail(const lzma_mf *mf)
Get the number of bytes that haven't been ran through the match finder yet.
Definition: lz_encoder.h:229
static const uint8_t * mf_ptr(const lzma_mf *mf)
Get pointer to the first byte not ran through the match finder.
Definition: lz_encoder.h:221
static void mf_skip(lzma_mf *mf, uint32_t amount)
Definition: lz_encoder.h:267
#define mf_find
Since everything else begins with mf_, use it also for lzma_mf_find().
Definition: lz_encoder.h:259
#define MATCH_LEN_MAX
Definition: lzma_common.h:168
#define REPS
Definition: lzma_common.h:223
#define change_pair(small_dist, big_dist)
#define not_equal_16(a, b)
assert(limit<=UINT32_MAX/2)
static uint32_t const uint8_t uint32_t uint32_t limit
Definition: memcmplen.h:45
unsigned int uint32_t
Definition: sftypes.h:29
unsigned char uint8_t
Definition: sftypes.h:31
#define UINT32_C(val)
#define UINT32_MAX
uint32_t reps[REPS]
The four most recent match distances.
lzma_match matches[MATCH_LEN_MAX+1]
Array of match candidates.
uint32_t matches_count
Number of match candidates in matches[].
uint32_t len
Definition: lz_encoder.h:23
uint32_t dist
Definition: lz_encoder.h:24
uint32_t nice_len
Maximum length of a match that the match finder will try to find.
Definition: lz_encoder.h:109
uint32_t read_ahead
Definition: lz_encoder.h:67
#define my_min(x, y)
Definition: sysdefs.h:185
#define my_max(x, y)
Definition: sysdefs.h:186

References assert(), change_pair, i, len, limit, MATCH_LEN_MAX, mf_avail(), mf_find, mf_ptr(), mf_skip(), my_max, my_min, not_equal_16, REPS, UINT32_C, and UINT32_MAX.

Referenced by lzma_lzma_encode().

◆ lzma_lzma_optimum_normal()

void lzma_lzma_optimum_normal ( lzma_lzma1_encoder *restrict  coder,
lzma_mf *restrict  mf,
uint32_t *restrict  back_res,
uint32_t *restrict  len_res,
uint32_t  position 
)

Definition at line 804 of file lzma_encoder_optimum_normal.c.

808 {
809  // If we have symbols pending, return the next pending symbol.
810  if (coder->opts_end_index != coder->opts_current_index) {
811  assert(mf->read_ahead > 0);
812  *len_res = coder->opts[coder->opts_current_index].pos_prev
813  - coder->opts_current_index;
814  *back_res = coder->opts[coder->opts_current_index].back_prev;
815  coder->opts_current_index = coder->opts[
817  return;
818  }
819 
820  // Update the price tables. In LZMA SDK <= 4.60 (and possibly later)
821  // this was done in both initialization function and in the main loop.
822  // In liblzma they were moved into this single place.
823  if (mf->read_ahead == 0) {
824  if (coder->match_price_count >= (1 << 7))
825  fill_dist_prices(coder);
826 
827  if (coder->align_price_count >= ALIGN_SIZE)
828  fill_align_prices(coder);
829  }
830 
831  // TODO: This needs quite a bit of cleaning still. But splitting
832  // the original function into two pieces makes it at least a little
833  // more readable, since those two parts don't share many variables.
834 
835  uint32_t len_end = helper1(coder, mf, back_res, len_res, position);
836  if (len_end == UINT32_MAX)
837  return;
838 
839  uint32_t reps[REPS];
840  memcpy(reps, coder->reps, sizeof(reps));
841 
842  uint32_t cur;
843  for (cur = 1; cur < len_end; ++cur) {
844  assert(cur < OPTS);
845 
846  coder->longest_match_length = mf_find(
847  mf, &coder->matches_count, coder->matches);
848 
849  if (coder->longest_match_length >= mf->nice_len)
850  break;
851 
852  len_end = helper2(coder, reps, mf_ptr(mf) - 1, len_end,
853  position + cur, cur, mf->nice_len,
854  my_min(mf_avail(mf) + 1, OPTS - 1 - cur));
855  }
856 
857  backward(coder, len_res, back_res, cur);
858  return;
859 }
memcpy(mem, inblock.get(), min(CONTAINING_RECORD(inblock.get(), MEMBLOCK, data) ->size, size))
#define ALIGN_SIZE
Definition: lzma_common.h:218
static void fill_align_prices(lzma_lzma1_encoder *coder)
static uint32_t helper2(lzma_lzma1_encoder *coder, uint32_t *reps, const uint8_t *buf, uint32_t len_end, uint32_t position, const uint32_t cur, const uint32_t nice_len, const uint32_t buf_avail_full)
static void fill_dist_prices(lzma_lzma1_encoder *coder)
static uint32_t helper1(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf, uint32_t *restrict back_res, uint32_t *restrict len_res, uint32_t position)
static void backward(lzma_lzma1_encoder *restrict coder, uint32_t *restrict len_res, uint32_t *restrict back_res, uint32_t cur)
#define OPTS
lzma_optimal opts[OPTS]

References ALIGN_SIZE, assert(), backward(), fill_align_prices(), fill_dist_prices(), helper1(), helper2(), memcpy(), mf_avail(), mf_find, mf_ptr(), my_min, OPTS, REPS, and UINT32_MAX.

Referenced by lzma_lzma_encode().