Rizin
unix-like reverse engineering framework and cli tools
lzma_encoder_optimum_fast.c
Go to the documentation of this file.
1 //
4 //
5 // Author: Igor Pavlov
6 //
7 // This file has been put into the public domain.
8 // You can do whatever you want with this file.
9 //
11 
12 #include "lzma_encoder_private.h"
13 #include "memcmplen.h"
14 
15 
16 #define change_pair(small_dist, big_dist) \
17  (((big_dist) >> 7) > (small_dist))
18 
19 
20 extern void
22  lzma_mf *restrict mf,
23  uint32_t *restrict back_res, uint32_t *restrict len_res)
24 {
25  const uint32_t nice_len = mf->nice_len;
26 
27  uint32_t len_main;
28  uint32_t matches_count;
29  if (mf->read_ahead == 0) {
30  len_main = mf_find(mf, &matches_count, coder->matches);
31  } else {
32  assert(mf->read_ahead == 1);
33  len_main = coder->longest_match_length;
34  matches_count = coder->matches_count;
35  }
36 
37  const uint8_t *buf = mf_ptr(mf) - 1;
38  const uint32_t buf_avail = my_min(mf_avail(mf) + 1, MATCH_LEN_MAX);
39 
40  if (buf_avail < 2) {
41  // There's not enough input left to encode a match.
42  *back_res = UINT32_MAX;
43  *len_res = 1;
44  return;
45  }
46 
47  // Look for repeated matches; scan the previous four match distances
48  uint32_t rep_len = 0;
49  uint32_t rep_index = 0;
50 
51  for (uint32_t i = 0; i < REPS; ++i) {
52  // Pointer to the beginning of the match candidate
53  const uint8_t *const buf_back = buf - coder->reps[i] - 1;
54 
55  // If the first two bytes (2 == MATCH_LEN_MIN) do not match,
56  // this rep is not useful.
57  if (not_equal_16(buf, buf_back))
58  continue;
59 
60  // The first two bytes matched.
61  // Calculate the length of the match.
62  const uint32_t len = lzma_memcmplen(
63  buf, buf_back, 2, buf_avail);
64 
65  // If we have found a repeated match that is at least
66  // nice_len long, return it immediately.
67  if (len >= nice_len) {
68  *back_res = i;
69  *len_res = len;
70  mf_skip(mf, len - 1);
71  return;
72  }
73 
74  if (len > rep_len) {
75  rep_index = i;
76  rep_len = len;
77  }
78  }
79 
80  // We didn't find a long enough repeated match. Encode it as a normal
81  // match if the match length is at least nice_len.
82  if (len_main >= nice_len) {
83  *back_res = coder->matches[matches_count - 1].dist + REPS;
84  *len_res = len_main;
85  mf_skip(mf, len_main - 1);
86  return;
87  }
88 
89  uint32_t back_main = 0;
90  if (len_main >= 2) {
91  back_main = coder->matches[matches_count - 1].dist;
92 
93  while (matches_count > 1 && len_main ==
94  coder->matches[matches_count - 2].len + 1) {
95  if (!change_pair(coder->matches[
96  matches_count - 2].dist,
97  back_main))
98  break;
99 
100  --matches_count;
101  len_main = coder->matches[matches_count - 1].len;
102  back_main = coder->matches[matches_count - 1].dist;
103  }
104 
105  if (len_main == 2 && back_main >= 0x80)
106  len_main = 1;
107  }
108 
109  if (rep_len >= 2) {
110  if (rep_len + 1 >= len_main
111  || (rep_len + 2 >= len_main
112  && back_main > (UINT32_C(1) << 9))
113  || (rep_len + 3 >= len_main
114  && back_main > (UINT32_C(1) << 15))) {
115  *back_res = rep_index;
116  *len_res = rep_len;
117  mf_skip(mf, rep_len - 1);
118  return;
119  }
120  }
121 
122  if (len_main < 2 || buf_avail <= 2) {
123  *back_res = UINT32_MAX;
124  *len_res = 1;
125  return;
126  }
127 
128  // Get the matches for the next byte. If we find a better match,
129  // the current byte is encoded as a literal.
130  coder->longest_match_length = mf_find(mf,
131  &coder->matches_count, coder->matches);
132 
133  if (coder->longest_match_length >= 2) {
134  const uint32_t new_dist = coder->matches[
135  coder->matches_count - 1].dist;
136 
137  if ((coder->longest_match_length >= len_main
138  && new_dist < back_main)
139  || (coder->longest_match_length == len_main + 1
140  && !change_pair(back_main, new_dist))
141  || (coder->longest_match_length > len_main + 1)
142  || (coder->longest_match_length + 1 >= len_main
143  && len_main >= 3
144  && change_pair(new_dist, back_main))) {
145  *back_res = UINT32_MAX;
146  *len_res = 1;
147  return;
148  }
149  }
150 
151  // In contrast to LZMA SDK, dictionary could not have been moved
152  // between mf_find() calls, thus it is safe to just increment
153  // the old buf pointer instead of recalculating it with mf_ptr().
154  ++buf;
155 
156  const uint32_t limit = my_max(2, len_main - 1);
157 
158  for (uint32_t i = 0; i < REPS; ++i) {
159  if (memcmp(buf, buf - coder->reps[i] - 1, limit) == 0) {
160  *back_res = UINT32_MAX;
161  *len_res = 1;
162  return;
163  }
164  }
165 
166  *back_res = back_main + REPS;
167  *len_res = len_main;
168  mf_skip(mf, len_main - 2);
169  return;
170 }
size_t len
Definition: 6502dis.c:15
lzma_index ** i
Definition: index.h:629
voidpf void * buf
Definition: ioapi.h:138
#define restrict
static uint32_t mf_avail(const lzma_mf *mf)
Get the number of bytes that haven't been ran through the match finder yet.
Definition: lz_encoder.h:229
static const uint8_t * mf_ptr(const lzma_mf *mf)
Get pointer to the first byte not ran through the match finder.
Definition: lz_encoder.h:221
static void mf_skip(lzma_mf *mf, uint32_t amount)
Definition: lz_encoder.h:267
#define mf_find
Since everything else begins with mf_, use it also for lzma_mf_find().
Definition: lz_encoder.h:259
#define MATCH_LEN_MAX
Definition: lzma_common.h:168
#define REPS
Definition: lzma_common.h:223
#define change_pair(small_dist, big_dist)
void lzma_lzma_optimum_fast(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf, uint32_t *restrict back_res, uint32_t *restrict len_res)
Private definitions for LZMA encoder.
#define not_equal_16(a, b)
Optimized comparison of two buffers.
assert(limit<=UINT32_MAX/2)
static uint32_t const uint8_t uint32_t uint32_t limit
Definition: memcmplen.h:45
unsigned int uint32_t
Definition: sftypes.h:29
unsigned char uint8_t
Definition: sftypes.h:31
#define UINT32_C(val)
#define UINT32_MAX
#define my_min(x, y)
Definition: sysdefs.h:185
#define my_max(x, y)
Definition: sysdefs.h:186