Rizin
unix-like reverse engineering framework and cli tools
lzma_encoder_optimum_normal.c
Go to the documentation of this file.
1 //
4 //
5 // Author: Igor Pavlov
6 //
7 // This file has been put into the public domain.
8 // You can do whatever you want with this file.
9 //
11 
12 #include "lzma_encoder_private.h"
13 #include "fastpos.h"
14 #include "memcmplen.h"
15 
16 
18 // Prices //
20 
21 static uint32_t
23  const uint32_t prev_byte, const bool match_mode,
24  uint32_t match_byte, uint32_t symbol)
25 {
26  const probability *const subcoder = literal_subcoder(coder->literal,
28  pos, prev_byte);
29 
30  uint32_t price = 0;
31 
32  if (!match_mode) {
33  price = rc_bittree_price(subcoder, 8, symbol);
34  } else {
35  uint32_t offset = 0x100;
36  symbol += UINT32_C(1) << 8;
37 
38  do {
39  match_byte <<= 1;
40 
41  const uint32_t match_bit = match_byte & offset;
42  const uint32_t subcoder_index
43  = offset + match_bit + (symbol >> 8);
44  const uint32_t bit = (symbol >> 7) & 1;
45  price += rc_bit_price(subcoder[subcoder_index], bit);
46 
47  symbol <<= 1;
48  offset &= ~(match_byte ^ symbol);
49 
50  } while (symbol < (UINT32_C(1) << 16));
51  }
52 
53  return price;
54 }
55 
56 
57 static inline uint32_t
58 get_len_price(const lzma_length_encoder *const lencoder,
59  const uint32_t len, const uint32_t pos_state)
60 {
61  // NOTE: Unlike the other price tables, length prices are updated
62  // in lzma_encoder.c
63  return lencoder->prices[pos_state][len - MATCH_LEN_MIN];
64 }
65 
66 
67 static inline uint32_t
69  const lzma_lzma_state state, const uint32_t pos_state)
70 {
71  return rc_bit_0_price(coder->is_rep0[state])
72  + rc_bit_0_price(coder->is_rep0_long[state][pos_state]);
73 }
74 
75 
76 static inline uint32_t
77 get_pure_rep_price(const lzma_lzma1_encoder *const coder, const uint32_t rep_index,
78  const lzma_lzma_state state, uint32_t pos_state)
79 {
80  uint32_t price;
81 
82  if (rep_index == 0) {
83  price = rc_bit_0_price(coder->is_rep0[state]);
84  price += rc_bit_1_price(coder->is_rep0_long[state][pos_state]);
85  } else {
86  price = rc_bit_1_price(coder->is_rep0[state]);
87 
88  if (rep_index == 1) {
89  price += rc_bit_0_price(coder->is_rep1[state]);
90  } else {
91  price += rc_bit_1_price(coder->is_rep1[state]);
92  price += rc_bit_price(coder->is_rep2[state],
93  rep_index - 2);
94  }
95  }
96 
97  return price;
98 }
99 
100 
101 static inline uint32_t
102 get_rep_price(const lzma_lzma1_encoder *const coder, const uint32_t rep_index,
103  const uint32_t len, const lzma_lzma_state state,
104  const uint32_t pos_state)
105 {
106  return get_len_price(&coder->rep_len_encoder, len, pos_state)
107  + get_pure_rep_price(coder, rep_index, state, pos_state);
108 }
109 
110 
111 static inline uint32_t
112 get_dist_len_price(const lzma_lzma1_encoder *const coder, const uint32_t dist,
113  const uint32_t len, const uint32_t pos_state)
114 {
115  const uint32_t dist_state = get_dist_state(len);
116  uint32_t price;
117 
118  if (dist < FULL_DISTANCES) {
119  price = coder->dist_prices[dist_state][dist];
120  } else {
121  const uint32_t dist_slot = get_dist_slot_2(dist);
122  price = coder->dist_slot_prices[dist_state][dist_slot]
123  + coder->align_prices[dist & ALIGN_MASK];
124  }
125 
126  price += get_len_price(&coder->match_len_encoder, len, pos_state);
127 
128  return price;
129 }
130 
131 
132 static void
134 {
135  for (uint32_t dist_state = 0; dist_state < DIST_STATES; ++dist_state) {
136 
137  uint32_t *const dist_slot_prices
138  = coder->dist_slot_prices[dist_state];
139 
140  // Price to encode the dist_slot.
141  for (uint32_t dist_slot = 0;
142  dist_slot < coder->dist_table_size; ++dist_slot)
143  dist_slot_prices[dist_slot] = rc_bittree_price(
144  coder->dist_slot[dist_state],
145  DIST_SLOT_BITS, dist_slot);
146 
147  // For matches with distance >= FULL_DISTANCES, add the price
148  // of the direct bits part of the match distance. (Align bits
149  // are handled by fill_align_prices()).
150  for (uint32_t dist_slot = DIST_MODEL_END;
151  dist_slot < coder->dist_table_size;
152  ++dist_slot)
153  dist_slot_prices[dist_slot] += rc_direct_price(
154  ((dist_slot >> 1) - 1) - ALIGN_BITS);
155 
156  // Distances in the range [0, 3] are fully encoded with
157  // dist_slot, so they are used for coder->dist_prices
158  // as is.
159  for (uint32_t i = 0; i < DIST_MODEL_START; ++i)
160  coder->dist_prices[dist_state][i]
161  = dist_slot_prices[i];
162  }
163 
164  // Distances in the range [4, 127] depend on dist_slot and
165  // dist_special. We do this in a loop separate from the above
166  // loop to avoid redundant calls to get_dist_slot().
167  for (uint32_t i = DIST_MODEL_START; i < FULL_DISTANCES; ++i) {
168  const uint32_t dist_slot = get_dist_slot(i);
169  const uint32_t footer_bits = ((dist_slot >> 1) - 1);
170  const uint32_t base = (2 | (dist_slot & 1)) << footer_bits;
171  const uint32_t price = rc_bittree_reverse_price(
172  coder->dist_special + base - dist_slot - 1,
173  footer_bits, i - base);
174 
175  for (uint32_t dist_state = 0; dist_state < DIST_STATES;
176  ++dist_state)
177  coder->dist_prices[dist_state][i]
178  = price + coder->dist_slot_prices[
179  dist_state][dist_slot];
180  }
181 
182  coder->match_price_count = 0;
183  return;
184 }
185 
186 
187 static void
189 {
190  for (uint32_t i = 0; i < ALIGN_SIZE; ++i)
192  coder->dist_align, ALIGN_BITS, i);
193 
194  coder->align_price_count = 0;
195  return;
196 }
197 
198 
200 // Optimal //
202 
203 static inline void
205 {
206  optimal->back_prev = UINT32_MAX;
207  optimal->prev_1_is_literal = false;
208 }
209 
210 
211 static inline void
213 {
214  optimal->back_prev = 0;
215  optimal->prev_1_is_literal = false;
216 }
217 
218 
219 #define is_short_rep(optimal) \
220  ((optimal).back_prev == 0)
221 
222 
223 static void
225  uint32_t *restrict back_res, uint32_t cur)
226 {
227  coder->opts_end_index = cur;
228 
229  uint32_t pos_mem = coder->opts[cur].pos_prev;
230  uint32_t back_mem = coder->opts[cur].back_prev;
231 
232  do {
233  if (coder->opts[cur].prev_1_is_literal) {
234  make_literal(&coder->opts[pos_mem]);
235  coder->opts[pos_mem].pos_prev = pos_mem - 1;
236 
237  if (coder->opts[cur].prev_2) {
238  coder->opts[pos_mem - 1].prev_1_is_literal
239  = false;
240  coder->opts[pos_mem - 1].pos_prev
241  = coder->opts[cur].pos_prev_2;
242  coder->opts[pos_mem - 1].back_prev
243  = coder->opts[cur].back_prev_2;
244  }
245  }
246 
247  const uint32_t pos_prev = pos_mem;
248  const uint32_t back_cur = back_mem;
249 
250  back_mem = coder->opts[pos_prev].back_prev;
251  pos_mem = coder->opts[pos_prev].pos_prev;
252 
253  coder->opts[pos_prev].back_prev = back_cur;
254  coder->opts[pos_prev].pos_prev = cur;
255  cur = pos_prev;
256 
257  } while (cur != 0);
258 
259  coder->opts_current_index = coder->opts[0].pos_prev;
260  *len_res = coder->opts[0].pos_prev;
261  *back_res = coder->opts[0].back_prev;
262 
263  return;
264 }
265 
266 
268 // Main //
270 
271 static inline uint32_t
273  uint32_t *restrict back_res, uint32_t *restrict len_res,
274  uint32_t position)
275 {
276  const uint32_t nice_len = mf->nice_len;
277 
278  uint32_t len_main;
279  uint32_t matches_count;
280 
281  if (mf->read_ahead == 0) {
282  len_main = mf_find(mf, &matches_count, coder->matches);
283  } else {
284  assert(mf->read_ahead == 1);
285  len_main = coder->longest_match_length;
286  matches_count = coder->matches_count;
287  }
288 
289  const uint32_t buf_avail = my_min(mf_avail(mf) + 1, MATCH_LEN_MAX);
290  if (buf_avail < 2) {
291  *back_res = UINT32_MAX;
292  *len_res = 1;
293  return UINT32_MAX;
294  }
295 
296  const uint8_t *const buf = mf_ptr(mf) - 1;
297 
298  uint32_t rep_lens[REPS];
299  uint32_t rep_max_index = 0;
300 
301  for (uint32_t i = 0; i < REPS; ++i) {
302  const uint8_t *const buf_back = buf - coder->reps[i] - 1;
303 
304  if (not_equal_16(buf, buf_back)) {
305  rep_lens[i] = 0;
306  continue;
307  }
308 
309  rep_lens[i] = lzma_memcmplen(buf, buf_back, 2, buf_avail);
310 
311  if (rep_lens[i] > rep_lens[rep_max_index])
312  rep_max_index = i;
313  }
314 
315  if (rep_lens[rep_max_index] >= nice_len) {
316  *back_res = rep_max_index;
317  *len_res = rep_lens[rep_max_index];
318  mf_skip(mf, *len_res - 1);
319  return UINT32_MAX;
320  }
321 
322 
323  if (len_main >= nice_len) {
324  *back_res = coder->matches[matches_count - 1].dist + REPS;
325  *len_res = len_main;
326  mf_skip(mf, len_main - 1);
327  return UINT32_MAX;
328  }
329 
330  const uint8_t current_byte = *buf;
331  const uint8_t match_byte = *(buf - coder->reps[0] - 1);
332 
333  if (len_main < 2 && current_byte != match_byte
334  && rep_lens[rep_max_index] < 2) {
335  *back_res = UINT32_MAX;
336  *len_res = 1;
337  return UINT32_MAX;
338  }
339 
340  coder->opts[0].state = coder->state;
341 
342  const uint32_t pos_state = position & coder->pos_mask;
343 
344  coder->opts[1].price = rc_bit_0_price(
345  coder->is_match[coder->state][pos_state])
346  + get_literal_price(coder, position, buf[-1],
347  !is_literal_state(coder->state),
348  match_byte, current_byte);
349 
350  make_literal(&coder->opts[1]);
351 
352  const uint32_t match_price = rc_bit_1_price(
353  coder->is_match[coder->state][pos_state]);
354  const uint32_t rep_match_price = match_price
355  + rc_bit_1_price(coder->is_rep[coder->state]);
356 
357  if (match_byte == current_byte) {
358  const uint32_t short_rep_price = rep_match_price
360  coder, coder->state, pos_state);
361 
362  if (short_rep_price < coder->opts[1].price) {
363  coder->opts[1].price = short_rep_price;
364  make_short_rep(&coder->opts[1]);
365  }
366  }
367 
368  const uint32_t len_end = my_max(len_main, rep_lens[rep_max_index]);
369 
370  if (len_end < 2) {
371  *back_res = coder->opts[1].back_prev;
372  *len_res = 1;
373  return UINT32_MAX;
374  }
375 
376  coder->opts[1].pos_prev = 0;
377 
378  for (uint32_t i = 0; i < REPS; ++i)
379  coder->opts[0].backs[i] = coder->reps[i];
380 
381  uint32_t len = len_end;
382  do {
383  coder->opts[len].price = RC_INFINITY_PRICE;
384  } while (--len >= 2);
385 
386 
387  for (uint32_t i = 0; i < REPS; ++i) {
388  uint32_t rep_len = rep_lens[i];
389  if (rep_len < 2)
390  continue;
391 
392  const uint32_t price = rep_match_price + get_pure_rep_price(
393  coder, i, coder->state, pos_state);
394 
395  do {
396  const uint32_t cur_and_len_price = price
397  + get_len_price(
398  &coder->rep_len_encoder,
399  rep_len, pos_state);
400 
401  if (cur_and_len_price < coder->opts[rep_len].price) {
402  coder->opts[rep_len].price = cur_and_len_price;
403  coder->opts[rep_len].pos_prev = 0;
404  coder->opts[rep_len].back_prev = i;
405  coder->opts[rep_len].prev_1_is_literal = false;
406  }
407  } while (--rep_len >= 2);
408  }
409 
410 
411  const uint32_t normal_match_price = match_price
412  + rc_bit_0_price(coder->is_rep[coder->state]);
413 
414  len = rep_lens[0] >= 2 ? rep_lens[0] + 1 : 2;
415  if (len <= len_main) {
416  uint32_t i = 0;
417  while (len > coder->matches[i].len)
418  ++i;
419 
420  for(; ; ++len) {
421  const uint32_t dist = coder->matches[i].dist;
422  const uint32_t cur_and_len_price = normal_match_price
423  + get_dist_len_price(coder,
424  dist, len, pos_state);
425 
426  if (cur_and_len_price < coder->opts[len].price) {
427  coder->opts[len].price = cur_and_len_price;
428  coder->opts[len].pos_prev = 0;
429  coder->opts[len].back_prev = dist + REPS;
430  coder->opts[len].prev_1_is_literal = false;
431  }
432 
433  if (len == coder->matches[i].len)
434  if (++i == matches_count)
435  break;
436  }
437  }
438 
439  return len_end;
440 }
441 
442 
443 static inline uint32_t
445  uint32_t len_end, uint32_t position, const uint32_t cur,
446  const uint32_t nice_len, const uint32_t buf_avail_full)
447 {
448  uint32_t matches_count = coder->matches_count;
449  uint32_t new_len = coder->longest_match_length;
450  uint32_t pos_prev = coder->opts[cur].pos_prev;
452 
453  if (coder->opts[cur].prev_1_is_literal) {
454  --pos_prev;
455 
456  if (coder->opts[cur].prev_2) {
457  state = coder->opts[coder->opts[cur].pos_prev_2].state;
458 
459  if (coder->opts[cur].back_prev_2 < REPS)
461  else
463 
464  } else {
465  state = coder->opts[pos_prev].state;
466  }
467 
469 
470  } else {
471  state = coder->opts[pos_prev].state;
472  }
473 
474  if (pos_prev == cur - 1) {
475  if (is_short_rep(coder->opts[cur]))
477  else
479  } else {
480  uint32_t pos;
481  if (coder->opts[cur].prev_1_is_literal
482  && coder->opts[cur].prev_2) {
483  pos_prev = coder->opts[cur].pos_prev_2;
484  pos = coder->opts[cur].back_prev_2;
486  } else {
487  pos = coder->opts[cur].back_prev;
488  if (pos < REPS)
490  else
492  }
493 
494  if (pos < REPS) {
495  reps[0] = coder->opts[pos_prev].backs[pos];
496 
497  uint32_t i;
498  for (i = 1; i <= pos; ++i)
499  reps[i] = coder->opts[pos_prev].backs[i - 1];
500 
501  for (; i < REPS; ++i)
502  reps[i] = coder->opts[pos_prev].backs[i];
503 
504  } else {
505  reps[0] = pos - REPS;
506 
507  for (uint32_t i = 1; i < REPS; ++i)
508  reps[i] = coder->opts[pos_prev].backs[i - 1];
509  }
510  }
511 
512  coder->opts[cur].state = state;
513 
514  for (uint32_t i = 0; i < REPS; ++i)
515  coder->opts[cur].backs[i] = reps[i];
516 
517  const uint32_t cur_price = coder->opts[cur].price;
518 
519  const uint8_t current_byte = *buf;
520  const uint8_t match_byte = *(buf - reps[0] - 1);
521 
522  const uint32_t pos_state = position & coder->pos_mask;
523 
524  const uint32_t cur_and_1_price = cur_price
525  + rc_bit_0_price(coder->is_match[state][pos_state])
526  + get_literal_price(coder, position, buf[-1],
527  !is_literal_state(state), match_byte, current_byte);
528 
529  bool next_is_literal = false;
530 
531  if (cur_and_1_price < coder->opts[cur + 1].price) {
532  coder->opts[cur + 1].price = cur_and_1_price;
533  coder->opts[cur + 1].pos_prev = cur;
534  make_literal(&coder->opts[cur + 1]);
535  next_is_literal = true;
536  }
537 
538  const uint32_t match_price = cur_price
539  + rc_bit_1_price(coder->is_match[state][pos_state]);
540  const uint32_t rep_match_price = match_price
541  + rc_bit_1_price(coder->is_rep[state]);
542 
543  if (match_byte == current_byte
544  && !(coder->opts[cur + 1].pos_prev < cur
545  && coder->opts[cur + 1].back_prev == 0)) {
546 
547  const uint32_t short_rep_price = rep_match_price
548  + get_short_rep_price(coder, state, pos_state);
549 
550  if (short_rep_price <= coder->opts[cur + 1].price) {
551  coder->opts[cur + 1].price = short_rep_price;
552  coder->opts[cur + 1].pos_prev = cur;
553  make_short_rep(&coder->opts[cur + 1]);
554  next_is_literal = true;
555  }
556  }
557 
558  if (buf_avail_full < 2)
559  return len_end;
560 
561  const uint32_t buf_avail = my_min(buf_avail_full, nice_len);
562 
563  if (!next_is_literal && match_byte != current_byte) { // speed optimization
564  // try literal + rep0
565  const uint8_t *const buf_back = buf - reps[0] - 1;
566  const uint32_t limit = my_min(buf_avail_full, nice_len + 1);
567 
568  const uint32_t len_test = lzma_memcmplen(buf, buf_back, 1, limit) - 1;
569 
570  if (len_test >= 2) {
571  lzma_lzma_state state_2 = state;
572  update_literal(state_2);
573 
574  const uint32_t pos_state_next = (position + 1) & coder->pos_mask;
575  const uint32_t next_rep_match_price = cur_and_1_price
576  + rc_bit_1_price(coder->is_match[state_2][pos_state_next])
577  + rc_bit_1_price(coder->is_rep[state_2]);
578 
579  //for (; len_test >= 2; --len_test) {
580  const uint32_t offset = cur + 1 + len_test;
581 
582  while (len_end < offset)
583  coder->opts[++len_end].price = RC_INFINITY_PRICE;
584 
585  const uint32_t cur_and_len_price = next_rep_match_price
586  + get_rep_price(coder, 0, len_test,
587  state_2, pos_state_next);
588 
589  if (cur_and_len_price < coder->opts[offset].price) {
590  coder->opts[offset].price = cur_and_len_price;
591  coder->opts[offset].pos_prev = cur + 1;
592  coder->opts[offset].back_prev = 0;
593  coder->opts[offset].prev_1_is_literal = true;
594  coder->opts[offset].prev_2 = false;
595  }
596  //}
597  }
598  }
599 
600 
601  uint32_t start_len = 2; // speed optimization
602 
603  for (uint32_t rep_index = 0; rep_index < REPS; ++rep_index) {
604  const uint8_t *const buf_back = buf - reps[rep_index] - 1;
605  if (not_equal_16(buf, buf_back))
606  continue;
607 
608  uint32_t len_test = lzma_memcmplen(buf, buf_back, 2, buf_avail);
609 
610  while (len_end < cur + len_test)
611  coder->opts[++len_end].price = RC_INFINITY_PRICE;
612 
613  const uint32_t len_test_temp = len_test;
614  const uint32_t price = rep_match_price + get_pure_rep_price(
615  coder, rep_index, state, pos_state);
616 
617  do {
618  const uint32_t cur_and_len_price = price
619  + get_len_price(&coder->rep_len_encoder,
620  len_test, pos_state);
621 
622  if (cur_and_len_price < coder->opts[cur + len_test].price) {
623  coder->opts[cur + len_test].price = cur_and_len_price;
624  coder->opts[cur + len_test].pos_prev = cur;
625  coder->opts[cur + len_test].back_prev = rep_index;
626  coder->opts[cur + len_test].prev_1_is_literal = false;
627  }
628  } while (--len_test >= 2);
629 
630  len_test = len_test_temp;
631 
632  if (rep_index == 0)
633  start_len = len_test + 1;
634 
635 
636  uint32_t len_test_2 = len_test + 1;
637  const uint32_t limit = my_min(buf_avail_full,
638  len_test_2 + nice_len);
639  // NOTE: len_test_2 may be greater than limit so the call to
640  // lzma_memcmplen() must be done conditionally.
641  if (len_test_2 < limit)
642  len_test_2 = lzma_memcmplen(buf, buf_back, len_test_2, limit);
643 
644  len_test_2 -= len_test + 1;
645 
646  if (len_test_2 >= 2) {
647  lzma_lzma_state state_2 = state;
648  update_long_rep(state_2);
649 
650  uint32_t pos_state_next = (position + len_test) & coder->pos_mask;
651 
652  const uint32_t cur_and_len_literal_price = price
653  + get_len_price(&coder->rep_len_encoder,
654  len_test, pos_state)
655  + rc_bit_0_price(coder->is_match[state_2][pos_state_next])
656  + get_literal_price(coder, position + len_test,
657  buf[len_test - 1], true,
658  buf_back[len_test], buf[len_test]);
659 
660  update_literal(state_2);
661 
662  pos_state_next = (position + len_test + 1) & coder->pos_mask;
663 
664  const uint32_t next_rep_match_price = cur_and_len_literal_price
665  + rc_bit_1_price(coder->is_match[state_2][pos_state_next])
666  + rc_bit_1_price(coder->is_rep[state_2]);
667 
668  //for(; len_test_2 >= 2; len_test_2--) {
669  const uint32_t offset = cur + len_test + 1 + len_test_2;
670 
671  while (len_end < offset)
672  coder->opts[++len_end].price = RC_INFINITY_PRICE;
673 
674  const uint32_t cur_and_len_price = next_rep_match_price
675  + get_rep_price(coder, 0, len_test_2,
676  state_2, pos_state_next);
677 
678  if (cur_and_len_price < coder->opts[offset].price) {
679  coder->opts[offset].price = cur_and_len_price;
680  coder->opts[offset].pos_prev = cur + len_test + 1;
681  coder->opts[offset].back_prev = 0;
682  coder->opts[offset].prev_1_is_literal = true;
683  coder->opts[offset].prev_2 = true;
684  coder->opts[offset].pos_prev_2 = cur;
685  coder->opts[offset].back_prev_2 = rep_index;
686  }
687  //}
688  }
689  }
690 
691 
692  //for (uint32_t len_test = 2; len_test <= new_len; ++len_test)
693  if (new_len > buf_avail) {
694  new_len = buf_avail;
695 
696  matches_count = 0;
697  while (new_len > coder->matches[matches_count].len)
698  ++matches_count;
699 
700  coder->matches[matches_count++].len = new_len;
701  }
702 
703 
704  if (new_len >= start_len) {
705  const uint32_t normal_match_price = match_price
706  + rc_bit_0_price(coder->is_rep[state]);
707 
708  while (len_end < cur + new_len)
709  coder->opts[++len_end].price = RC_INFINITY_PRICE;
710 
711  uint32_t i = 0;
712  while (start_len > coder->matches[i].len)
713  ++i;
714 
715  for (uint32_t len_test = start_len; ; ++len_test) {
716  const uint32_t cur_back = coder->matches[i].dist;
717  uint32_t cur_and_len_price = normal_match_price
718  + get_dist_len_price(coder,
719  cur_back, len_test, pos_state);
720 
721  if (cur_and_len_price < coder->opts[cur + len_test].price) {
722  coder->opts[cur + len_test].price = cur_and_len_price;
723  coder->opts[cur + len_test].pos_prev = cur;
724  coder->opts[cur + len_test].back_prev
725  = cur_back + REPS;
726  coder->opts[cur + len_test].prev_1_is_literal = false;
727  }
728 
729  if (len_test == coder->matches[i].len) {
730  // Try Match + Literal + Rep0
731  const uint8_t *const buf_back = buf - cur_back - 1;
732  uint32_t len_test_2 = len_test + 1;
733  const uint32_t limit = my_min(buf_avail_full,
734  len_test_2 + nice_len);
735 
736  // NOTE: len_test_2 may be greater than limit
737  // so the call to lzma_memcmplen() must be
738  // done conditionally.
739  if (len_test_2 < limit)
740  len_test_2 = lzma_memcmplen(buf, buf_back,
741  len_test_2, limit);
742 
743  len_test_2 -= len_test + 1;
744 
745  if (len_test_2 >= 2) {
746  lzma_lzma_state state_2 = state;
747  update_match(state_2);
748  uint32_t pos_state_next
749  = (position + len_test) & coder->pos_mask;
750 
751  const uint32_t cur_and_len_literal_price = cur_and_len_price
752  + rc_bit_0_price(
753  coder->is_match[state_2][pos_state_next])
754  + get_literal_price(coder,
755  position + len_test,
756  buf[len_test - 1],
757  true,
758  buf_back[len_test],
759  buf[len_test]);
760 
761  update_literal(state_2);
762  pos_state_next = (pos_state_next + 1) & coder->pos_mask;
763 
764  const uint32_t next_rep_match_price
765  = cur_and_len_literal_price
766  + rc_bit_1_price(
767  coder->is_match[state_2][pos_state_next])
768  + rc_bit_1_price(coder->is_rep[state_2]);
769 
770  // for(; len_test_2 >= 2; --len_test_2) {
771  const uint32_t offset = cur + len_test + 1 + len_test_2;
772 
773  while (len_end < offset)
774  coder->opts[++len_end].price = RC_INFINITY_PRICE;
775 
776  cur_and_len_price = next_rep_match_price
777  + get_rep_price(coder, 0, len_test_2,
778  state_2, pos_state_next);
779 
780  if (cur_and_len_price < coder->opts[offset].price) {
781  coder->opts[offset].price = cur_and_len_price;
782  coder->opts[offset].pos_prev = cur + len_test + 1;
783  coder->opts[offset].back_prev = 0;
784  coder->opts[offset].prev_1_is_literal = true;
785  coder->opts[offset].prev_2 = true;
786  coder->opts[offset].pos_prev_2 = cur;
787  coder->opts[offset].back_prev_2
788  = cur_back + REPS;
789  }
790  //}
791  }
792 
793  if (++i == matches_count)
794  break;
795  }
796  }
797  }
798 
799  return len_end;
800 }
801 
802 
803 extern void
805  lzma_mf *restrict mf,
806  uint32_t *restrict back_res, uint32_t *restrict len_res,
807  uint32_t position)
808 {
809  // If we have symbols pending, return the next pending symbol.
810  if (coder->opts_end_index != coder->opts_current_index) {
811  assert(mf->read_ahead > 0);
812  *len_res = coder->opts[coder->opts_current_index].pos_prev
813  - coder->opts_current_index;
814  *back_res = coder->opts[coder->opts_current_index].back_prev;
815  coder->opts_current_index = coder->opts[
816  coder->opts_current_index].pos_prev;
817  return;
818  }
819 
820  // Update the price tables. In LZMA SDK <= 4.60 (and possibly later)
821  // this was done in both initialization function and in the main loop.
822  // In liblzma they were moved into this single place.
823  if (mf->read_ahead == 0) {
824  if (coder->match_price_count >= (1 << 7))
825  fill_dist_prices(coder);
826 
827  if (coder->align_price_count >= ALIGN_SIZE)
828  fill_align_prices(coder);
829  }
830 
831  // TODO: This needs quite a bit of cleaning still. But splitting
832  // the original function into two pieces makes it at least a little
833  // more readable, since those two parts don't share many variables.
834 
835  uint32_t len_end = helper1(coder, mf, back_res, len_res, position);
836  if (len_end == UINT32_MAX)
837  return;
838 
839  uint32_t reps[REPS];
840  memcpy(reps, coder->reps, sizeof(reps));
841 
842  uint32_t cur;
843  for (cur = 1; cur < len_end; ++cur) {
844  assert(cur < OPTS);
845 
846  coder->longest_match_length = mf_find(
847  mf, &coder->matches_count, coder->matches);
848 
849  if (coder->longest_match_length >= mf->nice_len)
850  break;
851 
852  len_end = helper2(coder, reps, mf_ptr(mf) - 1, len_end,
853  position + cur, cur, mf->nice_len,
854  my_min(mf_avail(mf) + 1, OPTS - 1 - cur));
855  }
856 
857  backward(coder, len_res, back_res, cur);
858  return;
859 }
size_t len
Definition: 6502dis.c:15
lzma_index ** i
Definition: index.h:629
RzCryptoSelector bit
Definition: crypto.c:16
Kind of two-bit version of bit scan reverse.
static uint32_t get_dist_slot(uint32_t dist)
Definition: fastpos.h:109
voidpf uLong offset
Definition: ioapi.h:144
voidpf void * buf
Definition: ioapi.h:138
#define restrict
memcpy(mem, inblock.get(), min(CONTAINING_RECORD(inblock.get(), MEMBLOCK, data) ->size, size))
static uint32_t mf_avail(const lzma_mf *mf)
Get the number of bytes that haven't been ran through the match finder yet.
Definition: lz_encoder.h:229
static const uint8_t * mf_ptr(const lzma_mf *mf)
Get pointer to the first byte not ran through the match finder.
Definition: lz_encoder.h:221
static void mf_skip(lzma_mf *mf, uint32_t amount)
Definition: lz_encoder.h:267
#define mf_find
Since everything else begins with mf_, use it also for lzma_mf_find().
Definition: lz_encoder.h:259
#define MATCH_LEN_MAX
Definition: lzma_common.h:168
#define MATCH_LEN_MIN
Definition: lzma_common.h:150
lzma_lzma_state
Definition: lzma_common.h:56
#define update_literal(state)
Indicate that the latest state was a literal.
Definition: lzma_common.h:80
#define REPS
Definition: lzma_common.h:223
#define ALIGN_BITS
Definition: lzma_common.h:217
#define DIST_SLOT_BITS
Definition: lzma_common.h:189
#define is_literal_state(state)
Test if the previous state was a literal.
Definition: lzma_common.h:100
#define DIST_STATES
Definition: lzma_common.h:179
#define update_match(state)
Indicate that the latest state was a match.
Definition: lzma_common.h:88
#define FULL_DISTANCES
Definition: lzma_common.h:213
#define ALIGN_SIZE
Definition: lzma_common.h:218
#define get_dist_state(len)
Definition: lzma_common.h:182
#define literal_subcoder(probs, lc, lp_mask, pos, prev_byte)
Definition: lzma_common.h:124
#define ALIGN_MASK
Definition: lzma_common.h:219
#define DIST_MODEL_END
Definition: lzma_common.h:209
#define update_long_rep(state)
Indicate that the latest state was a long repeated match.
Definition: lzma_common.h:92
#define DIST_MODEL_START
Definition: lzma_common.h:198
#define update_short_rep(state)
Indicate that the latest state was a short match.
Definition: lzma_common.h:96
static void make_literal(lzma_optimal *optimal)
static uint32_t get_literal_price(const lzma_lzma1_encoder *const coder, const uint32_t pos, const uint32_t prev_byte, const bool match_mode, uint32_t match_byte, uint32_t symbol)
static uint32_t get_dist_len_price(const lzma_lzma1_encoder *const coder, const uint32_t dist, const uint32_t len, const uint32_t pos_state)
static void make_short_rep(lzma_optimal *optimal)
void lzma_lzma_optimum_normal(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf, uint32_t *restrict back_res, uint32_t *restrict len_res, uint32_t position)
static void fill_align_prices(lzma_lzma1_encoder *coder)
static uint32_t helper2(lzma_lzma1_encoder *coder, uint32_t *reps, const uint8_t *buf, uint32_t len_end, uint32_t position, const uint32_t cur, const uint32_t nice_len, const uint32_t buf_avail_full)
static uint32_t get_pure_rep_price(const lzma_lzma1_encoder *const coder, const uint32_t rep_index, const lzma_lzma_state state, uint32_t pos_state)
#define is_short_rep(optimal)
static uint32_t get_rep_price(const lzma_lzma1_encoder *const coder, const uint32_t rep_index, const uint32_t len, const lzma_lzma_state state, const uint32_t pos_state)
static void fill_dist_prices(lzma_lzma1_encoder *coder)
static uint32_t helper1(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf, uint32_t *restrict back_res, uint32_t *restrict len_res, uint32_t position)
static void backward(lzma_lzma1_encoder *restrict coder, uint32_t *restrict len_res, uint32_t *restrict back_res, uint32_t cur)
static uint32_t get_len_price(const lzma_length_encoder *const lencoder, const uint32_t len, const uint32_t pos_state)
static uint32_t get_short_rep_price(const lzma_lzma1_encoder *const coder, const lzma_lzma_state state, const uint32_t pos_state)
Private definitions for LZMA encoder.
#define OPTS
#define not_equal_16(a, b)
Optimized comparison of two buffers.
assert(limit<=UINT32_MAX/2)
static uint32_t const uint8_t uint32_t uint32_t limit
Definition: memcmplen.h:45
static uint32_t rc_bit_price(const probability prob, const uint32_t bit)
Definition: price.h:29
static uint32_t rc_bittree_price(const probability *const probs, const uint32_t bit_levels, uint32_t symbol)
Definition: price.h:52
static uint32_t rc_bittree_reverse_price(const probability *const probs, uint32_t bit_levels, uint32_t symbol)
Definition: price.h:69
static uint32_t rc_bit_0_price(const probability prob)
Definition: price.h:37
static uint32_t rc_bit_1_price(const probability prob)
Definition: price.h:44
#define RC_INFINITY_PRICE
Definition: price.h:21
static uint32_t rc_direct_price(const uint32_t bits)
Definition: price.h:87
uint16_t probability
Type of probabilities used with range coder.
Definition: range_common.h:69
unsigned int uint32_t
Definition: sftypes.h:29
unsigned char uint8_t
Definition: sftypes.h:31
#define UINT32_C(val)
#define UINT32_MAX
uint32_t prices[POS_STATES_MAX][LEN_SYMBOLS]
lzma_match matches[MATCH_LEN_MAX+1]
Array of match candidates.
probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]
probability is_rep0_long[STATES][POS_STATES_MAX]
probability is_rep1[STATES]
uint32_t pos_mask
(1 << pos_bits) - 1
probability is_rep0[STATES]
probability is_rep2[STATES]
probability dist_special[FULL_DISTANCES - DIST_MODEL_END]
uint32_t align_prices[ALIGN_SIZE]
lzma_optimal opts[OPTS]
probability is_rep[STATES]
lzma_length_encoder match_len_encoder
uint32_t dist_prices[DIST_STATES][FULL_DISTANCES]
probability dist_slot[DIST_STATES][DIST_SLOTS]
uint32_t dist_slot_prices[DIST_STATES][DIST_SLOTS]
uint32_t matches_count
Number of match candidates in matches[].
probability dist_align[ALIGN_SIZE]
probability is_match[STATES][POS_STATES_MAX]
lzma_length_encoder rep_len_encoder
uint32_t len
Definition: lz_encoder.h:23
uint32_t dist
Definition: lz_encoder.h:24
uint32_t backs[REPS]
lzma_lzma_state state
Definition: dis.h:43
int pos
Definition: main.c:11
#define my_min(x, y)
Definition: sysdefs.h:185
#define my_max(x, y)
Definition: sysdefs.h:186