Rizin
unix-like reverse engineering framework and cli tools
lzma_decoder.c
Go to the documentation of this file.
1 //
6 // Authors: Igor Pavlov
7 // Lasse Collin
8 //
9 // This file has been put into the public domain.
10 // You can do whatever you want with this file.
11 //
13 
14 #include "lz_decoder.h"
15 #include "lzma_common.h"
16 #include "lzma_decoder.h"
17 #include "range_decoder.h"
18 
19 // The macros unroll loops with switch statements.
20 // Silence warnings about missing fall-through comments.
21 #if TUKLIB_GNUC_REQ(7, 0)
22 # pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
23 #endif
24 
25 
26 #ifdef HAVE_SMALL
27 
28 // Macros for (somewhat) size-optimized code.
29 #define seq_4(seq) seq
30 
31 #define seq_6(seq) seq
32 
33 #define seq_8(seq) seq
34 
35 #define seq_len(seq) \
36  seq ## _CHOICE, \
37  seq ## _CHOICE2, \
38  seq ## _BITTREE
39 
40 #define len_decode(target, ld, pos_state, seq) \
41 do { \
42 case seq ## _CHOICE: \
43  rc_if_0(ld.choice, seq ## _CHOICE) { \
44  rc_update_0(ld.choice); \
45  probs = ld.low[pos_state];\
46  limit = LEN_LOW_SYMBOLS; \
47  target = MATCH_LEN_MIN; \
48  } else { \
49  rc_update_1(ld.choice); \
50 case seq ## _CHOICE2: \
51  rc_if_0(ld.choice2, seq ## _CHOICE2) { \
52  rc_update_0(ld.choice2); \
53  probs = ld.mid[pos_state]; \
54  limit = LEN_MID_SYMBOLS; \
55  target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
56  } else { \
57  rc_update_1(ld.choice2); \
58  probs = ld.high; \
59  limit = LEN_HIGH_SYMBOLS; \
60  target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \
61  + LEN_MID_SYMBOLS; \
62  } \
63  } \
64  symbol = 1; \
65 case seq ## _BITTREE: \
66  do { \
67  rc_bit(probs[symbol], , , seq ## _BITTREE); \
68  } while (symbol < limit); \
69  target += symbol - limit; \
70 } while (0)
71 
72 #else // HAVE_SMALL
73 
74 // Unrolled versions
75 #define seq_4(seq) \
76  seq ## 0, \
77  seq ## 1, \
78  seq ## 2, \
79  seq ## 3
80 
81 #define seq_6(seq) \
82  seq ## 0, \
83  seq ## 1, \
84  seq ## 2, \
85  seq ## 3, \
86  seq ## 4, \
87  seq ## 5
88 
89 #define seq_8(seq) \
90  seq ## 0, \
91  seq ## 1, \
92  seq ## 2, \
93  seq ## 3, \
94  seq ## 4, \
95  seq ## 5, \
96  seq ## 6, \
97  seq ## 7
98 
99 #define seq_len(seq) \
100  seq ## _CHOICE, \
101  seq ## _LOW0, \
102  seq ## _LOW1, \
103  seq ## _LOW2, \
104  seq ## _CHOICE2, \
105  seq ## _MID0, \
106  seq ## _MID1, \
107  seq ## _MID2, \
108  seq ## _HIGH0, \
109  seq ## _HIGH1, \
110  seq ## _HIGH2, \
111  seq ## _HIGH3, \
112  seq ## _HIGH4, \
113  seq ## _HIGH5, \
114  seq ## _HIGH6, \
115  seq ## _HIGH7
116 
117 #define len_decode(target, ld, pos_state, seq) \
118 do { \
119  symbol = 1; \
120 case seq ## _CHOICE: \
121  rc_if_0(ld.choice, seq ## _CHOICE) { \
122  rc_update_0(ld.choice); \
123  rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \
124  rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \
125  rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \
126  target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \
127  } else { \
128  rc_update_1(ld.choice); \
129 case seq ## _CHOICE2: \
130  rc_if_0(ld.choice2, seq ## _CHOICE2) { \
131  rc_update_0(ld.choice2); \
132  rc_bit_case(ld.mid[pos_state][symbol], , , \
133  seq ## _MID0); \
134  rc_bit_case(ld.mid[pos_state][symbol], , , \
135  seq ## _MID1); \
136  rc_bit_case(ld.mid[pos_state][symbol], , , \
137  seq ## _MID2); \
138  target = symbol - LEN_MID_SYMBOLS \
139  + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
140  } else { \
141  rc_update_1(ld.choice2); \
142  rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \
143  rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \
144  rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \
145  rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \
146  rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \
147  rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \
148  rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \
149  rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \
150  target = symbol - LEN_HIGH_SYMBOLS \
151  + MATCH_LEN_MIN \
152  + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \
153  } \
154  } \
155 } while (0)
156 
157 #endif // HAVE_SMALL
158 
159 
161 typedef struct {
168 
169 
170 typedef struct {
172  // Probabilities //
174 
177 
180 
183 
186  probability is_rep0[STATES];
187 
190  probability is_rep1[STATES];
191 
193  probability is_rep2[STATES];
194 
198 
203 
207 
211 
214 
217 
219  // Decoder state //
221 
222  // Range coder
224 
225  // Types of the most recently seen LZMA symbols
227 
232 
233  uint32_t pos_mask; // (1U << pb) - 1
236 
240 
242  // State of incomplete symbol //
244 
246  enum {
249  seq_8(SEQ_LITERAL),
250  seq_8(SEQ_LITERAL_MATCHED),
253  seq_len(SEQ_MATCH_LEN),
254  seq_6(SEQ_DIST_SLOT),
257  seq_4(SEQ_ALIGN),
264  seq_len(SEQ_REP_LEN),
266  } sequence;
267 
270 
274 
278 
282 
287 
288 
289 static lzma_ret
290 lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
291  const uint8_t *restrict in,
292  size_t *restrict in_pos, size_t in_size)
293 {
294  lzma_lzma1_decoder *restrict coder = coder_ptr;
295 
297  // Initialization //
299 
300  {
301  const lzma_ret ret = rc_read_init(
302  &coder->rc, in, in_pos, in_size);
303  if (ret != LZMA_STREAM_END)
304  return ret;
305  }
306 
308  // Variables //
310 
311  // Making local copies of often-used variables improves both
312  // speed and readability.
313 
314  lzma_dict dict = *dictptr;
315 
316  const size_t dict_start = dict.pos;
317 
318  // Range decoder
319  rc_to_local(coder->rc, *in_pos);
320 
321  // State
322  uint32_t state = coder->state;
323  uint32_t rep0 = coder->rep0;
324  uint32_t rep1 = coder->rep1;
325  uint32_t rep2 = coder->rep2;
326  uint32_t rep3 = coder->rep3;
327 
328  const uint32_t pos_mask = coder->pos_mask;
329 
330  // These variables are actually needed only if we last time ran
331  // out of input in the middle of the decoder loop.
332  probability *probs = coder->probs;
333  uint32_t symbol = coder->symbol;
334  uint32_t limit = coder->limit;
335  uint32_t offset = coder->offset;
336  uint32_t len = coder->len;
337 
338  const uint32_t literal_pos_mask = coder->literal_pos_mask;
339  const uint32_t literal_context_bits = coder->literal_context_bits;
340 
341  // Temporary variables
342  uint32_t pos_state = dict.pos & pos_mask;
343 
344  lzma_ret ret = LZMA_OK;
345 
346  // If uncompressed size is known, there must be no end of payload
347  // marker.
348  const bool no_eopm = coder->uncompressed_size
349  != LZMA_VLI_UNKNOWN;
350  if (no_eopm && coder->uncompressed_size < dict.limit - dict.pos)
351  dict.limit = dict.pos + (size_t)(coder->uncompressed_size);
352 
353  // The main decoder loop. The "switch" is used to restart the decoder at
354  // correct location. Once restarted, the "switch" is no longer used.
355  switch (coder->sequence)
356  while (true) {
357  // Calculate new pos_state. This is skipped on the first loop
358  // since we already calculated it when setting up the local
359  // variables.
360  pos_state = dict.pos & pos_mask;
361 
362  case SEQ_NORMALIZE:
363  case SEQ_IS_MATCH:
364  if (unlikely(no_eopm && dict.pos == dict.limit))
365  break;
366 
367  rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) {
368  rc_update_0(coder->is_match[state][pos_state]);
369 
370  // It's a literal i.e. a single 8-bit byte.
371 
372  probs = literal_subcoder(coder->literal,
373  literal_context_bits, literal_pos_mask,
374  dict.pos, dict_get(&dict, 0));
375  symbol = 1;
376 
377  if (is_literal_state(state)) {
378  // Decode literal without match byte.
379 #ifdef HAVE_SMALL
380  case SEQ_LITERAL:
381  do {
382  rc_bit(probs[symbol], , , SEQ_LITERAL);
383  } while (symbol < (1 << 8));
384 #else
385  rc_bit_case(probs[symbol], , , SEQ_LITERAL0);
386  rc_bit_case(probs[symbol], , , SEQ_LITERAL1);
387  rc_bit_case(probs[symbol], , , SEQ_LITERAL2);
388  rc_bit_case(probs[symbol], , , SEQ_LITERAL3);
389  rc_bit_case(probs[symbol], , , SEQ_LITERAL4);
390  rc_bit_case(probs[symbol], , , SEQ_LITERAL5);
391  rc_bit_case(probs[symbol], , , SEQ_LITERAL6);
392  rc_bit_case(probs[symbol], , , SEQ_LITERAL7);
393 #endif
394  } else {
395  // Decode literal with match byte.
396  //
397  // We store the byte we compare against
398  // ("match byte") to "len" to minimize the
399  // number of variables we need to store
400  // between decoder calls.
401  len = (uint32_t)(dict_get(&dict, rep0)) << 1;
402 
403  // The usage of "offset" allows omitting some
404  // branches, which should give tiny speed
405  // improvement on some CPUs. "offset" gets
406  // set to zero if match_bit didn't match.
407  offset = 0x100;
408 
409 #ifdef HAVE_SMALL
410  case SEQ_LITERAL_MATCHED:
411  do {
412  const uint32_t match_bit
413  = len & offset;
414  const uint32_t subcoder_index
415  = offset + match_bit
416  + symbol;
417 
418  rc_bit(probs[subcoder_index],
419  offset &= ~match_bit,
420  offset &= match_bit,
421  SEQ_LITERAL_MATCHED);
422 
423  // It seems to be faster to do this
424  // here instead of putting it to the
425  // beginning of the loop and then
426  // putting the "case" in the middle
427  // of the loop.
428  len <<= 1;
429 
430  } while (symbol < (1 << 8));
431 #else
432  // Unroll the loop.
433  uint32_t match_bit;
434  uint32_t subcoder_index;
435 
436 # define d(seq) \
437  case seq: \
438  match_bit = len & offset; \
439  subcoder_index = offset + match_bit + symbol; \
440  rc_bit(probs[subcoder_index], \
441  offset &= ~match_bit, \
442  offset &= match_bit, \
443  seq)
444 
445  d(SEQ_LITERAL_MATCHED0);
446  len <<= 1;
447  d(SEQ_LITERAL_MATCHED1);
448  len <<= 1;
449  d(SEQ_LITERAL_MATCHED2);
450  len <<= 1;
451  d(SEQ_LITERAL_MATCHED3);
452  len <<= 1;
453  d(SEQ_LITERAL_MATCHED4);
454  len <<= 1;
455  d(SEQ_LITERAL_MATCHED5);
456  len <<= 1;
457  d(SEQ_LITERAL_MATCHED6);
458  len <<= 1;
459  d(SEQ_LITERAL_MATCHED7);
460 # undef d
461 #endif
462  }
463 
464  //update_literal(state);
465  // Use a lookup table to update to literal state,
466  // since compared to other state updates, this would
467  // need two branches.
468  static const lzma_lzma_state next_state[] = {
481  };
482  state = next_state[state];
483 
484  case SEQ_LITERAL_WRITE:
485  if (unlikely(dict_put(&dict, symbol))) {
486  coder->sequence = SEQ_LITERAL_WRITE;
487  goto out;
488  }
489 
490  continue;
491  }
492 
493  // Instead of a new byte we are going to get a byte range
494  // (distance and length) which will be repeated from our
495  // output history.
496 
497  rc_update_1(coder->is_match[state][pos_state]);
498 
499  case SEQ_IS_REP:
500  rc_if_0(coder->is_rep[state], SEQ_IS_REP) {
501  // Not a repeated match
502  rc_update_0(coder->is_rep[state]);
504 
505  // The latest three match distances are kept in
506  // memory in case there are repeated matches.
507  rep3 = rep2;
508  rep2 = rep1;
509  rep1 = rep0;
510 
511  // Decode the length of the match.
512  len_decode(len, coder->match_len_decoder,
513  pos_state, SEQ_MATCH_LEN);
514 
515  // Prepare to decode the highest two bits of the
516  // match distance.
517  probs = coder->dist_slot[get_dist_state(len)];
518  symbol = 1;
519 
520 #ifdef HAVE_SMALL
521  case SEQ_DIST_SLOT:
522  do {
523  rc_bit(probs[symbol], , , SEQ_DIST_SLOT);
524  } while (symbol < DIST_SLOTS);
525 #else
526  rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT0);
527  rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT1);
528  rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT2);
529  rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT3);
530  rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT4);
531  rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT5);
532 #endif
533  // Get rid of the highest bit that was needed for
534  // indexing of the probability array.
535  symbol -= DIST_SLOTS;
536  assert(symbol <= 63);
537 
538  if (symbol < DIST_MODEL_START) {
539  // Match distances [0, 3] have only two bits.
540  rep0 = symbol;
541  } else {
542  // Decode the lowest [1, 29] bits of
543  // the match distance.
544  limit = (symbol >> 1) - 1;
545  assert(limit >= 1 && limit <= 30);
546  rep0 = 2 + (symbol & 1);
547 
548  if (symbol < DIST_MODEL_END) {
549  // Prepare to decode the low bits for
550  // a distance of [4, 127].
551  assert(limit <= 5);
552  rep0 <<= limit;
553  assert(rep0 <= 96);
554  // -1 is fine, because we start
555  // decoding at probs[1], not probs[0].
556  // NOTE: This violates the C standard,
557  // since we are doing pointer
558  // arithmetic past the beginning of
559  // the array.
560  assert((int32_t)(rep0 - symbol - 1)
561  >= -1);
562  assert((int32_t)(rep0 - symbol - 1)
563  <= 82);
564  probs = coder->pos_special + rep0
565  - symbol - 1;
566  symbol = 1;
567  offset = 0;
568  case SEQ_DIST_MODEL:
569 #ifdef HAVE_SMALL
570  do {
571  rc_bit(probs[symbol], ,
572  rep0 += 1U << offset,
573  SEQ_DIST_MODEL);
574  } while (++offset < limit);
575 #else
576  switch (limit) {
577  case 5:
578  assert(offset == 0);
579  rc_bit(probs[symbol], ,
580  rep0 += 1U,
581  SEQ_DIST_MODEL);
582  ++offset;
583  --limit;
584  case 4:
585  rc_bit(probs[symbol], ,
586  rep0 += 1U << offset,
587  SEQ_DIST_MODEL);
588  ++offset;
589  --limit;
590  case 3:
591  rc_bit(probs[symbol], ,
592  rep0 += 1U << offset,
593  SEQ_DIST_MODEL);
594  ++offset;
595  --limit;
596  case 2:
597  rc_bit(probs[symbol], ,
598  rep0 += 1U << offset,
599  SEQ_DIST_MODEL);
600  ++offset;
601  --limit;
602  case 1:
603  // We need "symbol" only for
604  // indexing the probability
605  // array, thus we can use
606  // rc_bit_last() here to omit
607  // the unneeded updating of
608  // "symbol".
609  rc_bit_last(probs[symbol], ,
610  rep0 += 1U << offset,
611  SEQ_DIST_MODEL);
612  }
613 #endif
614  } else {
615  // The distance is >= 128. Decode the
616  // lower bits without probabilities
617  // except the lowest four bits.
618  assert(symbol >= 14);
619  assert(limit >= 6);
620  limit -= ALIGN_BITS;
621  assert(limit >= 2);
622  case SEQ_DIRECT:
623  // Not worth manual unrolling
624  do {
625  rc_direct(rep0, SEQ_DIRECT);
626  } while (--limit > 0);
627 
628  // Decode the lowest four bits using
629  // probabilities.
630  rep0 <<= ALIGN_BITS;
631  symbol = 1;
632 #ifdef HAVE_SMALL
633  offset = 0;
634  case SEQ_ALIGN:
635  do {
636  rc_bit(coder->pos_align[
637  symbol], ,
638  rep0 += 1U << offset,
639  SEQ_ALIGN);
640  } while (++offset < ALIGN_BITS);
641 #else
642  case SEQ_ALIGN0:
643  rc_bit(coder->pos_align[symbol], ,
644  rep0 += 1, SEQ_ALIGN0);
645  case SEQ_ALIGN1:
646  rc_bit(coder->pos_align[symbol], ,
647  rep0 += 2, SEQ_ALIGN1);
648  case SEQ_ALIGN2:
649  rc_bit(coder->pos_align[symbol], ,
650  rep0 += 4, SEQ_ALIGN2);
651  case SEQ_ALIGN3:
652  // Like in SEQ_DIST_MODEL, we don't
653  // need "symbol" for anything else
654  // than indexing the probability array.
655  rc_bit_last(coder->pos_align[symbol], ,
656  rep0 += 8, SEQ_ALIGN3);
657 #endif
658 
659  if (rep0 == UINT32_MAX) {
660  // End of payload marker was
661  // found. It must not be
662  // present if uncompressed
663  // size is known.
664  if (coder->uncompressed_size
665  != LZMA_VLI_UNKNOWN) {
666  ret = LZMA_DATA_ERROR;
667  goto out;
668  }
669 
670  case SEQ_EOPM:
671  // LZMA1 stream with
672  // end-of-payload marker.
673  rc_normalize(SEQ_EOPM);
674  ret = LZMA_STREAM_END;
675  goto out;
676  }
677  }
678  }
679 
680  // Validate the distance we just decoded.
681  if (unlikely(!dict_is_distance_valid(&dict, rep0))) {
682  ret = LZMA_DATA_ERROR;
683  goto out;
684  }
685 
686  } else {
687  rc_update_1(coder->is_rep[state]);
688 
689  // Repeated match
690  //
691  // The match distance is a value that we have had
692  // earlier. The latest four match distances are
693  // available as rep0, rep1, rep2 and rep3. We will
694  // now decode which of them is the new distance.
695  //
696  // There cannot be a match if we haven't produced
697  // any output, so check that first.
698  if (unlikely(!dict_is_distance_valid(&dict, 0))) {
699  ret = LZMA_DATA_ERROR;
700  goto out;
701  }
702 
703  case SEQ_IS_REP0:
704  rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) {
705  rc_update_0(coder->is_rep0[state]);
706  // The distance is rep0.
707 
708  case SEQ_IS_REP0_LONG:
709  rc_if_0(coder->is_rep0_long[state][pos_state],
710  SEQ_IS_REP0_LONG) {
711  rc_update_0(coder->is_rep0_long[
712  state][pos_state]);
713 
715 
716  case SEQ_SHORTREP:
717  if (unlikely(dict_put(&dict, dict_get(
718  &dict, rep0)))) {
719  coder->sequence = SEQ_SHORTREP;
720  goto out;
721  }
722 
723  continue;
724  }
725 
726  // Repeating more than one byte at
727  // distance of rep0.
728  rc_update_1(coder->is_rep0_long[
729  state][pos_state]);
730 
731  } else {
732  rc_update_1(coder->is_rep0[state]);
733 
734  case SEQ_IS_REP1:
735  // The distance is rep1, rep2 or rep3. Once
736  // we find out which one of these three, it
737  // is stored to rep0 and rep1, rep2 and rep3
738  // are updated accordingly.
739  rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) {
740  rc_update_0(coder->is_rep1[state]);
741 
742  const uint32_t distance = rep1;
743  rep1 = rep0;
744  rep0 = distance;
745 
746  } else {
747  rc_update_1(coder->is_rep1[state]);
748  case SEQ_IS_REP2:
749  rc_if_0(coder->is_rep2[state],
750  SEQ_IS_REP2) {
751  rc_update_0(coder->is_rep2[
752  state]);
753 
754  const uint32_t distance = rep2;
755  rep2 = rep1;
756  rep1 = rep0;
757  rep0 = distance;
758 
759  } else {
760  rc_update_1(coder->is_rep2[
761  state]);
762 
763  const uint32_t distance = rep3;
764  rep3 = rep2;
765  rep2 = rep1;
766  rep1 = rep0;
767  rep0 = distance;
768  }
769  }
770  }
771 
773 
774  // Decode the length of the repeated match.
775  len_decode(len, coder->rep_len_decoder,
776  pos_state, SEQ_REP_LEN);
777  }
778 
780  // Repeat from history buffer. //
782 
783  // The length is always between these limits. There is no way
784  // to trigger the algorithm to set len outside this range.
787 
788  case SEQ_COPY:
789  // Repeat len bytes from distance of rep0.
790  if (unlikely(dict_repeat(&dict, rep0, &len))) {
791  coder->sequence = SEQ_COPY;
792  goto out;
793  }
794  }
795 
796  rc_normalize(SEQ_NORMALIZE);
797  coder->sequence = SEQ_IS_MATCH;
798 
799 out:
800  // Save state
801 
802  // NOTE: Must not copy dict.limit.
803  dictptr->pos = dict.pos;
804  dictptr->full = dict.full;
805 
806  rc_from_local(coder->rc, *in_pos);
807 
808  coder->state = state;
809  coder->rep0 = rep0;
810  coder->rep1 = rep1;
811  coder->rep2 = rep2;
812  coder->rep3 = rep3;
813 
814  coder->probs = probs;
815  coder->symbol = symbol;
816  coder->limit = limit;
817  coder->offset = offset;
818  coder->len = len;
819 
820  // Update the remaining amount of uncompressed data if uncompressed
821  // size was known.
822  if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) {
823  coder->uncompressed_size -= dict.pos - dict_start;
824 
825  // Since there cannot be end of payload marker if the
826  // uncompressed size was known, we check here if we
827  // finished decoding.
828  if (coder->uncompressed_size == 0 && ret == LZMA_OK
829  && coder->sequence != SEQ_NORMALIZE)
830  ret = coder->sequence == SEQ_IS_MATCH
832  }
833 
834  // We can do an additional check in the range decoder to catch some
835  // corrupted files.
836  if (ret == LZMA_STREAM_END) {
837  if (!rc_is_finished(coder->rc))
838  ret = LZMA_DATA_ERROR;
839 
840  // Reset the range decoder so that it is ready to reinitialize
841  // for a new LZMA2 chunk.
842  rc_reset(coder->rc);
843  }
844 
845  return ret;
846 }
847 
848 
849 
850 static void
852 {
853  lzma_lzma1_decoder *coder = coder_ptr;
855 }
856 
857 
858 static void
859 lzma_decoder_reset(void *coder_ptr, const void *opt)
860 {
861  lzma_lzma1_decoder *coder = coder_ptr;
862  const lzma_options_lzma *options = opt;
863 
864  // NOTE: We assume that lc/lp/pb are valid since they were
865  // successfully decoded with lzma_lzma_decode_properties().
866 
867  // Calculate pos_mask. We don't need pos_bits as is for anything.
868  coder->pos_mask = (1U << options->pb) - 1;
869 
870  // Initialize the literal decoder.
871  literal_init(coder->literal, options->lc, options->lp);
872 
873  coder->literal_context_bits = options->lc;
874  coder->literal_pos_mask = (1U << options->lp) - 1;
875 
876  // State
877  coder->state = STATE_LIT_LIT;
878  coder->rep0 = 0;
879  coder->rep1 = 0;
880  coder->rep2 = 0;
881  coder->rep3 = 0;
882  coder->pos_mask = (1U << options->pb) - 1;
883 
884  // Range decoder
885  rc_reset(coder->rc);
886 
887  // Bit and bittree decoders
888  for (uint32_t i = 0; i < STATES; ++i) {
889  for (uint32_t j = 0; j <= coder->pos_mask; ++j) {
890  bit_reset(coder->is_match[i][j]);
891  bit_reset(coder->is_rep0_long[i][j]);
892  }
893 
894  bit_reset(coder->is_rep[i]);
895  bit_reset(coder->is_rep0[i]);
896  bit_reset(coder->is_rep1[i]);
897  bit_reset(coder->is_rep2[i]);
898  }
899 
900  for (uint32_t i = 0; i < DIST_STATES; ++i)
902 
903  for (uint32_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i)
904  bit_reset(coder->pos_special[i]);
905 
907 
908  // Len decoders (also bit/bittree)
909  const uint32_t num_pos_states = 1U << options->pb;
914 
915  for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {
916  bittree_reset(coder->match_len_decoder.low[pos_state],
917  LEN_LOW_BITS);
918  bittree_reset(coder->match_len_decoder.mid[pos_state],
919  LEN_MID_BITS);
920 
921  bittree_reset(coder->rep_len_decoder.low[pos_state],
922  LEN_LOW_BITS);
923  bittree_reset(coder->rep_len_decoder.mid[pos_state],
924  LEN_MID_BITS);
925  }
926 
929 
930  coder->sequence = SEQ_IS_MATCH;
931  coder->probs = NULL;
932  coder->symbol = 0;
933  coder->limit = 0;
934  coder->offset = 0;
935  coder->len = 0;
936 
937  return;
938 }
939 
940 
941 extern lzma_ret
943  const void *opt, lzma_lz_options *lz_options)
944 {
945  if (lz->coder == NULL) {
947  if (lz->coder == NULL)
948  return LZMA_MEM_ERROR;
949 
950  lz->code = &lzma_decode;
951  lz->reset = &lzma_decoder_reset;
953  }
954 
955  // All dictionary sizes are OK here. LZ decoder will take care of
956  // the special cases.
957  const lzma_options_lzma *options = opt;
958  lz_options->dict_size = options->dict_size;
959  lz_options->preset_dict = options->preset_dict;
960  lz_options->preset_dict_size = options->preset_dict_size;
961 
962  return LZMA_OK;
963 }
964 
965 
969 static lzma_ret
971  const void *options, lzma_lz_options *lz_options)
972 {
973  if (!is_lclppb_valid(options))
974  return LZMA_PROG_ERROR;
975 
977  lz, allocator, options, lz_options));
978 
981 
982  return LZMA_OK;
983 }
984 
985 
986 extern lzma_ret
988  const lzma_filter_info *filters)
989 {
990  // LZMA can only be the last filter in the chain. This is enforced
991  // by the raw_decoder initialization.
992  assert(filters[1].init == NULL);
993 
994  return lzma_lz_decoder_init(next, allocator, filters,
996 }
997 
998 
999 extern bool
1001 {
1002  if (byte > (4 * 5 + 4) * 9 + 8)
1003  return true;
1004 
1005  // See the file format specification to understand this.
1006  options->pb = byte / (9 * 5);
1007  byte -= options->pb * 9 * 5;
1008  options->lp = byte / 9;
1009  options->lc = byte - options->lp * 9;
1010 
1011  return options->lc + options->lp > LZMA_LCLP_MAX;
1012 }
1013 
1014 
1015 extern uint64_t
1017 {
1018  const lzma_options_lzma *const opt = options;
1019  return sizeof(lzma_lzma1_decoder)
1021 }
1022 
1023 
1024 extern uint64_t
1026 {
1027  if (!is_lclppb_valid(options))
1028  return UINT64_MAX;
1029 
1031 }
1032 
1033 
1034 extern lzma_ret
1036  const uint8_t *props, size_t props_size)
1037 {
1038  if (props_size != 5)
1039  return LZMA_OPTIONS_ERROR;
1040 
1041  lzma_options_lzma *opt
1043  if (opt == NULL)
1044  return LZMA_MEM_ERROR;
1045 
1046  if (lzma_lzma_lclppb_decode(opt, props[0]))
1047  goto error;
1048 
1049  // All dictionary sizes are accepted, including zero. LZ decoder
1050  // will automatically use a dictionary at least a few KiB even if
1051  // a smaller dictionary is requested.
1052  opt->dict_size = read32le(props + 1);
1053 
1054  opt->preset_dict = NULL;
1055  opt->preset_dict_size = 0;
1056 
1057  *options = opt;
1058 
1059  return LZMA_OK;
1060 
1061 error:
1062  lzma_free(opt, allocator);
1063  return LZMA_OPTIONS_ERROR;
1064 }
size_t len
Definition: 6502dis.c:15
lzma_index ** i
Definition: index.h:629
const lzma_allocator const uint8_t size_t * in_pos
Definition: block.h:579
const lzma_allocator const uint8_t size_t in_size
Definition: block.h:527
const lzma_allocator * allocator
Definition: block.h:377
const lzma_allocator const uint8_t * in
Definition: block.h:527
const lzma_allocator const uint8_t size_t uint8_t * out
Definition: block.h:528
const lzma_filter * filters
Definition: container.h:315
#define NULL
Definition: cris-opc.c:27
const lzma_allocator const uint8_t * props
Definition: filter.h:362
voidpf uLong offset
Definition: ioapi.h:144
#define restrict
static const char struct stat static buf struct stat static buf static vhangup int options
Definition: sflib.h:145
#define unlikely(expr)
Definition: lz4.c:177
lzma_ret lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters, lzma_ret(*lz_init)(lzma_lz_decoder *lz, const lzma_allocator *allocator, const void *options, lzma_lz_options *lz_options))
Definition: lz_decoder.c:212
uint64_t lzma_lz_decoder_memusage(size_t dictionary_size)
Definition: lz_decoder.c:300
LZ out window.
static bool dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
Repeat *len bytes at distance.
Definition: lz_decoder.h:128
static bool dict_put(lzma_dict *dict, uint8_t byte)
Definition: lz_decoder.h:187
static uint8_t dict_get(const lzma_dict *const dict, const uint32_t distance)
Get a byte from the history buffer.
Definition: lz_decoder.h:103
static bool dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
Validate the match distance.
Definition: lz_decoder.h:120
#define LZMA_LCLP_MAX
Definition: lzma12.h:283
Private definitions common to LZMA encoder and decoder.
#define MATCH_LEN_MAX
Definition: lzma_common.h:168
#define MATCH_LEN_MIN
Definition: lzma_common.h:150
lzma_lzma_state
Definition: lzma_common.h:56
@ STATE_MATCH_LIT_LIT
Definition: lzma_common.h:58
@ STATE_LIT_LIT
Definition: lzma_common.h:57
@ STATE_SHORTREP_LIT
Definition: lzma_common.h:63
@ STATE_REP_LIT_LIT
Definition: lzma_common.h:59
@ STATE_REP_LIT
Definition: lzma_common.h:62
@ STATE_MATCH_LIT
Definition: lzma_common.h:61
@ STATE_SHORTREP_LIT_LIT
Definition: lzma_common.h:60
#define LEN_LOW_SYMBOLS
Definition: lzma_common.h:159
#define LITERAL_CODER_SIZE
Definition: lzma_common.h:115
#define ALIGN_BITS
Definition: lzma_common.h:217
#define DIST_SLOT_BITS
Definition: lzma_common.h:189
#define is_literal_state(state)
Test if the previous state was a literal.
Definition: lzma_common.h:100
#define DIST_STATES
Definition: lzma_common.h:179
#define LEN_MID_SYMBOLS
Definition: lzma_common.h:161
#define update_match(state)
Indicate that the latest state was a match.
Definition: lzma_common.h:88
#define FULL_DISTANCES
Definition: lzma_common.h:213
#define LEN_MID_BITS
Definition: lzma_common.h:160
#define ALIGN_SIZE
Definition: lzma_common.h:218
#define get_dist_state(len)
Definition: lzma_common.h:182
#define POS_STATES_MAX
Definition: lzma_common.h:28
#define literal_subcoder(probs, lc, lp_mask, pos, prev_byte)
Definition: lzma_common.h:124
#define DIST_SLOTS
Definition: lzma_common.h:190
static void literal_init(probability(*probs)[LITERAL_CODER_SIZE], uint32_t lc, uint32_t lp)
Definition: lzma_common.h:130
#define DIST_MODEL_END
Definition: lzma_common.h:209
#define LEN_HIGH_SYMBOLS
Definition: lzma_common.h:163
#define STATES
Total number of states.
Definition: lzma_common.h:73
#define LEN_LOW_BITS
Definition: lzma_common.h:158
#define update_long_rep(state)
Indicate that the latest state was a long repeated match.
Definition: lzma_common.h:92
#define LEN_HIGH_BITS
Definition: lzma_common.h:162
#define DIST_MODEL_START
Definition: lzma_common.h:198
#define LITERAL_CODERS_MAX
Maximum number of literal coders.
Definition: lzma_common.h:118
static bool is_lclppb_valid(const lzma_options_lzma *options)
Validates lc, lp, and pb.
Definition: lzma_common.h:33
#define update_short_rep(state)
Indicate that the latest state was a short match.
Definition: lzma_common.h:96
lzma_ret lzma_lzma_decoder_create(lzma_lz_decoder *lz, const lzma_allocator *allocator, const void *opt, lzma_lz_options *lz_options)
Definition: lzma_decoder.c:942
bool lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)
Decodes the LZMA Properties byte (lc/lp/pb)
static void lzma_decoder_reset(void *coder_ptr, const void *opt)
Definition: lzma_decoder.c:859
static void lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size)
Definition: lzma_decoder.c:851
uint64_t lzma_lzma_decoder_memusage_nocheck(const void *options)
#define seq_len(seq)
Definition: lzma_decoder.c:99
lzma_ret lzma_lzma_props_decode(void **options, const lzma_allocator *allocator, const uint8_t *props, size_t props_size)
uint64_t lzma_lzma_decoder_memusage(const void *options)
lzma_ret lzma_lzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters)
Allocates and initializes LZMA decoder.
Definition: lzma_decoder.c:987
#define seq_8(seq)
Definition: lzma_decoder.c:89
static lzma_ret lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size)
Definition: lzma_decoder.c:290
#define len_decode(target, ld, pos_state, seq)
Definition: lzma_decoder.c:117
#define d(seq)
static lzma_ret lzma_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator, const void *options, lzma_lz_options *lz_options)
Definition: lzma_decoder.c:970
LZMA decoder API.
static void literal(lzma_lzma1_encoder *coder, lzma_mf *mf, uint32_t position)
Definition: lzma_encoder.c:46
assert(limit<=UINT32_MAX/2)
static uint32_t const uint8_t uint32_t uint32_t limit
Definition: memcmplen.h:45
#define bittree_reset(probs, bit_levels)
Definition: range_common.h:42
uint16_t probability
Type of probabilities used with range coder.
Definition: range_common.h:69
#define bit_reset(prob)
Definition: range_common.h:37
Range Decoder.
#define rc_from_local(range_decoder, in_pos)
Stores the local copes back to the range decoder structure.
Definition: range_decoder.h:61
#define rc_bit_case(prob, action0, action1, seq)
#define rc_normalize(seq)
Definition: range_decoder.h:87
#define rc_direct(dest, seq)
Decode a bit without using a probability.
#define rc_update_0(prob)
#define rc_update_1(prob)
#define rc_is_finished(range_decoder)
Definition: range_decoder.h:80
#define rc_bit_last(prob, action0, action1, seq)
#define rc_if_0(prob, seq)
#define rc_to_local(range_decoder, in_pos)
Definition: range_decoder.h:54
#define rc_bit(prob, action0, action1, seq)
#define rc_reset(range_decoder)
Resets the range decoder structure.
Definition: range_decoder.h:69
static lzma_ret rc_read_init(lzma_range_decoder *rc, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size)
Reads the first five bytes to initialize the range decoder.
Definition: range_decoder.h:29
int int32_t
Definition: sftypes.h:33
int size_t
Definition: sftypes.h:40
unsigned int uint32_t
Definition: sftypes.h:29
unsigned long uint64_t
Definition: sftypes.h:28
unsigned char uint8_t
Definition: sftypes.h:31
#define UINT64_MAX
#define UINT32_MAX
Custom functions for memory handling.
Definition: base.h:372
size_t limit
Write limit.
Definition: lz_decoder.h:36
size_t full
Definition: lz_decoder.h:33
size_t pos
Definition: lz_decoder.h:28
Length decoder probabilities; see comments in lzma_common.h.
Definition: lzma_decoder.c:161
probability high[LEN_HIGH_SYMBOLS]
Definition: lzma_decoder.c:166
probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS]
Definition: lzma_decoder.c:164
probability choice2
Definition: lzma_decoder.c:163
probability choice
Definition: lzma_decoder.c:162
probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS]
Definition: lzma_decoder.c:165
void * coder
Data specific to the LZ-based decoder.
Definition: lz_decoder.h:56
void(* set_uncompressed)(void *coder, lzma_vli uncompressed_size)
Set the uncompressed size.
Definition: lz_decoder.h:66
lzma_ret(* code)(void *coder, lzma_dict *restrict dict, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size)
Function to decode from in[] to *dict.
Definition: lz_decoder.h:59
void(* reset)(void *coder, const void *options)
Definition: lz_decoder.h:63
const uint8_t * preset_dict
TODO: Comment.
Definition: lz_decoder.h:49
size_t preset_dict_size
Definition: lz_decoder.h:50
size_t dict_size
Size of the history buffer.
Definition: lz_decoder.h:48
probability is_rep1[STATES]
Definition: lzma_decoder.c:190
probability is_rep0_long[STATES][POS_STATES_MAX]
Definition: lzma_decoder.c:197
probability is_rep0[STATES]
Definition: lzma_decoder.c:186
lzma_vli uncompressed_size
Definition: lzma_decoder.c:239
probability is_rep2[STATES]
If 0, distance of a repeated match is rep2. Otherwise it is rep3.
Definition: lzma_decoder.c:193
uint32_t rep0
Distance of the latest match.
Definition: lzma_decoder.c:228
probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]
Literals; see comments in lzma_common.h.
Definition: lzma_decoder.c:176
probability is_rep[STATES]
If 1, it's a repeated match. The distance is one of rep0 .. rep3.
Definition: lzma_decoder.c:182
uint32_t literal_context_bits
Definition: lzma_decoder.c:234
probability pos_align[ALIGN_SIZE]
Definition: lzma_decoder.c:210
uint32_t literal_pos_mask
Definition: lzma_decoder.c:235
probability * probs
Base of the current probability tree.
Definition: lzma_decoder.c:269
enum lzma_lzma1_decoder::@657 sequence
Position where to continue the decoder loop.
uint32_t rep3
Distance of fourth latest match.
Definition: lzma_decoder.c:231
probability is_match[STATES][POS_STATES_MAX]
If 1, it's a match. Otherwise it's a single 8-bit literal.
Definition: lzma_decoder.c:179
probability dist_slot[DIST_STATES][DIST_SLOTS]
Definition: lzma_decoder.c:202
lzma_length_decoder rep_len_decoder
Length of a repeated match.
Definition: lzma_decoder.c:216
lzma_range_decoder rc
Definition: lzma_decoder.c:223
lzma_length_decoder match_len_decoder
Length of a normal match.
Definition: lzma_decoder.c:213
uint32_t rep2
Distance of third latest match.
Definition: lzma_decoder.c:230
probability pos_special[FULL_DISTANCES - DIST_MODEL_END]
Definition: lzma_decoder.c:206
lzma_lzma_state state
Definition: lzma_decoder.c:226
uint32_t rep1
Distance of second latest match.
Definition: lzma_decoder.c:229
Hold data and function pointers of the next filter in the chain.
Definition: common.h:135
Options specific to the LZMA1 and LZMA2 filters.
Definition: lzma12.h:185
const uint8_t * preset_dict
Pointer to an initial dictionary.
Definition: lzma12.h:240
uint32_t preset_dict_size
Size of the preset dictionary.
Definition: lzma12.h:254
uint32_t dict_size
Dictionary size in bytes.
Definition: lzma12.h:217
Definition: dis.h:43
bool init
Definition: core.c:77
#define return_if_error(expr)
Return if expression doesn't evaluate to LZMA_OK.
Definition: common.h:278
void * lzma_alloc(size_t size, const lzma_allocator *allocator) lzma_attribute((__malloc__)) lzma_attr_alloc_size(1)
Allocates memory.
uint64_t uncompressed_size
Definition: list.c:106
static uint32_t read32le(const uint8_t *buf)
void error(const char *msg)
Definition: untgz.c:593
uint64_t lzma_vli
Variable-length integer type.
Definition: vli.h:63
#define LZMA_VLI_UNKNOWN
VLI value to denote that the value is unknown.
Definition: vli.h:39
lzma_ret
Return values used by several functions in liblzma.
Definition: base.h:57
@ LZMA_PROG_ERROR
Programming error.
Definition: base.h:218
@ LZMA_DATA_ERROR
Data is corrupt.
Definition: base.h:172
@ LZMA_MEM_ERROR
Cannot allocate memory.
Definition: base.h:128
@ LZMA_STREAM_END
End of stream was reached.
Definition: base.h:63
@ LZMA_OPTIONS_ERROR
Invalid or unsupported options.
Definition: base.h:160
@ LZMA_OK
Operation completed successfully.
Definition: base.h:58
void lzma_free(void *ptr, const lzma_allocator *allocator)
Frees memory.
Definition: common.c:78