Rizin
unix-like reverse engineering framework and cli tools
lzxd.c
Go to the documentation of this file.
1 /* This file is part of libmspack.
2  * (C) 2003-2013 Stuart Caie.
3  *
4  * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted
5  * by Microsoft Corporation.
6  *
7  * libmspack is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU Lesser General Public License (LGPL) version 2.1
9  *
10  * For further details, see the file COPYING.LIB distributed with libmspack
11  */
12 
13 /* LZX decompression implementation */
14 
15 #include <system.h>
16 #include <lzx.h>
17 
18 /* Microsoft's LZX document (in cab-sdk.exe) and their implementation
19  * of the com.ms.util.cab Java package do not concur.
20  *
21  * In the LZX document, there is a table showing the correlation between
22  * window size and the number of position slots. It states that the 1MB
23  * window = 40 slots and the 2MB window = 42 slots. In the implementation,
24  * 1MB = 42 slots, 2MB = 50 slots. The actual calculation is 'find the
25  * first slot whose position base is equal to or more than the required
26  * window size'. This would explain why other tables in the document refer
27  * to 50 slots rather than 42.
28  *
29  * The constant NUM_PRIMARY_LENGTHS used in the decompression pseudocode
30  * is not defined in the specification.
31  *
32  * The LZX document does not state the uncompressed block has an
33  * uncompressed length field. Where does this length field come from, so
34  * we can know how large the block is? The implementation has it as the 24
35  * bits following after the 3 blocktype bits, before the alignment
36  * padding.
37  *
38  * The LZX document states that aligned offset blocks have their aligned
39  * offset huffman tree AFTER the main and length trees. The implementation
40  * suggests that the aligned offset tree is BEFORE the main and length
41  * trees.
42  *
43  * The LZX document decoding algorithm states that, in an aligned offset
44  * block, if an extra_bits value is 1, 2 or 3, then that number of bits
45  * should be read and the result added to the match offset. This is
46  * correct for 1 and 2, but not 3, where just a huffman symbol (using the
47  * aligned tree) should be read.
48  *
49  * Regarding the E8 preprocessing, the LZX document states 'No translation
50  * may be performed on the last 6 bytes of the input block'. This is
51  * correct. However, the pseudocode provided checks for the *E8 leader*
52  * up to the last 6 bytes. If the leader appears between -10 and -7 bytes
53  * from the end, this would cause the next four bytes to be modified, at
54  * least one of which would be in the last 6 bytes, which is not allowed
55  * according to the spec.
56  *
57  * The specification states that the huffman trees must always contain at
58  * least one element. However, many CAB files contain blocks where the
59  * length tree is completely empty (because there are no matches), and
60  * this is expected to succeed.
61  *
62  * The errors in LZX documentation appear have been corrected in the
63  * new documentation for the LZX DELTA format.
64  *
65  * http://msdn.microsoft.com/en-us/library/cc483133.aspx
66  *
67  * However, this is a different format, an extension of regular LZX.
68  * I have noticed the following differences, there may be more:
69  *
70  * The maximum window size has increased from 2MB to 32MB. This also
71  * increases the maximum number of position slots, etc.
72  *
73  * If the match length is 257 (the maximum possible), this signals
74  * a further length decoding step, that allows for matches up to
75  * 33024 bytes long.
76  *
77  * The format now allows for "reference data", supplied by the caller.
78  * If match offsets go further back than the number of bytes
79  * decompressed so far, that is them accessing the reference data.
80  */
81 
82 /* import bit-reading macros and code */
83 #define BITS_TYPE struct lzxd_stream
84 #define BITS_VAR lzx
85 #define BITS_ORDER_MSB
86 #define READ_BYTES do { \
87  unsigned char b0, b1; \
88  READ_IF_NEEDED; b0 = *i_ptr++; \
89  READ_IF_NEEDED; b1 = *i_ptr++; \
90  INJECT_BITS((b1 << 8) | b0, 16); \
91 } while (0)
92 #include <readbits.h>
93 
94 /* import huffman-reading macros and code */
95 #define TABLEBITS(tbl) LZX_##tbl##_TABLEBITS
96 #define MAXSYMBOLS(tbl) LZX_##tbl##_MAXSYMBOLS
97 #define HUFF_TABLE(tbl,idx) lzx->tbl##_table[idx]
98 #define HUFF_LEN(tbl,idx) lzx->tbl##_len[idx]
99 #define HUFF_ERROR return lzx->error = MSPACK_ERR_DECRUNCH
100 #include <readhuff.h>
101 
102 /* BUILD_TABLE(tbl) builds a huffman lookup table from code lengths */
103 #define BUILD_TABLE(tbl) \
104  if (make_decode_table(MAXSYMBOLS(tbl), TABLEBITS(tbl), \
105  &HUFF_LEN(tbl,0), &HUFF_TABLE(tbl,0))) \
106  { \
107  D(("failed to build %s table", #tbl)) \
108  return lzx->error = MSPACK_ERR_DECRUNCH; \
109  }
110 
111 #define BUILD_TABLE_MAYBE_EMPTY(tbl) do { \
112  lzx->tbl##_empty = 0; \
113  if (make_decode_table(MAXSYMBOLS(tbl), TABLEBITS(tbl), \
114  &HUFF_LEN(tbl,0), &HUFF_TABLE(tbl,0))) \
115  { \
116  for (i = 0; i < MAXSYMBOLS(tbl); i++) { \
117  if (HUFF_LEN(tbl, i) > 0) { \
118  D(("failed to build %s table", #tbl)) \
119  return lzx->error = MSPACK_ERR_DECRUNCH; \
120  } \
121  } \
122  /* empty tree - allow it, but don't decode symbols with it */ \
123  lzx->tbl##_empty = 1; \
124  } \
125 } while (0)
126 
127 /* READ_LENGTHS(tablename, first, last) reads in code lengths for symbols
128  * first to last in the given table. The code lengths are stored in their
129  * own special LZX way.
130  */
131 #define READ_LENGTHS(tbl, first, last) do { \
132  STORE_BITS; \
133  if (lzxd_read_lens(lzx, &HUFF_LEN(tbl, 0), (first), \
134  (unsigned int)(last))) return lzx->error; \
135  RESTORE_BITS; \
136 } while (0)
137 
138 static int lzxd_read_lens(struct lzxd_stream *lzx, unsigned char *lens,
139  unsigned int first, unsigned int last)
140 {
141  /* bit buffer and huffman symbol decode variables */
142  register unsigned int bit_buffer;
143  register int bits_left, i;
144  register unsigned short sym;
145  unsigned char *i_ptr, *i_end;
146 
147  unsigned int x, y;
148  int z;
149 
150  RESTORE_BITS;
151 
152  /* read lengths for pretree (20 symbols, lengths stored in fixed 4 bits) */
153  for (x = 0; x < 20; x++) {
154  READ_BITS(y, 4);
155  lzx->PRETREE_len[x] = y;
156  }
157  BUILD_TABLE(PRETREE);
158 
159  for (x = first; x < last; ) {
160  READ_HUFFSYM(PRETREE, z);
161  if (z == 17) {
162  /* code = 17, run of ([read 4 bits]+4) zeros */
163  READ_BITS(y, 4); y += 4;
164  while (y--) lens[x++] = 0;
165  }
166  else if (z == 18) {
167  /* code = 18, run of ([read 5 bits]+20) zeros */
168  READ_BITS(y, 5); y += 20;
169  while (y--) lens[x++] = 0;
170  }
171  else if (z == 19) {
172  /* code = 19, run of ([read 1 bit]+4) [read huffman symbol] */
173  READ_BITS(y, 1); y += 4;
174  READ_HUFFSYM(PRETREE, z);
175  z = lens[x] - z; if (z < 0) z += 17;
176  while (y--) lens[x++] = z;
177  }
178  else {
179  /* code = 0 to 16, delta current length entry */
180  z = lens[x] - z; if (z < 0) z += 17;
181  lens[x++] = z;
182  }
183  }
184 
185  STORE_BITS;
186 
187  return MSPACK_ERR_OK;
188 }
189 
190 /* LZX static data tables:
191  *
192  * LZX uses 'position slots' to represent match offsets. For every match,
193  * a small 'position slot' number and a small offset from that slot are
194  * encoded instead of one large offset.
195  *
196  * The number of slots is decided by how many are needed to encode the
197  * largest offset for a given window size. This is easy when the gap between
198  * slots is less than 128Kb, it's a linear relationship. But when extra_bits
199  * reaches its limit of 17 (because LZX can only ensure reading 17 bits of
200  * data at a time), we can only jump 128Kb at a time and have to start
201  * using more and more position slots as each window size doubles.
202  *
203  * position_base[] is an index to the position slot bases
204  *
205  * extra_bits[] states how many bits of offset-from-base data is needed.
206  *
207  * They are calculated as follows:
208  * extra_bits[i] = 0 where i < 4
209  * extra_bits[i] = floor(i/2)-1 where i >= 4 && i < 36
210  * extra_bits[i] = 17 where i >= 36
211  * position_base[0] = 0
212  * position_base[i] = position_base[i-1] + (1 << extra_bits[i-1])
213  */
214 static const unsigned int position_slots[11] = {
215  30, 32, 34, 36, 38, 42, 50, 66, 98, 162, 290
216 };
217 static const unsigned char extra_bits[36] = {
218  0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
219  9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16
220 };
221 static const unsigned int position_base[290] = {
222  0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512,
223  768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768,
224  49152, 65536, 98304, 131072, 196608, 262144, 393216, 524288, 655360,
225  786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936,
226  1835008, 1966080, 2097152, 2228224, 2359296, 2490368, 2621440, 2752512,
227  2883584, 3014656, 3145728, 3276800, 3407872, 3538944, 3670016, 3801088,
228  3932160, 4063232, 4194304, 4325376, 4456448, 4587520, 4718592, 4849664,
229  4980736, 5111808, 5242880, 5373952, 5505024, 5636096, 5767168, 5898240,
230  6029312, 6160384, 6291456, 6422528, 6553600, 6684672, 6815744, 6946816,
231  7077888, 7208960, 7340032, 7471104, 7602176, 7733248, 7864320, 7995392,
232  8126464, 8257536, 8388608, 8519680, 8650752, 8781824, 8912896, 9043968,
233  9175040, 9306112, 9437184, 9568256, 9699328, 9830400, 9961472, 10092544,
234  10223616, 10354688, 10485760, 10616832, 10747904, 10878976, 11010048,
235  11141120, 11272192, 11403264, 11534336, 11665408, 11796480, 11927552,
236  12058624, 12189696, 12320768, 12451840, 12582912, 12713984, 12845056,
237  12976128, 13107200, 13238272, 13369344, 13500416, 13631488, 13762560,
238  13893632, 14024704, 14155776, 14286848, 14417920, 14548992, 14680064,
239  14811136, 14942208, 15073280, 15204352, 15335424, 15466496, 15597568,
240  15728640, 15859712, 15990784, 16121856, 16252928, 16384000, 16515072,
241  16646144, 16777216, 16908288, 17039360, 17170432, 17301504, 17432576,
242  17563648, 17694720, 17825792, 17956864, 18087936, 18219008, 18350080,
243  18481152, 18612224, 18743296, 18874368, 19005440, 19136512, 19267584,
244  19398656, 19529728, 19660800, 19791872, 19922944, 20054016, 20185088,
245  20316160, 20447232, 20578304, 20709376, 20840448, 20971520, 21102592,
246  21233664, 21364736, 21495808, 21626880, 21757952, 21889024, 22020096,
247  22151168, 22282240, 22413312, 22544384, 22675456, 22806528, 22937600,
248  23068672, 23199744, 23330816, 23461888, 23592960, 23724032, 23855104,
249  23986176, 24117248, 24248320, 24379392, 24510464, 24641536, 24772608,
250  24903680, 25034752, 25165824, 25296896, 25427968, 25559040, 25690112,
251  25821184, 25952256, 26083328, 26214400, 26345472, 26476544, 26607616,
252  26738688, 26869760, 27000832, 27131904, 27262976, 27394048, 27525120,
253  27656192, 27787264, 27918336, 28049408, 28180480, 28311552, 28442624,
254  28573696, 28704768, 28835840, 28966912, 29097984, 29229056, 29360128,
255  29491200, 29622272, 29753344, 29884416, 30015488, 30146560, 30277632,
256  30408704, 30539776, 30670848, 30801920, 30932992, 31064064, 31195136,
257  31326208, 31457280, 31588352, 31719424, 31850496, 31981568, 32112640,
258  32243712, 32374784, 32505856, 32636928, 32768000, 32899072, 33030144,
259  33161216, 33292288, 33423360
260 };
261 
262 static void lzxd_reset_state(struct lzxd_stream *lzx) {
263  int i;
264 
265  lzx->R0 = 1;
266  lzx->R1 = 1;
267  lzx->R2 = 1;
268  lzx->header_read = 0;
269  lzx->block_remaining = 0;
271 
272  /* initialise tables to 0 (because deltas will be applied to them) */
273  for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) lzx->MAINTREE_len[i] = 0;
274  for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++) lzx->LENGTH_len[i] = 0;
275 }
276 
277 /*-------- main LZX code --------*/
278 
279 struct lzxd_stream *lzxd_init(struct mspack_system *system,
280  struct mspack_file *input,
281  struct mspack_file *output,
282  int window_bits,
283  int reset_interval,
284  int input_buffer_size,
285  off_t output_length,
286  char is_delta)
287 {
288  unsigned int window_size = 1 << window_bits;
289  struct lzxd_stream *lzx;
290 
291  if (!system) return NULL;
292 
293  /* LZX DELTA window sizes are between 2^17 (128KiB) and 2^25 (32MiB),
294  * regular LZX windows are between 2^15 (32KiB) and 2^21 (2MiB)
295  */
296  if (is_delta) {
297  if (window_bits < 17 || window_bits > 25) return NULL;
298  }
299  else {
300  if (window_bits < 15 || window_bits > 21) return NULL;
301  }
302 
303  if (reset_interval < 0 || output_length < 0) {
304  D(("reset interval or output length < 0"))
305  return NULL;
306  }
307 
308  /* round up input buffer size to multiple of two */
309  input_buffer_size = (input_buffer_size + 1) & -2;
310  if (input_buffer_size < 2) return NULL;
311 
312  /* allocate decompression state */
313  if (!(lzx = (struct lzxd_stream *) system->alloc(system, sizeof(struct lzxd_stream)))) {
314  return NULL;
315  }
316 
317  /* allocate decompression window and input buffer */
318  lzx->window = (unsigned char *) system->alloc(system, (size_t) window_size);
319  lzx->inbuf = (unsigned char *) system->alloc(system, (size_t) input_buffer_size);
320  if (!lzx->window || !lzx->inbuf) {
321  system->free(lzx->window);
322  system->free(lzx->inbuf);
323  system->free(lzx);
324  return NULL;
325  }
326 
327  /* initialise decompression state */
328  lzx->sys = system;
329  lzx->input = input;
330  lzx->output = output;
331  lzx->offset = 0;
332  lzx->length = output_length;
333 
334  lzx->inbuf_size = input_buffer_size;
335  lzx->window_size = 1 << window_bits;
336  lzx->ref_data_size = 0;
337  lzx->window_posn = 0;
338  lzx->frame_posn = 0;
339  lzx->frame = 0;
341  lzx->intel_filesize = 0;
342  lzx->intel_started = 0;
343  lzx->error = MSPACK_ERR_OK;
344  lzx->num_offsets = position_slots[window_bits - 15] << 3;
345  lzx->is_delta = is_delta;
346 
347  lzx->o_ptr = lzx->o_end = &lzx->e8_buf[0];
348  lzxd_reset_state(lzx);
349  INIT_BITS;
350  return lzx;
351 }
352 
354  struct mspack_system *system,
355  struct mspack_file *input,
356  unsigned int length)
357 {
358  if (!lzx) return MSPACK_ERR_ARGS;
359 
360  if (!lzx->is_delta) {
361  D(("only LZX DELTA streams support reference data"))
362  return MSPACK_ERR_ARGS;
363  }
364  if (lzx->offset) {
365  D(("too late to set reference data after decoding starts"))
366  return MSPACK_ERR_ARGS;
367  }
368  if (length > lzx->window_size) {
369  D(("reference length (%u) is longer than the window", length))
370  return MSPACK_ERR_ARGS;
371  }
372  if (length > 0 && (!system || !input)) {
373  D(("length > 0 but no system or input"))
374  return MSPACK_ERR_ARGS;
375  }
376 
377  lzx->ref_data_size = length;
378  if (length > 0) {
379  /* copy reference data */
380  unsigned char *pos = &lzx->window[lzx->window_size - length];
381  int bytes = system->read(input, pos, length);
382  /* length can't be more than 2^25, so no signedness problem */
383  if (bytes < (int)length) return MSPACK_ERR_READ;
384  }
385  lzx->ref_data_size = length;
386  return MSPACK_ERR_OK;
387 }
388 
389 void lzxd_set_output_length(struct lzxd_stream *lzx, off_t out_bytes) {
390  if (lzx && out_bytes > 0) lzx->length = out_bytes;
391 }
392 
393 int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
394  /* bitstream and huffman reading variables */
395  register unsigned int bit_buffer;
396  register int bits_left, i=0;
397  unsigned char *i_ptr, *i_end;
398  register unsigned short sym;
399 
400  int match_length, length_footer, extra, verbatim_bits, bytes_todo;
401  int this_run, main_element, aligned_bits, j, warned = 0;
402  unsigned char *window, *runsrc, *rundest, buf[12];
403  unsigned int frame_size=0, end_frame, match_offset, window_posn;
404  unsigned int R0, R1, R2;
405 
406  /* easy answers */
407  if (!lzx || (out_bytes < 0)) return MSPACK_ERR_ARGS;
408  if (lzx->error) return lzx->error;
409 
410  /* flush out any stored-up bytes before we begin */
411  i = lzx->o_end - lzx->o_ptr;
412  if ((off_t) i > out_bytes) i = (int) out_bytes;
413  if (i) {
414  if (lzx->sys->write(lzx->output, lzx->o_ptr, i) != i) {
415  return lzx->error = MSPACK_ERR_WRITE;
416  }
417  lzx->o_ptr += i;
418  lzx->offset += i;
419  out_bytes -= i;
420  }
421  if (out_bytes == 0) return MSPACK_ERR_OK;
422 
423  /* restore local state */
424  RESTORE_BITS;
425  window = lzx->window;
426  window_posn = lzx->window_posn;
427  R0 = lzx->R0;
428  R1 = lzx->R1;
429  R2 = lzx->R2;
430 
431  end_frame = (unsigned int)((lzx->offset + out_bytes) / LZX_FRAME_SIZE) + 1;
432 
433  while (lzx->frame < end_frame) {
434  /* have we reached the reset interval? (if there is one?) */
435  if (lzx->reset_interval && ((lzx->frame % lzx->reset_interval) == 0)) {
436  if (lzx->block_remaining) {
437  /* this is a file format error, we can make a best effort to extract what we can */
438  D(("%d bytes remaining at reset interval", lzx->block_remaining))
439  if (!warned) {
440  lzx->sys->message(NULL, "WARNING; invalid reset interval detected during LZX decompression");
441  warned++;
442  }
443  }
444 
445  /* re-read the intel header and reset the huffman lengths */
446  lzxd_reset_state(lzx);
447  R0 = lzx->R0;
448  R1 = lzx->R1;
449  R2 = lzx->R2;
450  }
451 
452  /* LZX DELTA format has chunk_size, not present in LZX format */
453  if (lzx->is_delta) {
454  ENSURE_BITS(16);
455  REMOVE_BITS(16);
456  }
457 
458  /* read header if necessary */
459  if (!lzx->header_read) {
460  /* read 1 bit. if bit=0, intel filesize = 0.
461  * if bit=1, read intel filesize (32 bits) */
462  j = 0; READ_BITS(i, 1); if (i) { READ_BITS(i, 16); READ_BITS(j, 16); }
463  lzx->intel_filesize = (i << 16) | j;
464  lzx->header_read = 1;
465  }
466 
467  /* calculate size of frame: all frames are 32k except the final frame
468  * which is 32kb or less. this can only be calculated when lzx->length
469  * has been filled in. */
470  frame_size = LZX_FRAME_SIZE;
471  if (lzx->length && (lzx->length - lzx->offset) < (off_t)frame_size) {
472  frame_size = lzx->length - lzx->offset;
473  }
474 
475  /* decode until one more frame is available */
476  bytes_todo = lzx->frame_posn + frame_size - window_posn;
477  while (bytes_todo > 0) {
478  /* initialise new block, if one is needed */
479  if (lzx->block_remaining == 0) {
480  /* realign if previous block was an odd-sized UNCOMPRESSED block */
481  if ((lzx->block_type == LZX_BLOCKTYPE_UNCOMPRESSED) &&
482  (lzx->block_length & 1))
483  {
485  i_ptr++;
486  }
487 
488  /* read block type (3 bits) and block length (24 bits) */
489  READ_BITS(lzx->block_type, 3);
490  READ_BITS(i, 16); READ_BITS(j, 8);
491  lzx->block_remaining = lzx->block_length = (i << 8) | j;
492  /*D(("new block t%d len %u", lzx->block_type, lzx->block_length))*/
493 
494  /* read individual block headers */
495  switch (lzx->block_type) {
497  /* read lengths of and build aligned huffman decoding tree */
498  for (i = 0; i < 8; i++) { READ_BITS(j, 3); lzx->ALIGNED_len[i] = j; }
499  BUILD_TABLE(ALIGNED);
500  /* rest of aligned header is same as verbatim */ /*@fallthrough@*/
502  /* read lengths of and build main huffman decoding tree */
503  READ_LENGTHS(MAINTREE, 0, 256);
504  READ_LENGTHS(MAINTREE, 256, LZX_NUM_CHARS + lzx->num_offsets);
505  BUILD_TABLE(MAINTREE);
506  /* if the literal 0xE8 is anywhere in the block... */
507  if (lzx->MAINTREE_len[0xE8] != 0) lzx->intel_started = 1;
508  /* read lengths of and build lengths huffman decoding tree */
511  break;
512 
514  /* because we can't assume otherwise */
515  lzx->intel_started = 1;
516 
517  /* read 1-16 (not 0-15) bits to align to bytes */
518  if (bits_left == 0) ENSURE_BITS(16);
519  bits_left = 0; bit_buffer = 0;
520 
521  /* read 12 bytes of stored R0 / R1 / R2 values */
522  for (rundest = &buf[0], i = 0; i < 12; i++) {
524  *rundest++ = *i_ptr++;
525  }
526  R0 = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
527  R1 = buf[4] | (buf[5] << 8) | (buf[6] << 16) | (buf[7] << 24);
528  R2 = buf[8] | (buf[9] << 8) | (buf[10] << 16) | (buf[11] << 24);
529  break;
530 
531  default:
532  D(("bad block type"))
533  return lzx->error = MSPACK_ERR_DECRUNCH;
534  }
535  }
536 
537  /* decode more of the block:
538  * run = min(what's available, what's needed) */
539  this_run = lzx->block_remaining;
540  if (this_run > bytes_todo) this_run = bytes_todo;
541 
542  /* assume we decode exactly this_run bytes, for now */
543  bytes_todo -= this_run;
544  lzx->block_remaining -= this_run;
545 
546  /* decode at least this_run bytes */
547  switch (lzx->block_type) {
550  while (this_run > 0) {
551  READ_HUFFSYM(MAINTREE, main_element);
552  if (main_element < LZX_NUM_CHARS) {
553  /* literal: 0 to LZX_NUM_CHARS-1 */
554  window[window_posn++] = main_element;
555  this_run--;
556  }
557  else {
558  /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
559  main_element -= LZX_NUM_CHARS;
560 
561  /* get match length */
562  match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
563  if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
564  if (lzx->LENGTH_empty) {
565  D(("LENGTH symbol needed but tree is empty"))
566  return lzx->error = MSPACK_ERR_DECRUNCH;
567  }
568  READ_HUFFSYM(LENGTH, length_footer);
569  match_length += length_footer;
570  }
571  match_length += LZX_MIN_MATCH;
572 
573  /* get match offset */
574  switch ((match_offset = (main_element >> 3))) {
575  case 0: match_offset = R0; break;
576  case 1: match_offset = R1; R1=R0; R0 = match_offset; break;
577  case 2: match_offset = R2; R2=R0; R0 = match_offset; break;
578  default:
579  if (lzx->block_type == LZX_BLOCKTYPE_VERBATIM) {
580  if (match_offset == 3) {
581  match_offset = 1;
582  }
583  else {
584  extra = (match_offset >= 36) ? 17 : extra_bits[match_offset];
585  READ_BITS(verbatim_bits, extra);
586  match_offset = position_base[match_offset] - 2 + verbatim_bits;
587  }
588  }
589  else { /* LZX_BLOCKTYPE_ALIGNED */
590  extra = (match_offset >= 36) ? 17 : extra_bits[match_offset];
591  match_offset = position_base[match_offset] - 2;
592  if (extra > 3) { /* >3: verbatim and aligned bits */
593  extra -= 3;
594  READ_BITS(verbatim_bits, extra);
595  match_offset += (verbatim_bits << 3);
596  READ_HUFFSYM(ALIGNED, aligned_bits);
597  match_offset += aligned_bits;
598  }
599  else if (extra == 3) { /* 3: aligned bits only */
600  READ_HUFFSYM(ALIGNED, aligned_bits);
601  match_offset += aligned_bits;
602  }
603  else if (extra > 0) { /* 1-2: verbatim bits only */
604  READ_BITS(verbatim_bits, extra);
605  match_offset += verbatim_bits;
606  }
607  else { /* 0: not defined in LZX specification! */
608  match_offset = 1;
609  }
610  }
611  /* update repeated offset LRU queue */
612  R2 = R1; R1 = R0; R0 = match_offset;
613  }
614 
615  /* LZX DELTA uses max match length to signal even longer match */
616  if (match_length == LZX_MAX_MATCH && lzx->is_delta) {
617  int extra_len = 0;
618  ENSURE_BITS(3); /* 4 entry huffman tree */
619  if (PEEK_BITS(1) == 0) {
620  REMOVE_BITS(1); /* '0' -> 8 extra length bits */
621  READ_BITS(extra_len, 8);
622  }
623  else if (PEEK_BITS(2) == 2) {
624  REMOVE_BITS(2); /* '10' -> 10 extra length bits + 0x100 */
625  READ_BITS(extra_len, 10);
626  extra_len += 0x100;
627  }
628  else if (PEEK_BITS(3) == 6) {
629  REMOVE_BITS(3); /* '110' -> 12 extra length bits + 0x500 */
630  READ_BITS(extra_len, 12);
631  extra_len += 0x500;
632  }
633  else {
634  REMOVE_BITS(3); /* '111' -> 15 extra length bits */
635  READ_BITS(extra_len, 15);
636  }
637  match_length += extra_len;
638  }
639 
640  if ((window_posn + match_length) > lzx->window_size) {
641  D(("match ran over window wrap"))
642  return lzx->error = MSPACK_ERR_DECRUNCH;
643  }
644 
645  /* copy match */
646  rundest = &window[window_posn];
647  i = match_length;
648  /* does match offset wrap the window? */
649  if (match_offset > window_posn) {
650  if (match_offset > lzx->offset &&
651  (match_offset - window_posn) > lzx->ref_data_size)
652  {
653  D(("match offset beyond LZX stream"))
654  return lzx->error = MSPACK_ERR_DECRUNCH;
655  }
656  /* j = length from match offset to end of window */
657  j = match_offset - window_posn;
658  if (j > (int) lzx->window_size) {
659  D(("match offset beyond window boundaries"))
660  return lzx->error = MSPACK_ERR_DECRUNCH;
661  }
662  runsrc = &window[lzx->window_size - j];
663  if (j < i) {
664  /* if match goes over the window edge, do two copy runs */
665  i -= j; while (j-- > 0) *rundest++ = *runsrc++;
666  runsrc = window;
667  }
668  while (i-- > 0) *rundest++ = *runsrc++;
669  }
670  else {
671  runsrc = rundest - match_offset;
672  while (i-- > 0) *rundest++ = *runsrc++;
673  }
674 
675  this_run -= match_length;
676  window_posn += match_length;
677  }
678  } /* while (this_run > 0) */
679  break;
680 
682  /* as this_run is limited not to wrap a frame, this also means it
683  * won't wrap the window (as the window is a multiple of 32k) */
684  rundest = &window[window_posn];
685  window_posn += this_run;
686  while (this_run > 0) {
687  if ((i = i_end - i_ptr) == 0) {
689  }
690  else {
691  if (i > this_run) i = this_run;
692  lzx->sys->copy(i_ptr, rundest, (size_t) i);
693  rundest += i;
694  i_ptr += i;
695  this_run -= i;
696  }
697  }
698  break;
699 
700  default:
701  return lzx->error = MSPACK_ERR_DECRUNCH; /* might as well */
702  }
703 
704  /* did the final match overrun our desired this_run length? */
705  if (this_run < 0) {
706  if ((unsigned int)(-this_run) > lzx->block_remaining) {
707  D(("overrun went past end of block by %d (%d remaining)",
708  -this_run, lzx->block_remaining ))
709  return lzx->error = MSPACK_ERR_DECRUNCH;
710  }
711  lzx->block_remaining -= -this_run;
712  }
713  } /* while (bytes_todo > 0) */
714 
715  /* streams don't extend over frame boundaries */
716  if ((window_posn - lzx->frame_posn) != frame_size) {
717  D(("decode beyond output frame limits! %d != %d",
718  window_posn - lzx->frame_posn, frame_size))
719  return lzx->error = MSPACK_ERR_DECRUNCH;
720  }
721 
722  /* re-align input bitstream */
723  if (bits_left > 0) ENSURE_BITS(16);
724  if (bits_left & 15) REMOVE_BITS(bits_left & 15);
725 
726  /* check that we've used all of the previous frame first */
727  if (lzx->o_ptr != lzx->o_end) {
728  D(("%ld avail bytes, new %d frame",
729  (long)(lzx->o_end - lzx->o_ptr), frame_size))
730  return lzx->error = MSPACK_ERR_DECRUNCH;
731  }
732 
733  /* does this intel block _really_ need decoding? */
734  if (lzx->intel_started && lzx->intel_filesize &&
735  (lzx->frame < 32768) && (frame_size > 10))
736  {
737  unsigned char *data = &lzx->e8_buf[0];
738  unsigned char *dataend = &lzx->e8_buf[frame_size - 10];
739  signed int curpos = (int) lzx->offset;
740  signed int filesize = lzx->intel_filesize;
741  signed int abs_off, rel_off;
742 
743  /* copy e8 block to the e8 buffer and tweak if needed */
744  lzx->o_ptr = data;
745  lzx->sys->copy(&lzx->window[lzx->frame_posn], data, frame_size);
746 
747  while (data < dataend) {
748  if (*data++ != 0xE8) { curpos++; continue; }
749  abs_off = data[0] | (data[1]<<8) | (data[2]<<16) | (data[3]<<24);
750  if ((abs_off >= -curpos) && (abs_off < filesize)) {
751  rel_off = (abs_off >= 0) ? abs_off - curpos : abs_off + filesize;
752  data[0] = (unsigned char) rel_off;
753  data[1] = (unsigned char) (rel_off >> 8);
754  data[2] = (unsigned char) (rel_off >> 16);
755  data[3] = (unsigned char) (rel_off >> 24);
756  }
757  data += 4;
758  curpos += 5;
759  }
760  }
761  else {
762  lzx->o_ptr = &lzx->window[lzx->frame_posn];
763  }
764  lzx->o_end = &lzx->o_ptr[frame_size];
765 
766  /* write a frame */
767  i = (out_bytes < (off_t)frame_size) ? (unsigned int)out_bytes : frame_size;
768  if (lzx->sys->write(lzx->output, lzx->o_ptr, i) != i) {
769  return lzx->error = MSPACK_ERR_WRITE;
770  }
771  lzx->o_ptr += i;
772  lzx->offset += i;
773  out_bytes -= i;
774 
775  /* advance frame start position */
776  lzx->frame_posn += frame_size;
777  lzx->frame++;
778 
779  /* wrap window / frame position pointers */
780  if (window_posn == lzx->window_size) window_posn = 0;
781  if (lzx->frame_posn == lzx->window_size) lzx->frame_posn = 0;
782 
783  } /* while (lzx->frame < end_frame) */
784 
785  if (out_bytes) {
786  D(("bytes left to output"))
787  return lzx->error = MSPACK_ERR_DECRUNCH;
788  }
789 
790  /* store local state */
791  STORE_BITS;
792  lzx->window_posn = window_posn;
793  lzx->R0 = R0;
794  lzx->R1 = R1;
795  lzx->R2 = R2;
796 
797  return MSPACK_ERR_OK;
798 }
799 
800 void lzxd_free(struct lzxd_stream *lzx) {
801  struct mspack_system *sys;
802  if (lzx) {
803  sys = lzx->sys;
804  sys->free(lzx->inbuf);
805  sys->free(lzx->window);
806  sys->free(lzx);
807  }
808 }
lzma_index ** i
Definition: index.h:629
static ut8 bytes[32]
Definition: asm_arc.c:23
#define D
Definition: block.c:38
#define LZX_NUM_CHARS
Definition: lzx.h:25
#define LZX_BLOCKTYPE_INVALID
Definition: lzx.h:26
#define LZX_MAINTREE_MAXSYMBOLS
Definition: lzx.h:38
#define LZX_BLOCKTYPE_ALIGNED
Definition: lzx.h:28
#define LZX_NUM_SECONDARY_LENGTHS
Definition: lzx.h:33
#define LZX_BLOCKTYPE_VERBATIM
Definition: lzx.h:27
#define LZX_LENGTH_MAXSYMBOLS
Definition: lzx.h:40
#define LZX_NUM_PRIMARY_LENGTHS
Definition: lzx.h:32
#define LZX_BLOCKTYPE_UNCOMPRESSED
Definition: lzx.h:29
#define LZX_FRAME_SIZE
Definition: lzx.h:46
#define LZX_MAX_MATCH
Definition: lzx.h:24
#define LZX_MIN_MATCH
Definition: lzx.h:23
static void lzxd_reset_state(struct lzxd_stream *lzx)
Definition: lzxd.c:262
int lzxd_set_reference_data(struct lzxd_stream *lzx, struct mspack_system *system, struct mspack_file *input, unsigned int length)
Definition: lzxd.c:353
static int lzxd_read_lens(struct lzxd_stream *lzx, unsigned char *lens, unsigned int first, unsigned int last)
Definition: lzxd.c:138
#define READ_LENGTHS(tbl, first, last)
Definition: lzxd.c:131
void lzxd_set_output_length(struct lzxd_stream *lzx, off_t out_bytes)
Definition: lzxd.c:389
static const unsigned char extra_bits[36]
Definition: lzxd.c:217
struct lzxd_stream * lzxd_init(struct mspack_system *system, struct mspack_file *input, struct mspack_file *output, int window_bits, int reset_interval, int input_buffer_size, off_t output_length, char is_delta)
Definition: lzxd.c:279
void lzxd_free(struct lzxd_stream *lzx)
Definition: lzxd.c:800
int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes)
Definition: lzxd.c:393
static const unsigned int position_base[290]
Definition: lzxd.c:221
static const unsigned int position_slots[11]
Definition: lzxd.c:214
#define BUILD_TABLE(tbl)
Definition: lzxd.c:103
#define BUILD_TABLE_MAYBE_EMPTY(tbl)
Definition: lzxd.c:111
#define MSPACK_ERR_OK
Definition: mspack.h:485
#define MSPACK_ERR_WRITE
Definition: mspack.h:493
#define MSPACK_ERR_ARGS
Definition: mspack.h:487
#define MSPACK_ERR_DECRUNCH
Definition: mspack.h:507
#define MSPACK_ERR_READ
Definition: mspack.h:491
#define INIT_BITS
Definition: readbits.h:103
#define ENSURE_BITS(nbits)
Definition: readbits.h:125
#define RESTORE_BITS
Definition: readbits.h:118
#define STORE_BITS
Definition: readbits.h:111
#define REMOVE_BITS(nbits)
Definition: readbits.h:154
#define READ_BITS(val, nbits)
Definition: readbits.h:129
#define READ_IF_NEEDED
Definition: readbits.h:174
#define PEEK_BITS(nbits)
Definition: readbits.h:153
#define READ_HUFFSYM(tbl, var)
Definition: readhuff.h:34
#define NULL
Definition: cris-opc.c:27
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void length
Definition: sflib.h:133
@ LENGTH
Definition: inflate.h:48
voidpf void * buf
Definition: ioapi.h:138
int x
Definition: mipsasm.c:20
static int
Definition: sfsocketcall.h:114
int off_t
Definition: sftypes.h:41
#define R0(i)
Definition: sha256.c:54
#define R2(i)
Definition: sha256.c:55
off_t length
Definition: lzx.h:54
unsigned int R1
Definition: lzx.h:65
unsigned char LENGTH_len[LZX_LENGTH_MAXSYMBOLS+LZX_LENTABLE_SAFETY]
Definition: lzx.h:86
unsigned int frame
Definition: lzx.h:62
off_t offset
Definition: lzx.h:53
struct mspack_system * sys
Definition: lzx.h:49
unsigned char PRETREE_len[LZX_PRETREE_MAXSYMBOLS+LZX_LENTABLE_SAFETY]
Definition: lzx.h:84
unsigned int bit_buffer
Definition: lzx.h:81
unsigned char intel_started
Definition: lzx.h:71
unsigned int ref_data_size
Definition: lzx.h:58
unsigned char * window
Definition: lzx.h:56
unsigned char * i_ptr
Definition: lzx.h:80
unsigned int R2
Definition: lzx.h:65
struct mspack_file * output
Definition: lzx.h:51
unsigned char * o_end
Definition: lzx.h:80
struct mspack_file * input
Definition: lzx.h:50
unsigned int block_remaining
Definition: lzx.h:67
unsigned int window_size
Definition: lzx.h:57
unsigned char ALIGNED_len[LZX_ALIGNED_MAXSYMBOLS+LZX_LENTABLE_SAFETY]
Definition: lzx.h:87
unsigned int bits_left
Definition: lzx.h:81
int error
Definition: lzx.h:77
signed int intel_filesize
Definition: lzx.h:69
unsigned char e8_buf[LZX_FRAME_SIZE]
Definition: lzx.h:101
unsigned char * o_ptr
Definition: lzx.h:80
unsigned char MAINTREE_len[LZX_MAINTREE_MAXSYMBOLS+LZX_LENTABLE_SAFETY]
Definition: lzx.h:85
unsigned int reset_interval
Definition: lzx.h:63
unsigned int block_length
Definition: lzx.h:66
unsigned int R0
Definition: lzx.h:65
unsigned int frame_posn
Definition: lzx.h:61
unsigned int num_offsets
Definition: lzx.h:59
unsigned int inbuf_size
Definition: lzx.h:81
unsigned int window_posn
Definition: lzx.h:60
unsigned char LENGTH_empty
Definition: lzx.h:98
unsigned char block_type
Definition: lzx.h:72
unsigned char * i_end
Definition: lzx.h:80
unsigned char is_delta
Definition: lzx.h:75
unsigned char * inbuf
Definition: lzx.h:80
unsigned char header_read
Definition: lzx.h:73
void(* copy)(void *src, void *dest, size_t bytes)
Definition: mspack.h:444
void(* message)(struct mspack_file *file, const char *format,...)
Definition: mspack.h:407
void(* free)(void *ptr)
Definition: mspack.h:430
int(* read)(struct mspack_file *file, void *buffer, int bytes)
Definition: mspack.h:336
int(* write)(struct mspack_file *file, void *buffer, int bytes)
Definition: mspack.h:353
void *(* alloc)(struct mspack_system *self, size_t bytes)
Definition: mspack.h:421
int pos
Definition: main.c:11
struct _window window
static bool input(void *ud, zip_uint8_t *data, zip_uint64_t length)
diff_output_t output
Definition: zipcmp.c:237