Rizin
unix-like reverse engineering framework and cli tools
zip_algorithm_xz.c
Go to the documentation of this file.
1 /*
2  zip_algorithm_xz.c -- LZMA/XZ (de)compression routines
3  Based on zip_algorithm_deflate.c -- deflate (de)compression routines
4  Copyright (C) 2017-2021 Dieter Baron and Thomas Klausner
5 
6  This file is part of libzip, a library to manipulate ZIP archives.
7  The authors can be contacted at <info@libzip.org>
8 
9  Redistribution and use in source and binary forms, with or without
10  modification, are permitted provided that the following conditions
11  are met:
12  1. Redistributions of source code must retain the above copyright
13  notice, this list of conditions and the following disclaimer.
14  2. Redistributions in binary form must reproduce the above copyright
15  notice, this list of conditions and the following disclaimer in
16  the documentation and/or other materials provided with the
17  distribution.
18  3. The names of the authors may not be used to endorse or promote
19  products derived from this software without specific prior
20  written permission.
21 
22  THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS
23  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
24  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
26  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
28  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
30  IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
31  OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
32  IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
34 
35 #include "zipint.h"
36 
37 #include <limits.h>
38 #include <lzma.h>
39 #include <stdlib.h>
40 #include <zlib.h>
41 
43 
44 #define HEADER_BYTES_ZIP 9 /* header bytes in the zip on-disk format: 4 bytes magic + 5 bytes lzma parameters */
45 #define HEADER_MAGIC_LENGTH 4 /* length of the zip-specific magic that precedes the lzma parameters */
46 #define HEADER_MAGIC1_OFFSET 0 /* offset of the first two magic bytes (their value varies) */
47 #define HEADER_MAGIC2_OFFSET 2 /* offset of the last two magic bytes, expected to be 0x05 0x00 */
48 #define HEADER_SIZE_OFFSET 9 /* offset of the uncompressed size in the combined header: after magic and parameters */
49 #define HEADER_SIZE_LENGTH 8 /* length of the uncompressed size field of the "lzma alone" header */
50 #define HEADER_PARAMETERS_LENGTH 5 /* length of the lzma parameters */
51 #define HEADER_LZMA_ALONE_LENGTH (HEADER_PARAMETERS_LENGTH + HEADER_SIZE_LENGTH) /* "lzma alone" header: parameters followed by uncompressed size */
52 
53 struct ctx {
55  bool compress;
57  bool end_of_input;
60  /* header member is used for converting from zip to "lzma alone"
61  * format
62  *
63  * "lzma alone" file format starts with:
64  * 5 bytes lzma parameters
65  * 8 bytes uncompressed size
66  * compressed data
67  *
68  * zip archive on-disk format starts with
69  * 4 bytes magic (first two bytes vary, e.g. 0x0914 or 0x1002, next bytes are 0x0500)
70  * 5 bytes lzma parameters
71  * compressed data
72  *
73  * we read the data into a header of the form
74  * 4 bytes magic
75  * 5 bytes lzma parameters
76  * 8 bytes uncompressed size
77  */
82 };
83 
84 
85 static zip_uint64_t
87  /*
88  According to https://sourceforge.net/p/sevenzip/discussion/45797/thread/b6bd62f8/
89 
90  1) you can use
91  outSize = 1.10 * originalSize + 64 KB.
92  in most cases outSize is less than 1.02 from originalSize.
93  2) You can try LZMA2, where
94  outSize can be = 1.001 * originalSize + 1 KB.
95  */
96  /* 13 bytes added for lzma alone header */
97  zip_uint64_t compressed_size = (zip_uint64_t)((double)uncompressed_size * 1.1) + 64 * 1024 + 13;
98 
100  return ZIP_UINT64_MAX;
101  }
102  return compressed_size;
103 }
104 
105 
106 static void *
107 allocate(bool compress, int compression_flags, zip_error_t *error, zip_uint16_t method) {
108  struct ctx *ctx;
109 
110  if ((ctx = (struct ctx *)malloc(sizeof(*ctx))) == NULL) {
112  return NULL;
113  }
114 
115  ctx->error = error;
116  ctx->compress = compress;
117  if (compression_flags < 0 || compression_flags > 9) {
118  ctx->compression_flags = 6; /* default value */
119  } else {
121  }
123  ctx->end_of_input = false;
124  memset(ctx->header, 0, sizeof(ctx->header));
126  if (ZIP_CM_LZMA) {
128  }
129  else {
130  ctx->header_state = DONE;
131  }
132  memset(&ctx->zstr, 0, sizeof(ctx->zstr));
133  ctx->method = method;
134  return ctx;
135 }
136 
137 
138 static void *
140  return allocate(true, compression_flags, error, method);
141 }
142 
143 
144 static void *
146  return allocate(false, compression_flags, error, method);
147 }
148 
149 
/* Release a context obtained from allocate(); ud is the context pointer. */
static void
deallocate(void *ud) {
    free(ud);
}
155 
156 
157 static zip_uint16_t
159  struct ctx *ctx = (struct ctx *)ud;
160 
161  if (!ctx->compress) {
162  return 0;
163  }
164 
165  if (ctx->method == ZIP_CM_LZMA) {
166  /* liblzma always returns an EOS/EOPM marker, see
167  * https://sourceforge.net/p/lzmautils/discussion/708858/thread/84c5dbb9/#a5e4/3764 */
168  return 1 << 1;
169  }
170  return 0;
171 }
172 
173 static int
175  switch (ret) {
176  case LZMA_DATA_ERROR:
178  return ZIP_ER_COMPRESSED_DATA;
179 
180  case LZMA_MEM_ERROR:
181  return ZIP_ER_MEMORY;
182 
183  case LZMA_OPTIONS_ERROR:
184  return ZIP_ER_INVAL;
185 
186  default:
187  return ZIP_ER_INTERNAL;
188  }
189 }
190 
191 
192 static bool
193 start(void *ud, zip_stat_t *st, zip_file_attributes_t *attributes) {
194  struct ctx *ctx = (struct ctx *)ud;
195  lzma_ret ret;
196 
198  lzma_lzma_preset(&opt_lzma, ctx->compression_flags);
199  lzma_filter filters[] = {
201  {.id = LZMA_VLI_UNKNOWN, .options = NULL},
202  };
203 
204  ctx->zstr.avail_in = 0;
205  ctx->zstr.next_in = NULL;
206  ctx->zstr.avail_out = 0;
207  ctx->zstr.next_out = NULL;
208 
209  if (ctx->compress) {
210  if (ctx->method == ZIP_CM_LZMA)
211  ret = lzma_alone_encoder(&ctx->zstr, filters[0].options);
212  else
213  ret = lzma_stream_encoder(&ctx->zstr, filters, LZMA_CHECK_CRC64);
214  }
215  else {
216  if (ctx->method == ZIP_CM_LZMA)
217  ret = lzma_alone_decoder(&ctx->zstr, UINT64_MAX);
218  else
219  ret = lzma_stream_decoder(&ctx->zstr, UINT64_MAX, LZMA_CONCATENATED);
220  }
221 
222  if (ret != LZMA_OK) {
223  zip_error_set(ctx->error, map_error(ret), 0);
224  return false;
225  }
226 
227  /* If general purpose bits 1 & 2 are both zero, write real uncompressed size in header. */
228  if ((attributes->valid & ZIP_FILE_ATTRIBUTES_GENERAL_PURPOSE_BIT_FLAGS) && (attributes->general_purpose_bit_mask & 0x6) == 0x6 && (attributes->general_purpose_bit_flags & 0x06) == 0 && (st->valid & ZIP_STAT_SIZE)) {
229  ctx->uncompresssed_size = st->size;
230  }
231  else {
233  }
234 
235  return true;
236 }
237 
238 
239 static bool
240 end(void *ud) {
241  struct ctx *ctx = (struct ctx *)ud;
242 
243  lzma_end(&ctx->zstr);
244  return true;
245 }
246 
247 
248 static bool
250  struct ctx *ctx = (struct ctx *)ud;
251 
252  if (length > UINT_MAX || ctx->zstr.avail_in > 0) {
254  return false;
255  }
256 
257  /* For decompression of LZMA1: Have we read the full "lzma alone" header yet? */
259  /* if not, get more of the data */
261  memcpy(ctx->header + ctx->header_bytes_offset, data, got);
262  ctx->header_bytes_offset += got;
263  length -= got;
264  data += got;
265  /* Do we have a complete header now? */
267  Bytef empty_buffer[1];
269  /* check magic */
270  if (ctx->header[HEADER_MAGIC2_OFFSET] != 0x05 || ctx->header[HEADER_MAGIC2_OFFSET + 1] != 0x00) {
271  /* magic does not match */
273  return false;
274  }
275  /* set size of uncompressed data in "lzma alone" header to "unknown" */
278  return false;
279  }
282  /* Feed header into "lzma alone" decoder, for
283  * initialization; this should not produce output. */
284  ctx->zstr.next_in = (void *)(ctx->header + HEADER_MAGIC_LENGTH);
285  ctx->zstr.avail_in = HEADER_LZMA_ALONE_LENGTH;
286  ctx->zstr.total_in = 0;
287  ctx->zstr.next_out = empty_buffer;
288  ctx->zstr.avail_out = sizeof(*empty_buffer);
289  ctx->zstr.total_out = 0;
290  /* this just initializes the decoder and does not produce output, so it consumes the complete header */
291  if (lzma_code(&ctx->zstr, LZMA_RUN) != LZMA_OK || ctx->zstr.total_out > 0) {
293  return false;
294  }
295  ctx->header_state = DONE;
296  }
297  }
298  ctx->zstr.avail_in = (uInt)length;
299  ctx->zstr.next_in = (Bytef *)data;
300 
301  return true;
302 }
303 
304 
305 static void
306 end_of_input(void *ud) {
307  struct ctx *ctx = (struct ctx *)ud;
308 
309  ctx->end_of_input = true;
310 }
311 
312 
315  struct ctx *ctx = (struct ctx *)ud;
316  lzma_ret ret;
317  /* for compression of LZMA1 */
318  if (ctx->method == ZIP_CM_LZMA && ctx->compress) {
319  if (ctx->header_state == INCOMPLETE) {
320  /* write magic to output buffer */
321  ctx->header[0] = 0x09;
322  ctx->header[1] = 0x14;
323  ctx->header[2] = 0x05;
324  ctx->header[3] = 0x00;
325  /* generate lzma parameters into output buffer */
326  ctx->zstr.avail_out = HEADER_LZMA_ALONE_LENGTH;
327  ctx->zstr.next_out = ctx->header + HEADER_MAGIC_LENGTH;
328  ret = lzma_code(&ctx->zstr, LZMA_RUN);
329  if (ret != LZMA_OK || ctx->zstr.avail_out != 0) {
330  /* assume that the whole header will be provided with the first call to lzma_code */
331  return ZIP_COMPRESSION_ERROR;
332  }
334  }
335  if (ctx->header_state == OUTPUT) {
336  /* write header */
338  memcpy(data, ctx->header + ctx->header_bytes_offset, write_len);
339  ctx->header_bytes_offset += write_len;
340  *length = write_len;
342  ctx->header_state = DONE;
343  }
344  return ZIP_COMPRESSION_OK;
345  }
346  }
347 
348  ctx->zstr.avail_out = (uInt)ZIP_MIN(UINT_MAX, *length);
349  ctx->zstr.next_out = (Bytef *)data;
350 
351  ret = lzma_code(&ctx->zstr, ctx->end_of_input ? LZMA_FINISH : LZMA_RUN);
352  *length = *length - ctx->zstr.avail_out;
353 
354  switch (ret) {
355  case LZMA_OK:
356  return ZIP_COMPRESSION_OK;
357 
358  case LZMA_STREAM_END:
359  return ZIP_COMPRESSION_END;
360 
361  case LZMA_BUF_ERROR:
362  if (ctx->zstr.avail_in == 0) {
364  }
365 
366  /* fallthrough */
367  default:
368  zip_error_set(ctx->error, map_error(ret), 0);
369  return ZIP_COMPRESSION_ERROR;
370  }
371 }
372 
373 /* Version Required should be set to 63 (6.3) because this compression
374  method was only defined in appnote.txt version 6.3.8, but Winzip
375  does not unpack it if the value is not 20. */
376 
377 /* clang-format off */
378 
382  deallocate,
384  20,
385  start,
386  end,
387  input,
388  end_of_input,
389  process
390 };
391 
392 
396  deallocate,
398  20,
399  start,
400  end,
401  input,
402  end_of_input,
403  process
404 };
405 
406 /* clang-format on */
@ LZMA_CHECK_CRC64
Definition: check.h:42
struct buffer buffer
int ZEXPORT compress(Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen)
Definition: compress.c:68
#define LZMA_CONCATENATED
Definition: container.h:515
const lzma_filter * filters
Definition: container.h:315
#define LZMA_PRESET_EXTREME
Extreme compression preset.
Definition: container.h:60
#define NULL
Definition: cris-opc.c:27
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void length
Definition: sflib.h:133
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
return memset(p, 0, total)
memcpy(mem, inblock.get(), min(CONTAINING_RECORD(inblock.get(), MEMBLOCK, data) ->size, size))
#define ZIP_ER_INTERNAL
Definition: zip.h:125
ZIP_EXTERN void zip_error_set(zip_error_t *_Nullable, int, int)
Definition: zip_error.c:126
#define ZIP_ER_COMPRESSED_DATA
Definition: zip.h:136
#define ZIP_ER_MEMORY
Definition: zip.h:119
#define ZIP_STAT_SIZE
Definition: zip.h:292
#define ZIP_CM_LZMA
Definition: zip.h:163
#define ZIP_FILE_ATTRIBUTES_GENERAL_PURPOSE_BIT_FLAGS
Definition: zip.h:333
#define ZIP_ER_INVAL
Definition: zip.h:123
void * malloc(size_t size)
Definition: malloc.c:123
static const char struct stat static buf struct stat static buf static vhangup int options
Definition: sflib.h:145
#define LZMA_FILTER_LZMA2
LZMA2 Filter ID.
Definition: lzma12.h:40
#define LZMA_FILTER_LZMA1
LZMA1 Filter ID.
Definition: lzma12.h:30
The public API of liblzma data compression library.
#define UINT64_MAX
Definition: buffer.h:15
lzma_stream zstr
zip_uint8_t header_bytes_offset
zip_uint64_t uncompresssed_size
zip_uint16_t method
bool end_of_input
zip_uint8_t header[HEADER_MAGIC_LENGTH+HEADER_LZMA_ALONE_LENGTH]
bz_stream zstr
int compression_flags
zip_uint32_t compression_flags
enum header_state header_state
bool compress
zip_error_t * error
Filter options.
Definition: filter.h:43
void * options
Pointer to filter-specific options structure.
Definition: filter.h:63
lzma_vli id
Filter ID.
Definition: filter.h:54
Options specific to the LZMA1 and LZMA2 filters.
Definition: lzma12.h:185
Passing data to and from liblzma.
Definition: base.h:485
Definition: zip.h:284
zip_uint16_t general_purpose_bit_mask
Definition: zip.h:326
zip_uint16_t general_purpose_bit_flags
Definition: zip.h:325
zip_uint64_t valid
Definition: zip.h:319
Definition: zip.h:300
zip_uint64_t valid
Definition: zip.h:301
zip_uint64_t size
Definition: zip.h:304
#define UINT_MAX
Definition: md5.h:55
uint64_t compressed_size
Definition: list.c:105
uint64_t uncompressed_size
Definition: list.c:106
static lzma_options_lzma opt_lzma
void error(const char *msg)
Definition: untgz.c:593
#define LZMA_VLI_UNKNOWN
VLI value to denote that the value is unknown.
Definition: vli.h:39
lzma_ret
Return values used by several functions in liblzma.
Definition: base.h:57
@ LZMA_DATA_ERROR
Data is corrupt.
Definition: base.h:172
@ LZMA_MEM_ERROR
Cannot allocate memory.
Definition: base.h:128
@ LZMA_STREAM_END
End of stream was reached.
Definition: base.h:63
@ LZMA_UNSUPPORTED_CHECK
Cannot calculate the integrity check.
Definition: base.h:90
@ LZMA_BUF_ERROR
No progress is possible.
Definition: base.h:191
@ LZMA_OPTIONS_ERROR
Invalid or unsupported options.
Definition: base.h:160
@ LZMA_OK
Operation completed successfully.
Definition: base.h:58
@ LZMA_FINISH
Finish the coding operation.
Definition: base.h:328
@ LZMA_RUN
Continue coding.
Definition: base.h:251
unsigned int uInt
Definition: zconf.h:393
Byte FAR Bytef
Definition: zconf.h:400
static void * allocate(bool compress, int compression_flags, zip_error_t *error, zip_uint16_t method)
static zip_uint64_t maximum_compressed_size(zip_uint64_t uncompressed_size)
#define HEADER_MAGIC2_OFFSET
zip_compression_algorithm_t zip_algorithm_xz_compress
static void deallocate(void *ud)
#define HEADER_SIZE_OFFSET
#define HEADER_LZMA_ALONE_LENGTH
static int map_error(lzma_ret ret)
#define HEADER_MAGIC_LENGTH
static void * decompress_allocate(zip_uint16_t method, int compression_flags, zip_error_t *error)
header_state
@ INCOMPLETE
@ OUTPUT
@ DONE
static void * compress_allocate(zip_uint16_t method, int compression_flags, zip_error_t *error)
zip_compression_algorithm_t zip_algorithm_xz_decompress
#define HEADER_BYTES_ZIP
static bool start(void *ud, zip_stat_t *st, zip_file_attributes_t *attributes)
#define HEADER_SIZE_LENGTH
static bool end(void *ud)
static zip_uint16_t general_purpose_bit_flags(void *ud)
static void end_of_input(void *ud)
static bool input(void *ud, zip_uint8_t *data, zip_uint64_t length)
static zip_compression_status_t process(void *ud, zip_uint8_t *data, zip_uint64_t *length)
zip_buffer_t * _zip_buffer_new(zip_uint8_t *data, zip_uint64_t size)
Definition: zip_buffer.c:146
void _zip_buffer_free(zip_buffer_t *buffer)
Definition: zip_buffer.c:46
int _zip_buffer_put_64(zip_buffer_t *buffer, zip_uint64_t i)
Definition: zip_buffer.c:262
uint64_t zip_uint64_t
Definition: zipconf.h:39
uint32_t zip_uint32_t
Definition: zipconf.h:37
uint8_t zip_uint8_t
Definition: zipconf.h:33
uint16_t zip_uint16_t
Definition: zipconf.h:35
#define ZIP_UINT64_MAX
Definition: zipconf.h:55
enum zip_compression_status zip_compression_status_t
Definition: zipint.h:122
#define ZIP_MIN(a, b)
Definition: zipint.h:473
@ ZIP_COMPRESSION_NEED_DATA
Definition: zipint.h:119
@ ZIP_COMPRESSION_ERROR
Definition: zipint.h:118
@ ZIP_COMPRESSION_END
Definition: zipint.h:117
@ ZIP_COMPRESSION_OK
Definition: zipint.h:116