Rizin
UNIX-like reverse engineering framework and CLI tools
block_buffer_encoder.c
Go to the documentation of this file.
1 //
5 //
6 // Author: Lasse Collin
7 //
8 // This file has been put into the public domain.
9 // You can do whatever you want with this file.
10 //
12 
13 #include "block_buffer_encoder.h"
14 #include "block_encoder.h"
15 #include "filter_encoder.h"
16 #include "lzma2_encoder.h"
17 #include "check.h"
18 
19 
/// Upper bound for the combined size of the Block Header and the Check
/// field of a Block whose payload is stored as LZMA2 uncompressed chunks.
///
/// Term by term (per the .xz Block layout; confirm against the format
/// specification): Block Header Size (1) + Block Flags (1)
/// + Compressed Size and Uncompressed Size (two VLIs)
/// + Filter Flags for LZMA2 (3) + CRC32 (4) + the largest Check.
/// The final "+ 3) & ~3" rounds the sum up to the next multiple of four.
#define HEADERS_BOUND ((1 + 1 + 2 * LZMA_VLI_BYTES_MAX + 3 + 4 \
		+ LZMA_CHECK_SIZE_MAX + 3) & ~3)
30 
31 
32 static uint64_t
34 {
35  // Prevent integer overflow in overhead calculation.
37  return 0;
38 
39  // Calculate the exact overhead of the LZMA2 headers: Round
40  // uncompressed_size up to the next multiple of LZMA2_CHUNK_MAX,
41  // multiply by the size of per-chunk header, and add one byte for
42  // the end marker.
43  const uint64_t overhead = ((uncompressed_size + LZMA2_CHUNK_MAX - 1)
46 
47  // Catch the possible integer overflow.
48  if (COMPRESSED_SIZE_MAX - overhead < uncompressed_size)
49  return 0;
50 
51  return uncompressed_size + overhead;
52 }
53 
54 
55 extern uint64_t
57 {
58  // If the data doesn't compress, we always use uncompressed
59  // LZMA2 chunks.
61  if (lzma2_size == 0)
62  return 0;
63 
64  // Take Block Padding into account.
65  lzma2_size = (lzma2_size + 3) & ~UINT64_C(3);
66 
67  // No risk of integer overflow because lzma2_bound() already takes
68  // into account the size of the headers in the Block.
69  return HEADERS_BOUND + lzma2_size;
70 }
71 
72 
73 extern LZMA_API(size_t)
74 lzma_block_buffer_bound(size_t uncompressed_size)
75 {
77 
78 #if SIZE_MAX < UINT64_MAX
79  // Catch the possible integer overflow on 32-bit systems.
80  if (ret > SIZE_MAX)
81  return 0;
82 #endif
83 
84  return ret;
85 }
86 
87 
88 static lzma_ret
90  uint8_t *out, size_t *out_pos, size_t out_size)
91 {
92  // Use LZMA2 uncompressed chunks. We wouldn't need a dictionary at
93  // all, but LZMA2 always requires a dictionary, so use the minimum
94  // value to minimize memory usage of the decoder.
95  lzma_options_lzma lzma2 = {
97  };
98 
101  filters[0].options = &lzma2;
103 
104  // Set the above filter options to *block temporarily so that we can
105  // encode the Block Header.
106  lzma_filter *filters_orig = block->filters;
107  block->filters = filters;
108 
109  if (lzma_block_header_size(block) != LZMA_OK) {
110  block->filters = filters_orig;
111  return LZMA_PROG_ERROR;
112  }
113 
114  // Check that there's enough output space. The caller has already
115  // set block->compressed_size to what lzma2_bound() has returned,
116  // so we can reuse that value. We know that compressed_size is a
117  // known valid VLI and header_size is a small value so their sum
118  // will never overflow.
120  if (out_size - *out_pos
121  < block->header_size + block->compressed_size) {
122  block->filters = filters_orig;
123  return LZMA_BUF_ERROR;
124  }
125 
126  if (lzma_block_header_encode(block, out + *out_pos) != LZMA_OK) {
127  block->filters = filters_orig;
128  return LZMA_PROG_ERROR;
129  }
130 
131  block->filters = filters_orig;
132  *out_pos += block->header_size;
133 
134  // Encode the data using LZMA2 uncompressed chunks.
135  size_t in_pos = 0;
136  uint8_t control = 0x01; // Dictionary reset
137 
138  while (in_pos < in_size) {
139  // Control byte: Indicate uncompressed chunk, of which
140  // the first resets the dictionary.
141  out[(*out_pos)++] = control;
142  control = 0x02; // No dictionary reset
143 
144  // Size of the uncompressed chunk
145  const size_t copy_size
147  out[(*out_pos)++] = (copy_size - 1) >> 8;
148  out[(*out_pos)++] = (copy_size - 1) & 0xFF;
149 
150  // The actual data
151  assert(*out_pos + copy_size <= out_size);
152  memcpy(out + *out_pos, in + in_pos, copy_size);
153 
154  in_pos += copy_size;
155  *out_pos += copy_size;
156  }
157 
158  // End marker
159  out[(*out_pos)++] = 0x00;
160  assert(*out_pos <= out_size);
161 
162  return LZMA_OK;
163 }
164 
165 
166 static lzma_ret
168  const uint8_t *in, size_t in_size,
169  uint8_t *out, size_t *out_pos, size_t out_size)
170 {
171  // Find out the size of the Block Header.
172  return_if_error(lzma_block_header_size(block));
173 
174  // Reserve space for the Block Header and skip it for now.
175  if (out_size - *out_pos <= block->header_size)
176  return LZMA_BUF_ERROR;
177 
178  const size_t out_start = *out_pos;
179  *out_pos += block->header_size;
180 
181  // Limit out_size so that we stop encoding if the output would grow
182  // bigger than what uncompressed Block would be.
183  if (out_size - *out_pos > block->compressed_size)
184  out_size = *out_pos + block->compressed_size;
185 
186  // TODO: In many common cases this could be optimized to use
187  // significantly less memory.
190  &raw_encoder, allocator, block->filters);
191 
192  if (ret == LZMA_OK) {
193  size_t in_pos = 0;
194  ret = raw_encoder.code(raw_encoder.coder, allocator,
195  in, &in_pos, in_size, out, out_pos, out_size,
196  LZMA_FINISH);
197  }
198 
199  // NOTE: This needs to be run even if lzma_raw_encoder_init() failed.
200  lzma_next_end(&raw_encoder, allocator);
201 
202  if (ret == LZMA_STREAM_END) {
203  // Compression was successful. Write the Block Header.
204  block->compressed_size
205  = *out_pos - (out_start + block->header_size);
206  ret = lzma_block_header_encode(block, out + out_start);
207  if (ret != LZMA_OK)
208  ret = LZMA_PROG_ERROR;
209 
210  } else if (ret == LZMA_OK) {
211  // Output buffer became full.
212  ret = LZMA_BUF_ERROR;
213  }
214 
215  // Reset *out_pos if something went wrong.
216  if (ret != LZMA_OK)
217  *out_pos = out_start;
218 
219  return ret;
220 }
221 
222 
223 static lzma_ret
225  const uint8_t *in, size_t in_size,
226  uint8_t *out, size_t *out_pos, size_t out_size,
227  bool try_to_compress)
228 {
229  // Validate the arguments.
230  if (block == NULL || (in == NULL && in_size != 0) || out == NULL
231  || out_pos == NULL || *out_pos > out_size)
232  return LZMA_PROG_ERROR;
233 
234  // The contents of the structure may depend on the version so
235  // check the version before validating the contents of *block.
236  if (block->version > 1)
237  return LZMA_OPTIONS_ERROR;
238 
239  if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX
240  || (try_to_compress && block->filters == NULL))
241  return LZMA_PROG_ERROR;
242 
243  if (!lzma_check_is_supported(block->check))
244  return LZMA_UNSUPPORTED_CHECK;
245 
246  // Size of a Block has to be a multiple of four, so limit the size
247  // here already. This way we don't need to check it again when adding
248  // Block Padding.
249  out_size -= (out_size - *out_pos) & 3;
250 
251  // Get the size of the Check field.
252  const size_t check_size = lzma_check_size(block->check);
253  assert(check_size != UINT32_MAX);
254 
255  // Reserve space for the Check field.
256  if (out_size - *out_pos <= check_size)
257  return LZMA_BUF_ERROR;
258 
259  out_size -= check_size;
260 
261  // Initialize block->uncompressed_size and calculate the worst-case
262  // value for block->compressed_size.
263  block->uncompressed_size = in_size;
265  if (block->compressed_size == 0)
266  return LZMA_DATA_ERROR;
267 
268  // Do the actual compression.
269  lzma_ret ret = LZMA_BUF_ERROR;
270  if (try_to_compress)
271  ret = block_encode_normal(block, allocator,
272  in, in_size, out, out_pos, out_size);
273 
274  if (ret != LZMA_OK) {
275  // If the error was something else than output buffer
276  // becoming full, return the error now.
277  if (ret != LZMA_BUF_ERROR)
278  return ret;
279 
280  // The data was uncompressible (at least with the options
281  // given to us) or the output buffer was too small. Use the
282  // uncompressed chunks of LZMA2 to wrap the data into a valid
283  // Block. If we haven't been given enough output space, even
284  // this may fail.
286  out, out_pos, out_size));
287  }
288 
289  assert(*out_pos <= out_size);
290 
291  // Block Padding. No buffer overflow here, because we already adjusted
292  // out_size so that (out_size - out_start) is a multiple of four.
293  // Thus, if the buffer is full, the loop body can never run.
294  for (size_t i = (size_t)(block->compressed_size); i & 3; ++i) {
295  assert(*out_pos < out_size);
296  out[(*out_pos)++] = 0x00;
297  }
298 
299  // If there's no Check field, we are done now.
300  if (check_size > 0) {
301  // Calculate the integrity check. We reserved space for
302  // the Check field earlier so we don't need to check for
303  // available output space here.
305  lzma_check_init(&check, block->check);
306  lzma_check_update(&check, block->check, in, in_size);
307  lzma_check_finish(&check, block->check);
308 
309  memcpy(block->raw_check, check.buffer.u8, check_size);
310  memcpy(out + *out_pos, check.buffer.u8, check_size);
311  *out_pos += check_size;
312  }
313 
314  return LZMA_OK;
315 }
316 
317 
319 lzma_block_buffer_encode(lzma_block *block, const lzma_allocator *allocator,
320  const uint8_t *in, size_t in_size,
321  uint8_t *out, size_t *out_pos, size_t out_size)
322 {
323  return block_buffer_encode(block, allocator,
324  in, in_size, out, out_pos, out_size, true);
325 }
326 
327 
329 lzma_block_uncomp_encode(lzma_block *block,
330  const uint8_t *in, size_t in_size,
331  uint8_t *out, size_t *out_pos, size_t out_size)
332 {
333  // It won't allocate any memory from heap so no need
334  // for lzma_allocator.
335  return block_buffer_encode(block, NULL,
336  in, in_size, out, out_pos, out_size, false);
337 }
#define LZMA_CHECK_ID_MAX
Maximum valid Check ID.
Definition: check.h:68
lzma_index ** i
Definition: index.h:629
const lzma_allocator const uint8_t size_t uint8_t size_t * out_pos
Definition: block.h:528
const lzma_allocator const uint8_t size_t * in_pos
Definition: block.h:579
const lzma_allocator const uint8_t size_t in_size
Definition: block.h:527
const lzma_allocator * allocator
Definition: block.h:377
const lzma_allocator const uint8_t * in
Definition: block.h:527
const lzma_allocator const uint8_t size_t uint8_t * out
Definition: block.h:528
uint64_t lzma_block_buffer_bound64(uint64_t uncompressed_size)
LZMA_API(size_t)
Calculate maximum output size for single-call Block encoding.
static lzma_ret block_encode_uncompressed(lzma_block *block, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size)
static uint64_t lzma2_bound(uint64_t uncompressed_size)
#define HEADERS_BOUND
static lzma_ret block_encode_normal(lzma_block *block, const lzma_allocator *allocator, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size)
static lzma_ret block_buffer_encode(lzma_block *block, const lzma_allocator *allocator, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size, bool try_to_compress)
Single-call .xz Block encoder.
Encodes .xz Blocks.
#define COMPRESSED_SIZE_MAX
Biggest Compressed Size value that the Block encoder supports.
Definition: block_encoder.h:40
void lzma_check_finish(lzma_check_state *check, lzma_check type)
Finish the check calculation and store the result to check->buffer.u8.
Definition: check.c:148
void lzma_check_update(lzma_check_state *check, lzma_check type, const uint8_t *buf, size_t size)
Update the check state.
Definition: check.c:117
void lzma_check_init(lzma_check_state *check, lzma_check type)
Initialize *check depending on type.
Definition: check.c:84
lzma_check check
Definition: container.h:292
const lzma_filter * filters
Definition: container.h:315
#define NULL
Definition: cris-opc.c:27
lzma_ret lzma_raw_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *options)
memcpy(mem, inblock.get(), min(CONTAINING_RECORD(inblock.get(), MEMBLOCK, data) ->size, size))
#define LZMA_FILTER_LZMA2
LZMA2 Filter ID.
Definition: lzma12.h:40
#define LZMA_DICT_SIZE_MIN
Definition: lzma12.h:218
LZMA2 encoder.
#define LZMA2_HEADER_UNCOMPRESSED
Size of a header for uncompressed chunk.
Definition: lzma2_encoder.h:30
#define LZMA2_CHUNK_MAX
Maximum number of bytes of actual data per chunk (no headers)
Definition: lzma2_encoder.h:21
assert(limit<=UINT32_MAX/2)
unsigned long uint64_t
Definition: sftypes.h:28
unsigned char uint8_t
Definition: sftypes.h:31
#define SIZE_MAX
#define UINT64_C(val)
#define UINT32_MAX
Custom functions for memory handling.
Definition: base.h:372
Options for the Block and Block Header encoders and decoders.
Definition: block.h:30
lzma_vli uncompressed_size
Uncompressed Size in bytes.
Definition: block.h:172
uint8_t raw_check[LZMA_CHECK_SIZE_MAX]
Raw value stored in the Check field.
Definition: block.h:217
lzma_filter * filters
Array of filters.
Definition: block.h:200
uint32_t header_size
Size of the Block Header field.
Definition: block.h:72
lzma_check check
Type of integrity Check.
Definition: block.h:93
lzma_vli compressed_size
Size of the Compressed Data in bytes.
Definition: block.h:148
uint32_t version
Block format version.
Definition: block.h:52
Structure to hold internal state of the check being calculated.
Definition: check.h:81
Filter options.
Definition: filter.h:43
void * options
Pointer to filter-specific options structure.
Definition: filter.h:63
lzma_vli id
Filter ID.
Definition: filter.h:54
Hold data and function pointers of the next filter in the chain.
Definition: common.h:135
lzma_code_function code
Pointer to function to do the actual coding.
Definition: common.h:150
void * coder
Pointer to coder-specific data.
Definition: common.h:137
Options specific to the LZMA1 and LZMA2 filters.
Definition: lzma12.h:185
uint32_t dict_size
Dictionary size in bytes.
Definition: lzma12.h:217
#define LZMA_NEXT_CODER_INIT
Macro to initialize lzma_next_coder structure.
Definition: common.h:180
#define return_if_error(expr)
Return if expression doesn't evaluate to LZMA_OK.
Definition: common.h:278
uint64_t uncompressed_size
Definition: list.c:106
#define my_min(x, y)
Definition: sysdefs.h:185
control
#define LZMA_VLI_UNKNOWN
VLI value to denote that the value is unknown.
Definition: vli.h:39
lzma_ret
Return values used by several functions in liblzma.
Definition: base.h:57
@ LZMA_PROG_ERROR
Programming error.
Definition: base.h:218
@ LZMA_DATA_ERROR
Data is corrupt.
Definition: base.h:172
@ LZMA_STREAM_END
End of stream was reached.
Definition: base.h:63
@ LZMA_UNSUPPORTED_CHECK
Cannot calculate the integrity check.
Definition: base.h:90
@ LZMA_BUF_ERROR
No progress is possible.
Definition: base.h:191
@ LZMA_OPTIONS_ERROR
Invalid or unsupported options.
Definition: base.h:160
@ LZMA_OK
Operation completed successfully.
Definition: base.h:58
@ LZMA_FINISH
Finish the coding operation.
Definition: base.h:328
void lzma_next_end(lzma_next_coder *next, const lzma_allocator *allocator)
Definition: common.c:145