Rizin
unix-like reverse engineering framework and cli tools
coder.h File Reference

Compresses or uncompresses a file. More...

Go to the source code of this file.

Enumerations

enum  operation_mode { MODE_COMPRESS , MODE_DECOMPRESS , MODE_TEST , MODE_LIST }
 
enum  format_type { FORMAT_AUTO , FORMAT_XZ , FORMAT_LZMA , FORMAT_RAW }
 

Functions

void coder_set_check (lzma_check check)
 Set the integrity check type used when compressing. More...
 
void coder_set_preset (uint32_t new_preset)
 Set preset number. More...
 
void coder_set_extreme (void)
 Enable extreme mode. More...
 
void coder_add_filter (lzma_vli id, void *options)
 Add a filter to the custom filter chain. More...
 
void coder_set_compression_settings (void)
 
void coder_run (const char *filename)
 Compress or decompress the given file. More...
 
void coder_free (void)
 Free the memory allocated for the coder and kill the worker threads. More...
 

Variables

enum operation_mode opt_mode
 
enum format_type opt_format
 
bool opt_auto_adjust
 
bool opt_single_stream
 If true, stop after decoding the first stream. More...
 
uint64_t opt_block_size
 
uint64_t * opt_block_list
 

Detailed Description

Compresses or uncompresses a file.

Definition in file coder.h.

Enumeration Type Documentation

◆ format_type

Enumerator
FORMAT_AUTO 
FORMAT_XZ 
FORMAT_LZMA 
FORMAT_RAW 

Definition at line 22 of file coder.h.

22  {
24  FORMAT_XZ,
26  // HEADER_GZIP,
27  FORMAT_RAW,
28 };
@ FORMAT_RAW
Definition: coder.h:27
@ FORMAT_AUTO
Definition: coder.h:23
@ FORMAT_XZ
Definition: coder.h:24
@ FORMAT_LZMA
Definition: coder.h:25

◆ operation_mode

Enumerator
MODE_COMPRESS 
MODE_DECOMPRESS 
MODE_TEST 
MODE_LIST 

Definition at line 13 of file coder.h.

13  {
16  MODE_TEST,
17  MODE_LIST,
18 };
@ MODE_COMPRESS
Definition: coder.h:14
@ MODE_TEST
Definition: coder.h:16
@ MODE_LIST
Definition: coder.h:17
@ MODE_DECOMPRESS
Definition: coder.h:15

Function Documentation

◆ coder_add_filter()

void coder_add_filter ( lzma_vli  id,
void *  options 
)

Add a filter to the custom filter chain.

Definition at line 107 of file coder.c.

108 {
110  message_fatal(_("Maximum number of filters is four"));
111 
114  ++filters_count;
115 
116  // Setting a custom filter chain makes us forget the preset options.
117  // This makes a difference if one specifies e.g. "xz -9 --lzma2 -e"
118  // where the custom filter chain resets the preset level back to
119  // the default 6, making the example equivalent to "xz -6e".
121 
122  return;
123 }
static uint32_t preset_number
Number of the preset (0-9)
Definition: coder.c:46
static lzma_filter filters[LZMA_FILTERS_MAX+1]
Filters needed for encoding in all formats, and also for decoding raw data.
Definition: coder.c:36
static uint32_t filters_count
Number of filters. Zero indicates that we are using a preset.
Definition: coder.c:43
#define LZMA_PRESET_DEFAULT
Default compression preset.
Definition: container.h:31
#define LZMA_FILTERS_MAX
Maximum number of filters in a chain.
Definition: filter.h:26
static const char struct stat static buf struct stat static buf static vhangup int options
Definition: sflib.h:145
void message_fatal(const char *fmt,...)
Definition: message.c:777
int id
Definition: op.c:540
#define _(String)
Definition: opintl.h:53
void * options
Pointer to filter-specific options structure.
Definition: filter.h:63
lzma_vli id
Filter ID.
Definition: filter.h:54

References _, filters, filters_count, id, lzma_filter::id, LZMA_FILTERS_MAX, LZMA_PRESET_DEFAULT, message_fatal(), options, lzma_filter::options, and preset_number.

Referenced by parse_real().

◆ coder_free()

void coder_free ( void  )

Free the memory allocated for the coder and kill the worker threads.

Definition at line 939 of file coder.c.

940 {
941  lzma_end(&strm);
942  return;
943 }
static lzma_stream strm
Stream used to communicate with liblzma.
Definition: coder.c:33

References strm.

Referenced by main().

◆ coder_run()

void coder_run ( const char *  filename)

Compress or decompress the given file.

Definition at line 869 of file coder.c.

870 {
871  // Set and possibly print the filename for the progress message.
873 
874  // Try to open the input file.
875  file_pair *pair = io_open_src(filename);
876  if (pair == NULL)
877  return;
878 
879  // Assume that something goes wrong.
880  bool success = false;
881 
882  if (opt_mode == MODE_COMPRESS) {
883  strm.next_in = NULL;
884  strm.avail_in = 0;
885  } else {
886  // Read the first chunk of input data. This is needed
887  // to detect the input file type.
888  strm.next_in = in_buf.u8;
890  }
891 
892  if (strm.avail_in != SIZE_MAX) {
893  // Initialize the coder. This will detect the file format
894  // and, in decompression or testing mode, check the memory
895  // usage of the first Block too. This way we don't try to
896  // open the destination file if we see that coding wouldn't
897  // work at all anyway. This also avoids deleting the old
898  // "target" file if --force was used.
899  const enum coder_init_ret init_ret = coder_init(pair);
900 
901  if (init_ret != CODER_INIT_ERROR && !user_abort) {
902  // Don't open the destination file when --test
903  // is used.
904  if (opt_mode == MODE_TEST || !io_open_dest(pair)) {
905  // Remember the current time. It is needed
906  // for progress indicator.
908 
909  // Initialize the progress indicator.
910  const bool is_passthru = init_ret
912  const uint64_t in_size
913  = pair->src_st.st_size <= 0
914  ? 0 : (uint64_t)(pair->src_st.st_size);
916  is_passthru, in_size);
917 
918  // Do the actual coding or passthru.
919  if (is_passthru)
920  success = coder_passthru(pair);
921  else
922  success = coder_normal(pair);
923 
924  message_progress_end(success);
925  }
926  }
927  }
928 
929  // Close the file pair. It needs to know if coding was successful to
930  // know if the source or target file should be unlinked.
931  io_close(pair, success);
932 
933  return;
934 }
const lzma_allocator const uint8_t size_t in_size
Definition: block.h:527
static bool coder_normal(file_pair *pair)
Compress or decompress using liblzma.
Definition: coder.c:631
static enum coder_init_ret coder_init(file_pair *pair)
Definition: coder.c:435
coder_init_ret
Return value type for coder_init().
Definition: coder.c:17
@ CODER_INIT_ERROR
Definition: coder.c:20
@ CODER_INIT_PASSTHRU
Definition: coder.c:19
static io_buf in_buf
Input and output buffers.
Definition: coder.c:39
static bool coder_passthru(file_pair *pair)
Definition: coder.c:846
enum operation_mode opt_mode
Definition: coder.c:24
#define NULL
Definition: cris-opc.c:27
bool io_open_dest(file_pair *pair)
Open the destination file.
Definition: file_io.c:991
void io_close(file_pair *pair, bool success)
Closes the file descriptors and frees possible allocated memory.
Definition: file_io.c:1052
file_pair * io_open_src(const char *src_name)
Open the source file.
Definition: file_io.c:741
size_t io_read(file_pair *pair, io_buf *buf, size_t size)
Reads from the source file to a buffer.
Definition: file_io.c:1116
#define IO_BUFFER_SIZE
Definition: file_io.h:16
const char * filename
Definition: ioapi.h:137
void message_progress_end(bool success)
Finishes the progress message if we were in verbose mode.
Definition: message.c:707
void message_filename(const char *src_name)
Set the name of the current file and possibly print it too.
Definition: message.c:232
void message_progress_start(lzma_stream *strm, bool is_passthru, uint64_t in_size)
Start progress info handling.
Definition: message.c:249
void mytime_set_start_time(void)
Store the time when (de)compression was started.
Definition: mytime.c:51
unsigned long uint64_t
Definition: sftypes.h:28
volatile sig_atomic_t user_abort
Definition: signals.c:16
#define SIZE_MAX
struct stat src_st
Stat of the source file.
Definition: file_io.h:69
const uint8_t * next_in
Definition: base.h:486
size_t avail_in
Definition: base.h:487
uint8_t u8[IO_BUFFER_SIZE]
Definition: file_io.h:28

References lzma_stream::avail_in, coder_init(), CODER_INIT_ERROR, CODER_INIT_PASSTHRU, coder_normal(), coder_passthru(), in_buf, in_size, IO_BUFFER_SIZE, io_close(), io_open_dest(), io_open_src(), io_read(), message_filename(), message_progress_end(), message_progress_start(), MODE_COMPRESS, MODE_TEST, mytime_set_start_time(), lzma_stream::next_in, NULL, opt_mode, SIZE_MAX, file_pair::src_st, strm, io_buf::u8, and user_abort.

Referenced by main().

◆ coder_set_check()

void coder_set_check ( lzma_check  check)

Set the integrity check type used when compressing.

Definition at line 64 of file coder.c.

65 {
66  check = new_check;
67  check_default = false;
68  return;
69 }
static lzma_check check
Integrity check type.
Definition: coder.c:49
static bool check_default
This becomes false if the --check=CHECK option is used.
Definition: coder.c:52

References check, and check_default.

Referenced by parse_real().

◆ coder_set_compression_settings()

void coder_set_compression_settings ( void  )

Definition at line 137 of file coder.c.

138 {
139  // The default check type is CRC64, but fallback to CRC32
140  // if CRC64 isn't supported by the copy of liblzma we are
141  // using. CRC32 is always supported.
142  if (check_default) {
144  if (!lzma_check_is_supported(check))
146  }
147 
148  // Options for LZMA1 or LZMA2 in case we are using a preset.
150 
151  if (filters_count == 0) {
152  // We are using a preset. This is not a good idea in raw mode
153  // except when playing around with things. Different versions
154  // of this software may use different options in presets, and
155  // thus make uncompressing the raw data difficult.
156  if (opt_format == FORMAT_RAW) {
157  // The message is shown only if warnings are allowed
158  // but the exit status isn't changed.
159  message(V_WARNING, _("Using a preset in raw mode "
160  "is discouraged."));
161  message(V_WARNING, _("The exact options of the "
162  "presets may vary between software "
163  "versions."));
164  }
165 
166  // Get the preset for LZMA1 or LZMA2.
167  if (lzma_lzma_preset(&opt_lzma, preset_number))
168  message_bug();
169 
170  // Use LZMA2 except with --format=lzma we use LZMA1.
173  filters[0].options = &opt_lzma;
174  filters_count = 1;
175  }
176 
177  // Terminate the filter options array.
179 
180  // If we are using the .lzma format, allow exactly one filter
181  // which has to be LZMA1.
182  if (opt_format == FORMAT_LZMA && (filters_count != 1
183  || filters[0].id != LZMA_FILTER_LZMA1))
184  message_fatal(_("The .lzma format supports only "
185  "the LZMA1 filter"));
186 
187  // If we are using the .xz format, make sure that there is no LZMA1
188  // filter to prevent LZMA_PROG_ERROR.
189  if (opt_format == FORMAT_XZ)
190  for (size_t i = 0; i < filters_count; ++i)
191  if (filters[i].id == LZMA_FILTER_LZMA1)
192  message_fatal(_("LZMA1 cannot be used "
193  "with the .xz format"));
194 
195  // Print the selected filter chain.
197 
198  // The --flush-timeout option requires LZMA_SYNC_FLUSH support
199  // from the filter chain. Currently threaded encoder doesn't support
200  // LZMA_SYNC_FLUSH so single-threaded mode must be used.
201  if (opt_mode == MODE_COMPRESS && opt_flush_timeout != 0) {
202  for (size_t i = 0; i < filters_count; ++i) {
203  switch (filters[i].id) {
204  case LZMA_FILTER_LZMA2:
205  case LZMA_FILTER_DELTA:
206  break;
207 
208  default:
209  message_fatal(_("The filter chain is "
210  "incompatible with --flush-timeout"));
211  }
212  }
213 
214  if (hardware_threads_get() > 1) {
215  message(V_WARNING, _("Switching to single-threaded "
216  "mode due to --flush-timeout"));
218  }
219  }
220 
221  // Get the memory usage. Note that if --format=raw was used,
222  // we can be decompressing.
223  const uint64_t memory_limit = hardware_memlimit_get(opt_mode);
224  uint64_t memory_usage = UINT64_MAX;
225  if (opt_mode == MODE_COMPRESS) {
226 #ifdef HAVE_ENCODERS
227 # ifdef MYTHREAD_ENABLED
228  if (opt_format == FORMAT_XZ && hardware_threads_get() > 1) {
229  mt_options.threads = hardware_threads_get();
230  mt_options.block_size = opt_block_size;
231  mt_options.check = check;
232  memory_usage = lzma_stream_encoder_mt_memusage(
233  &mt_options);
234  if (memory_usage != UINT64_MAX)
235  message(V_DEBUG, _("Using up to %" PRIu32
236  " threads."),
237  mt_options.threads);
238  } else
239 # endif
240  {
241  memory_usage = lzma_raw_encoder_memusage(filters);
242  }
243 #endif
244  } else {
245 #ifdef HAVE_DECODERS
246  memory_usage = lzma_raw_decoder_memusage(filters);
247 #endif
248  }
249 
250  if (memory_usage == UINT64_MAX)
251  message_fatal(_("Unsupported filter chain or filter options"));
252 
253  // Print memory usage info before possible dictionary
254  // size auto-adjusting.
255  //
256  // NOTE: If only encoder support was built, we cannot show the
257  // what the decoder memory usage will be.
258  message_mem_needed(V_DEBUG, memory_usage);
259 #ifdef HAVE_DECODERS
260  if (opt_mode == MODE_COMPRESS) {
261  const uint64_t decmem = lzma_raw_decoder_memusage(filters);
262  if (decmem != UINT64_MAX)
263  message(V_DEBUG, _("Decompression will need "
264  "%s MiB of memory."), uint64_to_str(
265  round_up_to_mib(decmem), 0));
266  }
267 #endif
268 
269  if (memory_usage <= memory_limit)
270  return;
271 
272  // If --no-adjust was used or we didn't find LZMA1 or
273  // LZMA2 as the last filter, give an error immediately.
274  // --format=raw implies --no-adjust.
276  memlimit_too_small(memory_usage);
277 
279 
280 #ifdef HAVE_ENCODERS
281 # ifdef MYTHREAD_ENABLED
282  if (opt_format == FORMAT_XZ && mt_options.threads > 1) {
283  // Try to reduce the number of threads before
284  // adjusting the compression settings down.
285  do {
286  // FIXME? The real single-threaded mode has
287  // lower memory usage, but it's not comparable
288  // because it doesn't write the size info
289  // into Block Headers.
290  if (--mt_options.threads == 0)
291  memlimit_too_small(memory_usage);
292 
293  memory_usage = lzma_stream_encoder_mt_memusage(
294  &mt_options);
295  if (memory_usage == UINT64_MAX)
296  message_bug();
297 
298  } while (memory_usage > memory_limit);
299 
300  message(V_WARNING, _("Adjusted the number of threads "
301  "from %s to %s to not exceed "
302  "the memory usage limit of %s MiB"),
304  uint64_to_str(mt_options.threads, 1),
306  memory_limit), 2));
307  }
308 # endif
309 
310  if (memory_usage <= memory_limit)
311  return;
312 
313  // Look for the last filter if it is LZMA2 or LZMA1, so we can make
314  // it use less RAM. With other filters we don't know what to do.
315  size_t i = 0;
316  while (filters[i].id != LZMA_FILTER_LZMA2
317  && filters[i].id != LZMA_FILTER_LZMA1) {
318  if (filters[i].id == LZMA_VLI_UNKNOWN)
319  memlimit_too_small(memory_usage);
320 
321  ++i;
322  }
323 
324  // Decrease the dictionary size until we meet the memory
325  // usage limit. First round down to full mebibytes.
327  const uint32_t orig_dict_size = opt->dict_size;
328  opt->dict_size &= ~((UINT32_C(1) << 20) - 1);
329  while (true) {
330  // If it is below 1 MiB, auto-adjusting failed. We could be
331  // more sophisticated and scale it down even more, but let's
332  // see if many complain about this version.
333  //
334  // FIXME: Displays the scaled memory usage instead
335  // of the original.
336  if (opt->dict_size < (UINT32_C(1) << 20))
337  memlimit_too_small(memory_usage);
338 
339  memory_usage = lzma_raw_encoder_memusage(filters);
340  if (memory_usage == UINT64_MAX)
341  message_bug();
342 
343  // Accept it if it is low enough.
344  if (memory_usage <= memory_limit)
345  break;
346 
347  // Otherwise 1 MiB down and try again. I hope this
348  // isn't too slow method for cases where the original
349  // dict_size is very big.
350  opt->dict_size -= UINT32_C(1) << 20;
351  }
352 
353  // Tell the user that we decreased the dictionary size.
354  message(V_WARNING, _("Adjusted LZMA%c dictionary size "
355  "from %s MiB to %s MiB to not exceed "
356  "the memory usage limit of %s MiB"),
358  ? '2' : '1',
359  uint64_to_str(orig_dict_size >> 20, 0),
360  uint64_to_str(opt->dict_size >> 20, 1),
361  uint64_to_str(round_up_to_mib(memory_limit), 2));
362 #endif
363 
364  return;
365 }
@ LZMA_CHECK_CRC32
Definition: check.h:35
@ LZMA_CHECK_CRC64
Definition: check.h:42
lzma_index ** i
Definition: index.h:629
enum format_type opt_format
Definition: coder.c:25
bool opt_auto_adjust
Definition: coder.c:26
uint64_t opt_block_size
Definition: coder.c:28
#define LZMA_FILTER_DELTA
Filter ID.
Definition: delta.h:25
void hardware_threads_set(uint32_t n)
Set the maximum number of worker threads.
Definition: hardware.c:31
uint32_t hardware_threads_get(void)
Get the maximum number of worker threads.
Definition: hardware.c:55
uint64_t hardware_memlimit_get(enum operation_mode mode)
Get the current memory usage limit for compression or decompression.
Definition: hardware.c:112
#define PRIu32
Definition: macros.h:20
#define LZMA_FILTER_LZMA2
LZMA2 Filter ID.
Definition: lzma12.h:40
#define LZMA_FILTER_LZMA1
LZMA1 Filter ID.
Definition: lzma12.h:30
assert(limit<=UINT32_MAX/2)
void message_mem_needed(enum message_verbosity v, uint64_t memusage)
Display how much memory was needed and how much the limit was.
Definition: message.c:846
void message_bug(void)
Definition: message.c:789
void message_filters_show(enum message_verbosity v, const lzma_filter *filters)
Print the filter chain.
Definition: message.c:1050
@ V_DEBUG
Very verbose.
Definition: message.h:19
@ V_WARNING
Errors and warnings.
Definition: message.h:17
uint64_t opt_flush_timeout
Number of milliseconds between LZMA_SYNC_FLUSHes.
Definition: mytime.c:19
unsigned int uint32_t
Definition: sftypes.h:29
#define UINT32_C(val)
#define UINT64_MAX
Options specific to the LZMA1 and LZMA2 filters.
Definition: lzma12.h:185
uint32_t dict_size
Dictionary size in bytes.
Definition: lzma12.h:217
char * message
Definition: main.c:12
const char * uint64_to_str(uint64_t value, uint32_t slot)
Convert uint64_t to a string.
Definition: util.c:171
uint64_t round_up_to_mib(uint64_t n)
Round an integer up to the next full MiB and convert to MiB.
Definition: util.c:139
static lzma_options_lzma opt_lzma
#define LZMA_VLI_UNKNOWN
VLI value to denote that the value is unknown.
Definition: vli.h:39

Referenced by args_parse().

◆ coder_set_extreme()

void coder_set_extreme ( void  )

Enable extreme mode.

Definition at line 98 of file coder.c.

99 {
102  return;
103 }
static void forget_filter_chain(void)
Definition: coder.c:73
#define LZMA_PRESET_EXTREME
Extreme compression preset.
Definition: container.h:60

References forget_filter_chain(), LZMA_PRESET_EXTREME, and preset_number.

Referenced by parse_real().

◆ coder_set_preset()

void coder_set_preset ( uint32_t  new_preset)

Set preset number.

Definition at line 88 of file coder.c.

89 {
91  preset_number |= new_preset;
93  return;
94 }
#define LZMA_PRESET_LEVEL_MASK
Mask for preset level.
Definition: container.h:40

References forget_filter_chain(), LZMA_PRESET_LEVEL_MASK, and preset_number.

Referenced by parse_real().

Variable Documentation

◆ opt_auto_adjust

bool opt_auto_adjust
extern

If true, the compression settings are automatically adjusted down if they exceed the memory usage limit.

Definition at line 26 of file coder.c.

Referenced by coder_init(), and parse_real().

◆ opt_block_list

uint64_t* opt_block_list
extern

This is non-NULL if --block-list was used. This contains the Block sizes as an array that is terminated with 0.

Definition at line 29 of file coder.c.

Referenced by args_free(), coder_normal(), parse_block_list(), and split_block().

◆ opt_block_size

uint64_t opt_block_size
extern

If non-zero, start a new .xz Block after every opt_block_size bytes of input. This has an effect only when compressing to the .xz format.

Definition at line 28 of file coder.c.

Referenced by coder_init(), coder_normal(), parse_real(), and split_block().

◆ opt_format

enum format_type opt_format
extern

File format to use when encoding or what format(s) to accept when decoding. This is a global because it's needed also in suffix.c. This is set in args.c.

Definition at line 25 of file coder.c.

Referenced by args_parse(), coder_init(), coder_normal(), compressed_name(), list_file(), parse_real(), and uncompressed_name().

◆ opt_mode

enum operation_mode opt_mode
extern

Operation mode of the command line tool. This is set in args.c and read in several files.

Definition at line 24 of file coder.c.

Referenced by args_parse(), coder_init(), coder_normal(), coder_run(), coder_write_output(), io_open_dest_real(), io_open_src_real(), main(), message_filename(), message_mem_needed(), mytime_get_flush_timeout(), parse_real(), print_filename(), progress_pos(), and suffix_get_dest_name().

◆ opt_single_stream

bool opt_single_stream
extern

If true, stop after decoding the first stream.

Definition at line 27 of file coder.c.

Referenced by coder_normal(), and parse_real().