Rizin
unix-like reverse engineering framework and cli tools
coder.c File Reference

Compresses or uncompresses a file. More...

#include "private.h"

Go to the source code of this file.

Enumerations

enum  coder_init_ret { CODER_INIT_NORMAL , CODER_INIT_PASSTHRU , CODER_INIT_ERROR }
 Return value type for coder_init(). More...
 

Functions

void coder_set_check (lzma_check new_check)
 Set the integrity check type used when compressing. More...
 
static void forget_filter_chain (void)
 
void coder_set_preset (uint32_t new_preset)
 Set preset number. More...
 
void coder_set_extreme (void)
 Enable extreme mode. More...
 
void coder_add_filter (lzma_vli id, void *options)
 Add a filter to the custom filter chain. More...
 
static void lzma_attribute ((__noreturn__))
 
void coder_set_compression_settings (void)
 
static enum coder_init_ret coder_init (file_pair *pair)
 
static void split_block (uint64_t *block_remaining, uint64_t *next_block_remaining, size_t *list_pos)
 
static bool coder_write_output (file_pair *pair)
 
static bool coder_normal (file_pair *pair)
 Compress or decompress using liblzma. More...
 
static bool coder_passthru (file_pair *pair)
 
void coder_run (const char *filename)
 Compress or decompress the given file. More...
 
void coder_free (void)
 Free the memory allocated for the coder and kill the worker threads. More...
 

Variables

enum operation_mode opt_mode = MODE_COMPRESS
 
enum format_type opt_format = FORMAT_AUTO
 
bool opt_auto_adjust = true
 
bool opt_single_stream = false
 If true, stop after decoding the first stream. More...
 
uint64_t opt_block_size = 0
 
uint64_t * opt_block_list = NULL
 
static lzma_stream strm = LZMA_STREAM_INIT
 Stream used to communicate with liblzma. More...
 
static lzma_filter filters [LZMA_FILTERS_MAX+1]
 Filters needed for encoding in all formats, and also for decoding raw data. More...
 
static io_buf in_buf
 Input and output buffers. More...
 
static io_buf out_buf
 
static uint32_t filters_count = 0
 Number of filters. Zero indicates that we are using a preset. More...
 
static uint32_t preset_number = LZMA_PRESET_DEFAULT
 Number of the preset (0-9) More...
 
static lzma_check check
 Integrity check type. More...
 
static bool check_default = true
 This becomes false if the --check=CHECK option is used. More...
 

Detailed Description

Compresses or uncompresses a file.

Definition in file coder.c.

Enumeration Type Documentation

◆ coder_init_ret

Return value type for coder_init().

Enumerator
CODER_INIT_NORMAL 
CODER_INIT_PASSTHRU 
CODER_INIT_ERROR 

Definition at line 17 of file coder.c.

17  {
21 };
@ CODER_INIT_ERROR
Definition: coder.c:20
@ CODER_INIT_NORMAL
Definition: coder.c:18
@ CODER_INIT_PASSTHRU
Definition: coder.c:19

Function Documentation

◆ coder_add_filter()

void coder_add_filter ( lzma_vli  id,
void *  options 
)

Add a filter to the custom filter chain.

Definition at line 107 of file coder.c.

108 {
110  message_fatal(_("Maximum number of filters is four"));
111 
114  ++filters_count;
115 
116  // Setting a custom filter chain makes us forget the preset options.
117  // This makes a difference if one specifies e.g. "xz -9 --lzma2 -e"
118  // where the custom filter chain resets the preset level back to
119  // the default 6, making the example equivalent to "xz -6e".
121 
122  return;
123 }
static uint32_t preset_number
Number of the preset (0-9)
Definition: coder.c:46
static lzma_filter filters[LZMA_FILTERS_MAX+1]
Filters needed for encoding in all formats, and also for decoding raw data.
Definition: coder.c:36
static uint32_t filters_count
Number of filters. Zero indicates that we are using a preset.
Definition: coder.c:43
#define LZMA_PRESET_DEFAULT
Default compression preset.
Definition: container.h:31
#define LZMA_FILTERS_MAX
Maximum number of filters in a chain.
Definition: filter.h:26
static const char struct stat static buf struct stat static buf static vhangup int options
Definition: sflib.h:145
void message_fatal(const char *fmt,...)
Definition: message.c:777
int id
Definition: op.c:540
#define _(String)
Definition: opintl.h:53
void * options
Pointer to filter-specific options structure.
Definition: filter.h:63
lzma_vli id
Filter ID.
Definition: filter.h:54

References _, filters, filters_count, id, lzma_filter::id, LZMA_FILTERS_MAX, LZMA_PRESET_DEFAULT, message_fatal(), options, lzma_filter::options, and preset_number.

Referenced by parse_real().

◆ coder_free()

void coder_free ( void  )

Free the memory allocated for the coder and kill the worker threads.

Definition at line 939 of file coder.c.

940 {
941  lzma_end(&strm);
942  return;
943 }
static lzma_stream strm
Stream used to communicate with liblzma.
Definition: coder.c:33

References strm.

Referenced by main().

◆ coder_init()

static enum coder_init_ret coder_init ( file_pair pair)
static

Detect the input file type (for now, this is done only when decompressing), and initialize an appropriate coder. Return value indicates if a normal liblzma-based coder was initialized (CODER_INIT_NORMAL), if passthru mode should be used (CODER_INIT_PASSTHRU), or if an error occurred (CODER_INIT_ERROR).

Definition at line 435 of file coder.c.

436 {
438 
439  if (opt_mode == MODE_COMPRESS) {
440 #ifdef HAVE_ENCODERS
441  switch (opt_format) {
442  case FORMAT_AUTO:
443  // args.c ensures this.
444  assert(0);
445  break;
446 
447  case FORMAT_XZ:
448 # ifdef MYTHREAD_ENABLED
449  if (hardware_threads_get() > 1)
450  ret = lzma_stream_encoder_mt(
451  &strm, &mt_options);
452  else
453 # endif
454  ret = lzma_stream_encoder(
455  &strm, filters, check);
456  break;
457 
458  case FORMAT_LZMA:
459  ret = lzma_alone_encoder(&strm, filters[0].options);
460  break;
461 
462  case FORMAT_RAW:
463  ret = lzma_raw_encoder(&strm, filters);
464  break;
465  }
466 #endif
467  } else {
468 #ifdef HAVE_DECODERS
469  uint32_t flags = 0;
470 
471  // It seems silly to warn about unsupported check if the
472  // check won't be verified anyway due to --ignore-check.
473  if (opt_ignore_check)
475  else
477 
478  if (!opt_single_stream)
480 
481  // We abuse FORMAT_AUTO to indicate unknown file format,
482  // for which we may consider passthru mode.
483  enum format_type init_format = FORMAT_AUTO;
484 
485  switch (opt_format) {
486  case FORMAT_AUTO:
487  if (is_format_xz())
488  init_format = FORMAT_XZ;
489  else if (is_format_lzma())
490  init_format = FORMAT_LZMA;
491  break;
492 
493  case FORMAT_XZ:
494  if (is_format_xz())
495  init_format = FORMAT_XZ;
496  break;
497 
498  case FORMAT_LZMA:
499  if (is_format_lzma())
500  init_format = FORMAT_LZMA;
501  break;
502 
503  case FORMAT_RAW:
504  init_format = FORMAT_RAW;
505  break;
506  }
507 
508  switch (init_format) {
509  case FORMAT_AUTO:
510  // Unknown file format. If --decompress --stdout
511  // --force have been given, then we copy the input
512  // as is to stdout. Checking for MODE_DECOMPRESS
513  // is needed, because we don't want to do use
514  // passthru mode with --test.
516  && opt_stdout && opt_force)
517  return CODER_INIT_PASSTHRU;
518 
519  ret = LZMA_FORMAT_ERROR;
520  break;
521 
522  case FORMAT_XZ:
523  ret = lzma_stream_decoder(&strm,
526  break;
527 
528  case FORMAT_LZMA:
529  ret = lzma_alone_decoder(&strm,
531  MODE_DECOMPRESS));
532  break;
533 
534  case FORMAT_RAW:
535  // Memory usage has already been checked in
536  // coder_set_compression_settings().
537  ret = lzma_raw_decoder(&strm, filters);
538  break;
539  }
540 
541  // Try to decode the headers. This will catch too low
542  // memory usage limit in case it happens in the first
543  // Block of the first Stream, which is where it very
544  // probably will happen if it is going to happen.
545  if (ret == LZMA_OK && init_format != FORMAT_RAW) {
546  strm.next_out = NULL;
547  strm.avail_out = 0;
548  ret = lzma_code(&strm, LZMA_RUN);
549  }
550 #endif
551  }
552 
553  if (ret != LZMA_OK) {
554  message_error("%s: %s", pair->src_name, message_strm(ret));
555  if (ret == LZMA_MEMLIMIT_ERROR)
556  message_mem_needed(V_ERROR, lzma_memusage(&strm));
557 
558  return CODER_INIT_ERROR;
559  }
560 
561  return CODER_INIT_NORMAL;
562 }
bool opt_stdout
Definition: args.c:21
bool opt_force
Definition: args.c:22
bool opt_ignore_check
Definition: args.c:25
static lzma_check check
Integrity check type.
Definition: coder.c:49
enum format_type opt_format
Definition: coder.c:25
bool opt_single_stream
If true, stop after decoding the first stream.
Definition: coder.c:27
enum operation_mode opt_mode
Definition: coder.c:24
@ MODE_COMPRESS
Definition: coder.h:14
@ MODE_DECOMPRESS
Definition: coder.h:15
format_type
Definition: coder.h:22
@ FORMAT_RAW
Definition: coder.h:27
@ FORMAT_AUTO
Definition: coder.h:23
@ FORMAT_XZ
Definition: coder.h:24
@ FORMAT_LZMA
Definition: coder.h:25
#define LZMA_IGNORE_CHECK
Definition: container.h:498
#define LZMA_CONCATENATED
Definition: container.h:515
#define LZMA_TELL_UNSUPPORTED_CHECK
Definition: container.h:466
#define NULL
Definition: cris-opc.c:27
uint32_t hardware_threads_get(void)
Get the maximum number of worker threads.
Definition: hardware.c:55
uint64_t hardware_memlimit_get(enum operation_mode mode)
Get the current memory usage limit for compression or decompression.
Definition: hardware.c:112
assert(limit<=UINT32_MAX/2)
void message_mem_needed(enum message_verbosity v, uint64_t memusage)
Display how much memory was needed and how much the limit was.
Definition: message.c:846
void message_error(const char *fmt,...)
Definition: message.c:764
const char * message_strm(lzma_ret code)
Convert lzma_ret to a string.
Definition: message.c:803
@ V_ERROR
Only error messages.
Definition: message.h:16
static struct sockaddr static addrlen static backlog const void static flags void flags
Definition: sfsocketcall.h:123
unsigned int uint32_t
Definition: sftypes.h:29
const char * src_name
Definition: file_io.h:37
uint8_t * next_out
Definition: base.h:490
size_t avail_out
Definition: base.h:491
lzma_ret
Return values used by several functions in liblzma.
Definition: base.h:57
@ LZMA_PROG_ERROR
Programming error.
Definition: base.h:218
@ LZMA_FORMAT_ERROR
File format not recognized.
Definition: base.h:150
@ LZMA_MEMLIMIT_ERROR
Memory usage limit was reached.
Definition: base.h:140
@ LZMA_OK
Operation completed successfully.
Definition: base.h:58
@ LZMA_RUN
Continue coding.
Definition: base.h:251

References _, assert(), lzma_mt::block_size, lzma_mt::check, check, check_default, lzma_options_lzma::dict_size, filters, filters_count, FORMAT_LZMA, FORMAT_RAW, FORMAT_XZ, hardware_memlimit_get(), hardware_threads_get(), hardware_threads_set(), i, lzma_filter::id, LZMA_CHECK_CRC32, LZMA_CHECK_CRC64, LZMA_FILTER_DELTA, LZMA_FILTER_LZMA1, LZMA_FILTER_LZMA2, LZMA_VLI_UNKNOWN, message, message_bug(), message_fatal(), message_filters_show(), message_mem_needed(), MODE_COMPRESS, opt_auto_adjust, opt_block_size, opt_flush_timeout, opt_format, opt_lzma, opt_mode, lzma_filter::options, preset_number, PRIu32, round_up_to_mib(), lzma_mt::threads, UINT32_C, UINT64_MAX, uint64_to_str(), V_DEBUG, and V_WARNING.

Referenced by coder_run().

◆ coder_normal()

static bool coder_normal ( file_pair pair)
static

Compress or decompress using liblzma.

Definition at line 631 of file coder.c.

632 {
633  // Encoder needs to know when we have given all the input to it.
634  // The decoders need to know it too when we are using
635  // LZMA_CONCATENATED. We need to check for src_eof here, because
636  // the first input chunk has been already read if decompressing,
637  // and that may have been the only chunk we will read.
639 
640  lzma_ret ret;
641 
642  // Assume that something goes wrong.
643  bool success = false;
644 
645  // block_remaining indicates how many input bytes to encode before
646  // finishing the current .xz Block. The Block size is set with
647  // --block-size=SIZE and --block-list. They have an effect only when
648  // compressing to the .xz format. If block_remaining == UINT64_MAX,
649  // only a single block is created.
650  uint64_t block_remaining = UINT64_MAX;
651 
652  // next_block_remaining for when we are in single-threaded mode and
653  // the Block in --block-list is larger than the --block-size=SIZE.
654  uint64_t next_block_remaining = 0;
655 
656  // Position in opt_block_list. Unused if --block-list wasn't used.
657  size_t list_pos = 0;
658 
659  // Handle --block-size for single-threaded mode and the first step
660  // of --block-list.
662  // --block-size doesn't do anything here in threaded mode,
663  // because the threaded encoder will take care of splitting
664  // to fixed-sized Blocks.
665  if (hardware_threads_get() == 1 && opt_block_size > 0)
666  block_remaining = opt_block_size;
667 
668  // If --block-list was used, start with the first size.
669  //
670  // For threaded case, --block-size specifies how big Blocks
671  // the encoder needs to be prepared to create at maximum
672  // and --block-list will simultaneously cause new Blocks
673  // to be started at specified intervals. To keep things
674  // logical, the same is done in single-threaded mode. The
675  // output is still not identical because in single-threaded
676  // mode the size info isn't written into Block Headers.
677  if (opt_block_list != NULL) {
678  if (block_remaining < opt_block_list[list_pos]) {
680  next_block_remaining = opt_block_list[list_pos]
681  - block_remaining;
682  } else {
683  block_remaining = opt_block_list[list_pos];
684  }
685  }
686  }
687 
690 
691  while (!user_abort) {
692  // Fill the input buffer if it is empty and we aren't
693  // flushing or finishing.
694  if (strm.avail_in == 0 && action == LZMA_RUN) {
695  strm.next_in = in_buf.u8;
696  strm.avail_in = io_read(pair, &in_buf,
697  my_min(block_remaining,
698  IO_BUFFER_SIZE));
699 
700  if (strm.avail_in == SIZE_MAX)
701  break;
702 
703  if (pair->src_eof) {
705 
706  } else if (block_remaining != UINT64_MAX) {
707  // Start a new Block after every
708  // opt_block_size bytes of input.
709  block_remaining -= strm.avail_in;
710  if (block_remaining == 0)
712  }
713 
714  if (action == LZMA_RUN && pair->flush_needed)
716  }
717 
718  // Let liblzma do the actual work.
719  ret = lzma_code(&strm, action);
720 
721  // Write out if the output buffer became full.
722  if (strm.avail_out == 0) {
723  if (coder_write_output(pair))
724  break;
725  }
726 
727  if (ret == LZMA_STREAM_END && (action == LZMA_SYNC_FLUSH
728  || action == LZMA_FULL_BARRIER)) {
729  if (action == LZMA_SYNC_FLUSH) {
730  // Flushing completed. Write the pending data
731  // out immediately so that the reading side
732  // can decompress everything compressed so far.
733  if (coder_write_output(pair))
734  break;
735 
736  // Mark that we haven't seen any new input
737  // since the previous flush.
738  pair->src_has_seen_input = false;
739  pair->flush_needed = false;
740  } else {
741  // Start a new Block after LZMA_FULL_BARRIER.
742  if (opt_block_list == NULL) {
744  assert(opt_block_size > 0);
745  block_remaining = opt_block_size;
746  } else {
747  split_block(&block_remaining,
748  &next_block_remaining,
749  &list_pos);
750  }
751  }
752 
753  // Start a new Block after LZMA_FULL_FLUSH or continue
754  // the same block after LZMA_SYNC_FLUSH.
755  action = LZMA_RUN;
756 
757  } else if (ret != LZMA_OK) {
758  // Determine if the return value indicates that we
759  // won't continue coding.
760  const bool stop = ret != LZMA_NO_CHECK
761  && ret != LZMA_UNSUPPORTED_CHECK;
762 
763  if (stop) {
764  // Write the remaining bytes even if something
765  // went wrong, because that way the user gets
766  // as much data as possible, which can be good
767  // when trying to get at least some useful
768  // data out of damaged files.
769  if (coder_write_output(pair))
770  break;
771  }
772 
773  if (ret == LZMA_STREAM_END) {
774  if (opt_single_stream) {
776  success = true;
777  break;
778  }
779 
780  // Check that there is no trailing garbage.
781  // This is needed for LZMA_Alone and raw
782  // streams.
783  if (strm.avail_in == 0 && !pair->src_eof) {
784  // Try reading one more byte.
785  // Hopefully we don't get any more
786  // input, and thus pair->src_eof
787  // becomes true.
789  pair, &in_buf, 1);
790  if (strm.avail_in == SIZE_MAX)
791  break;
792 
793  assert(strm.avail_in == 0
794  || strm.avail_in == 1);
795  }
796 
797  if (strm.avail_in == 0) {
798  assert(pair->src_eof);
799  success = true;
800  break;
801  }
802 
803  // We hadn't reached the end of the file.
804  ret = LZMA_DATA_ERROR;
805  assert(stop);
806  }
807 
808  // If we get here and stop is true, something went
809  // wrong and we print an error. Otherwise it's just
810  // a warning and coding can continue.
811  if (stop) {
812  message_error("%s: %s", pair->src_name,
813  message_strm(ret));
814  } else {
815  message_warning("%s: %s", pair->src_name,
816  message_strm(ret));
817 
818  // When compressing, all possible errors set
819  // stop to true.
821  }
822 
823  if (ret == LZMA_MEMLIMIT_ERROR) {
824  // Display how much memory it would have
825  // actually needed.
827  lzma_memusage(&strm));
828  }
829 
830  if (stop)
831  break;
832  }
833 
834  // Show progress information under certain conditions.
836  }
837 
838  return success;
839 }
static io_buf out_buf
Definition: coder.c:40
uint64_t * opt_block_list
Definition: coder.c:29
static void split_block(uint64_t *block_remaining, uint64_t *next_block_remaining, size_t *list_pos)
Definition: coder.c:571
static bool coder_write_output(file_pair *pair)
Definition: coder.c:616
static io_buf in_buf
Input and output buffers.
Definition: coder.c:39
uint64_t opt_block_size
Definition: coder.c:28
void io_fix_src_pos(file_pair *pair, size_t rewind_size)
Fix the position in src_fd.
Definition: file_io.c:1101
size_t io_read(file_pair *pair, io_buf *buf, size_t size)
Reads from the source file to a buffer.
Definition: file_io.c:1116
#define IO_BUFFER_SIZE
Definition: file_io.h:16
void message_warning(const char *fmt,...)
Definition: message.c:751
void message_progress_update(void)
Definition: message.c:545
unsigned long uint64_t
Definition: sftypes.h:28
volatile sig_atomic_t user_abort
Definition: signals.c:16
#define UINT64_MAX
#define SIZE_MAX
bool flush_needed
For --flush-timeout: True when flushing is needed.
Definition: file_io.h:57
bool src_has_seen_input
Definition: file_io.h:54
bool src_eof
True once end of the source file has been detected.
Definition: file_io.h:50
const uint8_t * next_in
Definition: base.h:486
size_t avail_in
Definition: base.h:487
#define my_min(x, y)
Definition: sysdefs.h:185
uint8_t u8[IO_BUFFER_SIZE]
Definition: file_io.h:28
@ LZMA_DATA_ERROR
Data is corrupt.
Definition: base.h:172
@ LZMA_STREAM_END
End of stream was reached.
Definition: base.h:63
@ LZMA_UNSUPPORTED_CHECK
Cannot calculate the integrity check.
Definition: base.h:90
@ LZMA_NO_CHECK
Input stream has no integrity check.
Definition: base.h:75
lzma_action
The ‘action’ argument for lzma_code()
Definition: base.h:250
@ LZMA_SYNC_FLUSH
Make all the input available at output.
Definition: base.h:265
@ LZMA_FINISH
Finish the coding operation.
Definition: base.h:328
@ LZMA_FULL_BARRIER
Finish encoding of the current Block.
Definition: base.h:305

References test-lz4-speed::action, assert(), lzma_stream::avail_in, lzma_stream::avail_out, coder_write_output(), file_pair::flush_needed, FORMAT_XZ, hardware_threads_get(), in_buf, IO_BUFFER_SIZE, io_fix_src_pos(), io_read(), LZMA_DATA_ERROR, LZMA_FINISH, LZMA_FULL_BARRIER, LZMA_MEMLIMIT_ERROR, LZMA_NO_CHECK, LZMA_OK, LZMA_RUN, LZMA_STREAM_END, LZMA_SYNC_FLUSH, LZMA_UNSUPPORTED_CHECK, message_error(), message_mem_needed(), message_progress_update(), message_strm(), message_warning(), MODE_COMPRESS, my_min, lzma_stream::next_in, lzma_stream::next_out, NULL, opt_block_list, opt_block_size, opt_format, opt_mode, opt_single_stream, out_buf, SIZE_MAX, split_block(), file_pair::src_eof, file_pair::src_has_seen_input, file_pair::src_name, strm, io_buf::u8, UINT64_MAX, user_abort, and V_ERROR.

Referenced by coder_run().

◆ coder_passthru()

static bool coder_passthru ( file_pair pair)
static

Copy from input file to output file without processing the data in any way. This is used only when trying to decompress unrecognized files with --decompress --stdout --force, so the output is always stdout.

Definition at line 846 of file coder.c.

847 {
848  while (strm.avail_in != 0) {
849  if (user_abort)
850  return false;
851 
852  if (io_write(pair, &in_buf, strm.avail_in))
853  return false;
854 
858 
860  if (strm.avail_in == SIZE_MAX)
861  return false;
862  }
863 
864  return true;
865 }
bool io_write(file_pair *pair, const io_buf *buf, size_t size)
Writes a buffer to the destination file.
Definition: file_io.c:1275
uint64_t total_in
Definition: base.h:488
uint64_t total_out
Definition: base.h:492

References lzma_stream::avail_in, in_buf, IO_BUFFER_SIZE, io_read(), io_write(), message_progress_update(), SIZE_MAX, strm, lzma_stream::total_in, lzma_stream::total_out, and user_abort.

Referenced by coder_run().

◆ coder_run()

void coder_run ( const char *  filename)

Compress or decompress the given file.

Definition at line 869 of file coder.c.

870 {
871  // Set and possibly print the filename for the progress message.
873 
874  // Try to open the input file.
875  file_pair *pair = io_open_src(filename);
876  if (pair == NULL)
877  return;
878 
879  // Assume that something goes wrong.
880  bool success = false;
881 
882  if (opt_mode == MODE_COMPRESS) {
883  strm.next_in = NULL;
884  strm.avail_in = 0;
885  } else {
886  // Read the first chunk of input data. This is needed
887  // to detect the input file type.
888  strm.next_in = in_buf.u8;
890  }
891 
892  if (strm.avail_in != SIZE_MAX) {
893  // Initialize the coder. This will detect the file format
894  // and, in decompression or testing mode, check the memory
895  // usage of the first Block too. This way we don't try to
896  // open the destination file if we see that coding wouldn't
897  // work at all anyway. This also avoids deleting the old
898  // "target" file if --force was used.
899  const enum coder_init_ret init_ret = coder_init(pair);
900 
901  if (init_ret != CODER_INIT_ERROR && !user_abort) {
902  // Don't open the destination file when --test
903  // is used.
904  if (opt_mode == MODE_TEST || !io_open_dest(pair)) {
905  // Remember the current time. It is needed
906  // for progress indicator.
908 
909  // Initialize the progress indicator.
910  const bool is_passthru = init_ret
912  const uint64_t in_size
913  = pair->src_st.st_size <= 0
914  ? 0 : (uint64_t)(pair->src_st.st_size);
916  is_passthru, in_size);
917 
918  // Do the actual coding or passthru.
919  if (is_passthru)
920  success = coder_passthru(pair);
921  else
922  success = coder_normal(pair);
923 
924  message_progress_end(success);
925  }
926  }
927  }
928 
929  // Close the file pair. It needs to know if coding was successful to
930  // know if the source or target file should be unlinked.
931  io_close(pair, success);
932 
933  return;
934 }
const lzma_allocator const uint8_t size_t in_size
Definition: block.h:527
static bool coder_normal(file_pair *pair)
Compress or decompress using liblzma.
Definition: coder.c:631
static enum coder_init_ret coder_init(file_pair *pair)
Definition: coder.c:435
coder_init_ret
Return value type for coder_init().
Definition: coder.c:17
static bool coder_passthru(file_pair *pair)
Definition: coder.c:846
@ MODE_TEST
Definition: coder.h:16
bool io_open_dest(file_pair *pair)
Open the destination file.
Definition: file_io.c:991
void io_close(file_pair *pair, bool success)
Closes the file descriptors and frees possible allocated memory.
Definition: file_io.c:1052
file_pair * io_open_src(const char *src_name)
Open the source file.
Definition: file_io.c:741
const char * filename
Definition: ioapi.h:137
void message_progress_end(bool success)
Finishes the progress message if we were in verbose mode.
Definition: message.c:707
void message_filename(const char *src_name)
Set the name of the current file and possibly print it too.
Definition: message.c:232
void message_progress_start(lzma_stream *strm, bool is_passthru, uint64_t in_size)
Start progress info handling.
Definition: message.c:249
void mytime_set_start_time(void)
Store the time when (de)compression was started.
Definition: mytime.c:51
struct stat src_st
Stat of the source file.
Definition: file_io.h:69

References lzma_stream::avail_in, coder_init(), CODER_INIT_ERROR, CODER_INIT_PASSTHRU, coder_normal(), coder_passthru(), in_buf, in_size, IO_BUFFER_SIZE, io_close(), io_open_dest(), io_open_src(), io_read(), message_filename(), message_progress_end(), message_progress_start(), MODE_COMPRESS, MODE_TEST, mytime_set_start_time(), lzma_stream::next_in, NULL, opt_mode, SIZE_MAX, file_pair::src_st, strm, io_buf::u8, and user_abort.

Referenced by main().

◆ coder_set_check()

void coder_set_check ( lzma_check  new_check)

Set the integrity check type used when compressing.

Definition at line 64 of file coder.c.

65 {
66  check = new_check;
67  check_default = false;
68  return;
69 }
static bool check_default
This becomes false if the --check=CHECK option is used.
Definition: coder.c:52

References check, and check_default.

Referenced by parse_real().

◆ coder_set_compression_settings()

void coder_set_compression_settings ( void  )

Definition at line 137 of file coder.c.

138 {
139  // The default check type is CRC64, but fallback to CRC32
140  // if CRC64 isn't supported by the copy of liblzma we are
141  // using. CRC32 is always supported.
142  if (check_default) {
144  if (!lzma_check_is_supported(check))
146  }
147 
148  // Options for LZMA1 or LZMA2 in case we are using a preset.
150 
151  if (filters_count == 0) {
152  // We are using a preset. This is not a good idea in raw mode
153  // except when playing around with things. Different versions
154  // of this software may use different options in presets, and
155  // thus make uncompressing the raw data difficult.
156  if (opt_format == FORMAT_RAW) {
157  // The message is shown only if warnings are allowed
158  // but the exit status isn't changed.
159  message(V_WARNING, _("Using a preset in raw mode "
160  "is discouraged."));
161  message(V_WARNING, _("The exact options of the "
162  "presets may vary between software "
163  "versions."));
164  }
165 
166  // Get the preset for LZMA1 or LZMA2.
167  if (lzma_lzma_preset(&opt_lzma, preset_number))
168  message_bug();
169 
170  // Use LZMA2 except with --format=lzma we use LZMA1.
173  filters[0].options = &opt_lzma;
174  filters_count = 1;
175  }
176 
177  // Terminate the filter options array.
179 
180  // If we are using the .lzma format, allow exactly one filter
181  // which has to be LZMA1.
182  if (opt_format == FORMAT_LZMA && (filters_count != 1
183  || filters[0].id != LZMA_FILTER_LZMA1))
184  message_fatal(_("The .lzma format supports only "
185  "the LZMA1 filter"));
186 
187  // If we are using the .xz format, make sure that there is no LZMA1
188  // filter to prevent LZMA_PROG_ERROR.
189  if (opt_format == FORMAT_XZ)
190  for (size_t i = 0; i < filters_count; ++i)
191  if (filters[i].id == LZMA_FILTER_LZMA1)
192  message_fatal(_("LZMA1 cannot be used "
193  "with the .xz format"));
194 
195  // Print the selected filter chain.
197 
198  // The --flush-timeout option requires LZMA_SYNC_FLUSH support
199  // from the filter chain. Currently threaded encoder doesn't support
200  // LZMA_SYNC_FLUSH so single-threaded mode must be used.
201  if (opt_mode == MODE_COMPRESS && opt_flush_timeout != 0) {
202  for (size_t i = 0; i < filters_count; ++i) {
203  switch (filters[i].id) {
204  case LZMA_FILTER_LZMA2:
205  case LZMA_FILTER_DELTA:
206  break;
207 
208  default:
209  message_fatal(_("The filter chain is "
210  "incompatible with --flush-timeout"));
211  }
212  }
213 
214  if (hardware_threads_get() > 1) {
215  message(V_WARNING, _("Switching to single-threaded "
216  "mode due to --flush-timeout"));
218  }
219  }
220 
221  // Get the memory usage. Note that if --format=raw was used,
222  // we can be decompressing.
223  const uint64_t memory_limit = hardware_memlimit_get(opt_mode);
224  uint64_t memory_usage = UINT64_MAX;
225  if (opt_mode == MODE_COMPRESS) {
226 #ifdef HAVE_ENCODERS
227 # ifdef MYTHREAD_ENABLED
228  if (opt_format == FORMAT_XZ && hardware_threads_get() > 1) {
229  mt_options.threads = hardware_threads_get();
230  mt_options.block_size = opt_block_size;
231  mt_options.check = check;
232  memory_usage = lzma_stream_encoder_mt_memusage(
233  &mt_options);
234  if (memory_usage != UINT64_MAX)
235  message(V_DEBUG, _("Using up to %" PRIu32
236  " threads."),
237  mt_options.threads);
238  } else
239 # endif
240  {
241  memory_usage = lzma_raw_encoder_memusage(filters);
242  }
243 #endif
244  } else {
245 #ifdef HAVE_DECODERS
246  memory_usage = lzma_raw_decoder_memusage(filters);
247 #endif
248  }
249 
250  if (memory_usage == UINT64_MAX)
251  message_fatal(_("Unsupported filter chain or filter options"));
252 
253  // Print memory usage info before possible dictionary
254  // size auto-adjusting.
255  //
256  // NOTE: If only encoder support was built, we cannot show the
257  // what the decoder memory usage will be.
258  message_mem_needed(V_DEBUG, memory_usage);
259 #ifdef HAVE_DECODERS
260  if (opt_mode == MODE_COMPRESS) {
261  const uint64_t decmem = lzma_raw_decoder_memusage(filters);
262  if (decmem != UINT64_MAX)
263  message(V_DEBUG, _("Decompression will need "
264  "%s MiB of memory."), uint64_to_str(
265  round_up_to_mib(decmem), 0));
266  }
267 #endif
268 
269  if (memory_usage <= memory_limit)
270  return;
271 
272  // If --no-adjust was used or we didn't find LZMA1 or
273  // LZMA2 as the last filter, give an error immediately.
274  // --format=raw implies --no-adjust.
276  memlimit_too_small(memory_usage);
277 
279 
280 #ifdef HAVE_ENCODERS
281 # ifdef MYTHREAD_ENABLED
282  if (opt_format == FORMAT_XZ && mt_options.threads > 1) {
283  // Try to reduce the number of threads before
284  // adjusting the compression settings down.
285  do {
286  // FIXME? The real single-threaded mode has
287  // lower memory usage, but it's not comparable
288  // because it doesn't write the size info
289  // into Block Headers.
290  if (--mt_options.threads == 0)
291  memlimit_too_small(memory_usage);
292 
293  memory_usage = lzma_stream_encoder_mt_memusage(
294  &mt_options);
295  if (memory_usage == UINT64_MAX)
296  message_bug();
297 
298  } while (memory_usage > memory_limit);
299 
300  message(V_WARNING, _("Adjusted the number of threads "
301  "from %s to %s to not exceed "
302  "the memory usage limit of %s MiB"),
304  uint64_to_str(mt_options.threads, 1),
306  memory_limit), 2));
307  }
308 # endif
309 
310  if (memory_usage <= memory_limit)
311  return;
312 
313  // Look for the last filter if it is LZMA2 or LZMA1, so we can make
314  // it use less RAM. With other filters we don't know what to do.
315  size_t i = 0;
316  while (filters[i].id != LZMA_FILTER_LZMA2
317  && filters[i].id != LZMA_FILTER_LZMA1) {
318  if (filters[i].id == LZMA_VLI_UNKNOWN)
319  memlimit_too_small(memory_usage);
320 
321  ++i;
322  }
323 
324  // Decrease the dictionary size until we meet the memory
325  // usage limit. First round down to full mebibytes.
327  const uint32_t orig_dict_size = opt->dict_size;
328  opt->dict_size &= ~((UINT32_C(1) << 20) - 1);
329  while (true) {
330  // If it is below 1 MiB, auto-adjusting failed. We could be
331  // more sophisticated and scale it down even more, but let's
332  // see if many complain about this version.
333  //
334  // FIXME: Displays the scaled memory usage instead
335  // of the original.
336  if (opt->dict_size < (UINT32_C(1) << 20))
337  memlimit_too_small(memory_usage);
338 
339  memory_usage = lzma_raw_encoder_memusage(filters);
340  if (memory_usage == UINT64_MAX)
341  message_bug();
342 
343  // Accept it if it is low enough.
344  if (memory_usage <= memory_limit)
345  break;
346 
347  // Otherwise 1 MiB down and try again. I hope this
348  // isn't too slow method for cases where the original
349  // dict_size is very big.
350  opt->dict_size -= UINT32_C(1) << 20;
351  }
352 
353  // Tell the user that we decreased the dictionary size.
354  message(V_WARNING, _("Adjusted LZMA%c dictionary size "
355  "from %s MiB to %s MiB to not exceed "
356  "the memory usage limit of %s MiB"),
358  ? '2' : '1',
359  uint64_to_str(orig_dict_size >> 20, 0),
360  uint64_to_str(opt->dict_size >> 20, 1),
361  uint64_to_str(round_up_to_mib(memory_limit), 2));
362 #endif
363 
364  return;
365 }
@ LZMA_CHECK_CRC32
Definition: check.h:35
@ LZMA_CHECK_CRC64
Definition: check.h:42
lzma_index ** i
Definition: index.h:629
bool opt_auto_adjust
Definition: coder.c:26
#define LZMA_FILTER_DELTA
Filter ID.
Definition: delta.h:25
void hardware_threads_set(uint32_t n)
Set the maximum number of worker threads.
Definition: hardware.c:31
#define PRIu32
Definition: macros.h:20
#define LZMA_FILTER_LZMA2
LZMA2 Filter ID.
Definition: lzma12.h:40
#define LZMA_FILTER_LZMA1
LZMA1 Filter ID.
Definition: lzma12.h:30
void message_bug(void)
Definition: message.c:789
void message_filters_show(enum message_verbosity v, const lzma_filter *filters)
Print the filter chain.
Definition: message.c:1050
@ V_DEBUG
Very verbose.
Definition: message.h:19
@ V_WARNING
Errors and warnings.
Definition: message.h:17
uint64_t opt_flush_timeout
Number of milliseconds between LZMA_SYNC_FLUSHes.
Definition: mytime.c:19
#define UINT32_C(val)
Options specific to the LZMA1 and LZMA2 filters.
Definition: lzma12.h:185
uint32_t dict_size
Dictionary size in bytes.
Definition: lzma12.h:217
char * message
Definition: main.c:12
const char * uint64_to_str(uint64_t value, uint32_t slot)
Convert uint64_t to a string.
Definition: util.c:171
uint64_t round_up_to_mib(uint64_t n)
Round an integer up to the next full MiB and convert to MiB.
Definition: util.c:139
static lzma_options_lzma opt_lzma
#define LZMA_VLI_UNKNOWN
VLI value to denote that the value is unknown.
Definition: vli.h:39

Referenced by args_parse().

◆ coder_set_extreme()

void coder_set_extreme ( void  )

Enable extreme mode.

Definition at line 98 of file coder.c.

99 {
102  return;
103 }
static void forget_filter_chain(void)
Definition: coder.c:73
#define LZMA_PRESET_EXTREME
Extreme compression preset.
Definition: container.h:60

References forget_filter_chain(), LZMA_PRESET_EXTREME, and preset_number.

Referenced by parse_real().

◆ coder_set_preset()

void coder_set_preset ( uint32_t  new_preset)

Set preset number.

Definition at line 88 of file coder.c.

89 {
91  preset_number |= new_preset;
93  return;
94 }
#define LZMA_PRESET_LEVEL_MASK
Mask for preset level.
Definition: container.h:40

References forget_filter_chain(), LZMA_PRESET_LEVEL_MASK, and preset_number.

Referenced by parse_real().

◆ coder_write_output()

static bool coder_write_output ( file_pair pair)
static

Definition at line 616 of file coder.c.

617 {
618  if (opt_mode != MODE_TEST) {
620  return true;
621  }
622 
625  return false;
626 }

References lzma_stream::avail_out, IO_BUFFER_SIZE, io_write(), MODE_TEST, lzma_stream::next_out, opt_mode, out_buf, strm, and io_buf::u8.

Referenced by coder_normal().

◆ forget_filter_chain()

static void forget_filter_chain ( void  )
static

Definition at line 73 of file coder.c.

74 {
75  // Setting a preset makes us forget a possibly defined custom
76  // filter chain.
77  while (filters_count > 0) {
78  --filters_count;
81  }
82 
83  return;
84 }
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130

References filters, filters_count, free(), NULL, options, and lzma_filter::options.

Referenced by coder_set_extreme(), and coder_set_preset().

◆ lzma_attribute()

static void lzma_attribute ( (__noreturn__)  )
static

Definition at line 126 of file coder.c.

128 {
129  message(V_ERROR, _("Memory usage limit is too low for the given "
130  "filter setup."));
131  message_mem_needed(V_ERROR, memory_usage);
132  tuklib_exit(E_ERROR, E_ERROR, false);
133 }
@ E_ERROR
Definition: transport.h:23
#define tuklib_exit
Definition: tuklib_exit.h:20

References _, E_ERROR, message, message_mem_needed(), tuklib_exit, and V_ERROR.

◆ split_block()

static void split_block ( uint64_t block_remaining,
uint64_t next_block_remaining,
size_t list_pos 
)
static

Resolve conflicts between opt_block_size and opt_block_list in single-threaded mode. The sizes in opt_block_list take precedence, except when a listed Block size is larger than opt_block_size. If that is the case for the current Block at *list_pos, it is split into smaller Blocks of at most opt_block_size bytes. Otherwise, advance to the next Block in opt_block_list and split it if needed.

Definition at line 571 of file coder.c.

574 {
575  if (*next_block_remaining > 0) {
576  // The Block at *list_pos has previously been split up.
578  assert(opt_block_size > 0);
580 
581  if (*next_block_remaining > opt_block_size) {
582  // We have to split the current Block at *list_pos
583  // into another opt_block_size length Block.
584  *block_remaining = opt_block_size;
585  } else {
586  // This is the last remaining split Block for the
587  // Block at *list_pos.
588  *block_remaining = *next_block_remaining;
589  }
590 
591  *next_block_remaining -= *block_remaining;
592 
593  } else {
594  // The Block at *list_pos has been finished. Go to the next
595  // entry in the list. If the end of the list has been reached,
596  // reuse the size of the last Block.
597  if (opt_block_list[*list_pos + 1] != 0)
598  ++*list_pos;
599 
600  *block_remaining = opt_block_list[*list_pos];
601 
602  // If in single-threaded mode, split up the Block if needed.
603  // This is not needed in multi-threaded mode because liblzma
604  // will do this due to how threaded encoding works.
605  if (hardware_threads_get() == 1 && opt_block_size > 0
606  && *block_remaining > opt_block_size) {
607  *next_block_remaining
608  = *block_remaining - opt_block_size;
609  *block_remaining = opt_block_size;
610  }
611  }
612 }

References assert(), hardware_threads_get(), NULL, opt_block_list, and opt_block_size.

Referenced by coder_normal().

Variable Documentation

◆ check

lzma_check check
static

Integrity check type.

Definition at line 49 of file coder.c.

Referenced by coder_init(), and coder_set_check().

◆ check_default

bool check_default = true
static

This becomes false if the --check=CHECK option is used.

Definition at line 52 of file coder.c.

Referenced by coder_init(), and coder_set_check().

◆ filters

lzma_filter filters[LZMA_FILTERS_MAX+1]
static

Filters needed for encoding in all formats, and also for decoding raw data.

Definition at line 36 of file coder.c.

Referenced by coder_add_filter(), coder_init(), and forget_filter_chain().

◆ filters_count

uint32_t filters_count = 0
static

Number of filters. Zero indicates that we are using a preset.

Definition at line 43 of file coder.c.

Referenced by coder_add_filter(), coder_init(), and forget_filter_chain().

◆ in_buf

◆ opt_auto_adjust

bool opt_auto_adjust = true

If true, the compression settings are automatically adjusted down if they exceed the memory usage limit.

Definition at line 26 of file coder.c.

Referenced by coder_init(), and parse_real().

◆ opt_block_list

uint64_t* opt_block_list = NULL

This is non-NULL if --block-list was used. It contains the Block sizes as an array terminated with 0.

Definition at line 29 of file coder.c.

Referenced by args_free(), coder_normal(), parse_block_list(), and split_block().

◆ opt_block_size

uint64_t opt_block_size = 0

If non-zero, start a new .xz Block after every opt_block_size bytes of input. This has an effect only when compressing to the .xz format.

Definition at line 28 of file coder.c.

Referenced by coder_init(), coder_normal(), parse_real(), and split_block().

◆ opt_format

enum format_type opt_format = FORMAT_AUTO

File format to use when encoding or what format(s) to accept when decoding. This is a global because it's needed also in suffix.c. This is set in args.c.

Definition at line 1 of file coder.c.

Referenced by args_parse(), coder_init(), coder_normal(), compressed_name(), list_file(), parse_real(), and uncompressed_name().

◆ opt_mode

enum operation_mode opt_mode = MODE_COMPRESS

Operation mode of the command line tool. This is set in args.c and read in several files.

Definition at line 1 of file coder.c.

Referenced by args_parse(), coder_init(), coder_normal(), coder_run(), coder_write_output(), io_open_dest_real(), io_open_src_real(), main(), message_filename(), message_mem_needed(), mytime_get_flush_timeout(), parse_real(), print_filename(), progress_pos(), and suffix_get_dest_name().

◆ opt_single_stream

bool opt_single_stream = false

If true, stop after decoding the first stream.

Definition at line 27 of file coder.c.

Referenced by coder_normal(), and parse_real().

◆ out_buf

◆ preset_number

uint32_t preset_number = LZMA_PRESET_DEFAULT
static

Number of the preset (0-9)

Definition at line 46 of file coder.c.

Referenced by coder_add_filter(), coder_init(), coder_set_extreme(), and coder_set_preset().

◆ strm

Stream used to communicate with liblzma.

Definition at line 33 of file coder.c.

Referenced by coder_free(), coder_normal(), coder_passthru(), coder_run(), and coder_write_output().