Rizin
unix-like reverse engineering framework and cli tools
01_compress_easy.c File Reference

Compress from stdin to stdout in multi-call mode. More...

#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <lzma.h>

Go to the source code of this file.

Functions

static void show_usage_and_exit (const char *argv0)
 
static uint32_t get_preset (int argc, char **argv)
 
static bool init_encoder (lzma_stream *strm, uint32_t preset)
 
static bool compress (lzma_stream *strm, FILE *infile, FILE *outfile)
 
int main (int argc, char **argv)
 

Detailed Description

Compress from stdin to stdout in multi-call mode.

Usage: ./01_compress_easy PRESET < INFILE > OUTFILE

Example: ./01_compress_easy 6 < foo > foo.xz

Definition in file 01_compress_easy.c.

Function Documentation

◆ compress()

static bool compress ( lzma_stream *  strm,
FILE *  infile,
FILE *  outfile 
)
static

Definition at line 108 of file 01_compress_easy.c.

109 {
110  // This will be LZMA_RUN until the end of the input file is reached.
111  // This tells lzma_code() when there will be no more input.
112  lzma_action action = LZMA_RUN;
113 
114  // Buffers to temporarily hold uncompressed input
115  // and compressed output.
116  uint8_t inbuf[BUFSIZ];
117  uint8_t outbuf[BUFSIZ];
118 
119  // Initialize the input and output pointers. Initializing next_in
120  // and avail_in isn't really necessary when we are going to encode
121  // just one file since LZMA_STREAM_INIT takes care of initializing
122  // those already. But it doesn't hurt much and it will be needed
123  // if encoding more than one file like we will in 02_decompress.c.
124  //
125  // While we don't care about strm->total_in or strm->total_out in this
126  // example, it is worth noting that initializing the encoder will
127  // always reset total_in and total_out to zero. But the encoder
128  // initialization doesn't touch next_in, avail_in, next_out, or
129  // avail_out.
130  strm->next_in = NULL;
131  strm->avail_in = 0;
132  strm->next_out = outbuf;
133  strm->avail_out = sizeof(outbuf);
134 
135  // Loop until the file has been successfully compressed or until
136  // an error occurs.
137  while (true) {
138  // Fill the input buffer if it is empty.
139  if (strm->avail_in == 0 && !feof(infile)) {
140  strm->next_in = inbuf;
141  strm->avail_in = fread(inbuf, 1, sizeof(inbuf),
142  infile);
143 
144  if (ferror(infile)) {
145  fprintf(stderr, "Read error: %s\n",
146  strerror(errno));
147  return false;
148  }
149 
150  // Once the end of the input file has been reached,
151  // we need to tell lzma_code() that no more input
152  // will be coming and that it should finish the
153  // encoding.
154  if (feof(infile))
155  action = LZMA_FINISH;
156  }
157 
158  // Tell liblzma to do the actual encoding.
159  //
160  // This reads up to strm->avail_in bytes of input starting
161  // from strm->next_in. avail_in will be decremented and
162  // next_in incremented by an equal amount to match the
163  // number of input bytes consumed.
164  //
165  // Up to strm->avail_out bytes of compressed output will be
166  // written starting from strm->next_out. avail_out will be
167  // decremented and next_out incremented by an equal amount to
168  // match the number of output bytes written.
169  //
170  // The encoder has to do internal buffering, which means that
171  // it may take quite a bit of input before the same data is
172  // available in compressed form in the output buffer.
173  lzma_ret ret = lzma_code(strm, action);
174 
175  // If the output buffer is full or if the compression finished
176  // successfully, write the data from the output buffer to
177  // the output file.
178  if (strm->avail_out == 0 || ret == LZMA_STREAM_END) {
179  // When lzma_code() has returned LZMA_STREAM_END,
180  // the output buffer is likely to be only partially
181  // full. Calculate how much new data there is to
182  // be written to the output file.
183  size_t write_size = sizeof(outbuf) - strm->avail_out;
184 
185  if (fwrite(outbuf, 1, write_size, outfile)
186  != write_size) {
187  fprintf(stderr, "Write error: %s\n",
188  strerror(errno));
189  return false;
190  }
191 
192  // Reset next_out and avail_out.
193  strm->next_out = outbuf;
194  strm->avail_out = sizeof(outbuf);
195  }
196 
197  // Normally the return value of lzma_code() will be LZMA_OK
198  // until everything has been encoded.
199  if (ret != LZMA_OK) {
200  // Once everything has been encoded successfully, the
201  // return value of lzma_code() will be LZMA_STREAM_END.
202  //
203  // It is important to check for LZMA_STREAM_END. Do not
204  // assume that getting ret != LZMA_OK would mean that
205  // everything has gone well.
206  if (ret == LZMA_STREAM_END)
207  return true;
208 
209  // It's not LZMA_OK nor LZMA_STREAM_END,
210  // so it must be an error code. See lzma/base.h
211  // (src/liblzma/api/lzma/base.h in the source package
212  // or e.g. /usr/include/lzma/base.h depending on the
213  // install prefix) for the list and documentation of
214  // possible values. Most values listed in lzma_ret
215  // enumeration aren't possible in this example.
216  const char *msg;
217  switch (ret) {
218  case LZMA_MEM_ERROR:
219  msg = "Memory allocation failed";
220  break;
221 
222  case LZMA_DATA_ERROR:
223  // This error is returned if the compressed
224  // or uncompressed size get near 8 EiB
225  // (2^63 bytes) because that's where the .xz
226  // file format size limits currently are.
227  // That is, the possibility of this error
228  // is mostly theoretical unless you are doing
229  // something very unusual.
230  //
231  // Note that strm->total_in and strm->total_out
232  // have nothing to do with this error. Changing
233  // those variables won't increase or decrease
234  // the chance of getting this error.
235  msg = "File size limits exceeded";
236  break;
237 
238  default:
239  // This is most likely LZMA_PROG_ERROR, but
240  // if this program is buggy (or liblzma has
241  // a bug), it may be e.g. LZMA_BUF_ERROR or
242  // LZMA_OPTIONS_ERROR too.
243  //
244  // It is inconvenient to have a separate
245  // error message for errors that should be
246  // impossible to occur, but knowing the error
247  // code is important for debugging. That's why
248  // it is good to print the error code at least
249  // when there is no good error message to show.
250  msg = "Unknown error, possibly a bug";
251  break;
252  }
253 
254  fprintf(stderr, "Encoder error: %s (error code %u)\n",
255  msg, ret);
256  return false;
257  }
258  }
259 }
#define NULL
Definition: cris-opc.c:27
static lzma_stream strm
Definition: full_flush.c:20
FILE * outfile
Definition: fuzz_diff.c:16
unsigned char outbuf[SIZE]
Definition: gun.c:162
unsigned char inbuf[SIZE]
Definition: gun.c:161
static struct sockaddr static addrlen static backlog const void msg
Definition: sfsocketcall.h:119
unsigned char uint8_t
Definition: sftypes.h:31
Definition: z80asm.h:95
uint8_t * next_out
Definition: base.h:490
size_t avail_out
Definition: base.h:491
const uint8_t * next_in
Definition: base.h:486
size_t avail_in
Definition: base.h:487
if(dbg->bits==RZ_SYS_BITS_64)
Definition: windows-arm64.h:4
lzma_ret
Return values used by several functions in liblzma.
Definition: base.h:57
@ LZMA_DATA_ERROR
Data is corrupt.
Definition: base.h:172
@ LZMA_MEM_ERROR
Cannot allocate memory.
Definition: base.h:128
@ LZMA_STREAM_END
End of stream was reached.
Definition: base.h:63
@ LZMA_OK
Operation completed successfully.
Definition: base.h:58
lzma_action
The ‘action’ argument for lzma_code()
Definition: base.h:250
@ LZMA_FINISH
Finish the coding operation.
Definition: base.h:328
@ LZMA_RUN
Continue coding.
Definition: base.h:251

References test-lz4-speed::action, lzma_stream::avail_in, lzma_stream::avail_out, if(), inbuf, LZMA_DATA_ERROR, LZMA_FINISH, LZMA_MEM_ERROR, LZMA_OK, LZMA_RUN, LZMA_STREAM_END, msg, lzma_stream::next_in, lzma_stream::next_out, NULL, outbuf, outfile, and strm.

Referenced by main().

◆ get_preset()

static uint32_t get_preset ( int  argc,
char **  argv 
)
static

Definition at line 37 of file 01_compress_easy.c.

38 {
39  // One argument whose first char must be 0-9.
40  if (argc != 2 || argv[1][0] < '0' || argv[1][0] > '9')
41  show_usage_and_exit(argv[0]);
42 
43  // Calculate the preset level 0-9.
44  uint32_t preset = argv[1][0] - '0';
45 
46  // If there is a second char, it must be 'e'. It will set
47  // the LZMA_PRESET_EXTREME flag.
48  if (argv[1][1] != '\0') {
49  if (argv[1][1] != 'e' || argv[1][2] != '\0')
50  show_usage_and_exit(argv[0]);
51 
52  preset |= LZMA_PRESET_EXTREME;
53  }
54 
55  return preset;
56 }
static void show_usage_and_exit(const char *argv0)
uint32_t preset
Definition: container.h:259
#define LZMA_PRESET_EXTREME
Extreme compression preset.
Definition: container.h:60
static static fork const void static count static fd const char const char static newpath char char argv
Definition: sflib.h:40
unsigned int uint32_t
Definition: sftypes.h:29

References argv, LZMA_PRESET_EXTREME, preset, and show_usage_and_exit().

Referenced by main().

◆ init_encoder()

static bool init_encoder ( lzma_stream *  strm,
uint32_t  preset 
)
static

Definition at line 60 of file 01_compress_easy.c.

61 {
62  // Initialize the encoder using a preset. Set the integrity check
63  // to CRC64, which is the default in the xz command line tool. If
64  // the .xz file needs to be decompressed with XZ Embedded, use
65  // LZMA_CHECK_CRC32 instead.
66  lzma_ret ret = lzma_easy_encoder(strm, preset, LZMA_CHECK_CRC64);
67 
68  // Return successfully if the initialization went fine.
69  if (ret == LZMA_OK)
70  return true;
71 
72  // Something went wrong. The possible errors are documented in
73  // lzma/container.h (src/liblzma/api/lzma/container.h in the source
74  // package or e.g. /usr/include/lzma/container.h depending on the
75  // install prefix).
76  const char *msg;
77  switch (ret) {
78  case LZMA_MEM_ERROR:
79  msg = "Memory allocation failed";
80  break;
81 
82  case LZMA_OPTIONS_ERROR:
83  msg = "Specified preset is not supported";
84  break;
85 
86  case LZMA_UNSUPPORTED_CHECK:
87  msg = "Specified integrity check is not supported";
88  break;
89 
90  default:
91  // This is most likely LZMA_PROG_ERROR indicating a bug in
92  // this program or in liblzma. It is inconvenient to have a
93  // separate error message for errors that should be impossible
94  // to occur, but knowing the error code is important for
95  // debugging. That's why it is good to print the error code
96  // at least when there is no good error message to show.
97  msg = "Unknown error, possibly a bug";
98  break;
99  }
100 
101  fprintf(stderr, "Error initializing the encoder: %s (error code %u)\n",
102  msg, ret);
103  return false;
104 }
@ LZMA_CHECK_CRC64
Definition: check.h:42
@ LZMA_UNSUPPORTED_CHECK
Cannot calculate the integrity check.
Definition: base.h:90
@ LZMA_OPTIONS_ERROR
Invalid or unsupported options.
Definition: base.h:160

References LZMA_CHECK_CRC64, LZMA_MEM_ERROR, LZMA_OK, LZMA_OPTIONS_ERROR, LZMA_UNSUPPORTED_CHECK, msg, preset, and strm.

Referenced by main().

◆ main()

int main ( int  argc,
char **  argv 
)

Definition at line 263 of file 01_compress_easy.c.

264 {
265  // Get the preset number from the command line.
266  uint32_t preset = get_preset(argc, argv);
267 
268  // Initialize a lzma_stream structure. When it is allocated on stack,
269  // it is simplest to use LZMA_STREAM_INIT macro like below. When it
270  // is allocated on heap, using memset(strmptr, 0, sizeof(*strmptr))
271  // works (as long as NULL pointers are represented with zero bits
272  // as they are on practically all computers today).
273  lzma_stream strm = LZMA_STREAM_INIT;
274 
275  // Initialize the encoder. If it succeeds, compress from
276  // stdin to stdout.
277  bool success = init_encoder(&strm, preset);
278  if (success)
279  success = compress(&strm, stdin, stdout);
280 
281  // Free the memory allocated for the encoder. If we were encoding
282  // multiple files, this would only need to be done after the last
283  // file. See 02_decompress.c for handling of multiple files.
284  //
285  // It is OK to call lzma_end() multiple times or when it hasn't been
286  // actually used except initialized with LZMA_STREAM_INIT.
287  lzma_end(&strm);
288 
289  // Close stdout to catch possible write errors that can occur
290  // when pending data is flushed from the stdio buffers.
291  if (fclose(stdout)) {
292  fprintf(stderr, "Write error: %s\n", strerror(errno));
293  success = false;
294  }
295 
296  return success ? EXIT_SUCCESS : EXIT_FAILURE;
297 }
static bool init_encoder(lzma_stream *strm, uint32_t preset)
static uint32_t get_preset(int argc, char **argv)
static bool compress(lzma_stream *strm, FILE *infile, FILE *outfile)
Passing data to and from liblzma.
Definition: base.h:485
#define LZMA_STREAM_INIT
Initialization for lzma_stream.
Definition: base.h:545

References argv, compress(), get_preset(), init_encoder(), LZMA_STREAM_INIT, preset, and strm.

◆ show_usage_and_exit()

static void show_usage_and_exit ( const char *  argv0)
static

Definition at line 26 of file 01_compress_easy.c.

27 {
28  fprintf(stderr, "Usage: %s PRESET < INFILE > OUTFILE\n"
29  "PRESET is a number 0-9 and can optionally be "
30  "followed by `e' to indicate extreme preset\n",
31  argv0);
32  exit(EXIT_FAILURE);
33 }

References test-lz4-list::exit.

Referenced by get_preset().