Rizin
unix-like reverse engineering framework and cli tools
01_compress_easy.c
Go to the documentation of this file.
1 //
9 //
10 // Author: Lasse Collin
11 //
12 // This file has been put into the public domain.
13 // You can do whatever you want with this file.
14 //
16 
17 #include <stdbool.h>
18 #include <stdlib.h>
19 #include <stdio.h>
20 #include <string.h>
21 #include <errno.h>
22 #include <lzma.h>
23 
24 
// Write a short usage summary to stderr and terminate the program with
// EXIT_FAILURE. argv0 is printed as the program name in the summary.
static void
show_usage_and_exit(const char *argv0)
{
	// Explanation of the PRESET argument, kept apart from the
	// synopsis line so that each piece is easy to read on its own.
	static const char preset_help[] =
			"PRESET is a number 0-9 and can optionally be "
			"followed by `e' to indicate extreme preset\n";

	fprintf(stderr, "Usage: %s PRESET < INFILE > OUTFILE\n%s",
			argv0, preset_help);
	exit(EXIT_FAILURE);
}
34 
35 
36 static uint32_t
37 get_preset(int argc, char **argv)
38 {
39  // One argument whose first char must be 0-9.
40  if (argc != 2 || argv[1][0] < '0' || argv[1][0] > '9')
42 
43  // Calculate the preste level 0-9.
44  uint32_t preset = argv[1][0] - '0';
45 
46  // If there is a second char, it must be 'e'. It will set
47  // the LZMA_PRESET_EXTREME flag.
48  if (argv[1][1] != '\0') {
49  if (argv[1][1] != 'e' || argv[1][2] != '\0')
51 
53  }
54 
55  return preset;
56 }
57 
58 
59 static bool
61 {
62  // Initialize the encoder using a preset. Set the integrity to check
63  // to CRC64, which is the default in the xz command line tool. If
64  // the .xz file needs to be decompressed with XZ Embedded, use
65  // LZMA_CHECK_CRC32 instead.
66  lzma_ret ret = lzma_easy_encoder(strm, preset, LZMA_CHECK_CRC64);
67 
68  // Return successfully if the initialization went fine.
69  if (ret == LZMA_OK)
70  return true;
71 
72  // Something went wrong. The possible errors are documented in
73  // lzma/container.h (src/liblzma/api/lzma/container.h in the source
74  // package or e.g. /usr/include/lzma/container.h depending on the
75  // install prefix).
76  const char *msg;
77  switch (ret) {
78  case LZMA_MEM_ERROR:
79  msg = "Memory allocation failed";
80  break;
81 
82  case LZMA_OPTIONS_ERROR:
83  msg = "Specified preset is not supported";
84  break;
85 
87  msg = "Specified integrity check is not supported";
88  break;
89 
90  default:
91  // This is most likely LZMA_PROG_ERROR indicating a bug in
92  // this program or in liblzma. It is inconvenient to have a
93  // separate error message for errors that should be impossible
94  // to occur, but knowing the error code is important for
95  // debugging. That's why it is good to print the error code
96  // at least when there is no good error message to show.
97  msg = "Unknown error, possibly a bug";
98  break;
99  }
100 
101  fprintf(stderr, "Error initializing the encoder: %s (error code %u)\n",
102  msg, ret);
103  return false;
104 }
105 
106 
107 static bool
109 {
110  // This will be LZMA_RUN until the end of the input file is reached.
111  // This tells lzma_code() when there will be no more input.
113 
114  // Buffers to temporarily hold uncompressed input
115  // and compressed output.
116  uint8_t inbuf[BUFSIZ];
117  uint8_t outbuf[BUFSIZ];
118 
119  // Initialize the input and output pointers. Initializing next_in
120  // and avail_in isn't really necessary when we are going to encode
121  // just one file since LZMA_STREAM_INIT takes care of initializing
122  // those already. But it doesn't hurt much and it will be needed
123  // if encoding more than one file like we will in 02_decompress.c.
124  //
125  // While we don't care about strm->total_in or strm->total_out in this
126  // example, it is worth noting that initializing the encoder will
127  // always reset total_in and total_out to zero. But the encoder
128  // initialization doesn't touch next_in, avail_in, next_out, or
129  // avail_out.
130  strm->next_in = NULL;
131  strm->avail_in = 0;
132  strm->next_out = outbuf;
133  strm->avail_out = sizeof(outbuf);
134 
135  // Loop until the file has been successfully compressed or until
136  // an error occurs.
137  while (true) {
138  // Fill the input buffer if it is empty.
139  if (strm->avail_in == 0 && !feof(infile)) {
140  strm->next_in = inbuf;
141  strm->avail_in = fread(inbuf, 1, sizeof(inbuf),
142  infile);
143 
144  if (ferror(infile)) {
145  fprintf(stderr, "Read error: %s\n",
146  strerror(errno));
147  return false;
148  }
149 
150  // Once the end of the input file has been reached,
151  // we need to tell lzma_code() that no more input
152  // will be coming and that it should finish the
153  // encoding.
154  if (feof(infile))
156  }
157 
158  // Tell liblzma do the actual encoding.
159  //
160  // This reads up to strm->avail_in bytes of input starting
161  // from strm->next_in. avail_in will be decremented and
162  // next_in incremented by an equal amount to match the
163  // number of input bytes consumed.
164  //
165  // Up to strm->avail_out bytes of compressed output will be
166  // written starting from strm->next_out. avail_out and next_out
167  // will be incremented by an equal amount to match the number
168  // of output bytes written.
169  //
170  // The encoder has to do internal buffering, which means that
171  // it may take quite a bit of input before the same data is
172  // available in compressed form in the output buffer.
173  lzma_ret ret = lzma_code(strm, action);
174 
175  // If the output buffer is full or if the compression finished
176  // successfully, write the data from the output bufffer to
177  // the output file.
178  if (strm->avail_out == 0 || ret == LZMA_STREAM_END) {
179  // When lzma_code() has returned LZMA_STREAM_END,
180  // the output buffer is likely to be only partially
181  // full. Calculate how much new data there is to
182  // be written to the output file.
183  size_t write_size = sizeof(outbuf) - strm->avail_out;
184 
185  if (fwrite(outbuf, 1, write_size, outfile)
186  != write_size) {
187  fprintf(stderr, "Write error: %s\n",
188  strerror(errno));
189  return false;
190  }
191 
192  // Reset next_out and avail_out.
193  strm->next_out = outbuf;
194  strm->avail_out = sizeof(outbuf);
195  }
196 
197  // Normally the return value of lzma_code() will be LZMA_OK
198  // until everything has been encoded.
199  if (ret != LZMA_OK) {
200  // Once everything has been encoded successfully, the
201  // return value of lzma_code() will be LZMA_STREAM_END.
202  //
203  // It is important to check for LZMA_STREAM_END. Do not
204  // assume that getting ret != LZMA_OK would mean that
205  // everything has gone well.
206  if (ret == LZMA_STREAM_END)
207  return true;
208 
209  // It's not LZMA_OK nor LZMA_STREAM_END,
210  // so it must be an error code. See lzma/base.h
211  // (src/liblzma/api/lzma/base.h in the source package
212  // or e.g. /usr/include/lzma/base.h depending on the
213  // install prefix) for the list and documentation of
214  // possible values. Most values listen in lzma_ret
215  // enumeration aren't possible in this example.
216  const char *msg;
217  switch (ret) {
218  case LZMA_MEM_ERROR:
219  msg = "Memory allocation failed";
220  break;
221 
222  case LZMA_DATA_ERROR:
223  // This error is returned if the compressed
224  // or uncompressed size get near 8 EiB
225  // (2^63 bytes) because that's where the .xz
226  // file format size limits currently are.
227  // That is, the possibility of this error
228  // is mostly theoretical unless you are doing
229  // something very unusual.
230  //
231  // Note that strm->total_in and strm->total_out
232  // have nothing to do with this error. Changing
233  // those variables won't increase or decrease
234  // the chance of getting this error.
235  msg = "File size limits exceeded";
236  break;
237 
238  default:
239  // This is most likely LZMA_PROG_ERROR, but
240  // if this program is buggy (or liblzma has
241  // a bug), it may be e.g. LZMA_BUF_ERROR or
242  // LZMA_OPTIONS_ERROR too.
243  //
244  // It is inconvenient to have a separate
245  // error message for errors that should be
246  // impossible to occur, but knowing the error
247  // code is important for debugging. That's why
248  // it is good to print the error code at least
249  // when there is no good error message to show.
250  msg = "Unknown error, possibly a bug";
251  break;
252  }
253 
254  fprintf(stderr, "Encoder error: %s (error code %u)\n",
255  msg, ret);
256  return false;
257  }
258  }
259 }
260 
261 
262 extern int
263 main(int argc, char **argv)
264 {
265  // Get the preset number from the command line.
266  uint32_t preset = get_preset(argc, argv);
267 
268  // Initialize a lzma_stream structure. When it is allocated on stack,
269  // it is simplest to use LZMA_STREAM_INIT macro like below. When it
270  // is allocated on heap, using memset(strmptr, 0, sizeof(*strmptr))
271  // works (as long as NULL pointers are represented with zero bits
272  // as they are on practically all computers today).
274 
275  // Initialize the encoder. If it succeeds, compress from
276  // stdin to stdout.
277  bool success = init_encoder(&strm, preset);
278  if (success)
279  success = compress(&strm, stdin, stdout);
280 
281  // Free the memory allocated for the encoder. If we were encoding
282  // multiple files, this would only need to be done after the last
283  // file. See 02_decompress.c for handling of multiple files.
284  //
285  // It is OK to call lzma_end() multiple times or when it hasn't been
286  // actually used except initialized with LZMA_STREAM_INIT.
287  lzma_end(&strm);
288 
289  // Close stdout to catch possible write errors that can occur
290  // when pending data is flushed from the stdio buffers.
291  if (fclose(stdout)) {
292  fprintf(stderr, "Write error: %s\n", strerror(errno));
293  success = false;
294  }
295 
296  return success ? EXIT_SUCCESS : EXIT_FAILURE;
297 }
static bool init_encoder(lzma_stream *strm, uint32_t preset)
int main(int argc, char **argv)
static uint32_t get_preset(int argc, char **argv)
static bool compress(lzma_stream *strm, FILE *infile, FILE *outfile)
static void show_usage_and_exit(const char *argv0)
@ LZMA_CHECK_CRC64
Definition: check.h:42
uint32_t preset
Definition: container.h:259
#define LZMA_PRESET_EXTREME
Extreme compression preset.
Definition: container.h:60
#define NULL
Definition: cris-opc.c:27
static lzma_stream strm
Definition: full_flush.c:20
FILE * outfile
Definition: fuzz_diff.c:16
unsigned char outbuf[SIZE]
Definition: gun.c:162
unsigned char inbuf[SIZE]
Definition: gun.c:161
static static fork const void static count static fd const char const char static newpath char char argv
Definition: sflib.h:40
The public API of liblzma data compression library.
string FILE
Definition: benchmark.py:21
static struct sockaddr static addrlen static backlog const void msg
Definition: sfsocketcall.h:119
unsigned int uint32_t
Definition: sftypes.h:29
unsigned char uint8_t
Definition: sftypes.h:31
Definition: z80asm.h:95
Passing data to and from liblzma.
Definition: base.h:485
uint8_t * next_out
Definition: base.h:490
size_t avail_out
Definition: base.h:491
const uint8_t * next_in
Definition: base.h:486
size_t avail_in
Definition: base.h:487
if(dbg->bits==RZ_SYS_BITS_64)
Definition: windows-arm64.h:4
lzma_ret
Return values used by several functions in liblzma.
Definition: base.h:57
@ LZMA_DATA_ERROR
Data is corrupt.
Definition: base.h:172
@ LZMA_MEM_ERROR
Cannot allocate memory.
Definition: base.h:128
@ LZMA_STREAM_END
End of stream was reached.
Definition: base.h:63
@ LZMA_UNSUPPORTED_CHECK
Cannot calculate the integrity check.
Definition: base.h:90
@ LZMA_OPTIONS_ERROR
Invalid or unsupported options.
Definition: base.h:160
@ LZMA_OK
Operation completed successfully.
Definition: base.h:58
lzma_action
The ‘action’ argument for lzma_code()
Definition: base.h:250
@ LZMA_FINISH
Finish the coding operation.
Definition: base.h:328
@ LZMA_RUN
Continue coding.
Definition: base.h:251
#define LZMA_STREAM_INIT
Initialization for lzma_stream.
Definition: base.h:545