Rizin
unix-like reverse engineering framework and cli tools
cab_extract.c
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: 2022 deroad <wargio@libero.it>
2 // SPDX-License-Identifier: LGPL-3.0-only
3 
4 #include <rz_bin.h>
5 #include <rz_type.h>
6 #include <mspack.h>
7 
8 #include "pdb.h"
9 
10 // checks all the files and guesses if is using unix or win paths
12  bool slash = false, backslash = false;
13  struct mscabd_file *fi = NULL;
14 
15  for (fi = files; fi; fi = fi->next) {
16  for (char *p = fi->filename; *p; p++) {
17  if (*p == '/') {
18  slash = true;
19  } else if (*p == '\\') {
20  backslash = true;
21  }
22  }
23  if (slash && backslash) {
24  break;
25  }
26  }
27 
28  if (!slash) {
29  /* no slashes, therefore is windows */
30  return false;
31  } else if (!backslash) {
32  /* slashes but no backslashes, therefore is unix */
33  return true;
34  }
35 
36  /* check if starts with a slash */
37  if (!files->next) {
38  char c, *p = fi->filename;
39  while ((c = *p++)) {
40  if (c == '\\') {
41  return false; /* is windows */
42  } else if (c == '/') {
43  return true; /* is unix */
44  }
45  }
46  /* impossible scenario since at least one slash was found */
47  return false;
48  }
49 
50  const char *oldname = NULL;
51  size_t oldlen = 0;
52  for (fi = files; fi; fi = fi->next) {
53  const char *name = fi->filename;
54  size_t len = 0;
55  while (name[len]) {
56  if ((name[len] == '\\') || (name[len] == '/')) {
57  break;
58  }
59  len++;
60  }
61  if (!name[len]) {
62  len = 0;
63  } else {
64  len++;
65  if (len == oldlen && !strncmp(name, oldname, len)) {
66  return name[len - 1] != '\\';
67  }
68  }
69 
70  oldname = name;
71  oldlen = len;
72  }
73 
74  return false;
75 }
76 
/* Tells whether the first character of str is a path separator ('/' or '\'). */
static bool is_slash(const char *str) {
	switch (*str) {
	case '/':
	case '\\':
		return true;
	default:
		return false;
	}
}
80 
/* Tells whether path begins with a parent-directory component: "../" or "..\". */
static bool is_previous_dir(const char *path) {
	// bail out early so that bytes past the terminator are never read
	if (path[0] != '.' || path[1] != '.') {
		return false;
	}
	const char after_dots = path[2];
	return after_dots == '/' || after_dots == '\\';
}
84 
85 static char *sanitize_cab_filename(struct mscabd_file *file, const char *output_dir, bool is_unix) {
86  char separator = '\\';
87  char os_slash = '/';
88  if (is_unix) {
89  separator = '/';
90  os_slash = '\\';
91  }
92 
93  size_t output_dir_len = strlen(output_dir) + 1; // includes the path separator
94  size_t filename_len = strlen(file->filename);
95 
96  char *sanitized = RZ_NEWS0(char, output_dir_len + (filename_len * 4) + 2);
97  if (!sanitized) {
98  RZ_LOG_ERROR("Cannot allocate sanitized name\n");
99  return NULL;
100  }
101 
102  const ut8 *input = (const ut8 *)&file->filename[0];
103  const ut8 *endp = (const ut8 *)&file->filename[filename_len];
104  ut8 *output = (ut8 *)&sanitized[output_dir_len];
105 
106  memcpy(sanitized, output_dir, output_dir_len);
107  sanitized[output_dir_len - 1] = '/';
108 
109  if (file->attribs & MSCAB_ATTRIB_UTF_NAME) {
110  // sanitize utf-8 filename
111  RzRune rune;
112  for (; input < endp;) {
113  rune = 0;
114  int len = rz_utf8_decode(input, endp - input, &rune);
115  if (!len) {
116  len = 1;
117  rune = 0xFFFD;
118  } else if (rune <= 0 || rune > 0x10FFFF || (rune >= 0xD800 && rune <= 0xDFFF) || rune == 0xFFFE || rune == 0xFFFF) {
119  len = 1;
120  rune = 0xFFFD;
121  }
122  input += len;
123 
124  if (rune == separator) {
125  rune = '/';
126  } else if (rune == os_slash) {
127  rune = '\\';
128  }
129 
130  len = rz_utf8_encode(output, rune);
131  output += len;
132  }
133  *output++ = '\0';
134  } else {
135  // sanitize ascii filename
136  ut8 c = 0;
137  while (input < endp) {
138  c = *input++;
139  if (c == separator) {
140  c = '/';
141  } else if (c == os_slash) {
142  c = '\\';
143  }
144  *output++ = c;
145  }
146  *output++ = '\0';
147  }
148 
149  output = (ut8 *)&sanitized[output_dir_len];
150  for (input = output; is_slash((const char *)input); input++) {
151  // skip any leading slashes in the cab filename part
152  }
153 
154  if (input != output) {
155  size_t len = strlen((char *)input);
156  if (len > 0) {
157  memmove(output, input, len + 1);
158  } else {
159  /* change filename composed entirely of leading slashes to underscores */
160  strcpy((char *)output, "_");
161  }
162  }
163 
164  // remove any "../" or "..\" in the filename
165  for (; *output; output++) {
166  if (is_previous_dir((const char *)output)) {
167  output[0] = output[1] = '_';
168  output += 2;
169  }
170  }
171 
172  return sanitized;
173 }
174 
175 static const char *cab_error(struct mscab_decompressor *cd) {
176  switch (cd->last_error(cd)) {
177  case MSPACK_ERR_OPEN:
178  return "MSPACK_ERR_OPEN";
179  case MSPACK_ERR_READ:
180  return "MSPACK_ERR_READ";
181  case MSPACK_ERR_WRITE:
182  return "MSPACK_ERR_WRITE";
183  case MSPACK_ERR_SEEK:
184  return "MSPACK_ERR_SEEK";
185  case MSPACK_ERR_NOMEMORY:
186  return "MSPACK_ERR_NOMEMORY";
188  return "MSPACK_ERR_SIGNATURE";
190  return "MSPACK_ERR_DATAFORMAT";
191  case MSPACK_ERR_CHECKSUM:
192  return "MSPACK_ERR_CHECKSUM";
193  case MSPACK_ERR_DECRUNCH:
194  return "MSPACK_ERR_DECRUNCH";
195  default:
197  return "unknown";
198  }
199 }
200 
209 RZ_API bool rz_bin_pdb_extract_in_folder(RZ_NONNULL const char *file_cab, RZ_NONNULL const char *output_dir) {
210  rz_return_val_if_fail(file_cab && output_dir, false);
211 
212  if (!rz_file_exists(file_cab)) {
213  RZ_LOG_ERROR("%s is not a file or does not exist.\n", file_cab);
214  return false;
215  }
216 
217  if (!rz_file_is_directory(output_dir)) {
218  RZ_LOG_ERROR("%s is not a directory or does not exist.\n", output_dir);
219  return false;
220  }
221 
222  struct mscab_decompressor *cabd = NULL;
223  struct mscabd_cabinet *cab = NULL;
224 
226  RZ_LOG_ERROR("Cannot allocate mscab_decompressor.\n");
227  return false;
228  }
229 
230  if (!(cab = cabd->open(cabd, file_cab))) {
231  RZ_LOG_ERROR("Invalid compressed cab file: %s\n", file_cab);
233  return false;
234  }
235 
236  bool result = true;
237  bool is_unix = is_cab_using_unix_paths(cab->files);
238  for (struct mscabd_file *file = cab->files; file; file = file->next) {
239  char *new_name = sanitize_cab_filename(file, output_dir, is_unix);
240  if (!new_name) {
241  result = false;
242  break;
243  }
244  if (cabd->extract(cabd, file, new_name)) {
245  RZ_LOG_ERROR("cab_extract: %s: %s\n", new_name, cab_error(cabd));
246  free(new_name);
247  result = false;
248  break;
249  }
250  RZ_LOG_INFO("cab_extract: extracted %s\n", new_name);
251  free(new_name);
252  }
253 
254  cabd->close(cabd, cab);
256  return result;
257 }
size_t len
Definition: 6502dis.c:15
static csh cd
Definition: asm_mips_cs.c:10
static bool is_previous_dir(const char *path)
Definition: cab_extract.c:81
static bool is_cab_using_unix_paths(struct mscabd_file *files)
Definition: cab_extract.c:11
static bool is_slash(const char *str)
Definition: cab_extract.c:77
RZ_API bool rz_bin_pdb_extract_in_folder(RZ_NONNULL const char *file_cab, RZ_NONNULL const char *output_dir)
Extracts compressed PDB files into a folder.
Definition: cab_extract.c:209
static char * sanitize_cab_filename(struct mscabd_file *file, const char *output_dir, bool is_unix)
Definition: cab_extract.c:85
static const char * cab_error(struct mscab_decompressor *cd)
Definition: cab_extract.c:175
#define MSPACK_ERR_SEEK
Definition: mspack.h:495
#define MSCAB_ATTRIB_UTF_NAME
Definition: mspack.h:929
#define MSPACK_ERR_OPEN
Definition: mspack.h:489
#define MSPACK_ERR_WRITE
Definition: mspack.h:493
#define MSPACK_ERR_CHECKSUM
Definition: mspack.h:503
#define MSPACK_ERR_SIGNATURE
Definition: mspack.h:499
#define MSPACK_ERR_DATAFORMAT
Definition: mspack.h:501
#define MSPACK_ERR_DECRUNCH
Definition: mspack.h:507
#define MSPACK_ERR_READ
Definition: mspack.h:491
#define MSPACK_ERR_NOMEMORY
Definition: mspack.h:497
struct mscab_decompressor * cabd
Definition: cabextract.c:126
#define RZ_API
#define NULL
Definition: cris-opc.c:27
static static fork const void static count static fd const char const char static newpath const char static path const char path
Definition: sflib.h:35
checking print the parsed form of the magic use in n conjunction with m to debug a new magic file n before installing it n output MIME type special files
Definition: file_opts.h:46
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
uint8_t ut8
Definition: lh5801.h:11
void * p
Definition: libc.cpp:67
memcpy(mem, inblock.get(), min(CONTAINING_RECORD(inblock.get(), MEMBLOCK, data) ->size, size))
void mspack_destroy_cab_decompressor(struct mscab_decompressor *base)
Definition: cabd.c:173
struct mscab_decompressor * mspack_create_cab_decompressor(struct mspack_system *sys)
Definition: cabd.c:140
const char * name
Definition: op.c:541
#define rz_warn_if_reached()
Definition: rz_assert.h:29
#define rz_return_val_if_fail(expr, val)
Definition: rz_assert.h:108
RZ_API bool rz_file_is_directory(const char *str)
Definition: file.c:167
RZ_API bool rz_file_exists(const char *str)
Definition: file.c:192
#define RZ_LOG_INFO(fmtstr,...)
Definition: rz_log.h:54
#define RZ_LOG_ERROR(fmtstr,...)
Definition: rz_log.h:58
#define RZ_NONNULL
Definition: rz_types.h:64
#define RZ_NEWS0(x, y)
Definition: rz_types.h:282
RZ_API int rz_utf8_decode(const ut8 *ptr, int ptrlen, RzRune *ch)
Definition: utf8.c:492
ut32 RzRune
Definition: rz_utf8.h:13
RZ_API int rz_utf8_encode(ut8 *ptr, const RzRune ch)
Definition: utf8.c:535
#define c(i)
Definition: sha256.c:43
Definition: gzappend.c:170
z_const unsigned char * next
Definition: gzappend.c:175
struct mscabd_cabinet *(* open)(struct mscab_decompressor *self, const char *filename)
Definition: mspack.h:978
int(* extract)(struct mscab_decompressor *self, struct mscabd_file *file, const char *filename)
Definition: mspack.h:1138
void(* close)(struct mscab_decompressor *self, struct mscabd_cabinet *cab)
Definition: mspack.h:1010
struct mscabd_file * files
Definition: mspack.h:743
struct mscabd_file * next
Definition: mspack.h:868
char * filename
Definition: mspack.h:878
Definition: z80asm.h:102
static bool input(void *ud, zip_uint8_t *data, zip_uint64_t length)
diff_output_t output
Definition: zipcmp.c:237