Rizin
unix-like reverse engineering framework and cli tools
zip_utf-8.c File Reference
#include "zipint.h"
#include <stdlib.h>

Go to the source code of this file.

Macros

#define UTF_8_LEN_2_MASK   0xe0
 
#define UTF_8_LEN_2_MATCH   0xc0
 
#define UTF_8_LEN_3_MASK   0xf0
 
#define UTF_8_LEN_3_MATCH   0xe0
 
#define UTF_8_LEN_4_MASK   0xf8
 
#define UTF_8_LEN_4_MATCH   0xf0
 
#define UTF_8_CONTINUE_MASK   0xc0
 
#define UTF_8_CONTINUE_MATCH   0x80
 

Functions

zip_encoding_type_t _zip_guess_encoding (zip_string_t *str, zip_encoding_type_t expected_encoding)
 
static zip_uint32_t _zip_unicode_to_utf8_len (zip_uint32_t codepoint)
 
static zip_uint32_t _zip_unicode_to_utf8 (zip_uint32_t codepoint, zip_uint8_t *buf)
 
zip_uint8_t * _zip_cp437_to_utf8 (const zip_uint8_t *const _cp437buf, zip_uint32_t len, zip_uint32_t *utf8_lenp, zip_error_t *error)
 

Variables

static const zip_uint16_t _cp437_to_unicode [256]
 

Macro Definition Documentation

◆ UTF_8_CONTINUE_MASK

#define UTF_8_CONTINUE_MASK   0xc0

Definition at line 95 of file zip_utf-8.c.

◆ UTF_8_CONTINUE_MATCH

#define UTF_8_CONTINUE_MATCH   0x80

Definition at line 96 of file zip_utf-8.c.

◆ UTF_8_LEN_2_MASK

#define UTF_8_LEN_2_MASK   0xe0

Definition at line 89 of file zip_utf-8.c.

◆ UTF_8_LEN_2_MATCH

#define UTF_8_LEN_2_MATCH   0xc0

Definition at line 90 of file zip_utf-8.c.

◆ UTF_8_LEN_3_MASK

#define UTF_8_LEN_3_MASK   0xf0

Definition at line 91 of file zip_utf-8.c.

◆ UTF_8_LEN_3_MATCH

#define UTF_8_LEN_3_MATCH   0xe0

Definition at line 92 of file zip_utf-8.c.

◆ UTF_8_LEN_4_MASK

#define UTF_8_LEN_4_MASK   0xf8

Definition at line 93 of file zip_utf-8.c.

◆ UTF_8_LEN_4_MATCH

#define UTF_8_LEN_4_MATCH   0xf0

Definition at line 94 of file zip_utf-8.c.

Function Documentation

◆ _zip_cp437_to_utf8()

zip_uint8_t* _zip_cp437_to_utf8 ( const zip_uint8_t *const  _cp437buf,
zip_uint32_t  len,
zip_uint32_t * utf8_lenp,
zip_error_t * error 
)

Definition at line 198 of file zip_utf-8.c.

198  {
199  zip_uint8_t *cp437buf = (zip_uint8_t *)_cp437buf;
200  zip_uint8_t *utf8buf;
201  zip_uint32_t buflen, i, offset;
202 
203  if (len == 0) {
204  if (utf8_lenp)
205  *utf8_lenp = 0;
206  return NULL;
207  }
208 
209  buflen = 1;
210  for (i = 0; i < len; i++)
211  buflen += _zip_unicode_to_utf8_len(_cp437_to_unicode[cp437buf[i]]);
212 
213  if ((utf8buf = (zip_uint8_t *)malloc(buflen)) == NULL) {
214  zip_error_set(error, ZIP_ER_MEMORY, 0);
215  return NULL;
216  }
217 
218  offset = 0;
219  for (i = 0; i < len; i++)
220  offset += _zip_unicode_to_utf8(_cp437_to_unicode[cp437buf[i]], utf8buf + offset);
221 
222  utf8buf[buflen - 1] = 0;
223  if (utf8_lenp)
224  *utf8_lenp = buflen - 1;
225  return utf8buf;
226 }
size_t len
Definition: 6502dis.c:15
lzma_index ** i
Definition: index.h:629
#define NULL
Definition: cris-opc.c:27
voidpf uLong offset
Definition: ioapi.h:144
ZIP_EXTERN void zip_error_set(zip_error_t *_Nullable, int, int)
Definition: zip_error.c:126
#define ZIP_ER_MEMORY
Definition: zip.h:119
void * malloc(size_t size)
Definition: malloc.c:123
ut64 buflen
Definition: core.c:76
void error(const char *msg)
Definition: untgz.c:593
static const zip_uint16_t _cp437_to_unicode[256]
Definition: zip_utf-8.c:40
static zip_uint32_t _zip_unicode_to_utf8(zip_uint32_t codepoint, zip_uint8_t *buf)
Definition: zip_utf-8.c:173
static zip_uint32_t _zip_unicode_to_utf8_len(zip_uint32_t codepoint)
Definition: zip_utf-8.c:161
uint32_t zip_uint32_t
Definition: zipconf.h:37
uint8_t zip_uint8_t
Definition: zipconf.h:33

References _cp437_to_unicode, _zip_unicode_to_utf8(), _zip_unicode_to_utf8_len(), buflen, error(), i, len, malloc(), NULL, ZIP_ER_MEMORY, and zip_error_set().

Referenced by _zip_string_get().

◆ _zip_guess_encoding()

zip_encoding_type_t _zip_guess_encoding ( zip_string_t * str,
zip_encoding_type_t  expected_encoding 
)

Definition at line 100 of file zip_utf-8.c.

100  {
101  zip_encoding_type_t enc;
102  const zip_uint8_t *name;
103  zip_uint32_t i, j, ulen;
104 
105  if (str == NULL)
106  return ZIP_ENCODING_ASCII;
107 
108  name = str->raw;
109 
110  if (str->encoding != ZIP_ENCODING_UNKNOWN)
111  enc = str->encoding;
112  else {
113  enc = ZIP_ENCODING_ASCII;
114  for (i = 0; i < str->length; i++) {
115  if ((name[i] > 31 && name[i] < 128) || name[i] == '\r' || name[i] == '\n' || name[i] == '\t')
116  continue;
117 
118  enc = ZIP_ENCODING_UTF8_GUESSED;
119  if ((name[i] & UTF_8_LEN_2_MASK) == UTF_8_LEN_2_MATCH)
120  ulen = 1;
121  else if ((name[i] & UTF_8_LEN_3_MASK) == UTF_8_LEN_3_MATCH)
122  ulen = 2;
123  else if ((name[i] & UTF_8_LEN_4_MASK) == UTF_8_LEN_4_MATCH)
124  ulen = 3;
125  else {
126  enc = ZIP_ENCODING_CP437;
127  break;
128  }
129 
130  if (i + ulen >= str->length) {
131  enc = ZIP_ENCODING_CP437;
132  break;
133  }
134 
135  for (j = 1; j <= ulen; j++) {
136  if ((name[i + j] & UTF_8_CONTINUE_MASK) != UTF_8_CONTINUE_MATCH) {
137  enc = ZIP_ENCODING_CP437;
138  goto done;
139  }
140  }
141  i += ulen;
142  }
143  }
144 
145 done:
146  str->encoding = enc;
147 
148  if (expected_encoding != ZIP_ENCODING_UNKNOWN) {
149  if (expected_encoding == ZIP_ENCODING_UTF8_KNOWN && enc == ZIP_ENCODING_UTF8_GUESSED)
150  str->encoding = enc = ZIP_ENCODING_UTF8_KNOWN;
151 
152  if (expected_encoding != enc && enc != ZIP_ENCODING_ASCII)
153  return ZIP_ENCODING_ERROR;
154  }
155 
156  return enc;
157 }
struct tab * done
Definition: enough.c:233
const char * name
Definition: op.c:541
Definition: z80asm.h:102
#define UTF_8_LEN_4_MASK
Definition: zip_utf-8.c:93
#define UTF_8_CONTINUE_MATCH
Definition: zip_utf-8.c:96
#define UTF_8_LEN_4_MATCH
Definition: zip_utf-8.c:94
#define UTF_8_LEN_3_MATCH
Definition: zip_utf-8.c:92
#define UTF_8_LEN_3_MASK
Definition: zip_utf-8.c:91
#define UTF_8_LEN_2_MASK
Definition: zip_utf-8.c:89
#define UTF_8_CONTINUE_MASK
Definition: zip_utf-8.c:95
#define UTF_8_LEN_2_MATCH
Definition: zip_utf-8.c:90
enum zip_encoding_type zip_encoding_type_t
Definition: zipint.h:262
@ ZIP_ENCODING_ERROR
Definition: zipint.h:259
@ ZIP_ENCODING_UTF8_GUESSED
Definition: zipint.h:257
@ ZIP_ENCODING_UTF8_KNOWN
Definition: zipint.h:256
@ ZIP_ENCODING_UNKNOWN
Definition: zipint.h:254
@ ZIP_ENCODING_CP437
Definition: zipint.h:258
@ ZIP_ENCODING_ASCII
Definition: zipint.h:255

References done, i, name, NULL, cmd_descs_generate::str, UTF_8_CONTINUE_MASK, UTF_8_CONTINUE_MATCH, UTF_8_LEN_2_MASK, UTF_8_LEN_2_MATCH, UTF_8_LEN_3_MASK, UTF_8_LEN_3_MATCH, UTF_8_LEN_4_MASK, UTF_8_LEN_4_MATCH, ZIP_ENCODING_ASCII, ZIP_ENCODING_CP437, ZIP_ENCODING_ERROR, ZIP_ENCODING_UNKNOWN, ZIP_ENCODING_UTF8_GUESSED, and ZIP_ENCODING_UTF8_KNOWN.

Referenced by _zip_dirent_read(), _zip_dirent_write(), _zip_set_name(), _zip_string_get(), _zip_string_new(), zip_file_set_comment(), and zip_set_archive_comment().

◆ _zip_unicode_to_utf8()

static zip_uint32_t _zip_unicode_to_utf8 ( zip_uint32_t  codepoint,
zip_uint8_t * buf 
)
static

Definition at line 173 of file zip_utf-8.c.

173  {
174  if (codepoint < 0x0080) {
175  buf[0] = codepoint & 0xff;
176  return 1;
177  }
178  if (codepoint < 0x0800) {
179  buf[0] = (zip_uint8_t)(UTF_8_LEN_2_MATCH | ((codepoint >> 6) & 0x1f));
180  buf[1] = (zip_uint8_t)(UTF_8_CONTINUE_MATCH | (codepoint & 0x3f));
181  return 2;
182  }
183  if (codepoint < 0x10000) {
184  buf[0] = (zip_uint8_t)(UTF_8_LEN_3_MATCH | ((codepoint >> 12) & 0x0f));
185  buf[1] = (zip_uint8_t)(UTF_8_CONTINUE_MATCH | ((codepoint >> 6) & 0x3f));
186  buf[2] = (zip_uint8_t)(UTF_8_CONTINUE_MATCH | (codepoint & 0x3f));
187  return 3;
188  }
189  buf[0] = (zip_uint8_t)(UTF_8_LEN_4_MATCH | ((codepoint >> 18) & 0x07));
190  buf[1] = (zip_uint8_t)(UTF_8_CONTINUE_MATCH | ((codepoint >> 12) & 0x3f));
191  buf[2] = (zip_uint8_t)(UTF_8_CONTINUE_MATCH | ((codepoint >> 6) & 0x3f));
192  buf[3] = (zip_uint8_t)(UTF_8_CONTINUE_MATCH | (codepoint & 0x3f));
193  return 4;
194 }
voidpf void * buf
Definition: ioapi.h:138

References UTF_8_CONTINUE_MATCH, UTF_8_LEN_2_MATCH, UTF_8_LEN_3_MATCH, and UTF_8_LEN_4_MATCH.

Referenced by _zip_cp437_to_utf8().

◆ _zip_unicode_to_utf8_len()

static zip_uint32_t _zip_unicode_to_utf8_len ( zip_uint32_t  codepoint)
static

Definition at line 161 of file zip_utf-8.c.

161  {
162  if (codepoint < 0x0080)
163  return 1;
164  if (codepoint < 0x0800)
165  return 2;
166  if (codepoint < 0x10000)
167  return 3;
168  return 4;
169 }

Referenced by _zip_cp437_to_utf8().

Variable Documentation

◆ _cp437_to_unicode

const zip_uint16_t _cp437_to_unicode[256]
static

Definition at line 40 of file zip_utf-8.c.

Referenced by _zip_cp437_to_utf8().