Rizin
unix-like reverse engineering framework and cli tools
ascmagic.c File Reference
#include <rz_userconf.h>
#include "file.h"
#include <stdio.h>
#include <string.h>
#include <memory.h>
#include <ctype.h>
#include <stdlib.h>
#include "names.h"

Go to the source code of this file.

Macros

#define MAXLINELEN   300 /* longest sane line length */
 
#define ISSPC(x)   ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' || (x) == 0x85 || (x) == '\f')
 
#define F   0 /* character never appears in text */
 
#define T   1 /* character appears in plain ASCII text */
 
#define I   2 /* character appears in ISO-8859 text */
 
#define X   3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
 

Functions

static int looks_ascii (const ut8 *, size_t, unichar *, size_t *)
 
static int looks_utf8_with_BOM (const ut8 *, size_t, unichar *, size_t *)
 
int file_looks_utf8 (const ut8 *, size_t, unichar *, size_t *)
 
static int looks_ucs16 (const ut8 *, size_t, unichar *, size_t *)
 
static int looks_latin1 (const ut8 *, size_t, unichar *, size_t *)
 
static int looks_extended (const ut8 *, size_t, unichar *, size_t *)
 
static void from_ebcdic (const ut8 *, size_t, ut8 *)
 
static int ascmatch (const ut8 *, const unichar *, size_t)
 
static ut8 * encode_utf8 (ut8 *, size_t, unichar *, size_t)
 
int file_ascmagic (RzMagic *ms, const ut8 *buf, size_t nbytes)
 

Variables

static char text_chars [256]
 
static ut8 ebcdic_to_ascii []
 

Macro Definition Documentation

◆ F

#define F   0 /* character never appears in text */

Definition at line 464 of file ascmagic.c.

◆ I

#define I   2 /* character appears in ISO-8859 text */

Definition at line 466 of file ascmagic.c.

◆ ISSPC

#define ISSPC (   x)    ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' || (x) == 0x85 || (x) == '\f')

Definition at line 52 of file ascmagic.c.

◆ MAXLINELEN

#define MAXLINELEN   300 /* longest sane line length */

Definition at line 51 of file ascmagic.c.

◆ T

#define T   1 /* character appears in plain ASCII text */

Definition at line 465 of file ascmagic.c.

◆ X

#define X   3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */

Definition at line 467 of file ascmagic.c.

Function Documentation

◆ ascmatch()

static int ascmatch ( const ut8 * s,
const unichar * us,
size_t  ulen 
)
static

Definition at line 402 of file ascmagic.c.

402  {
403  size_t i;
404  for (i = 0; i < ulen; i++) {
405  if (s[i] != us[i]) {
406  return 0;
407  }
408  }
409  return s[i] ? 0 : 1;
410 }
lzma_index ** i
Definition: index.h:629
static RzSocket * s
Definition: rtr.c:28

References i, and s.

Referenced by file_ascmagic().

◆ encode_utf8()

static ut8 * encode_utf8 ( ut8 * buf,
size_t  len,
unichar * ubuf,
size_t  ulen 
)
static

Definition at line 536 of file ascmagic.c.

536  {
537  size_t i;
538  ut8 *end = buf + len;
539 
540  for (i = 0; i < ulen; i++) {
541  if (ubuf[i] <= 0x7f) {
542  if (end - buf < 1) {
543  return NULL;
544  }
545  *buf++ = (ut8)ubuf[i];
546  } else if (ubuf[i] <= 0x7ff) {
547  if (end - buf < 2) {
548  return NULL;
549  }
550  *buf++ = (ut8)((ubuf[i] >> 6) + 0xc0);
551  *buf++ = (ut8)((ubuf[i] & 0x3f) + 0x80);
552  } else if (ubuf[i] <= 0xffff) {
553  if (end - buf < 3) {
554  return NULL;
555  }
556  *buf++ = (ut8)((ubuf[i] >> 12) + 0xe0);
557  *buf++ = (ut8)(((ubuf[i] >> 6) & 0x3f) + 0x80);
558  *buf++ = (ut8)((ubuf[i] & 0x3f) + 0x80);
559  } else if (ubuf[i] <= 0x1fffff) {
560  if (end - buf < 4) {
561  return NULL;
562  }
563  *buf++ = (ut8)((ubuf[i] >> 18) + 0xf0);
564  *buf++ = (ut8)(((ubuf[i] >> 12) & 0x3f) + 0x80);
565  *buf++ = (ut8)(((ubuf[i] >> 6) & 0x3f) + 0x80);
566  *buf++ = (ut8)((ubuf[i] & 0x3f) + 0x80);
567  } else if (ubuf[i] <= 0x3ffffff) {
568  if (end - buf < 5) {
569  return NULL;
570  }
571  *buf++ = (ut8)((ubuf[i] >> 24) + 0xf8);
572  *buf++ = (ut8)(((ubuf[i] >> 18) & 0x3f) + 0x80);
573  *buf++ = (ut8)(((ubuf[i] >> 12) & 0x3f) + 0x80);
574  *buf++ = (ut8)(((ubuf[i] >> 6) & 0x3f) + 0x80);
575  *buf++ = (ut8)((ubuf[i] & 0x3f) + 0x80);
576  } else if (ubuf[i] <= 0x7fffffff) {
577  if (end - buf < 6) {
578  return NULL;
579  }
580  *buf++ = (ut8)((ubuf[i] >> 30) + 0xfc);
581  *buf++ = (ut8)(((ubuf[i] >> 24) & 0x3f) + 0x80);
582  *buf++ = (ut8)(((ubuf[i] >> 18) & 0x3f) + 0x80);
583  *buf++ = (ut8)(((ubuf[i] >> 12) & 0x3f) + 0x80);
584  *buf++ = (ut8)(((ubuf[i] >> 6) & 0x3f) + 0x80);
585  *buf++ = (ut8)((ubuf[i] & 0x3f) + 0x80);
586  } else { /* Invalid character */
587  return NULL;
588  }
589  }
590 
591  return buf;
592 }
size_t len
Definition: 6502dis.c:15
#define NULL
Definition: cris-opc.c:27
#define ut8
Definition: dcpu16.h:8
voidpf void * buf
Definition: ioapi.h:138
uint8_t ut8
Definition: lh5801.h:11

References test_evm::end, i, len, NULL, and ut8.

Referenced by file_ascmagic().

◆ file_ascmagic()

int file_ascmagic ( RzMagic *  ms,
const ut8 * buf,
size_t  nbytes 
)

Definition at line 65 of file ascmagic.c.

65  {
66  return 0;
67  size_t i;
68  ut8 *nbuf = NULL, *utf8_buf = NULL, *utf8_end;
69  unichar *ubuf = NULL;
70  size_t ulen, mlen;
71  const struct names *p;
72  int rv = -1;
73  int mime = ms->flags & RZ_MAGIC_MIME;
74 
75  const char *code = NULL;
76  const char *code_mime = NULL;
77  const char *type = NULL;
78  const char *subtype = NULL;
79  const char *subtype_mime = NULL;
80 
81  int has_escapes = 0;
82  int has_backspace = 0;
83  int seen_cr = 0;
84 
85  int n_crlf = 0;
86  int n_lf = 0;
87  int n_cr = 0;
88  int n_nel = 0;
89 
90  size_t last_line_end = (size_t)-1;
91  int has_long_lines = 0;
92 
93  /*
94  * Undo the NUL-termination kindly provided by process()
95  * but leave at least one byte to look at
96  */
97  while (nbytes > 1 && buf[nbytes - 1] == '\0') {
98  nbytes--;
99  }
100 
101  if (!(nbuf = calloc(1, (nbytes + 1) * sizeof(nbuf[0])))) {
102  goto done;
103  }
104  if (!(ubuf = calloc(1, (nbytes + 1) * sizeof(ubuf[0])))) {
105  goto done;
106  }
107 
108  /*
109  * Then try to determine whether it's any character code we can
110  * identify. Each of these tests, if it succeeds, will leave
111  * the text converted into one-unichar-per-character Unicode in
112  * ubuf, and the number of characters converted in ulen.
113  */
114  if (looks_ascii(buf, nbytes, ubuf, &ulen)) {
115  code = "ASCII";
116  code_mime = "us-ascii";
117  type = "text";
118  } else if (looks_utf8_with_BOM(buf, nbytes, ubuf, &ulen) > 0) {
119  code = "UTF-8 Unicode (with BOM)";
120  code_mime = "utf-8";
121  type = "text";
122  } else if (file_looks_utf8(buf, nbytes, ubuf, &ulen) > 1) {
123  code = "UTF-8 Unicode";
124  code_mime = "utf-8";
125  type = "text";
126  } else if ((i = looks_ucs16(buf, nbytes, ubuf, &ulen)) != 0) {
127  if (i == 1) {
128  code = "Little-endian UTF-16 Unicode";
129  } else {
130  code = "Big-endian UTF-16 Unicode";
131  }
132 
133  type = "character data";
134  code_mime = "utf-16"; /* is this defined? */
135  } else if (looks_latin1(buf, nbytes, ubuf, &ulen)) {
136  if (!memcmp(buf, "\xff\xff\xff\xff", 4)) {
137  // uninitialized memory is not iso-8859!!
138  goto done;
139  }
140  code = "ISO-8859";
141  type = "text";
142  code_mime = "iso-8859-1";
143  } else if (looks_extended(buf, nbytes, ubuf, &ulen)) {
144  code = "Non-ISO extended-ASCII";
145  type = "text";
146  code_mime = "unknown";
147  } else {
148  from_ebcdic(buf, nbytes, nbuf);
149 
150  if (looks_ascii(nbuf, nbytes, ubuf, &ulen)) {
151  code = "EBCDIC";
152  type = "character data";
153  code_mime = "ebcdic";
154  } else if (looks_latin1(nbuf, nbytes, ubuf, &ulen)) {
155  code = "International EBCDIC";
156  type = "character data";
157  code_mime = "ebcdic";
158  } else {
159  rv = 0;
160  goto done; /* doesn't look like text at all */
161  }
162  }
163 
164  if (nbytes <= 1) {
165  rv = 0;
166  goto done;
167  }
168 
169  /* Convert ubuf to UTF-8 and try text soft magic */
170  /* If original was ASCII or UTF-8, could use nbuf instead of
171  re-converting. */
172  /* malloc size is a conservative overestimate; could be
173  re-converting improved, or at least realloced after
174  re-converting conversion. */
175  mlen = ulen * 6;
176  if (!(utf8_buf = malloc(mlen))) {
177  file_oomem(ms, mlen);
178  goto done;
179  }
180  if (!(utf8_end = encode_utf8(utf8_buf, mlen, ubuf, ulen))) {
181  goto done;
182  }
183  if (file_softmagic(ms, utf8_buf, utf8_end - utf8_buf, TEXTTEST) != 0) {
184  rv = 1;
185  goto done;
186  }
187 
188  /* look for tokens from names.h - this is expensive! */
189  if ((ms->flags & RZ_MAGIC_NO_CHECK_TOKENS) != 0) {
190  goto subtype_identified;
191  }
192 
193  i = 0;
194  while (i < ulen) {
195  size_t end;
196 
197  /* skip past any leading space */
198  while (i < ulen && ISSPC(ubuf[i])) {
199  i++;
200  }
201  if (i >= ulen) {
202  break;
203  }
204 
205  /* find the next whitespace */
206  for (end = i + 1; end < nbytes; end++) {
207  if (ISSPC(ubuf[end])) {
208  break;
209  }
210  }
211 
212  /* compare the word thus isolated against the token list */
213  for (p = names; p < names + NNAMES; p++) {
214  if (ascmatch((const ut8 *)p->name, ubuf + i,
215  end - i)) {
216  subtype = types[p->type].human;
217  subtype_mime = types[p->type].mime;
218  goto subtype_identified;
219  }
220  }
221 
222  i = end;
223  }
224 
225 subtype_identified:
226 
227  /* Now try to discover other details about the file. */
228  for (i = 0; i < ulen; i++) {
229  if (ubuf[i] == '\n') {
230  if (seen_cr) {
231  n_crlf++;
232  } else {
233  n_lf++;
234  }
235  last_line_end = i;
236  } else if (seen_cr) {
237  n_cr++;
238  }
239 
240  seen_cr = (ubuf[i] == '\r');
241  if (seen_cr) {
242  last_line_end = i;
243  }
244 
245  if (ubuf[i] == 0x85) { /* X3.64/ECMA-43 "next line" character */
246  n_nel++;
247  last_line_end = i;
248  }
249  /* If this line is _longer_ than MAXLINELEN, remember it. */
250  if (i > last_line_end + MAXLINELEN) {
251  has_long_lines = 1;
252  }
253 
254  if (ubuf[i] == '\033') {
255  has_escapes = 1;
256  }
257  if (ubuf[i] == '\b') {
258  has_backspace = 1;
259  }
260  }
261 
262  /* Beware, if the data has been truncated, the final CR could have
263  been followed by a LF. If we have HOWMANY bytes, it indicates
264  that the data might have been truncated, probably even before
265  this function was called. */
266  if (seen_cr && nbytes < HOWMANY) {
267  n_cr++;
268  }
269 
270  if (mime) {
271  if (mime & RZ_MAGIC_MIME_TYPE) {
272  if (subtype_mime) {
273  if (file_printf(ms, subtype_mime) == -1) {
274  goto done;
275  }
276  } else {
277  if (file_printf(ms, "text/plain") == -1) {
278  goto done;
279  }
280  }
281  }
282 
283  if ((mime == 0 || mime == RZ_MAGIC_MIME) && code_mime) {
284  if ((mime & RZ_MAGIC_MIME_TYPE) &&
285  file_printf(ms, " charset=") == -1) {
286  goto done;
287  }
288  if (file_printf(ms, code_mime) == -1) {
289  goto done;
290  }
291  }
292 
293  if (mime == RZ_MAGIC_MIME_ENCODING) {
294  if (file_printf(ms, "binary") == -1) {
295  rv = 1;
296  goto done;
297  }
298  }
299  } else {
300  if (file_printf(ms, code) == -1) {
301  goto done;
302  }
303 
304  if (subtype) {
305  if (file_printf(ms, " ") == -1) {
306  goto done;
307  }
308  if (file_printf(ms, subtype) == -1) {
309  goto done;
310  }
311  }
312 
313  if (file_printf(ms, " ") == -1) {
314  goto done;
315  }
316  if (file_printf(ms, type) == -1) {
317  goto done;
318  }
319 
320  if (has_long_lines) {
321  if (file_printf(ms, ", with very long lines") == -1) {
322  goto done;
323  }
324  }
325 
326  /*
327  * Only report line terminators if we find one other than LF,
328  * or if we find none at all.
329  */
330  if ((n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) ||
331  (n_crlf != 0 || n_cr != 0 || n_nel != 0)) {
332  if (file_printf(ms, ", with") == -1) {
333  goto done;
334  }
335 
336  if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) {
337  if (file_printf(ms, " no") == -1) {
338  goto done;
339  }
340  } else {
341  if (n_crlf) {
342  if (file_printf(ms, " CRLF") == -1) {
343  goto done;
344  }
345  if (n_cr || n_lf || n_nel) {
346  if (file_printf(ms, ",") == -1) {
347  goto done;
348  }
349  }
350  }
351  if (n_cr) {
352  if (file_printf(ms, " CR") == -1) {
353  goto done;
354  }
355  if (n_lf || n_nel) {
356  if (file_printf(ms, ",") == -1) {
357  goto done;
358  }
359  }
360  }
361  if (n_lf) {
362  if (file_printf(ms, " LF") == -1) {
363  goto done;
364  }
365  if (n_nel) {
366  if (file_printf(ms, ",") == -1) {
367  goto done;
368  }
369  }
370  }
371  if (n_nel) {
372  if (file_printf(ms, " NEL") == -1) {
373  goto done;
374  }
375  }
376  }
377 
378  if (file_printf(ms, " line terminators") == -1) {
379  goto done;
380  }
381  }
382 
383  if (has_escapes) {
384  if (file_printf(ms, ", with escape sequences") == -1) {
385  goto done;
386  }
387  }
388  if (has_backspace) {
389  if (file_printf(ms, ", with overstriking") == -1) {
390  goto done;
391  }
392  }
393  }
394  rv = 1;
395 done:
396  free(nbuf);
397  free(ubuf);
398  free(utf8_buf);
399  return rv;
400 }
#define ISSPC(x)
Definition: ascmagic.c:52
static void from_ebcdic(const ut8 *, size_t, ut8 *)
Definition: ascmagic.c:809
static ut8 * encode_utf8(ut8 *, size_t, unichar *, size_t)
Definition: ascmagic.c:536
static int looks_latin1(const ut8 *, size_t, unichar *, size_t *)
Definition: ascmagic.c:504
static int looks_ucs16(const ut8 *, size_t, unichar *, size_t *)
Definition: ascmagic.c:688
int file_looks_utf8(const ut8 *, size_t, unichar *, size_t *)
Definition: ascmagic.c:605
#define MAXLINELEN
Definition: ascmagic.c:51
static int looks_extended(const ut8 *, size_t, unichar *, size_t *)
Definition: ascmagic.c:518
static int looks_utf8_with_BOM(const ut8 *, size_t, unichar *, size_t *)
Definition: ascmagic.c:681
static int looks_ascii(const ut8 *, size_t, unichar *, size_t *)
Definition: ascmagic.c:491
static int ascmatch(const ut8 *, const unichar *, size_t)
Definition: ascmagic.c:402
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void nbytes
Definition: sflib.h:113
struct tab * done
Definition: enough.c:233
void file_oomem(struct rz_magic_set *, size_t)
int file_printf(struct rz_magic_set *, const char *,...)
int file_softmagic(struct rz_magic_set *, const unsigned char *, size_t, int)
unsigned long unichar
Definition: file.h:51
checking print the parsed form of the magic use in n conjunction with m to debug a new magic file n before installing it n mime
Definition: file_opts.h:30
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
void * p
Definition: libc.cpp:67
void * malloc(size_t size)
Definition: malloc.c:123
void * calloc(size_t number, size_t size)
Definition: malloc.c:102
int type
Definition: mipsasm.c:17
#define NNAMES
Definition: names.h:180
insn_type_descr_t types[]
Definition: or1k_disas.c:7
int size_t
Definition: sftypes.h:40
Definition: inftree9.h:24
Definition: names.h:123

References ascmatch(), calloc(), done, encode_utf8(), test_evm::end, file_looks_utf8(), file_oomem(), file_printf(), file_softmagic(), free(), from_ebcdic(), i, ISSPC, looks_ascii(), looks_extended(), looks_latin1(), looks_ucs16(), looks_utf8_with_BOM(), malloc(), MAXLINELEN, mime, nbytes, NNAMES, NULL, p, type, and types.

Referenced by file_buffer().

◆ file_looks_utf8()

int file_looks_utf8 ( const ut8 * buf,
size_t  nbytes,
unichar * ubuf,
size_t * ulen 
)

Definition at line 605 of file ascmagic.c.

605  {
606  size_t i;
607  int n;
608  unichar c;
609  int gotone = 0, ctrl = 0;
610 
611  if (ubuf) {
612  *ulen = 0;
613  }
614 
615  for (i = 0; i < nbytes; i++) {
616  if ((buf[i] & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */
617  /*
618  * Even if the whole file is valid UTF-8 sequences,
619  * still reject it if it uses weird control characters.
620  */
621 
622  if (text_chars[buf[i]] != T) {
623  ctrl = 1;
624  }
625 
626  if (ubuf) {
627  ubuf[(*ulen)++] = buf[i];
628  }
629  } else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */
630  return -1;
631  } else { /* 11xxxxxx begins UTF-8 */
632  int following;
633 
634  if ((buf[i] & 0x20) == 0) { /* 110xxxxx */
635  c = buf[i] & 0x1f;
636  following = 1;
637  } else if ((buf[i] & 0x10) == 0) { /* 1110xxxx */
638  c = buf[i] & 0x0f;
639  following = 2;
640  } else if ((buf[i] & 0x08) == 0) { /* 11110xxx */
641  c = buf[i] & 0x07;
642  following = 3;
643  } else if ((buf[i] & 0x04) == 0) { /* 111110xx */
644  c = buf[i] & 0x03;
645  following = 4;
646  } else if ((buf[i] & 0x02) == 0) { /* 1111110x */
647  c = buf[i] & 0x01;
648  following = 5;
649  } else {
650  return -1;
651  }
652 
653  for (n = 0; n < following; n++) {
654  i++;
655  if (i >= nbytes) {
656  goto done;
657  }
658 
659  if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40)) {
660  return -1;
661  }
662 
663  c = (c << 6) + (buf[i] & 0x3f);
664  }
665 
666  if (ubuf) {
667  ubuf[(*ulen)++] = c;
668  }
669  gotone = 1;
670  }
671  }
672 done:
673  return ctrl ? 0 : (gotone ? 2 : 1);
674 }
#define T
Definition: ascmagic.c:465
static char text_chars[256]
Definition: ascmagic.c:469
int n
Definition: mipsasm.c:19
#define c(i)
Definition: sha256.c:43

References c, done, i, n, nbytes, T, and text_chars.

Referenced by file_ascmagic(), looks_utf8_with_BOM(), and set_test_type().

◆ from_ebcdic()

static void from_ebcdic ( const ut8 * buf,
size_t  nbytes,
ut8 * out 
)
static

Definition at line 809 of file ascmagic.c.

809  {
810  size_t i;
811  for (i = 0; i < nbytes; i++) {
812  out[i] = ebcdic_to_ascii[buf[i]];
813  }
814 }
static ut8 ebcdic_to_ascii[]
Definition: ascmagic.c:752
const lzma_allocator const uint8_t size_t uint8_t * out
Definition: block.h:528

References ebcdic_to_ascii, i, nbytes, and out.

Referenced by file_ascmagic().

◆ looks_ascii()

static int looks_ascii ( const ut8 * buf,
size_t  nbytes,
unichar * ubuf,
size_t * ulen 
)
static

Definition at line 491 of file ascmagic.c.

491  {
492  size_t i;
493  *ulen = 0;
494  for (i = 0; i < nbytes; i++) {
495  int t = text_chars[buf[i]];
496  if (t != T) {
497  return 0;
498  }
499  ubuf[(*ulen)++] = buf[i];
500  }
501  return 1;
502 }

References i, nbytes, T, and text_chars.

Referenced by file_ascmagic().

◆ looks_extended()

static int looks_extended ( const ut8 * buf,
size_t  nbytes,
unichar * ubuf,
size_t * ulen 
)
static

Definition at line 518 of file ascmagic.c.

518  {
519  size_t i;
520  *ulen = 0;
521  for (i = 0; i < nbytes; i++) {
522  int t = text_chars[buf[i]];
523  if (t != T && t != I && t != X) {
524  return 0;
525  }
526  ubuf[(*ulen)++] = buf[i];
527  }
528  return 1;
529 }
#define X
Definition: ascmagic.c:467
#define I
Definition: ascmagic.c:466

References I, i, nbytes, T, text_chars, and X.

Referenced by file_ascmagic().

◆ looks_latin1()

static int looks_latin1 ( const ut8 * buf,
size_t  nbytes,
unichar * ubuf,
size_t * ulen 
)
static

Definition at line 504 of file ascmagic.c.

504  {
505  size_t i;
506  *ulen = 0;
507 
508  for (i = 0; i < nbytes; i++) {
509  int t = text_chars[buf[i]];
510  if (t != T && t != I) {
511  return 0;
512  }
513  ubuf[(*ulen)++] = buf[i];
514  }
515  return 1;
516 }

References I, i, nbytes, T, and text_chars.

Referenced by file_ascmagic().

◆ looks_ucs16()

static int looks_ucs16 ( const ut8 * buf,
size_t  nbytes,
unichar * ubuf,
size_t * ulen 
)
static

Definition at line 688 of file ascmagic.c.

688  {
689  int bigend;
690  size_t i;
691 
692  if (nbytes < 2) {
693  return 0;
694  }
695 
696  if (buf[0] == 0xff && buf[1] == 0xfe) {
697  bigend = 0;
698  } else if (buf[0] == 0xfe && buf[1] == 0xff) {
699  bigend = 1;
700  } else {
701  return 0;
702  }
703 
704  *ulen = 0;
705 
706  for (i = 2; i + 1 < nbytes; i += 2) {
707  /* XXX fix to properly handle chars > 65536 */
708 
709  if (bigend) {
710  ubuf[(*ulen)++] = buf[i + 1] + 256 * buf[i];
711  } else {
712  ubuf[(*ulen)++] = buf[i] + 256 * buf[i + 1];
713  }
714 
715  if (ubuf[*ulen - 1] == 0xfffe) {
716  return 0;
717  }
718  if (ubuf[*ulen - 1] < 128 && text_chars[(size_t)ubuf[*ulen - 1]] != T) {
719  return 0;
720  }
721  }
722  return 1 + bigend;
723 }

References i, nbytes, T, and text_chars.

Referenced by file_ascmagic().

◆ looks_utf8_with_BOM()

static int looks_utf8_with_BOM ( const ut8 * buf,
size_t  nbytes,
unichar * ubuf,
size_t * ulen 
)
static

Definition at line 681 of file ascmagic.c.

681  {
682  if (nbytes > 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf) {
683  return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen);
684  }
685  return -1;
686 }

References file_looks_utf8(), and nbytes.

Referenced by file_ascmagic().

Variable Documentation

◆ ebcdic_to_ascii

ut8 ebcdic_to_ascii[]
static
Initial value:
= {
0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15,
16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31,
128, 129, 130, 131, 132, 10, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7,
144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26,
' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|',
'&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~',
'-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?',
186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'', '=', '"',
195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201,
202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208,
209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215,
216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231,
'{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237,
'}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243,
'\\', 159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249,
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255
}

Definition at line 752 of file ascmagic.c.

Referenced by from_ebcdic().

◆ text_chars

char text_chars[256]
static
Initial value:
= {
F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,
F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,
X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I
}
#define F
Definition: ascmagic.c:464

Definition at line 469 of file ascmagic.c.

Referenced by file_looks_utf8(), looks_ascii(), looks_extended(), looks_latin1(), and looks_ucs16().