/*
 * Line-length limit used by the text detector.
 * NOTE(review): presumably the threshold behind the ", with very long lines"
 * report (see has_long_lines); the comparison itself is not visible here,
 * so confirm against the scanning loop.
 */
51 #define MAXLINELEN 300
/*
 * ISSPC(x): nonzero when x is one of space, horizontal tab, carriage return,
 * line feed, 0x85 (NEL, the C1 "next line" control) or form feed.
 * The argument is expanded once per comparison, so it must not have side
 * effects (e.g. do not pass `buf[i++]`).
 */
52 #define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' || (x) == 0x85 || (x) == '\f')
73 int mime = ms->flags & RZ_MAGIC_MIME;
76 const char *code_mime =
NULL;
78 const char *subtype =
NULL;
79 const char *subtype_mime =
NULL;
82 int has_backspace = 0;
90 size_t last_line_end = (
size_t)-1;
91 int has_long_lines = 0;
101 if (!(nbuf =
calloc(1, (
nbytes + 1) *
sizeof(nbuf[0])))) {
104 if (!(ubuf =
calloc(1, (
nbytes + 1) *
sizeof(ubuf[0])))) {
116 code_mime =
"us-ascii";
119 code =
"UTF-8 Unicode (with BOM)";
123 code =
"UTF-8 Unicode";
128 code =
"Little-endian UTF-16 Unicode";
130 code =
"Big-endian UTF-16 Unicode";
133 type =
"character data";
134 code_mime =
"utf-16";
136 if (!memcmp(
buf,
"\xff\xff\xff\xff", 4)) {
142 code_mime =
"iso-8859-1";
144 code =
"Non-ISO extended-ASCII";
146 code_mime =
"unknown";
152 type =
"character data";
153 code_mime =
"ebcdic";
155 code =
"International EBCDIC";
156 type =
"character data";
157 code_mime =
"ebcdic";
176 if (!(utf8_buf =
malloc(mlen))) {
180 if (!(utf8_end =
encode_utf8(utf8_buf, mlen, ubuf, ulen))) {
183 if (
file_softmagic(ms, utf8_buf, utf8_end - utf8_buf, TEXTTEST) != 0) {
189 if ((ms->flags & RZ_MAGIC_NO_CHECK_TOKENS) != 0) {
190 goto subtype_identified;
198 while (
i < ulen &&
ISSPC(ubuf[
i])) {
216 subtype =
types[
p->type].human;
217 subtype_mime =
types[
p->type].mime;
218 goto subtype_identified;
228 for (
i = 0;
i < ulen;
i++) {
229 if (ubuf[
i] ==
'\n') {
236 }
else if (seen_cr) {
240 seen_cr = (ubuf[
i] ==
'\r');
245 if (ubuf[
i] == 0x85) {
254 if (ubuf[
i] ==
'\033') {
257 if (ubuf[
i] ==
'\b') {
266 if (seen_cr &&
nbytes < HOWMANY) {
271 if (
mime & RZ_MAGIC_MIME_TYPE) {
283 if ((
mime == 0 ||
mime == RZ_MAGIC_MIME) && code_mime) {
284 if ((
mime & RZ_MAGIC_MIME_TYPE) &&
293 if (
mime == RZ_MAGIC_MIME_ENCODING) {
320 if (has_long_lines) {
321 if (
file_printf(ms,
", with very long lines") == -1) {
330 if ((n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) ||
331 (n_crlf != 0 || n_cr != 0 || n_nel != 0)) {
336 if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) {
345 if (n_cr || n_lf || n_nel) {
384 if (
file_printf(ms,
", with escape sequences") == -1) {
389 if (
file_printf(ms,
", with overstriking") == -1) {
404 for (
i = 0;
i < ulen;
i++) {
471 F,
F,
F,
F,
F,
F,
F,
T,
T,
T,
T,
F,
T,
T,
F,
F,
473 F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
F,
T,
F,
F,
F,
F,
474 T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
475 T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
476 T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
477 T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
478 T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
479 T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
T,
F,
481 X,
X,
X,
X,
X,
T,
X,
X,
X,
X,
X,
X,
X,
X,
X,
X,
482 X,
X,
X,
X,
X,
X,
X,
X,
X,
X,
X,
X,
X,
X,
X,
X,
483 I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
484 I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
485 I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
486 I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
487 I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
488 I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I,
I
499 ubuf[(*ulen)++] =
buf[
i];
510 if (t !=
T && t !=
I) {
513 ubuf[(*ulen)++] =
buf[
i];
523 if (t !=
T && t !=
I && t !=
X) {
526 ubuf[(*ulen)++] =
buf[
i];
540 for (
i = 0;
i < ulen;
i++) {
541 if (ubuf[
i] <= 0x7f) {
546 }
else if (ubuf[
i] <= 0x7ff) {
550 *
buf++ = (
ut8)((ubuf[
i] >> 6) + 0xc0);
551 *
buf++ = (
ut8)((ubuf[
i] & 0x3f) + 0x80);
552 }
else if (ubuf[
i] <= 0xffff) {
556 *
buf++ = (
ut8)((ubuf[
i] >> 12) + 0xe0);
557 *
buf++ = (
ut8)(((ubuf[
i] >> 6) & 0x3f) + 0x80);
558 *
buf++ = (
ut8)((ubuf[
i] & 0x3f) + 0x80);
559 }
else if (ubuf[
i] <= 0x1fffff) {
563 *
buf++ = (
ut8)((ubuf[
i] >> 18) + 0xf0);
564 *
buf++ = (
ut8)(((ubuf[
i] >> 12) & 0x3f) + 0x80);
565 *
buf++ = (
ut8)(((ubuf[
i] >> 6) & 0x3f) + 0x80);
566 *
buf++ = (
ut8)((ubuf[
i] & 0x3f) + 0x80);
567 }
else if (ubuf[
i] <= 0x3ffffff) {
571 *
buf++ = (
ut8)((ubuf[
i] >> 24) + 0xf8);
572 *
buf++ = (
ut8)(((ubuf[
i] >> 18) & 0x3f) + 0x80);
573 *
buf++ = (
ut8)(((ubuf[
i] >> 12) & 0x3f) + 0x80);
574 *
buf++ = (
ut8)(((ubuf[
i] >> 6) & 0x3f) + 0x80);
575 *
buf++ = (
ut8)((ubuf[
i] & 0x3f) + 0x80);
576 }
else if (ubuf[
i] <= 0x7fffffff) {
580 *
buf++ = (
ut8)((ubuf[
i] >> 30) + 0xfc);
581 *
buf++ = (
ut8)(((ubuf[
i] >> 24) & 0x3f) + 0x80);
582 *
buf++ = (
ut8)(((ubuf[
i] >> 18) & 0x3f) + 0x80);
583 *
buf++ = (
ut8)(((ubuf[
i] >> 12) & 0x3f) + 0x80);
584 *
buf++ = (
ut8)(((ubuf[
i] >> 6) & 0x3f) + 0x80);
585 *
buf++ = (
ut8)((ubuf[
i] & 0x3f) + 0x80);
609 int gotone = 0, ctrl = 0;
616 if ((
buf[
i] & 0x80) == 0) {
627 ubuf[(*ulen)++] =
buf[
i];
629 }
else if ((
buf[
i] & 0x40) == 0) {
634 if ((
buf[
i] & 0x20) == 0) {
637 }
else if ((
buf[
i] & 0x10) == 0) {
640 }
else if ((
buf[
i] & 0x08) == 0) {
643 }
else if ((
buf[
i] & 0x04) == 0) {
646 }
else if ((
buf[
i] & 0x02) == 0) {
653 for (
n = 0;
n < following;
n++) {
659 if ((
buf[
i] & 0x80) == 0 || (
buf[
i] & 0x40)) {
663 c = (
c << 6) + (
buf[
i] & 0x3f);
673 return ctrl ? 0 : (gotone ? 2 : 1);
682 if (
nbytes > 3 &&
buf[0] == 0xef &&
buf[1] == 0xbb &&
buf[2] == 0xbf) {
696 if (
buf[0] == 0xff &&
buf[1] == 0xfe) {
698 }
else if (
buf[0] == 0xfe &&
buf[1] == 0xff) {
710 ubuf[(*ulen)++] =
buf[
i + 1] + 256 *
buf[
i];
712 ubuf[(*ulen)++] =
buf[
i] + 256 *
buf[
i + 1];
715 if (ubuf[*ulen - 1] == 0xfffe) {
718 if (ubuf[*ulen - 1] < 128 &&
text_chars[(
size_t)ubuf[*ulen - 1]] !=
T) {
753 0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15,
754 16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31,
755 128, 129, 130, 131, 132, 10, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7,
756 144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26,
757 ' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213,
'.',
'<',
'(',
'+',
'|',
758 '&', 169, 170, 171, 172, 173, 174, 175, 176, 177,
'!',
'$',
'*',
')',
';',
'~',
759 '-',
'/', 178, 179, 180, 181, 182, 183, 184, 185, 203,
',',
'%',
'_',
'>',
'?',
760 186, 187, 188, 189, 190, 191, 192, 193, 194,
'`',
':',
'#',
'@',
'\'',
'=',
'"',
761 195,
'a',
'b',
'c',
'd',
'e',
'f',
'g',
'h',
'i', 196, 197, 198, 199, 200, 201,
762 202,
'j',
'k',
'l',
'm',
'n',
'o',
'p',
'q',
'r',
'^', 204, 205, 206, 207, 208,
763 209, 229,
's',
't',
'u',
'v',
'w',
'x',
'y',
'z', 210, 211, 212,
'[', 214, 215,
764 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228,
']', 230, 231,
765 '{',
'A',
'B',
'C',
'D',
'E',
'F',
'G',
'H',
'I', 232, 233, 234, 235, 236, 237,
766 '}',
'J',
'K',
'L',
'M',
'N',
'O',
'P',
'Q',
'R', 238, 239, 240, 241, 242, 243,
767 '\\', 159,
'S',
'T',
'U',
'V',
'W',
'X',
'Y',
'Z', 244, 245, 246, 247, 248, 249,
768 '0',
'1',
'2',
'3',
'4',
'5',
'6',
'7',
'8',
'9', 250, 251, 252, 253, 254, 255
786 static ut8 ebcdic_1047_to_8859[] = {
787 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
788 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
789 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
790 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
791 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
792 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
793 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
794 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
795 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
796 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
797 0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0x5B, 0xDE, 0xAE,
798 0xAC, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0xDD, 0xA8, 0xAF, 0x5D, 0xB4, 0xD7,
799 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
800 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
801 0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
802 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
static void from_ebcdic(const ut8 *, size_t, ut8 *)
static ut8 * encode_utf8(ut8 *, size_t, unichar *, size_t)
static int looks_latin1(const ut8 *, size_t, unichar *, size_t *)
static int looks_ucs16(const ut8 *, size_t, unichar *, size_t *)
int file_ascmagic(RzMagic *ms, const ut8 *buf, size_t nbytes)
static ut8 ebcdic_to_ascii[]
int file_looks_utf8(const ut8 *, size_t, unichar *, size_t *)
static int looks_extended(const ut8 *, size_t, unichar *, size_t *)
static int looks_utf8_with_BOM(const ut8 *, size_t, unichar *, size_t *)
static int looks_ascii(const ut8 *, size_t, unichar *, size_t *)
static int ascmatch(const ut8 *, const unichar *, size_t)
static char text_chars[256]
const lzma_allocator const uint8_t size_t uint8_t * out
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void nbytes
void file_oomem(struct rz_magic_set *, size_t)
int file_printf(struct rz_magic_set *, const char *,...)
int file_softmagic(struct rz_magic_set *, const unsigned char *, size_t, int)
checking print the parsed form of the magic use in n conjunction with m to debug a new magic file n before installing it n mime
RZ_API void Ht_() free(HtName_(Ht) *ht)
void * malloc(size_t size)
void * calloc(size_t number, size_t size)
insn_type_descr_t types[]