Rizin
unix-like reverse engineering framework and cli tools
idna.c
Go to the documentation of this file.
1 /* Copyright (c) 2011, 2018 Ben Noordhuis <info@bnoordhuis.nl>
2  *
3  * Permission to use, copy, modify, and/or distribute this software for any
4  * purpose with or without fee is hereby granted, provided that the above
5  * copyright notice and this permission notice appear in all copies.
6  *
7  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14  */
15 
16 /* Derived from https://github.com/bnoordhuis/punycode
17  * but updated to support IDNA 2008.
18  */
19 
20 #include "uv.h"
21 #include "idna.h"
22 #include <string.h>
23 
/* Decode the remainder of a multi-byte UTF-8 sequence.
 *
 * |p|  in/out cursor; on entry it points just past the lead byte.
 * |pe| end of the input; no byte at or beyond |pe| is ever read.
 * |a|  the lead byte, already consumed by the caller (>= 0x80).
 *
 * Returns the decoded code point, or (unsigned) -1 when the sequence is
 * truncated, malformed, overlong, a surrogate or above U+10FFFF.  On
 * failure the cursor is left on the first byte that was not accepted.
 */
static unsigned uv__utf8_decode1_slow(const char** p,
                                      const char* pe,
                                      unsigned a) {
  unsigned min;   /* Smallest code point that needs this many bytes. */
  unsigned need;  /* Number of continuation bytes still expected. */
  unsigned byte;

  if (a > 0xF7)
    return -1;  /* 0xF8..0xFF never start a sequence. */

  /* The lead byte alone determines the sequence length. */
  if (a > 0xEF) {
    need = 3;
    min = 0x10000;
    a &= 7;
  } else if (a > 0xDF) {
    need = 2;
    min = 0x800;
    a &= 15;
  } else if (a > 0xBF) {
    need = 1;
    min = 0x80;
    a &= 31;
  } else {
    return -1;  /* Stray continuation byte. */
  }

  for (; need > 0; need--) {
    if (*p >= pe)
      return -1;  /* Truncated sequence. */

    byte = (unsigned char) **p;

    /* Check every continuation byte on its own; combining them (e.g. with
     * XOR) lets pairs of malformed bytes cancel each other out.
     */
    if ((byte & 0xC0) != 0x80)
      return -1;  /* Invalid continuation byte. */

    (*p)++;
    a = (a << 6) | (byte & 63);
  }

  if (a < min)
    return -1;  /* Overlong sequence. */

  if (a > 0x10FFFF)
    return -1;  /* Four-byte sequence > U+10FFFF. */

  if (a >= 0xD800 && a <= 0xDFFF)
    return -1;  /* Surrogate pair. */

  return a;
}
87 
/* Decode one UTF-8 code point starting at |*p| and advance |*p| past it.
 *
 * |p|  in/out cursor into the input.
 * |pe| end of the input (one past the last readable byte).
 *
 * Returns the code point, or (unsigned) -1 when the input is exhausted or
 * the multi-byte decoder rejects the sequence.
 */
unsigned uv__utf8_decode1(const char** p, const char* pe) {
  unsigned a;

  /* Never dereference at or beyond the end of the input.  The cursor is
   * still advanced by one, exactly as a successful single-byte read would
   * do, so callers that loop until the cursor passes |pe| still terminate.
   */
  if (*p >= pe) {
    (*p)++;
    return -1;
  }

  a = (unsigned char) *(*p)++;

  if (a < 128)
    return a;  /* ASCII, common case. */

  return uv__utf8_decode1_slow(p, pe, a);
}
98 
99 #define foreach_codepoint(c, p, pe) \
100  for (; (void) (*p <= pe && (c = uv__utf8_decode1(p, pe))), *p <= pe;)
101 
102 static int uv__idna_toascii_label(const char* s, const char* se,
103  char** d, char* de) {
104  static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789";
105  const char* ss;
106  unsigned c;
107  unsigned h;
108  unsigned k;
109  unsigned n;
110  unsigned m;
111  unsigned q;
112  unsigned t;
113  unsigned x;
114  unsigned y;
115  unsigned bias;
116  unsigned delta;
117  unsigned todo;
118  int first;
119 
120  h = 0;
121  ss = s;
122  todo = 0;
123 
124  foreach_codepoint(c, &s, se) {
125  if (c < 128)
126  h++;
127  else if (c == (unsigned) -1)
128  return UV_EINVAL;
129  else
130  todo++;
131  }
132 
133  if (todo > 0) {
134  if (*d < de) *(*d)++ = 'x';
135  if (*d < de) *(*d)++ = 'n';
136  if (*d < de) *(*d)++ = '-';
137  if (*d < de) *(*d)++ = '-';
138  }
139 
140  x = 0;
141  s = ss;
142  foreach_codepoint(c, &s, se) {
143  if (c > 127)
144  continue;
145 
146  if (*d < de)
147  *(*d)++ = c;
148 
149  if (++x == h)
150  break; /* Visited all ASCII characters. */
151  }
152 
153  if (todo == 0)
154  return h;
155 
156  /* Only write separator when we've written ASCII characters first. */
157  if (h > 0)
158  if (*d < de)
159  *(*d)++ = '-';
160 
161  n = 128;
162  bias = 72;
163  delta = 0;
164  first = 1;
165 
166  while (todo > 0) {
167  m = -1;
168  s = ss;
169  foreach_codepoint(c, &s, se)
170  if (c >= n)
171  if (c < m)
172  m = c;
173 
174  x = m - n;
175  y = h + 1;
176 
177  if (x > ~delta / y)
178  return UV_E2BIG; /* Overflow. */
179 
180  delta += x * y;
181  n = m;
182 
183  s = ss;
184  foreach_codepoint(c, &s, se) {
185  if (c < n)
186  if (++delta == 0)
187  return UV_E2BIG; /* Overflow. */
188 
189  if (c != n)
190  continue;
191 
192  for (k = 36, q = delta; /* empty */; k += 36) {
193  t = 1;
194 
195  if (k > bias)
196  t = k - bias;
197 
198  if (t > 26)
199  t = 26;
200 
201  if (q < t)
202  break;
203 
204  /* TODO(bnoordhuis) Since 1 <= t <= 26 and therefore
205  * 10 <= y <= 35, we can optimize the long division
206  * into a table-based reciprocal multiplication.
207  */
208  x = q - t;
209  y = 36 - t; /* 10 <= y <= 35 since 1 <= t <= 26. */
210  q = x / y;
211  t = t + x % y; /* 1 <= t <= 35 because of y. */
212 
213  if (*d < de)
214  *(*d)++ = alphabet[t];
215  }
216 
217  if (*d < de)
218  *(*d)++ = alphabet[q];
219 
220  delta /= 2;
221 
222  if (first) {
223  delta /= 350;
224  first = 0;
225  }
226 
227  /* No overflow check is needed because |delta| was just
228  * divided by 2 and |delta+delta >= delta + delta/h|.
229  */
230  h++;
231  delta += delta / h;
232 
233  for (bias = 0; delta > 35 * 26 / 2; bias += 36)
234  delta /= 35;
235 
236  bias += 36 * delta / (delta + 38);
237  delta = 0;
238  todo--;
239  }
240 
241  delta++;
242  n++;
243  }
244 
245  return 0;
246 }
247 
248 #undef foreach_codepoint
249 
250 long uv__idna_toascii(const char* s, const char* se, char* d, char* de) {
251  const char* si;
252  const char* st;
253  unsigned c;
254  char* ds;
255  int rc;
256 
257  ds = d;
258 
259  for (si = s; si < se; /* empty */) {
260  st = si;
261  c = uv__utf8_decode1(&si, se);
262 
263  if (c != '.')
264  if (c != 0x3002) /* 。 */
265  if (c != 0xFF0E) /* . */
266  if (c != 0xFF61) /* 。 */
267  continue;
268 
269  rc = uv__idna_toascii_label(s, st, &d, de);
270 
271  if (rc < 0)
272  return rc;
273 
274  if (d < de)
275  *d++ = '.';
276 
277  s = si;
278  }
279 
280  if (s < se) {
281  rc = uv__idna_toascii_label(s, se, &d, de);
282 
283  if (rc < 0)
284  return rc;
285  }
286 
287  if (d < de)
288  *d++ = '\0';
289 
290  return d - ds; /* Number of bytes written. */
291 }
si
const char * k
Definition: dsignal.c:11
static unsigned uv__utf8_decode1_slow(const char **p, const char *pe, unsigned a)
Definition: idna.c:24
unsigned uv__utf8_decode1(const char **p, const char *pe)
Definition: idna.c:88
static int uv__idna_toascii_label(const char *s, const char *se, char **d, char *de)
Definition: idna.c:102
#define foreach_codepoint(c, p, pe)
Definition: idna.c:99
long uv__idna_toascii(const char *s, const char *se, char *d, char *de)
Definition: idna.c:250
void * p
Definition: libc.cpp:67
int x
Definition: mipsasm.c:20
int n
Definition: mipsasm.c:19
#define min(a, b)
Definition: qsort.h:83
static RzSocket * s
Definition: rtr.c:28
#define d(i)
Definition: sha256.c:44
#define b(i)
Definition: sha256.c:42
#define c(i)
Definition: sha256.c:43
#define a(i)
Definition: sha256.c:41
#define h(i)
Definition: sha256.c:48
static st64 delta
Definition: vmenus.c:2425