00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifdef HAVE_CONFIG_H
00024 # include "config.h"
00025 #endif
00026
00027 #include <stdlib.h>
00028 #include <string.h>
00029 #include <stringprep.h>
00030 #include <punycode.h>
00031
00032 #include "idna.h"
00033
00034
00035 #include <c-strcase.h>
00036
/* Nonzero when code point C is one of the four characters this file
   treats as a label separator: U+002E (full stop), U+3002 (ideographic
   full stop), U+FF0E (fullwidth full stop) or U+FF61 (halfwidth
   ideographic full stop).  C is evaluated more than once, so it must
   not have side effects.  */
#define DOTP(c) \
  ((c) == 0x002E || (c) == 0x3002 || (c) == 0xFF0E || (c) == 0xFF61)
00039
00040
00041
00073 int
00074 idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags)
00075 {
00076 size_t len, outlen;
00077 uint32_t *src;
00078 int rc;
00079
00080
00081
00082
00083
00084
00085
00086
00087 {
00088 size_t i;
00089 int inasciirange;
00090
00091 inasciirange = 1;
00092 for (i = 0; i < inlen; i++)
00093 if (in[i] > 0x7F)
00094 inasciirange = 0;
00095 if (inasciirange)
00096 {
00097 src = malloc (sizeof (in[0]) * (inlen + 1));
00098 if (src == NULL)
00099 return IDNA_MALLOC_ERROR;
00100
00101 memcpy (src, in, sizeof (in[0]) * inlen);
00102 src[inlen] = 0;
00103
00104 goto step3;
00105 }
00106 }
00107
00108
00109
00110
00111
00112
00113 {
00114 char *p;
00115
00116 p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);
00117 if (p == NULL)
00118 return IDNA_MALLOC_ERROR;
00119
00120 len = strlen (p);
00121 do
00122 {
00123 char *newp;
00124
00125 len = 2 * len + 10;
00126 newp = realloc (p, len);
00127 if (newp == NULL)
00128 {
00129 free (p);
00130 return IDNA_MALLOC_ERROR;
00131 }
00132 p = newp;
00133
00134 if (flags & IDNA_ALLOW_UNASSIGNED)
00135 rc = stringprep_nameprep (p, len);
00136 else
00137 rc = stringprep_nameprep_no_unassigned (p, len);
00138 }
00139 while (rc == STRINGPREP_TOO_SMALL_BUFFER);
00140
00141 if (rc != STRINGPREP_OK)
00142 {
00143 free (p);
00144 return IDNA_STRINGPREP_ERROR;
00145 }
00146
00147 src = stringprep_utf8_to_ucs4 (p, -1, NULL);
00148
00149 free (p);
00150 }
00151
00152 step3:
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164 if (flags & IDNA_USE_STD3_ASCII_RULES)
00165 {
00166 size_t i;
00167
00168 for (i = 0; src[i]; i++)
00169 if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
00170 (src[i] >= 0x3A && src[i] <= 0x40) ||
00171 (src[i] >= 0x5B && src[i] <= 0x60) ||
00172 (src[i] >= 0x7B && src[i] <= 0x7F))
00173 {
00174 free (src);
00175 return IDNA_CONTAINS_NON_LDH;
00176 }
00177
00178 if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
00179 {
00180 free (src);
00181 return IDNA_CONTAINS_MINUS;
00182 }
00183 }
00184
00185
00186
00187
00188
00189
00190 {
00191 size_t i;
00192 int inasciirange;
00193
00194 inasciirange = 1;
00195 for (i = 0; src[i]; i++)
00196 {
00197 if (src[i] > 0x7F)
00198 inasciirange = 0;
00199
00200 if (i < 64)
00201 out[i] = src[i];
00202 }
00203 if (i < 64)
00204 out[i] = '\0';
00205 if (inasciirange)
00206 goto step8;
00207 }
00208
00209
00210
00211
00212
00213
00214 {
00215 size_t i;
00216 int match;
00217
00218 match = 1;
00219 for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++)
00220 if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
00221 match = 0;
00222 if (match)
00223 {
00224 free (src);
00225 return IDNA_CONTAINS_ACE_PREFIX;
00226 }
00227 }
00228
00229
00230
00231
00232
00233 for (len = 0; src[len]; len++)
00234 ;
00235 src[len] = '\0';
00236 outlen = 63 - strlen (IDNA_ACE_PREFIX);
00237 rc = punycode_encode (len, src, NULL,
00238 &outlen, &out[strlen (IDNA_ACE_PREFIX)]);
00239 if (rc != PUNYCODE_SUCCESS)
00240 {
00241 free (src);
00242 return IDNA_PUNYCODE_ERROR;
00243 }
00244 out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0';
00245
00246
00247
00248
00249
00250 memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX));
00251
00252
00253
00254
00255
00256
00257 step8:
00258 free (src);
00259 if (strlen (out) < 1 || strlen (out) > 63)
00260 return IDNA_INVALID_LENGTH;
00261
00262 return IDNA_SUCCESS;
00263 }
00264
00265
00266 static int
00267 idna_to_unicode_internal (char *utf8in,
00268 uint32_t * out, size_t * outlen, int flags)
00269 {
00270 int rc;
00271 char tmpout[64];
00272 size_t utf8len = strlen (utf8in) + 1;
00273 size_t addlen = 0;
00274
00275
00276
00277
00278
00279
00280
00281
00282 {
00283 size_t i;
00284 int inasciirange;
00285
00286 inasciirange = 1;
00287 for (i = 0; utf8in[i]; i++)
00288 if (utf8in[i] & ~0x7F)
00289 inasciirange = 0;
00290 if (inasciirange)
00291 goto step3;
00292 }
00293
00294
00295
00296
00297
00298
00299
00300 do
00301 {
00302 char *newp = realloc (utf8in, utf8len + addlen);
00303 if (newp == NULL)
00304 {
00305 free (utf8in);
00306 return IDNA_MALLOC_ERROR;
00307 }
00308 utf8in = newp;
00309 if (flags & IDNA_ALLOW_UNASSIGNED)
00310 rc = stringprep_nameprep (utf8in, utf8len + addlen);
00311 else
00312 rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
00313 addlen += 1;
00314 }
00315 while (rc == STRINGPREP_TOO_SMALL_BUFFER);
00316
00317 if (rc != STRINGPREP_OK)
00318 {
00319 free (utf8in);
00320 return IDNA_STRINGPREP_ERROR;
00321 }
00322
00323
00324
00325
00326
00327 step3:
00328 if (memcmp (IDNA_ACE_PREFIX, utf8in, strlen (IDNA_ACE_PREFIX)) != 0)
00329 {
00330 free (utf8in);
00331 return IDNA_NO_ACE_PREFIX;
00332 }
00333
00334
00335
00336
00337 memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)],
00338 strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1);
00339
00340
00341
00342
00343
00344
00345 (*outlen)--;
00346
00347 rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL);
00348 if (rc != PUNYCODE_SUCCESS)
00349 {
00350 free (utf8in);
00351 return IDNA_PUNYCODE_ERROR;
00352 }
00353
00354 out[*outlen] = 0;
00355
00356
00357
00358
00359 rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
00360 if (rc != IDNA_SUCCESS)
00361 {
00362 free (utf8in);
00363 return rc;
00364 }
00365
00366
00367
00368
00369
00370 if (c_strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0)
00371 {
00372 free (utf8in);
00373 return IDNA_ROUNDTRIP_VERIFY_ERROR;
00374 }
00375
00376
00377
00378
00379 free (utf8in);
00380 return IDNA_SUCCESS;
00381 }
00382
00418 int
00419 idna_to_unicode_44i (const uint32_t * in, size_t inlen,
00420 uint32_t * out, size_t * outlen, int flags)
00421 {
00422 int rc;
00423 size_t outlensave = *outlen;
00424 char *p;
00425
00426 p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);
00427 if (p == NULL)
00428 return IDNA_MALLOC_ERROR;
00429
00430 rc = idna_to_unicode_internal (p, out, outlen, flags);
00431 if (rc != IDNA_SUCCESS)
00432 {
00433 memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ?
00434 inlen : outlensave));
00435 *outlen = inlen;
00436 }
00437
00438
00439
00440 return rc;
00441 }
00442
00443
00444
00458 int
00459 idna_to_ascii_4z (const uint32_t * input, char **output, int flags)
00460 {
00461 const uint32_t *start = input;
00462 const uint32_t *end = input;
00463 char buf[64];
00464 char *out = NULL;
00465 int rc;
00466
00467
00468
00469
00470
00471
00472 if (input[0] == 0)
00473 {
00474
00475 *output = malloc (1);
00476 if (!*output)
00477 return IDNA_MALLOC_ERROR;
00478 strcpy (*output, "");
00479 return IDNA_SUCCESS;
00480 }
00481
00482 if (DOTP (input[0]) && input[1] == 0)
00483 {
00484
00485 *output = malloc (2);
00486 if (!*output)
00487 return IDNA_MALLOC_ERROR;
00488 strcpy (*output, ".");
00489 return IDNA_SUCCESS;
00490 }
00491
00492 *output = NULL;
00493 do
00494 {
00495 end = start;
00496
00497 for (; *end && !DOTP (*end); end++)
00498 ;
00499
00500 if (*end == '\0' && start == end)
00501 {
00502
00503 buf[0] = '\0';
00504 }
00505 else
00506 {
00507 rc = idna_to_ascii_4i (start, (size_t) (end - start), buf, flags);
00508 if (rc != IDNA_SUCCESS)
00509 return rc;
00510 }
00511
00512 if (out)
00513 {
00514 char *newp = realloc (out, strlen (out) + 1 + strlen (buf) + 1);
00515 if (!newp)
00516 {
00517 free (out);
00518 return IDNA_MALLOC_ERROR;
00519 }
00520 out = newp;
00521 strcat (out, ".");
00522 strcat (out, buf);
00523 }
00524 else
00525 {
00526 out = (char *) malloc (strlen (buf) + 1);
00527 if (!out)
00528 return IDNA_MALLOC_ERROR;
00529 strcpy (out, buf);
00530 }
00531
00532 start = end + 1;
00533 }
00534 while (*end);
00535
00536 *output = out;
00537
00538 return IDNA_SUCCESS;
00539 }
00540
00554 int
00555 idna_to_ascii_8z (const char *input, char **output, int flags)
00556 {
00557 uint32_t *ucs4;
00558 size_t ucs4len;
00559 int rc;
00560
00561 ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
00562 if (!ucs4)
00563 return IDNA_ICONV_ERROR;
00564
00565 rc = idna_to_ascii_4z (ucs4, output, flags);
00566
00567 free (ucs4);
00568
00569 return rc;
00570
00571 }
00572
00587 int
00588 idna_to_ascii_lz (const char *input, char **output, int flags)
00589 {
00590 char *utf8;
00591 int rc;
00592
00593 utf8 = stringprep_locale_to_utf8 (input);
00594 if (!utf8)
00595 return IDNA_ICONV_ERROR;
00596
00597 rc = idna_to_ascii_8z (utf8, output, flags);
00598
00599 free (utf8);
00600
00601 return rc;
00602 }
00603
00618 int
00619 idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags)
00620 {
00621 const uint32_t *start = input;
00622 const uint32_t *end = input;
00623 uint32_t *buf;
00624 size_t buflen;
00625 uint32_t *out = NULL;
00626 size_t outlen = 0;
00627 int rc;
00628
00629 *output = NULL;
00630
00631 do
00632 {
00633 end = start;
00634
00635 for (; *end && !DOTP (*end); end++)
00636 ;
00637
00638 buflen = (size_t) (end - start);
00639 buf = malloc (sizeof (buf[0]) * (buflen + 1));
00640 if (!buf)
00641 return IDNA_MALLOC_ERROR;
00642
00643 rc = idna_to_unicode_44i (start, (size_t) (end - start),
00644 buf, &buflen, flags);
00645
00646
00647 if (out)
00648 {
00649 uint32_t *newp = realloc (out,
00650 sizeof (out[0])
00651 * (outlen + 1 + buflen + 1));
00652 if (!newp)
00653 {
00654 free (buf);
00655 free (out);
00656 return IDNA_MALLOC_ERROR;
00657 }
00658 out = newp;
00659 out[outlen++] = 0x002E;
00660 memcpy (out + outlen, buf, sizeof (buf[0]) * buflen);
00661 outlen += buflen;
00662 out[outlen] = 0x0;
00663 free (buf);
00664 }
00665 else
00666 {
00667 out = buf;
00668 outlen = buflen;
00669 out[outlen] = 0x0;
00670 }
00671
00672 start = end + 1;
00673 }
00674 while (*end);
00675
00676 *output = out;
00677
00678 return IDNA_SUCCESS;
00679 }
00680
00695 int
00696 idna_to_unicode_8z4z (const char *input, uint32_t ** output, int flags)
00697 {
00698 uint32_t *ucs4;
00699 size_t ucs4len;
00700 int rc;
00701
00702 ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
00703 if (!ucs4)
00704 return IDNA_ICONV_ERROR;
00705
00706 rc = idna_to_unicode_4z4z (ucs4, output, flags);
00707 free (ucs4);
00708
00709 return rc;
00710 }
00711
00726 int
00727 idna_to_unicode_8z8z (const char *input, char **output, int flags)
00728 {
00729 uint32_t *ucs4;
00730 int rc;
00731
00732 rc = idna_to_unicode_8z4z (input, &ucs4, flags);
00733 *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL);
00734 free (ucs4);
00735
00736 if (!*output)
00737 return IDNA_ICONV_ERROR;
00738
00739 return rc;
00740 }
00741
00757 int
00758 idna_to_unicode_8zlz (const char *input, char **output, int flags)
00759 {
00760 char *utf8;
00761 int rc;
00762
00763 rc = idna_to_unicode_8z8z (input, &utf8, flags);
00764 *output = stringprep_utf8_to_locale (utf8);
00765 free (utf8);
00766
00767 if (!*output)
00768 return IDNA_ICONV_ERROR;
00769
00770 return rc;
00771 }
00772
00789 int
00790 idna_to_unicode_lzlz (const char *input, char **output, int flags)
00791 {
00792 char *utf8;
00793 int rc;
00794
00795 utf8 = stringprep_locale_to_utf8 (input);
00796 if (!utf8)
00797 return IDNA_ICONV_ERROR;
00798
00799 rc = idna_to_unicode_8zlz (utf8, output, flags);
00800 free (utf8);
00801
00802 return rc;
00803 }
00804