00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifdef HAVE_CONFIG_H
00024 # include "config.h"
00025 #endif
00026
00027 #include <stdlib.h>
00028 #include <string.h>
00029
00030 #include "stringprep.h"
00031
00032 static ssize_t
00033 stringprep_find_character_in_table (uint32_t ucs4,
00034 const Stringprep_table_element * table)
00035 {
00036 ssize_t i;
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046 for (i = 0; table[i].start || table[i].end; i++)
00047 if (ucs4 >= table[i].start &&
00048 ucs4 <= (table[i].end ? table[i].end : table[i].start))
00049 return i;
00050
00051 return -1;
00052 }
00053
00054 static ssize_t
00055 stringprep_find_string_in_table (uint32_t * ucs4,
00056 size_t ucs4len,
00057 size_t * tablepos,
00058 const Stringprep_table_element * table)
00059 {
00060 size_t j;
00061 ssize_t pos;
00062
00063 for (j = 0; j < ucs4len; j++)
00064 if ((pos = stringprep_find_character_in_table (ucs4[j], table)) != -1)
00065 {
00066 if (tablepos)
00067 *tablepos = pos;
00068 return j;
00069 }
00070
00071 return -1;
00072 }
00073
00074 static int
00075 stringprep_apply_table_to_string (uint32_t * ucs4,
00076 size_t * ucs4len,
00077 size_t maxucs4len,
00078 const Stringprep_table_element * table)
00079 {
00080 ssize_t pos;
00081 size_t i, maplen;
00082
00083 while ((pos = stringprep_find_string_in_table (ucs4, *ucs4len,
00084 &i, table)) != -1)
00085 {
00086 for (maplen = STRINGPREP_MAX_MAP_CHARS;
00087 maplen > 0 && table[i].map[maplen - 1] == 0; maplen--)
00088 ;
00089
00090 if (*ucs4len - 1 + maplen >= maxucs4len)
00091 return STRINGPREP_TOO_SMALL_BUFFER;
00092
00093 memmove (&ucs4[pos + maplen], &ucs4[pos + 1],
00094 sizeof (uint32_t) * (*ucs4len - pos - 1));
00095 memcpy (&ucs4[pos], table[i].map, sizeof (uint32_t) * maplen);
00096 *ucs4len = *ucs4len - 1 + maplen;
00097 }
00098
00099 return STRINGPREP_OK;
00100 }
00101
/*
 * INVERTED(x) evaluates to X with the most significant bit of an
 * `unsigned long' masked off; it is non-zero whenever X has any lower bit
 * set.
 *
 * UNAPPLICAPLEFLAGS(flags, profileflags) is non-zero when either
 *   - INVERTED(profileflags) is zero, PROFILEFLAGS itself is non-zero and
 *     shares no bit with FLAGS, or
 *   - INVERTED(profileflags) is non-zero and PROFILEFLAGS shares at least
 *     one bit with FLAGS.
 * Callers in this file skip a profile step when it is non-zero.
 *
 * Every parameter use is parenthesized so that compound arguments such as
 * `A | B' keep their intended grouping.  Both macros may evaluate their
 * arguments more than once; do not pass expressions with side effects.
 */
#define INVERTED(x) ((x) & ((~0UL) >> 1))
#define UNAPPLICAPLEFLAGS(flags, profileflags) \
  ((!INVERTED (profileflags) && !((profileflags) & (flags)) && (profileflags)) || \
   (INVERTED (profileflags) && ((profileflags) & (flags))))
00106
00138 int
00139 stringprep_4i (uint32_t * ucs4, size_t * len, size_t maxucs4len,
00140 Stringprep_profile_flags flags,
00141 const Stringprep_profile * profile)
00142 {
00143 size_t i, j;
00144 ssize_t k;
00145 size_t ucs4len = *len;
00146 int rc;
00147
00148 for (i = 0; profile[i].operation; i++)
00149 {
00150 switch (profile[i].operation)
00151 {
00152 case STRINGPREP_NFKC:
00153 {
00154 uint32_t *q = 0;
00155
00156 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
00157 break;
00158
00159 if (flags & STRINGPREP_NO_NFKC && !profile[i].flags)
00160
00161 return STRINGPREP_FLAG_ERROR;
00162
00163 q = stringprep_ucs4_nfkc_normalize (ucs4, ucs4len);
00164 if (!q)
00165 return STRINGPREP_NFKC_FAILED;
00166
00167 for (ucs4len = 0; q[ucs4len]; ucs4len++)
00168 ;
00169
00170 if (ucs4len >= maxucs4len)
00171 {
00172 free (q);
00173 return STRINGPREP_TOO_SMALL_BUFFER;
00174 }
00175
00176 memcpy (ucs4, q, ucs4len * sizeof (ucs4[0]));
00177
00178 free (q);
00179 }
00180 break;
00181
00182 case STRINGPREP_PROHIBIT_TABLE:
00183 k = stringprep_find_string_in_table (ucs4, ucs4len,
00184 NULL, profile[i].table);
00185 if (k != -1)
00186 return STRINGPREP_CONTAINS_PROHIBITED;
00187 break;
00188
00189 case STRINGPREP_UNASSIGNED_TABLE:
00190 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
00191 break;
00192 if (flags & STRINGPREP_NO_UNASSIGNED)
00193 {
00194 k = stringprep_find_string_in_table
00195 (ucs4, ucs4len, NULL, profile[i].table);
00196 if (k != -1)
00197 return STRINGPREP_CONTAINS_UNASSIGNED;
00198 }
00199 break;
00200
00201 case STRINGPREP_MAP_TABLE:
00202 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
00203 break;
00204 rc = stringprep_apply_table_to_string
00205 (ucs4, &ucs4len, maxucs4len, profile[i].table);
00206 if (rc != STRINGPREP_OK)
00207 return rc;
00208 break;
00209
00210 case STRINGPREP_BIDI_PROHIBIT_TABLE:
00211 case STRINGPREP_BIDI_RAL_TABLE:
00212 case STRINGPREP_BIDI_L_TABLE:
00213 break;
00214
00215 case STRINGPREP_BIDI:
00216 {
00217 int done_prohibited = 0;
00218 int done_ral = 0;
00219 int done_l = 0;
00220 size_t contains_ral = SIZE_MAX;
00221 size_t contains_l = SIZE_MAX;
00222
00223 for (j = 0; profile[j].operation; j++)
00224 if (profile[j].operation == STRINGPREP_BIDI_PROHIBIT_TABLE)
00225 {
00226 done_prohibited = 1;
00227 k = stringprep_find_string_in_table (ucs4, ucs4len,
00228 NULL,
00229 profile[j].table);
00230 if (k != -1)
00231 return STRINGPREP_BIDI_CONTAINS_PROHIBITED;
00232 }
00233 else if (profile[j].operation == STRINGPREP_BIDI_RAL_TABLE)
00234 {
00235 done_ral = 1;
00236 if (stringprep_find_string_in_table
00237 (ucs4, ucs4len, NULL, profile[j].table) != -1)
00238 contains_ral = j;
00239 }
00240 else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE)
00241 {
00242 done_l = 1;
00243 if (stringprep_find_string_in_table
00244 (ucs4, ucs4len, NULL, profile[j].table) != -1)
00245 contains_l = j;
00246 }
00247
00248 if (!done_prohibited || !done_ral || !done_l)
00249 return STRINGPREP_PROFILE_ERROR;
00250
00251 if (contains_ral != SIZE_MAX && contains_l != SIZE_MAX)
00252 return STRINGPREP_BIDI_BOTH_L_AND_RAL;
00253
00254 if (contains_ral != SIZE_MAX)
00255 {
00256 if (!(stringprep_find_character_in_table
00257 (ucs4[0], profile[contains_ral].table) != -1 &&
00258 stringprep_find_character_in_table
00259 (ucs4[ucs4len - 1], profile[contains_ral].table) != -1))
00260 return STRINGPREP_BIDI_LEADTRAIL_NOT_RAL;
00261 }
00262 }
00263 break;
00264
00265 default:
00266 return STRINGPREP_PROFILE_ERROR;
00267 break;
00268 }
00269 }
00270
00271 *len = ucs4len;
00272
00273 return STRINGPREP_OK;
00274 }
00275
00276 static int
00277 stringprep_4zi_1 (uint32_t * ucs4, size_t ucs4len, size_t maxucs4len,
00278 Stringprep_profile_flags flags,
00279 const Stringprep_profile * profile)
00280 {
00281 int rc;
00282
00283 rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
00284 if (rc != STRINGPREP_OK)
00285 return rc;
00286
00287 if (ucs4len >= maxucs4len)
00288 return STRINGPREP_TOO_SMALL_BUFFER;
00289
00290 ucs4[ucs4len] = 0;
00291
00292 return STRINGPREP_OK;
00293 }
00294
00319 int
00320 stringprep_4zi (uint32_t * ucs4, size_t maxucs4len,
00321 Stringprep_profile_flags flags,
00322 const Stringprep_profile * profile)
00323 {
00324 size_t ucs4len;
00325
00326 for (ucs4len = 0; ucs4len < maxucs4len && ucs4[ucs4len] != 0; ucs4len++)
00327 ;
00328
00329 return stringprep_4zi_1 (ucs4, ucs4len, maxucs4len, flags, profile);
00330 }
00331
00359 int
00360 stringprep (char *in,
00361 size_t maxlen,
00362 Stringprep_profile_flags flags,
00363 const Stringprep_profile * profile)
00364 {
00365 int rc;
00366 char *utf8 = NULL;
00367 uint32_t *ucs4 = NULL;
00368 size_t ucs4len, maxucs4len, adducs4len = 50;
00369
00370 do
00371 {
00372 uint32_t *newp;
00373
00374 free (ucs4);
00375 ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len);
00376 maxucs4len = ucs4len + adducs4len;
00377 newp = realloc (ucs4, maxucs4len * sizeof (uint32_t));
00378 if (!newp)
00379 {
00380 free (ucs4);
00381 return STRINGPREP_MALLOC_ERROR;
00382 }
00383 ucs4 = newp;
00384
00385 rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
00386 adducs4len += 50;
00387 }
00388 while (rc == STRINGPREP_TOO_SMALL_BUFFER);
00389 if (rc != STRINGPREP_OK)
00390 {
00391 free (ucs4);
00392 return rc;
00393 }
00394
00395 utf8 = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0);
00396 free (ucs4);
00397 if (!utf8)
00398 return STRINGPREP_MALLOC_ERROR;
00399
00400 if (strlen (utf8) >= maxlen)
00401 {
00402 free (utf8);
00403 return STRINGPREP_TOO_SMALL_BUFFER;
00404 }
00405
00406 strcpy (in, utf8);
00407
00408 free (utf8);
00409
00410 return STRINGPREP_OK;
00411 }
00412
00437 int
00438 stringprep_profile (const char *in,
00439 char **out,
00440 const char *profile, Stringprep_profile_flags flags)
00441 {
00442 const Stringprep_profiles *p;
00443 char *str = NULL;
00444 size_t len = strlen (in) + 1;
00445 int rc;
00446
00447 for (p = &stringprep_profiles[0]; p->name; p++)
00448 if (strcmp (p->name, profile) == 0)
00449 break;
00450
00451 if (!p || !p->name || !p->tables)
00452 return STRINGPREP_UNKNOWN_PROFILE;
00453
00454 do
00455 {
00456 free (str);
00457 str = (char *) malloc (len);
00458 if (str == NULL)
00459 return STRINGPREP_MALLOC_ERROR;
00460
00461 strcpy (str, in);
00462
00463 rc = stringprep (str, len, flags, p->tables);
00464 len += 50;
00465 }
00466 while (rc == STRINGPREP_TOO_SMALL_BUFFER);
00467
00468 if (rc == STRINGPREP_OK)
00469 *out = str;
00470 else
00471 free (str);
00472
00473 return rc;
00474 }
00475