diff options
Diffstat (limited to 'libcharconv_latin.c')
| -rw-r--r-- | libcharconv_latin.c | 201 |
1 files changed, 198 insertions, 3 deletions
diff --git a/libcharconv_latin.c b/libcharconv_latin.c index 49de445..8be47c8 100644 --- a/libcharconv_latin.c +++ b/libcharconv_latin.c @@ -5,6 +5,7 @@ enum libcharconv_result libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) { + char csbuf[8]; enum libcharconv_result ret = LIBCHARCONV_CONVERTED; uint_least32_t c; char c1, c2, c3, c4, c5, c6; @@ -218,20 +219,20 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz *n += clen; if (!slen) { ret = LIBCHARCONV_CONVERT_IF_END; - goto budih_conv; + goto buhid_conv; } clen = libcharconv_decode_utf8_(s, slen, &c); if (clen > slen) return LIBCHARCONV_INDETERMINATE; if (!clen) - goto budih_conv; + goto buhid_conv; switch (c) { case UINT32_C(0x1752): c2 = 'i'; *n += clen; break; case UINT32_C(0x1753): c2 = 'u'; *n += clen; break; default: break; } - budih_conv: + buhid_conv: if (c1 == 'G') { c3 = c2; c2 = 'g'; @@ -492,6 +493,14 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz /* chess */ c = (uint_least32_t)"eEe"[(c - UINT32_C(0x1FA48)) % 3u]; goto conv; + } else if (UINT32_C(0x2BF9) <= c && c <= UINT32_C(0x2BFE)) { + /* chess */ + c = (uint_least32_t)"=-.|ij"[c - UINT32_C(0x2BF9)]; + goto conv; + } else if (UINT32_C(0x2BBA) <= c && c <= UINT32_C(0x2BBC)) { + /* chess */ + c = (uint_least32_t)"012"[c - UINT32_C(0x2BBA)]; + goto conv; } else if (UINT32_C(0x2460) <= c && c <= UINT32_C(0x2468)) { /* enclosed (positive) */ @@ -747,6 +756,127 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz } goto conv2_prechecked; + } else if (UINT32_C(0x1763) <= c && c <= UINT32_C(0x1770)) { + /* tagbanwa */ + c1 = "kgGtdnpbmylws"[c - UINT32_C(0x1763)]; + if (*n) + goto no_conv; + c2 = 'a'; + s = &s[clen]; + *n += clen; + if (!slen) { + ret = LIBCHARCONV_CONVERT_IF_END; + goto conv2_prechecked; + } + clen = libcharconv_decode_utf8_(s, slen, &c); + if (clen > slen) + return LIBCHARCONV_INDETERMINATE; + if (!clen) + goto conv2_prechecked; + switch (c) { + case UINT32_C(0x1772): c2 = 'i'; *n += clen; break; + case UINT32_C(0x1773): c2 = 'u'; *n += clen; break; + default: + break; + } + goto conv2_prechecked; + + } else if (UINT32_C(0x11C72) <= c && c <= UINT32_C(0x11C8F)) { + /* marchen */ + char root; + char vowel = 0; + char subjoined = 0; + if (*n) + goto no_conv; + root = "kKgGcCjYtTdnpPbmxXDwZz-yrlSsha"[c - UINT32_C(0x11C72)]; + for (;;) { + s = &s[clen]; + *n += clen; + if (subjoined && vowel) + break; + if (!slen) { + ret = LIBCHARCONV_CONVERT_IF_END; + break; + } + clen = libcharconv_decode_utf8_(s, slen, &c); + if (clen > slen) + return LIBCHARCONV_INDETERMINATE; + if (!clen) + break; + slen -= clen; + if (!subjoined && UINT32_C(0x11C92) <= c && c <= UINT32_C(0x11CAF)) + subjoined = "kKgGcCjYtTdnpPbmxXDwZz-yrlSsha"[c - UINT32_C(0x11C92)]; + else if (!vowel && UINT32_C(0x11CB0) <= c && c <= UINT32_C(0x11CB4)) + vowel = "Aiueo"[c - UINT32_C(0x11CB0)]; + else + break; + } + if (root == 'a') + root = '\0'; + if (!vowel) + vowel = 'a'; + i = 0u; + if (root) + csbuf[i++] = root; + if (subjoined && !root) { + csbuf[i++] = vowel; + csbuf[i++] = '_'; + vowel = '\0'; + } else if (subjoined == 'a') { + if (vowel) + csbuf[i++] = vowel; + csbuf[i++] = '_'; + vowel = '\0'; + } + if (subjoined) + csbuf[i++] = subjoined; + if (vowel) + csbuf[i++] = vowel; + csbuf[i] = '\0'; + cs = csbuf; + goto conv_str_prechecked; + } else if (UINT32_C(0x11C92) <= c && c <= UINT32_C(0x11CAF)) { + /* marchen */ + c1 = '_'; + c2 = "kKgGcCjYtTdnpPbmxXDwZz-yrlSsha"[c - UINT32_C(0x11C92)]; + goto conv2; + + } else if (UINT32_C(0x1F007) <= c && c <= UINT32_C(0x1F021)) { + /* mahjong tiles */ + c1 = "cbo"[(c - UINT32_C(0x1F007)) / 9u]; + c2 = (char)((c - UINT32_C(0x1F007)) % 9u + (unsigned)'1'); + goto conv2; + + } else if (UINT32_C(0x1680) <= c && c <= UINT32_C(0x169C)) { + /* ogham */ + c1 = " blfsnhdtcqaoueimgGzrEOUIAp><"[c - UINT32_C(0x1680)]; + goto conv1; + + } else if (UINT32_C(0x11150) <= c && c <= UINT32_C(0x11176)) { + /* mahajani */ + c1 = "aiueokkggccjjYTTDDNttddnppbbmrlvsHR.'|s"[c - UINT32_C(0x11150)]; + c2 = " h h h h h h h h h h r"[c - UINT32_C(0x11150)]; + if (c2 == ' ') + goto conv1; + goto conv2; + + } else if (UINT32_C(0x1FBA0) <= c && c <= UINT32_C(0x1FBAE)) { + /* box drawing */ + c = (uint_least32_t)"\x01\x02\x04\x08\x05\x0A\x0C\x03\x09\x06\x0E\x0D\x0B\x07\x0F"[c & 0xF]; + i = 0u; + if (c & 0x01u) + csbuf[i++] = '1'; + if (c & 0x02u) + csbuf[i++] = '2'; + if (c & 0x04u) + csbuf[i++] = '3'; + if (c & 0x08u) + csbuf[i++] = '4'; + csbuf[i++] = 'O'; + csbuf[i] = '\0'; + cs = csbuf; + goto conv_str; + } else { use_switch: switch (c) { @@ -901,12 +1031,22 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz case UINT32_C(0x1FA51): c1 = 'N'; c2 = 'Q'; goto conv2; case UINT32_C(0x1FA52): c1 = 'N'; c2 = 'R'; goto conv2; case UINT32_C(0x1FA53): c1 = 'N'; c2 = 'B'; goto conv2; + case UINT32_C(0x00A7): c1 = 'S'; c2 = 'S'; goto conv2; + case UINT32_C(0x2E16): c1 = '>'; c2 = ':'; goto conv2; + + /* joined + overlaid */ + case UINT32_C(0x205C): c1 = '+'; c2 = ':'; c3 = ':'; goto conv3; + + /* joined + overlaid + rotated 45 degrees */ + case UINT32_C(0x203B): c1 = 'x'; c2 = ':'; c3 = ':'; goto conv3; /* joined */ case UINT32_C(0x2048): c1 = '?'; c2 = '!'; goto conv2; case UINT32_C(0x2047): c1 = '?'; c2 = '?'; goto conv2; case UINT32_C(0x203C): c1 = '!'; c2 = '!'; goto conv2; case UINT32_C(0x2049): c1 = '!'; c2 = '?'; goto conv2; + case UINT32_C(0x2E2C): c1 = ':'; c2 = ':'; goto conv2; + case UINT32_C(0x2016): c1 = '|'; c2 = '|'; goto conv2; /* mirrored */ case UINT32_C(0x204F): c = UINT32_C(0x003B); goto conv; @@ -1169,6 +1309,8 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz /* stacked */ case UINT32_C(0x2051): c1 = '*'; c2 = '*'; goto conv2; case UINT32_C(0x2E49): c1 = ','; c2 = ','; goto conv2; + case UINT32_C(0x2E40): c1 = '-'; c2 = '-'; goto conv2; + case UINT32_C(0x2017): c1 = '_'; c2 = '_'; goto conv2; /* gothic */ case UINT32_C(0x10341): c1 = '9'; c2 = '0'; goto conv2; @@ -1196,6 +1338,58 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz case UINT32_C(0x11EF7): c1 = '.'; goto conv1; case UINT32_C(0x11EF8): c1 = '|'; goto conv1; + /* tagbanwa */ + case UINT32_C(0x1760): c1 = 'a'; goto conv1; + case UINT32_C(0x1761): c1 = 'i'; goto conv1; + case UINT32_C(0x1762): c1 = 'u'; goto conv1; + case UINT32_C(0x1772): c1 = 'i'; goto combining; + case UINT32_C(0x1773): c1 = 'u'; goto combining; + + /* marchen */ + case UINT32_C(0x11CB5): c1 = '\''; goto conv1; + case UINT32_C(0x11CB6): c1 = '\"'; goto conv1; + case UINT32_C(0x11C70): c1 = '.'; goto conv1; + case UINT32_C(0x11C71): c1 = ':'; goto conv1; + case UINT32_C(0x11CB0): c1 = 'A'; goto combining; + case UINT32_C(0x11CB1): c1 = 'i'; goto combining; + case UINT32_C(0x11CB2): c1 = 'u'; goto combining; + case UINT32_C(0x11CB3): c1 = 'e'; goto combining; + case UINT32_C(0x11CB4): c1 = 'o'; goto combining; + + /* mahjong tiles */ + case UINT32_C(0x1F000): c1 = 'e'; c2 = 'w'; goto conv2; + case UINT32_C(0x1F001): c1 = 's'; c2 = 'w'; goto conv2; + case UINT32_C(0x1F002): c1 = 'w'; c2 = 'w'; goto conv2; + case UINT32_C(0x1F003): c1 = 'n'; c2 = 'w'; goto conv2; + case UINT32_C(0x1F004): c1 = 'r'; c2 = 'd'; goto conv2; + case UINT32_C(0x1F005): c1 = 'g'; c2 = 'd'; goto conv2; + case UINT32_C(0x1F006): c1 = 'w'; c2 = 'd'; goto conv2; + case UINT32_C(0x1F022): c1 = 'p'; goto conv1; + case UINT32_C(0x1F023): c1 = 'o'; goto conv1; + case UINT32_C(0x1F024): c1 = 'b'; goto conv1; + case UINT32_C(0x1F025): c1 = 'c'; goto conv1; + case UINT32_C(0x1F026): c1 = 's'; c2 = 'p'; goto conv2; + case UINT32_C(0x1F027): c1 = 's'; c2 = 'u'; goto conv2; + case UINT32_C(0x1F028): c1 = 'a'; c2 = 'u'; goto conv2; + case UINT32_C(0x1F029): c1 = 'w'; c2 = 'i'; goto conv2; + case UINT32_C(0x1F02A): c1 = 'j'; goto conv1; + case UINT32_C(0x1F02B): c1 = '#'; goto conv1; + + /* box drawing */ + case UINT32_C(0x2571): c1 = '/'; goto conv1; + case UINT32_C(0x2572): c1 = '\\'; goto conv1; + case UINT32_C(0x2573): c1 = 'X'; goto conv1; + + /* lowered */ + case UINT32_C(0x204E): c1 = '*'; goto conv1; + + /* raised */ + case UINT32_C(0x2E33): c1 = '.'; goto conv1; + case UINT32_C(0x2E34): c1 = ','; goto conv1; + + /* raised + turned */ + case UINT32_C(0x2E32): c1 = ','; goto conv1; + default: no_match: *n += clen; @@ -1303,6 +1497,7 @@ conv_str: if (*n) goto no_conv; *n += clen; +conv_str_prechecked: i = 0u; for (; cs[i]; i++) if (*ncp > i) |
