aboutsummaryrefslogtreecommitdiffstats
path: root/libcharconv_latin.c
diff options
context:
space:
mode:
Diffstat (limited to 'libcharconv_latin.c')
-rw-r--r--libcharconv_latin.c201
1 files changed, 198 insertions, 3 deletions
diff --git a/libcharconv_latin.c b/libcharconv_latin.c
index 49de445..8be47c8 100644
--- a/libcharconv_latin.c
+++ b/libcharconv_latin.c
@@ -5,6 +5,7 @@
enum libcharconv_result
libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp)
{
+ char csbuf[8];
enum libcharconv_result ret = LIBCHARCONV_CONVERTED;
uint_least32_t c;
char c1, c2, c3, c4, c5, c6;
@@ -218,20 +219,20 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
*n += clen;
if (!slen) {
ret = LIBCHARCONV_CONVERT_IF_END;
- goto budih_conv;
+ goto buhid_conv;
}
clen = libcharconv_decode_utf8_(s, slen, &c);
if (clen > slen)
return LIBCHARCONV_INDETERMINATE;
if (!clen)
- goto budih_conv;
+ goto buhid_conv;
switch (c) {
case UINT32_C(0x1752): c2 = 'i'; *n += clen; break;
case UINT32_C(0x1753): c2 = 'u'; *n += clen; break;
default:
break;
}
- budih_conv:
+ buhid_conv:
if (c1 == 'G') {
c3 = c2;
c2 = 'g';
@@ -492,6 +493,14 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
/* chess */
c = (uint_least32_t)"eEe"[(c - UINT32_C(0x1FA48)) % 3u];
goto conv;
+ } else if (UINT32_C(0x2BF9) <= c && c <= UINT32_C(0x2BFE)) {
+ /* chess */
+ c = (uint_least32_t)"=-.|ij"[c - UINT32_C(0x2BF9)];
+ goto conv;
+ } else if (UINT32_C(0x2BBA) <= c && c <= UINT32_C(0x2BBC)) {
+ /* chess */
+ c = (uint_least32_t)"012"[c - UINT32_C(0x2BBA)];
+ goto conv;
} else if (UINT32_C(0x2460) <= c && c <= UINT32_C(0x2468)) {
/* enclosed (positive) */
@@ -747,6 +756,127 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
}
goto conv2_prechecked;
+ } else if (UINT32_C(0x1763) <= c && c <= UINT32_C(0x1770) && c != UINT32_C(0x176D)) {
+ /* tagbanwa: consonant with default vowel 'a', replaced by 'i'/'u' if U+1772/U+1773 follows; U+176D is unassigned and excluded above, the '?' only keeps the table aligned */
+ c1 = "kgGtdnpbmy?lws"[c - UINT32_C(0x1763)];
+ if (*n)
+ goto no_conv;
+ c2 = 'a';
+ s = &s[clen];
+ *n += clen;
+ if (!slen) {
+ ret = LIBCHARCONV_CONVERT_IF_END;
+ goto conv2_prechecked;
+ }
+ clen = libcharconv_decode_utf8_(s, slen, &c);
+ if (clen > slen)
+ return LIBCHARCONV_INDETERMINATE;
+ if (!clen)
+ goto conv2_prechecked;
+ switch (c) {
+ case UINT32_C(0x1772): c2 = 'i'; *n += clen; break;
+ case UINT32_C(0x1773): c2 = 'u'; *n += clen; break;
+ default:
+ break;
+ }
+ goto conv2_prechecked;
+
+ } else if (UINT32_C(0x11C72) <= c && c <= UINT32_C(0x11C8F)) {
+ /* marchen */
+ char root;
+ char vowel = 0;
+ char subjoined = 0;
+ if (*n)
+ goto no_conv;
+ root = "kKgGcCjYtTdnpPbmxXDwZz-yrlSsha"[c - UINT32_C(0x11C72)];
+ for (;;) {
+ s = &s[clen];
+ *n += clen;
+ if (subjoined && vowel)
+ break;
+ if (!slen) {
+ ret = LIBCHARCONV_CONVERT_IF_END;
+ break;
+ }
+ clen = libcharconv_decode_utf8_(s, slen, &c);
+ if (clen > slen)
+ return LIBCHARCONV_INDETERMINATE;
+ if (!clen)
+ break;
+ slen -= clen;
+ if (!subjoined && UINT32_C(0x11C92) <= c && c <= UINT32_C(0x11CAF))
+ subjoined = "kKgGcCjYtTdnpPbmxXDwZz-yrlSsha"[c - UINT32_C(0x11C92)];
+ else if (!vowel && UINT32_C(0x11CB0) <= c && c <= UINT32_C(0x11CB4))
+ vowel = "Aiueo"[c - UINT32_C(0x11CB0)];
+ else
+ break;
+ }
+ if (root == 'a')
+ root = '\0';
+ if (!vowel)
+ vowel = 'a';
+ i = 0u;
+ if (root)
+ csbuf[i++] = root;
+ if (subjoined && !root) {
+ csbuf[i++] = vowel;
+ csbuf[i++] = '_';
+ vowel = '\0';
+ } else if (subjoined == 'a') {
+ if (vowel)
+ csbuf[i++] = vowel;
+ csbuf[i++] = '_';
+ vowel = '\0';
+ }
+ if (subjoined)
+ csbuf[i++] = subjoined;
+ if (vowel)
+ csbuf[i++] = vowel;
+ csbuf[i] = '\0';
+ cs = csbuf;
+ goto conv_str_prechecked;
+ } else if (UINT32_C(0x11C92) <= c && c <= UINT32_C(0x11CAF)) {
+ /* marchen */
+ c1 = '_';
+ c2 = "kKgGcCjYtTdnpPbmxXDwZz-yrlSsha"[c - UINT32_C(0x11C92)];
+ goto conv2;
+
+ } else if (UINT32_C(0x1F007) <= c && c <= UINT32_C(0x1F021)) {
+ /* mahjong tiles */
+ c1 = "cbo"[(c - UINT32_C(0x1F007)) / 9u];
+ c2 = (char)((c - UINT32_C(0x1F007)) % 9u + (unsigned)'1');
+ goto conv2;
+
+ } else if (UINT32_C(0x1680) <= c && c <= UINT32_C(0x169C)) {
+ /* ogham: table is in code-point order U+1680..U+169C: space, b l f s n, h d t c q, m g ng(G) z r, a o u e i, forfeda (E O U I A p), feather marks (> <) */
+ c1 = " blfsnhdtcqmgGzraoueiEOUIAp><"[c - UINT32_C(0x1680)];
+ goto conv1;
+
+ } else if (UINT32_C(0x11150) <= c && c <= UINT32_C(0x11176)) {
+ /* mahajani: c1 is the base letter; c2 is 'h' for aspirates, 'r' for the final entry, ' ' when one character suffices; both tables must hold 39 entries (U+11150..U+11176) and stay column-aligned */
+ c1 = "aiueokkggccjjYTTDDNttddnppbbmrlvsHR.'|s"[c - UINT32_C(0x11150)];
+ c2 = "      h h h h  h h  h h  h h          r"[c - UINT32_C(0x11150)];
+ if (c2 == ' ')
+ goto conv1;
+ goto conv2;
+
+ } else if (UINT32_C(0x1FBA0) <= c && c <= UINT32_C(0x1FBAE)) {
+ /* box drawing */
+ c = (uint_least32_t)"\x01\x02\x04\x08\x05\x0A\x0C\x03\x09\x06\x0E\x0D\x0B\x07\x0F"[c & 0xF];
+ i = 0u;
+ if (c & 0x01u)
+ csbuf[i++] = '1';
+ if (c & 0x02u)
+ csbuf[i++] = '2';
+ if (c & 0x04u)
+ csbuf[i++] = '3';
+ if (c & 0x08u)
+ csbuf[i++] = '4';
+ csbuf[i++] = 'O';
+ csbuf[i] = '\0';
+ cs = csbuf;
+ goto conv_str;
+
} else {
use_switch:
switch (c) {
@@ -901,12 +1031,22 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
case UINT32_C(0x1FA51): c1 = 'N'; c2 = 'Q'; goto conv2;
case UINT32_C(0x1FA52): c1 = 'N'; c2 = 'R'; goto conv2;
case UINT32_C(0x1FA53): c1 = 'N'; c2 = 'B'; goto conv2;
+ case UINT32_C(0x00A7): c1 = 'S'; c2 = 'S'; goto conv2;
+ case UINT32_C(0x2E16): c1 = '>'; c2 = ':'; goto conv2;
+
+ /* joined + overlaid */
+ case UINT32_C(0x205C): c1 = '+'; c2 = ':'; c3 = ':'; goto conv3;
+
+ /* joined + overlaid + rotated 45 degrees */
+ case UINT32_C(0x203B): c1 = 'x'; c2 = ':'; c3 = ':'; goto conv3;
/* joined */
case UINT32_C(0x2048): c1 = '?'; c2 = '!'; goto conv2;
case UINT32_C(0x2047): c1 = '?'; c2 = '?'; goto conv2;
case UINT32_C(0x203C): c1 = '!'; c2 = '!'; goto conv2;
case UINT32_C(0x2049): c1 = '!'; c2 = '?'; goto conv2;
+ case UINT32_C(0x2E2C): c1 = ':'; c2 = ':'; goto conv2;
+ case UINT32_C(0x2016): c1 = '|'; c2 = '|'; goto conv2;
/* mirrored */
case UINT32_C(0x204F): c = UINT32_C(0x003B); goto conv;
@@ -1169,6 +1309,8 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
/* stacked */
case UINT32_C(0x2051): c1 = '*'; c2 = '*'; goto conv2;
case UINT32_C(0x2E49): c1 = ','; c2 = ','; goto conv2;
+ case UINT32_C(0x2E40): c1 = '-'; c2 = '-'; goto conv2;
+ case UINT32_C(0x2017): c1 = '_'; c2 = '_'; goto conv2;
/* gothic */
case UINT32_C(0x10341): c1 = '9'; c2 = '0'; goto conv2;
@@ -1196,6 +1338,58 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
case UINT32_C(0x11EF7): c1 = '.'; goto conv1;
case UINT32_C(0x11EF8): c1 = '|'; goto conv1;
+ /* tagbanwa */
+ case UINT32_C(0x1760): c1 = 'a'; goto conv1;
+ case UINT32_C(0x1761): c1 = 'i'; goto conv1;
+ case UINT32_C(0x1762): c1 = 'u'; goto conv1;
+ case UINT32_C(0x1772): c1 = 'i'; goto combining;
+ case UINT32_C(0x1773): c1 = 'u'; goto combining;
+
+ /* marchen */
+ case UINT32_C(0x11CB5): c1 = '\''; goto conv1;
+ case UINT32_C(0x11CB6): c1 = '\"'; goto conv1;
+ case UINT32_C(0x11C70): c1 = '.'; goto conv1;
+ case UINT32_C(0x11C71): c1 = ':'; goto conv1;
+ case UINT32_C(0x11CB0): c1 = 'A'; goto combining;
+ case UINT32_C(0x11CB1): c1 = 'i'; goto combining;
+ case UINT32_C(0x11CB2): c1 = 'u'; goto combining;
+ case UINT32_C(0x11CB3): c1 = 'e'; goto combining;
+ case UINT32_C(0x11CB4): c1 = 'o'; goto combining;
+
+ /* mahjong tiles */
+ case UINT32_C(0x1F000): c1 = 'e'; c2 = 'w'; goto conv2;
+ case UINT32_C(0x1F001): c1 = 's'; c2 = 'w'; goto conv2;
+ case UINT32_C(0x1F002): c1 = 'w'; c2 = 'w'; goto conv2;
+ case UINT32_C(0x1F003): c1 = 'n'; c2 = 'w'; goto conv2;
+ case UINT32_C(0x1F004): c1 = 'r'; c2 = 'd'; goto conv2;
+ case UINT32_C(0x1F005): c1 = 'g'; c2 = 'd'; goto conv2;
+ case UINT32_C(0x1F006): c1 = 'w'; c2 = 'd'; goto conv2;
+ case UINT32_C(0x1F022): c1 = 'p'; goto conv1;
+ case UINT32_C(0x1F023): c1 = 'o'; goto conv1;
+ case UINT32_C(0x1F024): c1 = 'b'; goto conv1;
+ case UINT32_C(0x1F025): c1 = 'c'; goto conv1;
+ case UINT32_C(0x1F026): c1 = 's'; c2 = 'p'; goto conv2;
+ case UINT32_C(0x1F027): c1 = 's'; c2 = 'u'; goto conv2;
+ case UINT32_C(0x1F028): c1 = 'a'; c2 = 'u'; goto conv2;
+ case UINT32_C(0x1F029): c1 = 'w'; c2 = 'i'; goto conv2;
+ case UINT32_C(0x1F02A): c1 = 'j'; goto conv1;
+ case UINT32_C(0x1F02B): c1 = '#'; goto conv1;
+
+ /* box drawing */
+ case UINT32_C(0x2571): c1 = '/'; goto conv1;
+ case UINT32_C(0x2572): c1 = '\\'; goto conv1;
+ case UINT32_C(0x2573): c1 = 'X'; goto conv1;
+
+ /* lowered */
+ case UINT32_C(0x204E): c1 = '*'; goto conv1;
+
+ /* raised */
+ case UINT32_C(0x2E33): c1 = '.'; goto conv1;
+ case UINT32_C(0x2E34): c1 = ','; goto conv1;
+
+ /* raised + turned */
+ case UINT32_C(0x2E32): c1 = ','; goto conv1;
+
default:
no_match:
*n += clen;
@@ -1303,6 +1497,7 @@ conv_str:
if (*n)
goto no_conv;
*n += clen;
+conv_str_prechecked:
i = 0u;
for (; cs[i]; i++)
if (*ncp > i)