From 579e91697244d5813a667bf49209ea4102bcd360 Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Wed, 28 Jan 2026 19:52:45 +0100 Subject: Misc stuff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- libcharconv_latin.c | 317 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 216 insertions(+), 101 deletions(-) (limited to 'libcharconv_latin.c') diff --git a/libcharconv_latin.c b/libcharconv_latin.c index 374a097..20e3fd9 100644 --- a/libcharconv_latin.c +++ b/libcharconv_latin.c @@ -220,6 +220,38 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz c -= (uint_least32_t)UINT32_C(0x1D4EA) - (uint_least32_t)'a'; goto conv; + } else if (UINT32_C(0x1743) <= c && c <= UINT32_C(0x1751)) { + /* buhid */ + c1 = "kgGtdnpbmyrlwsh"[c - UINT32_C(0x1743)]; + if (*n) + goto no_conv; + c2 = 'a'; + s = &s[clen]; + *n += clen; + if (!slen) { + ret = LIBCHARCONV_CONVERT_IF_END; + goto budih_conv; + } + clen = libcharconv_decode_utf8_(s, slen, &c); + if (clen > slen) + return LIBCHARCONV_INDETERMINATE; + if (!clen) + goto budih_conv; + switch (c) { + case UINT32_C(0x1752): c2 = 'i'; *n += clen; break; + case UINT32_C(0x1753): c2 = 'u'; *n += clen; break; + default: + break; + } + budih_conv: + if (c1 == 'G') { + c3 = c2; + c2 = 'g'; + c1 = 'n'; + goto conv3_prechecked; + } + goto conv2_prechecked; + } else if (UINT32_C(0x1F110) <= c && c <= UINT32_C(0x1F129)) { /* bracketed (parenthesised capital) */ c -= (uint_least32_t)UINT32_C(0x1F110) - (uint_least32_t)'A'; @@ -627,127 +659,180 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz c2 = (char)c; goto conv2; + } else if (UINT32_C(0x10330) <= c && c <= UINT32_C(0x10349)) { + /* gothic */ + if (c == UINT32_C(0x10341)) + goto use_switch; + c = (uint_least32_t)"abgdeqzhTiklmnjup rstwfxvo"[c - UINT32_C(0x10330)]; + goto conv; + + } else if (UINT32_C(0x0840) <= c && c <= UINT32_C(0x085B)) { + /* mandaic */ + c = (uint_least32_t)"abgdhuzHTiklmnsepZqrStDKo\'*:"[c - UINT32_C(0x0840)]; + goto conv; + + } else if (UINT32_C(0x10E60) <= c && c <= UINT32_C(0x10E68)) { + /* rumi */ + c -= (uint_least32_t)UINT32_C(0x10E60) - (uint_least32_t)'1'; + c1 = (char)c; + goto conv1; + } else if (UINT32_C(0x10E69) <= c && c <= UINT32_C(0x10E71)) { + /* rumi */ + c -= (uint_least32_t)UINT32_C(0x10E69) - (uint_least32_t)'1'; + c1 = (char)c; + c2 = '0'; + goto conv2; + } else if (UINT32_C(0x10E72) <= c && c <= UINT32_C(0x10E7A)) { + /* rumi */ + c -= (uint_least32_t)UINT32_C(0x10E72) - (uint_least32_t)'1'; + c1 = (char)c; + c2 = '0'; + c3 = '0'; + goto conv3; + + } else if (UINT32_C(0x1A00) <= c && c <= UINT32_C(0x1A1B)) { + /* buginese */ + c1 = "kgGKpbmMtdnRcjYCyrlvsahiueoA"[c - UINT32_C(0x1A00)]; + if (c >= UINT32_C(0x1A17)) + goto combining; + if (*n) + goto no_conv; + c2 = 'a'; + s = &s[clen]; + *n += clen; + if (!slen) { + ret = LIBCHARCONV_CONVERT_IF_END; + goto buginese_conv; + } + clen = libcharconv_decode_utf8_(s, slen, &c); + if (clen > slen) + return LIBCHARCONV_INDETERMINATE; + if (!clen) + goto buginese_conv; + switch (c) { + case UINT32_C(0x1A17): c2 = 'i'; *n += clen; break; + case UINT32_C(0x1A18): c2 = 'u'; *n += clen; break; + case UINT32_C(0x1A19): c2 = 'e'; *n += clen; break; + case UINT32_C(0x1A1A): c2 = 'o'; *n += clen; break; + case UINT32_C(0x1A1B): c2 = 'A'; *n += clen; break; + default: + break; + } + buginese_conv: + if (c1 == 'a') { + c = (uint_least32_t)c2; + goto conv_prechecked; + } + goto conv2_prechecked; + + } else if (UINT32_C(0x11EE0) <= c && c <= UINT32_C(0x11EF6)) { + /* makasar */ + c1 = "kgGpbmtdncjYyrlvsa-iueo"[c - UINT32_C(0x11EE0)]; + if (c >= UINT32_C(0x11EF3)) + goto combining; + if (*n) + goto no_conv; + c2 = 'a'; + s = &s[clen]; + *n += clen; + if (!slen) { + ret = LIBCHARCONV_CONVERT_IF_END; + goto makasar_conv; + } + clen = libcharconv_decode_utf8_(s, slen, &c); + if (clen > slen) + return LIBCHARCONV_INDETERMINATE; + if (!clen) + goto makasar_conv; + switch (c) { + case UINT32_C(0x11EF3): c2 = 'i'; *n += clen; break; + case UINT32_C(0x11EF4): c2 = 'u'; *n += clen; break; + case UINT32_C(0x11EF5): c2 = 'e'; *n += clen; break; + case UINT32_C(0x11EF6): c2 = 'o'; *n += clen; break; + default: + break; + } + makasar_conv: + if (c1 == 'a') { + c = (uint_least32_t)c2; + goto conv_prechecked; + } + goto conv2_prechecked; + } else { use_switch: switch (c) { /* shogi */ - case UINT32_C(0x2616): c = (uint_least32_t)'w'; goto conv; - case UINT32_C(0x2617): c = (uint_least32_t)'b'; goto conv; - case UINT32_C(0x26C9): c = (uint_least32_t)'W'; goto conv; - case UINT32_C(0x26CA): c = (uint_least32_t)'B'; goto conv; + case UINT32_C(0x2616): c1 = 'w'; goto conv1; + case UINT32_C(0x2617): c1 = 'b'; goto conv1; + case UINT32_C(0x26C9): c1 = 'W'; goto conv1; + case UINT32_C(0x26CA): c1 = 'B'; goto conv1; /* go (common) */ - case UINT32_C(0x25CF): c = (uint_least32_t)'b'; goto conv; - case UINT32_C(0x25CB): c = (uint_least32_t)'w'; goto conv; + case UINT32_C(0x25CB): c1 = 'w'; goto conv1; + case UINT32_C(0x25CF): c1 = 'b'; goto conv1; /* go (white) */ - case UINT32_C(0x2686): c = (uint_least32_t)'1'; goto conv; - case UINT32_C(0x2687): c = (uint_least32_t)'2'; goto conv; + case UINT32_C(0x2686): c1 = '1'; goto conv1; + case UINT32_C(0x2687): c1 = '2'; goto conv1; /* go (black) */ - case UINT32_C(0x2688): c = (uint_least32_t)'1'; goto conv; - case UINT32_C(0x2689): c = (uint_least32_t)'2'; goto conv; + case UINT32_C(0x2688): c1 = '1'; goto conv1; + case UINT32_C(0x2689): c1 = '2'; goto conv1; /* draughts */ - case UINT32_C(0x26C0): c = (uint_least32_t)'m'; goto conv; - case UINT32_C(0x26C1): c = (uint_least32_t)'k'; goto conv; - case UINT32_C(0x26C2): c = (uint_least32_t)'M'; goto conv; - case UINT32_C(0x26C3): c = (uint_least32_t)'K'; goto conv; + case UINT32_C(0x26C0): c1 = 'm'; goto conv1; + case UINT32_C(0x26C1): c1 = 'k'; goto conv1; + case UINT32_C(0x26C2): c1 = 'M'; goto conv1; + case UINT32_C(0x26C3): c1 = 'K'; goto conv1; /* gender symbols */ - case UINT32_C(0x2640): c = (uint_least32_t)'f'; goto conv; - case UINT32_C(0x2642): c = (uint_least32_t)'m'; goto conv; - case UINT32_C(0x263F): c = (uint_least32_t)'i'; goto conv; + case UINT32_C(0x2640): c1 = 'f'; goto conv1; + case UINT32_C(0x2642): c1 = 'm'; goto conv1; + case UINT32_C(0x263F): c1 = 'i'; goto conv1; /* double-struck */ - case UINT32_C(0x2102): c = (uint_least32_t)'C'; goto conv; - case UINT32_C(0x210D): c = (uint_least32_t)'H'; goto conv; - case UINT32_C(0x2115): c = (uint_least32_t)'N'; goto conv; - case UINT32_C(0x2119): c = (uint_least32_t)'P'; goto conv; - case UINT32_C(0x211A): c = (uint_least32_t)'Q'; goto conv; - case UINT32_C(0x211D): c = (uint_least32_t)'R'; goto conv; - case UINT32_C(0x2124): c = (uint_least32_t)'Z'; goto conv; + case UINT32_C(0x2102): c1 = 'C'; goto conv1; + case UINT32_C(0x210D): c1 = 'H'; goto conv1; + case UINT32_C(0x2115): c1 = 'N'; goto conv1; + case UINT32_C(0x2119): c1 = 'P'; goto conv1; + case UINT32_C(0x211A): c1 = 'Q'; goto conv1; + case UINT32_C(0x211D): c1 = 'R'; goto conv1; + case UINT32_C(0x2124): c1 = 'Z'; goto conv1; /* double-struck italic */ - case UINT32_C(0x2145): c = (uint_least32_t)'D'; goto conv; - case UINT32_C(0x2146): c = (uint_least32_t)'d'; goto conv; - case UINT32_C(0x2147): c = (uint_least32_t)'e'; goto conv; - case UINT32_C(0x2148): c = (uint_least32_t)'i'; goto conv; - case UINT32_C(0x2149): c = (uint_least32_t)'j'; goto conv; + case UINT32_C(0x2145): c1 = 'D'; goto conv1; + case UINT32_C(0x2146): c1 = 'd'; goto conv1; + case UINT32_C(0x2147): c1 = 'e'; goto conv1; + case UINT32_C(0x2148): c1 = 'i'; goto conv1; + case UINT32_C(0x2149): c1 = 'j'; goto conv1; /* fraktur */ - case UINT32_C(0x212D): c = (uint_least32_t)'C'; goto conv; - case UINT32_C(0x210C): c = (uint_least32_t)'H'; goto conv; - case UINT32_C(0x2111): c = (uint_least32_t)'I'; goto conv; - case UINT32_C(0x211C): c = (uint_least32_t)'R'; goto conv; - case UINT32_C(0x2128): c = (uint_least32_t)'Z'; goto conv; + case UINT32_C(0x212D): c1 = 'C'; goto conv1; + case UINT32_C(0x210C): c1 = 'H'; goto conv1; + case UINT32_C(0x2111): c1 = 'I'; goto conv1; + case UINT32_C(0x211C): c1 = 'R'; goto conv1; + case UINT32_C(0x2128): c1 = 'Z'; goto conv1; /* script */ - case UINT32_C(0x212C): c = (uint_least32_t)'B'; goto conv; - case UINT32_C(0x2130): c = (uint_least32_t)'E'; goto conv; - case UINT32_C(0x2131): c = (uint_least32_t)'F'; goto conv; - case UINT32_C(0x210B): c = (uint_least32_t)'H'; goto conv; - case UINT32_C(0x2110): c = (uint_least32_t)'I'; goto conv; - case UINT32_C(0x2112): c = (uint_least32_t)'L'; goto conv; - case UINT32_C(0x2133): c = (uint_least32_t)'M'; goto conv; - case UINT32_C(0x211B): c = (uint_least32_t)'R'; goto conv; - case UINT32_C(0x212F): c = (uint_least32_t)'e'; goto conv; - case UINT32_C(0x210A): c = (uint_least32_t)'g'; goto conv; - case UINT32_C(0x2134): c = (uint_least32_t)'o'; goto conv; + case UINT32_C(0x212C): c1 = 'B'; goto conv1; + case UINT32_C(0x2130): c1 = 'E'; goto conv1; + case UINT32_C(0x2131): c1 = 'F'; goto conv1; + case UINT32_C(0x210B): c1 = 'H'; goto conv1; + case UINT32_C(0x2110): c1 = 'I'; goto conv1; + case UINT32_C(0x2112): c1 = 'L'; goto conv1; + case UINT32_C(0x2133): c1 = 'M'; goto conv1; + case UINT32_C(0x211B): c1 = 'R'; goto conv1; + case UINT32_C(0x212F): c1 = 'e'; goto conv1; + case UINT32_C(0x210A): c1 = 'g'; goto conv1; + case UINT32_C(0x2134): c1 = 'o'; goto conv1; /* buhid */ - case UINT32_C(0x1740): c = (uint_least32_t)'a'; goto conv; - case UINT32_C(0x1741): c = (uint_least32_t)'i'; goto conv; - case UINT32_C(0x1742): c = (uint_least32_t)'u'; goto conv; - case UINT32_C(0x1752): c2 = 'i'; goto budih_combining; - case UINT32_C(0x1753): c2 = 'u'; goto budih_combining; - budih_combining: - c1 = '^'; - goto conv2; - case UINT32_C(0x174A): c1 = 'b'; goto budih; - case UINT32_C(0x1747): c1 = 'd'; goto budih; - case UINT32_C(0x1744): c1 = 'g'; goto budih; - case UINT32_C(0x1751): c1 = 'h'; goto budih; - case UINT32_C(0x1743): c1 = 'k'; goto budih; - case UINT32_C(0x174E): c1 = 'l'; goto budih; - case UINT32_C(0x174B): c1 = 'm'; goto budih; - case UINT32_C(0x1748): c1 = 'n'; goto budih; - case UINT32_C(0x1749): c1 = 'p'; goto budih; - case UINT32_C(0x174D): c1 = 'r'; goto budih; - case UINT32_C(0x1750): c1 = 's'; goto budih; - case UINT32_C(0x1746): c1 = 't'; goto budih; - case UINT32_C(0x174C): c1 = 'y'; goto budih; - case UINT32_C(0x174F): c1 = 'w'; goto budih; - case UINT32_C(0x1745): c1 = '-'; goto budih; - budih: - if (*n) - goto no_conv; - c2 = 'a'; - s = &s[clen]; - *n += clen; - if (!slen) { - ret = LIBCHARCONV_CONVERT_IF_END; - goto budih_conv; - } - clen = libcharconv_decode_utf8_(s, slen, &c); - if (clen > slen) - return LIBCHARCONV_INDETERMINATE; - if (!clen) - goto budih_conv; - switch (c) { - case UINT32_C(0x1752): c2 = 'i'; *n += clen; break; - case UINT32_C(0x1753): c2 = 'u'; *n += clen; break; - default: - break; - } - budih_conv: - if (c1 == '-') { - c3 = c2; - c2 = 'g'; - c1 = 'n'; - goto conv3_prechecked; - } - goto conv2_prechecked; + case UINT32_C(0x1740): c1 = 'a'; goto conv1; + case UINT32_C(0x1741): c1 = 'i'; goto conv1; + case UINT32_C(0x1742): c1 = 'u'; goto conv1; + case UINT32_C(0x1752): c1 = 'i'; goto combining; + case UINT32_C(0x1753): c1 = 'u'; goto combining; /* replacement */ case UINT32_C(0xFFFC): @@ -1038,10 +1123,10 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz case UINT32_C(0x00AD): c1 = 'S'; c2 = 'H'; c3 = 'Y'; goto conv3; /* invisible */ - case UINT32_C(0x2061): c = (uint_least32_t)'('; goto conv; - case UINT32_C(0x2062): c = (uint_least32_t)'*'; goto conv; - case UINT32_C(0x2063): c = (uint_least32_t)'|'; goto conv; - case UINT32_C(0x2064): c = (uint_least32_t)'+'; goto conv; + case UINT32_C(0x2061): c1 = '('; goto conv1; + case UINT32_C(0x2062): c1 = '*'; goto conv1; + case UINT32_C(0x2063): c1 = '|'; goto conv1; + case UINT32_C(0x2064): c1 = '+'; goto conv1; /* enclosed (positive) */ case UINT32_C(0x24EA): c1 = '('; c2 = '0'; c3 = ')'; goto conv3; @@ -1097,6 +1182,32 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz case UINT32_C(0x2051): c1 = '*'; c2 = '*'; goto conv2; case UINT32_C(0x2E49): c1 = ','; c2 = ','; goto conv2; + /* gothic */ + case UINT32_C(0x10341): c1 = '9'; c2 = '0'; goto conv2; + case UINT32_C(0x1034A): c1 = '9'; c2 = '0'; c3 = '0'; goto conv3; + + /* mandaic */ + case UINT32_C(0x085E): c1 = '.'; goto conv1; + + /* rumi */ + case UINT32_C(0x10E7B): c1 = '1'; c2 = '/'; c3 = '2'; goto conv3; + case UINT32_C(0x10E7C): c1 = '1'; c2 = '/'; c3 = '4'; goto conv3; + case UINT32_C(0x10E7D): c1 = '1'; c2 = '/'; c3 = '3'; goto conv3; + case UINT32_C(0x10E7E): c1 = '2'; c2 = '/'; c3 = '3'; goto conv3; + + /* scores */ + case UINT32_C(0x1F4AE): c1 = '*'; goto conv1; + case UINT32_C(0x1F51F): c1 = '1'; c2 = '0'; goto conv2; + case UINT32_C(0x1F4AF): c1 = '1'; c2 = '0'; c3 = '0'; goto conv3; + + /* buginese */ + case UINT32_C(0x1A1E): c1 = '.'; goto conv1; + case UINT32_C(0x1A1F): c1 = '|'; goto conv1; + + /* makasar */ + case UINT32_C(0x11EF7): c1 = '.'; goto conv1; + case UINT32_C(0x11EF8): c1 = '|'; goto conv1; + default: no_match: *n += clen; @@ -1114,11 +1225,15 @@ conv: if (*n) goto no_conv; *n += clen; +conv_prechecked: if (*ncp) *cp = c; *ncp = 1u; return ret; +combining: + c2 = c1; + c1 = '^'; conv2: if (*n) goto no_conv; -- cgit v1.2.3-70-g09d2