about | summary | refs | log | tree | commit | diff | stats
path: root/libcharconv_latin.c
diff options
context:
space:
mode:
author: Mattias Andrée <m@maandree.se>, 2026-01-28 19:52:45 +0100
committer: Mattias Andrée <m@maandree.se>, 2026-01-28 19:52:45 +0100
commit: 579e91697244d5813a667bf49209ea4102bcd360 (patch)
tree: 2910e440fbaf968e8010b054e29c8e57b60b8935 /libcharconv_latin.c
parent: Add ROTATED CAPITAL Q (diff)
download: charconv-579e91697244d5813a667bf49209ea4102bcd360.tar.gz
charconv-579e91697244d5813a667bf49209ea4102bcd360.tar.bz2
charconv-579e91697244d5813a667bf49209ea4102bcd360.tar.xz
Misc stuff
Signed-off-by: Mattias Andrée <m@maandree.se>
Diffstat (limited to 'libcharconv_latin.c')
-rw-r--r-- libcharconv_latin.c 317
1 file changed, 216 insertions, 101 deletions
diff --git a/libcharconv_latin.c b/libcharconv_latin.c
index 374a097..20e3fd9 100644
--- a/libcharconv_latin.c
+++ b/libcharconv_latin.c
@@ -220,6 +220,38 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
c -= (uint_least32_t)UINT32_C(0x1D4EA) - (uint_least32_t)'a';
goto conv;
+ } else if (UINT32_C(0x1743) <= c && c <= UINT32_C(0x1751)) {
+ /* buhid */
+ c1 = "kgGtdnpbmyrlwsh"[c - UINT32_C(0x1743)];
+ if (*n)
+ goto no_conv;
+ c2 = 'a';
+ s = &s[clen];
+ *n += clen;
+ if (!slen) {
+ ret = LIBCHARCONV_CONVERT_IF_END;
+ goto budih_conv;
+ }
+ clen = libcharconv_decode_utf8_(s, slen, &c);
+ if (clen > slen)
+ return LIBCHARCONV_INDETERMINATE;
+ if (!clen)
+ goto budih_conv;
+ switch (c) {
+ case UINT32_C(0x1752): c2 = 'i'; *n += clen; break;
+ case UINT32_C(0x1753): c2 = 'u'; *n += clen; break;
+ default:
+ break;
+ }
+ budih_conv:
+ if (c1 == 'G') {
+ c3 = c2;
+ c2 = 'g';
+ c1 = 'n';
+ goto conv3_prechecked;
+ }
+ goto conv2_prechecked;
+
} else if (UINT32_C(0x1F110) <= c && c <= UINT32_C(0x1F129)) {
/* bracketed (parenthesised capital) */
c -= (uint_least32_t)UINT32_C(0x1F110) - (uint_least32_t)'A';
@@ -627,127 +659,180 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
c2 = (char)c;
goto conv2;
+ } else if (UINT32_C(0x10330) <= c && c <= UINT32_C(0x10349)) {
+ /* gothic */
+ if (c == UINT32_C(0x10341))
+ goto use_switch;
+ c = (uint_least32_t)"abgdeqzhTiklmnjup rstwfxvo"[c - UINT32_C(0x10330)];
+ goto conv;
+
+ } else if (UINT32_C(0x0840) <= c && c <= UINT32_C(0x085B)) {
+ /* mandaic */
+ c = (uint_least32_t)"abgdhuzHTiklmnsepZqrStDKo\'*:"[c - UINT32_C(0x0840)];
+ goto conv;
+
+ } else if (UINT32_C(0x10E60) <= c && c <= UINT32_C(0x10E68)) {
+ /* rumi */
+ c -= (uint_least32_t)UINT32_C(0x10E60) - (uint_least32_t)'1';
+ c1 = (char)c;
+ goto conv1;
+ } else if (UINT32_C(0x10E69) <= c && c <= UINT32_C(0x10E71)) {
+ /* rumi */
+ c -= (uint_least32_t)UINT32_C(0x10E69) - (uint_least32_t)'1';
+ c1 = (char)c;
+ c2 = '0';
+ goto conv2;
+ } else if (UINT32_C(0x10E72) <= c && c <= UINT32_C(0x10E7A)) {
+ /* rumi */
+ c -= (uint_least32_t)UINT32_C(0x10E72) - (uint_least32_t)'1';
+ c1 = (char)c;
+ c2 = '0';
+ c3 = '0';
+ goto conv3;
+
+ } else if (UINT32_C(0x1A00) <= c && c <= UINT32_C(0x1A1B)) {
+ /* buginese */
+ c1 = "kgGKpbmMtdnRcjYCyrlvsahiueoA"[c - UINT32_C(0x1A00)];
+ if (c >= UINT32_C(0x1A17))
+ goto combining;
+ if (*n)
+ goto no_conv;
+ c2 = 'a';
+ s = &s[clen];
+ *n += clen;
+ if (!slen) {
+ ret = LIBCHARCONV_CONVERT_IF_END;
+ goto buginese_conv;
+ }
+ clen = libcharconv_decode_utf8_(s, slen, &c);
+ if (clen > slen)
+ return LIBCHARCONV_INDETERMINATE;
+ if (!clen)
+ goto buginese_conv;
+ switch (c) {
+ case UINT32_C(0x1A17): c2 = 'i'; *n += clen; break;
+ case UINT32_C(0x1A18): c2 = 'u'; *n += clen; break;
+ case UINT32_C(0x1A19): c2 = 'e'; *n += clen; break;
+ case UINT32_C(0x1A1A): c2 = 'o'; *n += clen; break;
+ case UINT32_C(0x1A1B): c2 = 'A'; *n += clen; break;
+ default:
+ break;
+ }
+ buginese_conv:
+ if (c1 == 'a') {
+ c = (uint_least32_t)c2;
+ goto conv_prechecked;
+ }
+ goto conv2_prechecked;
+
+ } else if (UINT32_C(0x11EE0) <= c && c <= UINT32_C(0x11EF6)) {
+ /* makasar */
+ c1 = "kgGpbmtdncjYyrlvsa-iueo"[c - UINT32_C(0x11EE0)];
+ if (c >= UINT32_C(0x11EF3))
+ goto combining;
+ if (*n)
+ goto no_conv;
+ c2 = 'a';
+ s = &s[clen];
+ *n += clen;
+ if (!slen) {
+ ret = LIBCHARCONV_CONVERT_IF_END;
+ goto makasar_conv;
+ }
+ clen = libcharconv_decode_utf8_(s, slen, &c);
+ if (clen > slen)
+ return LIBCHARCONV_INDETERMINATE;
+ if (!clen)
+ goto makasar_conv;
+ switch (c) {
+ case UINT32_C(0x11EF3): c2 = 'i'; *n += clen; break;
+ case UINT32_C(0x11EF4): c2 = 'u'; *n += clen; break;
+ case UINT32_C(0x11EF5): c2 = 'e'; *n += clen; break;
+ case UINT32_C(0x11EF6): c2 = 'o'; *n += clen; break;
+ default:
+ break;
+ }
+ makasar_conv:
+ if (c1 == 'a') {
+ c = (uint_least32_t)c2;
+ goto conv_prechecked;
+ }
+ goto conv2_prechecked;
+
} else {
use_switch:
switch (c) {
/* shogi */
- case UINT32_C(0x2616): c = (uint_least32_t)'w'; goto conv;
- case UINT32_C(0x2617): c = (uint_least32_t)'b'; goto conv;
- case UINT32_C(0x26C9): c = (uint_least32_t)'W'; goto conv;
- case UINT32_C(0x26CA): c = (uint_least32_t)'B'; goto conv;
+ case UINT32_C(0x2616): c1 = 'w'; goto conv1;
+ case UINT32_C(0x2617): c1 = 'b'; goto conv1;
+ case UINT32_C(0x26C9): c1 = 'W'; goto conv1;
+ case UINT32_C(0x26CA): c1 = 'B'; goto conv1;
/* go (common) */
- case UINT32_C(0x25CF): c = (uint_least32_t)'b'; goto conv;
- case UINT32_C(0x25CB): c = (uint_least32_t)'w'; goto conv;
+ case UINT32_C(0x25CB): c1 = 'w'; goto conv1;
+ case UINT32_C(0x25CF): c1 = 'b'; goto conv1;
/* go (white) */
- case UINT32_C(0x2686): c = (uint_least32_t)'1'; goto conv;
- case UINT32_C(0x2687): c = (uint_least32_t)'2'; goto conv;
+ case UINT32_C(0x2686): c1 = '1'; goto conv1;
+ case UINT32_C(0x2687): c1 = '2'; goto conv1;
/* go (black) */
- case UINT32_C(0x2688): c = (uint_least32_t)'1'; goto conv;
- case UINT32_C(0x2689): c = (uint_least32_t)'2'; goto conv;
+ case UINT32_C(0x2688): c1 = '1'; goto conv1;
+ case UINT32_C(0x2689): c1 = '2'; goto conv1;
/* draughts */
- case UINT32_C(0x26C0): c = (uint_least32_t)'m'; goto conv;
- case UINT32_C(0x26C1): c = (uint_least32_t)'k'; goto conv;
- case UINT32_C(0x26C2): c = (uint_least32_t)'M'; goto conv;
- case UINT32_C(0x26C3): c = (uint_least32_t)'K'; goto conv;
+ case UINT32_C(0x26C0): c1 = 'm'; goto conv1;
+ case UINT32_C(0x26C1): c1 = 'k'; goto conv1;
+ case UINT32_C(0x26C2): c1 = 'M'; goto conv1;
+ case UINT32_C(0x26C3): c1 = 'K'; goto conv1;
/* gender symbols */
- case UINT32_C(0x2640): c = (uint_least32_t)'f'; goto conv;
- case UINT32_C(0x2642): c = (uint_least32_t)'m'; goto conv;
- case UINT32_C(0x263F): c = (uint_least32_t)'i'; goto conv;
+ case UINT32_C(0x2640): c1 = 'f'; goto conv1;
+ case UINT32_C(0x2642): c1 = 'm'; goto conv1;
+ case UINT32_C(0x263F): c1 = 'i'; goto conv1;
/* double-struck */
- case UINT32_C(0x2102): c = (uint_least32_t)'C'; goto conv;
- case UINT32_C(0x210D): c = (uint_least32_t)'H'; goto conv;
- case UINT32_C(0x2115): c = (uint_least32_t)'N'; goto conv;
- case UINT32_C(0x2119): c = (uint_least32_t)'P'; goto conv;
- case UINT32_C(0x211A): c = (uint_least32_t)'Q'; goto conv;
- case UINT32_C(0x211D): c = (uint_least32_t)'R'; goto conv;
- case UINT32_C(0x2124): c = (uint_least32_t)'Z'; goto conv;
+ case UINT32_C(0x2102): c1 = 'C'; goto conv1;
+ case UINT32_C(0x210D): c1 = 'H'; goto conv1;
+ case UINT32_C(0x2115): c1 = 'N'; goto conv1;
+ case UINT32_C(0x2119): c1 = 'P'; goto conv1;
+ case UINT32_C(0x211A): c1 = 'Q'; goto conv1;
+ case UINT32_C(0x211D): c1 = 'R'; goto conv1;
+ case UINT32_C(0x2124): c1 = 'Z'; goto conv1;
/* double-struck italic */
- case UINT32_C(0x2145): c = (uint_least32_t)'D'; goto conv;
- case UINT32_C(0x2146): c = (uint_least32_t)'d'; goto conv;
- case UINT32_C(0x2147): c = (uint_least32_t)'e'; goto conv;
- case UINT32_C(0x2148): c = (uint_least32_t)'i'; goto conv;
- case UINT32_C(0x2149): c = (uint_least32_t)'j'; goto conv;
+ case UINT32_C(0x2145): c1 = 'D'; goto conv1;
+ case UINT32_C(0x2146): c1 = 'd'; goto conv1;
+ case UINT32_C(0x2147): c1 = 'e'; goto conv1;
+ case UINT32_C(0x2148): c1 = 'i'; goto conv1;
+ case UINT32_C(0x2149): c1 = 'j'; goto conv1;
/* fraktur */
- case UINT32_C(0x212D): c = (uint_least32_t)'C'; goto conv;
- case UINT32_C(0x210C): c = (uint_least32_t)'H'; goto conv;
- case UINT32_C(0x2111): c = (uint_least32_t)'I'; goto conv;
- case UINT32_C(0x211C): c = (uint_least32_t)'R'; goto conv;
- case UINT32_C(0x2128): c = (uint_least32_t)'Z'; goto conv;
+ case UINT32_C(0x212D): c1 = 'C'; goto conv1;
+ case UINT32_C(0x210C): c1 = 'H'; goto conv1;
+ case UINT32_C(0x2111): c1 = 'I'; goto conv1;
+ case UINT32_C(0x211C): c1 = 'R'; goto conv1;
+ case UINT32_C(0x2128): c1 = 'Z'; goto conv1;
/* script */
- case UINT32_C(0x212C): c = (uint_least32_t)'B'; goto conv;
- case UINT32_C(0x2130): c = (uint_least32_t)'E'; goto conv;
- case UINT32_C(0x2131): c = (uint_least32_t)'F'; goto conv;
- case UINT32_C(0x210B): c = (uint_least32_t)'H'; goto conv;
- case UINT32_C(0x2110): c = (uint_least32_t)'I'; goto conv;
- case UINT32_C(0x2112): c = (uint_least32_t)'L'; goto conv;
- case UINT32_C(0x2133): c = (uint_least32_t)'M'; goto conv;
- case UINT32_C(0x211B): c = (uint_least32_t)'R'; goto conv;
- case UINT32_C(0x212F): c = (uint_least32_t)'e'; goto conv;
- case UINT32_C(0x210A): c = (uint_least32_t)'g'; goto conv;
- case UINT32_C(0x2134): c = (uint_least32_t)'o'; goto conv;
+ case UINT32_C(0x212C): c1 = 'B'; goto conv1;
+ case UINT32_C(0x2130): c1 = 'E'; goto conv1;
+ case UINT32_C(0x2131): c1 = 'F'; goto conv1;
+ case UINT32_C(0x210B): c1 = 'H'; goto conv1;
+ case UINT32_C(0x2110): c1 = 'I'; goto conv1;
+ case UINT32_C(0x2112): c1 = 'L'; goto conv1;
+ case UINT32_C(0x2133): c1 = 'M'; goto conv1;
+ case UINT32_C(0x211B): c1 = 'R'; goto conv1;
+ case UINT32_C(0x212F): c1 = 'e'; goto conv1;
+ case UINT32_C(0x210A): c1 = 'g'; goto conv1;
+ case UINT32_C(0x2134): c1 = 'o'; goto conv1;
/* buhid */
- case UINT32_C(0x1740): c = (uint_least32_t)'a'; goto conv;
- case UINT32_C(0x1741): c = (uint_least32_t)'i'; goto conv;
- case UINT32_C(0x1742): c = (uint_least32_t)'u'; goto conv;
- case UINT32_C(0x1752): c2 = 'i'; goto budih_combining;
- case UINT32_C(0x1753): c2 = 'u'; goto budih_combining;
- budih_combining:
- c1 = '^';
- goto conv2;
- case UINT32_C(0x174A): c1 = 'b'; goto budih;
- case UINT32_C(0x1747): c1 = 'd'; goto budih;
- case UINT32_C(0x1744): c1 = 'g'; goto budih;
- case UINT32_C(0x1751): c1 = 'h'; goto budih;
- case UINT32_C(0x1743): c1 = 'k'; goto budih;
- case UINT32_C(0x174E): c1 = 'l'; goto budih;
- case UINT32_C(0x174B): c1 = 'm'; goto budih;
- case UINT32_C(0x1748): c1 = 'n'; goto budih;
- case UINT32_C(0x1749): c1 = 'p'; goto budih;
- case UINT32_C(0x174D): c1 = 'r'; goto budih;
- case UINT32_C(0x1750): c1 = 's'; goto budih;
- case UINT32_C(0x1746): c1 = 't'; goto budih;
- case UINT32_C(0x174C): c1 = 'y'; goto budih;
- case UINT32_C(0x174F): c1 = 'w'; goto budih;
- case UINT32_C(0x1745): c1 = '-'; goto budih;
- budih:
- if (*n)
- goto no_conv;
- c2 = 'a';
- s = &s[clen];
- *n += clen;
- if (!slen) {
- ret = LIBCHARCONV_CONVERT_IF_END;
- goto budih_conv;
- }
- clen = libcharconv_decode_utf8_(s, slen, &c);
- if (clen > slen)
- return LIBCHARCONV_INDETERMINATE;
- if (!clen)
- goto budih_conv;
- switch (c) {
- case UINT32_C(0x1752): c2 = 'i'; *n += clen; break;
- case UINT32_C(0x1753): c2 = 'u'; *n += clen; break;
- default:
- break;
- }
- budih_conv:
- if (c1 == '-') {
- c3 = c2;
- c2 = 'g';
- c1 = 'n';
- goto conv3_prechecked;
- }
- goto conv2_prechecked;
+ case UINT32_C(0x1740): c1 = 'a'; goto conv1;
+ case UINT32_C(0x1741): c1 = 'i'; goto conv1;
+ case UINT32_C(0x1742): c1 = 'u'; goto conv1;
+ case UINT32_C(0x1752): c1 = 'i'; goto combining;
+ case UINT32_C(0x1753): c1 = 'u'; goto combining;
/* replacement */
case UINT32_C(0xFFFC):
@@ -1038,10 +1123,10 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
case UINT32_C(0x00AD): c1 = 'S'; c2 = 'H'; c3 = 'Y'; goto conv3;
/* invisible */
- case UINT32_C(0x2061): c = (uint_least32_t)'('; goto conv;
- case UINT32_C(0x2062): c = (uint_least32_t)'*'; goto conv;
- case UINT32_C(0x2063): c = (uint_least32_t)'|'; goto conv;
- case UINT32_C(0x2064): c = (uint_least32_t)'+'; goto conv;
+ case UINT32_C(0x2061): c1 = '('; goto conv1;
+ case UINT32_C(0x2062): c1 = '*'; goto conv1;
+ case UINT32_C(0x2063): c1 = '|'; goto conv1;
+ case UINT32_C(0x2064): c1 = '+'; goto conv1;
/* enclosed (positive) */
case UINT32_C(0x24EA): c1 = '('; c2 = '0'; c3 = ')'; goto conv3;
@@ -1097,6 +1182,32 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
case UINT32_C(0x2051): c1 = '*'; c2 = '*'; goto conv2;
case UINT32_C(0x2E49): c1 = ','; c2 = ','; goto conv2;
+ /* gothic */
+ case UINT32_C(0x10341): c1 = '9'; c2 = '0'; goto conv2;
+ case UINT32_C(0x1034A): c1 = '9'; c2 = '0'; c3 = '0'; goto conv3;
+
+ /* mandaic */
+ case UINT32_C(0x085E): c1 = '.'; goto conv1;
+
+ /* rumi */
+ case UINT32_C(0x10E7B): c1 = '1'; c2 = '/'; c3 = '2'; goto conv3;
+ case UINT32_C(0x10E7C): c1 = '1'; c2 = '/'; c3 = '4'; goto conv3;
+ case UINT32_C(0x10E7D): c1 = '1'; c2 = '/'; c3 = '3'; goto conv3;
+ case UINT32_C(0x10E7E): c1 = '2'; c2 = '/'; c3 = '3'; goto conv3;
+
+ /* scores */
+ case UINT32_C(0x1F4AE): c1 = '*'; goto conv1;
+ case UINT32_C(0x1F51F): c1 = '1'; c2 = '0'; goto conv2;
+ case UINT32_C(0x1F4AF): c1 = '1'; c2 = '0'; c3 = '0'; goto conv3;
+
+ /* buginese */
+ case UINT32_C(0x1A1E): c1 = '.'; goto conv1;
+ case UINT32_C(0x1A1F): c1 = '|'; goto conv1;
+
+ /* makasar */
+ case UINT32_C(0x11EF7): c1 = '.'; goto conv1;
+ case UINT32_C(0x11EF8): c1 = '|'; goto conv1;
+
default:
no_match:
*n += clen;
@@ -1114,11 +1225,15 @@ conv:
if (*n)
goto no_conv;
*n += clen;
+conv_prechecked:
if (*ncp)
*cp = c;
*ncp = 1u;
return ret;
+combining:
+ c2 = c1;
+ c1 = '^';
conv2:
if (*n)
goto no_conv;