/* See LICENSE file for copyright and license details. */
#include "lib-common.h"


/*
 * A contiguous run of code points that maps, in order,
 * onto consecutive ASCII characters
 */
struct latin_range {
	uint_least32_t first; /* first code point in the run */
	uint_least32_t last;  /* last code point in the run (inclusive) */
	char base;            /* ASCII character that `first` maps to */
};

/* The runs are disjoint, so the order of the entries does not matter */
static const struct latin_range latin_ranges[] = {
	{UINT32_C(0x2680),  UINT32_C(0x2685),  '1'}, /* dice */
	{UINT32_C(0x1F1E6), UINT32_C(0x1F1FF), 'A'}, /* region indicators */
	{UINT32_C(0xE0020), UINT32_C(0xE007E), ' '}, /* tags (U+E0000 + ASCII value) */
	{UINT32_C(0x1D400), UINT32_C(0x1D419), 'A'}, /* bold (capital) */
	{UINT32_C(0x1D41A), UINT32_C(0x1D433), 'a'}, /* bold (small) */
	{UINT32_C(0x1D7CE), UINT32_C(0x1D7D7), '0'}, /* bold (digit) */
	{UINT32_C(0x1D434), UINT32_C(0x1D44D), 'A'}, /* italic (capital) */
	{UINT32_C(0x1D44E), UINT32_C(0x1D467), 'a'}, /* italic (small) */
	{UINT32_C(0x1D468), UINT32_C(0x1D481), 'A'}, /* bold italic (capital) */
	{UINT32_C(0x1D482), UINT32_C(0x1D49B), 'a'}, /* bold italic (small) */
	{UINT32_C(0x1D670), UINT32_C(0x1D689), 'A'}, /* monospace (capital) */
	{UINT32_C(0x1D68A), UINT32_C(0x1D6A3), 'a'}, /* monospace (small) */
	{UINT32_C(0x1D7F6), UINT32_C(0x1D7FF), '0'}, /* monospace (digit) */
	{UINT32_C(0x1FBF0), UINT32_C(0x1FBF9), '0'}, /* segmented */
	{UINT32_C(0x1D5A0), UINT32_C(0x1D5B9), 'A'}, /* sans-serif (capital) */
	{UINT32_C(0x1D5BA), UINT32_C(0x1D5D3), 'a'}, /* sans-serif (small) */
	{UINT32_C(0x1D7E2), UINT32_C(0x1D7EB), '0'}, /* sans-serif (digit) */
	{UINT32_C(0x1D5D4), UINT32_C(0x1D5ED), 'A'}, /* sans-serif bold (capital) */
	{UINT32_C(0x1D5EE), UINT32_C(0x1D607), 'a'}, /* sans-serif bold (small) */
	{UINT32_C(0x1D7EC), UINT32_C(0x1D7F5), '0'}, /* sans-serif bold (digit) */
	{UINT32_C(0x1D608), UINT32_C(0x1D621), 'A'}, /* sans-serif italic (capital) */
	{UINT32_C(0x1D622), UINT32_C(0x1D63B), 'a'}, /* sans-serif italic (small) */
	{UINT32_C(0x1D63C), UINT32_C(0x1D655), 'A'}, /* sans-serif bold italic (capital) */
	{UINT32_C(0x1D656), UINT32_C(0x1D66F), 'a'}, /* sans-serif bold italic (small) */
	{UINT32_C(0x1D538), UINT32_C(0x1D551), 'A'}, /* double-struck (capital) */
	{UINT32_C(0x1D552), UINT32_C(0x1D56B), 'a'}, /* double-struck (small) */
	{UINT32_C(0x1D7D8), UINT32_C(0x1D7E1), '0'}, /* double-struck (digit) */
	{UINT32_C(0x1D504), UINT32_C(0x1D51D), 'A'}, /* fraktur (capital) */
	{UINT32_C(0x1D51E), UINT32_C(0x1D537), 'a'}, /* fraktur (small) */
	{UINT32_C(0x1D56C), UINT32_C(0x1D585), 'A'}, /* bold fraktur (capital) */
	{UINT32_C(0x1D586), UINT32_C(0x1D59F), 'a'}, /* bold fraktur (small) */
	{UINT32_C(0x1D49C), UINT32_C(0x1D4B5), 'A'}, /* script (capital) */
	{UINT32_C(0x1D4B6), UINT32_C(0x1D4CF), 'a'}, /* script (small) */
	{UINT32_C(0x1D4D0), UINT32_C(0x1D4E9), 'A'}, /* bold script (capital) */
	{UINT32_C(0x1D4EA), UINT32_C(0x1D503), 'a'}  /* bold script (small) */
};


/*
 * Convert the character at the beginning of a UTF-8 string to
 * one, two or three basic Latin (ASCII) code points
 *
 * A conversion is only performed when the convertible character
 * is the very first thing in `s`; otherwise the function reports
 * how many leading bytes have no conversion so that the caller
 * can step over them
 *
 * @param   s     The input text; it is decoded with libcharconv_decode_utf8_
 * @param   slen  The number of bytes available in `s`
 * @param   n     Output parameter; always reset to 0 first.
 *                On conversion: the number of bytes of `s` that
 *                were consumed by the converted character(s).
 *                On LIBCHARCONV_NO_CONVERT: the number of leading
 *                bytes of `s` for which no conversion applies.
 *                On LIBCHARCONV_INDETERMINATE: whatever had been
 *                counted before the decoder gave up
 * @param   cp    Output buffer for the resulting code points
 * @param   ncp   On input: the number of elements `cp` has room for;
 *                on conversion set to the number of code points the
 *                conversion produces (1, 2 or 3), even if this is
 *                more than what fit in `cp` (only the elements that
 *                fit are written). Left untouched otherwise
 * @return        LIBCHARCONV_CONVERTED when a conversion was made,
 *                LIBCHARCONV_CONVERT_IF_END when a Buhid consonant
 *                was converted but it was the last character in `s`,
 *                so a vowel sign following it could still change
 *                the result,
 *                LIBCHARCONV_NO_CONVERT when `s` does not start with
 *                a convertible character, and
 *                LIBCHARCONV_INDETERMINATE when the decoder reports
 *                a length greater than the number of bytes left
 *                (presumably a truncated sequence -- confirm against
 *                libcharconv_decode_utf8_)
 */
enum libcharconv_result
libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp)
{
	enum libcharconv_result ret = LIBCHARCONV_CONVERTED;
	uint_least32_t c;
	char c1 = 0, c2 = 0, c3 = 0;
	size_t clen, i;

	*n = 0;
	while (slen) {
		clen = libcharconv_decode_utf8_(s, slen, &c);
		if (clen > slen)
			return LIBCHARCONV_INDETERMINATE;
		if (!clen) {
			/* Nothing could be decoded here: step over a single byte */
			s++;
			*n += 1u;
			slen -= 1u;
			continue;
		}
		slen -= clen;

		/* Runs that map linearly onto ASCII */
		for (i = 0; i < sizeof(latin_ranges) / sizeof(*latin_ranges); i++) {
			if (latin_ranges[i].first <= c && c <= latin_ranges[i].last) {
				c -= latin_ranges[i].first;
				c += (uint_least32_t)(unsigned char)latin_ranges[i].base;
				goto conv;
			}
		}

		if (UINT32_C(0x10800) <= c && c <= UINT32_C(0x1083F)) {
			/*
			 * cypriot: the block is laid out as rows of five
			 * vowels per consonant, with the first row being
			 * the plain vowels and some cells left unassigned
			 */
			c -= UINT32_C(0x10800);
			c1 = "_jklmnprstwxz"[c / 5];
			c2 = "aeiou"[c % 5];
			if (c1 == '_') {
				c = (uint_least32_t)c2;
				goto conv;
			}
			if (c1 == 'j' && c2 != 'a' && c2 != 'o')
				goto no_match;
			if (c1 == 'w' && c2 == 'u')
				goto no_match;
			if (c1 == 'x' && c2 != 'a' && c2 != 'e')
				goto no_match;
			if (c1 == 'z' && c2 != 'a' && c2 != 'o')
				goto no_match;
			goto conv2;
		}

		switch (c) {
		/* shogi */
		case UINT32_C(0x2616): c = (uint_least32_t)'w'; goto conv;
		case UINT32_C(0x2617): c = (uint_least32_t)'b'; goto conv;
		case UINT32_C(0x26C9): c = (uint_least32_t)'W'; goto conv;
		case UINT32_C(0x26CA): c = (uint_least32_t)'B'; goto conv;

		/* go (white) */
		case UINT32_C(0x25CB): c = (uint_least32_t)'0'; goto conv;
		case UINT32_C(0x2686): c = (uint_least32_t)'1'; goto conv;
		case UINT32_C(0x2687): c = (uint_least32_t)'2'; goto conv;

		/* go (black) */
		case UINT32_C(0x25CF): c = (uint_least32_t)'0'; goto conv;
		case UINT32_C(0x2688): c = (uint_least32_t)'1'; goto conv;
		case UINT32_C(0x2689): c = (uint_least32_t)'2'; goto conv;

		/* draughts */
		case UINT32_C(0x26C0): c = (uint_least32_t)'m'; goto conv;
		case UINT32_C(0x26C1): c = (uint_least32_t)'k'; goto conv;
		case UINT32_C(0x26C2): c = (uint_least32_t)'M'; goto conv;
		case UINT32_C(0x26C3): c = (uint_least32_t)'K'; goto conv;

		/* gender symbols */
		case UINT32_C(0x2640): c = (uint_least32_t)'f'; goto conv;
		case UINT32_C(0x2642): c = (uint_least32_t)'m'; goto conv;
		case UINT32_C(0x263F): c = (uint_least32_t)'i'; goto conv;

		/* italic (small h) */
		case UINT32_C(0x210E): c = (uint_least32_t)'h'; goto conv;

		/* double-struck */
		case UINT32_C(0x2102): c = (uint_least32_t)'C'; goto conv;
		case UINT32_C(0x210D): c = (uint_least32_t)'H'; goto conv;
		case UINT32_C(0x2115): c = (uint_least32_t)'N'; goto conv;
		case UINT32_C(0x2119): c = (uint_least32_t)'P'; goto conv;
		case UINT32_C(0x211A): c = (uint_least32_t)'Q'; goto conv;
		case UINT32_C(0x211D): c = (uint_least32_t)'R'; goto conv;
		case UINT32_C(0x2124): c = (uint_least32_t)'Z'; goto conv;

		/* double-struck italic */
		case UINT32_C(0x2145): c = (uint_least32_t)'D'; goto conv;
		case UINT32_C(0x2146): c = (uint_least32_t)'d'; goto conv;
		case UINT32_C(0x2147): c = (uint_least32_t)'e'; goto conv;
		case UINT32_C(0x2148): c = (uint_least32_t)'i'; goto conv;
		case UINT32_C(0x2149): c = (uint_least32_t)'j'; goto conv;

		/* fraktur */
		case UINT32_C(0x212D): c = (uint_least32_t)'C'; goto conv;
		case UINT32_C(0x210C): c = (uint_least32_t)'H'; goto conv;
		case UINT32_C(0x2111): c = (uint_least32_t)'I'; goto conv;
		case UINT32_C(0x211C): c = (uint_least32_t)'R'; goto conv;
		case UINT32_C(0x2128): c = (uint_least32_t)'Z'; goto conv;

		/* script */
		case UINT32_C(0x212C): c = (uint_least32_t)'B'; goto conv;
		case UINT32_C(0x2130): c = (uint_least32_t)'E'; goto conv;
		case UINT32_C(0x2131): c = (uint_least32_t)'F'; goto conv;
		case UINT32_C(0x210B): c = (uint_least32_t)'H'; goto conv;
		case UINT32_C(0x2110): c = (uint_least32_t)'I'; goto conv;
		case UINT32_C(0x2112): c = (uint_least32_t)'L'; goto conv;
		case UINT32_C(0x2133): c = (uint_least32_t)'M'; goto conv;
		case UINT32_C(0x211B): c = (uint_least32_t)'R'; goto conv;
		case UINT32_C(0x212F): c = (uint_least32_t)'e'; goto conv;
		case UINT32_C(0x210A): c = (uint_least32_t)'g'; goto conv;
		case UINT32_C(0x2134): c = (uint_least32_t)'o'; goto conv;

		/* buhid (independent vowels) */
		case UINT32_C(0x1740): c = (uint_least32_t)'a'; goto conv;
		case UINT32_C(0x1741): c = (uint_least32_t)'i'; goto conv;
		case UINT32_C(0x1742): c = (uint_least32_t)'u'; goto conv;

		/* buhid (vowel signs without a preceding consonant) */
		case UINT32_C(0x1752): c1 = '^'; c2 = 'i'; goto conv2;
		case UINT32_C(0x1753): c1 = '^'; c2 = 'u'; goto conv2;

		/* buhid (consonants; '-' stands for the digraph "ng") */
		case UINT32_C(0x174A): c1 = 'b'; goto buhid;
		case UINT32_C(0x1747): c1 = 'd'; goto buhid;
		case UINT32_C(0x1744): c1 = 'g'; goto buhid;
		case UINT32_C(0x1751): c1 = 'h'; goto buhid;
		case UINT32_C(0x1743): c1 = 'k'; goto buhid;
		case UINT32_C(0x174E): c1 = 'l'; goto buhid;
		case UINT32_C(0x174B): c1 = 'm'; goto buhid;
		case UINT32_C(0x1748): c1 = 'n'; goto buhid;
		case UINT32_C(0x1749): c1 = 'p'; goto buhid;
		case UINT32_C(0x174D): c1 = 'r'; goto buhid;
		case UINT32_C(0x1750): c1 = 's'; goto buhid;
		case UINT32_C(0x1746): c1 = 't'; goto buhid;
		case UINT32_C(0x174C): c1 = 'y'; goto buhid;
		case UINT32_C(0x174F): c1 = 'w'; goto buhid;
		case UINT32_C(0x1745): c1 = '-'; goto buhid;

		/* replacement */
		case UINT32_C(0xFFFC): c1 = 'o'; c2 = 'b'; c3 = 'j'; goto conv3;
		case UINT32_C(0xFFFD): c = (uint_least32_t)'?'; goto conv;

		default:
			break;
		}

	no_match:
		/*
		 * Step over the whole character, not just its first byte,
		 * so that `s`, `slen` and `*n` stay in agreement and the
		 * next iteration starts on a character boundary
		 */
		s = &s[clen];
		*n += clen;
	}

no_conv:
	return LIBCHARCONV_NO_CONVERT;

buhid:
	/* A consonant carries the inherent vowel "a" unless a vowel sign follows */
	if (*n)
		goto no_conv;
	c2 = 'a';
	s = &s[clen];
	*n += clen;
	if (!slen) {
		/* More input could still bring a vowel sign */
		ret = LIBCHARCONV_CONVERT_IF_END;
		goto buhid_conv;
	}
	clen = libcharconv_decode_utf8_(s, slen, &c);
	if (clen > slen)
		return LIBCHARCONV_INDETERMINATE;
	if (!clen)
		goto buhid_conv;
	switch (c) {
	case UINT32_C(0x1752):
		c2 = 'i';
		*n += clen;
		break;
	case UINT32_C(0x1753):
		c2 = 'u';
		*n += clen;
		break;
	default:
		break;
	}
buhid_conv:
	if (c1 == '-') {
		c3 = c2;
		c2 = 'g';
		c1 = 'n';
		goto conv3_prechecked;
	}
	goto conv2_prechecked;

conv:
	/* Only convert when nothing unconvertible precedes the character */
	if (*n)
		goto no_conv;
	*n += clen;
	if (*ncp)
		*cp = c;
	*ncp = 1u;
	return ret;

conv2:
	if (*n)
		goto no_conv;
	*n += clen;
conv2_prechecked:
	if (*ncp >= 1u)
		cp[0] = (uint_least32_t)c1;
	if (*ncp >= 2u)
		cp[1] = (uint_least32_t)c2;
	*ncp = 2u;
	return ret;

conv3:
	if (*n)
		goto no_conv;
	*n += clen;
conv3_prechecked:
	if (*ncp >= 1u)
		cp[0] = (uint_least32_t)c1;
	if (*ncp >= 2u)
		cp[1] = (uint_least32_t)c2;
	if (*ncp >= 3u)
		cp[2] = (uint_least32_t)c3;
	*ncp = 3u;
	return ret;
}