/* See LICENSE file for copyright and license details. */
#include "lib-common.h"


/**
 * A contiguous run of code points that converts, one code point to
 * one character, onto an equally long contiguous run of characters
 */
struct latin_run {
	uint_least32_t first; /* first code point in the run */
	uint_least32_t last;  /* last code point in the run (inclusive) */
	uint_least32_t base;  /* the character `first` converts to */
};

/**
 * All fixed-offset conversions; the runs do not overlap,
 * so the order of the entries carries no meaning
 */
static const struct latin_run latin_runs[] = {
	{UINT32_C(0x2680),  UINT32_C(0x2685),  '1'},  /* dice */
	{UINT32_C(0x1F1E6), UINT32_C(0x1F1FF), 'A'},  /* region indicators */
	{UINT32_C(0xE0020), UINT32_C(0xE007E), 0x20}, /* tags */
	{UINT32_C(0x1D400), UINT32_C(0x1D419), 'A'},  /* bold (capital) */
	{UINT32_C(0x1D41A), UINT32_C(0x1D433), 'a'},  /* bold (small) */
	{UINT32_C(0x1D7CE), UINT32_C(0x1D7D7), '0'},  /* bold (digit) */
	{UINT32_C(0x1D434), UINT32_C(0x1D44D), 'A'},  /* italic (capital) */
	{UINT32_C(0x1D44E), UINT32_C(0x1D467), 'a'},  /* italic (small) */
	{UINT32_C(0x210E),  UINT32_C(0x210E),  'h'},  /* italic (small h) */
	{UINT32_C(0x1D468), UINT32_C(0x1D481), 'A'},  /* bold italic (capital) */
	{UINT32_C(0x1D482), UINT32_C(0x1D49B), 'a'},  /* bold italic (small) */
	{UINT32_C(0x1D670), UINT32_C(0x1D689), 'A'},  /* monospace (capital) */
	{UINT32_C(0x1D68A), UINT32_C(0x1D6A3), 'a'},  /* monospace (small) */
	{UINT32_C(0x1D7F6), UINT32_C(0x1D7FF), '0'},  /* monospace (digit) */
	{UINT32_C(0x1FBF0), UINT32_C(0x1FBF9), '0'},  /* segmented */
	{UINT32_C(0x1D5A0), UINT32_C(0x1D5B9), 'A'},  /* sans-serif (capital) */
	{UINT32_C(0x1D5BA), UINT32_C(0x1D5D3), 'a'},  /* sans-serif (small) */
	{UINT32_C(0x1D7E2), UINT32_C(0x1D7EB), '0'},  /* sans-serif (digit) */
	{UINT32_C(0x1D5D4), UINT32_C(0x1D5ED), 'A'},  /* sans-serif bold (capital) */
	{UINT32_C(0x1D5EE), UINT32_C(0x1D607), 'a'},  /* sans-serif bold (small) */
	{UINT32_C(0x1D7EC), UINT32_C(0x1D7F5), '0'},  /* sans-serif bold (digit) */
	{UINT32_C(0x1D608), UINT32_C(0x1D621), 'A'},  /* sans-serif italic (capital) */
	{UINT32_C(0x1D622), UINT32_C(0x1D63B), 'a'},  /* sans-serif italic (small) */
	{UINT32_C(0x1D63C), UINT32_C(0x1D655), 'A'},  /* sans-serif bold italic (capital) */
	{UINT32_C(0x1D656), UINT32_C(0x1D66F), 'a'},  /* sans-serif bold italic (small) */
	{UINT32_C(0x1D538), UINT32_C(0x1D551), 'A'},  /* double-struck (capital) */
	{UINT32_C(0x1D552), UINT32_C(0x1D56B), 'a'},  /* double-struck (small) */
	{UINT32_C(0x1D7D8), UINT32_C(0x1D7E1), '0'},  /* double-struck (digit) */
	{UINT32_C(0x1D504), UINT32_C(0x1D51D), 'A'},  /* fraktur (capital) */
	{UINT32_C(0x1D51E), UINT32_C(0x1D537), 'a'},  /* fraktur (small) */
	{UINT32_C(0x1D56C), UINT32_C(0x1D585), 'A'},  /* bold fraktur (capital) */
	{UINT32_C(0x1D586), UINT32_C(0x1D59F), 'a'},  /* bold fraktur (small) */
	{UINT32_C(0x1D49C), UINT32_C(0x1D4B5), 'A'},  /* script (capital) */
	{UINT32_C(0x1D4B6), UINT32_C(0x1D4CF), 'a'},  /* script (small) */
	{UINT32_C(0x1D4D0), UINT32_C(0x1D4E9), 'A'},  /* bold script (capital) */
	{UINT32_C(0x1D4EA), UINT32_C(0x1D503), 'a'},  /* bold script (small) */
	{UINT32_C(0x2070),  UINT32_C(0x2070),  '0'},  /* superscript (zero) */
	{UINT32_C(0x2071),  UINT32_C(0x2071),  'i'},  /* superscript (small i; U+2071 is a letter, not a digit) */
	{UINT32_C(0x2072),  UINT32_C(0x2079),  '2'},  /* superscript (digit) */
	{UINT32_C(0x00B2),  UINT32_C(0x00B3),  '2'},  /* superscript (two, three) */
	{UINT32_C(0x00B9),  UINT32_C(0x00B9),  '1'},  /* superscript (one) */
	{UINT32_C(0x2080),  UINT32_C(0x2089),  '0'}   /* subscript */
};


/**
 * Convert a code point that lies in one of the runs in `latin_runs`
 *
 * @param   c  The code point to look up; replaced by its
 *             conversion when 1 is returned, untouched otherwise
 * @return     1 if `*c` was converted, 0 if it is not in any run
 */
static int
shift_to_latin(uint_least32_t *c)
{
	size_t i;

	for (i = 0; i < sizeof(latin_runs) / sizeof(*latin_runs); i++) {
		if (latin_runs[i].first <= *c && *c <= latin_runs[i].last) {
			*c = *c - latin_runs[i].first + latin_runs[i].base;
			return 1;
		}
	}

	return 0;
}


/**
 * Write a number as decimal digits, without leading zeroes
 *
 * @param   out  Output buffer, must have room for 3 characters
 * @param   num  The number, must be less than 1000
 * @return       The number of characters written to `out`
 */
static size_t
put_decimal(char *out, unsigned num)
{
	size_t len = num < 10u ? 1u : num < 100u ? 2u : 3u;
	size_t i;

	for (i = len; i--; num /= 10u)
		out[i] = (char)('0' + num % 10u);

	return len;
}


/**
 * Convert the first character of a string to plain Latin letters,
 * digits and punctuation, if it is one of the styled, enclosed or
 * transliterable characters this function knows about
 *
 * The input is decoded with `libcharconv_decode_utf8_`. Characters
 * without a conversion at the beginning of the input are skipped over
 * and reported as one unconvertible prefix; the first convertible
 * character after such a prefix is left for a later call.
 *
 * @param   s     The input
 * @param   slen  The number of bytes available in `s`
 * @param   n     Output: on LIBCHARCONV_NO_CONVERT, the number of leading
 *                bytes that have no conversion; on LIBCHARCONV_CONVERTED
 *                and LIBCHARCONV_CONVERT_IF_END, the number of bytes
 *                the conversion covers
 * @param   cp    Output buffer for the conversion
 * @param   ncp   Input: the number of elements `cp` has room for;
 *                output (only when a conversion is returned): the full
 *                length of the conversion, which may exceed the input
 *                value, in which case only the first elements were stored
 * @return        LIBCHARCONV_NO_CONVERT if the input is empty or starts
 *                with something that has no conversion,
 *                LIBCHARCONV_CONVERTED if a conversion was made,
 *                LIBCHARCONV_CONVERT_IF_END if a conversion was made but
 *                could have been different had more input followed, or
 *                LIBCHARCONV_INDETERMINATE if the decoder reported a
 *                length beyond the end of the input
 */
enum libcharconv_result
libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp)
{
	enum libcharconv_result ret = LIBCHARCONV_CONVERTED;
	uint_least32_t c;
	char out[5];
	size_t nout, cap, i, clen;
	unsigned num;

	*n = 0;
	while (slen) {
		clen = libcharconv_decode_utf8_(s, slen, &c);
		if (clen > slen) {
			/* presumably a truncated sequence; an already found
			 * unconvertible prefix is reported first */
			if (*n)
				goto no_conv;
			return LIBCHARCONV_INDETERMINATE;
		}
		if (!clen) {
			/* the decoder returned 0 (presumably an undecodable
			 * byte): treat the byte as unconvertible */
			*n += 1u;
			slen -= 1u;
			s = &s[1];
			continue;
		}
		slen -= clen;

		if (shift_to_latin(&c))
			goto single;

		if (UINT32_C(0x10800) <= c && c <= UINT32_C(0x1083F)) {
			/* cypriot: a row of five vowels per consonant, the first row is bare vowels */
			c -= UINT32_C(0x10800);
			out[0] = "_jklmnprstwxz"[c / 5];
			out[1] = "aeiou"[c % 5];
			if (out[0] == '_') {
				c = (uint_least32_t)out[1];
				goto single;
			}
			/* the j, w, x and z rows are incomplete */
			if (out[0] == 'j' && out[1] != 'a' && out[1] != 'o')
				goto no_match;
			if (out[0] == 'w' && out[1] == 'u')
				goto no_match;
			if (out[0] == 'x' && out[1] != 'a' && out[1] != 'e')
				goto no_match;
			if (out[0] == 'z' && out[1] != 'a' && out[1] != 'o')
				goto no_match;
			nout = 2u;
			goto conv;

		} else if (UINT32_C(0x1F110) <= c && c <= UINT32_C(0x1F129)) {
			/* bracketed (parenthesised capital) */
			out[0] = '(';
			out[1] = (char)(c - UINT32_C(0x1F110) + (uint_least32_t)'A');
			out[2] = ')';
			nout = 3u;
			goto conv;

		} else if (UINT32_C(0x249C) <= c && c <= UINT32_C(0x24B5)) {
			/* bracketed (parenthesised small) */
			out[0] = '(';
			out[1] = (char)(c - UINT32_C(0x249C) + (uint_least32_t)'a');
			out[2] = ')';
			nout = 3u;
			goto conv;

		} else if (UINT32_C(0x2474) <= c && c <= UINT32_C(0x247C)) {
			/* bracketed (parenthesised number, 1 to 9) */
			out[0] = '(';
			out[1] = (char)(c - UINT32_C(0x2474) + (uint_least32_t)'1');
			out[2] = ')';
			nout = 3u;
			goto conv;

		} else if (UINT32_C(0x247D) <= c && c <= UINT32_C(0x2486)) {
			/* bracketed (parenthesised number, 10 to 19) */
			out[0] = '(';
			out[1] = '1';
			out[2] = (char)(c - UINT32_C(0x247D) + (uint_least32_t)'0');
			out[3] = ')';
			nout = 4u;
			goto conv;

		} else if (c == UINT32_C(0x2487)) {
			/* bracketed (parenthesised number, 20) */
			out[0] = '(';
			out[1] = '2';
			out[2] = '0';
			out[3] = ')';
			nout = 4u;
			goto conv;

		} else if (UINT32_C(0x1F101) <= c && c <= UINT32_C(0x1F10A)) {
			/* bracketed (number comma) */
			out[0] = (char)(c - UINT32_C(0x1F101) + (uint_least32_t)'0');
			out[1] = ',';
			nout = 2u;
			goto conv;

		} else if (c == UINT32_C(0x1F100)) {
			/* bracketed (number full stop, 0) */
			out[0] = '0';
			out[1] = '.';
			nout = 2u;
			goto conv;

		} else if (UINT32_C(0x2488) <= c && c <= UINT32_C(0x2490)) {
			/* bracketed (number full stop, 1 to 9) */
			out[0] = (char)(c - UINT32_C(0x2488) + (uint_least32_t)'1');
			out[1] = '.';
			nout = 2u;
			goto conv;

		} else if (UINT32_C(0x2491) <= c && c <= UINT32_C(0x249A)) {
			/* bracketed (number full stop, 10 to 19) */
			out[0] = '1';
			out[1] = (char)(c - UINT32_C(0x2491) + (uint_least32_t)'0');
			out[2] = '.';
			nout = 3u;
			goto conv;

		} else if (c == UINT32_C(0x249B)) {
			/* bracketed (number full stop, 20) */
			out[0] = '2';
			out[1] = '0';
			out[2] = '.';
			nout = 3u;
			goto conv;

		} else if (UINT32_C(0xFE00) <= c && c <= UINT32_C(0xFE0F)) {
			/* variation selectors, numbered 1 to 16 */
			num = (unsigned)(c - UINT32_C(0xFE00)) + 1u;
			nout = put_decimal(out, num);
			goto conv;

		} else if (UINT32_C(0xE0100) <= c && c <= UINT32_C(0xE01EF)) {
			/* variation selectors, numbered 17 to 256 */
			num = (unsigned)(c - UINT32_C(0xE0100)) + 17u;
			nout = put_decimal(out, num);
			goto conv;

		} else if (UINT32_C(0x10920) <= c && c <= UINT32_C(0x10939)) {
			/* lydian */
			c = (uint_least32_t)"abgdeviyklmnorStufqsTAELNc"[c - UINT32_C(0x10920)];
			goto single;

		} else if (UINT32_C(0x10280) <= c && c <= UINT32_C(0x1029C)) {
			/* lycian */
			c = (uint_least32_t)"aebBgdiwzDjkqlmnMNupKrstTAEhx"[c - UINT32_C(0x10280)];
			goto single;

		} else if (UINT32_C(0x1F031) <= c && c <= UINT32_C(0x1F061)) {
			/* domino tiles: 7 tiles for each value of the first half */
			c -= UINT32_C(0x1F031);
			out[0] = (char)(c / 7u + (unsigned)'0');
			out[1] = '|';
			out[2] = (char)(c % 7u + (unsigned)'0');
			nout = 3u;
			goto conv;

		} else if (UINT32_C(0x1F063) <= c && c <= UINT32_C(0x1F093)) {
			/* domino tiles: 7 tiles for each value of the first half */
			c -= UINT32_C(0x1F063);
			out[0] = (char)(c / 7u + (unsigned)'0');
			out[1] = '-';
			out[2] = (char)(c % 7u + (unsigned)'0');
			nout = 3u;
			goto conv;

		} else if (UINT32_C(0x1F550) <= c && c <= UINT32_C(0x1F567)) {
			/* clock faces: 12 full hours followed by 12 half hours,
			 * each starting at one; twelve is written as 00 */
			c -= UINT32_C(0x1F550);
			num = (unsigned)((c + 1u) % 12u);
			out[0] = (char)(num / 10u + (unsigned)'0');
			out[1] = (char)(num % 10u + (unsigned)'0');
			out[2] = ':';
			out[3] = c < 12u ? '0' : '3';
			out[4] = '0';
			nout = 5u;
			goto conv;

		} else {
			switch (c) {
			/* shogi */
			case UINT32_C(0x2616): c = (uint_least32_t)'w'; goto single;
			case UINT32_C(0x2617): c = (uint_least32_t)'b'; goto single;
			case UINT32_C(0x26C9): c = (uint_least32_t)'W'; goto single;
			case UINT32_C(0x26CA): c = (uint_least32_t)'B'; goto single;

			/* go (white) */
			case UINT32_C(0x25CB): c = (uint_least32_t)'0'; goto single;
			case UINT32_C(0x2686): c = (uint_least32_t)'1'; goto single;
			case UINT32_C(0x2687): c = (uint_least32_t)'2'; goto single;

			/* go (black) */
			case UINT32_C(0x25CF): c = (uint_least32_t)'0'; goto single;
			case UINT32_C(0x2688): c = (uint_least32_t)'1'; goto single;
			case UINT32_C(0x2689): c = (uint_least32_t)'2'; goto single;

			/* draughts */
			case UINT32_C(0x26C0): c = (uint_least32_t)'m'; goto single;
			case UINT32_C(0x26C1): c = (uint_least32_t)'k'; goto single;
			case UINT32_C(0x26C2): c = (uint_least32_t)'M'; goto single;
			case UINT32_C(0x26C3): c = (uint_least32_t)'K'; goto single;

			/* gender symbols */
			case UINT32_C(0x2640): c = (uint_least32_t)'f'; goto single;
			case UINT32_C(0x2642): c = (uint_least32_t)'m'; goto single;
			case UINT32_C(0x263F): c = (uint_least32_t)'i'; goto single;

			/* double-struck */
			case UINT32_C(0x2102): c = (uint_least32_t)'C'; goto single;
			case UINT32_C(0x210D): c = (uint_least32_t)'H'; goto single;
			case UINT32_C(0x2115): c = (uint_least32_t)'N'; goto single;
			case UINT32_C(0x2119): c = (uint_least32_t)'P'; goto single;
			case UINT32_C(0x211A): c = (uint_least32_t)'Q'; goto single;
			case UINT32_C(0x211D): c = (uint_least32_t)'R'; goto single;
			case UINT32_C(0x2124): c = (uint_least32_t)'Z'; goto single;

			/* double-struck italic */
			case UINT32_C(0x2145): c = (uint_least32_t)'D'; goto single;
			case UINT32_C(0x2146): c = (uint_least32_t)'d'; goto single;
			case UINT32_C(0x2147): c = (uint_least32_t)'e'; goto single;
			case UINT32_C(0x2148): c = (uint_least32_t)'i'; goto single;
			case UINT32_C(0x2149): c = (uint_least32_t)'j'; goto single;

			/* fraktur */
			case UINT32_C(0x212D): c = (uint_least32_t)'C'; goto single;
			case UINT32_C(0x210C): c = (uint_least32_t)'H'; goto single;
			case UINT32_C(0x2111): c = (uint_least32_t)'I'; goto single;
			case UINT32_C(0x211C): c = (uint_least32_t)'R'; goto single;
			case UINT32_C(0x2128): c = (uint_least32_t)'Z'; goto single;

			/* script */
			case UINT32_C(0x212C): c = (uint_least32_t)'B'; goto single;
			case UINT32_C(0x2130): c = (uint_least32_t)'E'; goto single;
			case UINT32_C(0x2131): c = (uint_least32_t)'F'; goto single;
			case UINT32_C(0x210B): c = (uint_least32_t)'H'; goto single;
			case UINT32_C(0x2110): c = (uint_least32_t)'I'; goto single;
			case UINT32_C(0x2112): c = (uint_least32_t)'L'; goto single;
			case UINT32_C(0x2133): c = (uint_least32_t)'M'; goto single;
			case UINT32_C(0x211B): c = (uint_least32_t)'R'; goto single;
			case UINT32_C(0x212F): c = (uint_least32_t)'e'; goto single;
			case UINT32_C(0x210A): c = (uint_least32_t)'g'; goto single;
			case UINT32_C(0x2134): c = (uint_least32_t)'o'; goto single;

			/* buhid (independent vowels) */
			case UINT32_C(0x1740): c = (uint_least32_t)'a'; goto single;
			case UINT32_C(0x1741): c = (uint_least32_t)'i'; goto single;
			case UINT32_C(0x1742): c = (uint_least32_t)'u'; goto single;

			/* buhid (vowel signs that do not follow a consonant) */
			case UINT32_C(0x1752): out[0] = '^'; out[1] = 'i'; nout = 2u; goto conv;
			case UINT32_C(0x1753): out[0] = '^'; out[1] = 'u'; nout = 2u; goto conv;

			/* buhid (consonants; '-' stands in for "ng") */
			case UINT32_C(0x174A): c = (uint_least32_t)'b'; goto buhid;
			case UINT32_C(0x1747): c = (uint_least32_t)'d'; goto buhid;
			case UINT32_C(0x1744): c = (uint_least32_t)'g'; goto buhid;
			case UINT32_C(0x1751): c = (uint_least32_t)'h'; goto buhid;
			case UINT32_C(0x1743): c = (uint_least32_t)'k'; goto buhid;
			case UINT32_C(0x174E): c = (uint_least32_t)'l'; goto buhid;
			case UINT32_C(0x174B): c = (uint_least32_t)'m'; goto buhid;
			case UINT32_C(0x1748): c = (uint_least32_t)'n'; goto buhid;
			case UINT32_C(0x1749): c = (uint_least32_t)'p'; goto buhid;
			case UINT32_C(0x174D): c = (uint_least32_t)'r'; goto buhid;
			case UINT32_C(0x1750): c = (uint_least32_t)'s'; goto buhid;
			case UINT32_C(0x1746): c = (uint_least32_t)'t'; goto buhid;
			case UINT32_C(0x174C): c = (uint_least32_t)'y'; goto buhid;
			case UINT32_C(0x174F): c = (uint_least32_t)'w'; goto buhid;
			case UINT32_C(0x1745): c = (uint_least32_t)'-'; goto buhid;

			/* replacement */
			case UINT32_C(0xFFFC): out[0] = 'o'; out[1] = 'b'; out[2] = 'j'; nout = 3u; goto conv;
			case UINT32_C(0xFFFD): c = (uint_least32_t)'?'; goto single;

			/* lydian */
			case UINT32_C(0x1093F): c = (uint_least32_t)'"'; goto single;

			/* domino tiles (backs) */
			case UINT32_C(0x1F030): out[0] = '#'; out[1] = '|'; out[2] = '#'; nout = 3u; goto conv;
			case UINT32_C(0x1F062): out[0] = '#'; out[1] = '-'; out[2] = '#'; nout = 3u; goto conv;

			default:
				break;
			}
		}

	no_match:
		/* no conversion for this character: add it to the
		 * unconvertible prefix and look at the next one */
		*n += clen;
		s = &s[clen];
	}

no_conv:
	return LIBCHARCONV_NO_CONVERT;

buhid:
	/* `c` holds the consonant; the vowel is 'a' unless
	 * the consonant is followed by a vowel sign */
	if (*n)
		goto no_conv;
	nout = 0;
	if (c == (uint_least32_t)'-') {
		out[nout++] = 'n';
		out[nout++] = 'g';
	} else {
		out[nout++] = (char)c;
	}
	out[nout] = 'a';
	s = &s[clen];
	*n += clen;
	if (!slen) {
		/* a vowel sign could still follow if there is more input */
		ret = LIBCHARCONV_CONVERT_IF_END;
	} else {
		clen = libcharconv_decode_utf8_(s, slen, &c);
		if (clen > slen) {
			/* NOTE(review): `*n` has already been advanced past the
			 * consonant here, whereas the main loop only returns
			 * LIBCHARCONV_INDETERMINATE with `*n` being 0; confirm
			 * that callers disregard `*n` for this return value */
			return LIBCHARCONV_INDETERMINATE;
		}
		if (clen) {
			if (c == UINT32_C(0x1752)) {
				out[nout] = 'i';
				*n += clen;
			} else if (c == UINT32_C(0x1753)) {
				out[nout] = 'u';
				*n += clen;
			}
		}
	}
	nout++;
	goto conv_prechecked;

single:
	/* `c` holds the entire conversion */
	out[0] = (char)c;
	nout = 1u;
conv:
	/* characters without conversion precede this one: report
	 * those and leave this character for a later call */
	if (*n)
		goto no_conv;
	*n += clen;
conv_prechecked:
	/* store as much as fits, but always report the full length */
	cap = *ncp;
	for (i = 0; i < nout && i < cap; i++)
		cp[i] = (uint_least32_t)out[i];
	*ncp = nout;
	return ret;
}