diff options
| author | Mattias Andrée <m@maandree.se> | 2026-01-28 19:52:45 +0100 |
|---|---|---|
| committer | Mattias Andrée <m@maandree.se> | 2026-01-28 19:52:45 +0100 |
| commit | 579e91697244d5813a667bf49209ea4102bcd360 (patch) | |
| tree | 2910e440fbaf968e8010b054e29c8e57b60b8935 | |
| parent | Add ROTATED CAPITAL Q (diff) | |
| download | charconv-579e91697244d5813a667bf49209ea4102bcd360.tar.gz charconv-579e91697244d5813a667bf49209ea4102bcd360.tar.bz2 charconv-579e91697244d5813a667bf49209ea4102bcd360.tar.xz | |
Misc stuff
Signed-off-by: Mattias Andrée <m@maandree.se>
Diffstat (limited to '')
| -rw-r--r-- | Makefile | 16 | ||||
| -rw-r--r-- | convert-to-buginese.c | 4 | ||||
| -rw-r--r-- | convert-to-gothic.c | 4 | ||||
| -rw-r--r-- | convert-to-makasar.c | 4 | ||||
| -rw-r--r-- | convert-to-mandaic.c | 4 | ||||
| -rw-r--r-- | convert-to-rumi.c | 4 | ||||
| -rw-r--r-- | convert-to-scores.c | 4 | ||||
| -rw-r--r-- | libcharconv.h | 33 | ||||
| -rw-r--r-- | libcharconv_buginese.c | 114 | ||||
| -rw-r--r-- | libcharconv_flipped.c | 10 | ||||
| -rw-r--r-- | libcharconv_gothic.c | 91 | ||||
| -rw-r--r-- | libcharconv_latin.c | 317 | ||||
| -rw-r--r-- | libcharconv_makasar.c | 107 | ||||
| -rw-r--r-- | libcharconv_mandaic.c | 65 | ||||
| -rw-r--r-- | libcharconv_mirrored.c | 5 | ||||
| -rw-r--r-- | libcharconv_negative.c | 5 | ||||
| -rw-r--r-- | libcharconv_overlaid.c | 5 | ||||
| -rw-r--r-- | libcharconv_rumi.c | 69 | ||||
| -rw-r--r-- | libcharconv_scores.c | 51 | ||||
| -rw-r--r-- | libcharconv_turned.c | 11 |
20 files changed, 816 insertions, 107 deletions
@@ -87,7 +87,13 @@ BIN =\ convert-to-cards\ convert-to-stacked\ convert-to-counting-rods\ - convert-to-mayan + convert-to-mayan\ + convert-to-gothic\ + convert-to-mandaic\ + convert-to-rumi\ + convert-to-scores\ + convert-to-buginese\ + convert-to-makasar LIBOBJ =\ libcharconv_decode_utf8_.o\ @@ -167,7 +173,13 @@ LIBOBJ =\ libcharconv_cards.o\ libcharconv_stacked.o\ libcharconv_counting_rods.o\ - libcharconv_mayan.o + libcharconv_mayan.o\ + libcharconv_gothic.o\ + libcharconv_mandaic.o\ + libcharconv_rumi.o\ + libcharconv_scores.o\ + libcharconv_buginese.o\ + libcharconv_makasar.o LOBJ = $(LIBOBJ:.o=.lo) diff --git a/convert-to-buginese.c b/convert-to-buginese.c new file mode 100644 index 0000000..212e69f --- /dev/null +++ b/convert-to-buginese.c @@ -0,0 +1,4 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +SIMPLE(libcharconv_buginese) diff --git a/convert-to-gothic.c b/convert-to-gothic.c new file mode 100644 index 0000000..5fe4a65 --- /dev/null +++ b/convert-to-gothic.c @@ -0,0 +1,4 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +SIMPLE(libcharconv_gothic) diff --git a/convert-to-makasar.c b/convert-to-makasar.c new file mode 100644 index 0000000..ba46310 --- /dev/null +++ b/convert-to-makasar.c @@ -0,0 +1,4 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +SIMPLE(libcharconv_makasar) diff --git a/convert-to-mandaic.c b/convert-to-mandaic.c new file mode 100644 index 0000000..0f36e4f --- /dev/null +++ b/convert-to-mandaic.c @@ -0,0 +1,4 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +SIMPLE(libcharconv_mandaic) diff --git a/convert-to-rumi.c b/convert-to-rumi.c new file mode 100644 index 0000000..e6a9015 --- /dev/null +++ b/convert-to-rumi.c @@ -0,0 +1,4 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +SIMPLE(libcharconv_rumi) diff --git a/convert-to-scores.c b/convert-to-scores.c new file mode 100644 index 0000000..851c761 --- /dev/null +++ b/convert-to-scores.c @@ -0,0 +1,4 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +SIMPLE(libcharconv_scores) diff --git a/libcharconv.h b/libcharconv.h index 2fc6a3e..c107aa8 100644 --- a/libcharconv.h +++ b/libcharconv.h @@ -511,6 +511,39 @@ LIBCHARCONV_FUNC_(libcharconv_counting_rods); */ LIBCHARCONV_FUNC_(libcharconv_mayan); +/** + * Convert from Latin to Gothic + */ +LIBCHARCONV_FUNC_(libcharconv_gothic); + +/** + * Convert from Latin to Mandaic + */ +LIBCHARCONV_FUNC_(libcharconv_mandaic); + +/** + * Convert from Latin to RUMI numerals + */ +LIBCHARCONV_FUNC_(libcharconv_rumi); + +/** + * Convert + * "*" to WHITE FLOWER, + * "10" to KEYCAP TEN, and + * "100" to HUNDRED POINTS SYMBOL + */ +LIBCHARCONV_FUNC_(libcharconv_scores); + +/** + * Convert from Latin to Buginese + */ +LIBCHARCONV_FUNC_(libcharconv_buginese); + +/** + * Convert from Latin to Makasar + */ +LIBCHARCONV_FUNC_(libcharconv_makasar); + #undef LIBCHARCONV_FUNC_ #endif diff --git a/libcharconv_buginese.c b/libcharconv_buginese.c new file mode 100644 index 0000000..311ab4c --- /dev/null +++ b/libcharconv_buginese.c @@ -0,0 +1,114 @@ +/* See LICENSE file for copyright and license details. */ +#include "lib-common.h" + + +enum libcharconv_result +libcharconv_buginese(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c, c2; + *n = 0; + for (; slen--; s++) { + switch (s[0]) { + case 'k': c = UINT32_C(0x1A00); goto conv; + case 'g': c = UINT32_C(0x1A01); goto conv; + case 'G': c = UINT32_C(0x1A02); goto conv; + case 'K': c = UINT32_C(0x1A03); goto conv; + case 'p': c = UINT32_C(0x1A04); goto conv; + case 'b': c = UINT32_C(0x1A05); goto conv; + case 'm': c = UINT32_C(0x1A06); goto conv; + case 'M': c = UINT32_C(0x1A07); goto conv; + case 't': c = UINT32_C(0x1A08); goto conv; + case 'd': c = UINT32_C(0x1A09); goto conv; + case 'n': c = UINT32_C(0x1A0A); goto conv; + case 'R': c = UINT32_C(0x1A0B); goto conv; + case 'c': c = UINT32_C(0x1A0C); goto conv; + case 'j': c = UINT32_C(0x1A0D); goto conv; + case 'Y': c = UINT32_C(0x1A0E); goto conv; + case 'C': c = UINT32_C(0x1A0F); goto conv; + case 'y': c = UINT32_C(0x1A10); goto conv; + case 'r': c = UINT32_C(0x1A11); goto conv; + case 'l': c = UINT32_C(0x1A12); goto conv; + case 'v': c = UINT32_C(0x1A13); goto conv; + case 's': c = UINT32_C(0x1A14); goto conv; + case 'a': c = UINT32_C(0x1A15); goto conv1; + case 'h': c = UINT32_C(0x1A16); goto conv; + case 'i': c = UINT32_C(0x1A17); goto conv_vowel; + case 'u': c = UINT32_C(0x1A18); goto conv_vowel; + case 'e': c = UINT32_C(0x1A19); goto conv_vowel; + case 'o': c = UINT32_C(0x1A1A); goto conv_vowel; + case 'A': c = UINT32_C(0x1A1B); goto conv_vowel; + case '.': c = UINT32_C(0x1A1E); goto conv1; + case '|': c = UINT32_C(0x1A1F); goto conv1; + case '^': + if (!slen) + return LIBCHARCONV_INDETERMINATE; + switch (s[1]) { + case 'i': c = UINT32_C(0x1A17); goto conv2; + case 'u': c = UINT32_C(0x1A18); goto conv2; + case 'e': c = UINT32_C(0x1A19); goto conv2; + case 'o': c = UINT32_C(0x1A1A); goto conv2; + case 'A': c = UINT32_C(0x1A1B); goto conv2; + default: + goto no_match; + } + default: + no_match: + *n += 1u; + break; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (!slen) + return LIBCHARCONV_INDETERMINATE; + switch (s[1]) { + case 'a': goto conv2; + case 'i': c2 = UINT32_C(0x1A17); break; + case 'u': c2 = UINT32_C(0x1A18); break; + case 'e': c2 = UINT32_C(0x1A19); break; + case 'o': c2 = UINT32_C(0x1A1A); break; + case 'A': c2 = UINT32_C(0x1A1B); break; + default: + goto no_match; + } + if (*ncp >= 1u) + cp[0] = c; + if (*ncp >= 2u) + cp[1] = c2; + *n += 2u; + *ncp = 2u; + return LIBCHARCONV_CONVERTED; + +conv1: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; + +conv2: + if (*n) + goto no_conv; + if (*ncp) + cp[0] = c; + *n += 2u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; + +conv_vowel: + if (*n) + goto no_conv; + if (*ncp >= 1u) + cp[0] = UINT32_C(0x1A15); + if (*ncp >= 2u) + cp[1] = c; + *n += 1u; + *ncp = 2u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_flipped.c b/libcharconv_flipped.c index 9423bdd..49f2c9b 100644 --- a/libcharconv_flipped.c +++ b/libcharconv_flipped.c @@ -32,7 +32,9 @@ static struct { {UINT32_C(0x23BE), UINT32_C(0x23BF)}, {UINT32_C(0x23CB), UINT32_C(0x23CC)}, {UINT32_C(0x23C9), UINT32_C(0x23CA)}, - {UINT32_C(0x23C1), UINT32_C(0x23C2)} + {UINT32_C(0x23C1), UINT32_C(0x23C2)}, + {UINT32_C(0x238F), UINT32_C(0x2390)}, + {UINT32_C(0x2391), UINT32_C(0x2392)} }; @@ -88,6 +90,12 @@ libcharconv_flipped(const char *s, size_t slen, size_t *n, uint_least32_t *cp, s c |= (i & 1u) ? 32u : 0u; c = UINT32_C(0x4D00) | yijing_hexagrams[c]; goto conv; + } else if (UINT32_C(0x2800) <= c && c <= UINT32_C(0x28FF)) { + c = ((c & 0x01u) << 6) | ((c & 0x40u) >> 6) + | ((c & 0x12u) << 1) | ((c & 0x24u) >> 1) + | ((c & 0x08u) << 4) | ((c & 0x80u) >> 4) + | UINT32_C(0x2800); + goto conv; } else { for (i = 0u; i < sizeof(pairs) / sizeof(*pairs); i++) { if (c == pairs[i].a) { diff --git a/libcharconv_gothic.c b/libcharconv_gothic.c new file mode 100644 index 0000000..61ec9a9 --- /dev/null +++ b/libcharconv_gothic.c @@ -0,0 +1,91 @@ +/* See LICENSE file for copyright and license details. */ +#include "lib-common.h" + + +static struct { + unsigned char cp_low; + char latin; +} gothic[] = { + {0x30, 'a'}, + {0x31, 'b'}, + {0x32, 'g'}, + {0x32, 'G'}, + {0x33, 'd'}, + {0x33, 'D'}, + {0x34, 'e'}, + {0x35, 'q'}, + {0x36, 'z'}, + {0x37, 'h'}, + {0x38, 'T'}, + {0x39, 'i'}, + {0x3A, 'k'}, + {0x3B, 'l'}, + {0x3C, 'm'}, + {0x3D, 'n'}, + {0x3E, 'j'}, + {0x3F, 'u'}, + {0x40, 'p'}, + {0x42, 'r'}, + {0x43, 's'}, + {0x44, 't'}, + {0x45, 'w'}, + {0x45, 'y'}, + {0x46, 'f'}, + {0x47, 'x'}, + {0x48, 'v'}, + {0x48, 'W'}, + {0x49, 'o'} +}; + + +enum libcharconv_result +libcharconv_gothic(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + size_t i; + *n = 0; + for (; slen--; s++) { + if ('1' <= *s && *s <= '9') { + if (*n) + goto no_conv; + c = UINT32_C(0x10330) + (uint_least32_t)(*s - '1'); + *n = 1u; + if (slen == 0u) + goto convn_if_end; + if (s[1] != '0') + goto convn; + c += 9u; + *n += 1u; + if (slen == 1u) + goto convn_if_end; + if (s[2] == '0') { + c += 9u; + *n += 1u; + } + goto convn; + } else { + for (i = 0u; i < sizeof(gothic) / sizeof(*gothic); i++) + if (*s == gothic[i].latin) + goto conv; + *n += 1u; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + *n += 1u; + c = (uint_least32_t)(UINT32_C(0x10300) | gothic[i].cp_low); +convn: + if (*ncp) + *cp = c; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +convn_if_end: + if (*ncp) + *cp = c; + *ncp = 1u; + return LIBCHARCONV_CONVERT_IF_END; +} diff --git a/libcharconv_latin.c b/libcharconv_latin.c index 374a097..20e3fd9 100644 --- a/libcharconv_latin.c +++ b/libcharconv_latin.c @@ -220,6 +220,38 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz c -= (uint_least32_t)UINT32_C(0x1D4EA) - (uint_least32_t)'a'; goto conv; + } else if (UINT32_C(0x1743) <= c && c <= UINT32_C(0x1751)) { + /* buhid */ + c1 = "kgGtdnpbmyrlwsh"[c - UINT32_C(0x1743)]; + if (*n) + goto no_conv; + c2 = 'a'; + s = &s[clen]; + *n += clen; + if (!slen) { + ret = LIBCHARCONV_CONVERT_IF_END; + goto budih_conv; + } + clen = libcharconv_decode_utf8_(s, slen, &c); + if (clen > slen) + return LIBCHARCONV_INDETERMINATE; + if (!clen) + goto budih_conv; + switch (c) { + case UINT32_C(0x1752): c2 = 'i'; *n += clen; break; + case UINT32_C(0x1753): c2 = 'u'; *n += clen; break; + default: + break; + } + budih_conv: + if (c1 == 'G') { + c3 = c2; + c2 = 'g'; + c1 = 'n'; + goto conv3_prechecked; + } + goto conv2_prechecked; + } else if (UINT32_C(0x1F110) <= c && c <= UINT32_C(0x1F129)) { /* bracketed (parenthesised capital) */ c -= (uint_least32_t)UINT32_C(0x1F110) - (uint_least32_t)'A'; @@ -627,127 +659,180 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz c2 = (char)c; goto conv2; + } else if (UINT32_C(0x10330) <= c && c <= UINT32_C(0x10349)) { + /* gothic */ + if (c == UINT32_C(0x10341)) + goto use_switch; + c = (uint_least32_t)"abgdeqzhTiklmnjup rstwfxvo"[c - UINT32_C(0x10330)]; + goto conv; + + } else if (UINT32_C(0x0840) <= c && c <= UINT32_C(0x085B)) { + /* mandaic */ + c = (uint_least32_t)"abgdhuzHTiklmnsepZqrStDKo\'*:"[c - UINT32_C(0x0840)]; + goto conv; + + } else if (UINT32_C(0x10E60) <= c && c <= UINT32_C(0x10E68)) { + /* rumi */ + c -= (uint_least32_t)UINT32_C(0x10E60) - (uint_least32_t)'1'; + c1 = (char)c; + goto conv1; + } else if (UINT32_C(0x10E69) <= c && c <= UINT32_C(0x10E71)) { + /* rumi */ + c -= (uint_least32_t)UINT32_C(0x10E69) - (uint_least32_t)'1'; + c1 = (char)c; + c2 = '0'; + goto conv2; + } else if (UINT32_C(0x10E72) <= c && c <= UINT32_C(0x10E7A)) { + /* rumi */ + c -= (uint_least32_t)UINT32_C(0x10E72) - (uint_least32_t)'1'; + c1 = (char)c; + c2 = '0'; + c3 = '0'; + goto conv3; + + } else if (UINT32_C(0x1A00) <= c && c <= UINT32_C(0x1A1B)) { + /* buginese */ + c1 = "kgGKpbmMtdnRcjYCyrlvsahiueoA"[c - UINT32_C(0x1A00)]; + if (c >= UINT32_C(0x1A17)) + goto combining; + if (*n) + goto no_conv; + c2 = 'a'; + s = &s[clen]; + *n += clen; + if (!slen) { + ret = LIBCHARCONV_CONVERT_IF_END; + goto buginese_conv; + } + clen = libcharconv_decode_utf8_(s, slen, &c); + if (clen > slen) + return LIBCHARCONV_INDETERMINATE; + if (!clen) + goto buginese_conv; + switch (c) { + case UINT32_C(0x1A17): c2 = 'i'; *n += clen; break; + case UINT32_C(0x1A18): c2 = 'u'; *n += clen; break; + case UINT32_C(0x1A19): c2 = 'e'; *n += clen; break; + case UINT32_C(0x1A1A): c2 = 'o'; *n += clen; break; + case UINT32_C(0x1A1B): c2 = 'A'; *n += clen; break; + default: + break; + } + buginese_conv: + if (c1 == 'a') { + c = (uint_least32_t)c2; + goto conv_prechecked; + } + goto conv2_prechecked; + + } else if (UINT32_C(0x11EE0) <= c && c <= UINT32_C(0x11EF6)) { + /* makasar */ + c1 = "kgGpbmtdncjYyrlvsa-iueo"[c - UINT32_C(0x11EE0)]; + if (c >= UINT32_C(0x11EF3)) + goto combining; + if (*n) + goto no_conv; + c2 = 'a'; + s = &s[clen]; + *n += clen; + if (!slen) { + ret = LIBCHARCONV_CONVERT_IF_END; + goto makasar_conv; + } + clen = libcharconv_decode_utf8_(s, slen, &c); + if (clen > slen) + return LIBCHARCONV_INDETERMINATE; + if (!clen) + goto makasar_conv; + switch (c) { + case UINT32_C(0x11EF3): c2 = 'i'; *n += clen; break; + case UINT32_C(0x11EF4): c2 = 'u'; *n += clen; break; + case UINT32_C(0x11EF5): c2 = 'e'; *n += clen; break; + case UINT32_C(0x11EF6): c2 = 'o'; *n += clen; break; + default: + break; + } + makasar_conv: + if (c1 == 'a') { + c = (uint_least32_t)c2; + goto conv_prechecked; + } + goto conv2_prechecked; + } else { use_switch: switch (c) { /* shogi */ - case UINT32_C(0x2616): c = (uint_least32_t)'w'; goto conv; - case UINT32_C(0x2617): c = (uint_least32_t)'b'; goto conv; - case UINT32_C(0x26C9): c = (uint_least32_t)'W'; goto conv; - case UINT32_C(0x26CA): c = (uint_least32_t)'B'; goto conv; + case UINT32_C(0x2616): c1 = 'w'; goto conv1; + case UINT32_C(0x2617): c1 = 'b'; goto conv1; + case UINT32_C(0x26C9): c1 = 'W'; goto conv1; + case UINT32_C(0x26CA): c1 = 'B'; goto conv1; /* go (common) */ - case UINT32_C(0x25CF): c = (uint_least32_t)'b'; goto conv; - case UINT32_C(0x25CB): c = (uint_least32_t)'w'; goto conv; + case UINT32_C(0x25CB): c1 = 'w'; goto conv1; + case UINT32_C(0x25CF): c1 = 'b'; goto conv1; /* go (white) */ - case UINT32_C(0x2686): c = (uint_least32_t)'1'; goto conv; - case UINT32_C(0x2687): c = (uint_least32_t)'2'; goto conv; + case UINT32_C(0x2686): c1 = '1'; goto conv1; + case UINT32_C(0x2687): c1 = '2'; goto conv1; /* go (black) */ - case UINT32_C(0x2688): c = (uint_least32_t)'1'; goto conv; - case UINT32_C(0x2689): c = (uint_least32_t)'2'; goto conv; + case UINT32_C(0x2688): c1 = '1'; goto conv1; + case UINT32_C(0x2689): c1 = '2'; goto conv1; /* draughts */ - case UINT32_C(0x26C0): c = (uint_least32_t)'m'; goto conv; - case UINT32_C(0x26C1): c = (uint_least32_t)'k'; goto conv; - case UINT32_C(0x26C2): c = (uint_least32_t)'M'; goto conv; - case UINT32_C(0x26C3): c = (uint_least32_t)'K'; goto conv; + case UINT32_C(0x26C0): c1 = 'm'; goto conv1; + case UINT32_C(0x26C1): c1 = 'k'; goto conv1; + case UINT32_C(0x26C2): c1 = 'M'; goto conv1; + case UINT32_C(0x26C3): c1 = 'K'; goto conv1; /* gender symbols */ - case UINT32_C(0x2640): c = (uint_least32_t)'f'; goto conv; - case UINT32_C(0x2642): c = (uint_least32_t)'m'; goto conv; - case UINT32_C(0x263F): c = (uint_least32_t)'i'; goto conv; + case UINT32_C(0x2640): c1 = 'f'; goto conv1; + case UINT32_C(0x2642): c1 = 'm'; goto conv1; + case UINT32_C(0x263F): c1 = 'i'; goto conv1; /* double-struck */ - case UINT32_C(0x2102): c = (uint_least32_t)'C'; goto conv; - case UINT32_C(0x210D): c = (uint_least32_t)'H'; goto conv; - case UINT32_C(0x2115): c = (uint_least32_t)'N'; goto conv; - case UINT32_C(0x2119): c = (uint_least32_t)'P'; goto conv; - case UINT32_C(0x211A): c = (uint_least32_t)'Q'; goto conv; - case UINT32_C(0x211D): c = (uint_least32_t)'R'; goto conv; - case UINT32_C(0x2124): c = (uint_least32_t)'Z'; goto conv; + case UINT32_C(0x2102): c1 = 'C'; goto conv1; + case UINT32_C(0x210D): c1 = 'H'; goto conv1; + case UINT32_C(0x2115): c1 = 'N'; goto conv1; + case UINT32_C(0x2119): c1 = 'P'; goto conv1; + case UINT32_C(0x211A): c1 = 'Q'; goto conv1; + case UINT32_C(0x211D): c1 = 'R'; goto conv1; + case UINT32_C(0x2124): c1 = 'Z'; goto conv1; /* double-struck italic */ - case UINT32_C(0x2145): c = (uint_least32_t)'D'; goto conv; - case UINT32_C(0x2146): c = (uint_least32_t)'d'; goto conv; - case UINT32_C(0x2147): c = (uint_least32_t)'e'; goto conv; - case UINT32_C(0x2148): c = (uint_least32_t)'i'; goto conv; - case UINT32_C(0x2149): c = (uint_least32_t)'j'; goto conv; + case UINT32_C(0x2145): c1 = 'D'; goto conv1; + case UINT32_C(0x2146): c1 = 'd'; goto conv1; + case UINT32_C(0x2147): c1 = 'e'; goto conv1; + case UINT32_C(0x2148): c1 = 'i'; goto conv1; + case UINT32_C(0x2149): c1 = 'j'; goto conv1; /* fraktur */ - case UINT32_C(0x212D): c = (uint_least32_t)'C'; goto conv; - case UINT32_C(0x210C): c = (uint_least32_t)'H'; goto conv; - case UINT32_C(0x2111): c = (uint_least32_t)'I'; goto conv; - case UINT32_C(0x211C): c = (uint_least32_t)'R'; goto conv; - case UINT32_C(0x2128): c = (uint_least32_t)'Z'; goto conv; + case UINT32_C(0x212D): c1 = 'C'; goto conv1; + case UINT32_C(0x210C): c1 = 'H'; goto conv1; + case UINT32_C(0x2111): c1 = 'I'; goto conv1; + case UINT32_C(0x211C): c1 = 'R'; goto conv1; + case UINT32_C(0x2128): c1 = 'Z'; goto conv1; /* script */ - case UINT32_C(0x212C): c = (uint_least32_t)'B'; goto conv; - case UINT32_C(0x2130): c = (uint_least32_t)'E'; goto conv; - case UINT32_C(0x2131): c = (uint_least32_t)'F'; goto conv; - case UINT32_C(0x210B): c = (uint_least32_t)'H'; goto conv; - case UINT32_C(0x2110): c = (uint_least32_t)'I'; goto conv; - case UINT32_C(0x2112): c = (uint_least32_t)'L'; goto conv; - case UINT32_C(0x2133): c = (uint_least32_t)'M'; goto conv; - case UINT32_C(0x211B): c = (uint_least32_t)'R'; goto conv; - case UINT32_C(0x212F): c = (uint_least32_t)'e'; goto conv; - case UINT32_C(0x210A): c = (uint_least32_t)'g'; goto conv; - case UINT32_C(0x2134): c = (uint_least32_t)'o'; goto conv; + case UINT32_C(0x212C): c1 = 'B'; goto conv1; + case UINT32_C(0x2130): c1 = 'E'; goto conv1; + case UINT32_C(0x2131): c1 = 'F'; goto conv1; + case UINT32_C(0x210B): c1 = 'H'; goto conv1; + case UINT32_C(0x2110): c1 = 'I'; goto conv1; + case UINT32_C(0x2112): c1 = 'L'; goto conv1; + case UINT32_C(0x2133): c1 = 'M'; goto conv1; + case UINT32_C(0x211B): c1 = 'R'; goto conv1; + case UINT32_C(0x212F): c1 = 'e'; goto conv1; + case UINT32_C(0x210A): c1 = 'g'; goto conv1; + case UINT32_C(0x2134): c1 = 'o'; goto conv1; /* buhid */ - case UINT32_C(0x1740): c = (uint_least32_t)'a'; goto conv; - case UINT32_C(0x1741): c = (uint_least32_t)'i'; goto conv; - case UINT32_C(0x1742): c = (uint_least32_t)'u'; goto conv; - case UINT32_C(0x1752): c2 = 'i'; goto budih_combining; - case UINT32_C(0x1753): c2 = 'u'; goto budih_combining; - budih_combining: - c1 = '^'; - goto conv2; - case UINT32_C(0x174A): c1 = 'b'; goto budih; - case UINT32_C(0x1747): c1 = 'd'; goto budih; - case UINT32_C(0x1744): c1 = 'g'; goto budih; - case UINT32_C(0x1751): c1 = 'h'; goto budih; - case UINT32_C(0x1743): c1 = 'k'; goto budih; - case UINT32_C(0x174E): c1 = 'l'; goto budih; - case UINT32_C(0x174B): c1 = 'm'; goto budih; - case UINT32_C(0x1748): c1 = 'n'; goto budih; - case UINT32_C(0x1749): c1 = 'p'; goto budih; - case UINT32_C(0x174D): c1 = 'r'; goto budih; - case UINT32_C(0x1750): c1 = 's'; goto budih; - case UINT32_C(0x1746): c1 = 't'; goto budih; - case UINT32_C(0x174C): c1 = 'y'; goto budih; - case UINT32_C(0x174F): c1 = 'w'; goto budih; - case UINT32_C(0x1745): c1 = '-'; goto budih; - budih: - if (*n) - goto no_conv; - c2 = 'a'; - s = &s[clen]; - *n += clen; - if (!slen) { - ret = LIBCHARCONV_CONVERT_IF_END; - goto budih_conv; - } - clen = libcharconv_decode_utf8_(s, slen, &c); - if (clen > slen) - return LIBCHARCONV_INDETERMINATE; - if (!clen) - goto budih_conv; - switch (c) { - case UINT32_C(0x1752): c2 = 'i'; *n += clen; break; - case UINT32_C(0x1753): c2 = 'u'; *n += clen; break; - default: - break; - } - budih_conv: - if (c1 == '-') { - c3 = c2; - c2 = 'g'; - c1 = 'n'; - goto conv3_prechecked; - } - goto conv2_prechecked; + case UINT32_C(0x1740): c1 = 'a'; goto conv1; + case UINT32_C(0x1741): c1 = 'i'; goto conv1; + case UINT32_C(0x1742): c1 = 'u'; goto conv1; + case UINT32_C(0x1752): c1 = 'i'; goto combining; + case UINT32_C(0x1753): c1 = 'u'; goto combining; /* replacement */ case UINT32_C(0xFFFC): @@ -1038,10 +1123,10 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz case UINT32_C(0x00AD): c1 = 'S'; c2 = 'H'; c3 = 'Y'; goto conv3; /* invisible */ - case UINT32_C(0x2061): c = (uint_least32_t)'('; goto conv; - case UINT32_C(0x2062): c = (uint_least32_t)'*'; goto conv; - case UINT32_C(0x2063): c = (uint_least32_t)'|'; goto conv; - case UINT32_C(0x2064): c = (uint_least32_t)'+'; goto conv; + case UINT32_C(0x2061): c1 = '('; goto conv1; + case UINT32_C(0x2062): c1 = '*'; goto conv1; + case UINT32_C(0x2063): c1 = '|'; goto conv1; + case UINT32_C(0x2064): c1 = '+'; goto conv1; /* enclosed (positive) */ case UINT32_C(0x24EA): c1 = '('; c2 = '0'; c3 = ')'; goto conv3; @@ -1097,6 +1182,32 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz case UINT32_C(0x2051): c1 = '*'; c2 = '*'; goto conv2; case UINT32_C(0x2E49): c1 = ','; c2 = ','; goto conv2; + /* gothic */ + case UINT32_C(0x10341): c1 = '9'; c2 = '0'; goto conv2; + case UINT32_C(0x1034A): c1 = '9'; c2 = '0'; c3 = '0'; goto conv3; + + /* mandaic */ + case UINT32_C(0x085E): c1 = '.'; goto conv1; + + /* rumi */ + case UINT32_C(0x10E7B): c1 = '1'; c2 = '/'; c3 = '2'; goto conv3; + case UINT32_C(0x10E7C): c1 = '1'; c2 = '/'; c3 = '4'; goto conv3; + case UINT32_C(0x10E7D): c1 = '1'; c2 = '/'; c3 = '3'; goto conv3; + case UINT32_C(0x10E7E): c1 = '2'; c2 = '/'; c3 = '3'; goto conv3; + + /* scores */ + case UINT32_C(0x1F4AE): c1 = '*'; goto conv1; + case UINT32_C(0x1F51F): c1 = '1'; c2 = '0'; goto conv2; + case UINT32_C(0x1F4AF): c1 = '1'; c2 = '0'; c3 = '0'; goto conv3; + + /* buginese */ + case UINT32_C(0x1A1E): c1 = '.'; goto conv1; + case UINT32_C(0x1A1F): c1 = '|'; goto conv1; + + /* makasar */ + case UINT32_C(0x11EF7): c1 = '.'; goto conv1; + case UINT32_C(0x11EF8): c1 = '|'; goto conv1; + default: no_match: *n += clen; @@ -1114,11 +1225,15 @@ conv: if (*n) goto no_conv; *n += clen; +conv_prechecked: if (*ncp) *cp = c; *ncp = 1u; return ret; +combining: + c2 = c1; + c1 = '^'; conv2: if (*n) goto no_conv; diff --git a/libcharconv_makasar.c b/libcharconv_makasar.c new file mode 100644 index 0000000..3948d9e --- /dev/null +++ b/libcharconv_makasar.c @@ -0,0 +1,107 @@ +/* See LICENSE file for copyright and license details. */ +#include "lib-common.h" + + +enum libcharconv_result +libcharconv_makasar(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c, c2; + *n = 0; + for (; slen--; s++) { + switch (s[0]) { + case 'k': c = UINT32_C(0x11EE0); goto conv; + case 'g': c = UINT32_C(0x11EE1); goto conv; + case 'G': c = UINT32_C(0x11EE2); goto conv; + case 'p': c = UINT32_C(0x11EE3); goto conv; + case 'b': c = UINT32_C(0x11EE4); goto conv; + case 'm': c = UINT32_C(0x11EE5); goto conv; + case 't': c = UINT32_C(0x11EE6); goto conv; + case 'd': c = UINT32_C(0x11EE7); goto conv; + case 'n': c = UINT32_C(0x11EE8); goto conv; + case 'c': c = UINT32_C(0x11EE9); goto conv; + case 'j': c = UINT32_C(0x11EEA); goto conv; + case 'Y': c = UINT32_C(0x11EEB); goto conv; + case 'y': c = UINT32_C(0x11EEC); goto conv; + case 'r': c = UINT32_C(0x11EED); goto conv; + case 'l': c = UINT32_C(0x11EEE); goto conv; + case 'v': c = UINT32_C(0x11EEF); goto conv; + case 's': c = UINT32_C(0x11EF0); goto conv; + case '-': c = UINT32_C(0x11EF2); goto conv; + case 'a': c = UINT32_C(0x11EF1); goto conv1; + case 'i': c = UINT32_C(0x11EF3); goto conv_vowel; + case 'u': c = UINT32_C(0x11EF4); goto conv_vowel; + case 'e': c = UINT32_C(0x11EF5); goto conv_vowel; + case 'o': c = UINT32_C(0x11EF6); goto conv_vowel; + case '.': c = UINT32_C(0x11EF7); goto conv1; + case '|': c = UINT32_C(0x11EF8); goto conv1; + case '^': + if (!slen) + return LIBCHARCONV_INDETERMINATE; + switch (s[1]) { + case 'i': c = UINT32_C(0x11EF3); goto conv2; + case 'u': c = UINT32_C(0x11EF4); goto conv2; + case 'e': c = UINT32_C(0x11EF5); goto conv2; + case 'o': c = UINT32_C(0x11EF6); goto conv2; + default: + goto no_match; + } + default: + no_match: + *n += 1u; + break; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (!slen) + return LIBCHARCONV_INDETERMINATE; + switch (s[1]) { + case 'a': goto conv2; + case 'i': c2 = UINT32_C(0x11EF3); break; + case 'u': c2 = UINT32_C(0x11EF4); break; + case 'e': c2 = UINT32_C(0x11EF5); break; + case 'o': c2 = UINT32_C(0x11EF6); break; + default: + goto no_match; + } + if (*ncp >= 1u) + cp[0] = c; + if (*ncp >= 2u) + cp[1] = c2; + *n += 2u; + *ncp = 2u; + return LIBCHARCONV_CONVERTED; + +conv1: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; + +conv2: + if (*n) + goto no_conv; + if (*ncp) + cp[0] = c; + *n += 2u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; + +conv_vowel: + if (*n) + goto no_conv; + if (*ncp >= 1u) + cp[0] = UINT32_C(0x11EF1); + if (*ncp >= 2u) + cp[1] = c; + *n += 1u; + *ncp = 2u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_mandaic.c b/libcharconv_mandaic.c new file mode 100644 index 0000000..a29ea66 --- /dev/null +++ b/libcharconv_mandaic.c @@ -0,0 +1,65 @@ +/* See LICENSE file for copyright and license details. */ +#include "lib-common.h" + + +static struct { + unsigned char cp_low; + char latin; +} mandaic[] = { + {0x40, 'a'}, + {0x41, 'b'}, + {0x42, 'g'}, + {0x43, 'd'}, + {0x44, 'h'}, + {0x45, 'u'}, + {0x45, 'w'}, + {0x46, 'z'}, + {0x47, 'H'}, + {0x48, 'T'}, + {0x49, 'j'}, + {0x49, 'j'}, + {0x4A, 'k'}, + {0x4B, 'l'}, + {0x4C, 'm'}, + {0x4D, 'n'}, + {0x4E, 's'}, + {0x4F, 'e'}, + {0x50, 'p'}, + {0x51, 'Z'}, + {0x52, 'q'}, + {0x53, 'r'}, + {0x54, 'S'}, + {0x55, 't'}, + {0x56, 'D'}, + {0x57, 'K'}, + {0x58, 'o'}, + {0x59, '\''}, + {0x5A, '*'}, + {0x5B, ':'}, + {0x5E, '.'} +}; + + +enum libcharconv_result +libcharconv_mandaic(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + size_t i; + *n = 0; + for (; slen--; s++) { + for (i = 0u; i < sizeof(mandaic) / sizeof(*mandaic); i++) + if (*s == mandaic[i].latin) + goto conv; + *n += 1u; + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + *n += 1u; + if (*ncp) + *cp = (uint_least32_t)(UINT32_C(0x0800) | mandaic[i].cp_low); + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_mirrored.c b/libcharconv_mirrored.c index a71e480..4c13c43 100644 --- a/libcharconv_mirrored.c +++ b/libcharconv_mirrored.c @@ -50,6 +50,11 @@ libcharconv_mirrored(const char *s, size_t slen, size_t *n, uint_least32_t *cp, c = c % 7u * 7u + c / 7u; c += UINT32_C(0x1F031); goto conv; + } else if (UINT32_C(0x2800) <= c && c <= UINT32_C(0x28FF)) { + c = ((c & 0x07u) << 3) | ((c & 0x38u) >> 3) + | ((c & 0x40u) << 1) | ((c & 0x80u) >> 1) + | UINT32_C(0x2800); + goto conv; } else { for (i = 0u; i < sizeof(pairs) / sizeof(*pairs); i++) { if (c == pairs[i].a) { diff --git a/libcharconv_negative.c b/libcharconv_negative.c index 4f35489..670fa47 100644 --- a/libcharconv_negative.c +++ b/libcharconv_negative.c @@ -25,7 +25,10 @@ static struct { {UINT32_C(0x1FA48), UINT32_C(0x1FA49)}, {UINT32_C(0x1FA4B), UINT32_C(0x1FA4C)}, {UINT32_C(0x24EA), UINT32_C(0x24FF)}, - {UINT32_C(0x1F10B), UINT32_C(0x1F10C)} + {UINT32_C(0x1F10B), UINT32_C(0x1F10C)}, + {UINT32_C(0x2690), UINT32_C(0x2691)}, + {UINT32_C(0x1F3F3), UINT32_C(0x1F3F4)}, + {UINT32_C(0x1F3F1), UINT32_C(0x1F3F2)} }; diff --git a/libcharconv_overlaid.c b/libcharconv_overlaid.c index 0c6d525..05cf0d3 100644 --- a/libcharconv_overlaid.c +++ b/libcharconv_overlaid.c @@ -52,7 +52,10 @@ static struct { {UINT32_C(0x23C9), UINT32_C(0x23C6), UINT32_C(0x23C7)}, {UINT32_C(0x23CA), UINT32_C(0x23C0), UINT32_C(0x23C2)}, {UINT32_C(0x23CA), UINT32_C(0x23C3), UINT32_C(0x23C5)}, - {UINT32_C(0x23CA), UINT32_C(0x23C6), UINT32_C(0x23C8)} + {UINT32_C(0x23CA), UINT32_C(0x23C6), UINT32_C(0x23C8)}, + {(uint_least32_t)'-', UINT32_C(0x238F), UINT32_C(0x2390)}, + {(uint_least32_t)'-', UINT32_C(0x2391), UINT32_C(0x2392)}, + {(uint_least32_t)'-', (uint_least32_t)'~', UINT32_C(0x23E6)} }; diff --git a/libcharconv_rumi.c b/libcharconv_rumi.c new file mode 100644 index 0000000..ccd9203 --- /dev/null +++ b/libcharconv_rumi.c @@ -0,0 +1,69 @@ +/* See LICENSE file for copyright and license details. */ +#include "lib-common.h" + + +enum libcharconv_result +libcharconv_rumi(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + if ('1' <= *s && *s <= '9') { + if (*n) + goto no_conv; + c = UINT32_C(0x10E60) + (uint_least32_t)(*s - '1'); + *n = 1u; + if (slen == 0u) + goto conv_if_end; + if (s[0] == '1' && s[1] == '/') { + if (slen == 1u) + return LIBCHARCONV_INDETERMINATE; + if (s[2] == '2') { + *n += 2u; + c = UINT32_C(0x10E7B); + } else if (s[2] == '4') { + *n += 2u; + c = UINT32_C(0x10E7C); + } else if (s[2] == '3') { + *n += 2u; + c = UINT32_C(0x10E7D); + } + goto conv; + } else if (s[0] == '2' && s[1] == '/') { + if (slen == 1u) + return LIBCHARCONV_INDETERMINATE; + if (s[2] == '3') { + *n += 2u; + c = UINT32_C(0x10E7E); + } + goto conv; + } + if (s[1] != '0') + goto conv; + c += 9u; + *n += 1u; + if (slen == 1u) + goto conv_if_end; + if (s[2] == '0') { + c += 9u; + *n += 1u; + } + goto conv; + } else { + *n += 1u; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*ncp) + *cp = c; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +conv_if_end: + if (*ncp) + *cp = c; + *ncp = 1u; + return LIBCHARCONV_CONVERT_IF_END; +} diff --git a/libcharconv_scores.c b/libcharconv_scores.c new file mode 100644 index 0000000..0c6e38f --- /dev/null +++ b/libcharconv_scores.c @@ -0,0 +1,51 @@ +/* See LICENSE file for copyright and license details. */ +#include "lib-common.h" + + +enum libcharconv_result +libcharconv_scores(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + if (*s == '1') { + if (*n) + goto no_conv; + if (!slen) + return LIBCHARCONV_INDETERMINATE; + if (s[1] != '0') + goto no_match; + *n = 2u; + c = UINT32_C(0x1F51F); + if (slen == 1u) + goto conv_if_end; + if (s[2] == '0') { + *n += 1u; + c = UINT32_C(0x1F4AF); + } + goto conv; + } else if (*s == '*') { + if (*n) + goto no_conv; + *n = 1u; + c = UINT32_C(0x1F4AE); + goto conv; + } else { + no_match: + *n += 1u; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*ncp) + *cp = c; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +conv_if_end: + if (*ncp) + *cp = c; + *ncp = 1u; + return LIBCHARCONV_CONVERT_IF_END; +} diff --git a/libcharconv_turned.c b/libcharconv_turned.c index 081bd9b..ff51618 100644 --- a/libcharconv_turned.c +++ b/libcharconv_turned.c @@ -55,7 +55,9 @@ static struct { {UINT32_C(0x23CB), UINT32_C(0x23BF)}, {UINT32_C(0x23C9), UINT32_C(0x23CA)}, {UINT32_C(0x23C1), UINT32_C(0x23C2)}, - {UINT32_C(0x23C7), UINT32_C(0x23C8)} + {UINT32_C(0x23C7), UINT32_C(0x23C8)}, + {UINT32_C(0x238F), UINT32_C(0x2390)}, + {UINT32_C(0x2391), UINT32_C(0x2392)} }; @@ -128,6 +130,13 @@ libcharconv_turned(const char *s, size_t slen, size_t *n, uint_least32_t *cp, si c |= (i & 1u) ? 32u : 0u; c = UINT32_C(0x4D00) | yijing_hexagrams[c]; goto conv; + } else if (UINT32_C(0x2800) <= c && c <= UINT32_C(0x28FF)) { + c = ((c & 0x01u) << 7) | ((c & 0x80u) >> 7) + | ((c & 0x02u) << 4) | ((c & 0x20u) >> 4) + | ((c & 0x04u) << 2) | ((c & 0x10u) >> 2) + | ((c & 0x08u) << 3) | ((c & 0x40u) >> 3) + | UINT32_C(0x2800); + goto conv; } else { for (i = 0u; i < sizeof(pairs) / sizeof(*pairs); i++) { if (c == pairs[i].a) { |
