diff options
30 files changed, 1159 insertions, 15 deletions
@@ -93,7 +93,15 @@ BIN =\ convert-to-rumi\ convert-to-scores\ convert-to-buginese\ - convert-to-makasar + convert-to-makasar\ + convert-to-tagbanwa\ + convert-to-marchen\ + convert-to-mahjong-tiles\ + convert-to-ogham\ + convert-to-mahajani\ + convert-to-box-drawing\ + convert-to-lowered\ + convert-to-raised LIBOBJ =\ libcharconv_decode_utf8_.o\ @@ -180,7 +188,15 @@ LIBOBJ =\ libcharconv_rumi.o\ libcharconv_scores.o\ libcharconv_buginese.o\ - libcharconv_makasar.o + libcharconv_makasar.o\ + libcharconv_tagbanwa.o\ + libcharconv_marchen.o\ + libcharconv_mahjong_tiles.o\ + libcharconv_ogham.o\ + libcharconv_mahajani.o\ + libcharconv_box_drawing.o\ + libcharconv_lowered.o\ + libcharconv_raised.o LOBJ = $(LIBOBJ:.o=.lo) diff --git a/convert-to-box-drawing.c b/convert-to-box-drawing.c new file mode 100644 index 0000000..0d49981 --- /dev/null +++ b/convert-to-box-drawing.c @@ -0,0 +1,4 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +SIMPLE(libcharconv_box_drawing) diff --git a/convert-to-lowered.c b/convert-to-lowered.c new file mode 100644 index 0000000..6cb9d75 --- /dev/null +++ b/convert-to-lowered.c @@ -0,0 +1,4 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +SIMPLE(libcharconv_lowered) diff --git a/convert-to-mahajani.c b/convert-to-mahajani.c new file mode 100644 index 0000000..996489a --- /dev/null +++ b/convert-to-mahajani.c @@ -0,0 +1,4 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +SIMPLE(libcharconv_mahajani) diff --git a/convert-to-mahjong-tiles.c b/convert-to-mahjong-tiles.c new file mode 100644 index 0000000..b6c4766 --- /dev/null +++ b/convert-to-mahjong-tiles.c @@ -0,0 +1,4 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "common.h" + +SIMPLE(libcharconv_mahjong_tiles) diff --git a/convert-to-marchen.c b/convert-to-marchen.c new file mode 100644 index 0000000..506632f --- /dev/null +++ b/convert-to-marchen.c @@ -0,0 +1,4 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +SIMPLE(libcharconv_marchen) diff --git a/convert-to-ogham.c b/convert-to-ogham.c new file mode 100644 index 0000000..71a8eeb --- /dev/null +++ b/convert-to-ogham.c @@ -0,0 +1,4 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +SIMPLE(libcharconv_ogham) diff --git a/convert-to-raised.c b/convert-to-raised.c new file mode 100644 index 0000000..3869129 --- /dev/null +++ b/convert-to-raised.c @@ -0,0 +1,4 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +SIMPLE(libcharconv_raised) diff --git a/convert-to-tagbanwa.c b/convert-to-tagbanwa.c new file mode 100644 index 0000000..41e322b --- /dev/null +++ b/convert-to-tagbanwa.c @@ -0,0 +1,4 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "common.h" + +SIMPLE(libcharconv_tagbanwa) diff --git a/libcharconv.h b/libcharconv.h index c107aa8..3271bc5 100644 --- a/libcharconv.h +++ b/libcharconv.h @@ -544,6 +544,46 @@ LIBCHARCONV_FUNC_(libcharconv_buginese); */ LIBCHARCONV_FUNC_(libcharconv_makasar); +/** + * Convert from Latin to Tagbanwa + */ +LIBCHARCONV_FUNC_(libcharconv_tagbanwa); + +/** + * Convert from Latin to Marchen + */ +LIBCHARCONV_FUNC_(libcharconv_marchen); + +/** + * Convert from Latin to mahjong tiles + */ +LIBCHARCONV_FUNC_(libcharconv_mahjong_tiles); + +/** + * Convert from Latin to Ogham + */ +LIBCHARCONV_FUNC_(libcharconv_ogham); + +/** + * Convert from Latin to Mahajani + */ +LIBCHARCONV_FUNC_(libcharconv_mahajani); + +/** + * Convert from Latin to BOX DRAWING elements + */ +LIBCHARCONV_FUNC_(libcharconv_box_drawing); + +/** + * Convert to lowered symbols + */ +LIBCHARCONV_FUNC_(libcharconv_lowered); + +/** + * Convert to raised symbols + */ +LIBCHARCONV_FUNC_(libcharconv_raised); + #undef LIBCHARCONV_FUNC_ #endif diff --git a/libcharconv_box_drawing.c b/libcharconv_box_drawing.c new file mode 100644 index 0000000..0cba3bb --- /dev/null +++ b/libcharconv_box_drawing.c @@ -0,0 +1,52 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "lib-common.h" + + +enum libcharconv_result +libcharconv_box_drawing(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t a, c; + size_t i; + *n = 0; + for (; slen--; s++, ++*n) { + PLAIN_CASE_SELECT("/\\X", 0x2571); + if ('1' <= *s && *s <= '4') { + c = (uint_least32_t)1 << (*s - '1'); + for (i = 1u;; i++) { + if (slen < i) + goto indeterminate; + if ('1' <= s[i] && s[i] <= '4') { + a = (uint_least32_t)1 << (s[i] - '1'); + if (a & c) + goto next; + c |= a; + } else if (s[i] == 'O') { + c = (uint_least32_t)" \x00\x01\x07\x02\x04\x09\x0D\x03\x08\x05\x0C\x06\x0B\x0A\x0E"[c]; + c |= UINT32_C(0x1FBA0); + if (*n) + goto no_conv; + *n += i + 1u; + goto conv_prechecked; + } + } + } + next:; + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +indeterminate: + if (*n) + goto no_conv; + return LIBCHARCONV_INDETERMINATE; + +conv: + if (*n) + goto no_conv; + *n += 1u; +conv_prechecked: + if (*ncp) + *cp = c; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_chess_black.c b/libcharconv_chess_black.c index 480c872..60f5090 100644 --- a/libcharconv_chess_black.c +++ b/libcharconv_chess_black.c @@ -12,6 +12,8 @@ libcharconv_chess_black(const char *s, size_t slen, size_t *n, uint_least32_t *c PLAIN_SELECT("kqrbnp", 0x265A); PLAIN_SINGLE('E', 0x1FA48); PLAIN_SINGLE('e', 0x1FA49); + PLAIN_SELECT("=-.|ij", 0x2BF9); + PLAIN_SELECT("012", 0x2BBA); } no_conv: return LIBCHARCONV_NO_CONVERT; diff --git a/libcharconv_chess_neutral.c b/libcharconv_chess_neutral.c index 2a11eda..9df515f 100644 --- a/libcharconv_chess_neutral.c +++ b/libcharconv_chess_neutral.c @@ -10,6 +10,8 @@ libcharconv_chess_neutral(const char *s, size_t slen, size_t *n, uint_least32_t for (; slen--; s++, ++*n) { PLAIN_CASE_SELECT("kqrbnp", 0x1FA00); PLAIN_CASE_SINGLE('e', 0x1FA4A); + PLAIN_SELECT("=-.|ij", 0x2BF9); + PLAIN_SELECT("012", 0x2BBA); } no_conv: return LIBCHARCONV_NO_CONVERT; diff --git a/libcharconv_chess_white.c 
b/libcharconv_chess_white.c index f6233a1..15f45e7 100644 --- a/libcharconv_chess_white.c +++ b/libcharconv_chess_white.c @@ -12,6 +12,9 @@ libcharconv_chess_white(const char *s, size_t slen, size_t *n, uint_least32_t *c PLAIN_SELECT("KQRBNP", 0x265A); PLAIN_SINGLE('e', 0x1FA48); PLAIN_SINGLE('E', 0x1FA49); + PLAIN_SELECT("=-.|ij", 0x2BF9); + PLAIN_SELECT("012", 0x2BBA); + } no_conv: return LIBCHARCONV_NO_CONVERT; diff --git a/libcharconv_flipped.c b/libcharconv_flipped.c index 3da791c..05de818 100644 --- a/libcharconv_flipped.c +++ b/libcharconv_flipped.c @@ -22,7 +22,52 @@ static struct { {UINT32_C(0x23C9), UINT32_C(0x23CA)}, {UINT32_C(0x23C1), UINT32_C(0x23C2)}, {UINT32_C(0x238F), UINT32_C(0x2390)}, - {UINT32_C(0x2391), UINT32_C(0x2392)} + {UINT32_C(0x2391), UINT32_C(0x2392)}, + {UINT32_C(0x2571), UINT32_C(0x2572)}, + {UINT32_C(0x1FBA2), UINT32_C(0x1FBA0)}, + {UINT32_C(0x1FBA3), UINT32_C(0x1FBA1)}, + {UINT32_C(0x1FBA6), UINT32_C(0x1FBA7)}, + {UINT32_C(0x1FBA8), UINT32_C(0x1FBA9)}, + {UINT32_C(0x1FBAA), UINT32_C(0x1FBAC)}, + {UINT32_C(0x1FBAB), UINT32_C(0x1FBAD)}, + {UINT32_C(0x250C), UINT32_C(0x2514)}, + {UINT32_C(0x250D), UINT32_C(0x2515)}, + {UINT32_C(0x250E), UINT32_C(0x2516)}, + {UINT32_C(0x250F), UINT32_C(0x2517)}, + {UINT32_C(0x2510), UINT32_C(0x2518)}, + {UINT32_C(0x2511), UINT32_C(0x2519)}, + {UINT32_C(0x2512), UINT32_C(0x251A)}, + {UINT32_C(0x2513), UINT32_C(0x251B)}, + {UINT32_C(0x251F), UINT32_C(0x251E)}, + {UINT32_C(0x2522), UINT32_C(0x2521)}, + {UINT32_C(0x2527), UINT32_C(0x2526)}, + {UINT32_C(0x252A), UINT32_C(0x2529)}, + {UINT32_C(0x252C), UINT32_C(0x2534)}, + {UINT32_C(0x252D), UINT32_C(0x2535)}, + {UINT32_C(0x252E), UINT32_C(0x2536)}, + {UINT32_C(0x252F), UINT32_C(0x2537)}, + {UINT32_C(0x2530), UINT32_C(0x2538)}, + {UINT32_C(0x2531), UINT32_C(0x2539)}, + {UINT32_C(0x2532), UINT32_C(0x253A)}, + {UINT32_C(0x2533), UINT32_C(0x253B)}, + {UINT32_C(0x2541), UINT32_C(0x2540)}, + {UINT32_C(0x2545), UINT32_C(0x2543)}, + {UINT32_C(0x2546), 
UINT32_C(0x2544)}, + {UINT32_C(0x2548), UINT32_C(0x2547)}, + {UINT32_C(0x2552), UINT32_C(0x2558)}, + {UINT32_C(0x2553), UINT32_C(0x2559)}, + {UINT32_C(0x2554), UINT32_C(0x255A)}, + {UINT32_C(0x2555), UINT32_C(0x255B)}, + {UINT32_C(0x2556), UINT32_C(0x255C)}, + {UINT32_C(0x2557), UINT32_C(0x255D)}, + {UINT32_C(0x2564), UINT32_C(0x2567)}, + {UINT32_C(0x2565), UINT32_C(0x2568)}, + {UINT32_C(0x2566), UINT32_C(0x2569)}, + {UINT32_C(0x256D), UINT32_C(0x2570)}, + {UINT32_C(0x256E), UINT32_C(0x256F)}, + {UINT32_C(0x2575), UINT32_C(0x2577)}, + {UINT32_C(0x2579), UINT32_C(0x257B)}, + {UINT32_C(0x257D), UINT32_C(0x257F)} }; @@ -84,6 +129,12 @@ libcharconv_flipped(const char *s, size_t slen, size_t *n, uint_least32_t *cp, s | ((c & 0x08u) << 4) | ((c & 0x80u) >> 4) | UINT32_C(0x2800); goto conv; + } else if (UINT32_C(0x1681) <= c && c <= UINT32_C(0x1685)) { + c += 5u; + goto conv; + } else if (UINT32_C(0x1686) <= c && c <= UINT32_C(0x168A)) { + c -= 5u; + goto conv; } else { for (i = 0u; i < sizeof(pairs) / sizeof(*pairs); i++) { if (c == pairs[i].a) { diff --git a/libcharconv_joined.c b/libcharconv_joined.c index e8e0a59..e8ab45f 100644 --- a/libcharconv_joined.c +++ b/libcharconv_joined.c @@ -11,6 +11,8 @@ static struct { {(uint_least32_t)'?', (uint_least32_t)'?', UINT32_C(0x2047)}, {(uint_least32_t)'!', (uint_least32_t)'!', UINT32_C(0x203C)}, {(uint_least32_t)'!', (uint_least32_t)'?', UINT32_C(0x2049)}, + {(uint_least32_t)':', (uint_least32_t)':', UINT32_C(0x2E2C)}, + {(uint_least32_t)'|', (uint_least32_t)'|', UINT32_C(0x2016)}, {UINT32_C(0x23CB), UINT32_C(0x23BE), UINT32_C(0x23C9)}, {UINT32_C(0x23CC), UINT32_C(0x23BF), UINT32_C(0x23CA)} }; @@ -19,7 +21,7 @@ static struct { enum libcharconv_result libcharconv_joined(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) { - uint_least32_t a, b; + uint_least32_t a, b, c; size_t i, alen, blen; *n = 0; while (slen) { @@ -36,6 +38,46 @@ libcharconv_joined(const char *s, size_t slen, size_t *n, uint_least32_t 
*cp, si continue; } + if (UINT32_C(0x1681) <= a && a <= UINT32_C(0x1694)) { + if (*n) + goto no_conv; + if (slen == alen) + return LIBCHARCONV_INDETERMINATE; + blen = libcharconv_decode_utf8_(&s[alen], slen - alen, &b); + if (blen > slen) + return LIBCHARCONV_INDETERMINATE; + if (!blen) + goto no_conv; + if (UINT32_C(0x1681) > b || b > UINT32_C(0x1694)) + goto no_conv_consume; + if ((a - UINT32_C(0x1681)) / 5u != (b - UINT32_C(0x1681)) / 5u) + goto no_conv_consume; + c = (a - UINT32_C(0x1681)) % 5u + (b - UINT32_C(0x1681)) % 5u + 1u; + if (c >= 5u) + goto no_conv_consume; + a = c += (a - UINT32_C(0x1681)) / 5u * 5u + UINT32_C(0x1681); + alen += blen; + for (;;) { + if (slen == alen) + goto conv_if_end_calc; + blen = libcharconv_decode_utf8_(&s[alen], slen - alen, &b); + if (blen > slen) + return LIBCHARCONV_INDETERMINATE; + if (!blen) + goto conv_calc; + if (UINT32_C(0x1681) > b || b > UINT32_C(0x1694)) + goto conv_calc; + if ((a - UINT32_C(0x1681)) / 5u != (b - UINT32_C(0x1681)) / 5u) + goto conv_calc; + b = (a - UINT32_C(0x1681)) % 5u + (b - UINT32_C(0x1681)) % 5u + 1u; + if (b >= 5u) + goto conv_calc; + a = c = b + (a - UINT32_C(0x1681)) / 5u * 5u + UINT32_C(0x1681); + alen += blen; + } + goto conv_calc; + } + for (i = 0u; i < sizeof(pairs) / sizeof(*pairs); i++) { if (a != pairs[i].a) continue; @@ -59,12 +101,34 @@ libcharconv_joined(const char *s, size_t slen, size_t *n, uint_least32_t *cp, si no_conv: return LIBCHARCONV_NO_CONVERT; +no_conv_consume: + *n += alen; + return LIBCHARCONV_NO_CONVERT; + conv: if (*n) goto no_conv; if (*ncp) - *cp = pairs[i].to; + *cp = c = pairs[i].to; *n += alen + blen; *ncp = 1u; return LIBCHARCONV_CONVERTED; + +conv_calc: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += alen; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; + +conv_if_end_calc: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += alen; + *ncp = 1u; + return LIBCHARCONV_CONVERT_IF_END; } diff --git a/libcharconv_latin.c b/libcharconv_latin.c index 
49de445..8be47c8 100644 --- a/libcharconv_latin.c +++ b/libcharconv_latin.c @@ -5,6 +5,7 @@ enum libcharconv_result libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) { + char csbuf[8]; enum libcharconv_result ret = LIBCHARCONV_CONVERTED; uint_least32_t c; char c1, c2, c3, c4, c5, c6; @@ -218,20 +219,20 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz *n += clen; if (!slen) { ret = LIBCHARCONV_CONVERT_IF_END; - goto budih_conv; + goto buhid_conv; } clen = libcharconv_decode_utf8_(s, slen, &c); if (clen > slen) return LIBCHARCONV_INDETERMINATE; if (!clen) - goto budih_conv; + goto buhid_conv; switch (c) { case UINT32_C(0x1752): c2 = 'i'; *n += clen; break; case UINT32_C(0x1753): c2 = 'u'; *n += clen; break; default: break; } - budih_conv: + buhid_conv: if (c1 == 'G') { c3 = c2; c2 = 'g'; @@ -492,6 +493,14 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz /* chess */ c = (uint_least32_t)"eEe"[(c - UINT32_C(0x1FA48)) % 3u]; goto conv; + } else if (UINT32_C(0x2BF9) <= c && c <= UINT32_C(0x2BFE)) { + /* chess */ + c = (uint_least32_t)"=-.|ij"[c - UINT32_C(0x2BF9)]; + goto conv; + } else if (UINT32_C(0x2BBA) <= c && c <= UINT32_C(0x2BBC)) { + /* chess */ + c = (uint_least32_t)"012"[c - UINT32_C(0x2BBA)]; + goto conv; } else if (UINT32_C(0x2460) <= c && c <= UINT32_C(0x2468)) { /* enclosed (positive) */ @@ -747,6 +756,127 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz } goto conv2_prechecked; + } else if (UINT32_C(0x1763) <= c && c <= UINT32_C(0x1770)) { + /* tagbanwa */ + c1 = "kgGtdnpbmylws"[c - UINT32_C(0x1763)]; + if (*n) + goto no_conv; + c2 = 'a'; + s = &s[clen]; + *n += clen; + if (!slen) { + ret = LIBCHARCONV_CONVERT_IF_END; + goto conv2_prechecked; + } + clen = libcharconv_decode_utf8_(s, slen, &c); + if (clen > slen) + return LIBCHARCONV_INDETERMINATE; + if (!clen) + goto conv2_prechecked; + switch (c) { + case 
UINT32_C(0x1772): c2 = 'i'; *n += clen; break; + case UINT32_C(0x1773): c2 = 'u'; *n += clen; break; + default: + break; + } + goto conv2_prechecked; + + } else if (UINT32_C(0x11C72) <= c && c <= UINT32_C(0x11C8F)) { + /* marchen */ + char root; + char vowel = 0; + char subjoined = 0; + if (*n) + goto no_conv; + root = "kKgGcCjYtTdnpPbmxXDwZz-yrlSsha"[c - UINT32_C(0x11C72)]; + for (;;) { + s = &s[clen]; + *n += clen; + if (subjoined && vowel) + break; + if (!slen) { + ret = LIBCHARCONV_CONVERT_IF_END; + break; + } + clen = libcharconv_decode_utf8_(s, slen, &c); + if (clen > slen) + return LIBCHARCONV_INDETERMINATE; + if (!clen) + break; + slen -= clen; + if (!subjoined && UINT32_C(0x11C92) <= c && c <= UINT32_C(0x11CAF)) + subjoined = "kKgGcCjYtTdnpPbmxXDwZz-yrlSsha"[c - UINT32_C(0x11C92)]; + else if (!vowel && UINT32_C(0x11CB0) <= c && c <= UINT32_C(0x11CB4)) + vowel = "Aiueo"[c - UINT32_C(0x11CB0)]; + else + break; + } + if (root == 'a') + root = '\0'; + if (!vowel) + vowel = 'a'; + i = 0u; + if (root) + csbuf[i++] = root; + if (subjoined && !root) { + csbuf[i++] = vowel; + csbuf[i++] = '_'; + vowel = '\0'; + } else if (subjoined == 'a') { + if (vowel) + csbuf[i++] = vowel; + csbuf[i++] = '_'; + vowel = '\0'; + } + if (subjoined) + csbuf[i++] = subjoined; + if (vowel) + csbuf[i++] = vowel; + csbuf[i] = '\0'; + cs = csbuf; + goto conv_str_prechecked; + } else if (UINT32_C(0x11C92) <= c && c <= UINT32_C(0x11CAF)) { + /* marchen */ + c1 = '_'; + c2 = "kKgGcCjYtTdnpPbmxXDwZz-yrlSsha"[c - UINT32_C(0x11C92)]; + goto conv2; + + } else if (UINT32_C(0x1F007) <= c && c <= UINT32_C(0x1F021)) { + /* mahjong tiles */ + c1 = "cbo"[(c - UINT32_C(0x1F007)) / 9u]; + c2 = (char)((c - UINT32_C(0x1F007)) % 9u + (unsigned)'1'); + goto conv2; + + } else if (UINT32_C(0x1680) <= c && c <= UINT32_C(0x169C)) { + /* ogham */ + c1 = " blfsnhdtcqaoueimgGzrEOUIAp><"[c - UINT32_C(0x1680)]; + goto conv1; + + } else if (UINT32_C(0x11150) <= c && c <= UINT32_C(0x11176)) { + /* mahajani */ + c1 = 
"aiueokkggccjjYTTDDNttddnppbbmrlvsHR.'|s"[c - UINT32_C(0x11150)]; + c2 = " h h h h h h h h h h r"[c - UINT32_C(0x11150)]; + if (c2 == ' ') + goto conv1; + goto conv2; + + } else if (UINT32_C(0x1FBA0) <= c && c <= UINT32_C(0x1FBAE)) { + /* box drawing */ + c = (uint_least32_t)"\x01\x02\x04\x08\x05\x0A\x0C\x03\x09\x06\x0E\x0D\x0B\x07\x0F"[c & 0xF]; + i = 0u; + if (c & 0x01u) + csbuf[i++] = '1'; + if (c & 0x02u) + csbuf[i++] = '2'; + if (c & 0x04u) + csbuf[i++] = '3'; + if (c & 0x08u) + csbuf[i++] = '4'; + csbuf[i++] = 'O'; + csbuf[i] = '\0'; + cs = csbuf; + goto conv_str; + } else { use_switch: switch (c) { @@ -901,12 +1031,22 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz case UINT32_C(0x1FA51): c1 = 'N'; c2 = 'Q'; goto conv2; case UINT32_C(0x1FA52): c1 = 'N'; c2 = 'R'; goto conv2; case UINT32_C(0x1FA53): c1 = 'N'; c2 = 'B'; goto conv2; + case UINT32_C(0x00A7): c1 = 'S'; c2 = 'S'; goto conv2; + case UINT32_C(0x2E16): c1 = '>'; c2 = ':'; goto conv2; + + /* joined + overlaid */ + case UINT32_C(0x205C): c1 = '+'; c2 = ':'; c3 = ':'; goto conv3; + + /* joined + overlaid + rotated 45 degrees */ + case UINT32_C(0x203B): c1 = 'x'; c2 = ':'; c3 = ':'; goto conv3; /* joined */ case UINT32_C(0x2048): c1 = '?'; c2 = '!'; goto conv2; case UINT32_C(0x2047): c1 = '?'; c2 = '?'; goto conv2; case UINT32_C(0x203C): c1 = '!'; c2 = '!'; goto conv2; case UINT32_C(0x2049): c1 = '!'; c2 = '?'; goto conv2; + case UINT32_C(0x2E2C): c1 = ':'; c2 = ':'; goto conv2; + case UINT32_C(0x2016): c1 = '|'; c2 = '|'; goto conv2; /* mirrored */ case UINT32_C(0x204F): c = UINT32_C(0x003B); goto conv; @@ -1169,6 +1309,8 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz /* stacked */ case UINT32_C(0x2051): c1 = '*'; c2 = '*'; goto conv2; case UINT32_C(0x2E49): c1 = ','; c2 = ','; goto conv2; + case UINT32_C(0x2E40): c1 = '-'; c2 = '-'; goto conv2; + case UINT32_C(0x2017): c1 = '_'; c2 = '_'; goto conv2; /* gothic */ case 
UINT32_C(0x10341): c1 = '9'; c2 = '0'; goto conv2; @@ -1196,6 +1338,58 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz case UINT32_C(0x11EF7): c1 = '.'; goto conv1; case UINT32_C(0x11EF8): c1 = '|'; goto conv1; + /* tagbanwa */ + case UINT32_C(0x1760): c1 = 'a'; goto conv1; + case UINT32_C(0x1761): c1 = 'i'; goto conv1; + case UINT32_C(0x1762): c1 = 'u'; goto conv1; + case UINT32_C(0x1772): c1 = 'i'; goto combining; + case UINT32_C(0x1773): c1 = 'u'; goto combining; + + /* marchen */ + case UINT32_C(0x11CB5): c1 = '\''; goto conv1; + case UINT32_C(0x11CB6): c1 = '\"'; goto conv1; + case UINT32_C(0x11C70): c1 = '.'; goto conv1; + case UINT32_C(0x11C71): c1 = ':'; goto conv1; + case UINT32_C(0x11CB0): c1 = 'A'; goto combining; + case UINT32_C(0x11CB1): c1 = 'i'; goto combining; + case UINT32_C(0x11CB2): c1 = 'u'; goto combining; + case UINT32_C(0x11CB3): c1 = 'e'; goto combining; + case UINT32_C(0x11CB4): c1 = 'o'; goto combining; + + /* mahjong tiles */ + case UINT32_C(0x1F000): c1 = 'e'; c2 = 'w'; goto conv2; + case UINT32_C(0x1F001): c1 = 's'; c2 = 'w'; goto conv2; + case UINT32_C(0x1F002): c1 = 'w'; c2 = 'w'; goto conv2; + case UINT32_C(0x1F003): c1 = 'n'; c2 = 'w'; goto conv2; + case UINT32_C(0x1F004): c1 = 'r'; c2 = 'd'; goto conv2; + case UINT32_C(0x1F005): c1 = 'g'; c2 = 'd'; goto conv2; + case UINT32_C(0x1F006): c1 = 'w'; c2 = 'd'; goto conv2; + case UINT32_C(0x1F022): c1 = 'p'; goto conv1; + case UINT32_C(0x1F023): c1 = 'o'; goto conv1; + case UINT32_C(0x1F024): c1 = 'b'; goto conv1; + case UINT32_C(0x1F025): c1 = 'c'; goto conv1; + case UINT32_C(0x1F026): c1 = 's'; c2 = 'p'; goto conv2; + case UINT32_C(0x1F027): c1 = 's'; c2 = 'u'; goto conv2; + case UINT32_C(0x1F028): c1 = 'a'; c2 = 'u'; goto conv2; + case UINT32_C(0x1F029): c1 = 'w'; c2 = 'i'; goto conv2; + case UINT32_C(0x1F02A): c1 = 'j'; goto conv1; + case UINT32_C(0x1F02B): c1 = '#'; goto conv1; + + /* box drawing */ + case UINT32_C(0x2571): c1 = '/'; goto conv1; + 
case UINT32_C(0x2572): c1 = '\\'; goto conv1; + case UINT32_C(0x2573): c1 = 'X'; goto conv1; + + /* lowered */ + case UINT32_C(0x204E): c1 = '*'; goto conv1; + + /* raised */ + case UINT32_C(0x2E33): c1 = '.'; goto conv1; + case UINT32_C(0x2E34): c1 = ','; goto conv1; + + /* raised + turned */ + case UINT32_C(0x2E32): c1 = ','; goto conv1; + default: no_match: *n += clen; @@ -1303,6 +1497,7 @@ conv_str: if (*n) goto no_conv; *n += clen; +conv_str_prechecked: i = 0u; for (; cs[i]; i++) if (*ncp > i) diff --git a/libcharconv_lowered.c b/libcharconv_lowered.c new file mode 100644 index 0000000..a0688e1 --- /dev/null +++ b/libcharconv_lowered.c @@ -0,0 +1,24 @@ +/* See LICENSE file for copyright and license details. */ +#include "lib-common.h" + + +enum libcharconv_result +libcharconv_lowered(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++, ++*n) { + PLAIN_SINGLE('*', 0x204E); + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_mahajani.c b/libcharconv_mahajani.c new file mode 100644 index 0000000..4320e60 --- /dev/null +++ b/libcharconv_mahajani.c @@ -0,0 +1,36 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "lib-common.h" + + +enum libcharconv_result +libcharconv_mahajani(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + enum libcharconv_result ret = LIBCHARCONV_CONVERTED; + uint_least32_t c; + *n = 0; + for (; slen--; s++, ++*n) { + PLAIN_SELECT("aiueokkggccjjYTTDDNttddnppbbmrlvsHR.'|s", 0x11150); + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (strchr("kgcjTDtdpbs", *s)) { + if (!slen) { + ret = LIBCHARCONV_CONVERT_IF_END; + } else if (s[0] != 's' && s[1] == 'h') { + c += 1u; + *n += 1u; + } else if (s[0] == 's' && s[1] == 'r') { + c = UINT32_C(0x11176); + *n += 1u; + } + } + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return ret; +} diff --git a/libcharconv_mahjong_tiles.c b/libcharconv_mahjong_tiles.c new file mode 100644 index 0000000..7bc60ba --- /dev/null +++ b/libcharconv_mahjong_tiles.c @@ -0,0 +1,131 @@ +/* See LICENSE file for copyright and license details. */ +#include "lib-common.h" + + +enum libcharconv_result +libcharconv_mahjong_tiles(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++, ++*n) { + PLAIN_CASE_SELECT("pobc", 0x1F022); + PLAIN_CASE_SELECT("j#", 0x1F02A); + if (tolower(s[0]) == 'e') { + if (!slen) + goto indeterminate; + if (tolower(s[1]) == 'w') { + c = UINT32_C(0x1F000); + goto conv2; + } + goto no_conv; + } + if (tolower(s[0]) == 's') { + if (!slen) + goto indeterminate; + if (tolower(s[1]) == 'w') { + c = UINT32_C(0x1F001); + goto conv2; + } else if (tolower(s[1]) == 'p') { + c = UINT32_C(0x1F026); + goto conv2; + } else if (tolower(s[1]) == 'u') { + c = UINT32_C(0x1F027); + goto conv2; + } + goto no_conv; + } + if (tolower(s[0]) == 'w') { + if (!slen) + goto indeterminate; + if (tolower(s[1]) == 'w') { + c = UINT32_C(0x1F002); + goto conv2; + } else if (tolower(s[1]) == 'd') { + c = UINT32_C(0x1F006); + goto conv2; + } else if (tolower(s[1]) == 'i') { + c = UINT32_C(0x1F029); + 
goto conv2; + } + goto no_conv; + } + if (tolower(s[0]) == 'n') { + if (!slen) + goto indeterminate; + if (tolower(s[1]) == 'w') { + c = UINT32_C(0x1F003); + goto conv2; + } + goto no_conv; + } + if (tolower(s[0]) == 'r') { + if (!slen) + goto indeterminate; + if (tolower(s[1]) == 'd') { + c = UINT32_C(0x1F004); + goto conv2; + } + goto no_conv; + } + if (tolower(s[0]) == 'g') { + if (!slen) + goto indeterminate; + if (tolower(s[1]) == 'd') { + c = UINT32_C(0x1F005); + goto conv2; + } + goto no_conv; + } + if (tolower(s[0]) == 'a') { + if (!slen) + goto indeterminate; + if (tolower(s[1]) == 'u') { + c = UINT32_C(0x1F028); + goto conv2; + } + goto no_conv; + } + if ('1' <= s[0] && s[0] <= '9') { + if (!slen) + goto indeterminate; + if (tolower(s[1]) == 'c') + c = UINT32_C(0x1F007); + else if (tolower(s[1]) == 'b') + c = UINT32_C(0x1F010); + else if (tolower(s[1]) == 'o') + c = UINT32_C(0x1F019); + else + goto no_conv; + c += (uint_least32_t)(s[0] - '1'); + goto conv2; + } + } + return LIBCHARCONV_NO_CONVERT; + +no_conv: + *n += 1u; + return LIBCHARCONV_NO_CONVERT; + +indeterminate: + if (*n) + goto no_conv; + return LIBCHARCONV_INDETERMINATE; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; + +conv2: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 2u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_marchen.c b/libcharconv_marchen.c new file mode 100644 index 0000000..12fc414 --- /dev/null +++ b/libcharconv_marchen.c @@ -0,0 +1,134 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "lib-common.h" + + +enum libcharconv_result +libcharconv_marchen(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + static const char *const letters = "kKgGcCjYtTdnpPbmxXDwZz-yrlSsh"; + uint_least32_t c, c2, c3; + size_t i; + *n = 0; + for (; slen--; s++) { + PLAIN_SELECT(letters, 0x11C72); + switch (s[0]) { + case 'a': c = UINT32_C(0x11C8F); goto conv1_subjoinable; + case 'A': c = UINT32_C(0x11CB0); goto conv_vowel; + case 'i': c = UINT32_C(0x11CB1); goto conv_vowel; + case 'u': c = UINT32_C(0x11CB2); goto conv_vowel; + case 'e': c = UINT32_C(0x11CB3); goto conv_vowel; + case 'o': c = UINT32_C(0x11CB4); goto conv_vowel; + case '\'': c = UINT32_C(0x11CB5); goto conv1; + case '\"': c = UINT32_C(0x11CB6); goto conv1; + case '.': c = UINT32_C(0x11C70); goto conv1; + case ':': c = UINT32_C(0x11C71); goto conv1; + case '^': + if (!slen) + return LIBCHARCONV_INDETERMINATE; + switch (s[1]) { + case 'A': c = UINT32_C(0x11CB0); goto conv2; + case 'i': c = UINT32_C(0x11CB1); goto conv2; + case 'u': c = UINT32_C(0x11CB2); goto conv2; + case 'e': c = UINT32_C(0x11CB3); goto conv2; + case 'o': c = UINT32_C(0x11CB4); goto conv2; + default: + goto no_match; + } + case '_': + if (*n) + goto no_conv; + *n += 1u; + break; + default: + no_match: + *n += 1u; + break; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n == 1u && s[-1] == '_' && c != UINT32_C(0x11C88)) { + c += 0x20u; + goto conv1_prechecked; + } + if (*n) + goto no_conv; + if (!slen) + return LIBCHARCONV_INDETERMINATE; + for (i = 0u; letters[i]; i++) + if (letters[i] == s[1]) + break; + c3 = (letters[i] && letters[i] != '-') ? (uint_least32_t)(UINT32_C(0x11C92) + i) : 0u; + if (c3 && slen == 1u) + return LIBCHARCONV_INDETERMINATE; + switch (s[c3 ? 
2 : 1]) { + case 'a': + if (c3) + goto conv3_prechecked; + else + goto conv2_prechecked; + case 'A': c2 = UINT32_C(0x11CB0); break; + case 'i': c2 = UINT32_C(0x11CB1); break; + case 'u': c2 = UINT32_C(0x11CB2); break; + case 'e': c2 = UINT32_C(0x11CB3); break; + case 'o': c2 = UINT32_C(0x11CB4); break; + default: + goto no_match; + } + if (*ncp >= 1u) + cp[0] = c; + if (*ncp >= 2u) + cp[1] = c2; + if (c3 && *ncp >= 3u) + cp[2] = c3; + *n += c3 ? 3u : 2u; + *ncp = c3 ? 3u : 2u; + return LIBCHARCONV_CONVERTED; + +conv1_subjoinable: + if (*n == 1u && s[-1] == '_') { + c += 0x20u; + } else { + conv1: + if (*n) + goto no_conv; + } +conv1_prechecked: + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; + +conv2: + if (*n) + goto no_conv; +conv2_prechecked: + if (*ncp) + cp[0] = c; + *n += 2u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; + +conv_vowel: + if (*n) + goto no_conv; + if (*ncp >= 1u) + cp[0] = UINT32_C(0x11C8F) + (s[-1] == '_' ? 0x20u : 0u); + if (*ncp >= 2u) + cp[1] = c; + *n += 1u; + *ncp = 2u; + return LIBCHARCONV_CONVERTED; + +conv3_prechecked: + if (*ncp >= 1u) + cp[0] = c; + if (*ncp >= 2u) + cp[1] = c3; + *n += 3u; + *ncp = 2u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_mirrored.c b/libcharconv_mirrored.c index 4c13c43..dc75fb7 100644 --- a/libcharconv_mirrored.c +++ b/libcharconv_mirrored.c @@ -21,7 +21,47 @@ static struct { {UINT32_C(0x230C), UINT32_C(0x230D)}, {UINT32_C(0x230E), UINT32_C(0x230F)}, {UINT32_C(0x23BE), UINT32_C(0x23CB)}, - {UINT32_C(0x23BF), UINT32_C(0x23CC)} + {UINT32_C(0x23BF), UINT32_C(0x23CC)}, + {UINT32_C(0x169B), UINT32_C(0x169C)}, + {UINT32_C(0x2571), UINT32_C(0x2572)}, + {UINT32_C(0x250C), UINT32_C(0x2510)}, + {UINT32_C(0x250D), UINT32_C(0x2511)}, + {UINT32_C(0x250E), UINT32_C(0x2512)}, + {UINT32_C(0x250F), UINT32_C(0x2513)}, + {UINT32_C(0x2514), UINT32_C(0x2518)}, + {UINT32_C(0x2515), UINT32_C(0x2519)}, + {UINT32_C(0x2516), UINT32_C(0x251A)}, + {UINT32_C(0x2517), UINT32_C(0x251B)}, + 
{UINT32_C(0x251C), UINT32_C(0x2524)}, + {UINT32_C(0x251D), UINT32_C(0x2525)}, + {UINT32_C(0x251E), UINT32_C(0x2526)}, + {UINT32_C(0x251F), UINT32_C(0x2527)}, + {UINT32_C(0x2520), UINT32_C(0x2528)}, + {UINT32_C(0x2521), UINT32_C(0x2529)}, + {UINT32_C(0x2522), UINT32_C(0x252A)}, + {UINT32_C(0x2523), UINT32_C(0x252B)}, + {UINT32_C(0x252E), UINT32_C(0x252D)}, + {UINT32_C(0x2532), UINT32_C(0x2531)}, + {UINT32_C(0x2536), UINT32_C(0x2535)}, + {UINT32_C(0x253A), UINT32_C(0x2539)}, + {UINT32_C(0x253E), UINT32_C(0x253D)}, + {UINT32_C(0x2544), UINT32_C(0x2543)}, + {UINT32_C(0x2546), UINT32_C(0x2545)}, + {UINT32_C(0x254A), UINT32_C(0x2549)}, + {UINT32_C(0x2552), UINT32_C(0x2555)}, + {UINT32_C(0x2553), UINT32_C(0x2556)}, + {UINT32_C(0x2554), UINT32_C(0x2557)}, + {UINT32_C(0x2558), UINT32_C(0x255B)}, + {UINT32_C(0x2559), UINT32_C(0x255C)}, + {UINT32_C(0x255A), UINT32_C(0x255D)}, + {UINT32_C(0x255E), UINT32_C(0x2561)}, + {UINT32_C(0x255F), UINT32_C(0x2562)}, + {UINT32_C(0x2560), UINT32_C(0x2563)}, + {UINT32_C(0x256D), UINT32_C(0x256E)}, + {UINT32_C(0x2570), UINT32_C(0x256F)}, + {UINT32_C(0x2574), UINT32_C(0x2576)}, + {UINT32_C(0x2578), UINT32_C(0x257A)}, + {UINT32_C(0x257C), UINT32_C(0x257E)}, }; @@ -55,6 +95,9 @@ libcharconv_mirrored(const char *s, size_t slen, size_t *n, uint_least32_t *cp, | ((c & 0x40u) << 1) | ((c & 0x80u) >> 1) | UINT32_C(0x2800); goto conv; + } else if (UINT32_C(0x1FBA0) <= c && c <= UINT32_C(0x1FBAD)) { + c ^= 1u; + goto conv; } else { for (i = 0u; i < sizeof(pairs) / sizeof(*pairs); i++) { if (c == pairs[i].a) { diff --git a/libcharconv_ogham.c b/libcharconv_ogham.c new file mode 100644 index 0000000..222f802 --- /dev/null +++ b/libcharconv_ogham.c @@ -0,0 +1,31 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "lib-common.h" + + +enum libcharconv_result +libcharconv_ogham(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++, ++*n) { + PLAIN_SELECT(" blfsnhdtcqaoueimgGzrEOUIAp><", 0x1680); + PLAIN_SINGLE('-', 0x1680); + PLAIN_SINGLE('\'', 0x1686); + PLAIN_SINGLE('\"', 0x1687); + PLAIN_SINGLE(',', 0x1681); + PLAIN_SINGLE('/', 0x168B); + PLAIN_SINGLE('|', 0x1690); + PLAIN_SELECT("XO@W#=", 0x1695); /* O here matches O above */ + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_overlaid.c b/libcharconv_overlaid.c index 05cf0d3..5b738b9 100644 --- a/libcharconv_overlaid.c +++ b/libcharconv_overlaid.c @@ -55,14 +55,26 @@ static struct { {UINT32_C(0x23CA), UINT32_C(0x23C6), UINT32_C(0x23C8)}, {(uint_least32_t)'-', UINT32_C(0x238F), UINT32_C(0x2390)}, {(uint_least32_t)'-', UINT32_C(0x2391), UINT32_C(0x2392)}, - {(uint_least32_t)'-', (uint_least32_t)'~', UINT32_C(0x23E6)} + {(uint_least32_t)'-', (uint_least32_t)'~', UINT32_C(0x23E6)}, + {UINT32_C(0x1681), UINT32_C(0x1686), UINT32_C(0x168B)}, + {UINT32_C(0x1682), UINT32_C(0x1687), UINT32_C(0x168C)}, + {UINT32_C(0x1683), UINT32_C(0x1688), UINT32_C(0x168D)}, + {UINT32_C(0x1684), UINT32_C(0x1689), UINT32_C(0x168E)}, + {UINT32_C(0x1685), UINT32_C(0x168A), UINT32_C(0x168F)}, + {UINT32_C(0x2571), UINT32_C(0x2572), UINT32_C(0x2573)}, + {(uint_least32_t)'S', (uint_least32_t)'S', UINT32_C(0x00A7)}, + {(uint_least32_t)'s', (uint_least32_t)'s', UINT32_C(0x00A7)}, + {UINT32_C(0x2E2C), (uint_least32_t)'+', UINT32_C(0x205C)}, + {UINT32_C(0x2058), (uint_least32_t)'x', UINT32_C(0x203B)}, + {UINT32_C(0x2058), (uint_least32_t)'X', UINT32_C(0x203B)}, + {(uint_least32_t)'>', (uint_least32_t)':', UINT32_C(0x2E16)} }; enum libcharconv_result libcharconv_overlaid(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) 
{ - uint_least32_t a, b; + uint_least32_t a, b, c; size_t i, alen, blen; *n = 0; while (slen) { @@ -79,6 +91,44 @@ libcharconv_overlaid(const char *s, size_t slen, size_t *n, uint_least32_t *cp, continue; } + if (UINT32_C(0x1FBA0) <= a && a <= UINT32_C(0x1FBAE)) { + if (*n) + goto no_conv; + if (slen == alen) + return LIBCHARCONV_INDETERMINATE; + blen = libcharconv_decode_utf8_(&s[alen], slen - alen, &b); + if (blen > slen) + return LIBCHARCONV_INDETERMINATE; + if (!blen) + goto no_conv; + if (UINT32_C(0x1FBA0) > b || b > UINT32_C(0x1FBAE)) + goto no_conv_consume; + a = (uint_least32_t)"\x01\x02\x04\x08\x05\x0A\x0C\x03\x09\x06\x0E\x0D\x0B\x07\x0F"[a & 0xF]; + b = (uint_least32_t)"\x01\x02\x04\x08\x05\x0A\x0C\x03\x09\x06\x0E\x0D\x0B\x07\x0F"[b & 0xF]; + c = (uint_least32_t)" \x00\x01\x07\x02\x04\x09\x0D\x03\x08\x05\x0C\x06\x0B\x0A\x0E"[a | b]; + a = c |= UINT32_C(0x1FBA0); + alen += blen; + for (;;) { + if (slen == alen) + goto conv_if_end_calc; + blen = libcharconv_decode_utf8_(&s[alen], slen - alen, &b); + if (blen > slen) + return LIBCHARCONV_INDETERMINATE; + if (!blen) + goto conv_calc; + if (UINT32_C(0x1FBA0) > b || b > UINT32_C(0x1FBAE)) + goto conv_calc; + a &= 0xF; + b &= 0xF; + a = (uint_least32_t)"\x01\x02\x04\x08\x05\x0A\x0C\x03\x09\x06\x0E\x0D\x0B\x07\x0F"[a & 0xF]; + b = (uint_least32_t)"\x01\x02\x04\x08\x05\x0A\x0C\x03\x09\x06\x0E\x0D\x0B\x07\x0F"[b & 0xF]; + c = (uint_least32_t)" \x00\x01\x07\x02\x04\x09\x0D\x03\x08\x05\x0C\x06\x0B\x0A\x0E"[a | b]; + a = c |= UINT32_C(0x1FBA0); + alen += blen; + } + goto conv_calc; + } + for (i = 0u; i < sizeof(pairs) / sizeof(*pairs); i++) { if (a != pairs[i].a && a != pairs[i].b) continue; @@ -102,6 +152,10 @@ libcharconv_overlaid(const char *s, size_t slen, size_t *n, uint_least32_t *cp, no_conv: return LIBCHARCONV_NO_CONVERT; +no_conv_consume: + *n += alen; + return LIBCHARCONV_NO_CONVERT; + conv: if (*n) goto no_conv; @@ -110,4 +164,22 @@ conv: *n += alen + blen; *ncp = 1u; return LIBCHARCONV_CONVERTED; + 
+conv_calc:
+	if (*n)
+		goto no_conv;
+	if (*ncp)
+		*cp = c;
+	*n += alen;
+	*ncp = 1u;
+	return LIBCHARCONV_CONVERTED;
+
+conv_if_end_calc: /* same stores as conv_calc; only the result code differs */
+	if (*n)
+		goto no_conv;
+	if (*ncp)
+		*cp = c;
+	*n += alen;
+	*ncp = 1u;
+	return LIBCHARCONV_CONVERT_IF_END;
 }
diff --git a/libcharconv_raised.c b/libcharconv_raised.c
new file mode 100644
index 0000000..8970b00
--- /dev/null
+++ b/libcharconv_raised.c
@@ -0,0 +1,25 @@
+/* See LICENSE file for copyright and license details. */
+#include "lib-common.h"
+
+/* Replace a leading '.' or ',' in `s` with U+2E33 / U+2E34; other leading bytes are only counted in `*n`. */
+enum libcharconv_result
+libcharconv_raised(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp)
+{
+	uint_least32_t c;
+	*n = 0;
+	for (; slen--; s++, ++*n) { /* every byte that no macro below claims adds 1 to *n */
+		PLAIN_SINGLE('.', 0x2E33); /* U+2E33 RAISED DOT */
+		PLAIN_SINGLE(',', 0x2E34); /* U+2E34 RAISED COMMA */
+	}
+no_conv: /* also the normal loop exit: nothing in `s` was convertible */
+	return LIBCHARCONV_NO_CONVERT;
+
+conv: /* presumably entered by a jump inside PLAIN_SINGLE with the result in c - confirm in lib-common.h */
+	if (*n) /* unconvertible bytes come first: report them instead of converting */
+		goto no_conv;
+	if (*ncp) /* store only when the caller reports room for at least one code point */
+		*cp = c;
+	*n += 1u;
+	*ncp = 1u;
+	return LIBCHARCONV_CONVERTED;
+}
diff --git a/libcharconv_rotated_45deg_cw.c b/libcharconv_rotated_45deg_cw.c
index 2780781..366b43e 100644
--- a/libcharconv_rotated_45deg_cw.c
+++ b/libcharconv_rotated_45deg_cw.c
@@ -15,7 +15,9 @@ static struct {
 } pairs[] = {
 	CYCLE_8(0x1FA04, 0x1FA08, 0x1FA19, 0x1FA1D, 0x1FA2E, 0x1FA32, 0x1FA43, 0x1FA47),
 	CYCLE_8(0x02658, 0x1FA06, 0x1FA0D, 0x1FA1B, 0x1FA22, 0x1FA30, 0x1FA37, 0x1FA45),
-	CYCLE_8(0x0265E, 0x1FA07, 0x1FA13, 0x1FA1C, 0x1FA28, 0x1FA31, 0x1FA3D, 0x1FA46)
+	CYCLE_8(0x0265E, 0x1FA07, 0x1FA13, 0x1FA1C, 0x1FA28, 0x1FA31, 0x1FA3D, 0x1FA46),
+	CYCLE_2(0x2E2C, 0x2058),
+	CYCLE_2(0x203B, 0x205C)
 };
 
 
diff --git a/libcharconv_rotated_90deg_cw.c b/libcharconv_rotated_90deg_cw.c
index 7b23dba..32296f9 100644
--- a/libcharconv_rotated_90deg_cw.c
+++ b/libcharconv_rotated_90deg_cw.c
@@ -38,7 +38,47 @@ static struct {
 	CYCLE_4(0x265C, 0x1FA11, 0x1FA26, 0x1FA3B),
 	CYCLE_4(0x265D, 0x1FA12, 0x1FA27, 0x1FA3C),
 	CYCLE_4(0x265F, 0x1FA14, 0x1FA29, 0x1FA3E),
-	{UINT32_C(0x213A), (uint_least32_t)'Q'}
+	{UINT32_C(0x213A), (uint_least32_t)'Q'},
+	CYCLE_4(0x1FBA0, 0x1FBA1, 0x1FBA3, 0x1FBA2),
+
CYCLE_4(0x1FBAA, 0x1FBAB, 0x1FBAD, 0x1FBAC), + CYCLE_4(0x1FBA4, 0x1FBA7, 0x1FBA5, 0x1FBA6), + CYCLE_2(0x1FBA8, 0x1FBA9), + CYCLE_2(0x2500, 0x2502), + CYCLE_2(0x2501, 0x2503), + CYCLE_2(0x2504, 0x2506), + CYCLE_2(0x2505, 0x2507), + CYCLE_2(0x2508, 0x250A), + CYCLE_2(0x2509, 0x250B), + CYCLE_4(0x250C, 0x2510, 0x2518, 0x2514), + CYCLE_4(0x250D, 0x2511, 0x2519, 0x2515), + CYCLE_4(0x250E, 0x2512, 0x251A, 0x2516), + CYCLE_4(0x250F, 0x2513, 0x251B, 0x2517), + CYCLE_4(0x251F, 0x252D, 0x2526, 0x2536), + CYCLE_4(0x2522, 0x2531, 0x2529, 0x253A), + CYCLE_4(0x2527, 0x2535, 0x251E, 0x252E), + CYCLE_4(0x252A, 0x2539, 0x2521, 0x2532), + CYCLE_4(0x2534, 0x251C, 0x252C, 0x2524), + CYCLE_4(0x2537, 0x251D, 0x252F, 0x2525), + CYCLE_4(0x2538, 0x2520, 0x2530, 0x2528), + CYCLE_4(0x253B, 0x2523, 0x2533, 0x252B), + CYCLE_2(0x253F, 0x2542), + CYCLE_4(0x2541, 0x253D, 0x2540, 0x253E), + CYCLE_4(0x2546, 0x2545, 0x2543, 0x2544), + CYCLE_4(0x2548, 0x2549, 0x2547, 0x254A), + CYCLE_2(0x254C, 0x254E), + CYCLE_2(0x254D, 0x254F), + CYCLE_2(0x2550, 0x2551), + CYCLE_4(0x2552, 0x2556, 0x255B, 0x2559), + CYCLE_4(0x2553, 0x2555, 0x255C, 0x2558), + CYCLE_4(0x2554, 0x2557, 0x255D, 0x255A), + CYCLE_4(0x255E, 0x2565, 0x2561, 0x2568), + CYCLE_4(0x2564, 0x2562, 0x2567, 0x255F), + CYCLE_4(0x2566, 0x2563, 0x2569, 0x2560), + CYCLE_2(0x256A, 0x256B), + CYCLE_4(0x256D, 0x256E, 0x256F, 0x2570), + CYCLE_4(0x2574, 0x2575, 0x2576, 0x2577), + CYCLE_4(0x2578, 0x2579, 0x257A, 0x257B), + CYCLE_4(0x257C, 0x257D, 0x257E, 0x257F), }; diff --git a/libcharconv_stacked.c b/libcharconv_stacked.c index 6f89834..13fc561 100644 --- a/libcharconv_stacked.c +++ b/libcharconv_stacked.c @@ -8,7 +8,9 @@ static struct { uint_least32_t to; } pairs[] = { {(uint_least32_t)'*', (uint_least32_t)'*', UINT32_C(0x2051)}, - {(uint_least32_t)',', (uint_least32_t)',', UINT32_C(0x2E49)} + {(uint_least32_t)',', (uint_least32_t)',', UINT32_C(0x2E49)}, + {(uint_least32_t)'-', (uint_least32_t)'-', UINT32_C(0x2E40)}, + {(uint_least32_t)'_', 
(uint_least32_t)'_', UINT32_C(0x2017)}
 };
 
 
diff --git a/libcharconv_tagbanwa.c b/libcharconv_tagbanwa.c
new file mode 100644
index 0000000..fd23d8c
--- /dev/null
+++ b/libcharconv_tagbanwa.c
@@ -0,0 +1,85 @@
+/* See LICENSE file for copyright and license details. */
+#include "lib-common.h"
+
+/* Convert a leading ASCII vowel, "^i"/"^u", or consonant+vowel pair in `s` to Tagbanwa; other leading bytes are only counted in `*n`. */
+enum libcharconv_result
+libcharconv_tagbanwa(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp)
+{
+	uint_least32_t c;
+	*n = 0;
+	for (; slen--; s++) {
+		PLAIN_SELECT("kgGtdnpbmy", 0x1763); /* KA..YA are contiguous, U+1763..U+176C (presumably c = base + index, then jump to conv - see lib-common.h) */
+		PLAIN_SELECT("lws", 0x176E); /* U+176D is unassigned, so LA, WA, SA start at U+176E */
+		switch (s[0]) {
+		case 'a': c = UINT32_C(0x1760); goto conv1;
+		case 'i': c = UINT32_C(0x1761); goto conv1;
+		case 'u': c = UINT32_C(0x1762); goto conv1;
+		case '^': /* '^' followed by i/u selects the standalone vowel sign */
+			if (!slen) /* '^' is the last byte: the following byte is not available yet */
+				return LIBCHARCONV_INDETERMINATE;
+			switch (s[1]) {
+			case 'i': c = UINT32_C(0x1772); goto conv2;
+			case 'u': c = UINT32_C(0x1773); goto conv2;
+			default:
+				goto no_match;
+			}
+			goto no_match; /* not reached: every case of the inner switch jumps away */
+		default:
+		no_match: /* also entered by goto from the conv label below */
+			*n += 1u;
+			break;
+		}
+	}
+no_conv: /* also the normal loop exit: nothing in `s` was convertible */
+	return LIBCHARCONV_NO_CONVERT;
+
+conv: /* presumably entered from PLAIN_SELECT with the consonant letter in c - confirm in lib-common.h */
+	if (*n) /* unconvertible bytes come first: report them instead of converting */
+		goto no_conv;
+	if (!slen) /* consonant is the last byte: its vowel is not known yet */
+		return LIBCHARCONV_INDETERMINATE;
+	switch (s[1]) {
+	case 'a': /* only the consonant letter is emitted for a following 'a' */
+		if (*ncp >= 1u)
+			cp[0] = c;
+		*n += 2u;
+		*ncp = 1u;
+		return LIBCHARCONV_CONVERTED;
+	case 'i': /* consonant letter followed by vowel sign U+1772 */
+		if (*ncp >= 1u)
+			cp[0] = c;
+		if (*ncp >= 2u)
+			cp[1] = UINT32_C(0x1772);
+		*n += 2u;
+		*ncp = 2u; /* set to 2 even when fewer slots were available for storing */
+		return LIBCHARCONV_CONVERTED;
+	case 'u': /* consonant letter followed by vowel sign U+1773 */
+		if (*ncp >= 1u)
+			cp[0] = c;
+		if (*ncp >= 2u)
+			cp[1] = UINT32_C(0x1773);
+		*n += 2u;
+		*ncp = 2u; /* set to 2 even when fewer slots were available for storing */
+		return LIBCHARCONV_CONVERTED;
+	default: /* consonant without a/i/u after it: count it as unconvertible and resume the scan */
+		goto no_match;
+	}
+
+conv1: /* one input byte consumed, one code point produced */
+	if (*n)
+		goto no_conv;
+	if (*ncp)
+		*cp = c;
+	*n += 1u;
+	*ncp = 1u;
+	return LIBCHARCONV_CONVERTED;
+
+conv2: /* two input bytes consumed ("^i" or "^u"), one code point produced */
+	if (*n)
+		goto no_conv;
+	if (*ncp)
+		*cp = c;
+	*n += 2u;
+	*ncp = 1u;
+	return LIBCHARCONV_CONVERTED;
+}
diff --git a/libcharconv_turned.c b/libcharconv_turned.c
index 2ad33f0..ceeb91c 100644
--- a/libcharconv_turned.c
+++ b/libcharconv_turned.c
@@ -45,7 +45,64 @@ static struct {
 	{UINT32_C(0x23C1), UINT32_C(0x23C2)},
 	{UINT32_C(0x23C7), UINT32_C(0x23C8)},
{UINT32_C(0x238F), UINT32_C(0x2390)}, - {UINT32_C(0x2391), UINT32_C(0x2392)} + {UINT32_C(0x2391), UINT32_C(0x2392)}, + {UINT32_C(0x169B), UINT32_C(0x169C)}, + {UINT32_C(0x1FBA0), UINT32_C(0x1FBA3)}, + {UINT32_C(0x1FBA2), UINT32_C(0x1FBA1)}, + {UINT32_C(0x1FBA4), UINT32_C(0x1FBA5)}, + {UINT32_C(0x1FBAA), UINT32_C(0x1FBAD)}, + {UINT32_C(0x1FBAC), UINT32_C(0x1FBAB)}, + {UINT32_C(0x250C), UINT32_C(0x2518)}, + {UINT32_C(0x250D), UINT32_C(0x2519)}, + {UINT32_C(0x250E), UINT32_C(0x251A)}, + {UINT32_C(0x250F), UINT32_C(0x251B)}, + {UINT32_C(0x2510), UINT32_C(0x2514)}, + {UINT32_C(0x2511), UINT32_C(0x2515)}, + {UINT32_C(0x2512), UINT32_C(0x2516)}, + {UINT32_C(0x2513), UINT32_C(0x2517)}, + {UINT32_C(0x251C), UINT32_C(0x2524)}, + {UINT32_C(0x251D), UINT32_C(0x2525)}, + {UINT32_C(0x251F), UINT32_C(0x2526)}, + {UINT32_C(0x2520), UINT32_C(0x2528)}, + {UINT32_C(0x2522), UINT32_C(0x2529)}, + {UINT32_C(0x2523), UINT32_C(0x252B)}, + {UINT32_C(0x2527), UINT32_C(0x251E)}, + {UINT32_C(0x252A), UINT32_C(0x2521)}, + {UINT32_C(0x252C), UINT32_C(0x2534)}, + {UINT32_C(0x252D), UINT32_C(0x2536)}, + {UINT32_C(0x252E), UINT32_C(0x2535)}, + {UINT32_C(0x252F), UINT32_C(0x2537)}, + {UINT32_C(0x2530), UINT32_C(0x2538)}, + {UINT32_C(0x2531), UINT32_C(0x253A)}, + {UINT32_C(0x2532), UINT32_C(0x2539)}, + {UINT32_C(0x2533), UINT32_C(0x253B)}, + {UINT32_C(0x253E), UINT32_C(0x253D)}, + {UINT32_C(0x2540), UINT32_C(0x2541)}, + {UINT32_C(0x2545), UINT32_C(0x2544)}, + {UINT32_C(0x2546), UINT32_C(0x2543)}, + {UINT32_C(0x2547), UINT32_C(0x2548)}, + {UINT32_C(0x254A), UINT32_C(0x2549)}, + {UINT32_C(0x2552), UINT32_C(0x255B)}, + {UINT32_C(0x2553), UINT32_C(0x255C)}, + {UINT32_C(0x2554), UINT32_C(0x255D)}, + {UINT32_C(0x2555), UINT32_C(0x2558)}, + {UINT32_C(0x2556), UINT32_C(0x2559)}, + {UINT32_C(0x2557), UINT32_C(0x255A)}, + {UINT32_C(0x255E), UINT32_C(0x2561)}, + {UINT32_C(0x255F), UINT32_C(0x2562)}, + {UINT32_C(0x2560), UINT32_C(0x2563)}, + {UINT32_C(0x2564), UINT32_C(0x2567)}, + {UINT32_C(0x2565), 
UINT32_C(0x2568)}, + {UINT32_C(0x2566), UINT32_C(0x2569)}, + {UINT32_C(0x256D), UINT32_C(0x256F)}, + {UINT32_C(0x256E), UINT32_C(0x2570)}, + {UINT32_C(0x2574), UINT32_C(0x2576)}, + {UINT32_C(0x2575), UINT32_C(0x2577)}, + {UINT32_C(0x2578), UINT32_C(0x257A)}, + {UINT32_C(0x2579), UINT32_C(0x257B)}, + {UINT32_C(0x257C), UINT32_C(0x257E)}, + {UINT32_C(0x257D), UINT32_C(0x257F)}, + {UINT32_C(0x2E34), UINT32_C(0x2E32)} }; @@ -115,6 +172,12 @@ libcharconv_turned(const char *s, size_t slen, size_t *n, uint_least32_t *cp, si | ((c & 0x08u) << 3) | ((c & 0x40u) >> 3) | UINT32_C(0x2800); goto conv; + } else if (UINT32_C(0x1681) <= c && c <= UINT32_C(0x1685)) { + c += 5u; + goto conv; + } else if (UINT32_C(0x1686) <= c && c <= UINT32_C(0x168A)) { + c -= 5u; + goto conv; } else { for (i = 0u; i < sizeof(pairs) / sizeof(*pairs); i++) { if (c == pairs[i].a) { |
