From a7c404e03a4b814160ec4c97797a56ee4581a0f7 Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Sun, 25 Jan 2026 12:04:20 +0100 Subject: Add yijing n-grams MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- libcharconv_latin.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) (limited to 'libcharconv_latin.c') diff --git a/libcharconv_latin.c b/libcharconv_latin.c index 9dc81cf..232eb5c 100644 --- a/libcharconv_latin.c +++ b/libcharconv_latin.c @@ -2,12 +2,24 @@ #include "lib-common.h" +static unsigned char yijing_hexagrams[] = { + 0xC0, 0xEB, 0xCC, 0xE0, 0xC9, 0xC5, 0xD8, 0xCB, + 0xC8, 0xF8, 0xE4, 0xF4, 0xFC, 0xFA, 0xE9, 0xD3, + 0xCD, 0xF1, 0xDD, 0xF7, 0xE5, 0xFF, 0xD4, 0xE2, + 0xD9, 0xD1, 0xD5, 0xF3, 0xE8, 0xC3, 0xDA, 0xD6, + 0xEA, 0xDB, 0xF0, 0xDE, 0xF9, 0xEE, 0xD0, 0xEC, + 0xC4, 0xEF, 0xFE, 0xE6, 0xFB, 0xDC, 0xC2, 0xC7, + 0xE1, 0xDF, 0xF6, 0xFD, 0xF5, 0xE7, 0xF2, 0xCF, + 0xCA, 0xED, 0xE3, 0xCE, 0xD2, 0xC6, 0xD7, 0xC1 +}; + + enum libcharconv_result libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) { enum libcharconv_result ret = LIBCHARCONV_CONVERTED; uint_least32_t c; - char c1, c2, c3, c4, c5; + char c1, c2, c3, c4, c5, c6; size_t i, clen; unsigned num; @@ -389,6 +401,39 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz goto use_switch; goto conv1; + } else if (UINT32_C(0x2630) <= c && c <= UINT32_C(0x2637)) { + /* yijing trigrams */ + c1 = (c & 1u) ? '2' : '1'; + c2 = (c & 2u) ? '2' : '1'; + c3 = (c & 4u) ? '2' : '1'; + goto conv3; + + } else if (UINT32_C(0x1D306) <= c && c <= UINT32_C(0x1D356)) { + /* yijing tetragrams */ + c -= UINT32_C(0x1D306); + c4 = (char)((c % 3u) + (unsigned)'1'); + c /= 3u; + c3 = (char)((c % 3u) + (unsigned)'1'); + c /= 3u; + c2 = (char)((c % 3u) + (unsigned)'1'); + c /= 3u; + c1 = (char)((c % 3u) + (unsigned)'1'); + goto conv4; + + } else if (UINT32_C(0x4DC0) <= c && c <= UINT32_C(0x4DFF)) { + /* yijing hexagrams */ + c &= 0xFFu; + for (i = 0u;; i++) + if (yijing_hexagrams[i] == (unsigned char)c) + break; + c6 = (i & 1u) ? '2' : '1'; + c5 = (i & 2u) ? '2' : '1'; + c4 = (i & 4u) ? '2' : '1'; + c3 = (i & 8u) ? '2' : '1'; + c2 = (i & 16u) ? '2' : '1'; + c1 = (i & 32u) ? '2' : '1'; + goto conv6; + } else { use_switch: switch (c) { @@ -542,6 +587,22 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz case UINT32_C(0xA4FA): c1 = '.'; c2 = '.'; goto conv2; case UINT32_C(0x11FB0): c1 = 'Y'; goto conv1; + /* yijing monograms */ + case UINT32_C(0x268A): c1 = '1'; goto conv1; + case UINT32_C(0x268B): c1 = '2'; goto conv1; + case UINT32_C(0x1D300): c1 = '3'; goto conv1; + + /* yijing digrams */ + case UINT32_C(0x268C): c1 = '1'; c2 = '1'; goto conv2; + case UINT32_C(0x268E): c1 = '1'; c2 = '2'; goto conv2; + case UINT32_C(0x1D301): c1 = '1'; c2 = '3'; goto conv2; + case UINT32_C(0x268D): c1 = '2'; c2 = '1'; goto conv2; + case UINT32_C(0x268F): c1 = '2'; c2 = '2'; goto conv2; + case UINT32_C(0x1D302): c1 = '2'; c2 = '3'; goto conv2; + case UINT32_C(0x1D303): c1 = '3'; c2 = '1'; goto conv2; + case UINT32_C(0x1D304): c1 = '3'; c2 = '2'; goto conv2; + case UINT32_C(0x1D305): c1 = '3'; c2 = '3'; goto conv2; + default: no_match: *n += clen; @@ -621,4 +682,23 @@ conv5: cp[4] = (uint_least32_t)c5; *ncp = 5u; return ret; + +conv6: + if (*n) + goto no_conv; + *n += clen; + if (*ncp >= 1u) + cp[0] = (uint_least32_t)c1; + if (*ncp >= 2u) + cp[1] = (uint_least32_t)c2; + if (*ncp >= 3u) + cp[2] = (uint_least32_t)c3; + if (*ncp >= 4u) + cp[3] = (uint_least32_t)c4; + if (*ncp >= 5u) + cp[4] = (uint_least32_t)c5; + if (*ncp >= 6u) + cp[5] = (uint_least32_t)c6; + *ncp = 6u; + return ret; } -- cgit v1.2.3-70-g09d2