diff options
| -rw-r--r-- | Makefile | 6 | ||||
| -rw-r--r-- | convert-to-overlaid.c | 4 | ||||
| -rw-r--r-- | libcharconv.h | 5 | ||||
| -rw-r--r-- | libcharconv_latin.c | 17 | ||||
| -rw-r--r-- | libcharconv_overlaid.c | 66 |
5 files changed, 95 insertions, 3 deletions
@@ -61,7 +61,8 @@ BIN =\ convert-to-yijing-tetragrams\ convert-to-yijing-hexagrams\ convert-to-vulgar-fractions\ - convert-to-flipped + convert-to-flipped\ + convert-to-overlaid LIBOBJ =\ libcharconv_decode_utf8_.o\ @@ -111,7 +112,8 @@ LIBOBJ =\ libcharconv_yijing_tetragrams.o\ libcharconv_yijing_hexagrams.o\ libcharconv_vulgar_fractions.o\ - libcharconv_flipped.o + libcharconv_flipped.o\ + libcharconv_overlaid.o LOBJ = $(LIBOBJ:.o=.lo) diff --git a/convert-to-overlaid.c b/convert-to-overlaid.c new file mode 100644 index 0000000..803cdd6 --- /dev/null +++ b/convert-to-overlaid.c @@ -0,0 +1,4 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +SIMPLE(libcharconv_overlaid) diff --git a/libcharconv.h b/libcharconv.h index 4aa9d27..289f9e8 100644 --- a/libcharconv.h +++ b/libcharconv.h @@ -333,6 +333,11 @@ LIBCHARCONV_FUNC_(libcharconv_vulgar_fractions); */ LIBCHARCONV_FUNC_(libcharconv_flipped); +/** + * Overlay characters + */ +LIBCHARCONV_FUNC_(libcharconv_overlaid); + #undef LIBCHARCONV_FUNC_ #endif diff --git a/libcharconv_latin.c b/libcharconv_latin.c index c52044c..ee975e1 100644 --- a/libcharconv_latin.c +++ b/libcharconv_latin.c @@ -18,7 +18,7 @@ enum libcharconv_result libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) { enum libcharconv_result ret = LIBCHARCONV_CONVERTED; - uint_least32_t c; + uint_least32_t c, cp1, cp2; char c1, c2, c3, c4, c5, c6; size_t i, clen; unsigned num; @@ -626,6 +626,10 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz case UINT32_C(0x215E): c1 = '7'; c2 = '/'; c3 = '8'; goto conv3; case UINT32_C(0x2152): c1 = '1'; c2 = '/'; c3 = '1'; c4 = '0'; goto conv4; + /* overlaid */ + case UINT32_C(0x203D): c1 = '!'; c2 = '?'; goto conv2; + case UINT32_C(0x2E18): cp1 = UINT32_C(0xBF); cp2 = UINT32_C(0xA1); goto conv2cp; + default: no_match: *n += clen; @@ -648,6 +652,17 @@ conv: *ncp = 1u; return ret; +conv2cp: + if (*n) + goto no_conv; + *n += clen; + if (*ncp >= 1u) + cp[0] = cp1; + if (*ncp >= 2u) + cp[1] = cp2; + *ncp = 2u; + return ret; + conv2: if (*n) goto no_conv; diff --git a/libcharconv_overlaid.c b/libcharconv_overlaid.c new file mode 100644 index 0000000..7f3cab1 --- /dev/null +++ b/libcharconv_overlaid.c @@ -0,0 +1,66 @@ +/* See LICENSE file for copyright and license details. */ +#include "lib-common.h" + + +static struct { + uint_least32_t a; + uint_least32_t b; + uint_least32_t to; +} pairs[] = { + {0x00BF, 0x00A1, 0x2E18}, + {0x0021, 0x003F, 0x203D} +}; + + +enum libcharconv_result +libcharconv_overlaid(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t a, b; + size_t i, alen, blen; + *n = 0; + while (slen) { + alen = libcharconv_decode_utf8_(s, slen, &a); + if (alen > slen) { + if (*n) + goto no_conv; + return LIBCHARCONV_INDETERMINATE; + } + if (!alen) { + *n += 1u; + slen -= 1u; + s = &s[1]; + continue; + } + + for (i = 0u; i < sizeof(pairs) / sizeof(*pairs); i++) { + if (a != pairs[i].a && a != pairs[i].b) + continue; + if (*n) + goto no_conv; + if (slen == alen) + return LIBCHARCONV_INDETERMINATE; + blen = libcharconv_decode_utf8_(&s[alen], slen - alen, &b); + if (blen > slen) + return LIBCHARCONV_INDETERMINATE; + if (!blen) + goto no_conv; + if ((a ^ b) == (pairs[i].a ^ pairs[i].b)) + goto conv; + } + + *n += alen; + s = &s[alen]; + slen -= alen; + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = pairs[i].to; + *n += alen + blen; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} |
