diff options
Diffstat (limited to '')
| -rw-r--r-- | Makefile | 6 | ||||
| -rw-r--r-- | convert-to-variation-selectors.c | 18 | ||||
| -rw-r--r-- | libcharconv.h | 32 | ||||
| -rw-r--r-- | libcharconv_latin.c | 28 | ||||
| -rw-r--r-- | libcharconv_variation_selectors.c | 54 |
5 files changed, 136 insertions, 2 deletions
@@ -43,7 +43,8 @@ BIN =\ convert-to-bold-script\ convert-to-buhid\ convert-to-replacement\ - convert-to-bracketed + convert-to-bracketed\ + convert-to-variation-selectors LIBOBJ =\ libcharconv_decode_utf8_.o\ @@ -74,7 +75,8 @@ LIBOBJ =\ libcharconv_bold_script.o\ libcharconv_buhid.o\ libcharconv_replacement.o\ - libcharconv_bracketed.o + libcharconv_bracketed.o\ + libcharconv_variation_selectors.o LOBJ = $(LIBOBJ:.o=.lo) diff --git a/convert-to-variation-selectors.c b/convert-to-variation-selectors.c new file mode 100644 index 0000000..285d888 --- /dev/null +++ b/convert-to-variation-selectors.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + /* Entry point of the convert-to-variation-selectors utility. It calls usage() from the default case inside ARGBEGIN/ARGEND and again if argc is still non-zero afterwards, then returns the value of convert() for libcharconv_variation_selectors. USAGE, ARGBEGIN, ARGEND, usage() and convert() are not defined in this hunk (presumably they come from common.h), so their exact behaviour should be confirmed there. */ +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: /* presumably reached for every option flag: none are accepted */ + usage(); + } ARGEND; + if (argc) /* presumably argc now counts leftover operands: none are accepted */ + usage(); + /* the result of convert() becomes the return value of main */ + return convert(&libcharconv_variation_selectors); +} diff --git a/libcharconv.h b/libcharconv.h index 76bdfd7..969d95d 100644 --- a/libcharconv.h +++ b/libcharconv.h @@ -933,4 +933,36 @@ enum libcharconv_result libcharconv_replacement(const char *s, size_t slen, size enum libcharconv_result libcharconv_bracketed(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); +/** + * Convert numbers to VARIATION SELECTORs + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all 
text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_variation_selectors(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + #endif diff --git a/libcharconv_latin.c b/libcharconv_latin.c index 25aae91..d3c1817 100644 --- a/libcharconv_latin.c +++ b/libcharconv_latin.c @@ -9,6 +9,7 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz uint_least32_t c; char c1, c2, c3, c4; size_t clen; + unsigned num; *n = 0; while (slen) { @@ -273,6 +274,31 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz c3 = '.'; goto conv3; + } else if (UINT32_C(0xFE00) <= c && c <= UINT32_C(0xFE0F)) { + /* variation selectors */ + num = (unsigned)(c - UINT32_C(0xFE00)) + 1u; + if (num < 10) { + c1 = (char)('0' + num); + goto conv1; + } else { + c1 = (char)('0' + num / 10); + c2 = (char)('0' + num % 10); + goto conv2; + } + } else if (UINT32_C(0xE0100) <= c && c <= UINT32_C(0xE01EF)) { + /* variation selectors */ + num = (unsigned)(c - UINT32_C(0xE0100)) + 17u; + if (num < 100) { + c1 = (char)('0' + num / 10); + c2 = (char)('0' + num % 10); + goto conv2; + } else { + c1 = (char)('0' + num / 100); + c2 = (char)('0' + num / 10 % 10); + c3 = (char)('0' + num % 10); + goto conv3; + } + } else { switch (c) { /* shogi */ @@ -413,6 +439,8 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz no_conv: return LIBCHARCONV_NO_CONVERT; +conv1: + c = (uint_least32_t)c1; conv: if (*n) goto no_conv; diff --git a/libcharconv_variation_selectors.c b/libcharconv_variation_selectors.c new file mode 100644 index 0000000..a5ec8e0 --- /dev/null +++ 
b/libcharconv_variation_selectors.c @@ -0,0 +1,54 @@ +/* See LICENSE file for copyright and license details. */ +#include "lib-common.h" + + /* Convert a decimal number at the very start of `s` into one codepoint: values 1 to 16 become 0xFE00 + (value - 1) and values 17 to 256 become 0xE0100 + (value - 17). The first byte must be an ASCII digit 1-9; at most three digits are read, and a three-digit value above 256 is cut back to its first two digits. Returns LIBCHARCONV_NO_CONVERT with `*n` = 1 when the first byte is anything else (`*n` = 0 when `slen` is 0), LIBCHARCONV_CONVERTED with `*n` = number of digits consumed, or LIBCHARCONV_CONVERT_IF_END (same outputs) when the input ran out after one or two digits. On conversion `*cp` is written only if `*ncp` was non-zero on entry, and `*ncp` is set to 1. */ +enum libcharconv_result +libcharconv_variation_selectors(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + enum libcharconv_result ret = LIBCHARCONV_CONVERTED; + uint_least32_t c; + *n = 0; + for (; slen--; s++) { /* slen now counts the bytes after s[0] */ + if ('1' <= s[0] && s[0] <= '9') { + c = (uint_least32_t)(s[0] - '0'); + if (!slen--) /* nothing after the first digit: a later digit could still extend the number */ + goto conv_if_end; + if ('0' > s[1] || s[1] > '9') /* one-digit number */ + goto conv; + c *= 10u; + c += (uint_least32_t)(s[1] - '0'); + if (!slen--) /* nothing after the second digit. NOTE(review): for values of 26 or more any third digit would exceed 256 and be dropped, so the result looks already determined here - confirm whether LIBCHARCONV_CONVERTED would be more precise */ + goto conv_if_end; + if ('0' > s[2] || s[2] > '9') /* two-digit number */ + goto conv; + c *= 10u; + c += (uint_least32_t)(s[2] - '0'); + if (c > 256u) { /* above the largest accepted value: drop the third digit again */ + c /= 10u; + goto conv; + } + goto conv; + } else { /* first byte is not a digit 1-9: report one unconvertible byte and stop */ + *n += 1u; + break; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv_if_end: + ret = LIBCHARCONV_CONVERT_IF_END; +conv: + if (*n) /* NOTE(review): appears unreachable, because the loop above breaks right after the only place that increments *n */ + goto no_conv; + *n = c < 10u ? 1u : c < 100u ? 2u : 3u; /* number of digits consumed */ + if (c < 17u) + c = UINT32_C(0xFE00) + (c - 1u); + else + c = UINT32_C(0xE0100) + (c - 17u); + if (*ncp) /* write the codepoint only when the caller reported room for at least one */ + *cp = c; + *ncp = 1u; + return ret; + +} |
