diff options
| -rw-r--r-- | Makefile | 6 | ||||
| -rw-r--r-- | convert-to-bracketed.c | 18 | ||||
| -rw-r--r-- | libcharconv.h | 32 | ||||
| -rw-r--r-- | libcharconv_bracketed.c | 141 | ||||
| -rw-r--r-- | libcharconv_decode_utf8_.c | 4 | ||||
| -rw-r--r-- | libcharconv_latin.c | 93 |
6 files changed, 289 insertions, 5 deletions
@@ -42,7 +42,8 @@ BIN =\ convert-to-script\ convert-to-bold-script\ convert-to-buhid\ - convert-to-replacement + convert-to-replacement\ + convert-to-bracketed LIBOBJ =\ libcharconv_decode_utf8_.o\ @@ -72,7 +73,8 @@ LIBOBJ =\ libcharconv_script.o\ libcharconv_bold_script.o\ libcharconv_buhid.o\ - libcharconv_replacement.o + libcharconv_replacement.o\ + libcharconv_bracketed.o LOBJ = $(LIBOBJ:.o=.lo) diff --git a/convert-to-bracketed.c b/convert-to-bracketed.c new file mode 100644 index 0000000..1314677 --- /dev/null +++ b/convert-to-bracketed.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_bracketed); +} diff --git a/libcharconv.h b/libcharconv.h index 8e3c4bc..76bdfd7 100644 --- a/libcharconv.h +++ b/libcharconv.h @@ -901,4 +901,36 @@ enum libcharconv_result libcharconv_buhid(const char *s, size_t slen, size_t *n, enum libcharconv_result libcharconv_replacement(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); +/** + * Convert alphanumerics to bracketed form + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise 
more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_bracketed(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + #endif diff --git a/libcharconv_bracketed.c b/libcharconv_bracketed.c new file mode 100644 index 0000000..cff49c9 --- /dev/null +++ b/libcharconv_bracketed.c @@ -0,0 +1,141 @@ +/* See LICENSE file for copyright and license details. */ +#include "lib-common.h" + + +enum libcharconv_result +libcharconv_bracketed(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + if ('0' <= s[0] && s[0] <= '9') { + if (!slen) + goto indeterminate; + if (s[1] == ',') { + c = UINT32_C(0x1F101) + (unsigned)(s[0] - '0'); + goto conv2; + } else if (s[1] == '.') { + if (s[0] == '0') + c = UINT32_C(0x1F100); + else + c = UINT32_C(0x2488) + (unsigned)(s[0] - '1'); + goto conv2; + } else if (s[0] == '1' && '0' <= s[1] && s[1] <= '9') { + if (slen < 2u) + goto indeterminate; + if (s[2] == '.') + c = UINT32_C(0x2491) + (unsigned)(s[1] - '0'); + else + goto no_match; + goto conv3; + } else if (s[0] == '2' && s[1] == '0') { + if (slen < 2u) + goto indeterminate; + if (s[2] == '.') + c = UINT32_C(0x249B); + else + goto no_match; + goto conv3; + } else { + goto no_match; + } + } else if (s[0] == '(') { + if (!slen--) + goto indeterminate; + if (s[1] == '1') { + if (!slen--) + goto indeterminate; + if (s[2] == ')') { + c = UINT32_C(0x2474); + goto conv3; + } else if ('0' <= s[2] && s[2] <= '9') { + if (!slen--) + goto indeterminate; + if (s[3] != ')') + goto no_match; + c = UINT32_C(0x247D) + (unsigned)(s[2] - '0'); + goto conv4; + } else { + goto no_match; + } + } else if (s[1] == '2') { + if 
(!slen--) + goto indeterminate; + if (s[2] == ')') { + c = UINT32_C(0x2475); + goto conv3; + } else if (s[2] == '0') { + if (!slen--) + goto indeterminate; + if (s[3] != ')') + goto no_match; + c = UINT32_C(0x2487); + goto conv4; + } else { + goto no_match; + } + } else if ('3' <= s[1] && s[1] <= '9') { + if (!slen--) + goto indeterminate; + if (s[2] != ')') + goto no_match; + c = UINT32_C(0x2474) + (unsigned)(s[1] - '0'); + goto conv3; + } else if ('a' <= s[1] && s[1] <= 'z') { + if (!slen--) + goto indeterminate; + if (s[2] != ')') + goto no_match; + c = UINT32_C(0x249C) + (unsigned)(s[1] - 'a'); + goto conv3; + } else if ('A' <= s[1] && s[1] <= 'Z') { + if (!slen--) + goto indeterminate; + if (s[2] != ')') + goto no_match; + c = UINT32_C(0x1F110) + (unsigned)(s[1] - 'A'); + goto conv3; + } else { + goto no_match; + } + } else { + no_match: + *n += 1u; + break; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +indeterminate: + if (*n) + goto no_conv; + return LIBCHARCONV_INDETERMINATE; + +conv2: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 2u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; + +conv3: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 3u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; + +conv4: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 4u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_decode_utf8_.c b/libcharconv_decode_utf8_.c index db66040..7b488e3 100644 --- a/libcharconv_decode_utf8_.c +++ b/libcharconv_decode_utf8_.c @@ -1,5 +1,7 @@ /* See LICENSE file for copyright and license details. 
*/ #include "lib-common.h" +#include <stdlib.h> +#include <stdio.h> size_t @@ -37,6 +39,8 @@ libcharconv_decode_utf8_(const char *s, size_t slen, uint_least32_t *cp) return n; for (i = 1u; i < n; i++) { + if ((s[i] & 0xC0) != 0x80) + return 0u; *cp <<= 6; *cp |= (uint_least32_t)s[i] & 0x3Fu; } diff --git a/libcharconv_latin.c b/libcharconv_latin.c index 052ab7b..25aae91 100644 --- a/libcharconv_latin.c +++ b/libcharconv_latin.c @@ -7,17 +7,21 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz { enum libcharconv_result ret = LIBCHARCONV_CONVERTED; uint_least32_t c; - char c1, c2, c3; + char c1, c2, c3, c4; size_t clen; *n = 0; - for (; slen; s++) { + while (slen) { clen = libcharconv_decode_utf8_(s, slen, &c); - if (clen > slen) + if (clen > slen) { + if (*n) + goto no_conv; return LIBCHARCONV_INDETERMINATE; + } if (!clen) { *n += 1u; slen -= 1u; + s = &s[1]; continue; } slen -= clen; @@ -202,6 +206,73 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz c -= (uint_least32_t)UINT32_C(0x1D4EA) - (uint_least32_t)'a'; goto conv; + } else if (UINT32_C(0x1F110) <= c && c <= UINT32_C(0x1F129)) { + /* bracketed (parenthesised capital) */ + c -= (uint_least32_t)UINT32_C(0x1F110) - (uint_least32_t)'A'; + c1 = '('; + c2 = (char)c; + c3 = ')'; + goto conv3; + } else if (UINT32_C(0x249C) <= c && c <= UINT32_C(0x24B5)) { + /* bracketed (parenthesised small) */ + c -= (uint_least32_t)UINT32_C(0x249C) - (uint_least32_t)'a'; + c1 = '('; + c2 = (char)c; + c3 = ')'; + goto conv3; + } else if (UINT32_C(0x2474) <= c && c <= UINT32_C(0x247C)) { + /* bracketed (parenthesised number) */ + c -= (uint_least32_t)UINT32_C(0x2474) - (uint_least32_t)'1'; + c1 = '('; + c2 = (char)c; + c3 = ')'; + goto conv3; + } else if (UINT32_C(0x247D) <= c && c <= UINT32_C(0x2486)) { + /* bracketed (parenthesised number) */ + c -= (uint_least32_t)UINT32_C(0x247D) - (uint_least32_t)'0'; + c1 = '('; + c2 = '1'; + c3 = (char)c; + c4 = ')'; + goto 
conv4; + } else if (c == UINT32_C(0x2487)) { + /* bracketed (parenthesised number) */ + c1 = '('; + c2 = '2'; + c3 = '0'; + c4 = ')'; + goto conv4; + } else if (UINT32_C(0x1F101) <= c && c <= UINT32_C(0x1F10A)) { + /* bracketed (number comma) */ + c -= (uint_least32_t)UINT32_C(0x1F101) - (uint_least32_t)'0'; + c1 = (char)c; + c2 = ','; + goto conv2; + } else if (c == UINT32_C(0x1F100)) { + /* bracketed (number full stop) */ + c1 = '0'; + c2 = '.'; + goto conv2; + } else if (UINT32_C(0x2488) <= c && c <= UINT32_C(0x2490)) { + /* bracketed (number full stop) */ + c -= (uint_least32_t)UINT32_C(0x2488) - (uint_least32_t)'1'; + c1 = (char)c; + c2 = '.'; + goto conv2; + } else if (UINT32_C(0x2491) <= c && c <= UINT32_C(0x249A)) { + /* bracketed (number full stop) */ + c -= (uint_least32_t)UINT32_C(0x2491) - (uint_least32_t)'0'; + c1 = '1'; + c2 = (char)c; + c3 = '.'; + goto conv3; + } else if (c == UINT32_C(0x249B)) { + /* bracketed (number full stop) */ + c1 = '2'; + c2 = '0'; + c3 = '.'; + goto conv3; + } else { switch (c) { /* shogi */ @@ -334,6 +405,7 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz default: no_match: *n += clen; + s = &s[clen]; break; } } @@ -375,4 +447,19 @@ conv3_prechecked: cp[2] = (uint_least32_t)c3; *ncp = 3u; return ret; + +conv4: + if (*n) + goto no_conv; + *n += clen; + if (*ncp >= 1u) + cp[0] = (uint_least32_t)c1; + if (*ncp >= 2u) + cp[1] = (uint_least32_t)c2; + if (*ncp >= 3u) + cp[2] = (uint_least32_t)c3; + if (*ncp >= 4u) + cp[3] = (uint_least32_t)c4; + *ncp = 4u; + return ret; } |
