diff options
| -rw-r--r-- | Makefile | 12 | ||||
| -rw-r--r-- | convert-to-lycian.c | 18 | ||||
| -rw-r--r-- | convert-to-lydian.c | 18 | ||||
| -rw-r--r-- | convert-to-subscript.c | 18 | ||||
| -rw-r--r-- | convert-to-superscript.c | 18 | ||||
| -rw-r--r-- | libcharconv.h | 128 | ||||
| -rw-r--r-- | libcharconv_latin.c | 31 | ||||
| -rw-r--r-- | libcharconv_lycian.c | 63 | ||||
| -rw-r--r-- | libcharconv_lydian.c | 62 | ||||
| -rw-r--r-- | libcharconv_subscript.c | 29 | ||||
| -rw-r--r-- | libcharconv_superscript.c | 34 |
11 files changed, 429 insertions, 2 deletions
@@ -44,7 +44,11 @@ BIN =\ convert-to-buhid\ convert-to-replacement\ convert-to-bracketed\ - convert-to-variation-selectors + convert-to-variation-selectors\ + convert-to-superscript\ + convert-to-subscript\ + convert-to-lydian\ + convert-to-lycian LIBOBJ =\ libcharconv_decode_utf8_.o\ @@ -76,7 +80,11 @@ LIBOBJ =\ libcharconv_buhid.o\ libcharconv_replacement.o\ libcharconv_bracketed.o\ - libcharconv_variation_selectors.o + libcharconv_variation_selectors.o\ + libcharconv_superscript.o\ + libcharconv_subscript.o\ + libcharconv_lydian.o\ + libcharconv_lycian.o LOBJ = $(LIBOBJ:.o=.lo) diff --git a/convert-to-lycian.c b/convert-to-lycian.c new file mode 100644 index 0000000..23caae1 --- /dev/null +++ b/convert-to-lycian.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_lycian); +} diff --git a/convert-to-lydian.c b/convert-to-lydian.c new file mode 100644 index 0000000..989def7 --- /dev/null +++ b/convert-to-lydian.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_lydian); +} diff --git a/convert-to-subscript.c b/convert-to-subscript.c new file mode 100644 index 0000000..6f39c92 --- /dev/null +++ b/convert-to-subscript.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_subscript); +} diff --git a/convert-to-superscript.c b/convert-to-superscript.c new file mode 100644 index 0000000..62037ec --- /dev/null +++ b/convert-to-superscript.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_superscript); +} diff --git a/libcharconv.h b/libcharconv.h index 969d95d..9eab393 100644 --- a/libcharconv.h +++ b/libcharconv.h @@ -965,4 +965,132 @@ enum libcharconv_result libcharconv_bracketed(const char *s, size_t slen, size_t enum libcharconv_result libcharconv_variation_selectors(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); +/** + * Convert ASCII digits to superscript + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As 
LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_superscript(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Convert ASCII digits to subscript + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_subscript(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Performs conversion from Latin to Lydian + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` 
is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_lydian(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Performs conversion from Latin to Lycian + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_lycian(const char *s, size_t slen, size_t 
*n, uint_least32_t *cp, size_t *ncp); + + #endif diff --git a/libcharconv_latin.c b/libcharconv_latin.c index d3c1817..60a2fb5 100644 --- a/libcharconv_latin.c +++ b/libcharconv_latin.c @@ -299,6 +299,34 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz goto conv3; } + } else if (UINT32_C(0x2070) <= c && c <= UINT32_C(0x2079)) { + /* superscript: U+2070..U+2079 onto '0'..'9'; NOTE(review): this range also covers U+2071 (superscript small letter i) and the unassigned U+2072/U+2073, confirm that mapping them to '1'..'3' is intended */ + c -= (uint_least32_t)UINT32_C(0x2070) - (uint_least32_t)'0'; + goto conv; + } else if (UINT32_C(0x00B2) <= c && c <= UINT32_C(0x00B3)) { + /* superscript two and three: U+00B2..U+00B3 onto '2'..'3' */ + c -= (uint_least32_t)UINT32_C(0x00B2) - (uint_least32_t)'2'; + goto conv; + } else if (c == UINT32_C(0x00B9)) { + /* superscript one: U+00B9 onto '1' */ + c -= (uint_least32_t)UINT32_C(0x00B9) - (uint_least32_t)'1'; + goto conv; + + } else if (UINT32_C(0x2080) <= c && c <= UINT32_C(0x2089)) { + /* subscript: U+2080..U+2089 onto '0'..'9' */ + c -= (uint_least32_t)UINT32_C(0x2080) - (uint_least32_t)'0'; + goto conv; + + } else if (UINT32_C(0x10920) <= c && c <= UINT32_C(0x10939)) { + /* lydian: the string is indexed by the offset from U+10920 */ + c = (uint_least32_t)"abgdeviyklmnorStufqsTAELNc"[c - UINT32_C(0x10920)]; + goto conv; + + } else if (UINT32_C(0x10280) <= c && c <= UINT32_C(0x1029C)) { + /* lycian: the string is indexed by the offset from U+10280 */ + c = (uint_least32_t)"aebBgdiwzDjkqlmnMNupKrstTAEhx"[c - UINT32_C(0x10280)]; + goto conv; + } else { switch (c) { /* shogi */ @@ -428,6 +456,9 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz c = (uint_least32_t)'?'; goto conv; + /* lydian: U+1093F becomes an ASCII double quote */ + case UINT32_C(0x1093F): c = (uint_least32_t)'"'; goto conv; + default: no_match: *n += clen; diff --git a/libcharconv_lycian.c b/libcharconv_lycian.c new file mode 100644 index 0000000..7998ecb --- /dev/null +++ b/libcharconv_lycian.c @@ -0,0 +1,63 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "lib-common.h" + + +static struct { + unsigned char cp_low; /* low bits of the codepoint, OR:ed with 0x10200 below */ + char latin; /* input byte that is converted to that codepoint */ +} lycian[] = { + {0x80, 'a'}, + {0x82, 'b'}, + {0x85, 'd'}, + {0x81, 'e'}, + {0x84, 'g'}, + {0x9B, 'h'}, + {0x86, 'i'}, + {0x8A, 'j'}, + {0x8B, 'k'}, + {0x8D, 'l'}, + {0x8E, 'm'}, + {0x8F, 'n'}, + {0x93, 'p'}, + {0x8C, 'q'}, + {0x95, 'r'}, + {0x96, 's'}, + {0x97, 't'}, + {0x92, 'u'}, + {0x87, 'w'}, + {0x9C, 'x'}, + {0x88, 'z'}, + {0x99, 'A'}, + {0x83, 'B'}, + {0x89, 'D'}, + {0x9A, 'E'}, + {0x94, 'K'}, + {0x90, 'M'}, + {0x91, 'N'}, + {0x98, 'T'} +}; + + +enum libcharconv_result +libcharconv_lycian(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + size_t i; + *n = 0; + for (; slen--; s++) { + for (i = 0u; i < sizeof(lycian) / sizeof(*lycian); i++) + if (*s == lycian[i].latin) + goto conv; + *n += 1u; + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) /* unconvertible bytes precede the match: report those first */ + goto no_conv; + if (*ncp) /* store the codepoint only if the output buffer has room */ + *cp = (uint_least32_t)(UINT32_C(0x10200) | lycian[i].cp_low); + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_lydian.c b/libcharconv_lydian.c new file mode 100644 index 0000000..a3427fa --- /dev/null +++ b/libcharconv_lydian.c @@ -0,0 +1,62 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "lib-common.h" + + +static struct { + unsigned char cp_low; /* low bits of the codepoint, OR:ed with 0x10900 below */ + char latin; /* input byte that is converted to that codepoint */ +} lydian[] = { + {0x20, 'a'}, + {0x21, 'b'}, + {0x39, 'c'}, + {0x23, 'd'}, + {0x24, 'e'}, + {0x31, 'f'}, + {0x22, 'g'}, + {0x26, 'i'}, + {0x28, 'k'}, + {0x29, 'l'}, + {0x2A, 'm'}, + {0x2B, 'n'}, + {0x2C, 'o'}, + {0x32, 'q'}, + {0x2D, 'r'}, + {0x33, 's'}, + {0x2F, 't'}, + {0x30, 'u'}, + {0x25, 'v'}, + {0x27, 'y'}, + {0x35, 'A'}, + {0x36, 'E'}, + {0x37, 'L'}, + {0x38, 'N'}, + {0x2E, 'S'}, + {0x34, 'T'}, + {0x3F, '\"'}, + {0x3F, '\''} +}; + + +enum libcharconv_result +libcharconv_lydian(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + size_t i; + *n = 0; + for (; slen--; s++) { + for (i = 0u; i < sizeof(lydian) / sizeof(*lydian); i++) + if (*s == lydian[i].latin) + goto conv; + *n += 1u; + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) /* unconvertible bytes precede the match: report those first */ + goto no_conv; + if (*ncp) /* store the codepoint only if the output buffer has room */ + *cp = (uint_least32_t)(UINT32_C(0x10900) | lydian[i].cp_low); + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_subscript.c b/libcharconv_subscript.c new file mode 100644 index 0000000..35a3a72 --- /dev/null +++ b/libcharconv_subscript.c @@ -0,0 +1,29 @@ +/* See LICENSE file for copyright and license details. */ +#include "lib-common.h" + + +enum libcharconv_result +libcharconv_subscript(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + if ('0' <= *s && *s <= '9') { + c = (uint_least32_t)(UINT32_C(0x2080) + (unsigned)(*s - '0')); + goto conv; + } else { + *n += 1u; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) /* unconvertible bytes precede the digit: report those first */ + goto no_conv; + if (*ncp) /* store the codepoint only if the output buffer has room */ + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_superscript.c b/libcharconv_superscript.c new file mode 100644 index 0000000..d15f490 --- /dev/null +++ b/libcharconv_superscript.c @@ -0,0 +1,34 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "lib-common.h" + + +enum libcharconv_result +libcharconv_superscript(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + if (*s == '1') { + c = (uint_least32_t)UINT32_C(0x00B9); + } else if ('2' <= *s && *s <= '3') { + c = (uint_least32_t)(UINT32_C(0x00B2) + (unsigned)(*s - '2')); + } else if ('0' <= *s && *s <= '9') { + c = (uint_least32_t)(UINT32_C(0x2070) + (unsigned)(*s - '0')); + } else { + *n += 1u; + continue; + } + goto conv; + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) /* unconvertible bytes precede the digit: report those first */ + goto no_conv; + if (*ncp) /* store the codepoint only if the output buffer has room */ + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} |
