diff options
| author | Mattias Andrée <m@maandree.se> | 2026-01-25 18:17:01 +0100 |
|---|---|---|
| committer | Mattias Andrée <m@maandree.se> | 2026-01-25 18:17:01 +0100 |
| commit | aec513e234367b738ce6bf52bf4fab8fba0928c1 (patch) | |
| tree | 82f8f8bac275e39ef63ff1df68d1eac1c3649056 | |
| parent | Add negative (diff) | |
| download | charconv-aec513e234367b738ce6bf52bf4fab8fba0928c1.tar.gz charconv-aec513e234367b738ce6bf52bf4fab8fba0928c1.tar.bz2 charconv-aec513e234367b738ce6bf52bf4fab8fba0928c1.tar.xz | |
Add symbols
Signed-off-by: Mattias Andrée <m@maandree.se>
| -rw-r--r-- | Makefile | 6 | ||||
| -rw-r--r-- | convert-to-symbols.c | 4 | ||||
| -rw-r--r-- | libcharconv.h | 5 | ||||
| -rw-r--r-- | libcharconv_latin.c | 70 | ||||
| -rw-r--r-- | libcharconv_symbols.c | 149 |
5 files changed, 232 insertions, 2 deletions
```diff
@@ -70,7 +70,8 @@ BIN =\
 	convert-to-sora-sompeng\
 	convert-to-tally-marks\
 	convert-to-ideographic-tally-marks\
-	convert-to-negative
+	convert-to-negative\
+	convert-to-symbols
 
 LIBOBJ =\
 	libcharconv_decode_utf8_.o\
@@ -129,7 +130,8 @@ LIBOBJ =\
 	libcharconv_sora_sompeng.o\
 	libcharconv_tally_marks.o\
 	libcharconv_ideographic_tally_marks.o\
-	libcharconv_negative.o
+	libcharconv_negative.o\
+	libcharconv_symbols.o
 
 LOBJ = $(LIBOBJ:.o=.lo)
 
diff --git a/convert-to-symbols.c b/convert-to-symbols.c
new file mode 100644
index 0000000..a381e0d
--- /dev/null
+++ b/convert-to-symbols.c
@@ -0,0 +1,4 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+SIMPLE(libcharconv_symbols)
diff --git a/libcharconv.h b/libcharconv.h
index 966b8ec..e220062 100644
--- a/libcharconv.h
+++ b/libcharconv.h
@@ -378,6 +378,11 @@ LIBCHARCONV_FUNC_(libcharconv_ideographic_tally_marks);
  */
 LIBCHARCONV_FUNC_(libcharconv_negative);
 
+/**
+ * Convert characters and character sequences to symbols
+ */
+LIBCHARCONV_FUNC_(libcharconv_symbols);
+
 #undef LIBCHARCONV_FUNC_
 
 #endif
diff --git a/libcharconv_latin.c b/libcharconv_latin.c
index 1961b18..d8c27d0 100644
--- a/libcharconv_latin.c
+++ b/libcharconv_latin.c
@@ -20,6 +20,7 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
 	enum libcharconv_result ret = LIBCHARCONV_CONVERTED;
 	uint_least32_t c;
 	char c1, c2, c3, c4, c5, c6;
+	const char *cs;
 	size_t i, clen;
 	unsigned num;
 
@@ -669,6 +670,64 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
 	case UINT32_C(0x13D75): c1 = '4'; goto conv1;
 	case UINT32_C(0x13D76): c1 = '5'; goto conv1;
 
+	/* symbols */
+	case UINT32_C(0x269E): c1 = '-'; c2 = '>'; goto conv2;
+	case UINT32_C(0x269F): c1 = '<'; c2 = '-'; goto conv2;
+	case UINT32_C(0x2120): c1 = 'S'; c2 = 'M'; goto conv2;
+	case UINT32_C(0x2122): c1 = 'T'; c2 = 'M'; goto conv2;
+	case UINT32_C(0x00A9): c1 = '('; c2 = 'C'; c3 = ')'; goto conv3;
+	case UINT32_C(0x1F1AD): c1 = '('; c2 = 'M'; c3 = ')'; goto conv3;
+	case UINT32_C(0x2117): c1 = '('; c2 = 'P'; c3 = ')'; goto conv3;
+	case UINT32_C(0x00AE): c1 = '('; c2 = 'R'; c3 = ')'; goto conv3;
+	case UINT32_C(0x1F10D): c1 = '('; c2 = '0'; c3 = ')'; goto conv3;
+	case UINT32_C(0x1F10F): c1 = '\\'; c2 = '('; c3 = '$'; c4 = ')'; goto conv4;
+	case UINT32_C(0x1F16E): c1 = '\\'; c2 = '('; c3 = 'C'; c4 = ')'; goto conv4;
+	case UINT32_C(0x1F16D): c1 = '('; c2 = 'C'; c3 = 'C'; c4 = ')'; goto conv4;
+	case UINT32_C(0x212E): c1 = 'e'; goto conv1;
+	case UINT32_C(0x2139): c1 = 'i'; goto conv1;
+	case UINT32_C(0x212A): c1 = 'K'; goto conv1;
+	case UINT32_C(0x213B): c1 = 'F'; c2 = 'A'; c3 = 'X'; goto conv3;
+	case UINT32_C(0x2121): c1 = 'T'; c2 = 'E'; c3 = 'L'; goto conv3;
+	case UINT32_C(0x2100): c1 = 'a'; c2 = '/'; c3 = 'c'; goto conv3;
+	case UINT32_C(0x214D): c1 = 'A'; c2 = '/'; c3 = 'S'; goto conv3;
+	case UINT32_C(0x2101): c1 = 'a'; c2 = '/'; c3 = 's'; goto conv3;
+	case UINT32_C(0x00AA): c1 = 'a'; goto conv1;
+	case UINT32_C(0x00BA): c1 = 'o'; goto conv1;
+	case UINT32_C(0x2300): c1 = '/'; c2 = 'o'; goto conv2;
+	case UINT32_C(0x2116): c1 = 'N'; c2 = 'o'; goto conv2;
+	case UINT32_C(0x2118): c1 = 'P'; goto conv1;
+	case UINT32_C(0x214A): c1 = 'P'; c2 = 'L'; goto conv2;
+	case UINT32_C(0x211E): c1 = 'P'; c2 = 'x'; goto conv2;
+	case UINT32_C(0x211F): c1 = 'R'; c2 = '/'; goto conv2;
+	case UINT32_C(0x2123): c1 = 'V'; c2 = '/'; goto conv2;
+	case UINT32_C(0x2125): c1 = 'z'; goto conv1;
+	case UINT32_C(0x23E8): c1 = '1'; c2 = '0'; goto conv2;
+	case UINT32_C(0x2104): c1 = 'C'; c2 = 'L'; goto conv2;
+	case UINT32_C(0x2105): c1 = 'c'; c2 = '/'; c3 = 'o'; goto conv3;
+	case UINT32_C(0x2106): c1 = 'c'; c2 = '/'; c3 = 'u'; goto conv3;
+	case UINT32_C(0x2113): c1 = 'l'; goto conv1;
+	case UINT32_C(0x2114): c1 = 'l'; c2 = 'b'; goto conv2;
+	case UINT32_C(0x1F19B): c1 = '['; c2 = '3'; c3 = 'D'; c4 = ']'; goto conv4;
+	case UINT32_C(0x1F19C): cs = "[2ND SCR]"; goto conv_str;
+	case UINT32_C(0x1F1A7): c1 = '['; c2 = 'H'; c3 = 'D'; c4 = 'R'; c5 = ']'; goto conv5;
+	case UINT32_C(0x1F1A6): c1 = '['; c2 = 'H'; c3 = 'C'; c4 = ']'; goto conv4;
+	case UINT32_C(0x1F1AC): c1 = '['; c2 = 'V'; c3 = 'O'; c4 = 'D'; c5 = ']'; goto conv5;
+	case UINT32_C(0x1F1A3): c1 = '['; c2 = '6'; c3 = '0'; c4 = 'P'; c5 = ']'; goto conv5;
+	case UINT32_C(0x1F1A8): cs = "[HI-RES]"; goto conv_str;
+	case UINT32_C(0x1F1A4): c1 = '['; c2 = '1'; c3 = '2'; c4 = '0'; c5 = 'P'; c6 = ']'; goto conv6;
+	case UINT32_C(0x1F19D): c1 = '['; c2 = '2'; c3 = 'K'; c4 = ']'; goto conv4;
+	case UINT32_C(0x1F19E): c1 = '['; c2 = '4'; c3 = 'K'; c4 = ']'; goto conv4;
+	case UINT32_C(0x1F19F): c1 = '['; c2 = '8'; c3 = 'K'; c4 = ']'; goto conv4;
+	case UINT32_C(0x1F1AB): c1 = '['; c2 = 'U'; c3 = 'H'; c4 = 'D'; c5 = ']'; goto conv5;
+	case UINT32_C(0x1F1AA): c1 = '['; c2 = 'S'; c3 = 'H'; c4 = 'V'; c5 = ']'; goto conv5;
+	case UINT32_C(0x1F1A9): cs = "[LOSSLESS]"; goto conv_str;
+	case UINT32_C(0x1F1A0): c1 = '['; c2 = '5'; c3 = '.'; c4 = '1'; c5 = ']'; goto conv5;
+	case UINT32_C(0x1F1A1): c1 = '['; c2 = '7'; c3 = '.'; c4 = '1'; c5 = ']'; goto conv5;
+	case UINT32_C(0x1F1A2): c1 = '['; c2 = '2'; c3 = '2'; c4 = '.'; c5 = '2'; c6 = ']'; goto conv6;
+	case UINT32_C(0x2141): c1 = 'G'; goto conv1;
+	case UINT32_C(0x2142): c1 = 'L'; goto conv1;
+	case UINT32_C(0x2144): c1 = 'Y'; goto conv1;
+
 	default:
 	no_match:
 		*n += clen;
@@ -767,4 +826,15 @@ conv6:
 	cp[5] = (uint_least32_t)c6;
 	*ncp = 6u;
 	return ret;
+
+conv_str:
+	if (*n)
+		goto no_conv;
+	*n += clen;
+	i = 0u;
+	for (; cs[i]; i++)
+		if (*ncp > i)
+			cp[i] = (uint_least32_t)cs[i];
+	*ncp = i;
+	return ret;
 }
diff --git a/libcharconv_symbols.c b/libcharconv_symbols.c
new file mode 100644
index 0000000..dd2f0ab
--- /dev/null
+++ b/libcharconv_symbols.c
@@ -0,0 +1,149 @@
+/* See LICENSE file for copyright and license details. */
+#include "lib-common.h"
+#include <string.h>
+
+
+static struct {
+	uint_least32_t cp;
+	const char *s;
+} symbols[] = {
+	{UINT32_C(0x269E), "->"},
+	{UINT32_C(0x269F), "<-"},
+	{UINT32_C(0x2120), "SM"},
+	{UINT32_C(0x2122), "TM"},
+	{UINT32_C(0x00A9), "(C)"},
+	{UINT32_C(0x1F1AD), "(M)"},
+	{UINT32_C(0x2117), "(P)"},
+	{UINT32_C(0x00AE), "(R)"},
+	{UINT32_C(0x1F10D), "(0)"},
+	{UINT32_C(0x1F10F), "\\($)"},
+	{UINT32_C(0x1F10F), "(\\$)"},
+	{UINT32_C(0x1F10F), "($\\)"},
+	{UINT32_C(0x1F10F), "($)\\"},
+	{UINT32_C(0x1F16E), "\\(C)"},
+	{UINT32_C(0x1F16E), "(\\C)"},
+	{UINT32_C(0x1F16E), "(C\\)"},
+	{UINT32_C(0x1F16E), "(C)\\"},
+	{UINT32_C(0x1F16D), "(CC)"},
+	{UINT32_C(0x212E), "e"},
+	{UINT32_C(0x2139), "i"},
+	{UINT32_C(0x212A), "K"},
+	{UINT32_C(0x213B), "FAX"},
+	{UINT32_C(0x213B), "Fax"},
+	{UINT32_C(0x2121), "TEL"},
+	{UINT32_C(0x2121), "Tel"},
+	{UINT32_C(0x212B), "A"},
+	{UINT32_C(0x2100), "ac"},
+	{UINT32_C(0x2100), "a/c"},
+	{UINT32_C(0x214D), "AS"},
+	{UINT32_C(0x214D), "A/S"},
+	{UINT32_C(0x2101), "as"},
+	{UINT32_C(0x2101), "a/s"},
+	{UINT32_C(0x00AA), "a"},
+	{UINT32_C(0x00BA), "o"},
+	{UINT32_C(0x2300), "/o"},
+	{UINT32_C(0x2300), "o/"},
+	{UINT32_C(0x2116), "No"},
+	{UINT32_C(0x2118), "p"},
+	{UINT32_C(0x2118), "P"},
+	{UINT32_C(0x214A), "PL"},
+	{UINT32_C(0x211E), "Px"},
+	{UINT32_C(0x211F), "R/"},
+	{UINT32_C(0x211F), "/R"},
+	{UINT32_C(0x2123), "V/"},
+	{UINT32_C(0x2123), "/V"},
+	{UINT32_C(0x2125), "z"},
+	{UINT32_C(0x23E8), "10"},
+	{UINT32_C(0x2104), "CL"},
+	{UINT32_C(0x2105), "c/o"},
+	{UINT32_C(0x2105), "co"},
+	{UINT32_C(0x2105), "C/O"},
+	{UINT32_C(0x2105), "CO"},
+	{UINT32_C(0x2106), "c/u"},
+	{UINT32_C(0x2106), "cu"},
+	{UINT32_C(0x2106), "C/U"},
+	{UINT32_C(0x2106), "CU"},
+	{UINT32_C(0x2113), "l"},
+	{UINT32_C(0x2114), "lb"},
+	{UINT32_C(0x1F19B), "[3D]"},
+	{UINT32_C(0x1F19B), "3D"},
+	{UINT32_C(0x1F19C), "[2ND SCR]"},
+	{UINT32_C(0x1F19C), "2ND SCR"},
+	{UINT32_C(0x1F1A7), "[HDR]"},
+	{UINT32_C(0x1F1A7), "HDR"},
+	{UINT32_C(0x1F1A6), "[HC]"},
+	{UINT32_C(0x1F1A6), "HC"},
+	{UINT32_C(0x1F1AC), "[VOD]"},
+	{UINT32_C(0x1F1AC), "VOD"},
+	{UINT32_C(0x1F1A3), "[60P]"},
+	{UINT32_C(0x1F1A3), "60P"},
+	{UINT32_C(0x1F1A8), "[HI-RES]"},
+	{UINT32_C(0x1F1A8), "HI-RES"},
+	{UINT32_C(0x1F1A4), "[120P]"},
+	{UINT32_C(0x1F1A4), "120P"},
+	{UINT32_C(0x1F19D), "[2K]"},
+	{UINT32_C(0x1F19D), "2K"},
+	{UINT32_C(0x1F19E), "[4K]"},
+	{UINT32_C(0x1F19E), "4K"},
+	{UINT32_C(0x1F19F), "[8K]"},
+	{UINT32_C(0x1F19F), "8K"},
+	{UINT32_C(0x1F1AB), "[UHD]"},
+	{UINT32_C(0x1F1AB), "UHD"},
+	{UINT32_C(0x1F1AA), "[SHV]"},
+	{UINT32_C(0x1F1AA), "SHV"},
+	{UINT32_C(0x1F1A9), "[LOSSLESS]"},
+	{UINT32_C(0x1F1A9), "LOSSLESS"},
+	{UINT32_C(0x1F1A0), "[5.1]"},
+	{UINT32_C(0x1F1A0), "5.1"},
+	{UINT32_C(0x1F1A1), "[7.1]"},
+	{UINT32_C(0x1F1A1), "7.1"},
+	{UINT32_C(0x1F1A2), "[22.2]"},
+	{UINT32_C(0x1F1A2), "22.2"},
+	{UINT32_C(0x2141), "G"},
+	{UINT32_C(0x2142), "L"},
+	{UINT32_C(0x2144), "Y"}
+};
+
+
+enum libcharconv_result
+libcharconv_symbols(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp)
+{
+	size_t i, len, found, found_len;
+	int indeterminate;
+	*n = 0;
+	for (; slen; s++, slen--, ++*n) {
+		indeterminate = 0;
+		found = SIZE_MAX;
+		found_len = 0u;
+		for (i = 0u; i < sizeof(symbols) / sizeof(*symbols); i++) {
+			len = strlen(symbols[i].s);
+			if (strncmp(s, symbols[i].s, len < slen ? len : slen))
+				continue;
+			if (slen < len) {
+				indeterminate = 1;
+				continue;
+			}
+			if (len > found_len) {
+				found = i;
+				found_len = len;
+			}
+		}
+		if (found_len)
+			goto conv;
+		if (*n)
+			goto no_conv;
+		if (indeterminate)
+			return LIBCHARCONV_INDETERMINATE;
+	}
+no_conv:
+	return LIBCHARCONV_NO_CONVERT;
+
+conv:
+	if (*n)
+		goto no_conv;
+	if (*ncp)
+		*cp = symbols[found].cp;
+	*n += found_len;
+	*ncp = 1u;
+	return indeterminate ? LIBCHARCONV_CONVERT_IF_END : LIBCHARCONV_CONVERTED;
+}
```
