aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Mattias Andrée <m@maandree.se> 2026-01-25 18:17:01 +0100
committer Mattias Andrée <m@maandree.se> 2026-01-25 18:17:01 +0100
commit aec513e234367b738ce6bf52bf4fab8fba0928c1 (patch)
tree 82f8f8bac275e39ef63ff1df68d1eac1c3649056
parent Add negative (diff)
download charconv-aec513e234367b738ce6bf52bf4fab8fba0928c1.tar.gz
charconv-aec513e234367b738ce6bf52bf4fab8fba0928c1.tar.bz2
charconv-aec513e234367b738ce6bf52bf4fab8fba0928c1.tar.xz
Add symbols
Signed-off-by: Mattias Andrée <m@maandree.se>
-rw-r--r-- Makefile 6
-rw-r--r-- convert-to-symbols.c 4
-rw-r--r-- libcharconv.h 5
-rw-r--r-- libcharconv_latin.c 70
-rw-r--r-- libcharconv_symbols.c 149
5 files changed, 232 insertions, 2 deletions
diff --git a/Makefile b/Makefile
index 520abea..b917d65 100644
--- a/Makefile
+++ b/Makefile
@@ -70,7 +70,8 @@ BIN =\
convert-to-sora-sompeng\
convert-to-tally-marks\
convert-to-ideographic-tally-marks\
- convert-to-negative
+ convert-to-negative\
+ convert-to-symbols
LIBOBJ =\
libcharconv_decode_utf8_.o\
@@ -129,7 +130,8 @@ LIBOBJ =\
libcharconv_sora_sompeng.o\
libcharconv_tally_marks.o\
libcharconv_ideographic_tally_marks.o\
- libcharconv_negative.o
+ libcharconv_negative.o\
+ libcharconv_symbols.o
LOBJ = $(LIBOBJ:.o=.lo)
diff --git a/convert-to-symbols.c b/convert-to-symbols.c
new file mode 100644
index 0000000..a381e0d
--- /dev/null
+++ b/convert-to-symbols.c
@@ -0,0 +1,4 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+/* NOTE(review): SIMPLE is not defined in this view (presumably it comes from common.h); it is expanded here with libcharconv_symbols as its only argument -- confirm what it generates */
+SIMPLE(libcharconv_symbols)
diff --git a/libcharconv.h b/libcharconv.h
index 966b8ec..e220062 100644
--- a/libcharconv.h
+++ b/libcharconv.h
@@ -378,6 +378,11 @@ LIBCHARCONV_FUNC_(libcharconv_ideographic_tally_marks);
*/
LIBCHARCONV_FUNC_(libcharconv_negative);
+/**
+ * Convert characters and character sequences to symbols
+ */
+LIBCHARCONV_FUNC_(libcharconv_symbols);
+
#undef LIBCHARCONV_FUNC_
#endif
diff --git a/libcharconv_latin.c b/libcharconv_latin.c
index 1961b18..d8c27d0 100644
--- a/libcharconv_latin.c
+++ b/libcharconv_latin.c
@@ -20,6 +20,7 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
enum libcharconv_result ret = LIBCHARCONV_CONVERTED;
uint_least32_t c;
char c1, c2, c3, c4, c5, c6;
+ const char *cs;
size_t i, clen;
unsigned num;
@@ -669,6 +670,64 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
case UINT32_C(0x13D75): c1 = '4'; goto conv1;
case UINT32_C(0x13D76): c1 = '5'; goto conv1;
+ /* symbols */
+ case UINT32_C(0x269E): c1 = '-'; c2 = '>'; goto conv2;
+ case UINT32_C(0x269F): c1 = '<'; c2 = '-'; goto conv2;
+ case UINT32_C(0x2120): c1 = 'S'; c2 = 'M'; goto conv2;
+ case UINT32_C(0x2122): c1 = 'T'; c2 = 'M'; goto conv2;
+ case UINT32_C(0x00A9): c1 = '('; c2 = 'C'; c3 = ')'; goto conv3;
+ case UINT32_C(0x1F1AD): c1 = '('; c2 = 'M'; c3 = ')'; goto conv3;
+ case UINT32_C(0x2117): c1 = '('; c2 = 'P'; c3 = ')'; goto conv3;
+ case UINT32_C(0x00AE): c1 = '('; c2 = 'R'; c3 = ')'; goto conv3;
+ case UINT32_C(0x1F10D): c1 = '('; c2 = '0'; c3 = ')'; goto conv3;
+ case UINT32_C(0x1F10F): c1 = '\\'; c2 = '('; c3 = '$'; c4 = ')'; goto conv4;
+ case UINT32_C(0x1F16E): c1 = '\\'; c2 = '('; c3 = 'C'; c4 = ')'; goto conv4;
+ case UINT32_C(0x1F16D): c1 = '('; c2 = 'C'; c3 = 'C'; c4 = ')'; goto conv4;
+ case UINT32_C(0x212E): c1 = 'e'; goto conv1;
+ case UINT32_C(0x2139): c1 = 'i'; goto conv1;
+ case UINT32_C(0x212A): c1 = 'K'; goto conv1;
+ case UINT32_C(0x213B): c1 = 'F'; c2 = 'A'; c3 = 'X'; goto conv3;
+ case UINT32_C(0x2121): c1 = 'T'; c2 = 'E'; c3 = 'L'; goto conv3;
+ case UINT32_C(0x2100): c1 = 'a'; c2 = '/'; c3 = 'c'; goto conv3;
+ case UINT32_C(0x214D): c1 = 'A'; c2 = '/'; c3 = 'S'; goto conv3;
+ case UINT32_C(0x2101): c1 = 'a'; c2 = '/'; c3 = 's'; goto conv3;
+ case UINT32_C(0x00AA): c1 = 'a'; goto conv1;
+ case UINT32_C(0x00BA): c1 = 'o'; goto conv1;
+ case UINT32_C(0x2300): c1 = '/'; c2 = 'o'; goto conv2;
+ case UINT32_C(0x2116): c1 = 'N'; c2 = 'o'; goto conv2;
+ case UINT32_C(0x2118): c1 = 'P'; goto conv1;
+ case UINT32_C(0x214A): c1 = 'P'; c2 = 'L'; goto conv2;
+ case UINT32_C(0x211E): c1 = 'P'; c2 = 'x'; goto conv2;
+ case UINT32_C(0x211F): c1 = 'R'; c2 = '/'; goto conv2;
+ case UINT32_C(0x2123): c1 = 'V'; c2 = '/'; goto conv2;
+ case UINT32_C(0x2125): c1 = 'z'; goto conv1;
+ case UINT32_C(0x23E8): c1 = '1'; c2 = '0'; goto conv2;
+ case UINT32_C(0x2104): c1 = 'C'; c2 = 'L'; goto conv2;
+ case UINT32_C(0x2105): c1 = 'c'; c2 = '/'; c3 = 'o'; goto conv3;
+ case UINT32_C(0x2106): c1 = 'c'; c2 = '/'; c3 = 'u'; goto conv3;
+ case UINT32_C(0x2113): c1 = 'l'; goto conv1;
+ case UINT32_C(0x2114): c1 = 'l'; c2 = 'b'; goto conv2;
+ case UINT32_C(0x1F19B): c1 = '['; c2 = '3'; c3 = 'D'; c4 = ']'; goto conv4;
+ case UINT32_C(0x1F19C): cs = "[2ND SCR]"; goto conv_str;
+ case UINT32_C(0x1F1A7): c1 = '['; c2 = 'H'; c3 = 'D'; c4 = 'R'; c5 = ']'; goto conv5;
+ case UINT32_C(0x1F1A6): c1 = '['; c2 = 'H'; c3 = 'C'; c4 = ']'; goto conv4;
+ case UINT32_C(0x1F1AC): c1 = '['; c2 = 'V'; c3 = 'O'; c4 = 'D'; c5 = ']'; goto conv5;
+ case UINT32_C(0x1F1A3): c1 = '['; c2 = '6'; c3 = '0'; c4 = 'P'; c5 = ']'; goto conv5;
+ case UINT32_C(0x1F1A8): cs = "[HI-RES]"; goto conv_str;
+ case UINT32_C(0x1F1A4): c1 = '['; c2 = '1'; c3 = '2'; c4 = '0'; c5 = 'P'; c6 = ']'; goto conv6;
+ case UINT32_C(0x1F19D): c1 = '['; c2 = '2'; c3 = 'K'; c4 = ']'; goto conv4;
+ case UINT32_C(0x1F19E): c1 = '['; c2 = '4'; c3 = 'K'; c4 = ']'; goto conv4;
+ case UINT32_C(0x1F19F): c1 = '['; c2 = '8'; c3 = 'K'; c4 = ']'; goto conv4;
+ case UINT32_C(0x1F1AB): c1 = '['; c2 = 'U'; c3 = 'H'; c4 = 'D'; c5 = ']'; goto conv5;
+ case UINT32_C(0x1F1AA): c1 = '['; c2 = 'S'; c3 = 'H'; c4 = 'V'; c5 = ']'; goto conv5;
+ case UINT32_C(0x1F1A9): cs = "[LOSSLESS]"; goto conv_str;
+ case UINT32_C(0x1F1A0): c1 = '['; c2 = '5'; c3 = '.'; c4 = '1'; c5 = ']'; goto conv5;
+ case UINT32_C(0x1F1A1): c1 = '['; c2 = '7'; c3 = '.'; c4 = '1'; c5 = ']'; goto conv5;
+ case UINT32_C(0x1F1A2): c1 = '['; c2 = '2'; c3 = '2'; c4 = '.'; c5 = '2'; c6 = ']'; goto conv6;
+ case UINT32_C(0x2141): c1 = 'G'; goto conv1;
+ case UINT32_C(0x2142): c1 = 'L'; goto conv1;
+ case UINT32_C(0x2144): c1 = 'Y'; goto conv1;
+
default:
no_match:
*n += clen;
@@ -767,4 +826,15 @@ conv6:
cp[5] = (uint_least32_t)c6;
*ncp = 6u;
return ret;
+
+conv_str:
+ if (*n)
+ goto no_conv;
+ *n += clen;
+ i = 0u;
+ for (; cs[i]; i++)
+ if (*ncp > i)
+ cp[i] = (uint_least32_t)cs[i];
+ *ncp = i;
+ return ret;
}
diff --git a/libcharconv_symbols.c b/libcharconv_symbols.c
new file mode 100644
index 0000000..dd2f0ab
--- /dev/null
+++ b/libcharconv_symbols.c
@@ -0,0 +1,149 @@
+/* See LICENSE file for copyright and license details. */
+#include "lib-common.h"
+#include <string.h>
+
+/*
+ * Lookup table used by libcharconv_symbols(): each entry pairs a
+ * replacement code point (`cp`) with one ASCII spelling (`s`) that is
+ * matched against the start of the input. A code point may appear
+ * several times, once per accepted spelling (for example with and
+ * without brackets or slashes). When more than one spelling matches,
+ * the lookup keeps the longest one.
+ *
+ * NOTE(review): nothing in this file writes to the table, so it could
+ * probably be declared const -- confirm before changing.
+ */
+static struct {
+ uint_least32_t cp;
+ const char *s;
+} symbols[] = {
+ {UINT32_C(0x269E), "->"},
+ {UINT32_C(0x269F), "<-"},
+ {UINT32_C(0x2120), "SM"},
+ {UINT32_C(0x2122), "TM"},
+ {UINT32_C(0x00A9), "(C)"},
+ {UINT32_C(0x1F1AD), "(M)"},
+ {UINT32_C(0x2117), "(P)"},
+ {UINT32_C(0x00AE), "(R)"},
+ {UINT32_C(0x1F10D), "(0)"},
+ {UINT32_C(0x1F10F), "\\($)"},
+ {UINT32_C(0x1F10F), "(\\$)"},
+ {UINT32_C(0x1F10F), "($\\)"},
+ {UINT32_C(0x1F10F), "($)\\"},
+ {UINT32_C(0x1F16E), "\\(C)"},
+ {UINT32_C(0x1F16E), "(\\C)"},
+ {UINT32_C(0x1F16E), "(C\\)"},
+ {UINT32_C(0x1F16E), "(C)\\"},
+ {UINT32_C(0x1F16D), "(CC)"},
+ {UINT32_C(0x212E), "e"},
+ {UINT32_C(0x2139), "i"},
+ {UINT32_C(0x212A), "K"},
+ {UINT32_C(0x213B), "FAX"},
+ {UINT32_C(0x213B), "Fax"},
+ {UINT32_C(0x2121), "TEL"},
+ {UINT32_C(0x2121), "Tel"},
+ {UINT32_C(0x212B), "A"},
+ {UINT32_C(0x2100), "ac"},
+ {UINT32_C(0x2100), "a/c"},
+ {UINT32_C(0x214D), "AS"},
+ {UINT32_C(0x214D), "A/S"},
+ {UINT32_C(0x2101), "as"},
+ {UINT32_C(0x2101), "a/s"},
+ {UINT32_C(0x00AA), "a"},
+ {UINT32_C(0x00BA), "o"},
+ {UINT32_C(0x2300), "/o"},
+ {UINT32_C(0x2300), "o/"},
+ {UINT32_C(0x2116), "No"},
+ {UINT32_C(0x2118), "p"},
+ {UINT32_C(0x2118), "P"},
+ {UINT32_C(0x214A), "PL"},
+ {UINT32_C(0x211E), "Px"},
+ {UINT32_C(0x211F), "R/"},
+ {UINT32_C(0x211F), "/R"},
+ {UINT32_C(0x2123), "V/"},
+ {UINT32_C(0x2123), "/V"},
+ {UINT32_C(0x2125), "z"},
+ {UINT32_C(0x23E8), "10"},
+ {UINT32_C(0x2104), "CL"},
+ {UINT32_C(0x2105), "c/o"},
+ {UINT32_C(0x2105), "co"},
+ {UINT32_C(0x2105), "C/O"},
+ {UINT32_C(0x2105), "CO"},
+ {UINT32_C(0x2106), "c/u"},
+ {UINT32_C(0x2106), "cu"},
+ {UINT32_C(0x2106), "C/U"},
+ {UINT32_C(0x2106), "CU"},
+ {UINT32_C(0x2113), "l"},
+ {UINT32_C(0x2114), "lb"},
+ {UINT32_C(0x1F19B), "[3D]"},
+ {UINT32_C(0x1F19B), "3D"},
+ {UINT32_C(0x1F19C), "[2ND SCR]"},
+ {UINT32_C(0x1F19C), "2ND SCR"},
+ {UINT32_C(0x1F1A7), "[HDR]"},
+ {UINT32_C(0x1F1A7), "HDR"},
+ {UINT32_C(0x1F1A6), "[HC]"},
+ {UINT32_C(0x1F1A6), "HC"},
+ {UINT32_C(0x1F1AC), "[VOD]"},
+ {UINT32_C(0x1F1AC), "VOD"},
+ {UINT32_C(0x1F1A3), "[60P]"},
+ {UINT32_C(0x1F1A3), "60P"},
+ {UINT32_C(0x1F1A8), "[HI-RES]"},
+ {UINT32_C(0x1F1A8), "HI-RES"},
+ {UINT32_C(0x1F1A4), "[120P]"},
+ {UINT32_C(0x1F1A4), "120P"},
+ {UINT32_C(0x1F19D), "[2K]"},
+ {UINT32_C(0x1F19D), "2K"},
+ {UINT32_C(0x1F19E), "[4K]"},
+ {UINT32_C(0x1F19E), "4K"},
+ {UINT32_C(0x1F19F), "[8K]"},
+ {UINT32_C(0x1F19F), "8K"},
+ {UINT32_C(0x1F1AB), "[UHD]"},
+ {UINT32_C(0x1F1AB), "UHD"},
+ {UINT32_C(0x1F1AA), "[SHV]"},
+ {UINT32_C(0x1F1AA), "SHV"},
+ {UINT32_C(0x1F1A9), "[LOSSLESS]"},
+ {UINT32_C(0x1F1A9), "LOSSLESS"},
+ {UINT32_C(0x1F1A0), "[5.1]"},
+ {UINT32_C(0x1F1A0), "5.1"},
+ {UINT32_C(0x1F1A1), "[7.1]"},
+ {UINT32_C(0x1F1A1), "7.1"},
+ {UINT32_C(0x1F1A2), "[22.2]"},
+ {UINT32_C(0x1F1A2), "22.2"},
+ {UINT32_C(0x2141), "G"},
+ {UINT32_C(0x2142), "L"},
+ {UINT32_C(0x2144), "Y"}
+};
+
+/**
+ * Replace an ASCII spelling found at the very start of `s` with the
+ * code point listed for it in the `symbols` table, preferring the
+ * longest spelling that matches in full.
+ *
+ * @param   s     Input bytes; at most `slen` of them are examined
+ * @param   slen  Number of bytes available in `s`
+ * @param   n     Output: reset to 0 on entry; on conversion it holds the
+ *                length of the matched spelling, otherwise the number
+ *                of leading bytes reported as not convertible
+ * @param   cp    Output: receives the code point in `cp[0]`, but only
+ *                when `*ncp` is non-zero on entry
+ * @param   ncp   On entry, a zero value suppresses the store to `cp`
+ *                (presumably it is the capacity of `cp` -- confirm);
+ *                set to 1 on conversion, left untouched otherwise
+ * @return        LIBCHARCONV_CONVERTED if a spelling matched and no
+ *                longer spelling could still match;
+ *                LIBCHARCONV_CONVERT_IF_END if a spelling matched but
+ *                the available input is also a proper prefix of a
+ *                longer one; LIBCHARCONV_INDETERMINATE if nothing
+ *                matched at offset 0 but the input is a proper prefix
+ *                of some spelling; LIBCHARCONV_NO_CONVERT otherwise
+ *
+ * As written, the second pass of the outer loop always ends in
+ * `no_conv` (either through the `*n` test in the loop or through the
+ * one under `conv`), so a LIBCHARCONV_NO_CONVERT return reports `*n`
+ * as 1, or as 0 when `slen` is 0.
+ *
+ * NOTE(review): if the intent is to skip every leading byte that
+ * cannot start a spelling, the `*n` test inside the loop probably
+ * belongs under the `indeterminate` check -- confirm against the
+ * sibling converters before changing.
+ */
+enum libcharconv_result
+libcharconv_symbols(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp)
+{
+ size_t i, len, found, found_len;
+ int indeterminate;
+ *n = 0;
+ for (; slen; s++, slen--, ++*n) {
+ indeterminate = 0;
+ found = SIZE_MAX; /* placeholder; only read after a match has replaced it */
+ found_len = 0u;
+ for (i = 0u; i < sizeof(symbols) / sizeof(*symbols); i++) {
+ len = strlen(symbols[i].s);
+ if (strncmp(s, symbols[i].s, len < slen ? len : slen)) /* differs within the bytes available */
+ continue;
+ if (slen < len) { /* input is a proper prefix of this spelling */
+ indeterminate = 1;
+ continue;
+ }
+ if (len > found_len) { /* keep the longest full match */
+ found = i;
+ found_len = len;
+ }
+ }
+ if (found_len)
+ goto conv;
+ if (*n) /* always true from the second pass on */
+ goto no_conv;
+ if (indeterminate)
+ return LIBCHARCONV_INDETERMINATE;
+ }
+no_conv:
+ return LIBCHARCONV_NO_CONVERT;
+
+conv:
+ if (*n) /* a match past offset 0: report the unconvertible prefix first */
+ goto no_conv;
+ if (*ncp) /* skip the store when *ncp is zero on entry */
+ *cp = symbols[found].cp;
+ *n += found_len;
+ *ncp = 1u;
+ return indeterminate ? LIBCHARCONV_CONVERT_IF_END : LIBCHARCONV_CONVERTED;
+}