aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Mattias Andrée <m@maandree.se> 2026-01-25 15:34:56 +0100
committer: Mattias Andrée <m@maandree.se> 2026-01-25 15:34:56 +0100
commit: 13e9e58d77da2c041c2d0e6ea4c648bdcd14dc2c (patch)
tree: 1868a36dedd32008e4566795962a7a6c1daa22eb
parent: Add flipped (diff)
download: charconv-13e9e58d77da2c041c2d0e6ea4c648bdcd14dc2c.tar.gz
charconv-13e9e58d77da2c041c2d0e6ea4c648bdcd14dc2c.tar.bz2
charconv-13e9e58d77da2c041c2d0e6ea4c648bdcd14dc2c.tar.xz
Add overlaid
Signed-off-by: Mattias Andrée <m@maandree.se>
-rw-r--r-- Makefile 6
-rw-r--r-- convert-to-overlaid.c 4
-rw-r--r-- libcharconv.h 5
-rw-r--r-- libcharconv_latin.c 17
-rw-r--r-- libcharconv_overlaid.c 66
5 files changed, 95 insertions, 3 deletions
diff --git a/Makefile b/Makefile
index 55369ae..f09b1a3 100644
--- a/Makefile
+++ b/Makefile
@@ -61,7 +61,8 @@ BIN =\
convert-to-yijing-tetragrams\
convert-to-yijing-hexagrams\
convert-to-vulgar-fractions\
- convert-to-flipped
+ convert-to-flipped\
+ convert-to-overlaid
LIBOBJ =\
libcharconv_decode_utf8_.o\
@@ -111,7 +112,8 @@ LIBOBJ =\
libcharconv_yijing_tetragrams.o\
libcharconv_yijing_hexagrams.o\
libcharconv_vulgar_fractions.o\
- libcharconv_flipped.o
+ libcharconv_flipped.o\
+ libcharconv_overlaid.o
LOBJ = $(LIBOBJ:.o=.lo)
diff --git a/convert-to-overlaid.c b/convert-to-overlaid.c
new file mode 100644
index 0000000..803cdd6
--- /dev/null
+++ b/convert-to-overlaid.c
@@ -0,0 +1,4 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+/* NOTE(review): SIMPLE is defined in common.h, which is not shown here;
+ * presumably it expands to this program's main() wrapping the given
+ * conversion function -- confirm against common.h */
+SIMPLE(libcharconv_overlaid)
diff --git a/libcharconv.h b/libcharconv.h
index 4aa9d27..289f9e8 100644
--- a/libcharconv.h
+++ b/libcharconv.h
@@ -333,6 +333,11 @@ LIBCHARCONV_FUNC_(libcharconv_vulgar_fractions);
*/
LIBCHARCONV_FUNC_(libcharconv_flipped);
+/**
+ * Overlay characters
+ *
+ * Replaces two adjacent characters with a single character
+ * that depicts them overlaid on each other, for example
+ * "!?" is replaced with U+203D INTERROBANG
+ */
+LIBCHARCONV_FUNC_(libcharconv_overlaid);
+
#undef LIBCHARCONV_FUNC_
#endif
diff --git a/libcharconv_latin.c b/libcharconv_latin.c
index c52044c..ee975e1 100644
--- a/libcharconv_latin.c
+++ b/libcharconv_latin.c
@@ -18,7 +18,7 @@ enum libcharconv_result
libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp)
{
enum libcharconv_result ret = LIBCHARCONV_CONVERTED;
- uint_least32_t c;
+ uint_least32_t c, cp1, cp2;
char c1, c2, c3, c4, c5, c6;
size_t i, clen;
unsigned num;
@@ -626,6 +626,10 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
case UINT32_C(0x215E): c1 = '7'; c2 = '/'; c3 = '8'; goto conv3;
case UINT32_C(0x2152): c1 = '1'; c2 = '/'; c3 = '1'; c4 = '0'; goto conv4;
+ /* overlaid */
+ case UINT32_C(0x203D): c1 = '!'; c2 = '?'; goto conv2;
+ case UINT32_C(0x2E18): cp1 = UINT32_C(0xBF); cp2 = UINT32_C(0xA1); goto conv2cp;
+
default:
no_match:
*n += clen;
@@ -648,6 +652,17 @@ conv:
*ncp = 1u;
return ret;
+conv2cp:
+ if (*n)
+ goto no_conv;
+ *n += clen;
+ if (*ncp >= 1u)
+ cp[0] = cp1;
+ if (*ncp >= 2u)
+ cp[1] = cp2;
+ *ncp = 2u;
+ return ret;
+
conv2:
if (*n)
goto no_conv;
diff --git a/libcharconv_overlaid.c b/libcharconv_overlaid.c
new file mode 100644
index 0000000..7f3cab1
--- /dev/null
+++ b/libcharconv_overlaid.c
@@ -0,0 +1,66 @@
+/* See LICENSE file for copyright and license details. */
+#include "lib-common.h"
+
+
+/**
+ * Character pairs that can be replaced with a single
+ * overlaid character
+ *
+ * The table is only ever read, hence `const`
+ */
+static const struct {
+	uint_least32_t a;  /* one character of the pair */
+	uint_least32_t b;  /* the other character of the pair */
+	uint_least32_t to; /* the character that replaces the pair */
+} pairs[] = {
+	/* U+00BF INVERTED QUESTION MARK, U+00A1 INVERTED EXCLAMATION MARK => U+2E18 INVERTED INTERROBANG */
+	{0x00BF, 0x00A1, 0x2E18},
+	/* U+0021 EXCLAMATION MARK, U+003F QUESTION MARK => U+203D INTERROBANG */
+	{0x0021, 0x003F, 0x203D}
+};
+
+
+/**
+ * Replace a pair of adjacent characters listed in `pairs`
+ * with the single character that overlays them
+ *
+ * The two characters of a pair are accepted in either order.
+ *
+ * libcharconv_decode_utf8_ is defined elsewhere; this function
+ * treats a return value greater than the length passed to it as
+ * an incomplete sequence, and a return value of 0 as a byte that
+ * cannot be decoded.
+ *
+ * @param   s     The text to convert
+ * @param   slen  The number of bytes available in `s`
+ * @param   n     Output parameter for the number of bytes covered
+ *                by the returned result
+ * @param   cp    Output buffer for the converted character
+ * @param   ncp   On entry, the number of elements available in `cp`
+ *                (nothing is stored if it is 0); set to 1 when
+ *                LIBCHARCONV_CONVERTED is returned
+ * @return        LIBCHARCONV_CONVERTED if `s` begins with a pair
+ *                (`*n` is the byte length of the pair),
+ *                LIBCHARCONV_NO_CONVERT if the first `*n` bytes
+ *                contain nothing to convert, or
+ *                LIBCHARCONV_INDETERMINATE (with `*n` set to 0) if
+ *                more input is needed to decide
+ */
+enum libcharconv_result
+libcharconv_overlaid(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp)
+{
+	uint_least32_t a, b;
+	size_t i, alen, blen;
+
+	*n = 0;
+	while (slen) {
+		alen = libcharconv_decode_utf8_(s, slen, &a);
+		if (alen > slen) {
+			/* The character is cut off by the end of the input:
+			 * first report what has been skipped so far, if anything */
+			if (*n)
+				goto no_conv;
+			return LIBCHARCONV_INDETERMINATE;
+		}
+		if (!alen) {
+			/* Undecodable byte: skip it as unconvertible */
+			*n += 1u;
+			slen -= 1u;
+			s = &s[1];
+			continue;
+		}
+
+		for (i = 0u; i < sizeof(pairs) / sizeof(*pairs); i++) {
+			if (a != pairs[i].a && a != pairs[i].b)
+				continue;
+			/* `a` may start a pair: report the skipped text first
+			 * so that a conversion always starts at offset 0 */
+			if (*n)
+				goto no_conv;
+			if (slen == alen)
+				return LIBCHARCONV_INDETERMINATE;
+			blen = libcharconv_decode_utf8_(&s[alen], slen - alen, &b);
+			/* Only `slen - alen` bytes remain after `a`; comparing
+			 * against `slen` would accept a truncated second character */
+			if (blen > slen - alen)
+				return LIBCHARCONV_INDETERMINATE;
+			/* `a` is followed by an undecodable byte and cannot be
+			 * part of any pair: pass it through instead of returning
+			 * a result that covers 0 bytes */
+			if (!blen)
+				break;
+			/* `a` is one member of the pair, so the XORs are equal
+			 * if and only if `b` is the other member */
+			if ((a ^ b) == (pairs[i].a ^ pairs[i].b))
+				goto conv;
+		}
+
+		*n += alen;
+		s = &s[alen];
+		slen -= alen;
+	}
+no_conv:
+	return LIBCHARCONV_NO_CONVERT;
+
+conv:
+	/* Only reached with *n == 0, see the check in the loop above */
+	if (*ncp)
+		*cp = pairs[i].to;
+	*n += alen + blen;
+	*ncp = 1u;
+	return LIBCHARCONV_CONVERTED;
+}