Add ocr

Signed-off-by: Mattias Andrée <m@maandree.se>
author: Mattias Andrée <m@maandree.se> 2026-01-24 23:41:48 +0100
committer: Mattias Andrée <m@maandree.se> 2026-01-24 23:41:48 +0100
commit: 0b2c9a1f4a4acaa8005c216dd331a91670e92cae (patch)
tree: ff0187838e4ab2eea1d2503760abb434d05ed5e4
parent: Improve go (diff)
download: charconv-0b2c9a1f4a4acaa8005c216dd331a91670e92cae.tar.gz
charconv-0b2c9a1f4a4acaa8005c216dd331a91670e92cae.tar.bz2
charconv-0b2c9a1f4a4acaa8005c216dd331a91670e92cae.tar.xz
5 files changed, 119 insertions, 2 deletions
diff --git a/Makefile b/Makefile
index 771cce9..a33bec8 100644
--- a/Makefile
+++ b/Makefile
@@ -50,7 +50,8 @@ BIN =\
 	convert-to-lydian\
 	convert-to-lycian\
 	convert-to-domino-tiles\
-	convert-to-clock-faces
+	convert-to-clock-faces\
+	convert-to-ocr
 
 LIBOBJ =\
 	libcharconv_decode_utf8_.o\
@@ -89,7 +90,8 @@ LIBOBJ =\
 	libcharconv_lycian.o\
 	libcharconv_domino_tiles_horizontal.o\
 	libcharconv_domino_tiles_vertical.o\
-	libcharconv_clock_faces.o
+	libcharconv_clock_faces.o\
+	libcharconv_ocr.o
 
 LOBJ = $(LIBOBJ:.o=.lo)
 
diff --git a/convert-to-ocr.c b/convert-to-ocr.c
new file mode 100644
index 0000000..c0cfab0
--- /dev/null
+++ b/convert-to-ocr.c
@@ -0,0 +1,18 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+USAGE("");
+
+
+int
+main(int argc, char *argv[]) /* Entry point of convert-to-ocr: calls usage() for any option or leftover argument, otherwise runs convert() with libcharconv_ocr */
+{
+	ARGBEGIN {
+	default: /* no option is recognised, so every option ends up in usage() */
+		usage();
+	} ARGEND;
+	if (argc) /* NOTE(review): presumably argc is the number of operands left after ARGEND -- confirm in common.h */
+		usage();
+
+	return convert(&libcharconv_ocr); /* convert() is defined outside this file; its return value becomes the exit status */
+}
diff --git a/libcharconv.h b/libcharconv.h
index b1d6599..f8d15c1 100644
--- a/libcharconv.h
+++ b/libcharconv.h
@@ -1202,4 +1202,48 @@ enum libcharconv_result libcharconv_domino_tiles_vertical(const char *s, size_t
 enum libcharconv_result libcharconv_clock_faces(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp);
 
 
+/**
+ * Convert
+ * 'S' and 's' to OCR HOOK,
+ * 'd' and 'h' to OCR CHAIR,
+ * 'Y' to OCR FORK,
+ * 'A' to OCR INVERTED FORK,
+ * 'I' to OCR BELT BUCKLE,
+ * 'X' and 'x' to OCR BOW TIE,
+ * '\' to OCR DOUBLE BACKSLASH,
+ * 'C' and 'c' to OCR BRANCH BANK IDENTIFICATION,
+ * '/' to OCR AMOUNT OF CHECK,
+ * 'P' and 'p' to OCR DASH,
+ * '_' to OCR CUSTOMER ACCOUNT NUMBER, and
+ * 'E' to HELLSCHREIBER PAUSE SYMBOL
+ * 
+ * @param   s     Text to convert
+ * @param   slen  The number of bytes available in `s`
+ * @param   n     Output parameter for the number of consumed bytes
+ * @param   cp    Output buffer for the codepoints
+ * @param   ncp   Input parameter for the number of codepoints that
+ *                fit in `cp`, and output parameter for the number
+ *                of output codepoints (if it exceeds the original
+ *                value of `ncp`, a larger buffer is needed)
+ * @return        LIBCHARCONV_NO_CONVERT:
+ *                  `*n` is the number of bytes from the beginning
+ *                  of `s` that cannot be converted
+ *                LIBCHARCONV_CONVERTED:
+ *                  `*n` is the number of bytes from the beginning
+ *                  of `s` that was converted to a codepoint which
+ *                  is stored in `*cp`
+ *                LIBCHARCONV_INDETERMINATE:
+ *                  If all text has been input, no more can be
+ *                  converted, otherwise more of the text must
+ *                  be made available before the function can
+ *                  determine whether the beginning of `s` can be
+ *                  converted or what it should be converted to
+ *                LIBCHARCONV_CONVERT_IF_END:
+ *                  As LIBCHARCONV_CONVERTED if the entire text has
+ *                  been input, as LIBCHARCONV_INDETERMINATE
+ *                  otherwise
+ */
+enum libcharconv_result libcharconv_ocr(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp);
+
+
 #endif
diff --git a/libcharconv_latin.c b/libcharconv_latin.c
index 5179388..68bdd7d 100644
--- a/libcharconv_latin.c
+++ b/libcharconv_latin.c
@@ -350,6 +350,11 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
 			c5 = '0';
 			goto conv5;
 
+		} else if (UINT32_C(0x2440) <= c && c <= UINT32_C(0x244A)) {
+			/* ocr */
+			c = (uint_least32_t)"SdYAIXC/P_\\"[c - UINT32_C(0x2440)];
+			goto conv;
+
 		} else {
 			switch (c) {
 			/* shogi */
@@ -488,6 +493,9 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
 			case UINT32_C(0x1F030): c1 = '#'; c2 = '|'; c3 = '#'; goto conv3;
 			case UINT32_C(0x1F062): c1 = '#'; c2 = '-'; c3 = '#'; goto conv3;
 
+			/* ocr (telegraphing) */
+			case UINT32_C(0x2BFF): c1 = 'E'; goto conv1;
+
 			default:
 			no_match:
 				*n += clen;
diff --git a/libcharconv_ocr.c b/libcharconv_ocr.c
new file mode 100644
index 0000000..a849952
--- /dev/null
+++ b/libcharconv_ocr.c
@@ -0,0 +1,45 @@
+/* See LICENSE file for copyright and license details. */
+#include "lib-common.h"
+
+
+enum libcharconv_result
+libcharconv_ocr(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) /* Convert the first byte of `s` to its OCR codepoint, or count the leading bytes that have no mapping */
+{
+	uint_least32_t c;
+	*n = 0; /* nothing consumed yet */
+	for (; slen--; s++) { /* inspect at most `slen` bytes, one at a time */
+		switch (*s) {
+		case 's':
+		case 'S': c = UINT32_C(0x2440); goto conv; /* OCR HOOK */
+		case 'h':
+		case 'd': c = UINT32_C(0x2441); goto conv; /* OCR CHAIR */
+		case 'Y': c = UINT32_C(0x2442); goto conv; /* OCR FORK */
+		case 'A': c = UINT32_C(0x2443); goto conv; /* OCR INVERTED FORK */
+		case 'I': c = UINT32_C(0x2444); goto conv; /* OCR BELT BUCKLE */
+		case 'x':
+		case 'X': c = UINT32_C(0x2445); goto conv; /* OCR BOW TIE */
+		case '\\': c = UINT32_C(0x244A); goto conv; /* OCR DOUBLE BACKSLASH */
+		case 'c':
+		case 'C': c = UINT32_C(0x2446); goto conv; /* OCR BRANCH BANK IDENTIFICATION */
+		case '/': c = UINT32_C(0x2447); goto conv; /* OCR AMOUNT OF CHECK */
+		case 'p':
+		case 'P': c = UINT32_C(0x2448); goto conv; /* OCR DASH */
+		case '_': c = UINT32_C(0x2449); goto conv; /* OCR CUSTOMER ACCOUNT NUMBER */
+		case 'E': c = UINT32_C(0x2BFF); goto conv; /* HELLSCHREIBER PAUSE SYMBOL */
+		default:
+			*n += 1u; /* byte has no mapping: extend the unconvertible prefix */
+			break;
+		}
+	}
+no_conv: /* reached when the loop ends without a match (including slen == 0) or from `conv` below */
+	return LIBCHARCONV_NO_CONVERT; /* `*n` holds the length of the unconvertible prefix */
+
+conv:
+	if (*n) /* a mappable byte follows unmapped ones: report only the unmapped prefix and leave the mappable byte unconsumed */
+		goto no_conv;
+	if (*ncp) /* write the codepoint only if the caller's buffer has room for at least one */
+		*cp = c;
+	*n += 1u; /* `*n` was 0 here, so exactly one byte is consumed */
+	*ncp = 1u; /* one output codepoint is reported even when it was not stored */
+	return LIBCHARCONV_CONVERTED;
+}
author	Mattias Andrée <m@maandree.se>	2026-01-24 23:41:48 +0100
committer	Mattias Andrée <m@maandree.se>	2026-01-24 23:41:48 +0100
commit	0b2c9a1f4a4acaa8005c216dd331a91670e92cae (patch)
tree	ff0187838e4ab2eea1d2503760abb434d05ed5e4
parent	Improve go (diff)
download	charconv-0b2c9a1f4a4acaa8005c216dd331a91670e92cae.tar.gz charconv-0b2c9a1f4a4acaa8005c216dd331a91670e92cae.tar.bz2 charconv-0b2c9a1f4a4acaa8005c216dd331a91670e92cae.tar.xz