aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Mattias Andrée <m@maandree.se> 2026-01-24 23:41:48 +0100
committer Mattias Andrée <m@maandree.se> 2026-01-24 23:41:48 +0100
commit 0b2c9a1f4a4acaa8005c216dd331a91670e92cae (patch)
tree ff0187838e4ab2eea1d2503760abb434d05ed5e4
parent Improve go (diff)
download charconv-0b2c9a1f4a4acaa8005c216dd331a91670e92cae.tar.gz
charconv-0b2c9a1f4a4acaa8005c216dd331a91670e92cae.tar.bz2
charconv-0b2c9a1f4a4acaa8005c216dd331a91670e92cae.tar.xz
Add ocr
Signed-off-by: Mattias Andrée <m@maandree.se>
Diffstat (limited to '')
-rw-r--r-- Makefile 6
-rw-r--r-- convert-to-ocr.c 18
-rw-r--r-- libcharconv.h 44
-rw-r--r-- libcharconv_latin.c 8
-rw-r--r-- libcharconv_ocr.c 45
5 files changed, 119 insertions, 2 deletions
diff --git a/Makefile b/Makefile
index 771cce9..a33bec8 100644
--- a/Makefile
+++ b/Makefile
@@ -50,7 +50,8 @@ BIN =\
convert-to-lydian\
convert-to-lycian\
convert-to-domino-tiles\
- convert-to-clock-faces
+ convert-to-clock-faces\
+ convert-to-ocr
LIBOBJ =\
libcharconv_decode_utf8_.o\
@@ -89,7 +90,8 @@ LIBOBJ =\
libcharconv_lycian.o\
libcharconv_domino_tiles_horizontal.o\
libcharconv_domino_tiles_vertical.o\
- libcharconv_clock_faces.o
+ libcharconv_clock_faces.o\
+ libcharconv_ocr.o
LOBJ = $(LIBOBJ:.o=.lo)
diff --git a/convert-to-ocr.c b/convert-to-ocr.c
new file mode 100644
index 0000000..c0cfab0
--- /dev/null
+++ b/convert-to-ocr.c
@@ -0,0 +1,18 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+USAGE("");
+
+
+int
+main(int argc, char *argv[])
+{
+ ARGBEGIN {
+ default:
+ usage();
+ } ARGEND;
+ if (argc)
+ usage();
+
+ return convert(&libcharconv_ocr);
+}
diff --git a/libcharconv.h b/libcharconv.h
index b1d6599..f8d15c1 100644
--- a/libcharconv.h
+++ b/libcharconv.h
@@ -1202,4 +1202,48 @@ enum libcharconv_result libcharconv_domino_tiles_vertical(const char *s, size_t
enum libcharconv_result libcharconv_clock_faces(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp);
+/**
+ * Convert
+ * 'S' and 's' to OCR HOOK,
+ * 'd' and 'h' to OCR CHAIR,
+ * 'Y' to OCR FORK,
+ * 'A' to OCR INVERTED FORK,
+ * 'I' to OCR BELT BUCKLE,
+ * 'X' and 'x' to OCR BOW TIE,
+ * '\' to OCR DOUBLE BACKSLASH,
+ * 'C' and 'c' to OCR BRANCH BANK IDENTIFICATION,
+ * '/' to OCR AMOUNT OF CHECK,
+ * 'P' and 'p' to OCR DASH,
+ * '_' to OCR CUSTOMER ACCOUNT NUMBER, and
+ * 'E' to HELLSCHREIBER PAUSE SYMBOL
+ *
+ * @param s Text to convert
+ * @param slen The number of bytes available in `s`
+ * @param n Output parameter for the number of consumed bytes
+ * @param cp Output buffer for the codepoints
+ * @param ncp Input parameter for the number of codepoints that
+ * fit in `cp`, and output parameter for the number
+ * of output codepoints (if it exceeds the original
+ * value of `ncp`, a larger buffer is needed)
+ * @return LIBCHARCONV_NO_CONVERT:
+ * `*n` is the number of bytes from the beginning
+ * of `s` that cannot be converted
+ * LIBCHARCONV_CONVERTED:
+ * `*n` is the number of bytes from the beginning
+ * of `s` that were converted to a codepoint which
+ * is stored in `*cp`
+ * LIBCHARCONV_INDETERMINATE:
+ * If all text has been input, no more can be
+ * converted, otherwise more of the text must
+ * be made available before the function can
+ * determine whether the beginning of `s` can be
+ * converted or what it should be converted to
+ * LIBCHARCONV_CONVERT_IF_END:
+ * As LIBCHARCONV_CONVERTED if the entire text has
+ * been input, as LIBCHARCONV_INDETERMINATE
+ * otherwise
+ */
+enum libcharconv_result libcharconv_ocr(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp);
+
+
#endif
diff --git a/libcharconv_latin.c b/libcharconv_latin.c
index 5179388..68bdd7d 100644
--- a/libcharconv_latin.c
+++ b/libcharconv_latin.c
@@ -350,6 +350,11 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
c5 = '0';
goto conv5;
+ } else if (UINT32_C(0x2440) <= c && c <= UINT32_C(0x244A)) {
+ /* ocr */
+ c = (uint_least32_t)"SdYAIXC/P_\\"[c - UINT32_C(0x2440)];
+ goto conv;
+
} else {
switch (c) {
/* shogi */
@@ -488,6 +493,9 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
case UINT32_C(0x1F030): c1 = '#'; c2 = '|'; c3 = '#'; goto conv3;
case UINT32_C(0x1F062): c1 = '#'; c2 = '-'; c3 = '#'; goto conv3;
+ /* ocr (telegraphing) */
+ case UINT32_C(0x2BFF): c1 = 'E'; goto conv1;
+
default:
no_match:
*n += clen;
diff --git a/libcharconv_ocr.c b/libcharconv_ocr.c
new file mode 100644
index 0000000..a849952
--- /dev/null
+++ b/libcharconv_ocr.c
@@ -0,0 +1,45 @@
+/* See LICENSE file for copyright and license details. */
+#include "lib-common.h"
+
+
+enum libcharconv_result
+libcharconv_ocr(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp)
+{
+ uint_least32_t c;
+ *n = 0;
+ for (; slen--; s++) {
+ switch (*s) {
+ case 's':
+ case 'S': c = UINT32_C(0x2440); goto conv;
+ case 'h':
+ case 'd': c = UINT32_C(0x2441); goto conv;
+ case 'Y': c = UINT32_C(0x2442); goto conv;
+ case 'A': c = UINT32_C(0x2443); goto conv;
+ case 'I': c = UINT32_C(0x2444); goto conv;
+ case 'x':
+ case 'X': c = UINT32_C(0x2445); goto conv;
+ case '\\': c = UINT32_C(0x244A); goto conv;
+ case 'c':
+ case 'C': c = UINT32_C(0x2446); goto conv;
+ case '/': c = UINT32_C(0x2447); goto conv;
+ case 'p':
+ case 'P': c = UINT32_C(0x2448); goto conv;
+ case '_': c = UINT32_C(0x2449); goto conv;
+ case 'E': c = UINT32_C(0x2BFF); goto conv;
+ default:
+ *n += 1u;
+ break;
+ }
+ }
+no_conv:
+ return LIBCHARCONV_NO_CONVERT;
+
+conv:
+ if (*n)
+ goto no_conv;
+ if (*ncp)
+ *cp = c;
+ *n += 1u;
+ *ncp = 1u;
+ return LIBCHARCONV_CONVERTED;
+}