aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMattias Andrée <m@maandree.se>2026-01-24 23:59:59 +0100
committerMattias Andrée <m@maandree.se>2026-01-24 23:59:59 +0100
commitce4e00037288939d9757cd29e28de82f8d92db76 (patch)
treee99ec793a5c1d756cd44280ab780b5ca6de7a483
parentAdd ocr (diff)
downloadcharconv-ce4e00037288939d9757cd29e28de82f8d92db76.tar.gz
charconv-ce4e00037288939d9757cd29e28de82f8d92db76.tar.bz2
charconv-ce4e00037288939d9757cd29e28de82f8d92db76.tar.xz
Add crop marks
Signed-off-by: Mattias Andrée <m@maandree.se>
-rw-r--r--Makefile6
-rw-r--r--convert-to-crop-marks.c18
-rw-r--r--libcharconv.h32
-rw-r--r--libcharconv_crop_marks.c59
-rw-r--r--libcharconv_latin.c6
5 files changed, 119 insertions, 2 deletions
diff --git a/Makefile b/Makefile
index a33bec8..f2e0bb9 100644
--- a/Makefile
+++ b/Makefile
@@ -51,7 +51,8 @@ BIN =\
convert-to-lycian\
convert-to-domino-tiles\
convert-to-clock-faces\
- convert-to-ocr
+ convert-to-ocr\
+ convert-to-crop-marks
LIBOBJ =\
libcharconv_decode_utf8_.o\
@@ -91,7 +92,8 @@ LIBOBJ =\
libcharconv_domino_tiles_horizontal.o\
libcharconv_domino_tiles_vertical.o\
libcharconv_clock_faces.o\
- libcharconv_ocr.o
+ libcharconv_ocr.o\
+ libcharconv_crop_marks.o
LOBJ = $(LIBOBJ:.o=.lo)
diff --git a/convert-to-crop-marks.c b/convert-to-crop-marks.c
new file mode 100644
index 0000000..cc33009
--- /dev/null
+++ b/convert-to-crop-marks.c
@@ -0,0 +1,18 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+USAGE("");
+
+
+int
+main(int argc, char *argv[]) /* Entry point: calls usage() for any option or (presumably) leftover operand, otherwise runs the converter */
+{
+ ARGBEGIN { /* option-parsing macro defined outside this diff (presumably in common.h) */
+ default: /* the only case listed, so no option is accepted */
+ usage();
+ } ARGEND;
+ if (argc) /* presumably argc now counts the remaining operands - TODO confirm against ARGEND */
+ usage();
+
+ return convert(&libcharconv_crop_marks); /* hand the crop-mark converter to convert() and return its result */
+}
diff --git a/libcharconv.h b/libcharconv.h
index f8d15c1..5b165b6 100644
--- a/libcharconv.h
+++ b/libcharconv.h
@@ -1246,4 +1246,36 @@ enum libcharconv_result libcharconv_clock_faces(const char *s, size_t slen, size
enum libcharconv_result libcharconv_ocr(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp);
+/**
+ * Convert "-|", "|-", "_|", and "|_" to CROP marks
+ *
+ * @param s Text to convert
+ * @param slen The number of bytes available in `s`
+ * @param n Output parameter for the number of consumed bytes
+ * @param cp Output buffer for the codepoints
+ * @param ncp Input parameter for the number of codepoints that
+ * fit in `cp`, and output parameter for the number
+ * of output codepoints (if it exceeds the original
+ * value of `ncp`, a larger buffer is needed)
+ * @return LIBCHARCONV_NO_CONVERT:
+ * `*n` is the number of bytes from the beginning
+ * of `s` that cannot be converted
+ * LIBCHARCONV_CONVERTED:
+ * `*n` is the number of bytes from the beginning
+ * of `s` that were converted to a codepoint which
+ * is stored in `*cp`
+ * LIBCHARCONV_INDETERMINATE:
+ * If all text has been input, no more can be
+ * converted, otherwise more of the text must
+ * be made available before the function can
+ * determine whether the beginning of `s` can be
+ * converted or what it should be converted to
+ * LIBCHARCONV_CONVERT_IF_END:
+ * As LIBCHARCONV_CONVERTED if the entire text has
+ * been input, as LIBCHARCONV_INDETERMINATE
+ * otherwise
+ */
+enum libcharconv_result libcharconv_crop_marks(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp);
+
+
#endif
diff --git a/libcharconv_crop_marks.c b/libcharconv_crop_marks.c
new file mode 100644
index 0000000..81d3aa1
--- /dev/null
+++ b/libcharconv_crop_marks.c
@@ -0,0 +1,59 @@
+/* See LICENSE file for copyright and license details. */
+#include "lib-common.h"
+
+
+enum libcharconv_result
+libcharconv_crop_marks(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) /* Map a leading "|-", "-|", "|_" or "_|" in s to U+230C..U+230F, or report how many leading bytes cannot be converted */
+{
+ uint_least32_t c; /* codepoint chosen for the matched pair; set on every path that reaches conv */
+ *n = 0; /* until a pattern character is met, *n counts the plain bytes skipped so far */
+ for (; slen--; s++) { /* inside the body, slen is the number of bytes that follow s[0] */
+ if (s[0] == '|') {
+ if (!slen) /* s[1] is not available, so the pair cannot be decided */
+ goto indeterminate;
+ if (s[1] == '-')
+ c = UINT32_C(0x230C); /* "|-" */
+ else if (s[1] == '_')
+ c = UINT32_C(0x230E); /* "|_" */
+ else
+ goto no_conv;
+ goto conv;
+ } else if (s[0] == '-') {
+ if (!slen) /* s[1] is not available, so the pair cannot be decided */
+ goto indeterminate;
+ if (s[1] != '|')
+ goto no_conv;
+ c = UINT32_C(0x230D); /* "-|" */
+ goto conv;
+ } else if (s[0] == '_') {
+ if (!slen) /* s[1] is not available, so the pair cannot be decided */
+ goto indeterminate;
+ if (s[1] != '|')
+ goto no_conv;
+ c = UINT32_C(0x230F); /* "_|" */
+ goto conv;
+ } else {
+ *n += 1u; /* plain byte: extend the unconvertible prefix */
+ }
+ }
+ return LIBCHARCONV_NO_CONVERT; /* all of s was scanned without meeting '|', '-' or '_' (also reached with *n == 0 when slen is 0) */
+
+no_conv: /* report an unconvertible prefix */
+ if (!*n) /* s starts with a pattern character that did not form a pair: report that single byte */
+ *n = 1u;
+ return LIBCHARCONV_NO_CONVERT;
+
+indeterminate: /* the input ended right after a possible first character of a pair */
+ if (*n) /* plain bytes precede it: report those as unconvertible instead */
+ goto no_conv;
+ return LIBCHARCONV_INDETERMINATE; /* *n is 0 here */
+
+conv: /* a complete pair was recognised at the current position */
+ if (*n) /* plain bytes precede the pair: report only that prefix and leave the pair unconsumed */
+ goto no_conv;
+ if (*ncp) /* write the codepoint only if the caller's buffer holds at least one */
+ *cp = c;
+ *n += 2u; /* *n was 0, so exactly the two bytes of the pair are consumed */
+ *ncp = 1u; /* one codepoint was produced, or is needed if the buffer was empty */
+ return LIBCHARCONV_CONVERTED;
+}
diff --git a/libcharconv_latin.c b/libcharconv_latin.c
index 68bdd7d..de5b618 100644
--- a/libcharconv_latin.c
+++ b/libcharconv_latin.c
@@ -496,6 +496,12 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
/* ocr (telegraphing) */
case UINT32_C(0x2BFF): c1 = 'E'; goto conv1;
+ /* crop marks */
+ case UINT32_C(0x230C): c1 = '|'; c2 = '-'; goto conv2;
+ case UINT32_C(0x230D): c1 = '-'; c2 = '|'; goto conv2;
+ case UINT32_C(0x230E): c1 = '|'; c2 = '_'; goto conv2;
+ case UINT32_C(0x230F): c1 = '_'; c2 = '|'; goto conv2;
+
default:
no_match:
*n += clen;