aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMattias Andrée <m@maandree.se>2026-01-24 14:39:44 +0100
committerMattias Andrée <m@maandree.se>2026-01-24 14:39:44 +0100
commitd554a18e5f981ae200f7f8a3e46d5da93190e897 (patch)
tree99907dc58e28160b3814a901656f2d915b32e229
parentFirst commit (diff)
downloadcharconv-d554a18e5f981ae200f7f8a3e46d5da93190e897.tar.gz
charconv-d554a18e5f981ae200f7f8a3e46d5da93190e897.tar.bz2
charconv-d554a18e5f981ae200f7f8a3e46d5da93190e897.tar.xz
Add replacement
Signed-off-by: Mattias Andrée <m@maandree.se>
-rw-r--r--Makefile6
-rw-r--r--convert-to-replacement.c18
-rw-r--r--libcharconv.h34
-rw-r--r--libcharconv_latin.c14
-rw-r--r--libcharconv_replacement.c42
5 files changed, 112 insertions, 2 deletions
diff --git a/Makefile b/Makefile
index 61f9469..76d3333 100644
--- a/Makefile
+++ b/Makefile
@@ -41,7 +41,8 @@ BIN =\
convert-to-bold-fraktur\
convert-to-script\
convert-to-bold-script\
- convert-to-buhid
+ convert-to-buhid\
+ convert-to-replacement
LIBOBJ =\
libcharconv_shogi.o\
@@ -69,7 +70,8 @@ LIBOBJ =\
libcharconv_bold_fraktur.o\
libcharconv_script.o\
libcharconv_bold_script.o\
- libcharconv_buhid.o
+ libcharconv_buhid.o\
+ libcharconv_replacement.o
HDR =\
libcharconv.h
diff --git a/convert-to-replacement.c b/convert-to-replacement.c
new file mode 100644
index 0000000..6407b52
--- /dev/null
+++ b/convert-to-replacement.c
@@ -0,0 +1,18 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+USAGE("");
+
+
+int
+main(int argc, char *argv[])
+{
+ ARGBEGIN {
+ default:
+ usage();
+ } ARGEND;
+ if (argc)
+ usage();
+
+ return convert(&libcharconv_replacement);
+}
diff --git a/libcharconv.h b/libcharconv.h
index aa86a16..8e3c4bc 100644
--- a/libcharconv.h
+++ b/libcharconv.h
@@ -867,4 +867,38 @@ enum libcharconv_result libcharconv_bold_script(const char *s, size_t slen, size
enum libcharconv_result libcharconv_buhid(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp);
+/**
+ * Convert
+ * "obj" (matched case-insensitively) to OBJECT REPLACEMENT CHARACTER, and
+ * "?" to REPLACEMENT CHARACTER
+ *
+ * @param s Text to convert
+ * @param slen The number of bytes available in `s`
+ * @param n Output parameter for the number of consumed bytes
+ * @param cp Output buffer for the codepoints
+ * @param ncp Input parameter for the number of codepoints that
+ * fit in `cp`, and output parameter for the number
+ * of output codepoints (if it exceeds the original
+ * value of `ncp`, a larger buffer is needed)
+ * @return LIBCHARCONV_NO_CONVERT:
+ * `*n` is the number of bytes from the beginning
+ * of `s` that cannot be converted
+ * LIBCHARCONV_CONVERTED:
+ * `*n` is the number of bytes from the beginning
+ * of `s` that were converted to a codepoint which
+ * is stored in `*cp`
+ * LIBCHARCONV_INDETERMINATE:
+ * If all text has been input, no more can be
+ * converted, otherwise more of the text must
+ * be made available before the function can
+ * determine whether the beginning of `s` can be
+ * converted or what it should be converted to
+ * LIBCHARCONV_CONVERT_IF_END:
+ * As LIBCHARCONV_CONVERTED if the entire text has
+ * been input, as LIBCHARCONV_INDETERMINATE
+ * otherwise
+ */
+enum libcharconv_result libcharconv_replacement(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp);
+
+
#endif
diff --git a/libcharconv_latin.c b/libcharconv_latin.c
index d682b17..be0b53c 100644
--- a/libcharconv_latin.c
+++ b/libcharconv_latin.c
@@ -367,6 +367,16 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
}
goto conv2_prechecked;
+ /* replacement */
+ case UINT32_C(0xFFFC):
+ c1 = 'o';
+ c2 = 'b';
+ c3 = 'j';
+ goto conv3;
+ case UINT32_C(0xFFFD):
+ c = (uint_least32_t)'?';
+ goto conv;
+
default:
no_match:
*n += clen;
@@ -398,6 +408,10 @@ conv2_prechecked:
*ncp = 2u;
return ret;
+conv3:
+ if (*n)
+ goto no_conv;
+ *n += clen;
conv3_prechecked:
if (*ncp >= 1u)
cp[0] = (uint_least32_t)c1;
diff --git a/libcharconv_replacement.c b/libcharconv_replacement.c
new file mode 100644
index 0000000..a9bd3c5
--- /dev/null
+++ b/libcharconv_replacement.c
@@ -0,0 +1,42 @@
+/* See LICENSE file for copyright and license details. */
+#include "libcharconv.h"
+#include <strings.h>
+
+
+enum libcharconv_result
+libcharconv_replacement(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp)
+{
+ uint_least32_t c;
+ *n = 0;
+ while (slen) {
+ if (slen < 3u && !strncasecmp(s, "obj", slen)) {
+ if (*n)
+ goto no_conv;
+ return LIBCHARCONV_INDETERMINATE;
+ } else if (slen >= 3u && !strncasecmp(s, "obj", 3u)) {
+ if (*n)
+ goto no_conv;
+ c = UINT32_C(0xFFFC);
+ *n = 3u;
+ goto conv;
+ } else if (*s == '?') {
+ if (*n)
+ goto no_conv;
+ c = UINT32_C(0xFFFD);
+ *n = 1u;
+ goto conv;
+ } else {
+ *n += 1u;
+ s++;
+ slen--;
+ }
+ }
+no_conv:
+ return LIBCHARCONV_NO_CONVERT;
+
+conv:
+ if (*ncp)
+ *cp = c;
+ *ncp = 1u;
+ return LIBCHARCONV_CONVERTED;
+}