From d554a18e5f981ae200f7f8a3e46d5da93190e897 Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Sat, 24 Jan 2026 14:39:44 +0100 Subject: Add replacement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- Makefile | 6 ++++-- convert-to-replacement.c | 18 ++++++++++++++++++ libcharconv.h | 34 ++++++++++++++++++++++++++++++++++ libcharconv_latin.c | 14 ++++++++++++++ libcharconv_replacement.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 112 insertions(+), 2 deletions(-) create mode 100644 convert-to-replacement.c create mode 100644 libcharconv_replacement.c diff --git a/Makefile b/Makefile index 61f9469..76d3333 100644 --- a/Makefile +++ b/Makefile @@ -41,7 +41,8 @@ BIN =\ convert-to-bold-fraktur\ convert-to-script\ convert-to-bold-script\ - convert-to-buhid + convert-to-buhid\ + convert-to-replacement LIBOBJ =\ libcharconv_shogi.o\ @@ -69,7 +70,8 @@ LIBOBJ =\ libcharconv_bold_fraktur.o\ libcharconv_script.o\ libcharconv_bold_script.o\ - libcharconv_buhid.o + libcharconv_buhid.o\ + libcharconv_replacement.o HDR =\ libcharconv.h diff --git a/convert-to-replacement.c b/convert-to-replacement.c new file mode 100644 index 0000000..6407b52 --- /dev/null +++ b/convert-to-replacement.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_replacement); +} diff --git a/libcharconv.h b/libcharconv.h index aa86a16..8e3c4bc 100644 --- a/libcharconv.h +++ b/libcharconv.h @@ -867,4 +867,38 @@ enum libcharconv_result libcharconv_bold_script(const char *s, size_t slen, size enum libcharconv_result libcharconv_buhid(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); +/** + * Convert + * "obj" to OBJECT REPLACEMENT CHARACTER, and + * "?" 
to REPLACEMENT CHARACTER + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_replacement(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + #endif diff --git a/libcharconv_latin.c b/libcharconv_latin.c index d682b17..be0b53c 100644 --- a/libcharconv_latin.c +++ b/libcharconv_latin.c @@ -367,6 +367,16 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz } goto conv2_prechecked; + /* replacement */ + case UINT32_C(0xFFFC): + c1 = 'o'; + c2 = 'b'; + c3 = 'j'; + goto conv3; + case UINT32_C(0xFFFD): + c = (uint_least32_t)'?'; + goto conv; + default: no_match: *n += clen; @@ -398,6 +408,10 @@ conv2_prechecked: *ncp = 2u; return ret; +conv3: + if (*n) + goto no_conv; + *n += clen; conv3_prechecked: if (*ncp >= 1u) cp[0] = (uint_least32_t)c1; diff --git a/libcharconv_replacement.c b/libcharconv_replacement.c new file mode 100644 index 
0000000..a9bd3c5 --- /dev/null +++ b/libcharconv_replacement.c @@ -0,0 +1,42 @@ +/* See LICENSE file for copyright and license details. */ +#include "libcharconv.h" +#include + + +enum libcharconv_result +libcharconv_replacement(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + while (slen) { + if (slen < 3u && !strncasecmp(s, "obj", slen)) { + if (*n) + goto no_conv; + return LIBCHARCONV_INDETERMINATE; + } else if (slen >= 3u && !strncasecmp(s, "obj", 3u)) { + if (*n) + goto no_conv; + c = UINT32_C(0xFFFC); + *n = 3u; + goto conv; + } else if (*s == '?') { + if (*n) + goto no_conv; + c = UINT32_C(0xFFFD); + *n = 1u; + goto conv; + } else { + *n += 1u; + s++; + slen--; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*ncp) + *cp = c; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} -- cgit v1.2.3-70-g09d2