aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMattias Andrée <m@maandree.se>2026-01-25 21:03:48 +0100
committerMattias Andrée <m@maandree.se>2026-01-25 21:03:48 +0100
commitb710c7eb580d8f291bee9bc760d806ec305f26b2 (patch)
tree30f50ee2832a80d37a4a9615ccec543bdfd6a79b
parentlatin: do not covert LF and SP to text (diff)
downloadcharconv-b710c7eb580d8f291bee9bc760d806ec305f26b2.tar.gz
charconv-b710c7eb580d8f291bee9bc760d806ec305f26b2.tar.bz2
charconv-b710c7eb580d8f291bee9bc760d806ec305f26b2.tar.xz
Add control character representations
Signed-off-by: Mattias Andrée <m@maandree.se>
-rw-r--r--Makefile6
-rw-r--r--convert-to-control-character-representations.c4
-rw-r--r--libcharconv.h8
-rw-r--r--libcharconv_control_character_representations.c65
-rw-r--r--libcharconv_control_characters.c24
-rw-r--r--libcharconv_latin.c11
6 files changed, 116 insertions, 2 deletions
diff --git a/Makefile b/Makefile
index c11b562..2777447 100644
--- a/Makefile
+++ b/Makefile
@@ -73,7 +73,8 @@ BIN =\
convert-to-negative\
convert-to-symbols\
convert-to-control-characters\
- convert-to-xiangqi
+ convert-to-xiangqi\
+ convert-to-control-character-representations
LIBOBJ =\
libcharconv_decode_utf8_.o\
@@ -136,7 +137,8 @@ LIBOBJ =\
libcharconv_symbols.o\
libcharconv_control_characters.o\
libcharconv_xiangqi_red.o\
- libcharconv_xiangqi_black.o
+ libcharconv_xiangqi_black.o\
+ libcharconv_control_character_representations.o
LOBJ = $(LIBOBJ:.o=.lo)
diff --git a/convert-to-control-character-representations.c b/convert-to-control-character-representations.c
new file mode 100644
index 0000000..1ea01c9
--- /dev/null
+++ b/convert-to-control-character-representations.c
@@ -0,0 +1,4 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+SIMPLE(libcharconv_control_character_representations)
diff --git a/libcharconv.h b/libcharconv.h
index 629fd39..14627c6 100644
--- a/libcharconv.h
+++ b/libcharconv.h
@@ -386,6 +386,7 @@ LIBCHARCONV_FUNC_(libcharconv_symbols);
/**
* Convert characters and character sequences to
* control characters, spaces, and SOFT HYPHENs
+ * (textual representation symbols can be converted)
*/
LIBCHARCONV_FUNC_(libcharconv_control_characters);
@@ -401,6 +402,13 @@ LIBCHARCONV_FUNC_(libcharconv_xiangqi_red);
*/
LIBCHARCONV_FUNC_(libcharconv_xiangqi_black);
+/**
+ * Convert ASCII control characters, and SPACE,
+ * and their abbreviations to textual representation
+ * symbols
+ */
+LIBCHARCONV_FUNC_(libcharconv_control_character_representations);
+
#undef LIBCHARCONV_FUNC_
#endif
diff --git a/libcharconv_control_character_representations.c b/libcharconv_control_character_representations.c
new file mode 100644
index 0000000..57c1c93
--- /dev/null
+++ b/libcharconv_control_character_representations.c
@@ -0,0 +1,65 @@
+/* See LICENSE file for copyright and license details. */
+#include "lib-common.h"
+#include <string.h>
+
+
+/**
+ * Abbreviations of the characters that have a textual
+ * representation symbol, indexed by the offset of that
+ * symbol from U+2400: entries 0x00 through 0x1F are the
+ * ASCII control characters in code order, entry 0x20 is
+ * SPACE, and entry 0x21 is DELETE
+ */
+static const char *const texts[] = {
+	"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
+	"BS",  "HT",  "LF",  "VT",  "FF",  "CR",  "SO",  "SI",
+	"DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
+	"CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
+	"SP",  "DEL"
+};
+
+
+/**
+ * Convert the ASCII control character, SPACE, DELETE, or
+ * abbreviation (one of the strings in `texts`) found at the
+ * beginning of the input into its textual representation
+ * symbol, U+2400 plus the index of the character
+ *
+ * When several abbreviations match, the longest one is used
+ *
+ * @param   s     The input; it is only read within its first
+ *                `slen` bytes and need not be NUL-terminated
+ * @param   slen  The number of bytes available in `s`
+ * @param   n     Output parameter: on conversion, the number of
+ *                bytes of `s` that were converted; on
+ *                LIBCHARCONV_NO_CONVERT, the number of leading
+ *                bytes stepped over without finding a match; on
+ *                LIBCHARCONV_INDETERMINATE, 0
+ * @param   cp    Output parameter for the symbol's code point;
+ *                written only if `*ncp` is non-zero on entry
+ * @param   ncp   On entry, zero to suppress the write to `cp`
+ *                (presumably the capacity of `cp`; confirm with
+ *                the callers); set to 1 on conversion
+ * @return        LIBCHARCONV_CONVERTED on conversion,
+ *                LIBCHARCONV_CONVERT_IF_END on conversion when
+ *                the input is also a proper prefix of a longer
+ *                abbreviation, LIBCHARCONV_INDETERMINATE when the
+ *                input is only a proper prefix of an abbreviation,
+ *                and LIBCHARCONV_NO_CONVERT otherwise
+ */
+enum libcharconv_result
+libcharconv_control_character_representations(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp)
+{
+	size_t i, len, found, found_len;
+	unsigned char byte;
+	/* Must start out cleared: the single-byte paths jump to
+	 * `conv_byte` without running the abbreviation search,
+	 * yet the return statement under `conv` reads this flag */
+	int indeterminate = 0;
+
+	*n = 0;
+	for (; slen; s++, slen--, ++*n) {
+		byte = *(const unsigned char *)s;
+		if (byte <= ' ') {
+			/* C0 control character or SPACE: the index is the byte itself */
+			found = byte;
+			goto conv_byte;
+		} else if (byte == 0x7Fu) {
+			/* DELETE is placed directly after SPACE, at U+2421 */
+			found = 0x21u;
+			goto conv_byte;
+		}
+
+		/* Look for the longest abbreviation that starts here */
+		indeterminate = 0;
+		found = SIZE_MAX;
+		found_len = 0u;
+		for (i = 0u; i < sizeof(texts) / sizeof(*texts); i++) {
+			len = strlen(texts[i]);
+			if (strncmp(s, texts[i], len < slen ? len : slen))
+				continue;
+			if (slen < len) {
+				/* All available input matches, but more is
+				 * needed to tell whether this one is meant */
+				indeterminate = 1;
+				continue;
+			}
+			if (len > found_len) {
+				found = i;
+				found_len = len;
+			}
+		}
+		if (found_len)
+			goto conv;
+		if (*n)
+			goto no_conv;
+		if (indeterminate)
+			return LIBCHARCONV_INDETERMINATE;
+	}
+no_conv:
+	return LIBCHARCONV_NO_CONVERT;
+
+conv_byte:
+	found_len = 1u;
+conv:
+	/* Only convert at the very beginning of the input; if bytes have
+	 * been stepped over, report those as not convertible instead */
+	if (*n)
+		goto no_conv;
+	if (*ncp)
+		*cp = (uint_least32_t)UINT32_C(0x2400) | (uint_least32_t)found;
+	*n += found_len;
+	*ncp = 1u;
+	return indeterminate ? LIBCHARCONV_CONVERT_IF_END : LIBCHARCONV_CONVERTED;
+}
diff --git a/libcharconv_control_characters.c b/libcharconv_control_characters.c
index 55e4bbf..c45f03e 100644
--- a/libcharconv_control_characters.c
+++ b/libcharconv_control_characters.c
@@ -132,6 +132,21 @@ libcharconv_control_characters(const char *s, size_t slen, size_t *n, uint_least
int indeterminate;
*n = 0;
for (; slen; s++, slen--, ++*n) {
+ if (s[0] == '\xE2') {
+ if (slen == 1u)
+ return LIBCHARCONV_INDETERMINATE;
+ if (s[1] != '\x90')
+ goto search;
+ if (slen == 2u)
+ return LIBCHARCONV_INDETERMINATE;
+ i = ((const unsigned char *)s)[2];
+ if (0x80u > i || i > 0xA1u)
+ goto search;
+ i &= 0x3Fu;
+ i = i == 0x21u ? 0x7Fu : i;
+ goto conv_repr;
+ }
+ search:
indeterminate = 0;
found = SIZE_MAX;
found_len = 0u;
@@ -158,6 +173,15 @@ libcharconv_control_characters(const char *s, size_t slen, size_t *n, uint_least
no_conv:
return LIBCHARCONV_NO_CONVERT;
+conv_repr:
+ if (*n)
+ goto no_conv;
+ if (*ncp)
+ *cp = (uint_least32_t)i;
+ *n += 3u;
+ *ncp = 1u;
+ return LIBCHARCONV_CONVERTED;
+
conv:
if (*n)
goto no_conv;
diff --git a/libcharconv_latin.c b/libcharconv_latin.c
index 360b3dd..047dee5 100644
--- a/libcharconv_latin.c
+++ b/libcharconv_latin.c
@@ -449,6 +449,17 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
c = (uint_least32_t)"gaehrcsGAEHRCS"[c - UINT32_C(0x1FA60)];
goto conv;
+ } else if (UINT32_C(0x2400) <= c && c <= UINT32_C(0x2421)) {
+ /* control character representations */
+ cs = ((const char *[]){
+ "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
+ "BS", "HT", "LF", " VT", "FF", "CR", "SS", "SI",
+ "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
+ "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US",
+ "SP", "DEL"
+ })[c & 0xFF];
+ goto conv_str;
+
} else {
use_switch:
switch (c) {