diff options
| author | Mattias Andrée <m@maandree.se> | 2026-01-25 21:03:48 +0100 |
|---|---|---|
| committer | Mattias Andrée <m@maandree.se> | 2026-01-25 21:03:48 +0100 |
| commit | b710c7eb580d8f291bee9bc760d806ec305f26b2 (patch) | |
| tree | 30f50ee2832a80d37a4a9615ccec543bdfd6a79b | |
| parent | latin: do not convert LF and SP to text (diff) | |
| download | charconv-b710c7eb580d8f291bee9bc760d806ec305f26b2.tar.gz charconv-b710c7eb580d8f291bee9bc760d806ec305f26b2.tar.bz2 charconv-b710c7eb580d8f291bee9bc760d806ec305f26b2.tar.xz | |
Add control character representations
Signed-off-by: Mattias Andrée <m@maandree.se>
| -rw-r--r-- | Makefile | 6 | ||||
| -rw-r--r-- | convert-to-control-character-representations.c | 4 | ||||
| -rw-r--r-- | libcharconv.h | 8 | ||||
| -rw-r--r-- | libcharconv_control_character_representations.c | 65 | ||||
| -rw-r--r-- | libcharconv_control_characters.c | 24 | ||||
| -rw-r--r-- | libcharconv_latin.c | 11 |
6 files changed, 116 insertions, 2 deletions
@@ -73,7 +73,8 @@ BIN =\
 	convert-to-negative\
 	convert-to-symbols\
 	convert-to-control-characters\
-	convert-to-xiangqi
+	convert-to-xiangqi\
+	convert-to-control-character-representations
 
 LIBOBJ =\
 	libcharconv_decode_utf8_.o\
@@ -136,7 +137,8 @@ LIBOBJ =\
 	libcharconv_symbols.o\
 	libcharconv_control_characters.o\
 	libcharconv_xiangqi_red.o\
-	libcharconv_xiangqi_black.o
+	libcharconv_xiangqi_black.o\
+	libcharconv_control_character_representations.o
 
 LOBJ = $(LIBOBJ:.o=.lo)
 
diff --git a/convert-to-control-character-representations.c b/convert-to-control-character-representations.c
new file mode 100644
index 0000000..1ea01c9
--- /dev/null
+++ b/convert-to-control-character-representations.c
@@ -0,0 +1,4 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+SIMPLE(libcharconv_control_character_representations)
diff --git a/libcharconv.h b/libcharconv.h
index 629fd39..14627c6 100644
--- a/libcharconv.h
+++ b/libcharconv.h
@@ -386,6 +386,7 @@ LIBCHARCONV_FUNC_(libcharconv_symbols);
 /**
  * Convert characters and character sequences to
  * control characters, spaces, and SOFT HYPHENs
+ * (textual representation symbols can be converted)
  */
 LIBCHARCONV_FUNC_(libcharconv_control_characters);
 
@@ -401,6 +402,13 @@ LIBCHARCONV_FUNC_(libcharconv_xiangqi_red);
  */
 LIBCHARCONV_FUNC_(libcharconv_xiangqi_black);
 
+/**
+ * Convert ASCII control characters, and SPACE,
+ * and their abbreviations to textual representation
+ * symbols
+ */
+LIBCHARCONV_FUNC_(libcharconv_control_character_representations);
+
 #undef LIBCHARCONV_FUNC_
 
 #endif
diff --git a/libcharconv_control_character_representations.c b/libcharconv_control_character_representations.c
new file mode 100644
index 0000000..57c1c93
--- /dev/null
+++ b/libcharconv_control_character_representations.c
@@ -0,0 +1,65 @@
+/* See LICENSE file for copyright and license details. */
+#include "lib-common.h"
+#include <string.h>
+
+
+static const char *texts[] = { /* index i maps to U+2400 + i; the last two entries are SP (0x20) and DEL (0x21) */
+	"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
+	"BS",  "HT",  "LF",  "VT",  "FF",  "CR",  "SO",  "SI", /* 0x0B is "VT" (no leading space), 0x0E is "SO" (SHIFT OUT) */
+	"DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
+	"CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
+	"SP",  "DEL"
+};
+
+
+enum libcharconv_result
+libcharconv_control_character_representations(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp)
+{
+	size_t i, len, found, found_len;
+	int indeterminate = 0; /* conv_byte jumps to conv before the per-iteration reset below, and conv reads this */
+	*n = 0;
+	for (; slen; s++, slen--, ++*n) {
+		if (*(const unsigned char *)s <= ' ') { /* raw control byte or SPACE: the byte value is the table index */
+			found = *(const unsigned char *)s;
+			goto conv_byte;
+		} else if (*(const unsigned char *)s == 0x7Fu) { /* DEL is stored after SP, at index 0x21 */
+			found = 0x21u;
+			goto conv_byte;
+		}
+		indeterminate = 0;
+		found = SIZE_MAX;
+		found_len = 0u;
+		for (i = 0u; i < sizeof(texts) / sizeof(*texts); i++) { /* pick the longest abbreviation that prefixes s */
+			len = strlen(texts[i]);
+			if (strncmp(s, texts[i], len < slen ? len : slen))
+				continue;
+			if (slen < len) { /* input ends inside a possible abbreviation */
+				indeterminate = 1;
+				continue;
+			}
+			if (len > found_len) {
+				found = i;
+				found_len = len;
+			}
+		}
+		if (found_len)
+			goto conv;
+		if (*n)
+			goto no_conv;
+		if (indeterminate)
+			return LIBCHARCONV_INDETERMINATE;
+	}
+no_conv:
+	return LIBCHARCONV_NO_CONVERT;
+
+conv_byte:
+	found_len = 1u;
+conv:
+	if (*n) /* bytes were skipped first: report those as unconvertible instead */
+		goto no_conv;
+	if (*ncp)
+		*cp = (uint_least32_t)UINT32_C(0x2400) | (uint_least32_t)found;
+	*n += found_len;
+	*ncp = 1u;
+	return indeterminate ? LIBCHARCONV_CONVERT_IF_END : LIBCHARCONV_CONVERTED;
+}
diff --git a/libcharconv_control_characters.c b/libcharconv_control_characters.c
index 55e4bbf..c45f03e 100644
--- a/libcharconv_control_characters.c
+++ b/libcharconv_control_characters.c
@@ -132,6 +132,21 @@ libcharconv_control_characters(const char *s, size_t slen, size_t *n, uint_least
 	int indeterminate;
 	*n = 0;
 	for (; slen; s++, slen--, ++*n) {
+		if (s[0] == '\xE2') { /* E2 90 80..A1 is the UTF-8 encoding of U+2400..U+2421 */
+			if (slen == 1u)
+				return LIBCHARCONV_INDETERMINATE;
+			if (s[1] != '\x90')
+				goto search;
+			if (slen == 2u)
+				return LIBCHARCONV_INDETERMINATE;
+			i = ((const unsigned char *)s)[2];
+			if (0x80u > i || i > 0xA1u)
+				goto search;
+			i &= 0x3Fu; /* keep the low six bits of the continuation byte: 0x00..0x21 */
+			i = i == 0x21u ? 0x7Fu : i; /* index 0x21 is the DEL symbol */
+			goto conv_repr;
+		}
+	search:
 		indeterminate = 0;
 		found = SIZE_MAX;
 		found_len = 0u;
@@ -158,6 +173,15 @@ libcharconv_control_characters(const char *s, size_t slen, size_t *n, uint_least
 no_conv:
 	return LIBCHARCONV_NO_CONVERT;
 
+conv_repr:
+	if (*n)
+		goto no_conv;
+	if (*ncp)
+		*cp = (uint_least32_t)i;
+	*n += 3u; /* the symbol occupies three UTF-8 bytes */
+	*ncp = 1u;
+	return LIBCHARCONV_CONVERTED;
+
 conv:
 	if (*n)
 		goto no_conv;
diff --git a/libcharconv_latin.c b/libcharconv_latin.c
index 360b3dd..047dee5 100644
--- a/libcharconv_latin.c
+++ b/libcharconv_latin.c
@@ -449,6 +449,17 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
 			c = (uint_least32_t)"gaehrcsGAEHRCS"[c - UINT32_C(0x1FA60)];
 			goto conv;
 
+		} else if (UINT32_C(0x2400) <= c && c <= UINT32_C(0x2421)) {
+			/* control character representations */
+			cs = ((const char *[]){
+				"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
+				"BS",  "HT",  "LF",  "VT",  "FF",  "CR",  "SO",  "SI", /* U+240B is "VT", U+240E is "SO" */
+				"DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
+				"CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
+				"SP",  "DEL"
+			})[c & 0xFF];
+			goto conv_str;
+
 		} else {
 		use_switch:
 			switch (c) {
