From 87f373ca116abbb5a7ba8e6ad7111082e8dfb128 Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Sun, 25 Jan 2026 18:37:59 +0100 Subject: Add control characters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- libcharconv_latin.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) (limited to 'libcharconv_latin.c') diff --git a/libcharconv_latin.c b/libcharconv_latin.c index d8c27d0..db60835 100644 --- a/libcharconv_latin.c +++ b/libcharconv_latin.c @@ -728,6 +728,125 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz case UINT32_C(0x2142): c1 = 'L'; goto conv1; case UINT32_C(0x2144): c1 = 'Y'; goto conv1; + /* control characters (control characters) */ + case UINT32_C(0x00): c1 = 'N'; c2 = 'U'; c3 = 'L'; goto conv3; + case UINT32_C(0x01): c1 = 'S'; c2 = 'O'; c3 = 'H'; goto conv3; + case UINT32_C(0x02): c1 = 'S'; c2 = 'T'; c3 = 'X'; goto conv3; + case UINT32_C(0x03): c1 = 'E'; c2 = 'T'; c3 = 'X'; goto conv3; + case UINT32_C(0x04): c1 = 'E'; c2 = 'O'; c3 = 'T'; goto conv3; + case UINT32_C(0x05): c1 = 'E'; c2 = 'N'; c3 = 'Q'; goto conv3; + case UINT32_C(0x06): c1 = 'A'; c2 = 'C'; c3 = 'K'; goto conv3; + case UINT32_C(0x07): c1 = 'B'; c2 = 'E'; c3 = 'L'; goto conv3; + case UINT32_C(0x08): c1 = 'B'; c2 = 'S'; goto conv2; + case UINT32_C(0x09): c1 = 'H'; c2 = 'T'; goto conv2; + case UINT32_C(0x0A): c1 = 'L'; c2 = 'F'; goto conv2; + case UINT32_C(0x0B): c1 = 'V'; c2 = 'T'; goto conv2; + case UINT32_C(0x0C): c1 = 'F'; c2 = 'F'; goto conv2; + case UINT32_C(0x0D): c1 = 'C'; c2 = 'R'; goto conv2; + case UINT32_C(0x0E): c1 = 'S'; c2 = 'S'; goto conv2; + case UINT32_C(0x0F): c1 = 'S'; c2 = 'I'; goto conv2; + case UINT32_C(0x10): c1 = 'D'; c2 = 'L'; c3 = 'E'; goto conv3; + case UINT32_C(0x11): c1 = 'D'; c2 = 'C'; c3 = '1'; goto conv3; + case UINT32_C(0x12): c1 = 'D'; c2 = 'C'; c3 = '2'; goto conv3; + case UINT32_C(0x13): c1 = 'D'; c2 = 'C'; c3 = '3'; goto conv3; + case UINT32_C(0x14): c1 = 'D'; c2 = 'C'; c3 = '4'; goto conv3; + case UINT32_C(0x15): c1 = 'N'; c2 = 'A'; c3 = 'K'; goto conv3; + case UINT32_C(0x16): c1 = 'S'; c2 = 'Y'; c3 = 'N'; goto conv3; + case UINT32_C(0x17): c1 = 'E'; c2 = 'T'; c3 = 'B'; goto conv3; + case UINT32_C(0x18): c1 = 'C'; c2 = 'A'; c3 = 'N'; goto conv3; + case UINT32_C(0x19): c1 = 'E'; c2 = 'M'; goto conv2; + case UINT32_C(0x1A): c1 = 'S'; c2 = 'U'; c3 = 'B'; goto conv3; + case UINT32_C(0x1B): c1 = 'E'; c2 = 'S'; c3 = 'C'; goto conv3; + case UINT32_C(0x1C): c1 = 'F'; c2 = 'S'; goto conv2; + case UINT32_C(0x1D): c1 = 'G'; c2 = 'S'; goto conv2; + case UINT32_C(0x1E): c1 = 'R'; c2 = 'S'; goto conv2; + case UINT32_C(0x1F): c1 = 'U'; c2 = 'S'; goto conv2; + case UINT32_C(0x7F): c1 = 'D'; c2 = 'E'; c3 = 'L'; goto conv3; + case UINT32_C(0x0080): c1 = 'P'; c2 = 'A'; c3 = 'D'; goto conv3; + case UINT32_C(0x0081): c1 = 'H'; c2 = 'O'; c3 = 'P'; goto conv3; + case UINT32_C(0x0082): c1 = 'B'; c2 = 'P'; c3 = 'H'; goto conv3; + case UINT32_C(0x0083): c1 = 'N'; c2 = 'B'; c3 = 'P'; goto conv3; + case UINT32_C(0x0084): c1 = 'I'; c2 = 'N'; c3 = 'D'; goto conv3; + case UINT32_C(0x0085): c1 = 'N'; c2 = 'E'; c3 = 'L'; goto conv3; + case UINT32_C(0x0086): c1 = 'S'; c2 = 'S'; c3 = 'A'; goto conv3; + case UINT32_C(0x0087): c1 = 'E'; c2 = 'S'; c3 = 'A'; goto conv3; + case UINT32_C(0x0088): c1 = 'H'; c2 = 'T'; c3 = 'S'; goto conv3; + case UINT32_C(0x0089): c1 = 'H'; c2 = 'T'; c3 = 'J'; goto conv3; + case UINT32_C(0x008A): c1 = 'L'; c2 = 'T'; c3 = 'S'; goto conv3; + case UINT32_C(0x008B): c1 = 'P'; c2 = 'L'; c3 = 'D'; goto conv3; + case UINT32_C(0x008C): c1 = 'P'; c2 = 'L'; c3 = 'U'; goto conv3; + case UINT32_C(0x008D): c1 = 'R'; c2 = 'I'; goto conv2; + case UINT32_C(0x008E): c1 = 'S'; c2 = 'S'; c3 = '2'; goto conv3; + case UINT32_C(0x008F): c1 = 'S'; c2 = 'S'; c3 = '3'; goto conv3; + case UINT32_C(0x0090): c1 = 'D'; c2 = 'C'; c3 = 'S'; goto conv3; + case UINT32_C(0x0091): c1 = 'P'; c2 = 'U'; c3 = '1'; goto conv3; + case UINT32_C(0x0092): c1 = 'P'; c2 = 'U'; c3 = '2'; goto conv3; + case UINT32_C(0x0093): c1 = 'S'; c2 = 'T'; c3 = 'S'; goto conv3; + case UINT32_C(0x0094): c1 = 'C'; c2 = 'C'; c3 = 'H'; goto conv3; + case UINT32_C(0x0095): c1 = 'M'; c2 = 'W'; goto conv2; + case UINT32_C(0x0096): c1 = 'S'; c2 = 'P'; c3 = 'A'; goto conv3; + case UINT32_C(0x0097): c1 = 'E'; c2 = 'P'; c3 = 'A'; goto conv3; + case UINT32_C(0x0098): c1 = 'S'; c2 = 'O'; c3 = 'S'; goto conv3; + case UINT32_C(0x0099): c1 = 'S'; c2 = 'G'; c3 = 'C'; c4 = 'I'; goto conv4; + case UINT32_C(0x009A): c1 = 'S'; c2 = 'S'; c3 = 'I'; goto conv3; + case UINT32_C(0x009B): c1 = 'C'; c2 = 'S'; c3 = 'I'; goto conv3; + case UINT32_C(0x009C): c1 = 'S'; c2 = 'T'; goto conv2; + case UINT32_C(0x009D): c1 = 'O'; c2 = 'S'; c3 = 'C'; goto conv3; + case UINT32_C(0x009E): c1 = 'P'; c2 = 'M'; goto conv2; + case UINT32_C(0x009F): c1 = 'A'; c2 = 'P'; c3 = 'C'; goto conv3; + case UINT32_C(0x200B): c1 = 'Z'; c2 = 'W'; c3 = 'S'; goto conv3; + case UINT32_C(0x200C): c1 = 'Z'; c2 = 'W'; c3 = 'N'; c4 = 'J'; goto conv4; + case UINT32_C(0x200D): c1 = 'Z'; c2 = 'W'; c3 = 'J'; goto conv3; + case UINT32_C(0x200E): c1 = 'L'; c2 = 'T'; c3 = 'R'; c4 = 'M'; goto conv4; + case UINT32_C(0x200F): c1 = 'R'; c2 = 'T'; c3 = 'L'; c4 = 'M'; goto conv4; + case UINT32_C(0x202A): c1 = 'L'; c2 = 'T'; c3 = 'R'; c4 = 'E'; goto conv4; + case UINT32_C(0x202B): c1 = 'R'; c2 = 'T'; c3 = 'L'; c4 = 'E'; goto conv4; + case UINT32_C(0x202C): c1 = 'P'; c2 = 'D'; c3 = 'F'; goto conv3; + case UINT32_C(0x202D): c1 = 'L'; c2 = 'T'; c3 = 'R'; c4 = 'O'; goto conv4; + case UINT32_C(0x202E): c1 = 'R'; c2 = 'T'; c3 = 'L'; c4 = 'O'; goto conv4; + case UINT32_C(0x2060): c1 = 'W'; c2 = 'J'; goto conv2; + case UINT32_C(0x2066): c1 = 'L'; c2 = 'T'; c3 = 'R'; c4 = 'I'; goto conv4; + case UINT32_C(0x2067): c1 = 'R'; c2 = 'T'; c3 = 'L'; c4 = 'I'; goto conv4; + case UINT32_C(0x2068): c1 = 'F'; c2 = 'S'; c3 = 'I'; goto conv3; + case UINT32_C(0x2069): c1 = 'P'; c2 = 'D'; c3 = 'I'; goto conv3; + case UINT32_C(0x206A): c1 = 'I'; c2 = 'S'; c3 = 'S'; goto conv3; + case UINT32_C(0x206B): c1 = 'A'; c2 = 'S'; c3 = 'S'; goto conv3; + case UINT32_C(0x206C): c1 = 'I'; c2 = 'A'; c3 = 'F'; c4 = 'S'; goto conv4; + case UINT32_C(0x206D): c1 = 'A'; c2 = 'A'; c3 = 'F'; c4 = 'S'; goto conv4; + case UINT32_C(0x206E): c1 = 'N'; c2 = 'A'; c3 = 'D'; c4 = 'S'; goto conv4; + case UINT32_C(0x206F): c1 = 'N'; c2 = 'O'; c3 = 'D'; c4 = 'S'; goto conv4; + case UINT32_C(0xFFF9): c1 = 'I'; c2 = 'A'; c3 = 'A'; goto conv3; + case UINT32_C(0xFFFA): c1 = 'I'; c2 = 'A'; c3 = 'S'; goto conv3; + case UINT32_C(0xFFFB): c1 = 'I'; c2 = 'A'; c3 = 'T'; goto conv3; + case UINT32_C(0x1BCA0): c1 = 'S'; c2 = 'F'; c3 = 'L'; c4 = 'O'; goto conv4; + case UINT32_C(0x1BCA1): c1 = 'S'; c2 = 'F'; c3 = 'C'; c4 = 'O'; goto conv4; + case UINT32_C(0x1BCA2): c1 = 'S'; c2 = 'F'; c3 = 'D'; c4 = 'S'; goto conv4; + case UINT32_C(0x1BCA3): c1 = 'S'; c2 = 'F'; c3 = 'U'; c4 = 'S'; goto conv4; + case UINT32_C(0xE0001): c1 = 'L'; c2 = 'T'; c3 = 'A'; c4 = 'G'; goto conv4; + case UINT32_C(0xE007F): c1 = 'C'; c2 = 'T'; c3 = 'A'; c4 = 'G'; goto conv4; + + /* control characters (whitespace) */ + case UINT32_C(0x20): c1 = 'S'; c2 = 'P'; goto conv2; + case UINT32_C(0x00A0): c1 = 'N'; c2 = 'B'; c3 = 'S'; c4 = 'P'; goto conv4; + case UINT32_C(0x2000): c1 = 'N'; c2 = 'Q'; goto conv2; + case UINT32_C(0x2001): c1 = 'M'; c2 = 'Q'; goto conv2; + case UINT32_C(0x2002): c1 = 'N'; c2 = 'S'; c3 = 'P'; goto conv3; + case UINT32_C(0x2003): c1 = 'M'; c2 = 'S'; c3 = 'P'; goto conv3; + case UINT32_C(0x2004): c1 = '3'; c2 = 'M'; c3 = 'S'; c4 = 'P'; goto conv4; + case UINT32_C(0x2005): c1 = '4'; c2 = 'M'; c3 = 'S'; c4 = 'P'; goto conv4; + case UINT32_C(0x2006): c1 = '6'; c2 = 'M'; c3 = 'S'; c4 = 'P'; goto conv4; + case UINT32_C(0x2007): c1 = 'F'; c2 = 'S'; c3 = 'P'; goto conv3; + case UINT32_C(0x2008): c1 = 'P'; c2 = 'S'; c3 = 'P'; goto conv3; + case UINT32_C(0x2009): c1 = 'T'; c2 = 'S'; c3 = 'P'; goto conv3; + case UINT32_C(0x200A): c1 = 'H'; c2 = 'S'; c3 = 'P'; goto conv3; + case UINT32_C(0x2028): c1 = 'L'; c2 = 'S'; goto conv2; + case UINT32_C(0x2029): c1 = 'P'; c2 = 'S'; goto conv2; + case UINT32_C(0x202F): c1 = 'N'; c2 = 'N'; c3 = 'B'; c4 = 'S'; c5 = 'P'; goto conv5; + case UINT32_C(0x205F): c1 = 'M'; c2 = 'M'; c3 = 'S'; c4 = 'P'; goto conv4; + + /* control characters (conditional characters) */ + case UINT32_C(0x00AD): c1 = 'S'; c2 = 'H'; c3 = 'Y'; goto conv3; + default: no_match: *n += clen; -- cgit v1.2.3-70-g09d2