diff options
| author | Mattias Andrée <m@maandree.se> | 2026-01-25 18:37:59 +0100 |
|---|---|---|
| committer | Mattias Andrée <m@maandree.se> | 2026-01-25 18:37:59 +0100 |
| commit | 87f373ca116abbb5a7ba8e6ad7111082e8dfb128 (patch) | |
| tree | 0943f33e352ae98297cfd55cc058acd7b0c9609d | |
| parent | Add symbols (diff) | |
| download | charconv-87f373ca116abbb5a7ba8e6ad7111082e8dfb128.tar.gz charconv-87f373ca116abbb5a7ba8e6ad7111082e8dfb128.tar.bz2 charconv-87f373ca116abbb5a7ba8e6ad7111082e8dfb128.tar.xz | |
Add control characters
Signed-off-by: Mattias Andrée <m@maandree.se>
Diffstat (limited to '')
| -rw-r--r-- | Makefile | 6 | ||||
| -rw-r--r-- | convert-to-control-characters.c | 4 | ||||
| -rw-r--r-- | libcharconv.h | 6 | ||||
| -rw-r--r-- | libcharconv_control_characters.c | 169 | ||||
| -rw-r--r-- | libcharconv_latin.c | 119 |
5 files changed, 302 insertions, 2 deletions
@@ -71,7 +71,8 @@ BIN =\ convert-to-tally-marks\ convert-to-ideographic-tally-marks\ convert-to-negative\ - convert-to-symbols + convert-to-symbols\ + convert-to-control-characters LIBOBJ =\ libcharconv_decode_utf8_.o\ @@ -131,7 +132,8 @@ LIBOBJ =\ libcharconv_tally_marks.o\ libcharconv_ideographic_tally_marks.o\ libcharconv_negative.o\ - libcharconv_symbols.o + libcharconv_symbols.o\ + libcharconv_control_characters.o LOBJ = $(LIBOBJ:.o=.lo) diff --git a/convert-to-control-characters.c b/convert-to-control-characters.c new file mode 100644 index 0000000..b9c704e --- /dev/null +++ b/convert-to-control-characters.c @@ -0,0 +1,4 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +SIMPLE(libcharconv_control_characters) diff --git a/libcharconv.h b/libcharconv.h index e220062..b6050a9 100644 --- a/libcharconv.h +++ b/libcharconv.h @@ -383,6 +383,12 @@ LIBCHARCONV_FUNC_(libcharconv_negative); */ LIBCHARCONV_FUNC_(libcharconv_symbols); +/** + * Convert characters and character sequences to + * control characters, spaces, and SOFT HYPHENs + */ +LIBCHARCONV_FUNC_(libcharconv_control_characters); + #undef LIBCHARCONV_FUNC_ #endif diff --git a/libcharconv_control_characters.c b/libcharconv_control_characters.c new file mode 100644 index 0000000..55e4bbf --- /dev/null +++ b/libcharconv_control_characters.c @@ -0,0 +1,169 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "lib-common.h" +#include <string.h> + + +static const struct { + uint_least32_t cp; + const char *s; +} symbols[] = { + {UINT32_C(0x00), "NUL"}, /* NULL */ + {UINT32_C(0x01), "SOH"}, /* START OF HEADING */ + {UINT32_C(0x02), "STX"}, /* START OF TEXT */ + {UINT32_C(0x03), "ETX"}, /* END OF TEXT */ + {UINT32_C(0x04), "EOT"}, /* END OF TRANSMISSION */ + {UINT32_C(0x05), "ENQ"}, /* ENQUIRY */ + {UINT32_C(0x06), "ACK"}, /* ACKNOWLEDGE */ + {UINT32_C(0x07), "BEL"}, /* BELL */ + {UINT32_C(0x08), "BS"}, /* BACKSPACE */ + {UINT32_C(0x09), "HT"}, /* CHARACTER TABULATION */ + {UINT32_C(0x0A), "LF"}, /* LINE FEED */ + {UINT32_C(0x0B), "VT"}, /* LINE TABULATION */ + {UINT32_C(0x0C), "FF"}, /* FORM FEED */ + {UINT32_C(0x0D), "CR"}, /* CARRIAGE RETURN */ + {UINT32_C(0x0E), "SO"}, /* SHIFT OUT */ + {UINT32_C(0x0F), "SI"}, /* SHIFT IN */ + {UINT32_C(0x10), "DLE"}, /* DATA LINK ESCAPE */ + {UINT32_C(0x11), "DC1"}, /* DEVICE CONTROL ONE */ + {UINT32_C(0x12), "DC2"}, /* DEVICE CONTROL TWO */ + {UINT32_C(0x13), "DC3"}, /* DEVICE CONTROL THREE */ + {UINT32_C(0x14), "DC4"}, /* DEVICE CONTROL FOUR */ + {UINT32_C(0x15), "NAK"}, /* NEGATIVE ACKNOWLEDGE */ + {UINT32_C(0x16), "SYN"}, /* SYNCHRONOUS IDLE */ + {UINT32_C(0x17), "ETB"}, /* END OF TRANSMISSION BLOCK */ + {UINT32_C(0x18), "CAN"}, /* CANCEL */ + {UINT32_C(0x19), "EM"}, /* END OF MEDIUM */ + {UINT32_C(0x1A), "SUB"}, /* SUBSTITUTE */ + {UINT32_C(0x1B), "ESC"}, /* ESCAPE */ + {UINT32_C(0x1C), "FS"}, /* INFORMATION SEPARATOR FOUR */ + {UINT32_C(0x1D), "GS"}, /* INFORMATION SEPARATOR THREE */ + {UINT32_C(0x1E), "RS"}, /* INFORMATION SEPARATOR TWO */ + {UINT32_C(0x1F), "US"}, /* INFORMATION SEPARATOR ONE */ + {UINT32_C(0x7F), "DEL"}, /* DELETE */ + {UINT32_C(0x0080), "PAD"}, /* Padding Character */ + {UINT32_C(0x0081), "HOP"}, /* High Octet Preset */ + {UINT32_C(0x0082), "BPH"}, /* BREAK PERMITTED HERE */ + {UINT32_C(0x0083), "NBP"}, /* NO BREAK HERE */ + {UINT32_C(0x0084), "IND"}, /* INDEX */ + {UINT32_C(0x0085), "NEL"}, /* NEXT
LINE */ + {UINT32_C(0x0086), "SSA"}, /* START OF SELECTED AREA */ + {UINT32_C(0x0087), "ESA"}, /* END OF SELECTED AREA */ + {UINT32_C(0x0088), "HTS"}, /* CHARACTER TABULATION SET */ + {UINT32_C(0x0089), "HTJ"}, /* CHARACTER TABULATION WITH JUSTIFICATION */ + {UINT32_C(0x008A), "LTS"}, /* LINE TABULATION SET */ + {UINT32_C(0x008B), "PLD"}, /* PARTIAL LINE FORWARD */ + {UINT32_C(0x008C), "PLU"}, /* PARTIAL LINE BACKWARD */ + {UINT32_C(0x008D), "RI"}, /* REVERSE LINE FEED */ + {UINT32_C(0x008E), "SS2"}, /* SINGLE SHIFT TWO */ + {UINT32_C(0x008F), "SS3"}, /* SINGLE SHIFT THREE */ + {UINT32_C(0x0090), "DCS"}, /* DEVICE CONTROL STRING */ + {UINT32_C(0x0091), "PU1"}, /* PRIVATE USE ONE */ + {UINT32_C(0x0092), "PU2"}, /* PRIVATE USE TWO */ + {UINT32_C(0x0093), "STS"}, /* SET TRANSMIT STATE */ + {UINT32_C(0x0094), "CCH"}, /* CANCEL CHARACTER */ + {UINT32_C(0x0095), "MW"}, /* MESSAGE WAITING */ + {UINT32_C(0x0096), "SPA"}, /* START OF GUARDED AREA */ + {UINT32_C(0x0097), "EPA"}, /* END OF GUARDED AREA */ + {UINT32_C(0x0098), "SOS"}, /* START OF STRING */ + {UINT32_C(0x0099), "SGCI"}, /* Single Graphic Character Introducer */ + {UINT32_C(0x009A), "SSI"}, /* SINGLE CHARACTER INTRODUCER */ + {UINT32_C(0x009B), "CSI"}, /* CONTROL SEQUENCE INTRODUCER */ + {UINT32_C(0x009C), "ST"}, /* STRING TERMINATOR */ + {UINT32_C(0x009D), "OSC"}, /* OPERATING SYSTEM COMMAND */ + {UINT32_C(0x009E), "PM"}, /* PRIVACY MESSAGE */ + {UINT32_C(0x009F), "APC"}, /* APPLICATION PROGRAM COMMAND */ + {UINT32_C(0x200B), "ZWS"}, /* ZERO WIDTH SPACE */ + {UINT32_C(0x200C), "ZWNJ"}, /* ZERO WIDTH NON-JOINER */ + {UINT32_C(0x200D), "ZWJ"}, /* ZERO WIDTH JOINER */ + {UINT32_C(0x200E), "LTRM"}, /* LEFT-TO-RIGHT MARK */ + {UINT32_C(0x200F), "RTLM"}, /* RIGHT-TO-LEFT MARK */ + {UINT32_C(0x202A), "LTRE"}, /* LEFT-TO-RIGHT EMBEDDING */ + {UINT32_C(0x202B), "RTLE"}, /* RIGHT-TO-LEFT EMBEDDING */ + {UINT32_C(0x202C), "PDF"}, /* POP DIRECTIONAL FORMATTING */ + {UINT32_C(0x202D), "LTRO"}, /* LEFT-TO-RIGHT OVERRIDE */ +
{UINT32_C(0x202E), "RTLO"}, /* RIGHT-TO-LEFT OVERRIDE */ + {UINT32_C(0x2060), "WJ"}, /* WORD JOINER */ + {UINT32_C(0x2066), "LTRI"}, /* LEFT-TO-RIGHT ISOLATE */ + {UINT32_C(0x2067), "RTLI"}, /* RIGHT-TO-LEFT ISOLATE */ + {UINT32_C(0x2068), "FSI"}, /* FIRST STRONG ISOLATE */ + {UINT32_C(0x2069), "PDI"}, /* POP DIRECTIONAL ISOLATE */ + {UINT32_C(0x206A), "ISS"}, /* INHIBIT SYMMETRIC SWAPPING */ + {UINT32_C(0x206B), "ASS"}, /* ACTIVATE SYMMETRIC SWAPPING */ + {UINT32_C(0x206C), "IAFS"}, /* INHIBIT ARABIC FORM SHAPING */ + {UINT32_C(0x206D), "AAFS"}, /* ACTIVATE ARABIC FORM SHAPING */ + {UINT32_C(0x206E), "NADS"}, /* NATIONAL DIGIT SHAPES */ + {UINT32_C(0x206F), "NODS"}, /* NOMINAL DIGIT SHAPES */ + {UINT32_C(0xFFF9), "IAA"}, /* INTERLINEAR ANNOTATION ANCHOR */ + {UINT32_C(0xFFFA), "IAS"}, /* INTERLINEAR ANNOTATION SEPARATOR */ + {UINT32_C(0xFFFB), "IAT"}, /* INTERLINEAR ANNOTATION TERMINATOR */ + {UINT32_C(0x1BCA0), "SFLO"}, /* SHORTHAND FORMAT LETTER OVERLAP */ + {UINT32_C(0x1BCA1), "SFCO"}, /* SHORTHAND FORMAT CONTINUING OVERLAP */ + {UINT32_C(0x1BCA2), "SFDS"}, /* SHORTHAND FORMAT DOWN STEP */ + {UINT32_C(0x1BCA3), "SFUS"}, /* SHORTHAND FORMAT UP STEP */ + {UINT32_C(0xE0001), "LTAG"}, /* LANGUAGE TAG */ + {UINT32_C(0xE007F), "CTAG"}, /* CANCEL TAG */ + + {UINT32_C(0x20), "SP"}, /* SPACE */ + {UINT32_C(0x00A0), "NBSP"}, /* NO-BREAK SPACE */ + {UINT32_C(0x2000), "NQ"}, /* EN QUAD */ + {UINT32_C(0x2001), "MQ"}, /* EM QUAD */ + {UINT32_C(0x2002), "NSP"}, /* EN SPACE */ + {UINT32_C(0x2003), "MSP"}, /* EM SPACE */ + {UINT32_C(0x2004), "3MSP"}, /* THREE-PER-EM SPACE */ + {UINT32_C(0x2005), "4MSP"}, /* FOUR-PER-EM SPACE */ + {UINT32_C(0x2006), "6MSP"}, /* SIX-PER-EM SPACE */ + {UINT32_C(0x2007), "FSP"}, /* FIGURE SPACE */ + {UINT32_C(0x2008), "PSP"}, /* PUNCTUATION SPACE */ + {UINT32_C(0x2009), "TSP"}, /* THIN SPACE */ + {UINT32_C(0x200A), "HSP"}, /* HAIR SPACE */ + {UINT32_C(0x2028), "LS"}, /* LINE SEPARATOR */ + {UINT32_C(0x2029), "PS"}, /* PARAGRAPH SEPARATOR */ +
{UINT32_C(0x202F), "NNBSP"}, /* NARROW NO-BREAK SPACE */ + {UINT32_C(0x205F), "MMSP"}, /* MEDIUM MATHEMATICAL SPACE */ + + {UINT32_C(0x00AD), "SHY"} /* SOFT HYPHEN */ +}; + + +enum libcharconv_result +libcharconv_control_characters(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + size_t i, len, found, found_len; + int indeterminate; + *n = 0; + for (; slen; s++, slen--, ++*n) { + indeterminate = 0; + found = SIZE_MAX; + found_len = 0u; + for (i = 0u; i < sizeof(symbols) / sizeof(*symbols); i++) { + len = strlen(symbols[i].s); + if (strncmp(s, symbols[i].s, len < slen ? len : slen)) + continue; + if (slen < len) { + indeterminate = 1; + continue; + } + if (len > found_len) { + found = i; + found_len = len; + } + } + if (found_len) + goto conv; + if (*n) + goto no_conv; + if (indeterminate) + return LIBCHARCONV_INDETERMINATE; + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = symbols[found].cp; + *n += found_len; + *ncp = 1u; + return indeterminate ?
LIBCHARCONV_CONVERT_IF_END : LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_latin.c b/libcharconv_latin.c index d8c27d0..db60835 100644 --- a/libcharconv_latin.c +++ b/libcharconv_latin.c @@ -728,6 +728,125 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz case UINT32_C(0x2142): c1 = 'L'; goto conv1; case UINT32_C(0x2144): c1 = 'Y'; goto conv1; + /* control characters (control characters) */ + case UINT32_C(0x00): c1 = 'N'; c2 = 'U'; c3 = 'L'; goto conv3; + case UINT32_C(0x01): c1 = 'S'; c2 = 'O'; c3 = 'H'; goto conv3; + case UINT32_C(0x02): c1 = 'S'; c2 = 'T'; c3 = 'X'; goto conv3; + case UINT32_C(0x03): c1 = 'E'; c2 = 'T'; c3 = 'X'; goto conv3; + case UINT32_C(0x04): c1 = 'E'; c2 = 'O'; c3 = 'T'; goto conv3; + case UINT32_C(0x05): c1 = 'E'; c2 = 'N'; c3 = 'Q'; goto conv3; + case UINT32_C(0x06): c1 = 'A'; c2 = 'C'; c3 = 'K'; goto conv3; + case UINT32_C(0x07): c1 = 'B'; c2 = 'E'; c3 = 'L'; goto conv3; + case UINT32_C(0x08): c1 = 'B'; c2 = 'S'; goto conv2; + case UINT32_C(0x09): c1 = 'H'; c2 = 'T'; goto conv2; + case UINT32_C(0x0A): c1 = 'L'; c2 = 'F'; goto conv2; + case UINT32_C(0x0B): c1 = 'V'; c2 = 'T'; goto conv2; + case UINT32_C(0x0C): c1 = 'F'; c2 = 'F'; goto conv2; + case UINT32_C(0x0D): c1 = 'C'; c2 = 'R'; goto conv2; + case UINT32_C(0x0E): c1 = 'S'; c2 = 'O'; goto conv2; + case UINT32_C(0x0F): c1 = 'S'; c2 = 'I'; goto conv2; + case UINT32_C(0x10): c1 = 'D'; c2 = 'L'; c3 = 'E'; goto conv3; + case UINT32_C(0x11): c1 = 'D'; c2 = 'C'; c3 = '1'; goto conv3; + case UINT32_C(0x12): c1 = 'D'; c2 = 'C'; c3 = '2'; goto conv3; + case UINT32_C(0x13): c1 = 'D'; c2 = 'C'; c3 = '3'; goto conv3; + case UINT32_C(0x14): c1 = 'D'; c2 = 'C'; c3 = '4'; goto conv3; + case UINT32_C(0x15): c1 = 'N'; c2 = 'A'; c3 = 'K'; goto conv3; + case UINT32_C(0x16): c1 = 'S'; c2 = 'Y'; c3 = 'N'; goto conv3; + case UINT32_C(0x17): c1 = 'E'; c2 = 'T'; c3 = 'B'; goto conv3; + case UINT32_C(0x18): c1 = 'C'; c2 = 'A'; c3 = 'N'; goto conv3; + case
UINT32_C(0x19): c1 = 'E'; c2 = 'M'; goto conv2; + case UINT32_C(0x1A): c1 = 'S'; c2 = 'U'; c3 = 'B'; goto conv3; + case UINT32_C(0x1B): c1 = 'E'; c2 = 'S'; c3 = 'C'; goto conv3; + case UINT32_C(0x1C): c1 = 'F'; c2 = 'S'; goto conv2; + case UINT32_C(0x1D): c1 = 'G'; c2 = 'S'; goto conv2; + case UINT32_C(0x1E): c1 = 'R'; c2 = 'S'; goto conv2; + case UINT32_C(0x1F): c1 = 'U'; c2 = 'S'; goto conv2; + case UINT32_C(0x7F): c1 = 'D'; c2 = 'E'; c3 = 'L'; goto conv3; + case UINT32_C(0x0080): c1 = 'P'; c2 = 'A'; c3 = 'D'; goto conv3; + case UINT32_C(0x0081): c1 = 'H'; c2 = 'O'; c3 = 'P'; goto conv3; + case UINT32_C(0x0082): c1 = 'B'; c2 = 'P'; c3 = 'H'; goto conv3; + case UINT32_C(0x0083): c1 = 'N'; c2 = 'B'; c3 = 'P'; goto conv3; + case UINT32_C(0x0084): c1 = 'I'; c2 = 'N'; c3 = 'D'; goto conv3; + case UINT32_C(0x0085): c1 = 'N'; c2 = 'E'; c3 = 'L'; goto conv3; + case UINT32_C(0x0086): c1 = 'S'; c2 = 'S'; c3 = 'A'; goto conv3; + case UINT32_C(0x0087): c1 = 'E'; c2 = 'S'; c3 = 'A'; goto conv3; + case UINT32_C(0x0088): c1 = 'H'; c2 = 'T'; c3 = 'S'; goto conv3; + case UINT32_C(0x0089): c1 = 'H'; c2 = 'T'; c3 = 'J'; goto conv3; + case UINT32_C(0x008A): c1 = 'L'; c2 = 'T'; c3 = 'S'; goto conv3; + case UINT32_C(0x008B): c1 = 'P'; c2 = 'L'; c3 = 'D'; goto conv3; + case UINT32_C(0x008C): c1 = 'P'; c2 = 'L'; c3 = 'U'; goto conv3; + case UINT32_C(0x008D): c1 = 'R'; c2 = 'I'; goto conv2; + case UINT32_C(0x008E): c1 = 'S'; c2 = 'S'; c3 = '2'; goto conv3; + case UINT32_C(0x008F): c1 = 'S'; c2 = 'S'; c3 = '3'; goto conv3; + case UINT32_C(0x0090): c1 = 'D'; c2 = 'C'; c3 = 'S'; goto conv3; + case UINT32_C(0x0091): c1 = 'P'; c2 = 'U'; c3 = '1'; goto conv3; + case UINT32_C(0x0092): c1 = 'P'; c2 = 'U'; c3 = '2'; goto conv3; + case UINT32_C(0x0093): c1 = 'S'; c2 = 'T'; c3 = 'S'; goto conv3; + case UINT32_C(0x0094): c1 = 'C'; c2 = 'C'; c3 = 'H'; goto conv3; + case UINT32_C(0x0095): c1 = 'M'; c2 = 'W'; goto conv2; + case UINT32_C(0x0096): c1 = 'S'; c2 = 'P'; c3 = 'A'; goto conv3; + case 
UINT32_C(0x0097): c1 = 'E'; c2 = 'P'; c3 = 'A'; goto conv3; + case UINT32_C(0x0098): c1 = 'S'; c2 = 'O'; c3 = 'S'; goto conv3; + case UINT32_C(0x0099): c1 = 'S'; c2 = 'G'; c3 = 'C'; c4 = 'I'; goto conv4; + case UINT32_C(0x009A): c1 = 'S'; c2 = 'S'; c3 = 'I'; goto conv3; + case UINT32_C(0x009B): c1 = 'C'; c2 = 'S'; c3 = 'I'; goto conv3; + case UINT32_C(0x009C): c1 = 'S'; c2 = 'T'; goto conv2; + case UINT32_C(0x009D): c1 = 'O'; c2 = 'S'; c3 = 'C'; goto conv3; + case UINT32_C(0x009E): c1 = 'P'; c2 = 'M'; goto conv2; + case UINT32_C(0x009F): c1 = 'A'; c2 = 'P'; c3 = 'C'; goto conv3; + case UINT32_C(0x200B): c1 = 'Z'; c2 = 'W'; c3 = 'S'; goto conv3; + case UINT32_C(0x200C): c1 = 'Z'; c2 = 'W'; c3 = 'N'; c4 = 'J'; goto conv4; + case UINT32_C(0x200D): c1 = 'Z'; c2 = 'W'; c3 = 'J'; goto conv3; + case UINT32_C(0x200E): c1 = 'L'; c2 = 'T'; c3 = 'R'; c4 = 'M'; goto conv4; + case UINT32_C(0x200F): c1 = 'R'; c2 = 'T'; c3 = 'L'; c4 = 'M'; goto conv4; + case UINT32_C(0x202A): c1 = 'L'; c2 = 'T'; c3 = 'R'; c4 = 'E'; goto conv4; + case UINT32_C(0x202B): c1 = 'R'; c2 = 'T'; c3 = 'L'; c4 = 'E'; goto conv4; + case UINT32_C(0x202C): c1 = 'P'; c2 = 'D'; c3 = 'F'; goto conv3; + case UINT32_C(0x202D): c1 = 'L'; c2 = 'T'; c3 = 'R'; c4 = 'O'; goto conv4; + case UINT32_C(0x202E): c1 = 'R'; c2 = 'T'; c3 = 'L'; c4 = 'O'; goto conv4; + case UINT32_C(0x2060): c1 = 'W'; c2 = 'J'; goto conv2; + case UINT32_C(0x2066): c1 = 'L'; c2 = 'T'; c3 = 'R'; c4 = 'I'; goto conv4; + case UINT32_C(0x2067): c1 = 'R'; c2 = 'T'; c3 = 'L'; c4 = 'I'; goto conv4; + case UINT32_C(0x2068): c1 = 'F'; c2 = 'S'; c3 = 'I'; goto conv3; + case UINT32_C(0x2069): c1 = 'P'; c2 = 'D'; c3 = 'I'; goto conv3; + case UINT32_C(0x206A): c1 = 'I'; c2 = 'S'; c3 = 'S'; goto conv3; + case UINT32_C(0x206B): c1 = 'A'; c2 = 'S'; c3 = 'S'; goto conv3; + case UINT32_C(0x206C): c1 = 'I'; c2 = 'A'; c3 = 'F'; c4 = 'S'; goto conv4; + case UINT32_C(0x206D): c1 = 'A'; c2 = 'A'; c3 = 'F'; c4 = 'S'; goto conv4; + case UINT32_C(0x206E): c1 = 'N'; c2 = 
'A'; c3 = 'D'; c4 = 'S'; goto conv4; + case UINT32_C(0x206F): c1 = 'N'; c2 = 'O'; c3 = 'D'; c4 = 'S'; goto conv4; + case UINT32_C(0xFFF9): c1 = 'I'; c2 = 'A'; c3 = 'A'; goto conv3; + case UINT32_C(0xFFFA): c1 = 'I'; c2 = 'A'; c3 = 'S'; goto conv3; + case UINT32_C(0xFFFB): c1 = 'I'; c2 = 'A'; c3 = 'T'; goto conv3; + case UINT32_C(0x1BCA0): c1 = 'S'; c2 = 'F'; c3 = 'L'; c4 = 'O'; goto conv4; + case UINT32_C(0x1BCA1): c1 = 'S'; c2 = 'F'; c3 = 'C'; c4 = 'O'; goto conv4; + case UINT32_C(0x1BCA2): c1 = 'S'; c2 = 'F'; c3 = 'D'; c4 = 'S'; goto conv4; + case UINT32_C(0x1BCA3): c1 = 'S'; c2 = 'F'; c3 = 'U'; c4 = 'S'; goto conv4; + case UINT32_C(0xE0001): c1 = 'L'; c2 = 'T'; c3 = 'A'; c4 = 'G'; goto conv4; + case UINT32_C(0xE007F): c1 = 'C'; c2 = 'T'; c3 = 'A'; c4 = 'G'; goto conv4; + + /* control characters (whitespace) */ + case UINT32_C(0x20): c1 = 'S'; c2 = 'P'; goto conv2; + case UINT32_C(0x00A0): c1 = 'N'; c2 = 'B'; c3 = 'S'; c4 = 'P'; goto conv4; + case UINT32_C(0x2000): c1 = 'N'; c2 = 'Q'; goto conv2; + case UINT32_C(0x2001): c1 = 'M'; c2 = 'Q'; goto conv2; + case UINT32_C(0x2002): c1 = 'N'; c2 = 'S'; c3 = 'P'; goto conv3; + case UINT32_C(0x2003): c1 = 'M'; c2 = 'S'; c3 = 'P'; goto conv3; + case UINT32_C(0x2004): c1 = '3'; c2 = 'M'; c3 = 'S'; c4 = 'P'; goto conv4; + case UINT32_C(0x2005): c1 = '4'; c2 = 'M'; c3 = 'S'; c4 = 'P'; goto conv4; + case UINT32_C(0x2006): c1 = '6'; c2 = 'M'; c3 = 'S'; c4 = 'P'; goto conv4; + case UINT32_C(0x2007): c1 = 'F'; c2 = 'S'; c3 = 'P'; goto conv3; + case UINT32_C(0x2008): c1 = 'P'; c2 = 'S'; c3 = 'P'; goto conv3; + case UINT32_C(0x2009): c1 = 'T'; c2 = 'S'; c3 = 'P'; goto conv3; + case UINT32_C(0x200A): c1 = 'H'; c2 = 'S'; c3 = 'P'; goto conv3; + case UINT32_C(0x2028): c1 = 'L'; c2 = 'S'; goto conv2; + case UINT32_C(0x2029): c1 = 'P'; c2 = 'S'; goto conv2; + case UINT32_C(0x202F): c1 = 'N'; c2 = 'N'; c3 = 'B'; c4 = 'S'; c5 = 'P'; goto conv5; + case UINT32_C(0x205F): c1 = 'M'; c2 = 'M'; c3 = 'S'; c4 = 'P'; goto conv4; + + /* control 
characters (conditional characters) */ + case UINT32_C(0x00AD): c1 = 'S'; c2 = 'H'; c3 = 'Y'; goto conv3; + default: no_match: *n += clen; |
