aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--Makefile6
-rw-r--r--convert-to-control-characters.c4
-rw-r--r--libcharconv.h6
-rw-r--r--libcharconv_control_characters.c169
-rw-r--r--libcharconv_latin.c119
5 files changed, 302 insertions, 2 deletions
diff --git a/Makefile b/Makefile
index b917d65..41eea28 100644
--- a/Makefile
+++ b/Makefile
@@ -71,7 +71,8 @@ BIN =\
convert-to-tally-marks\
convert-to-ideographic-tally-marks\
convert-to-negative\
- convert-to-symbols
+ convert-to-symbols\
+ convert-to-control-characters
LIBOBJ =\
libcharconv_decode_utf8_.o\
@@ -131,7 +132,8 @@ LIBOBJ =\
libcharconv_tally_marks.o\
libcharconv_ideographic_tally_marks.o\
libcharconv_negative.o\
- libcharconv_symbols.o
+ libcharconv_symbols.o\
+ libcharconv_control_characters.o
LOBJ = $(LIBOBJ:.o=.lo)
diff --git a/convert-to-control-characters.c b/convert-to-control-characters.c
new file mode 100644
index 0000000..b9c704e
--- /dev/null
+++ b/convert-to-control-characters.c
@@ -0,0 +1,4 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+/* NOTE(review): SIMPLE comes from common.h, which is not shown here; presumably it generates the program's entry point around the named converter -- confirm */
+SIMPLE(libcharconv_control_characters)
diff --git a/libcharconv.h b/libcharconv.h
index e220062..b6050a9 100644
--- a/libcharconv.h
+++ b/libcharconv.h
@@ -383,6 +383,12 @@ LIBCHARCONV_FUNC_(libcharconv_negative);
*/
LIBCHARCONV_FUNC_(libcharconv_symbols);
+/**
+ * Convert ASCII abbreviations of control characters, spaces,
+ * and SOFT HYPHEN (for example "NUL", "ESC", "NBSP", and "SHY")
+ * to the characters they name
+ */
+LIBCHARCONV_FUNC_(libcharconv_control_characters);
+
#undef LIBCHARCONV_FUNC_
#endif
diff --git a/libcharconv_control_characters.c b/libcharconv_control_characters.c
new file mode 100644
index 0000000..55e4bbf
--- /dev/null
+++ b/libcharconv_control_characters.c
@@ -0,0 +1,169 @@
+/* See LICENSE file for copyright and license details. */
+#include "lib-common.h"
+#include <string.h>
+
+
+/*
+ * Table mapping an ASCII abbreviation (s) to the code point (cp) it names.
+ *
+ * libcharconv_control_characters() scans the whole table and keeps the
+ * longest abbreviation that matches the start of its input, so the order
+ * of the entries does not matter and one abbreviation may be a prefix of
+ * another (e.g. "SO" and "SOH").
+ *
+ * libcharconv_latin.c carries the reverse mapping (code point to
+ * abbreviation); the spellings there must agree with the ones used here.
+ */
+static const struct {
+ uint_least32_t cp;
+ const char *s;
+} symbols[] = {
+ {UINT32_C(0x00), "NUL"}, /* NULL */
+ {UINT32_C(0x01), "SOH"}, /* START OF HEADING */
+ {UINT32_C(0x02), "STX"}, /* START OF TEXT */
+ {UINT32_C(0x03), "ETX"}, /* END OF TEXT */
+ {UINT32_C(0x04), "EOT"}, /* END OF TRANSMISSION */
+ {UINT32_C(0x05), "ENQ"}, /* ENQUIRY */
+ {UINT32_C(0x06), "ACK"}, /* ACKNOWLEDGE */
+ {UINT32_C(0x07), "BEL"}, /* BELL */
+ {UINT32_C(0x08), "BS"}, /* BACKSPACE */
+ {UINT32_C(0x09), "HT"}, /* CHARACTER TABULATION */
+ {UINT32_C(0x0A), "LF"}, /* LINE FEED */
+ {UINT32_C(0x0B), "VT"}, /* LINE TABULATION */
+ {UINT32_C(0x0C), "FF"}, /* FORM FEED */
+ {UINT32_C(0x0D), "CR"}, /* CARRIAGE RETURN */
+ {UINT32_C(0x0E), "SO"}, /* SHIFT OUT */
+ {UINT32_C(0x0F), "SI"}, /* SHIFT IN */
+ {UINT32_C(0x10), "DLE"}, /* DATA LINK ESCAPE */
+ {UINT32_C(0x11), "DC1"}, /* DEVICE CONTROL ONE */
+ {UINT32_C(0x12), "DC2"}, /* DEVICE CONTROL TWO */
+ {UINT32_C(0x13), "DC3"}, /* DEVICE CONTROL THREE */
+ {UINT32_C(0x14), "DC4"}, /* DEVICE CONTROL FOUR */
+ {UINT32_C(0x15), "NAK"}, /* NEGATIVE ACKNOWLEDGE */
+ {UINT32_C(0x16), "SYN"}, /* SYNCHRONOUS IDLE */
+ {UINT32_C(0x17), "ETB"}, /* END OF TRANSMISSION BLOCK */
+ {UINT32_C(0x18), "CAN"}, /* CANCEL */
+ {UINT32_C(0x19), "EM"}, /* END OF MEDIUM */
+ {UINT32_C(0x1A), "SUB"}, /* SUBSTITUTE */
+ {UINT32_C(0x1B), "ESC"}, /* ESCAPE */
+ {UINT32_C(0x1C), "FS"}, /* INFORMATION SEPARATOR FOUR */
+ {UINT32_C(0x1D), "GS"}, /* INFORMATION SEPARATOR THREE */
+ {UINT32_C(0x1E), "RS"}, /* INFORMATION SEPARATOR TWO */
+ {UINT32_C(0x1F), "US"}, /* INFORMATION SEPARATOR ONE */
+ {UINT32_C(0x7F), "DEL"}, /* DELETE */
+ {UINT32_C(0x0080), "PAD"}, /* Padding Character */
+ {UINT32_C(0x0081), "HOP"}, /* High Octet Preset */
+ {UINT32_C(0x0082), "BPH"}, /* BREAK PERMITTED HERE */
+ {UINT32_C(0x0083), "NBP"}, /* NO BREAK HERE; NOTE(review): ECMA-48 abbreviates this NBH -- confirm "NBP" is intended */
+ {UINT32_C(0x0084), "IND"}, /* INDEX */
+ {UINT32_C(0x0085), "NEL"}, /* NEXT LINE */
+ {UINT32_C(0x0086), "SSA"}, /* START OF SELECTED AREA */
+ {UINT32_C(0x0087), "ESA"}, /* END OF SELECTED AREA */
+ {UINT32_C(0x0088), "HTS"}, /* CHARACTER TABULATION SET */
+ {UINT32_C(0x0089), "HTJ"}, /* CHARACTER TABULATION WITH JUSTIFICATION */
+ {UINT32_C(0x008A), "LTS"}, /* LINE TABULATION SET; NOTE(review): ECMA-48 abbreviates this VTS -- confirm "LTS" is intended */
+ {UINT32_C(0x008B), "PLD"}, /* PARTIAL LINE FORWARD */
+ {UINT32_C(0x008C), "PLU"}, /* PARTIAL LINE BACKWARD */
+ {UINT32_C(0x008D), "RI"}, /* REVERSE LINE FEED */
+ {UINT32_C(0x008E), "SS2"}, /* SINGLE SHIFT TWO */
+ {UINT32_C(0x008F), "SS3"}, /* SINGLE SHIFT THREE */
+ {UINT32_C(0x0090), "DCS"}, /* DEVICE CONTROL STRING */
+ {UINT32_C(0x0091), "PU1"}, /* PRIVATE USE ONE */
+ {UINT32_C(0x0092), "PU2"}, /* PRIVATE USE TWO */
+ {UINT32_C(0x0093), "STS"}, /* SET TRANSMIT STATE */
+ {UINT32_C(0x0094), "CCH"}, /* CANCEL CHARACTER */
+ {UINT32_C(0x0095), "MW"}, /* MESSAGE WAITING */
+ {UINT32_C(0x0096), "SPA"}, /* START OF GUARDED AREA */
+ {UINT32_C(0x0097), "EPA"}, /* END OF GUARDED AREA */
+ {UINT32_C(0x0098), "SOS"}, /* START OF STRING */
+ {UINT32_C(0x0099), "SGCI"}, /* Single Graphic Character Introducer */
+ {UINT32_C(0x009A), "SSI"}, /* SINGLE CHARACTER INTRODUCER; NOTE(review): ECMA-48 abbreviates this SCI -- confirm "SSI" is intended */
+ {UINT32_C(0x009B), "CSI"}, /* CONTROL SEQUENCE INTRODUCER */
+ {UINT32_C(0x009C), "ST"}, /* STRING TERMINATOR */
+ {UINT32_C(0x009D), "OSC"}, /* OPERATING SYSTEM COMMAND */
+ {UINT32_C(0x009E), "PM"}, /* PRIVACY MESSAGE */
+ {UINT32_C(0x009F), "APC"}, /* APPLICATION PROGRAM COMMAND */
+ {UINT32_C(0x200B), "ZWS"}, /* ZERO WIDTH SPACE */
+ {UINT32_C(0x200C), "ZWNJ"}, /* ZERO WIDTH NON-JOINER */
+ {UINT32_C(0x200D), "ZWJ"}, /* ZERO WIDTH JOINER */
+ {UINT32_C(0x200E), "LTRM"}, /* LEFT-TO-RIGHT MARK */
+ {UINT32_C(0x200F), "RTLM"}, /* RIGHT-TO-LEFT MARK */
+ {UINT32_C(0x202A), "LTRE"}, /* LEFT-TO-RIGHT EMBEDDING */
+ {UINT32_C(0x202B), "RTLE"}, /* RIGHT-TO-LEFT EMBEDDING */
+ {UINT32_C(0x202C), "PDF"}, /* POP DIRECTIONAL FORMATTING */
+ {UINT32_C(0x202D), "LTRO"}, /* LEFT-TO-RIGHT OVERRIDE */
+ {UINT32_C(0x202E), "RTLO"}, /* RIGHT-TO-LEFT OVERRIDE */
+ {UINT32_C(0x2060), "WJ"}, /* WORD JOINER */
+ {UINT32_C(0x2066), "LTRI"}, /* LEFT-TO-RIGHT ISOLATE */
+ {UINT32_C(0x2067), "RTLI"}, /* RIGHT-TO-LEFT ISOLATE */
+ {UINT32_C(0x2068), "FSI"}, /* FIRST STRONG ISOLATE */
+ {UINT32_C(0x2069), "PDI"}, /* POP DIRECTIONAL ISOLATE */
+ {UINT32_C(0x206A), "ISS"}, /* INHIBIT SYMMETRIC SWAPPING */
+ {UINT32_C(0x206B), "ASS"}, /* ACTIVATE SYMMETRIC SWAPPING */
+ {UINT32_C(0x206C), "IAFS"}, /* INHIBIT ARABIC FORM SHAPING */
+ {UINT32_C(0x206D), "AAFS"}, /* ACTIVATE ARABIC FORM SHAPING */
+ {UINT32_C(0x206E), "NADS"}, /* NATIONAL DIGIT SHAPES */
+ {UINT32_C(0x206F), "NODS"}, /* NOMINAL DIGIT SHAPES */
+ {UINT32_C(0xFFF9), "IAA"}, /* INTERLINEAR ANNOTATION ANCHOR */
+ {UINT32_C(0xFFFA), "IAS"}, /* INTERLINEAR ANNOTATION SEPARATOR */
+ {UINT32_C(0xFFFB), "IAT"}, /* INTERLINEAR ANNOTATION TERMINATOR */
+ {UINT32_C(0x1BCA0), "SFLO"}, /* SHORTHAND FORMAT LETTER OVERLAP */
+ {UINT32_C(0x1BCA1), "SFCO"}, /* SHORTHAND FORMAT CONTINUING OVERLAP */
+ {UINT32_C(0x1BCA2), "SFDS"}, /* SHORTHAND FORMAT DOWN STEP */
+ {UINT32_C(0x1BCA3), "SFUS"}, /* SHORTHAND FORMAT UP STEP */
+ {UINT32_C(0xE0001), "LTAG"}, /* LANGUAGE TAG */
+ {UINT32_C(0xE007F), "CTAG"}, /* CANCEL TAG */
+
+ {UINT32_C(0x20), "SP"}, /* SPACE */
+ {UINT32_C(0x00A0), "NBSP"}, /* NO-BREAK SPACE */
+ {UINT32_C(0x2000), "NQ"}, /* EN QUAD */
+ {UINT32_C(0x2001), "MQ"}, /* EM QUAD */
+ {UINT32_C(0x2002), "NSP"}, /* EN SPACE */
+ {UINT32_C(0x2003), "MSP"}, /* EM SPACE */
+ {UINT32_C(0x2004), "3MSP"}, /* THREE-PER-EM SPACE */
+ {UINT32_C(0x2005), "4MSP"}, /* FOUR-PER-EM SPACE */
+ {UINT32_C(0x2006), "6MSP"}, /* SIX-PER-EM SPACE */
+ {UINT32_C(0x2007), "FSP"}, /* FIGURE SPACE */
+ {UINT32_C(0x2008), "PSP"}, /* PUNCTUATION SPACE */
+ {UINT32_C(0x2009), "TSP"}, /* THIN SPACE */
+ {UINT32_C(0x200A), "HSP"}, /* HAIR SPACE */
+ {UINT32_C(0x2028), "LS"}, /* LINE SEPARATOR */
+ {UINT32_C(0x2029), "PS"}, /* PARAGRAPH SEPARATOR */
+ {UINT32_C(0x202F), "NNBSP"}, /* NARROW NO-BREAK SPACE */
+ {UINT32_C(0x205F), "MMSP"}, /* MEDIUM MATHEMATICAL SPACE */
+
+ {UINT32_C(0x00AD), "SHY"} /* SOFT HYPHEN */
+};
+
+
+/**
+ * Convert an abbreviation found at the very beginning of `s`
+ * to the code point it names according to the `symbols` table
+ *
+ * Only the first position of the input is examined. When several
+ * abbreviations match there, the longest one is used.
+ *
+ * @param   s     Input text; it need not be NUL-terminated,
+ *                at most `slen` bytes of it are read
+ * @param   slen  Number of bytes available in `s`
+ * @param   n     Output: number of bytes of `s` the result covers
+ * @param   cp    Output: the code point; written only when `*ncp`
+ *                is non-zero on entry
+ * @param   ncp   On entry, NOTE(review): presumably the number of
+ *                elements `cp` can hold -- confirm against the
+ *                library's shared documentation; set to 1 when an
+ *                abbreviation is converted, otherwise left untouched
+ * @return  LIBCHARCONV_CONVERTED       An abbreviation matched and no longer
+ *                                      one can match; `*n` is its length
+ *          LIBCHARCONV_CONVERT_IF_END  An abbreviation matched, but the input
+ *                                      ends inside a longer candidate
+ *          LIBCHARCONV_INDETERMINATE   Nothing matched completely, but the
+ *                                      input is a proper prefix of at least
+ *                                      one abbreviation; `*n` is 0
+ *          LIBCHARCONV_NO_CONVERT      Nothing can match at the first byte;
+ *                                      `*n` is 1 (0 for empty input)
+ */
+enum libcharconv_result
+libcharconv_control_characters(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp)
+{
+	const size_t nsymbols = sizeof(symbols) / sizeof(*symbols);
+	size_t i, len, best = 0u, best_len = 0u;
+	int indeterminate = 0;
+
+	*n = 0u;
+	if (!slen)
+		return LIBCHARCONV_NO_CONVERT;
+
+	for (i = 0u; i < nsymbols; i++) {
+		len = strlen(symbols[i].s);
+		/* Compare only the bytes that both the input and the abbreviation have */
+		if (strncmp(s, symbols[i].s, len < slen ? len : slen))
+			continue;
+		if (slen < len) {
+			/* Input is a proper prefix: more input could complete this entry */
+			indeterminate = 1;
+		} else if (len > best_len) {
+			best = i;
+			best_len = len;
+		}
+	}
+
+	if (best_len) {
+		if (*ncp)
+			*cp = symbols[best].cp;
+		*n = best_len;
+		*ncp = 1u;
+		return indeterminate ? LIBCHARCONV_CONVERT_IF_END : LIBCHARCONV_CONVERTED;
+	}
+	if (indeterminate)
+		return LIBCHARCONV_INDETERMINATE;
+
+	/*
+	 * Nothing starts here: report exactly one byte as not convertible.
+	 * There is no point in scanning the following position, the answer
+	 * would be "no conversion for the first byte" regardless of what
+	 * is found there.
+	 */
+	*n = 1u;
+	return LIBCHARCONV_NO_CONVERT;
+}
diff --git a/libcharconv_latin.c b/libcharconv_latin.c
index d8c27d0..db60835 100644
--- a/libcharconv_latin.c
+++ b/libcharconv_latin.c
@@ -728,6 +728,125 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
case UINT32_C(0x2142): c1 = 'L'; goto conv1;
case UINT32_C(0x2144): c1 = 'Y'; goto conv1;
+ /* control characters (control characters) */
+ case UINT32_C(0x00): c1 = 'N'; c2 = 'U'; c3 = 'L'; goto conv3;
+ case UINT32_C(0x01): c1 = 'S'; c2 = 'O'; c3 = 'H'; goto conv3;
+ case UINT32_C(0x02): c1 = 'S'; c2 = 'T'; c3 = 'X'; goto conv3;
+ case UINT32_C(0x03): c1 = 'E'; c2 = 'T'; c3 = 'X'; goto conv3;
+ case UINT32_C(0x04): c1 = 'E'; c2 = 'O'; c3 = 'T'; goto conv3;
+ case UINT32_C(0x05): c1 = 'E'; c2 = 'N'; c3 = 'Q'; goto conv3;
+ case UINT32_C(0x06): c1 = 'A'; c2 = 'C'; c3 = 'K'; goto conv3;
+ case UINT32_C(0x07): c1 = 'B'; c2 = 'E'; c3 = 'L'; goto conv3;
+ case UINT32_C(0x08): c1 = 'B'; c2 = 'S'; goto conv2;
+ case UINT32_C(0x09): c1 = 'H'; c2 = 'T'; goto conv2;
+ case UINT32_C(0x0A): c1 = 'L'; c2 = 'F'; goto conv2;
+ case UINT32_C(0x0B): c1 = 'V'; c2 = 'T'; goto conv2;
+ case UINT32_C(0x0C): c1 = 'F'; c2 = 'F'; goto conv2;
+ case UINT32_C(0x0D): c1 = 'C'; c2 = 'R'; goto conv2;
+ case UINT32_C(0x0E): c1 = 'S'; c2 = 'O'; goto conv2; /* SHIFT OUT is abbreviated SO (its counterpart SHIFT IN is SI) */
+ case UINT32_C(0x0F): c1 = 'S'; c2 = 'I'; goto conv2;
+ case UINT32_C(0x10): c1 = 'D'; c2 = 'L'; c3 = 'E'; goto conv3;
+ case UINT32_C(0x11): c1 = 'D'; c2 = 'C'; c3 = '1'; goto conv3;
+ case UINT32_C(0x12): c1 = 'D'; c2 = 'C'; c3 = '2'; goto conv3;
+ case UINT32_C(0x13): c1 = 'D'; c2 = 'C'; c3 = '3'; goto conv3;
+ case UINT32_C(0x14): c1 = 'D'; c2 = 'C'; c3 = '4'; goto conv3;
+ case UINT32_C(0x15): c1 = 'N'; c2 = 'A'; c3 = 'K'; goto conv3;
+ case UINT32_C(0x16): c1 = 'S'; c2 = 'Y'; c3 = 'N'; goto conv3;
+ case UINT32_C(0x17): c1 = 'E'; c2 = 'T'; c3 = 'B'; goto conv3;
+ case UINT32_C(0x18): c1 = 'C'; c2 = 'A'; c3 = 'N'; goto conv3;
+ case UINT32_C(0x19): c1 = 'E'; c2 = 'M'; goto conv2;
+ case UINT32_C(0x1A): c1 = 'S'; c2 = 'U'; c3 = 'B'; goto conv3;
+ case UINT32_C(0x1B): c1 = 'E'; c2 = 'S'; c3 = 'C'; goto conv3;
+ case UINT32_C(0x1C): c1 = 'F'; c2 = 'S'; goto conv2;
+ case UINT32_C(0x1D): c1 = 'G'; c2 = 'S'; goto conv2;
+ case UINT32_C(0x1E): c1 = 'R'; c2 = 'S'; goto conv2;
+ case UINT32_C(0x1F): c1 = 'U'; c2 = 'S'; goto conv2;
+ case UINT32_C(0x7F): c1 = 'D'; c2 = 'E'; c3 = 'L'; goto conv3;
+ case UINT32_C(0x0080): c1 = 'P'; c2 = 'A'; c3 = 'D'; goto conv3;
+ case UINT32_C(0x0081): c1 = 'H'; c2 = 'O'; c3 = 'P'; goto conv3;
+ case UINT32_C(0x0082): c1 = 'B'; c2 = 'P'; c3 = 'H'; goto conv3;
+ case UINT32_C(0x0083): c1 = 'N'; c2 = 'B'; c3 = 'P'; goto conv3;
+ case UINT32_C(0x0084): c1 = 'I'; c2 = 'N'; c3 = 'D'; goto conv3;
+ case UINT32_C(0x0085): c1 = 'N'; c2 = 'E'; c3 = 'L'; goto conv3;
+ case UINT32_C(0x0086): c1 = 'S'; c2 = 'S'; c3 = 'A'; goto conv3;
+ case UINT32_C(0x0087): c1 = 'E'; c2 = 'S'; c3 = 'A'; goto conv3;
+ case UINT32_C(0x0088): c1 = 'H'; c2 = 'T'; c3 = 'S'; goto conv3;
+ case UINT32_C(0x0089): c1 = 'H'; c2 = 'T'; c3 = 'J'; goto conv3;
+ case UINT32_C(0x008A): c1 = 'L'; c2 = 'T'; c3 = 'S'; goto conv3;
+ case UINT32_C(0x008B): c1 = 'P'; c2 = 'L'; c3 = 'D'; goto conv3;
+ case UINT32_C(0x008C): c1 = 'P'; c2 = 'L'; c3 = 'U'; goto conv3;
+ case UINT32_C(0x008D): c1 = 'R'; c2 = 'I'; goto conv2;
+ case UINT32_C(0x008E): c1 = 'S'; c2 = 'S'; c3 = '2'; goto conv3;
+ case UINT32_C(0x008F): c1 = 'S'; c2 = 'S'; c3 = '3'; goto conv3;
+ case UINT32_C(0x0090): c1 = 'D'; c2 = 'C'; c3 = 'S'; goto conv3;
+ case UINT32_C(0x0091): c1 = 'P'; c2 = 'U'; c3 = '1'; goto conv3;
+ case UINT32_C(0x0092): c1 = 'P'; c2 = 'U'; c3 = '2'; goto conv3;
+ case UINT32_C(0x0093): c1 = 'S'; c2 = 'T'; c3 = 'S'; goto conv3;
+ case UINT32_C(0x0094): c1 = 'C'; c2 = 'C'; c3 = 'H'; goto conv3;
+ case UINT32_C(0x0095): c1 = 'M'; c2 = 'W'; goto conv2;
+ case UINT32_C(0x0096): c1 = 'S'; c2 = 'P'; c3 = 'A'; goto conv3;
+ case UINT32_C(0x0097): c1 = 'E'; c2 = 'P'; c3 = 'A'; goto conv3;
+ case UINT32_C(0x0098): c1 = 'S'; c2 = 'O'; c3 = 'S'; goto conv3;
+ case UINT32_C(0x0099): c1 = 'S'; c2 = 'G'; c3 = 'C'; c4 = 'I'; goto conv4;
+ case UINT32_C(0x009A): c1 = 'S'; c2 = 'S'; c3 = 'I'; goto conv3;
+ case UINT32_C(0x009B): c1 = 'C'; c2 = 'S'; c3 = 'I'; goto conv3;
+ case UINT32_C(0x009C): c1 = 'S'; c2 = 'T'; goto conv2;
+ case UINT32_C(0x009D): c1 = 'O'; c2 = 'S'; c3 = 'C'; goto conv3;
+ case UINT32_C(0x009E): c1 = 'P'; c2 = 'M'; goto conv2;
+ case UINT32_C(0x009F): c1 = 'A'; c2 = 'P'; c3 = 'C'; goto conv3;
+ case UINT32_C(0x200B): c1 = 'Z'; c2 = 'W'; c3 = 'S'; goto conv3;
+ case UINT32_C(0x200C): c1 = 'Z'; c2 = 'W'; c3 = 'N'; c4 = 'J'; goto conv4;
+ case UINT32_C(0x200D): c1 = 'Z'; c2 = 'W'; c3 = 'J'; goto conv3;
+ case UINT32_C(0x200E): c1 = 'L'; c2 = 'T'; c3 = 'R'; c4 = 'M'; goto conv4;
+ case UINT32_C(0x200F): c1 = 'R'; c2 = 'T'; c3 = 'L'; c4 = 'M'; goto conv4;
+ case UINT32_C(0x202A): c1 = 'L'; c2 = 'T'; c3 = 'R'; c4 = 'E'; goto conv4;
+ case UINT32_C(0x202B): c1 = 'R'; c2 = 'T'; c3 = 'L'; c4 = 'E'; goto conv4;
+ case UINT32_C(0x202C): c1 = 'P'; c2 = 'D'; c3 = 'F'; goto conv3;
+ case UINT32_C(0x202D): c1 = 'L'; c2 = 'T'; c3 = 'R'; c4 = 'O'; goto conv4;
+ case UINT32_C(0x202E): c1 = 'R'; c2 = 'T'; c3 = 'L'; c4 = 'O'; goto conv4;
+ case UINT32_C(0x2060): c1 = 'W'; c2 = 'J'; goto conv2;
+ case UINT32_C(0x2066): c1 = 'L'; c2 = 'T'; c3 = 'R'; c4 = 'I'; goto conv4;
+ case UINT32_C(0x2067): c1 = 'R'; c2 = 'T'; c3 = 'L'; c4 = 'I'; goto conv4;
+ case UINT32_C(0x2068): c1 = 'F'; c2 = 'S'; c3 = 'I'; goto conv3;
+ case UINT32_C(0x2069): c1 = 'P'; c2 = 'D'; c3 = 'I'; goto conv3;
+ case UINT32_C(0x206A): c1 = 'I'; c2 = 'S'; c3 = 'S'; goto conv3;
+ case UINT32_C(0x206B): c1 = 'A'; c2 = 'S'; c3 = 'S'; goto conv3;
+ case UINT32_C(0x206C): c1 = 'I'; c2 = 'A'; c3 = 'F'; c4 = 'S'; goto conv4;
+ case UINT32_C(0x206D): c1 = 'A'; c2 = 'A'; c3 = 'F'; c4 = 'S'; goto conv4;
+ case UINT32_C(0x206E): c1 = 'N'; c2 = 'A'; c3 = 'D'; c4 = 'S'; goto conv4;
+ case UINT32_C(0x206F): c1 = 'N'; c2 = 'O'; c3 = 'D'; c4 = 'S'; goto conv4;
+ case UINT32_C(0xFFF9): c1 = 'I'; c2 = 'A'; c3 = 'A'; goto conv3;
+ case UINT32_C(0xFFFA): c1 = 'I'; c2 = 'A'; c3 = 'S'; goto conv3;
+ case UINT32_C(0xFFFB): c1 = 'I'; c2 = 'A'; c3 = 'T'; goto conv3;
+ case UINT32_C(0x1BCA0): c1 = 'S'; c2 = 'F'; c3 = 'L'; c4 = 'O'; goto conv4;
+ case UINT32_C(0x1BCA1): c1 = 'S'; c2 = 'F'; c3 = 'C'; c4 = 'O'; goto conv4;
+ case UINT32_C(0x1BCA2): c1 = 'S'; c2 = 'F'; c3 = 'D'; c4 = 'S'; goto conv4;
+ case UINT32_C(0x1BCA3): c1 = 'S'; c2 = 'F'; c3 = 'U'; c4 = 'S'; goto conv4;
+ case UINT32_C(0xE0001): c1 = 'L'; c2 = 'T'; c3 = 'A'; c4 = 'G'; goto conv4;
+ case UINT32_C(0xE007F): c1 = 'C'; c2 = 'T'; c3 = 'A'; c4 = 'G'; goto conv4;
+
+ /* control characters (whitespace) */
+ case UINT32_C(0x20): c1 = 'S'; c2 = 'P'; goto conv2;
+ case UINT32_C(0x00A0): c1 = 'N'; c2 = 'B'; c3 = 'S'; c4 = 'P'; goto conv4;
+ case UINT32_C(0x2000): c1 = 'N'; c2 = 'Q'; goto conv2;
+ case UINT32_C(0x2001): c1 = 'M'; c2 = 'Q'; goto conv2;
+ case UINT32_C(0x2002): c1 = 'N'; c2 = 'S'; c3 = 'P'; goto conv3;
+ case UINT32_C(0x2003): c1 = 'M'; c2 = 'S'; c3 = 'P'; goto conv3;
+ case UINT32_C(0x2004): c1 = '3'; c2 = 'M'; c3 = 'S'; c4 = 'P'; goto conv4;
+ case UINT32_C(0x2005): c1 = '4'; c2 = 'M'; c3 = 'S'; c4 = 'P'; goto conv4;
+ case UINT32_C(0x2006): c1 = '6'; c2 = 'M'; c3 = 'S'; c4 = 'P'; goto conv4;
+ case UINT32_C(0x2007): c1 = 'F'; c2 = 'S'; c3 = 'P'; goto conv3;
+ case UINT32_C(0x2008): c1 = 'P'; c2 = 'S'; c3 = 'P'; goto conv3;
+ case UINT32_C(0x2009): c1 = 'T'; c2 = 'S'; c3 = 'P'; goto conv3;
+ case UINT32_C(0x200A): c1 = 'H'; c2 = 'S'; c3 = 'P'; goto conv3;
+ case UINT32_C(0x2028): c1 = 'L'; c2 = 'S'; goto conv2;
+ case UINT32_C(0x2029): c1 = 'P'; c2 = 'S'; goto conv2;
+ case UINT32_C(0x202F): c1 = 'N'; c2 = 'N'; c3 = 'B'; c4 = 'S'; c5 = 'P'; goto conv5;
+ case UINT32_C(0x205F): c1 = 'M'; c2 = 'M'; c3 = 'S'; c4 = 'P'; goto conv4;
+
+ /* control characters (conditional characters) */
+ case UINT32_C(0x00AD): c1 = 'S'; c2 = 'H'; c3 = 'Y'; goto conv3;
+
default:
no_match:
*n += clen;