/* See LICENSE file for copyright and license details. */
#include "lib-common.h"
#include <string.h>


/*
 * Lookup table mapping a symbolic abbreviation (`.s`) to the code point
 * (`.cp`) it stands for. Entries are searched linearly and the longest
 * matching abbreviation wins, so the order of the entries is irrelevant
 * and one abbreviation may be a prefix of another (e.g. "SP" and "SPA").
 */
static const struct {
	uint_least32_t cp;
	const char *s;
} symbols[] = {
	/* C0 controls and DELETE */
	{UINT32_C(0x00), "NUL"}, /* NULL */
	{UINT32_C(0x01), "SOH"}, /* START OF HEADING */
	{UINT32_C(0x02), "STX"}, /* START OF TEXT */
	{UINT32_C(0x03), "ETX"}, /* END OF TEXT */
	{UINT32_C(0x04), "EOT"}, /* END OF TRANSMISSION */
	{UINT32_C(0x05), "ENQ"}, /* ENQUIRY */
	{UINT32_C(0x06), "ACK"}, /* ACKNOWLEDGE */
	{UINT32_C(0x07), "BEL"}, /* BELL */
	{UINT32_C(0x08), "BS"}, /* BACKSPACE */
	{UINT32_C(0x09), "HT"}, /* CHARACTER TABULATION */
	{UINT32_C(0x0A), "LF"}, /* LINE FEED */
	{UINT32_C(0x0B), "VT"}, /* LINE TABULATION */
	{UINT32_C(0x0C), "FF"}, /* FORM FEED */
	{UINT32_C(0x0D), "CR"}, /* CARRIAGE RETURN */
	{UINT32_C(0x0E), "SO"}, /* SHIFT OUT */
	{UINT32_C(0x0F), "SI"}, /* SHIFT IN */
	{UINT32_C(0x10), "DLE"}, /* DATA LINK ESCAPE */
	{UINT32_C(0x11), "DC1"}, /* DEVICE CONTROL ONE */
	{UINT32_C(0x12), "DC2"}, /* DEVICE CONTROL TWO */
	{UINT32_C(0x13), "DC3"}, /* DEVICE CONTROL THREE */
	{UINT32_C(0x14), "DC4"}, /* DEVICE CONTROL FOUR */
	{UINT32_C(0x15), "NAK"}, /* NEGATIVE ACKNOWLEDGE */
	{UINT32_C(0x16), "SYN"}, /* SYNCHRONOUS IDLE */
	{UINT32_C(0x17), "ETB"}, /* END OF TRANSMISSION BLOCK */
	{UINT32_C(0x18), "CAN"}, /* CANCEL */
	{UINT32_C(0x19), "EM"}, /* END OF MEDIUM */
	{UINT32_C(0x1A), "SUB"}, /* SUBSTITUTE */
	{UINT32_C(0x1B), "ESC"}, /* ESCAPE */
	{UINT32_C(0x1C), "FS"}, /* INFORMATION SEPARATOR FOUR */
	{UINT32_C(0x1D), "GS"}, /* INFORMATION SEPARATOR THREE */
	{UINT32_C(0x1E), "RS"}, /* INFORMATION SEPARATOR TWO */
	{UINT32_C(0x1F), "US"}, /* INFORMATION SEPARATOR ONE */
	{UINT32_C(0x7F), "DEL"}, /* DELETE */

	/*
	 * C1 controls
	 *
	 * NOTE(review): "NBP", "LTS", "SGCI" and "SSI" differ from the
	 * abbreviations Unicode lists for these code points (NBH, VTS,
	 * SGC and SCI); left unchanged here, confirm whether they are
	 * intentional.
	 */
	{UINT32_C(0x0080), "PAD"}, /* Padding Character */
	{UINT32_C(0x0081), "HOP"}, /* High Octet Preset */
	{UINT32_C(0x0082), "BPH"}, /* BREAK PERMITTED HERE */
	{UINT32_C(0x0083), "NBP"}, /* NO BREAK HERE */
	{UINT32_C(0x0084), "IND"}, /* INDEX */
	{UINT32_C(0x0085), "NEL"}, /* NEXT LINE */
	{UINT32_C(0x0086), "SSA"}, /* START OF SELECTED AREA */
	{UINT32_C(0x0087), "ESA"}, /* END OF SELECTED AREA */
	{UINT32_C(0x0088), "HTS"}, /* CHARACTER TABULATION SET */
	{UINT32_C(0x0089), "HTJ"}, /* CHARACTER TABULATION WITH JUSTIFICATION */
	{UINT32_C(0x008A), "LTS"}, /* LINE TABULATION SET */
	{UINT32_C(0x008B), "PLD"}, /* PARTIAL LINE FORWARD */
	{UINT32_C(0x008C), "PLU"}, /* PARTIAL LINE BACKWARD */
	{UINT32_C(0x008D), "RI"}, /* REVERSE LINE FEED */
	{UINT32_C(0x008E), "SS2"}, /* SINGLE SHIFT TWO */
	{UINT32_C(0x008F), "SS3"}, /* SINGLE SHIFT THREE */
	{UINT32_C(0x0090), "DCS"}, /* DEVICE CONTROL STRING */
	{UINT32_C(0x0091), "PU1"}, /* PRIVATE USE ONE */
	{UINT32_C(0x0092), "PU2"}, /* PRIVATE USE TWO */
	{UINT32_C(0x0093), "STS"}, /* SET TRANSMIT STATE */
	{UINT32_C(0x0094), "CCH"}, /* CANCEL CHARACTER */
	{UINT32_C(0x0095), "MW"}, /* MESSAGE WAITING */
	{UINT32_C(0x0096), "SPA"}, /* START OF GUARDED AREA */
	{UINT32_C(0x0097), "EPA"}, /* END OF GUARDED AREA */
	{UINT32_C(0x0098), "SOS"}, /* START OF STRING */
	{UINT32_C(0x0099), "SGCI"}, /* Single Graphic Character Introducer */
	{UINT32_C(0x009A), "SSI"}, /* SINGLE CHARACTER INTRODUCER */
	{UINT32_C(0x009B), "CSI"}, /* CONTROL SEQUENCE INTRODUCER */
	{UINT32_C(0x009C), "ST"}, /* STRING TERMINATOR */
	{UINT32_C(0x009D), "OSC"}, /* OPERATING SYSTEM COMMAND */
	{UINT32_C(0x009E), "PM"}, /* PRIVACY MESSAGE */
	{UINT32_C(0x009F), "APC"}, /* APPLICATION PROGRAM COMMAND */

	/* Format characters */
	{UINT32_C(0x200B), "ZWS"}, /* ZERO WIDTH SPACE */
	{UINT32_C(0x200C), "ZWNJ"}, /* ZERO WIDTH NON-JOINER */
	{UINT32_C(0x200D), "ZWJ"}, /* ZERO WIDTH JOINER */
	{UINT32_C(0x200E), "LTRM"}, /* LEFT-TO-RIGHT MARK */
	{UINT32_C(0x200F), "RTLM"}, /* RIGHT-TO-LEFT MARK */
	{UINT32_C(0x202A), "LTRE"}, /* LEFT-TO-RIGHT EMBEDDING */
	{UINT32_C(0x202B), "RTLE"}, /* RIGHT-TO-LEFT EMBEDDING */
	{UINT32_C(0x202C), "PDF"}, /* POP DIRECTIONAL FORMATTING */
	{UINT32_C(0x202D), "LTRO"}, /* LEFT-TO-RIGHT OVERRIDE */
	{UINT32_C(0x202E), "RTLO"}, /* RIGHT-TO-LEFT OVERRIDE */
	{UINT32_C(0x2060), "WJ"}, /* WORD JOINER */
	{UINT32_C(0x2066), "LTRI"}, /* LEFT-TO-RIGHT ISOLATE */
	{UINT32_C(0x2067), "RTLI"}, /* RIGHT-TO-LEFT ISOLATE */
	{UINT32_C(0x2068), "FSI"}, /* FIRST STRONG ISOLATE */
	{UINT32_C(0x2069), "PDI"}, /* POP DIRECTIONAL ISOLATE */
	{UINT32_C(0x206A), "ISS"}, /* INHIBIT SYMMETRIC SWAPPING */
	{UINT32_C(0x206B), "ASS"}, /* ACTIVATE SYMMETRIC SWAPPING */
	{UINT32_C(0x206C), "IAFS"}, /* INHIBIT ARABIC FORM SHAPING */
	{UINT32_C(0x206D), "AAFS"}, /* ACTIVATE ARABIC FORM SHAPING */
	{UINT32_C(0x206E), "NADS"}, /* NATIONAL DIGIT SHAPES */
	{UINT32_C(0x206F), "NODS"}, /* NOMINAL DIGIT SHAPES */
	{UINT32_C(0xFFF9), "IAA"}, /* INTERLINEAR ANNOTATION ANCHOR */
	{UINT32_C(0xFFFA), "IAS"}, /* INTERLINEAR ANNOTATION SEPARATOR */
	{UINT32_C(0xFFFB), "IAT"}, /* INTERLINEAR ANNOTATION TERMINATOR */
	{UINT32_C(0x1BCA0), "SFLO"}, /* SHORTHAND FORMAT LETTER OVERLAP */
	{UINT32_C(0x1BCA1), "SFCO"}, /* SHORTHAND FORMAT CONTINUING OVERLAP */
	{UINT32_C(0x1BCA2), "SFDS"}, /* SHORTHAND FORMAT DOWN STEP */
	{UINT32_C(0x1BCA3), "SFUS"}, /* SHORTHAND FORMAT UP STEP */
	{UINT32_C(0xE0001), "LTAG"}, /* LANGUAGE TAG */
	{UINT32_C(0xE007F), "CTAG"}, /* CANCEL TAG */

	/* Spaces and separators */
	{UINT32_C(0x20), "SP"}, /* SPACE */
	{UINT32_C(0x00A0), "NBSP"}, /* NO-BREAK SPACE */
	{UINT32_C(0x2000), "NQ"}, /* EN QUAD */
	{UINT32_C(0x2001), "MQ"}, /* EM QUAD */
	{UINT32_C(0x2002), "NSP"}, /* EN SPACE */
	{UINT32_C(0x2003), "MSP"}, /* EM SPACE */
	{UINT32_C(0x2004), "3MSP"}, /* THREE-PER-EM SPACE */
	{UINT32_C(0x2005), "4MSP"}, /* FOUR-PER-EM SPACE */
	{UINT32_C(0x2006), "6MSP"}, /* SIX-PER-EM SPACE */
	{UINT32_C(0x2007), "FSP"}, /* FIGURE SPACE */
	{UINT32_C(0x2008), "PSP"}, /* PUNCTUATION SPACE */
	{UINT32_C(0x2009), "TSP"}, /* THIN SPACE */
	{UINT32_C(0x200A), "HSP"}, /* HAIR SPACE */
	{UINT32_C(0x2028), "LS"}, /* LINE SEPARATOR */
	{UINT32_C(0x2029), "PS"}, /* PARAGRAPH SEPARATOR */
	{UINT32_C(0x202F), "NNBSP"}, /* NARROW NO-BREAK SPACE */
	{UINT32_C(0x205F), "MMSP"}, /* MEDIUM MATHEMATICAL SPACE */
	{UINT32_C(0x00AD), "SHY"} /* SOFT HYPHEN */
};


/*
 * Convert the symbolic abbreviation at the beginning of `s` into the
 * code point it is listed with in `symbols`
 *
 * Only the first `slen` bytes of `s` are examined; `s` does not need
 * to be NUL-terminated. When several abbreviations match, the longest
 * one is used.
 *
 * @param   s     The input text
 * @param   slen  The number of bytes available in `s`
 * @param   n     Output parameter for the number of bytes covered by the
 *                result: the length of the matched abbreviation on
 *                conversion, 1 when the input is non-empty but starts
 *                with no abbreviation, and 0 otherwise
 * @param   cp    Output buffer for the code point; written only on
 *                conversion and only if `*ncp` is non-zero on entry
 * @param   ncp   On entry, non-zero if `cp` may be written to; set to 1
 *                on conversion and otherwise left unmodified
 * @return        LIBCHARCONV_CONVERTED if an abbreviation matched and no
 *                longer abbreviation can match if more input is appended,
 *                LIBCHARCONV_CONVERT_IF_END if an abbreviation matched
 *                but the available input is also a proper prefix of a
 *                longer abbreviation,
 *                LIBCHARCONV_INDETERMINATE if nothing matched but the
 *                available input is a proper prefix of an abbreviation,
 *                LIBCHARCONV_NO_CONVERT otherwise
 *
 * NOTE(review): earlier revisions wrapped the table scan in a loop over
 * start offsets, but that loop could never get past offset 1 and the
 * scan at offset 1 could not influence the result, so only offset 0 is
 * examined here. If scanning ahead to the next abbreviation was
 * intended, that is a behavioural change to be made deliberately.
 */
enum libcharconv_result
libcharconv_control_characters(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp)
{
	size_t i, len;
	size_t found = 0u, found_len = 0u;
	int indeterminate = 0;

	*n = 0u;
	if (!slen)
		return LIBCHARCONV_NO_CONVERT;

	for (i = 0u; i < sizeof(symbols) / sizeof(*symbols); i++) {
		len = strlen(symbols[i].s);
		/* Compare only as much as both the input and the abbreviation provide */
		if (strncmp(s, symbols[i].s, len < slen ? len : slen))
			continue;
		if (slen < len) {
			/* The input ended inside this abbreviation: more input may complete it */
			indeterminate = 1;
			continue;
		}
		if (len > found_len) {
			found = i;
			found_len = len;
		}
	}

	if (!found_len) {
		if (indeterminate)
			return LIBCHARCONV_INDETERMINATE;
		/* Nothing starts here: report one byte as not convertible */
		*n = 1u;
		return LIBCHARCONV_NO_CONVERT;
	}

	if (*ncp)
		*cp = symbols[found].cp;
	*n = found_len;
	*ncp = 1u;
	return indeterminate ? LIBCHARCONV_CONVERT_IF_END : LIBCHARCONV_CONVERTED;
}