diff options
| author | Mattias Andrée <m@maandree.se> | 2026-01-25 18:37:59 +0100 |
|---|---|---|
| committer | Mattias Andrée <m@maandree.se> | 2026-01-25 18:37:59 +0100 |
| commit | 87f373ca116abbb5a7ba8e6ad7111082e8dfb128 (patch) | |
| tree | 0943f33e352ae98297cfd55cc058acd7b0c9609d /libcharconv_control_characters.c | |
| parent | Add symbols (diff) | |
| download | charconv-87f373ca116abbb5a7ba8e6ad7111082e8dfb128.tar.gz charconv-87f373ca116abbb5a7ba8e6ad7111082e8dfb128.tar.bz2 charconv-87f373ca116abbb5a7ba8e6ad7111082e8dfb128.tar.xz | |
Add control characters
Signed-off-by: Mattias Andrée <m@maandree.se>
Diffstat (limited to 'libcharconv_control_characters.c')
| -rw-r--r-- | libcharconv_control_characters.c | 169 |
1 files changed, 169 insertions, 0 deletions
diff --git a/libcharconv_control_characters.c b/libcharconv_control_characters.c new file mode 100644 index 0000000..55e4bbf --- /dev/null +++ b/libcharconv_control_characters.c @@ -0,0 +1,169 @@ +/* See LICENSE file for copyright and license details. */ +#include "lib-common.h" +#include <string.h> + + +static struct { + uint_least32_t cp; + const char *s; +} symbols[] = { + {UINT32_C(0x00), "NUL"}, /* NULL */ + {UINT32_C(0x01), "SOH"}, /* START OF HEADING */ + {UINT32_C(0x02), "STX"}, /* START OF TEXT */ + {UINT32_C(0x03), "ETX"}, /* END OF TEXT */ + {UINT32_C(0x04), "EOT"}, /* END OF TRANSMISSION */ + {UINT32_C(0x05), "ENQ"}, /* ENQUIRY */ + {UINT32_C(0x06), "ACK"}, /* ACKNOWLEDGE */ + {UINT32_C(0x07), "BEL"}, /* BELL */ + {UINT32_C(0x08), "BS"}, /* BACKSPACE */ + {UINT32_C(0x09), "HT"}, /* CHARACTER TABULATION */ + {UINT32_C(0x0A), "LF"}, /* LINE FEED */ + {UINT32_C(0x0B), "VT"}, /* LINE TABULATION */ + {UINT32_C(0x0C), "FF"}, /* FORM FEED */ + {UINT32_C(0x0D), "CR"}, /* CARRIAGE RETURN */ + {UINT32_C(0x0E), "SS"}, /* SHIFT OUT */ + {UINT32_C(0x0F), "SI"}, /* SHIFT IN */ + {UINT32_C(0x10), "DLE"}, /* DATA LINK ESCAPE */ + {UINT32_C(0x11), "DC1"}, /* DEVICE CONTROL ONE */ + {UINT32_C(0x12), "DC2"}, /* DEVICE CONTROL TWO */ + {UINT32_C(0x13), "DC3"}, /* DEVICE CONTROL THREE */ + {UINT32_C(0x14), "DC4"}, /* DEVICE CONTROL FOUR */ + {UINT32_C(0x15), "NAK"}, /* NEGATIVE ACKNOWLEDGE */ + {UINT32_C(0x16), "SYN"}, /* SYNCHRONOUS IDLE */ + {UINT32_C(0x17), "ETB"}, /* END OF TRANSMISSION BLOCK */ + {UINT32_C(0x18), "CAN"}, /* CANCEL */ + {UINT32_C(0x19), "EM"}, /* END OF MEDIUM */ + {UINT32_C(0x1A), "SUB"}, /* SUBSTITUTE */ + {UINT32_C(0x1B), "ESC"}, /* ESCAPE */ + {UINT32_C(0x1C), "FS"}, /* INFORMATION SEPARATOR FOUR */ + {UINT32_C(0x1D), "GS"}, /* INFORMATION SEPARATOR THREE */ + {UINT32_C(0x1E), "RS"}, /* INFORMATION SEPARATOR TWO */ + {UINT32_C(0x1F), "US"}, /* INFORMATION SEPARATOR ONE */ + {UINT32_C(0x7F), "DEL"}, /* DELETE */ + {UINT32_C(0x0080), "PAD"}, /* Padding Character */ + {UINT32_C(0x0081), "HOP"}, /* High Octet Preset */ + {UINT32_C(0x0082), "BPH"}, /* BREAK PERMITTED HERE */ + {UINT32_C(0x0083), "NBP"}, /* NO BREAK HERE */ + {UINT32_C(0x0084), "IND"}, /* INDEX */ + {UINT32_C(0x0085), "NEL"}, /* NEXT LINE */ + {UINT32_C(0x0086), "SSA"}, /* START OF SELECTED AREA */ + {UINT32_C(0x0087), "ESA"}, /* END OF SELECTED AREA */ + {UINT32_C(0x0088), "HTS"}, /* CHARACTER TABULATION SET */ + {UINT32_C(0x0089), "HTJ"}, /* CHARACTER TABULATION WITH JUSTIFICATION */ + {UINT32_C(0x008A), "LTS"}, /* LINE TABULATION SET */ + {UINT32_C(0x008B), "PLD"}, /* PARTIAL LINE FORWARD */ + {UINT32_C(0x008C), "PLU"}, /* PARTIAL LINE BACKWARD */ + {UINT32_C(0x008D), "RI"}, /* REVERSE LINE FEED */ + {UINT32_C(0x008E), "SS2"}, /* SINGLE SHIFT TWO */ + {UINT32_C(0x008F), "SS3"}, /* SINGLE SHIFT THREE */ + {UINT32_C(0x0090), "DCS"}, /* DEVICE CONTROL STRING */ + {UINT32_C(0x0091), "PU1"}, /* PRIVATE USE ONE */ + {UINT32_C(0x0092), "PU2"}, /* PRIVATE USE TWO */ + {UINT32_C(0x0093), "STS"}, /* SET TRANSMIT STATE */ + {UINT32_C(0x0094), "CCH"}, /* CANCEL CHARACTER */ + {UINT32_C(0x0095), "MW"}, /* MESSAGE WAITING */ + {UINT32_C(0x0096), "SPA"}, /* START OF GUARDED AREA */ + {UINT32_C(0x0097), "EPA"}, /* END OF GUARDED AREA */ + {UINT32_C(0x0098), "SOS"}, /* START OF STRING */ + {UINT32_C(0x0099), "SGCI"}, /* Single Graphic Character Introducer */ + {UINT32_C(0x009A), "SSI"}, /* SINGLE CHARACTER INTRODUCER */ + {UINT32_C(0x009B), "CSI"}, /* CONTROL SEQUENCE INTRODUCER */ + {UINT32_C(0x009C), "ST"}, /* STRING TERMINATOR */ + {UINT32_C(0x009D), "OSC"}, /* OPERATING SYSTEM COMMAND */ + {UINT32_C(0x009E), "PM"}, /* PRIVACY MESSAGE */ + {UINT32_C(0x009F), "APC"}, /* APPLICATION PROGRAM COMMAND */ + {UINT32_C(0x200B), "ZWS"}, /* ZERO WIDTH SPACE */ + {UINT32_C(0x200C), "ZWNJ"}, /* ZERO WIDTH NON-JOINER */ + {UINT32_C(0x200D), "ZWJ"}, /* ZERO WIDTH JOINER */ + {UINT32_C(0x200E), "LTRM"}, /* LEFT-TO-RIGHT MARK */ + {UINT32_C(0x200F), "RTLM"}, /* RIGHT-TO-LEFT MARK */ + {UINT32_C(0x202A), "LTRE"}, /* LEFT-TO-RIGHT EMBEDDING */ + {UINT32_C(0x202B), "RTLE"}, /* RIGHT-TO-LEFT EMBEDDING */ + {UINT32_C(0x202C), "PDF"}, /* POP DIRECTIONAL FORMATTING */ + {UINT32_C(0x202D), "LTRO"}, /* LEFT-TO-RIGHT OVERRIDE */ + {UINT32_C(0x202E), "RTLO"}, /* RIGHT-TO-LEFT OVERRIDE */ + {UINT32_C(0x2060), "WJ"}, /* WORD JOINER */ + {UINT32_C(0x2066), "LTRI"}, /* LEFT-TO-RIGHT ISOLATE */ + {UINT32_C(0x2067), "RTLI"}, /* RIGHT-TO-LEFT ISOLATE */ + {UINT32_C(0x2068), "FSI"}, /* FIRST STRONG ISOLATE */ + {UINT32_C(0x2069), "PDI"}, /* POP DIRECTIONAL ISOLATE */ + {UINT32_C(0x206A), "ISS"}, /* INHIBIT SYMMETRIC SWAPPING */ + {UINT32_C(0x206B), "ASS"}, /* ACTIVATE SYMMETRIC SWAPPING */ + {UINT32_C(0x206C), "IAFS"}, /* INHIBIT ARABIC FORM SHAPING */ + {UINT32_C(0x206D), "AAFS"}, /* ACTIVATE ARABIC FORM SHAPING */ + {UINT32_C(0x206E), "NADS"}, /* NATIONAL DIGIT SHAPES */ + {UINT32_C(0x206F), "NODS"}, /* NOMINAL DIGIT SHAPES */ + {UINT32_C(0xFFF9), "IAA"}, /* INTERLINEAR ANNOTATION ANCHOR */ + {UINT32_C(0xFFFA), "IAS"}, /* INTERLINEAR ANNOTATION SEPARATOR */ + {UINT32_C(0xFFFB), "IAT"}, /* INTERLINEAR ANNOTATION TERMINATOR */ + {UINT32_C(0x1BCA0), "SFLO"}, /* SHORTHAND FORMAT LETTER OVERLAP */ + {UINT32_C(0x1BCA1), "SFCO"}, /* SHORTHAND FORMAT CONTINUING OVERLAP */ + {UINT32_C(0x1BCA2), "SFDS"}, /* SHORTHAND FORMAT DOWN STEP */ + {UINT32_C(0x1BCA3), "SFUS"}, /* SHORTHAND FORMAT UP STEP */ + {UINT32_C(0xE0001), "LTAG"}, /* LANGUAGE TAG */ + {UINT32_C(0xE007F), "CTAG"}, /* CANCEL TAG */ + + {UINT32_C(0x20), "SP"}, /* SPACE */ + {UINT32_C(0x00A0), "NBSP"}, /* NO-BREAK SPACE */ + {UINT32_C(0x2000), "NQ"}, /* EN QUAD */ + {UINT32_C(0x2001), "MQ"}, /* EM QUAD */ + {UINT32_C(0x2002), "NSP"}, /* EN SPACE */ + {UINT32_C(0x2003), "MSP"}, /* EM SPACE */ + {UINT32_C(0x2004), "3MSP"}, /* THREE-PER-EM SPACE */ + {UINT32_C(0x2005), "4MSP"}, /* FOUR-PER-EM SPACE */ + {UINT32_C(0x2006), "6MSP"}, /* SIX-PER-EM SPACE */ + {UINT32_C(0x2007), "FSP"}, /* FIGURE SPACE */ + {UINT32_C(0x2008), "PSP"}, /* PUNCTUATION SPACE */ + {UINT32_C(0x2009), "TSP"}, /* THIN SPACE */ + {UINT32_C(0x200A), "HSP"}, /* HAIR SPACE */ + {UINT32_C(0x2028), "LS"}, /* LINE SEPARATOR */ + {UINT32_C(0x2029), "PS"}, /* PARAGRAPH SEPARATOR */ + {UINT32_C(0x202F), "NNBSP"}, /* NARROW NO-BREAK SPACE */ + {UINT32_C(0x205F), "MMSP"}, /* MEDIUM MATHEMATICAL SPACE */ + + {UINT32_C(0x00AD), "SHY"} /* SOFT HYPHEN */ +}; + + +enum libcharconv_result +libcharconv_control_characters(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + size_t i, len, found, found_len; + int indeterminate; + *n = 0; + for (; slen; s++, slen--, ++*n) { + indeterminate = 0; + found = SIZE_MAX; + found_len = 0u; + for (i = 0u; i < sizeof(symbols) / sizeof(*symbols); i++) { + len = strlen(symbols[i].s); + if (strncmp(s, symbols[i].s, len < slen ? len : slen)) + continue; + if (slen < len) { + indeterminate = 1; + continue; + } + if (len > found_len) { + found = i; + found_len = len; + } + } + if (found_len) + goto conv; + if (*n) + goto no_conv; + if (indeterminate) + return LIBCHARCONV_INDETERMINATE; + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = symbols[found].cp; + *n += found_len; + *ncp = 1u; + return indeterminate ? LIBCHARCONV_CONVERT_IF_END : LIBCHARCONV_CONVERTED; +} |
