/** * slibc — Yet another C library * Copyright © 2015 Mattias Andrée (maandree@member.fsf.org) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include /** * Parse an escaped string. * * Supported escapes: * \' \" \$ \& \? \\ \/ \### \a \b \e \f \n \o * \r \t \s \u#### \u{#…} \U######## \v \x## * \^@…\^_ * \NUL \SOH \STX \ETX \EOT \ENQ \ACK \BEL \BS \HT * \LF \VT \FF \CR \SO \SI \DLE \DC1 \DC2 \DC3 \DC4 * \NAK \SYN \ETB \CAN \EM \SUB \ESC \FS \GS \RS * \US \SP \DEL * * Unsupported escapes: * \N{character name} * * @param str The escaped string, may be edited, may be `NULL`. * Must not be reused on error. * @param mode How unrecognised escapes should be handled, * and other configurations, 0 for default. * @return The new end of `str` is returned. `NULL` is returned * on error or if `str` is `NULL`. * * @throws 0 `str` is `NULL`. * @throws EINVAL If `mode` is invalid. * @throws EINVAL If `str` is invalid and `mode & UNESCAPE_EINVAL`. 
*/
char* unescape(char* str, enum unescape_mode mode)
{
  /* Named ASCII control characters accepted after a backslash, with the
   * length of the name and the byte it stands for. Lookup is by prefix,
   * so a name that extends another must come first ("SOH" before "SO").
   * "US" is absent on purpose: a leading 'U' is dispatched to the
   * `\U########` case below, which recognises `\US` itself. */
  static const struct
  {
    const char* name;
    int length;
    char code;
  } controls[] = {
    {"NUL", 3,  0}, {"SOH", 3,  1}, {"STX", 3,  2}, {"ETX", 3,  3},
    {"EOT", 3,  4}, {"ENQ", 3,  5}, {"ACK", 3,  6}, {"BEL", 3,  7},
    {"BS",  2,  8}, {"HT",  2,  9}, {"LF",  2, 10}, {"VT",  2, 11},
    {"FF",  2, 12}, {"CR",  2, 13}, {"SO",  2, 14}, {"SI",  2, 15},
    {"DLE", 3, 16}, {"DC1", 3, 17}, {"DC2", 3, 18}, {"DC3", 3, 19},
    {"DC4", 3, 20}, {"NAK", 3, 21}, {"SYN", 3, 22}, {"ETB", 3, 23},
    {"CAN", 3, 24}, {"EM",  2, 25}, {"SUB", 3, 26}, {"ESC", 3, 27},
    {"FS",  2, 28}, {"GS",  2, 29}, {"RS",  2, 30}, {"SP",  2, 32},
    {"DEL", 3, 0x7F},
    {NULL,  0,  0}
  };
  
  unsigned long int v = 0;  /* Code point of a numeric escape. */
  char* w;                  /* Write position; never ahead of `r`. */
  char* r;                  /* Read position. */
  int i, n, digit, braced;
  char c;
  
  if (str == NULL)
    return errno = 0, NULL;
  
  /* Only the five lowest bits are defined. */
  if (mode & ~31)
    return errno = EINVAL, NULL;
  if (mode == 0)
    mode |= UNESCAPE_EINVAL | UNESCAPE_MOD_UTF8;
  
  /* The three lowest bits select how unrecognised escapes are treated;
   * at most one of them may be set, and EINVAL is used if none is. */
  switch (mode & 7)
    {
    case 0:
      mode |= UNESCAPE_EINVAL;
      /* fallthrough */
    case 1:
    case 2:
    case 4:
      break;
    default:
      return errno = EINVAL, NULL;
    }
  
  for (w = r = str; *r; r++)
    {
      if (*r != '\\')
	{
	  *w++ = *r;
	  continue;
	}
      
      /* `r` is moved to the character that follows the backslash. Each
       * case leaves `r` on the last character it consumed; the loop
       * increment then steps to the next unread character. */
      switch (*++r)
	{
	case '\0':
	  /* A lone backslash at the end of the string. Leave the loop
	   * here: letting `r++` run would step past the terminator. */
	  if (mode & UNESCAPE_EINVAL)
	    return errno = EINVAL, NULL;
	  if (mode & UNESCAPE_IGNORE)
	    *w++ = '\\';
	  goto done;
	  
	case '\'':
	case '"':
	case '$':
	case '?':
	case '\\':
	case '/':
	  *w++ = *r;
	  break;
	  
	case '&':
	  if (mode & UNESCAPE_AMPERSAND)
	    *w++ = (char)255;
	  else if (mode & UNESCAPE_EINVAL)
	    return errno = EINVAL, NULL;
	  else if (mode & UNESCAPE_VERBATIM)
	    *w++ = '&';
	  else if (mode & UNESCAPE_IGNORE)
	    *w++ = '\\', *w++ = '&';
	  break;
	  
	case 'a':  *w++ = '\a';  break;
	case 'b':  *w++ = '\b';  break;
	case 'e':  *w++ = 033;   break;
	case 'f':  *w++ = '\f';  break;
	case 'n':  *w++ = '\n';  break;
	case 'r':  *w++ = '\r';  break;
	case 't':  *w++ = '\t';  break;
	case 's':  *w++ = ' ';   break;
	case 'v':  *w++ = '\v';  break;
	  
	case '^':
	  /* `\^@` … `\^_` map to the control characters 0 … 31. */
	  if (('@' <= r[1]) && (r[1] <= '_'))
	    *w++ = (char)(*++r - '@');
	  else if (mode & UNESCAPE_EINVAL)
	    return errno = EINVAL, NULL;
	  else if (r[1])
	    {
	      /* The character after the caret is copied as is.
	       * If `\^` ends the string, nothing is written. */
	      if (mode & UNESCAPE_VERBATIM)
		*w++ = '^';
	      else if (mode & UNESCAPE_IGNORE)
		*w++ = '\\', *w++ = '^';
	      *w++ = *++r;
	    }
	  break;
	  
	case 'U':
	  /* 'S' is not a hexadecimal digit, so `\US` (unit separator)
	   * cannot be confused with `\U########`. */
	  if (r[1] == 'S')
	    {
	      *w++ = 31, r++;
	      break;
	    }
	  /* fallthrough */
	case 'u':
	case 'x':
	  /* `\u{#…}` takes any number of digits up to the closing brace,
	   * the other forms take exactly `n` digits. */
	  braced = (r[0] == 'u') && (r[1] == '{');
	  n = braced ? 0 : (*r == 'U') ? 8 : (*r == 'u') ? 4 : 2;
	  v = 0;
	  for (i = braced ? 2 : 1; braced ? (r[i] != '}') : (i <= n); i++)
	    {
	      /* A NUL byte is not a digit, so the scan cannot run past
	       * the end of the string. */
	      c = r[i];
	      if (('0' <= c) && (c <= '9'))
		digit = c - '0';
	      else if (('a' <= c) && (c <= 'f'))
		digit = c - 'a' + 10;
	      else if (('A' <= c) && (c <= 'F'))
		digit = c - 'A' + 10;
	      else
		goto unrecognised;
	      v = (v << 4) | (unsigned long int)digit;
	      /* Checked at every step, which also rules out overflow. */
	      if (v > 0x10FFFFUL)
		goto unrecognised;
	    }
	  /* Skip the digits, and the closing brace if there is one. */
	  r += braced ? i : n;
	  goto emit;
	  
	default:
	  if (('0' <= *r) && (*r <= '7'))
	    {
	      /* One to three octal digits. */
	      v = (unsigned long int)(*r - '0');
	      if (('0' <= r[1]) && (r[1] <= '7'))
		v = (v << 3) | (unsigned long int)(*++r - '0');
	      if (('0' <= r[1]) && (r[1] <= '7'))
		v = (v << 3) | (unsigned long int)(*++r - '0');
	      goto emit;
	    }
	  for (i = 0; controls[i].name != NULL; i++)
	    if (strstarts(r, controls[i].name))
	      break;
	  if (controls[i].name == NULL)
	    goto unrecognised;
	  *w++ = controls[i].code;
	  r += controls[i].length - 1;
	  break;
	}
      continue;
      
    emit:
      /* Store `v` as UTF-8. With UNESCAPE_MOD_UTF8, U+0000 is stored
       * as the two bytes C0 80 so that no NUL byte is written. */
      if ((v == 0) && (mode & UNESCAPE_MOD_UTF8))
	*w++ = (char)0xC0,
	*w++ = (char)0x80;
      else if (v < 0x80)
	*w++ = (char)v;
      else if (v < (1UL << 11))
	*w++ = (char)(0xC0 | (v >> 6)),
	*w++ = (char)(0x80 | (v & 0x3F));
      else if (v < (1UL << 16))
	*w++ = (char)(0xE0 | (v >> 12)),
	*w++ = (char)(0x80 | ((v >> 6) & 0x3F)),
	*w++ = (char)(0x80 | (v & 0x3F));
      else
	*w++ = (char)(0xF0 | (v >> 18)),
	*w++ = (char)(0x80 | ((v >> 12) & 0x3F)),
	*w++ = (char)(0x80 | ((v >> 6) & 0x3F)),
	*w++ = (char)(0x80 | (v & 0x3F));
      continue;
      
    unrecognised:
      /* `r` is still on the character right after the backslash. */
      if (mode & UNESCAPE_EINVAL)
	return errno = EINVAL, NULL;
      if (mode & UNESCAPE_IGNORE)
	*w++ = '\\';
      /* Step back onto the backslash: the loop increment then returns
       * to the character after it, which is copied as ordinary text. */
      r--;
    }
  
 done:
  return *w = 0, w;
}