aboutsummaryrefslogtreecommitdiffstats
path: root/src/slibc-human
diff options
context:
space:
mode:
Diffstat (limited to 'src/slibc-human')
-rw-r--r--src/slibc-human/escape.c75
-rw-r--r--src/slibc-human/escapes.h79
-rw-r--r--src/slibc-human/unescape.c188
3 files changed, 166 insertions, 176 deletions
diff --git a/src/slibc-human/escape.c b/src/slibc-human/escape.c
index 832be8d..a7de84b 100644
--- a/src/slibc-human/escape.c
+++ b/src/slibc-human/escape.c
@@ -19,6 +19,7 @@
#include <stdlib.h>
#include <string.h>
#include <errno.h>
+#include "escapes.h"
@@ -39,10 +40,13 @@
*/
char* escape(const char* restrict str, char quote)
{
+#define OCTAL(s) (*w++ = '0' + ((c >> (s)) & 7))
+#define MODNUL(s) (((unsigned)((s)[0]) == 0xC0) && ((unsigned)((s)[1]) == 0x80))
+
const char* restrict r;
char* restrict w;
char* restrict rc;
- size_t extra = 1, len, size;
+ size_t extra = 0, len, size;
unsigned char c;
if (str == NULL)
@@ -58,67 +62,44 @@ char* escape(const char* restrict str, char quote)
return errno = EINVAL, NULL;
}
- for (r = str; *r; r++)
- switch (*r)
+ for (r = str; (c = *r); r++)
+ switch (c)
{
- case '\a':
- case '\b':
- case '\e':
- case '\f':
- case '\n':
- case '\r':
- case '\t':
- case '\v':
- case '\\':
- extra += 1;
- break;
- case 0x7F:
- extra += 3;
- break;
+#define X(E, C) case C:
+ LIST_BIJECTIVE_ESCAPES
+#undef X
+ extra += 1; break;
+ case 0x7F: extra += 3; break;
default:
- if (*r == quote)
- extra += 1;
- else if (*r < ' ')
- extra += 3;
+ if (c == quote) extra += 1;
+ else if (c < ' ') extra += 3;
break;
}
- if (extra == 1)
+ if (!extra++)
return strdup(str);
- len = strlen(str);
- if (__builtin_uaddl_overflow(len, extra, &size))
+ len = strlen(str) * sizeof(char);
+ if (__builtin_uaddl_overflow(len, extra * sizeof(char), &size))
return errno = ENOMEM, NULL;
- w = rc = malloc(size * sizeof(char));
- if (w == NULL)
+ w = rc = malloc(size);
+ if (rc == NULL)
return NULL;
for (r = str; (c = *r); r++)
switch (c)
{
- case '\a': *w++ = '\\', *w++ = 'a'; break;
- case '\b': *w++ = '\\', *w++ = 'b'; break;
- case 033: *w++ = '\\', *w++ = 'e'; break;
- case '\f': *w++ = '\\', *w++ = 'f'; break;
- case '\n': *w++ = '\\', *w++ = 'n'; break;
- case '\r': *w++ = '\\', *w++ = 'r'; break;
- case '\t': *w++ = '\\', *w++ = 't'; break;
- case '\v': *w++ = '\\', *w++ = 'v'; break;
- case '\\': *w++ = '\\', *w++ = '\\'; break;
- case 0x7F: *w++ = '\\', *w++ = '1', *w++ = '7', *w++ = '7'; break;
+#define X(E, C) case C: *w++ = '\\', *w++ = E; break;
+ LIST_BIJECTIVE_ESCAPES
+#undef X
+ case 0x7F: w = stpcpy(w, "\\177"); break;
default:
- if (((unsigned int)c == 0xC0) && ((unsigned int)(r[1]) == 0x80))
- *w++ = '\\', *w++ = '0', r++;
- else if (c == quote)
- *w++ = '\\', *w++ = quote;
- else if (c < ' ')
- *w++ = '\\',
- *w++ = '0' + (c >> 6),
- *w++ = '0' + ((c >> 3) & 7),
- *w++ = '0' + (c & 7);
- else
- *w++ = c;
+ *w++ = '\\';
+ if (MODNUL(r)) *w++ = '0', r++;
+ else if (c == quote) *w++ = quote;
+ else if (c < ' ') OCTAL(6), OCTAL(3), OCTAL(0);
+ else w[-1] = c;
break;
}
diff --git a/src/slibc-human/escapes.h b/src/slibc-human/escapes.h
new file mode 100644
index 0000000..28e3830
--- /dev/null
+++ b/src/slibc-human/escapes.h
@@ -0,0 +1,79 @@
+/**
+ * slibc — Yet another C library
+ * Copyright © 2015 Mattias Andrée (maandree@member.fsf.org)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+
+/**
+ * List all escapes, excluding initial backslash,
+ * with ASCII-character names, mapped to their
+ * codepoint.
+ */
+#define LIST_ASCII_NAMES \
+ X("NUL", 0) \
+ X("SOH", 1) \
+ X("STX", 2) \
+ X("ETX", 3) \
+ X("EOT", 4) \
+ X("ENQ", 5) \
+ X("ACK", 6) \
+ X("BEL", 7) \
+ X("BS", 8) \
+ X("HT", 9) \
+ X("LF", 10) \
+ X("VT", 11) \
+ X("FF", 12) \
+ X("CR", 13) \
+ X("SO", 14) \
+ X("SI", 15) \
+ X("DLE", 16) \
+ X("DC1", 17) \
+ X("DC2", 18) \
+ X("DC3", 19) \
+ X("DC4", 20) \
+ X("NAK", 21) \
+ X("SYN", 22) \
+ X("ETB", 23) \
+ X("CAN", 24) \
+ X("EM", 25) \
+ X("SUB", 26) \
+ X("ESC", 27) \
+ X("FS", 28) \
+ X("GS", 29) \
+ X("RS", 30) \
+ X("US", 31) \
+ X("SP", 32) \
+ X("DEL", 0x7F)
+
+
+/**
+ * List all escapes, excluding initial backslash,
+ * of the characters (including initial backslash)
+ * that should both be escaped and unescaped, mapped
+ * to their codepoint.
+ */
+#define LIST_BIJECTIVE_ESCAPES \
+ X('a', '\a') \
+ X('b', '\b') \
+ X('e', 033) \
+ X('f', '\f') \
+ X('n', '\n') \
+ X('r', '\r') \
+ X('t', '\t') \
+ X('v', '\v') \
+ X('\\', '\\')
+
diff --git a/src/slibc-human/unescape.c b/src/slibc-human/unescape.c
index c4086b5..33b58b5 100644
--- a/src/slibc-human/unescape.c
+++ b/src/slibc-human/unescape.c
@@ -19,6 +19,7 @@
#include <stddef.h>
#include <errno.h>
#include <string.h>
+#include "escapes.h"
@@ -50,12 +51,37 @@
*/
char* unescape(char* str, enum unescape_mode mode)
{
+#define RANGE(a, c, z) (((a) <= (c)) && ((c) <= (z)))
+#define CxC0(s, m) (*w++ = (char)((m) | (v >> (s))))
+#define Cx80(s) (*w++ = (char)(0x80 | ((v >> (s)) & 0x3F)))
+#define PARSE_HEX(v, C) \
+ do { \
+ char c = (C); \
+ if (RANGE('0', c, '9')) c -= '0'; \
+ else if (RANGE('a', c, 'f')) c -= 'a', c += 10; \
+ else if (RANGE('A', c, 'F')) c -= 'A', c += 10; \
+ else \
+ goto fail_u; \
+ v = (v << 4) | (unsigned long int)c; \
+ if (v > 0x10FFFFUL) \
+ goto fail_u; \
+ } while (0)
+#define NEXT_OCTAL(v) if (RANGE('0', r[1], '7')) v = (v << 3) | (r[1] - '0'), r++;
+#define UNRECOGNISED(c, action) \
+ if ( mode & UNESCAPE_EINVAL) goto invalid; \
+ else if ((c) && (mode & UNESCAPE_VERBATIM)) action; \
+ else if ((c) && (mode & UNESCAPE_IGNORE)) *w++ = '\\', action
+#define ASCII() \
+ ((v == 0) && (mode & UNESCAPE_MOD_UTF8)) ? (*w++ = (char)0xC0, *w++ = (char)0x80) : \
+ (v < 0x80) ? (*w++ = (char)v, 1) : 0
+
+ int i, n;
unsigned long int v;
char* w;
char* r;
if (str == NULL) return errno = 0, NULL;
- if (mode & ~31) return errno = EINVAL, NULL;
+ if (mode & ~31) goto invalid;
if (mod == 0) mode |= UNESCAPE_EINVAL | UNESCAPE_MOD_UTF8;
switch (mode & 7)
{
@@ -66,7 +92,7 @@ char* unescape(char* str, enum unescape_mode mode)
case 4:
break;
default:
- return errno = EINVAL, NULL;
+ goto invalid;
}
for (w = r = str; *r; r++)
@@ -76,45 +102,24 @@ char* unescape(char* str, enum unescape_mode mode)
switch (*++r)
{
case '\0':
- if (mode & UNESCAPE_EINVAL)
- return errno = EINVAL, NULL;
- else if (mode & UNESCAPE_IGNORE)
- *w++ = '\\';
- break;
-
- case '\'':
- case '"':
- case '$':
- case '?':
- case '\\':
- case '/':
- *w++ = *r;
+ UNRECOGNISED(1, (void)0);
break;
case '&':
- if (mode & UNESCAPE_AMPERSAND) *w++ = (char)255;
- else if (mode & UNESCAPE_EINVAL) return errno = EINVAL, NULL;
- else if (mode & UNESCAPE_VERBATIM) *w++ = '&';
- else if (mode & UNESCAPE_IGNORE) *w++ = '\\', *w++ = '&';
+ if (mode & UNESCAPE_AMPERSAND) *w++ = (char)255;
+ else UNRECOGNISED(*r, *w++ = '&');
break;
- case 'a': *w++ = '\a'; break;
- case 'b': *w++ = '\b'; break;
- case 'e': *w++ = 033; break;
- case 'f': *w++ = '\f'; break;
- case 'n': *w++ = '\n'; break;
- case 'r': *w++ = '\r'; break;
- case 't': *w++ = '\t'; break;
- case 's': *w++ = ' '; break;
- case 'v': *w++ = '\v'; break;
+#define X(e, c) case e: *w++ = c; break;
+ LIST_BIJECTIVE_ESCAPES
+#undef X
+ case 's': *w++ = ' '; break;
case '^':
- if (('@' <= r[1]) && (r[1] <= '_')) *w++ = *++r - '@';
- else if (mode & UNESCAPE_EINVAL) return errno = EINVAL, NULL;
- else if (r[1])
+ if (RANGE('@', r[1], '_')) *w++ = *++r - '@';
+ else
{
- if (mode & UNESCAPE_VERBATIM) *w++ = '^';
- else if (mode & UNESCAPE_IGNORE) *w++ = '\\', *w++ = '^';
+ UNRECOGNISED(r[1], *w++ = '^');
if (r[1])
*w++ = *++r;
}
@@ -125,24 +130,10 @@ char* unescape(char* str, enum unescape_mode mode)
case 'x':
v = 0;
if ((r[0] == 'u') && (r[1] == '{'))
- {
- for (i = 2; r[i] != '}'; i++)
- {
- c = r[i];
- if (('0' <= c) || (c <= '9')) c -= '0';
- else if (('a' <= c) || (c <= 'f')) c -= 'a', c += 10;
- else if (('A' <= c) || (c <= 'F')) c -= 'A', c += 10;
- else
- goto fail_u;
- v = (v << 4) | (unsigned long int)c;
- if (v > 0x10FFFFUL)
- goto fail_u;
- }
- }
+ for (i = 2; r[i] != '}'; i++)
+ PARSE_HEX(v, r[i]);
else
{
- int i, n;
- char c;
switch (*r)
{
case 'U': n = 8; break;
@@ -150,98 +141,37 @@ char* unescape(char* str, enum unescape_mode mode)
case 'x': n = 2; break;
}
for (i = 1; i <= n; i++)
- {
- c = r[i];
- if (('0' <= c) || (c <= '9')) c -= '0';
- else if (('a' <= c) || (c <= 'f')) c -= 'a', c += 10;
- else if (('A' <= c) || (c <= 'F')) c -= 'A', c += 10;
- else
- goto fail_u;
- v = (v << 4) | (unsigned long int)c;
- if (v > 0x10FFFFUL)
- goto fail_u;
- }
+ PARSE_HEX(v, r[i]);
}
goto done_u;
fail_u:
- if (mode & UNESCAPE_EINVAL) return errno = EINVAL, NULL;
- else if (mode & UNESCAPE_VERBATIM) r--;
- else if (mode & UNESCAPE_IGNORE) *w++ = '\\', r--;
- done_u:;
- if ((v == 0) && (mode & UNESCAPE_MOD_UTF8))
- *w++ = (char)0xC0, *w++ = (char)0x80;
- else if (v < 0x80)
- *w++ = (char)v;
- else if (v < (1L << 11))
- *w++ = (char)(0xC0 | (v >> 6)),
- *w++ = (char)(0x80 | (v & 0x3F));
- else if (v < (1L << 16))
- *w++ = (char)(0xE0 | (v >> 12)),
- *w++ = (char)(0x80 | ((v >> 6) & 0x3F)),
- *w++ = (char)(0x80 | (v & 0x3F));
- else
- *w++ = (char)(0xF0 | (v >> 18)),
- *w++ = (char)(0x80 | ((v >> 12) & 0x3F)),
- *w++ = (char)(0x80 | ((v >> 6) & 0x3F)),
- *w++ = (char)(0x80 | (v & 0x3F));
+ UNRECOGNISED(r--);
+ done_u:
+ if (ASCII());
+ else if (v < (1L << 11)) CxC0(6, 0xC0), Cx80(0);
+ else if (v < (1L << 16)) CxC0(12, 0xE0), Cx80(6), Cx80(0);
+ else CxC0(18, 0xF0), Cx80(12), Cx80(0), Cx80(0);
break;
default:
- if (('0' <= *r) && (*r <= '7'))
+ if (RANGE('0', *r, '7'))
{
int v = *r - '0';
- if (('0' <= r[1]) && (r[1] <= '7'))
- v = (v << 3) | (r[1] - '0'), r++;
- if (('0' <= r[1]) && (r[1] <= '7'))
- v = (v << 3) | (r[1] - '0'), r++;
- if ((v == 0) && (mode & UNESCAPE_MOD_UTF8))
- *w++ = (char)0xC0, *w++ = (char)0x80;
- else if (v < 0x80)
- *w++ = (char)v;
- else
- *w++ = (char)(0xC0 | (v >> 6)),
- *w++ = (char)(0x80 | (v & 3F));
+ NEXT_OCTAL(v);
+ NEXT_OCTAL(v);
+ if (ASCII());
+ else CxC0(6, 0xC0), Cx80(0);
}
- else if (strstarts(r, "NUL")) *w++ = 0, r += 2;
- else if (strstarts(r, "SOH")) *w++ = 1, r += 2;
- else if (strstarts(r, "STX")) *w++ = 2, r += 2;
- else if (strstarts(r, "ETX")) *w++ = 3, r += 2;
- else if (strstarts(r, "EOT")) *w++ = 4, r += 2;
- else if (strstarts(r, "ENQ")) *w++ = 5, r += 2;
- else if (strstarts(r, "ACK")) *w++ = 6, r += 2;
- else if (strstarts(r, "BEL")) *w++ = 7, r += 2;
- else if (strstarts(r, "BS")) *w++ = 8, r += 1;
- else if (strstarts(r, "HT")) *w++ = 9, r += 1;
- else if (strstarts(r, "LF")) *w++ = 10, r += 1;
- else if (strstarts(r, "VT")) *w++ = 11, r += 1;
- else if (strstarts(r, "FF")) *w++ = 12, r += 1;
- else if (strstarts(r, "CR")) *w++ = 13, r += 1;
- else if (strstarts(r, "SO")) *w++ = 14, r += 1;
- else if (strstarts(r, "SI")) *w++ = 15, r += 1;
- else if (strstarts(r, "DLE")) *w++ = 16, r += 2;
- else if (strstarts(r, "DC1")) *w++ = 17, r += 2;
- else if (strstarts(r, "DC2")) *w++ = 18, r += 2;
- else if (strstarts(r, "DC3")) *w++ = 19, r += 2;
- else if (strstarts(r, "DC4")) *w++ = 20, r += 2;
- else if (strstarts(r, "NAK")) *w++ = 21, r += 2;
- else if (strstarts(r, "SYN")) *w++ = 22, r += 2;
- else if (strstarts(r, "ETB")) *w++ = 23, r += 2;
- else if (strstarts(r, "CAN")) *w++ = 24, r += 2;
- else if (strstarts(r, "EM")) *w++ = 25, r += 1;
- else if (strstarts(r, "SUB")) *w++ = 26, r += 2;
- else if (strstarts(r, "ESC")) *w++ = 27, r += 2;
- else if (strstarts(r, "FS")) *w++ = 28, r += 1;
- else if (strstarts(r, "GS")) *w++ = 29, r += 1;
- else if (strstarts(r, "RS")) *w++ = 30, r += 1;
- else if (strstarts(r, "US")) *w++ = 31, r += 1;
- else if (strstarts(r, "SP")) *w++ = 32, r += 1;
- else if (strstarts(r, "DEL")) *w++ = 0x7F, r += 2;
- else if (mode & UNESCAPE_EINVAL) return errno = EINVAL, NULL;
- else if (mode & UNESCAPE_VERBATIM) r--;
- else if (mode & UNESCAPE_IGNORE) *w++ = '\\', r--;
+ else if (strchr("'\"$?\\/", *r)) *w++ = *r;
+#define X(e, i) else if (strstarts(r, e) ? (*w++ = i, r += sizeof(e) / sizeof(char) - 2, 1) : 0);
+ LIST_ASCII_NAMES
+#undef X
+ else UNRECOGNISED(r--);
break;
}
return *w = 0, w;
+ invalid:
+ return errno = EINVAL, NULL;
}