aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- src/slibc-human/unescape.c 97
1 files changed, 45 insertions, 52 deletions
diff --git a/src/slibc-human/unescape.c b/src/slibc-human/unescape.c
index 941ea9b..90bf61c 100644
--- a/src/slibc-human/unescape.c
+++ b/src/slibc-human/unescape.c
@@ -52,28 +52,33 @@
char* unescape(char* str, enum unescape_mode mode)
{
#define RANGE(a, c, z) (((a) <= (c)) && ((c) <= (z)))
-#define CxC0(s, m) (*w++ = (char)((m) | (v >> (s))))
-#define Cx80(s) (*w++ = (char)(0x80 | ((v >> (s)) & 0x3F)))
-#define PARSE_HEX(v, C) \
- do { \
- char c = (C); \
+#define CxC0(s, m) (*w++ = (char)((m) | (v >> (s))))
+#define Cx80(s) (*w++ = (char)(0x80 | ((v >> (s)) & 0x3F)))
+#define NEXT_OCTAL(v) if (RANGE('0', r[1], '7')) v = (v << 3) | (r[1] - '0'), r++;
+#define PARSE_HEX(START, COND, v) \
+ do for (i = START; COND; i++) { \
+ char c = r[i]; \
if (RANGE('0', c, '9')) c -= '0'; \
else if (RANGE('a', c, 'f')) c -= 'a', c += 10; \
else if (RANGE('A', c, 'F')) c -= 'A', c += 10; \
else \
- goto fail_u; \
+ goto unrecognised; \
v = (v << 4) | (unsigned long int)c; \
if (v > 0x10FFFFUL) \
- goto fail_u; \
+ goto unrecognised; \
} while (0)
-#define NEXT_OCTAL(v) if (RANGE('0', r[1], '7')) v = (v << 3) | (r[1] - '0'), r++;
#define UNRECOGNISED(c, action) \
if ( mode & UNESCAPE_EINVAL) goto invalid; \
else if ((c) && (mode & UNESCAPE_VERBATIM)) action; \
else if ((c) && (mode & UNESCAPE_IGNORE)) *w++ = '\\', action
-#define ASCII() \
+#define ASCII() \
((v == 0) && (mode & UNESCAPE_MOD_UTF8)) ? (*w++ = (char)0xC0, *w++ = (char)0x80) : \
(v < 0x80) ? (*w++ = (char)v, 1) : 0
+#define UTF8() /* Append the code point in `v` at `w`, encoded as 1 to 4 UTF-8 bytes. */ \
+ if (ASCII()); /* ASCII() has already written the byte(s) when it yields non-zero. */ \
+ else if (v < (1L << 11)) CxC0(6, 0xC0), Cx80(0); \
+ else if (v < (1L << 16)) CxC0(12, 0xE0), Cx80(6), Cx80(0); \
+ else CxC0(18, 0xF0), Cx80(12), Cx80(6), Cx80(0); /* Continuation bytes carry bits 17-12, 11-6 and 5-0. */
int i, n;
unsigned long int v;
@@ -96,24 +101,27 @@ char* unescape(char* str, enum unescape_mode mode)
}
for (w = r = str; *r; r++)
- if (*r != '/')
- *w++ = *r;
- else
+ {
+ if (*r != '/')
+ {
+ *w++ = *r;
+ continue;
+ }
+
+ n = 0, v = 0;
switch (*++r)
{
- case '\0':
- UNRECOGNISED(1, (void)0);
- break;
-
- case '&':
- if (mode & UNESCAPE_AMPERSAND) *w++ = (char)255;
- else UNRECOGNISED(*r, *w++ = '&');
- break;
-
#define X(e, c) case e: *w++ = c; break;
LIST_BIJECTIVE_ESCAPES
#undef X
- case 's': *w++ = ' '; break;
+
+ case '\0': UNRECOGNISED(1, (void)0); break;
+ case 's': *w++ = ' '; break;
+
+ case '&':
+ if (mode & UNESCAPE_AMPERSAND) *w++ = (char)255;
+ else goto unrecognised;
+ break;
case '^':
if (RANGE('@', r[1], '_')) *w++ = *++r - '@';
@@ -125,32 +133,13 @@ char* unescape(char* str, enum unescape_mode mode)
}
break;
- case 'u':
- case 'U':
- case 'x':
- v = 0;
- if ((r[0] == 'u') && (r[1] == '{'))
- for (i = 2; r[i] != '}'; i++)
- PARSE_HEX(v, r[i]);
- else
- {
- switch (*r)
- {
- case 'U': n = 8; break;
- case 'u': n = 4; break;
- case 'x': n = 2; break;
- }
- for (i = 1; i <= n; i++)
- PARSE_HEX(v, r[i]);
- }
- goto done_u;
- fail_u:
- UNRECOGNISED(r--);
- done_u:
- if (ASCII());
- else if (v < (1L << 11)) CxC0(6, 0xC0), Cx80(0);
- else if (v < (1L << 16)) CxC0(12, 0xE0), Cx80(6), Cx80(0);
- else CxC0(18, 0xF0), Cx80(12), Cx80(0), Cx80(0);
+ case 'U': n += 4;
+ case 'u': n += 2;
+ case 'x': n += 2;
+ if (strstarts(r, "u{")) PARSE_HEX(2, r[i] != '}', v);
+ else PARSE_HEX(1, i <= n, v);
+ r += i - (r[i] != '}');
+ UTF8();
break;
default:
@@ -159,16 +148,20 @@ char* unescape(char* str, enum unescape_mode mode)
v = *r - '0';
NEXT_OCTAL(v);
NEXT_OCTAL(v);
- if (ASCII());
- else CxC0(6, 0xC0), Cx80(0);
+ UTF8();
}
else if (strchr("'\"$?\\/", *r)) *w++ = *r;
-#define X(e, i) else if (strstarts(r, e) ? (*w++ = i, r += sizeof(e) / sizeof(char) - 2, 1) : 0);
+#define X(e, i) else if (strstarts(r, e)) *w++ = i, r += sizeof(e) / sizeof(char) - 2;
LIST_ASCII_NAMES
#undef X
- else UNRECOGNISED(*r, r--);
+ else goto unrecognised;
break;
}
+
+ continue;
+ unrecognised:
+ UNRECOGNISED(*r, *w++ = *r);
+ }
return *w = 0, w;
invalid: