From 78036b7031f30ab47188759f4178ff0acbdecda7 Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Tue, 17 Nov 2015 01:20:22 +0100 Subject: increase readability of unescape MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- src/slibc-human/unescape.c | 97 +++++++++++++++++++++------------------------- 1 file changed, 45 insertions(+), 52 deletions(-) (limited to 'src/slibc-human') diff --git a/src/slibc-human/unescape.c b/src/slibc-human/unescape.c index 941ea9b..90bf61c 100644 --- a/src/slibc-human/unescape.c +++ b/src/slibc-human/unescape.c @@ -52,28 +52,33 @@ char* unescape(char* str, enum unescape_mode mode) { #define RANGE(a, c, z) (((a) <= (c)) && ((c) <= (z))) -#define CxC0(s, m) (*w++ = (char)((m) | (v >> (s)))) -#define Cx80(s) (*w++ = (char)(0x80 | ((v >> (s)) & 0x3F))) -#define PARSE_HEX(v, C) \ - do { \ - char c = (C); \ +#define CxC0(s, m) (*w++ = (char)((m) | (v >> (s)))) +#define Cx80(s) (*w++ = (char)(0x80 | ((v >> (s)) & 0x3F))) +#define NEXT_OCTAL(v) if (RANGE('0', r[1], '7')) v = (v << 3) | (r[1] - '0'), r++; +#define PARSE_HEX(START, COND, v) \ + do for (i = START; COND; i++) { \ + char c = r[i]; \ if (RANGE('0', c, '9')) c -= '0'; \ else if (RANGE('a', c, 'f')) c -= 'a', c += 10; \ else if (RANGE('A', c, 'F')) c -= 'A', c += 10; \ else \ - goto fail_u; \ + goto unrecognised; \ v = (v << 4) | (unsigned long int)c; \ if (v > 0x10FFFFUL) \ - goto fail_u; \ + goto unrecognised; \ } while (0) -#define NEXT_OCTAL(v) if (RANGE('0', r[1], '7')) v = (v << 3) | (r[1] - '0'), r++; #define UNRECOGNISED(c, action) \ if ( mode & UNESCAPE_EINVAL) goto invalid; \ else if ((c) && (mode & UNESCAPE_VERBATIM)) action; \ else if ((c) && (mode & UNESCAPE_IGNORE)) *w++ = '\\', action -#define ASCII() \ +#define ASCII() \ ((v == 0) && (mode & UNESCAPE_MOD_UTF8)) ? (*w++ = (char)0xC0, *w++ = (char)0x80) : \ (v < 0x80) ? (*w++ = (char)v, 1) : 0 +#define UTF8() \ + if (ASCII()); \ + else if (v < (1L << 11)) CxC0(6, 0xC0), Cx80(0); \ + else if (v < (1L << 16)) CxC0(12, 0xE0), Cx80(6), Cx80(0); \ + else CxC0(18, 0xF0), Cx80(12), Cx80(0), Cx80(0); int i, n; unsigned long int v; @@ -96,24 +101,27 @@ char* unescape(char* str, enum unescape_mode mode) } for (w = r = str; *r; r++) - if (*r != '/') - *w++ = *r; - else + { + if (*r != '/') + { + *w++ = *r; + continue; + } + + n = 0, v = 0; switch (*++r) { - case '\0': - UNRECOGNISED(1, (void)0); - break; - - case '&': - if (mode & UNESCAPE_AMPERSAND) *w++ = (char)255; - else UNRECOGNISED(*r, *w++ = '&'); - break; - #define X(e, c) case e: *w++ = c; break; LIST_BIJECTIVE_ESCAPES #undef X - case 's': *w++ = ' '; break; + + case '\0': UNRECOGNISED(1, (void)0); break; + case 's': *w++ = ' '; break; + + case '&': + if (mode & UNESCAPE_AMPERSAND) *w++ = (char)255; + else goto unrecognised; + break; case '^': if (RANGE('@', r[1], '_')) *w++ = *++r - '@'; @@ -125,32 +133,13 @@ char* unescape(char* str, enum unescape_mode mode) } break; - case 'u': - case 'U': - case 'x': - v = 0; - if ((r[0] == 'u') && (r[1] == '{')) - for (i = 2; r[i] != '}'; i++) - PARSE_HEX(v, r[i]); - else - { - switch (*r) - { - case 'U': n = 8; break; - case 'u': n = 4; break; - case 'x': n = 2; break; - } - for (i = 1; i <= n; i++) - PARSE_HEX(v, r[i]); - } - goto done_u; - fail_u: - UNRECOGNISED(r--); - done_u: - if (ASCII()); - else if (v < (1L << 11)) CxC0(6, 0xC0), Cx80(0); - else if (v < (1L << 16)) CxC0(12, 0xE0), Cx80(6), Cx80(0); - else CxC0(18, 0xF0), Cx80(12), Cx80(0), Cx80(0); + case 'U': n += 4; + case 'u': n += 2; + case 'x': n += 2; + if (strstarts(r, "u{")) PARSE_HEX(2, r[i] != '}', v); + else PARSE_HEX(1, i <= n, v); + r += i - (r[i] != '}'); + UTF8(); break; default: @@ -159,16 +148,20 @@ char* unescape(char* str, enum unescape_mode mode) v = *r - '0'; NEXT_OCTAL(v); NEXT_OCTAL(v); - if (ASCII()); - else CxC0(6, 0xC0), Cx80(0); + UTF8(); } else if (strchr("'\"$?\\/", *r)) *w++ = *r; -#define X(e, i) else if (strstarts(r, e) ? (*w++ = i, r += sizeof(e) / sizeof(char) - 2, 1) : 0); +#define X(e, i) else if (strstarts(r, e)) *w++ = i, r += sizeof(e) / sizeof(char) - 2; LIST_ASCII_NAMES #undef X - else UNRECOGNISED(*r, r--); + else goto unrecognised; break; } + + continue; + unrecognised: + UNRECOGNISED(*r, *w++ = *r); + } return *w = 0, w; invalid: -- cgit v1.2.3-70-g09d2