diff options
| author | Mattias Andrée <maandree@operamail.com> | 2015-11-16 22:49:28 +0100 | 
|---|---|---|
| committer | Mattias Andrée <maandree@operamail.com> | 2015-11-16 22:49:28 +0100 | 
| commit | 52e31877f55fd0eba772262722fc627f8f1cddd8 (patch) | |
| tree | 8f8e1bfdc55bf177d57bcace2b55bdd441a4771e /src/slibc-human | |
| parent | m (diff) | |
| download | slibc-52e31877f55fd0eba772262722fc627f8f1cddd8.tar.gz slibc-52e31877f55fd0eba772262722fc627f8f1cddd8.tar.bz2 slibc-52e31877f55fd0eba772262722fc627f8f1cddd8.tar.xz  | |
increase readability
Signed-off-by: Mattias Andrée <maandree@operamail.com>
Diffstat (limited to 'src/slibc-human')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/slibc-human/escape.c | 75 |
| -rw-r--r-- | src/slibc-human/escapes.h | 79 |
| -rw-r--r-- | src/slibc-human/unescape.c | 188 |
3 files changed, 166 insertions, 176 deletions
diff --git a/src/slibc-human/escape.c b/src/slibc-human/escape.c index 832be8d..a7de84b 100644 --- a/src/slibc-human/escape.c +++ b/src/slibc-human/escape.c @@ -19,6 +19,7 @@  #include <stdlib.h>  #include <string.h>  #include <errno.h> +#include "escapes.h" @@ -39,10 +40,13 @@   */  char* escape(const char* restrict str, char quote)  { +#define OCTAL(s)   (*w++ = '0' + ((c >> (s)) & 7)) +#define MODNUL(s)  (((unsigned)((s)[0]) == 0xC0) && ((unsigned)((s)[1]) == 0x80)) +      const char* restrict r;    char* restrict w;    char* restrict rc; -  size_t extra = 1, len, size; +  size_t extra = 0, len, size;    unsigned char c;    if (str == NULL) @@ -58,67 +62,44 @@ char* escape(const char* restrict str, char quote)        return errno = EINVAL, NULL;      } -  for (r = str; *r; r++) -    switch (*r) +  for (r = str; (c = *r); r++) +    switch (c)        { -      case '\a': -      case '\b': -      case '\e': -      case '\f': -      case '\n': -      case '\r': -      case '\t': -      case '\v': -      case '\\': -	extra += 1; -	break; -      case 0x7F: -	extra += 3; -	break; +#define X(E, C)  case C: +      LIST_BIJECTIVE_ESCAPES +#undef X +	                      extra += 1;  break; +      case 0x7F:              extra += 3;  break;        default: -	if (*r == quote) -	  extra += 1; -	else if (*r < ' ') -	  extra += 3; +	if      (c == quote)  extra += 1; +	else if (c < ' ')     extra += 3;  	break;        } -  if (extra == 1) +  if (!extra++)      return strdup(str); -  len = strlen(str); -  if (__builtin_uaddl_overflow(len, extra, &size)) +  len = strlen(str) * sizeof(char); +  if (__builtin_uaddl_overflow(len, extra * sizeof(char), &size))      return errno = ENOMEM, NULL; -  w = rc = malloc(size * sizeof(char)); -  if (w == NULL) +  w = rc = malloc(size); +  if (rc == NULL)      return NULL;    for (r = str; (c = *r); r++)      switch (c)        { -      case '\a':  *w++ = '\\', *w++ = 'a';   break; -      case '\b':  *w++ = '\\', *w++ = 'b';   break; -      
case 033:   *w++ = '\\', *w++ = 'e';   break; -      case '\f':  *w++ = '\\', *w++ = 'f';   break; -      case '\n':  *w++ = '\\', *w++ = 'n';   break; -      case '\r':  *w++ = '\\', *w++ = 'r';   break; -      case '\t':  *w++ = '\\', *w++ = 't';   break; -      case '\v':  *w++ = '\\', *w++ = 'v';   break; -      case '\\':  *w++ = '\\', *w++ = '\\';  break; -      case 0x7F:  *w++ = '\\', *w++ = '1', *w++ = '7', *w++ = '7';  break; +#define X(E, C)  case C:  *w++ = '\\', *w++ = E;  break; +      LIST_BIJECTIVE_ESCAPES +#undef X +      case 0x7F:  w = stpcpy(w, "\\177");  break;        default: -	if (((unsigned int)c == 0xC0) && ((unsigned int)(r[1]) == 0x80)) -	  *w++ = '\\', *w++ = '0', r++; -	else if (c == quote) -	  *w++ = '\\', *w++ = quote; -	else if (c < ' ') -	  *w++ = '\\', -	    *w++ = '0' + (c >> 6), -	    *w++ = '0' + ((c >> 3) & 7), -	    *w++ = '0' + (c & 7); -	else -	  *w++ = c; +	*w++ = '\\'; +	if      (MODNUL(r))   *w++ = '0', r++; +	else if (c == quote)  *w++ = quote; +	else if (c < ' ')     OCTAL(6), OCTAL(3), OCTAL(0); +	else                  w[-1] = c;  	break;        } diff --git a/src/slibc-human/escapes.h b/src/slibc-human/escapes.h new file mode 100644 index 0000000..28e3830 --- /dev/null +++ b/src/slibc-human/escapes.h @@ -0,0 +1,79 @@ +/** + * slibc — Yet another C library + * Copyright © 2015  Mattias Andrée (maandree@member.fsf.org) + *  + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + *  + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. 
+ *  + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ + + + +/** + * List all escapes, excluding initial backslash, + * with ASCII-character names, mapped to their + * codepoint. + */ +#define LIST_ASCII_NAMES  \ +  X("NUL",  0)  \ +  X("SOH",  1)  \ +  X("STX",  2)  \ +  X("ETX",  3)  \ +  X("EOT",  4)  \ +  X("ENQ",  5)  \ +  X("ACK",  6)  \ +  X("BEL",  7)  \ +  X("BS",   8)  \ +  X("HT",   9)  \ +  X("LF",  10)  \ +  X("VT",  11)  \ +  X("FF",  12)  \ +  X("CR",  13)  \ +  X("SO",  14)  \ +  X("SI",  15)  \ +  X("DLE", 16)  \ +  X("DC1", 17)  \ +  X("DC2", 18)  \ +  X("DC3", 19)  \ +  X("DC4", 20)  \ +  X("NAK", 21)  \ +  X("SYN", 22)  \ +  X("ETB", 23)  \ +  X("CAN", 24)  \ +  X("EM",  25)  \ +  X("SUB", 26)  \ +  X("ESC", 27)  \ +  X("FS",  28)  \ +  X("GS",  29)  \ +  X("RS",  30)  \ +  X("US",  31)  \ +  X("SP",  32)  \ +  X("DEL", 0x7F) + + +/** + * List all escapes, excluding initial backslash, + * of the characters (including initial backslash) + * that should both escaped and unescaped, mapped + * to their codepoint. 
+ */ +#define LIST_BIJECTIVE_ESCAPES  \ +  X('a', '\a')  \ +  X('b', '\b')  \ +  X('e', 033)   \ +  X('f', '\f')  \ +  X('n', '\n')  \ +  X('r', '\r')  \ +  X('t', '\t')  \ +  X('v', '\v')  \ +  X('\\', '\\') + diff --git a/src/slibc-human/unescape.c b/src/slibc-human/unescape.c index c4086b5..33b58b5 100644 --- a/src/slibc-human/unescape.c +++ b/src/slibc-human/unescape.c @@ -19,6 +19,7 @@  #include <stddef.h>  #include <errno.h>  #include <string.h> +#include "escapes.h" @@ -50,12 +51,37 @@   */  char* unescape(char* str, enum unescape_mode mode)  { +#define RANGE(a, c, z)  (((a) <= (c)) && ((c) <= (z))) +#define CxC0(s, m)  (*w++ = (char)((m) | (v >> (s)))) +#define Cx80(s)     (*w++ = (char)(0x80 | ((v >> (s)) & 0x3F))) +#define PARSE_HEX(v, C)				      \ +  do {						      \ +    char c = (C);				      \ +    if      (RANGE('0', c, '9'))  c -= '0';	      \ +    else if (RANGE('a', c, 'f'))  c -= 'a', c += 10;  \ +    else if (RANGE('A', c, 'F'))  c -= 'A', c += 10;  \ +    else					      \ +      goto fail_u;				      \ +    v = (v << 4) | (unsigned long int)c;	      \ +    if (v > 0x10FFFFUL)				      \ +      goto fail_u;				      \ +  } while (0) +#define NEXT_OCTAL(v)  if (RANGE('0', r[1], '7'))  v = (v << 3) | (r[1] - '0'), r++; +#define UNRECOGNISED(c, action)                                     \ +  if      (        mode & UNESCAPE_EINVAL)     goto invalid;        \ +  else if ((c) && (mode & UNESCAPE_VERBATIM))  action;              \ +  else if ((c) && (mode & UNESCAPE_IGNORE))    *w++ = '\\', action +#define ASCII()  \ +  ((v == 0) && (mode & UNESCAPE_MOD_UTF8)) ? (*w++ = (char)0xC0, *w++ = (char)0x80) : \ +  (v < 0x80)                               ? 
(*w++ = (char)v, 1)                    : 0 +   +  int i, n;    unsigned long int v;    char* w;    char* r;    if (str == NULL)  return errno = 0, NULL; -  if (mode & ~31)   return errno = EINVAL, NULL; +  if (mode & ~31)   goto invalid;    if (mod == 0)     mode |= UNESCAPE_EINVAL | UNESCAPE_MOD_UTF8;    switch (mode & 7)      { @@ -66,7 +92,7 @@ char* unescape(char* str, enum unescape_mode mode)      case 4:        break;      default: -      return errno = EINVAL, NULL; +      goto invalid;      }    for (w = r = str; *r; r++) @@ -76,45 +102,24 @@ char* unescape(char* str, enum unescape_mode mode)        switch (*++r)  	{  	case '\0': -	  if (mode & UNESCAPE_EINVAL) -	    return errno = EINVAL, NULL; -	  else if (mode & UNESCAPE_IGNORE) -	    *w++ = '\\'; -	  break; -	   -	case '\'': -	case '"': -	case '$': -	case '?': -	case '\\': -	case '/': -	  *w++ = *r; +	  UNRECOGNISED(1, (void)0);  	  break;  	case '&': -	  if      (mode & UNESCAPE_AMPERSAND)  *w++ = (char)255; -	  else if (mode & UNESCAPE_EINVAL)     return errno = EINVAL, NULL; -	  else if (mode & UNESCAPE_VERBATIM)   *w++ = '&'; -	  else if (mode & UNESCAPE_IGNORE)     *w++ = '\\', *w++ = '&'; +	  if   (mode & UNESCAPE_AMPERSAND)  *w++ = (char)255; +	  else UNRECOGNISED(*r, *w++ = '&');  	  break; -	case 'a':  *w++ = '\a';  break; -	case 'b':  *w++ = '\b';  break; -	case 'e':  *w++ = 033;   break; -	case 'f':  *w++ = '\f';  break; -	case 'n':  *w++ = '\n';  break; -	case 'r':  *w++ = '\r';  break; -	case 't':  *w++ = '\t';  break; -	case 's':  *w++ = ' ';   break; -	case 'v':  *w++ = '\v';  break; +#define X(e, c)  case e:  *w++ = c;  break; +	LIST_BIJECTIVE_ESCAPES +#undef X +	case 's':  *w++ = ' ';  break;  	case '^': -	  if (('@' <= r[1]) && (r[1] <= '_'))      *w++ = *++r - '@'; -	  else if (mode & UNESCAPE_EINVAL)         return errno = EINVAL, NULL; -	  else if (r[1]) +	  if (RANGE('@', r[1], '_'))  *w++ = *++r - '@'; +	  else  	    { -	      if      (mode & UNESCAPE_VERBATIM)   *w++ = '^'; -	    
  else if (mode & UNESCAPE_IGNORE)     *w++ = '\\', *w++ = '^'; +	      UNRECOGNISED(r[1], *w++ = '^');  	      if (r[1])  		*w++ = *++r;  	    } @@ -125,24 +130,10 @@ char* unescape(char* str, enum unescape_mode mode)  	case 'x':  	  v = 0;  	  if ((r[0] == 'u') && (r[1] == '{')) -	    { -	      for (i = 2; r[i] != '}'; i++) -		{ -		  c = r[i]; -		  if      (('0' <= c) || (c <= '9'))  c -= '0'; -		  else if (('a' <= c) || (c <= 'f'))  c -= 'a', c += 10; -		  else if (('A' <= c) || (c <= 'F'))  c -= 'A', c += 10; -		  else -		    goto fail_u; -		  v = (v << 4) | (unsigned long int)c; -		  if (v > 0x10FFFFUL) -		    goto fail_u; -		} -	    } +	    for (i = 2; r[i] != '}'; i++) +	      PARSE_HEX(v, r[i]);  	  else  	    { -	      int i, n; -	      char c;  	      switch (*r)  		{  		case 'U':  n = 8;  break; @@ -150,98 +141,37 @@ char* unescape(char* str, enum unescape_mode mode)  		case 'x':  n = 2;  break;  		}  	      for (i = 1; i <= n; i++) -		{ -		  c = r[i]; -		  if      (('0' <= c) || (c <= '9'))  c -= '0'; -		  else if (('a' <= c) || (c <= 'f'))  c -= 'a', c += 10; -		  else if (('A' <= c) || (c <= 'F'))  c -= 'A', c += 10; -		  else -		    goto fail_u; -		  v = (v << 4) | (unsigned long int)c; -		  if (v > 0x10FFFFUL) -		    goto fail_u; -		} +		PARSE_HEX(v, r[i]);  	    }  	  goto done_u;  	fail_u: -	  if      (mode & UNESCAPE_EINVAL)     return errno = EINVAL, NULL; -	  else if (mode & UNESCAPE_VERBATIM)   r--; -	  else if (mode & UNESCAPE_IGNORE)     *w++ = '\\', r--; -	done_u:; -	  if ((v == 0) && (mode & UNESCAPE_MOD_UTF8)) -	    *w++ = (char)0xC0, *w++ = (char)0x80; -	  else if (v < 0x80) -	    *w++ = (char)v; -	  else if (v < (1L << 11)) -	    *w++ = (char)(0xC0 | (v >> 6)), -	      *w++ = (char)(0x80 | (v & 0x3F)); -	  else if (v < (1L << 16)) -	    *w++ = (char)(0xE0 | (v >> 12)), -	      *w++ = (char)(0x80 | ((v >> 6) & 0x3F)), -	      *w++ = (char)(0x80 | (v & 0x3F)); -	  else -	    *w++ = (char)(0xF0 | (v >> 18)), -	      *w++ = (char)(0x80 | 
((v >> 12) & 0x3F)), -	      *w++ = (char)(0x80 | ((v >> 6) & 0x3F)), -	      *w++ = (char)(0x80 | (v & 0x3F)); +	  UNRECOGNISED(r--); +	done_u: +	  if (ASCII()); +	  else if (v < (1L << 11))  CxC0(6,  0xC0), Cx80(0); +	  else if (v < (1L << 16))  CxC0(12, 0xE0), Cx80(6),  Cx80(0); +	  else                      CxC0(18, 0xF0), Cx80(12), Cx80(0), Cx80(0);  	  break;  	default: -	  if (('0' <= *r) && (*r <= '7')) +	  if (RANGE('0', *r, '7'))  	    {  	      int v = *r - '0'; -	      if (('0' <= r[1]) && (r[1] <= '7')) -		v = (v << 3) | (r[1] - '0'), r++; -	      if (('0' <= r[1]) && (r[1] <= '7')) -		v = (v << 3) | (r[1] - '0'), r++; -	      if ((v == 0) && (mode & UNESCAPE_MOD_UTF8)) -		*w++ = (char)0xC0, *w++ = (char)0x80; -	      else if (v < 0x80) -		*w++ = (char)v; -	      else -		*w++ = (char)(0xC0 | (v >> 6)), -		  *w++ = (char)(0x80 | (v & 3F)); +	      NEXT_OCTAL(v); +	      NEXT_OCTAL(v); +	      if   (ASCII()); +	      else CxC0(6, 0xC0), Cx80(0);  	    } -	  else if (strstarts(r, "NUL"))        *w++ =  0, r += 2; -	  else if (strstarts(r, "SOH"))        *w++ =  1, r += 2; -	  else if (strstarts(r, "STX"))        *w++ =  2, r += 2; -	  else if (strstarts(r, "ETX"))        *w++ =  3, r += 2; -	  else if (strstarts(r, "EOT"))        *w++ =  4, r += 2; -	  else if (strstarts(r, "ENQ"))        *w++ =  5, r += 2; -	  else if (strstarts(r, "ACK"))        *w++ =  6, r += 2; -	  else if (strstarts(r, "BEL"))        *w++ =  7, r += 2; -	  else if (strstarts(r, "BS"))         *w++ =  8, r += 1; -	  else if (strstarts(r, "HT"))         *w++ =  9, r += 1; -	  else if (strstarts(r, "LF"))         *w++ = 10, r += 1; -	  else if (strstarts(r, "VT"))         *w++ = 11, r += 1; -	  else if (strstarts(r, "FF"))         *w++ = 12, r += 1; -	  else if (strstarts(r, "CR"))         *w++ = 13, r += 1; -	  else if (strstarts(r, "SO"))         *w++ = 14, r += 1; -	  else if (strstarts(r, "SI"))         *w++ = 15, r += 1; -	  else if (strstarts(r, "DLE"))        *w++ = 16, r += 2; 
-	  else if (strstarts(r, "DC1"))        *w++ = 17, r += 2; -	  else if (strstarts(r, "DC2"))        *w++ = 18, r += 2; -	  else if (strstarts(r, "DC3"))        *w++ = 19, r += 2; -	  else if (strstarts(r, "DC4"))        *w++ = 20, r += 2; -	  else if (strstarts(r, "NAK"))        *w++ = 21, r += 2; -	  else if (strstarts(r, "SYN"))        *w++ = 22, r += 2; -	  else if (strstarts(r, "ETB"))        *w++ = 23, r += 2; -	  else if (strstarts(r, "CAN"))        *w++ = 24, r += 2; -	  else if (strstarts(r, "EM"))         *w++ = 25, r += 1; -	  else if (strstarts(r, "SUB"))        *w++ = 26, r += 2; -	  else if (strstarts(r, "ESC"))        *w++ = 27, r += 2; -	  else if (strstarts(r, "FS"))         *w++ = 28, r += 1; -	  else if (strstarts(r, "GS"))         *w++ = 29, r += 1; -	  else if (strstarts(r, "RS"))         *w++ = 30, r += 1; -	  else if (strstarts(r, "US"))         *w++ = 31, r += 1; -	  else if (strstarts(r, "SP"))         *w++ = 32, r += 1; -	  else if (strstarts(r, "DEL"))        *w++ = 0x7F, r += 2; -	  else if (mode & UNESCAPE_EINVAL)     return errno = EINVAL, NULL; -	  else if (mode & UNESCAPE_VERBATIM)   r--; -	  else if (mode & UNESCAPE_IGNORE)     *w++ = '\\', r--; +	  else if (strchr("'\"$?\\/", *r))  *w++ = *r; +#define X(e, i)  else if (strstarts(r, e) ? (*w++ = i, r += sizeof(e) / sizeof(char) - 2, 1) : 0); +	  LIST_ASCII_NAMES +#undef X +	  else  UNRECOGNISED(r--);  	  break;  	}    return *w = 0, w; + invalid: +  return errno = EINVAL, NULL;  }  | 
