From 978b2c445aa220a4138028335a44116479da58c1 Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Sun, 30 Nov 2014 13:34:28 +0100 Subject: mds-kbdc: process includes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- src/mds-kbdc/raw-data.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) (limited to 'src/mds-kbdc/raw-data.c') diff --git a/src/mds-kbdc/raw-data.c b/src/mds-kbdc/raw-data.c index b0985fe..1803cd5 100644 --- a/src/mds-kbdc/raw-data.c +++ b/src/mds-kbdc/raw-data.c @@ -18,6 +18,7 @@ #include "raw-data.h" #include "globals.h" +#include "string.h" #include @@ -29,6 +30,7 @@ #include #include #include +#include @@ -54,6 +56,8 @@ void mds_kbdc_source_code_initialise(mds_kbdc_source_code_t* restrict this) */ void mds_kbdc_source_code_destroy(mds_kbdc_source_code_t* restrict this) { + if (this == NULL) + return; free(this->lines), this->lines = NULL; free(this->real_lines), this->real_lines = NULL; free(this->content), this->content = NULL; @@ -68,6 +72,8 @@ void mds_kbdc_source_code_destroy(mds_kbdc_source_code_t* restrict this) */ void mds_kbdc_source_code_free(mds_kbdc_source_code_t* restrict this) { + if (this == NULL) + return; free(this->lines); free(this->real_lines); free(this->content); @@ -371,3 +377,101 @@ int read_source_lines(const char* restrict pathname, mds_kbdc_source_code_t* res return -1; } + +/** + * Encode a character in UTF-8 + * + * @param buffer The buffer where the character should be stored + * @param character The character + * @return The of the character in `buffer`, `NULL` on error + */ +static char* encode_utf8(char* buffer, char32_t character) +{ + char32_t text[2]; + char* restrict str; + char* restrict str_; + + text[0] = character; + text[1] = -1; + + if (str_ = str = string_encode(text), str == NULL) + return NULL; + + while (*str) + *buffer++ = *str++; + + free(str_); + return buffer; +} + + +/** + * Parse a quoted and escaped string that may not include function calls or variable dereferences + * + * @param string The string + * @return The string in machine-readable format, `NULL` on error + */ +char* parse_raw_string(const char* restrict string) +{ +#define r(lower, upper) (((lower) <= c) && (c <= (upper))) + + char* rc; + char* p; + int escape = 0; + char32_t buf; + + /* We know that the output string can only be shorter because + * it is surrounded by 2 quotes and escape can only be longer + * then what they escape, for example \uA0, is four characters, + * but when parsed it generateds 2 bytes in UTF-8, and their + * is not code point whose UTF-8 encoding is longer than its + * hexadecimal representation. */ + p = rc = malloc(strlen(string) * sizeof(char)); + if (rc == NULL) + return NULL; + + while (*string) + { + char c = *string++; + + if (escape > 1) + { + if ((escape == 8) && r('0', '7')) buf = (buf << 3) | (c & 15); + else if ((escape == 16) && r('0', '9')) buf = (buf << 4) | (c & 15); + else if ((escape == 16) && r('a', 'f')) buf = (buf << 4) | ((c & 15) + 9); + else if ((escape == 16) && r('A', 'F')) buf = (buf << 4) | ((c & 15) + 9); + else + goto end_of_escape; + continue; + end_of_escape: + escape = 0; + p = encode_utf8(p, buf); + if (p == NULL) + goto fail; + if (c != '.') + *p++ = c; + } + else if (escape == 1) + { + escape = 0, buf = 0; + switch (c) + { + case '0': escape = 8; break; + case 'u': escape = 16; break; + default: *p++ = c; break; + } + } + else if (c == '\\') + escape = 1; + else if (c != '\"') + *p++ = c; + } + + *p = '\0'; + return rc; + fail: + free(rc); + return NULL; +#undef r +} + -- cgit v1.2.3-70-g09d2