From 978b2c445aa220a4138028335a44116479da58c1 Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Sun, 30 Nov 2014 13:34:28 +0100 Subject: mds-kbdc: process includes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- src/mds-kbdc/make-tree.c | 29 ++++++++++- src/mds-kbdc/process-includes.c | 30 ++++++++---- src/mds-kbdc/raw-data.c | 104 ++++++++++++++++++++++++++++++++++++++++ src/mds-kbdc/raw-data.h | 9 ++++ 4 files changed, 161 insertions(+), 11 deletions(-) (limited to 'src/mds-kbdc') diff --git a/src/mds-kbdc/make-tree.c b/src/mds-kbdc/make-tree.c index fab402b..5c644b4 100644 --- a/src/mds-kbdc/make-tree.c +++ b/src/mds-kbdc/make-tree.c @@ -662,6 +662,9 @@ int parse_to_tree(const char* restrict filename, mds_kbdc_parsed_t* restrict res size_t line_i, line_n; const char** keyword_stack = NULL; mds_kbdc_tree_t*** tree_stack = NULL; + char* cwd = NULL; + char* old = NULL; + size_t cwd_size = 4096 >> 1; size_t stack_ptr = 0; int saved_errno, in_array = 0; @@ -672,7 +675,29 @@ int parse_to_tree(const char* restrict filename, mds_kbdc_parsed_t* restrict res * can be misleading as the program can have changed working * directory to be able to resolve filenames. */ result->pathname = realpath(filename, NULL); /* XXX use absolute path */ - fail_if (result->pathname == NULL); + if (result->pathname == NULL) + { + fail_if (errno != ENOENT); + saved_errno = errno; + + /* Get the current working directory. */ + /* glibc offers ways to do this in just one function call, + * but we will not assume that glibc is used here. 
*/ + for (;;) + { + fail_if (xxrealloc(old, cwd, cwd_size <<= 1, char)); + if (getcwd(cwd, cwd_size)) + break; + else + fail_if (errno != ERANGE); + } + + result->pathname = strdup(filename); + fail_if (result->pathname == NULL); + NEW_ERROR_(result, ERROR, 0, 0, 0, 0, 1, "no such file or directory in ‘%s’", cwd); + free(cwd); + return 0; + } /* Check that the file exists and can be read. */ if (access(result->pathname, R_OK) < 0) @@ -1043,6 +1068,8 @@ int parse_to_tree(const char* restrict filename, mds_kbdc_parsed_t* restrict res saved_errno = errno; free(keyword_stack); free(tree_stack); + free(cwd); + free(old); return errno = saved_errno, -1; } diff --git a/src/mds-kbdc/process-includes.c b/src/mds-kbdc/process-includes.c index 9677671..8c4bbbf 100644 --- a/src/mds-kbdc/process-includes.c +++ b/src/mds-kbdc/process-includes.c @@ -16,6 +16,7 @@ * along with this program. If not, see . */ #include "process-includes.h" +/* TODO we need to deal with mutually recursive includes */ #include "make-tree.h" #include "simplify-tree.h" @@ -68,26 +69,34 @@ static int transfer_errors(mds_kbdc_parsed_t* restrict subresult, mds_kbdc_tree_ { mds_kbdc_parse_error_t** errors = NULL; mds_kbdc_parse_error_t* suberror; - size_t errors_ptr = 0, i; + size_t errors_ptr = 0; int saved_errno; - /* List errors backwards, so that we can easily insert “included from here”-notes. */ + /* Allocate temporary list for errors. */ + if (subresult->errors_ptr == 0) + return 0; fail_if (xmalloc(errors, subresult->errors_ptr * 2, mds_kbdc_parse_error_t*)); + + /* List errors backwards, so that we can easily handle errors and add “included from here”-note. */ while (subresult->errors_ptr--) { suberror = subresult->errors[subresult->errors_ptr]; + + /* If it is more severe than a note, we want to say there it was included. 
*/ if (suberror->severity > MDS_KBDC_PARSE_ERROR_NOTE) { NEW_ERROR(tree, NOTE, "included from here"); errors[errors_ptr++] = error; result->errors[--(result->errors_ptr)] = NULL; } + + /* Include error. */ errors[errors_ptr++] = suberror; subresult->errors[subresult->errors_ptr] = NULL; } /* Append errors. */ - for (i = 0; i < errors_ptr; errors[i++] = NULL) + for (; errors_ptr--; errors[errors_ptr] = NULL) { if (result->errors_ptr + 1 >= result->errors_size) { @@ -98,8 +107,8 @@ static int transfer_errors(mds_kbdc_parsed_t* restrict subresult, mds_kbdc_tree_ result->errors = new_errors; result->errors_size = new_errors_size; } - - result->errors[result->errors_ptr++] = errors[i]; + + result->errors[result->errors_ptr++] = errors[errors_ptr]; result->errors[result->errors_ptr] = NULL; } @@ -108,10 +117,7 @@ static int transfer_errors(mds_kbdc_parsed_t* restrict subresult, mds_kbdc_tree_ pfail: saved_errno = errno; while (errors_ptr--) - if (errors[errors_ptr] == NULL) - break; - else - mds_kbdc_parse_error_free(errors[errors_ptr]); + mds_kbdc_parse_error_free(errors[errors_ptr]); free(errors); return errno = saved_errno, -1; } @@ -150,7 +156,7 @@ static int process_include(mds_kbdc_tree_include_t* restrict tree) * but we will not assume that glibc is used here. */ for (;;) { - fail_if (!xxrealloc(old, cwd, cwd_size <<= 1, char)); + fail_if (xxrealloc(old, cwd, cwd_size <<= 1, char)); if (getcwd(cwd, cwd_size)) break; else @@ -165,6 +171,10 @@ static int process_include(mds_kbdc_tree_include_t* restrict tree) our_result = result; /* Process include. 
/**
 * Encode a character in UTF-8
 * 
 * @param   buffer     The buffer where the character should be stored
 * @param   character  The character
 * @return             The end of the character in `buffer`, that is, the
 *                     position just after the last byte written,
 *                     `NULL` on error
 */
static char* encode_utf8(char* buffer, char32_t character)
{
  char32_t text[2];
  char* encoded;
  char* q;
  
  /* Build a one-character string; -1 is the terminator
   * that is handed to `string_encode`. */
  text[0] = character;
  text[1] = (char32_t)-1;
  
  encoded = string_encode(text);
  if (encoded == NULL)
    return NULL;
  
  /* Copy the encoding, but not its NUL-terminator, the
   * caller terminates the complete string itself. */
  for (q = encoded; *q; q++)
    *buffer++ = *q;
  
  free(encoded);
  return buffer;
}


/**
 * Get the value of a digit inside a numerical escape
 * 
 * @param   base  The base of the escape: 8 for `\0`-escapes, 16 for `\u`-escapes
 * @param   c     The character to interpret
 * @return        The value of the digit, -1 if `c` is not a digit in that base
 */
static int raw_string_escape_digit(int base, char c)
{
  if ((base == 8) && ('0' <= c) && (c <= '7'))
    return c - '0';
  if (base != 16)
    return -1;
  if (('0' <= c) && (c <= '9'))  return c - '0';
  if (('a' <= c) && (c <= 'f'))  return c - 'a' + 10;
  if (('A' <= c) && (c <= 'F'))  return c - 'A' + 10;
  return -1;
}


/**
 * Parse a quoted and escaped string that may not include function calls or variable dereferences
 * 
 * Unescaped double quotes are removed. `\0` followed by octal digits and
 * `\u` followed by hexadecimal digits are replaced by the encoding of the
 * code point they denote, such an escape may be terminated explicitly with
 * a `.`, which is then consumed. Any other escaped character is copied as is.
 * 
 * @param   string  The string
 * @return          The string in machine-readable format, `NULL` on error.
 *                  It is allocated with `malloc` and should be released
 *                  with `free` by the caller.
 */
char* parse_raw_string(const char* restrict string)
{
  char* rc;
  char* p;
  int escape = 0;    /* 0: no escape, 1: just saw `\`, 8 or 16: inside a numerical escape */
  char32_t buf = 0;  /* The code point accumulated by the current numerical escape */
  char c;
  int digit;
  
  /* The output is never longer than the input: an escape is at
   * least as long as what it produces, for example \uA0 is four
   * characters but generates 2 bytes in UTF-8. We do however
   * need one extra byte for the NUL-terminator, because the
   * input is not guaranteed to contain any quotes that would
   * be removed and make room for it. */
  p = rc = malloc(strlen(string) + 1);
  if (rc == NULL)
    return NULL;
  
  while ((c = *string++))
    {
      if (escape > 1)
	{
	  digit = raw_string_escape_digit(escape, c);
	  if (digit >= 0)
	    {
	      buf = (buf << (escape == 8 ? 3 : 4)) | (char32_t)digit;
	      continue;
	    }
	  
	  /* End of the numerical escape, store the code point. */
	  escape = 0;
	  p = encode_utf8(p, buf);
	  if (p == NULL)
	    goto fail;
	  
	  /* A dot only terminates the escape. */
	  if (c == '.')
	    continue;
	  
	  /* Any other character is an ordinary character and
	   * is parsed as such below, so that a closing quote is
	   * removed and a backslash starts a new escape. */
	}
      
      if (escape == 1)
	{
	  escape = 0, buf = 0;
	  switch (c)
	    {
	    case '0':  escape = 8;   break;
	    case 'u':  escape = 16;  break;
	    default:   *p++ = c;     break;
	    }
	}
      else if (c == '\\')
	escape = 1;
      else if (c != '\"')
	*p++ = c;
    }
  
  /* Do not lose a numerical escape that runs to the end of the input. */
  if (escape > 1)
    {
      p = encode_utf8(p, buf);
      if (p == NULL)
	goto fail;
    }
  
  *p = '\0';
  return rc;
 fail:
  free(rc);
  return NULL;
}
machine-readable format, `NULL` on error + */ +char* parse_raw_string(const char* restrict string); + + #endif -- cgit v1.2.3-70-g09d2