From 977cc038e1d0cfa9455146bcdab568d32401f37f Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Sat, 6 Dec 2014 19:28:59 +0100 Subject: mds-kbdc: parse_keys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- src/mds-kbdc/compile-layout.c | 129 +++++++++++++++++++++++++++++++++++++++--- src/mds-kbdc/string.c | 13 ++++- 2 files changed, 130 insertions(+), 12 deletions(-) (limited to 'src/mds-kbdc') diff --git a/src/mds-kbdc/compile-layout.c b/src/mds-kbdc/compile-layout.c index ad9bb6c..f5f80f2 100644 --- a/src/mds-kbdc/compile-layout.c +++ b/src/mds-kbdc/compile-layout.c @@ -304,7 +304,14 @@ static char32_t* parse_quoted_string(mds_kbdc_tree_t* restrict tree, const char* /* Parse the string. */ while ((c = *raw++)) - if (escape) + if (escape && quote && strchr("()[]{}<>\"\\,", c)) + { + /* Buffer UTF-8 text for convertion to UTF-32. */ + GROW_BUF; + buf[buf_ptr++] = c; + escape = 0; + } + else if (escape) { /* Parse escape. */ raw -= 2, subrc = parse_escape(tree, raw, lineoff + (size_t)(raw - raw_), &escape, &raw); @@ -401,7 +408,7 @@ static char32_t* parse_unquoted_string(mds_kbdc_tree_t* restrict tree, const cha goto done; \ } \ while (0) - + const char* restrict raw_ = raw; char32_t* rc; char32_t buf = 0; @@ -436,19 +443,123 @@ static char32_t* parse_string(mds_kbdc_tree_t* restrict tree, const char* restri { mds_kbdc_tree_t* old_last_value_statement = last_value_statement; char32_t* rc = (strchr("\"\\", *raw) ? 
parse_quoted_string : parse_unquoted_string)(tree, raw, lineoff);
-  last_value_statement = old_last_value_statement;
-  return rc;
+  return last_value_statement = old_last_value_statement, rc;
 }
 
 
+/**
+ * Parse a key-combination string
+ * 
+ * The first character of `raw` (the opening bracket) is skipped and the
+ * last character is not stored; it is only checked to be the closing `>`.
+ * Problems in the input (incomplete escape, unclosed key-combination)
+ * are reported on `tree` via `NEW_ERROR`; they do not make the function
+ * return `NULL`.
+ * 
+ * @param   tree     The statement where the string is located
+ * @param   raw      The string to parse
+ * @param   lineoff  The offset on the line where the string begins
+ * @return           The string as pure text, terminated by -1; commas are stored
+ *                   as (1 << 31) ^ 1. `NULL` on error, with `errno` preserved
+ *                   across the clean-up.
+ */
 static char32_t* parse_keys(mds_kbdc_tree_t* restrict tree, const char* restrict raw, size_t lineoff)
 {
-  (void) tree;
-  (void) raw;
-  (void) lineoff;
-  return NULL; /* TODO */
+  /* Make room for at least one more byte in `buf`. The capacity starts at 16
+     and is doubled thereafter; `buf_size` must be updated in both cases,
+     otherwise the buffer would never grow beyond its initial allocation. */
+#define GROW_BUF                                                                                \
+  do                                                                                            \
+    {                                                                                           \
+      if (buf_ptr == buf_size)                                                                  \
+        fail_if (xxrealloc(old_buf, buf, buf_size ? (buf_size <<= 1) : (buf_size = 16), char)); \
+    }                                                                                           \
+  while (0)
+
+  /* Append the string in `subrc` to `rc`, advance the write position and free `subrc`. */
+#define COPY                                                                    \
+  do                                                                            \
+    {                                                                           \
+      n = string_length(subrc);                                                 \
+      if (n)                                                                    \
+        {                                                                       \
+          if (rc_ptr + n > rc_size)                                             \
+            fail_if (xxrealloc(old_rc, rc, rc_size = rc_ptr + n, char32_t));    \
+          memcpy(rc + rc_ptr, subrc, n * sizeof(char32_t));                     \
+          rc_ptr += n;                                                          \
+        }                                                                       \
+      free(subrc), subrc = NULL;                                                \
+    }                                                                           \
+  while (0)
+
+  /* Convert the buffered UTF-8 text, if any, to UTF-32 and append it to `rc`. */
+#define STORE                                                           \
+  do                                                                    \
+    {                                                                   \
+      if (buf_ptr)                                                      \
+        {                                                               \
+          GROW_BUF;                                                     \
+          buf[buf_ptr] = '\0', buf_ptr = 0;                             \
+          fail_if ((subrc = string_decode(buf), subrc == NULL));        \
+          COPY;                                                         \
+        }                                                               \
+    }                                                                   \
+  while (0)
+
+  mds_kbdc_tree_t* old_last_value_statement = last_value_statement;
+  const char* restrict raw_ = raw;
+  char32_t* restrict subrc = NULL;
+  char32_t* restrict rc = NULL;
+  char32_t* restrict old_rc = NULL;
+  char* restrict buf = NULL;
+  char* restrict old_buf = NULL;
+  size_t rc_ptr = 0, rc_size = 0, n;
+  size_t buf_ptr = 0, buf_size = 0;
+  int escape = 0;
+  char c = '\0';
+  int saved_errno;
+
+  /* Skip the opening bracket, but never step past the string terminator. */
+  if (*raw)
+    raw++;
+
+  /* Parse the string. The last character is left unprocessed in `c` so that
+     it can be verified to be the closing bracket. `*raw` is only inspected
+     when `c` was not the terminator, so nothing is read beyond the string. */
+  while ((c = *raw++) && *raw)
+    if (escape && strchr("()[]{}<>\"\\,", c))
+      {
+        /* Escaped punctuation: buffer it as UTF-8 text for conversion to UTF-32. */
+        GROW_BUF;
+        buf[buf_ptr++] = c;
+        escape = 0;
+      }
+    else if (escape)
+      {
+        /* Parse escape, starting over from the backslash. */
+        raw -= 2, subrc = parse_escape(tree, raw, lineoff + (size_t)(raw - raw_), &escape, &raw);
+        fail_if (subrc == NULL);
+        COPY;
+      }
+    else if (c == '\\')
+      {
+        /* Convert the buffered UTF-8 text to UTF-32, and start an escape. */
+        STORE;
+        escape = 1;
+      }
+    else if (c == ',')
+      {
+        /* Include commas as (1 << 31) ^ 1 (highest bit set, yet guaranteed not
+           to be -1). The value is written directly to the result: the UTF-8
+           buffer is handed to `string_decode` as a NUL-terminated string, so
+           an encoding containing zero bytes cannot pass through it. */
+        STORE;
+        if (rc_ptr + 1 > rc_size)
+          fail_if (xxrealloc(old_rc, rc, rc_size = rc_ptr + 1, char32_t));
+        rc[rc_ptr++] = (char32_t)((1ULL << 31) ^ 1ULL);
+      }
+    else
+      {
+        /* Buffer UTF-8 text for conversion to UTF-32. */
+        GROW_BUF;
+        buf[buf_ptr++] = c;
+      }
+
+  /* Flush text that was buffered after the last escape or comma. */
+  STORE;
 
-  /* Do not forget to store and then restore `last_value_statement` */
+  /* Check that no escape is incomplete. */
+  if (escape && (tree->processed != PROCESS_LEVEL))
+    {
+      NEW_ERROR(tree, ERROR, "incomplete escape");
+      /* The offset is relative to the beginning of the string, hence `raw_`. */
+      error->start = lineoff + (size_t)(strrchr(raw_, '\\') - raw_);
+      error->end = lineoff + strlen(raw_);
+      tree->processed = PROCESS_LEVEL;
+    }
+
+  /* Check that key-combination is complete. */
+  if ((c != '>') && (tree->processed != PROCESS_LEVEL))
+    {
+      NEW_ERROR(tree, ERROR, "key-combination is not closed");
+      error->start = lineoff;
+      error->end = lineoff + strlen(raw_);
+      tree->processed = PROCESS_LEVEL;
+    }
+
+  /* Shrink or grow the string to its minimal size, and -1-terminate it. */
+  fail_if (xxrealloc(old_rc, rc, rc_ptr + 1, char32_t));
+  rc[rc_ptr] = -1;
+
+  free(buf);
+  return last_value_statement = old_last_value_statement, rc;
+ pfail:
+  saved_errno = errno;
+  free(subrc);
+  free(old_rc);
+  free(old_buf);
+  free(rc);
+  free(buf);
+  errno = saved_errno;
+  return last_value_statement = old_last_value_statement, NULL;
+#undef STORE
+#undef COPY
+#undef GROW_BUF
 }
 
diff --git a/src/mds-kbdc/string.c b/src/mds-kbdc/string.c
index 69f8f25..d6befe3 100644
--- a/src/mds-kbdc/string.c
+++ b/src/mds-kbdc/string.c
@@ -98,22 +98,29 @@ char* string_encode(const char32_t* restrict string)
   char* restrict rc;
 
   /* Allocated Modified UTF-8 string. */
-  if (xmalloc(rc, 6 * n + 1, char))
+  if (xmalloc(rc, 7 * n + 1, char))
     return NULL;
 
   /* Convert to Modified UTF-8.
*/
   for (i = j = 0; i < n; i++)
     {
 #define _c(s) rc[j++] = (char)(((c >> (s)) & 0x3F) | 0x80)
-#define _t(s) c < (char32_t)(1L << s)
+#define _t(s) ((0 < c) && ((uint32_t)c < (uint32_t)(1ULL << (s))))
       char32_t c = string[i];
       if (c == 0) rc[j++] = (char)0xC0, rc[j++] = (char)0x80;
       else if (_t( 7)) rc[j++] = (char)c;
       else if (_t(11)) rc[j++] = (char)((c >> 6) | 0xC0), _c( 0);
       else if (_t(16)) rc[j++] = (char)((c >> 12) | 0xE0), _c( 6), _c( 0);
       else if (_t(21)) rc[j++] = (char)((c >> 18) | 0xF0), _c(12), _c( 6), _c( 0);
+      /* UTF-8 actually ends here, fits 32 planes. */
       else if (_t(26)) rc[j++] = (char)((c >> 24) | 0xF8), _c(18), _c(12), _c( 6), _c(0);
-      else rc[j++] = (char)((c >> 30) | 0xFC), _c(24), _c(18), _c(12), _c(6), _c(0);
+      else if (_t(31)) rc[j++] = (char)((c >> 30) | 0xFC), _c(24), _c(18), _c(12), _c(6), _c(0);
+      /* The original UTF-8 specification ended here, fits 31 bits.
+       * However, we added another byte so we can fit 32 bits
+       * (actually we can now fit 36 bits.)
+       * However, we only needed this in `string_decode` which would
+       * not require any changes, but we added it here for symmetry.
+       * The lead byte of this 7-byte form carries no payload: all 36 bits
+       * are in the six continuation bytes, so it must be exactly 0xFE
+       * (or-ing in `c >> 30` could turn it into 0xFF). */
+      else rc[j++] = (char)0xFE, _c(30), _c(24), _c(18), _c(12), _c(6), _c(0);
 #undef _t
 #undef _c
     }
-- 
cgit v1.2.3-70-g09d2