From 28b0a361363acbd9a2d291d82e145fb3757ed9f8 Mon Sep 17 00:00:00 2001
From: Mattias Andrée
Date: Mon, 5 Jan 2026 15:18:50 +0100
Subject: Add \t and improve printing of text, and print the input text when PRINT_ACTIONS used
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Mattias Andrée
---
Review note: amended so that the width of each escape sequence is
counted in print_text() (libparser.c) and in print_sentence()
(print-syntax.c); previously the value stored by %n inside the
switch was discarded, so the tracked column fell behind the real
one. Hunk sizes, hunk offsets and the diffstat were updated to
match; the blob hashes on the "index" lines were not recomputed.

 README               |  2 +-
 libparser-generate.c |  2 ++
 libparser.7          |  2 +-
 libparser.c          | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 print-syntax.c       | 47 +++++++++++++++++++++++++++++++++++-
 5 files changed, 117 insertions(+), 4 deletions(-)

diff --git a/README b/README
index 23d231b..b5b833e 100644
--- a/README
+++ b/README
@@ -52,7 +52,7 @@ EXTENDED DESCRIPTION
 
 	(* STRINGS *)
 
-	_escape_simple = "\\" | "\"" | "'" | "a" | "b" | "f" | "n" | "r" | "v";
+	_escape_simple = "\\" | "\"" | "'" | "a" | "b" | "f" | "n" | "r" | "t" | "v";
 	_escape_hex = ("x" | "X"), _xdigit, _xdigit;
 	_escape_octal = _octal, {_octal}; (* May not exceed 255 in base 10 *)
 	_escape = _escape_simple | _escape_hex | _escape_octal | -;
diff --git a/libparser-generate.c b/libparser-generate.c
index 0a91835..47d2ebd 100644
--- a/libparser-generate.c
+++ b/libparser-generate.c
@@ -902,6 +902,8 @@ again:
 				tokens[i]->s[1] = '\n';
 			} else if (tokens[i]->s[2] == 'r') {
 				tokens[i]->s[1] = '\r';
+			} else if (tokens[i]->s[2] == 't') {
+				tokens[i]->s[1] = '\t';
 			} else if (tokens[i]->s[2] == 'v') {
 				tokens[i]->s[1] = '\v';
 			} else if (tokens[i]->s[2] == 'x' && isxdigit(tokens[i]->s[3]) && isxdigit(tokens[i]->s[4])) {
diff --git a/libparser.7 b/libparser.7
index c50b5c9..174bac9 100644
--- a/libparser.7
+++ b/libparser.7
@@ -64,7 +64,7 @@ identifier = _identifier_head, {_identifier_tail};
 
 (* STRINGS *)
 
-_escape_simple = \(dq\e\e\(dq | \(dq\e\(dq\(dq | \(dq'\(dq | \(dqa\(dq | \(dqb\(dq | \(dqf\(dq | \(dqn\(dq | \(dqr\(dq | \(dqv\(dq;
+_escape_simple = \(dq\e\e\(dq | \(dq\e\(dq\(dq | \(dq'\(dq | \(dqa\(dq | \(dqb\(dq | \(dqf\(dq | \(dqn\(dq | \(dqr\(dq | \(dqt\(dq | \(dqv\(dq;
 _escape_hex = (\(dqx\(dq | \(dqX\(dq), _xdigit, _xdigit;
 _escape_octal = _octal, {_octal}; (* May not exceed 255 in base 10 *)
 _escape = _escape_simple | _escape_hex | _escape_octal | -;
diff --git a/libparser.c b/libparser.c
index 5ecb7ee..2a925b7 100644
--- a/libparser.c
+++ b/libparser.c
@@ -362,6 +362,67 @@ print_state(struct context *ctx)
 }
 
 
+/**
+ * Print text to standard error, escaping special characters
+ *
+ * Printing stops at the first NUL byte. Bytes below ' ', bytes at
+ * or above 0x7F, '"', and '\' are printed as escape sequences;
+ * all other bytes are printed verbatim.
+ *
+ * @param   text     The text to print
+ * @param   textlen  The maximum number of bytes to read from `text`
+ * @return           The number of bytes written
+ */
+static int
+print_text(const char *text, size_t textlen)
+{
+	size_t off = 0, n = 0;
+	int len, ret = 0;
+
+	while (off + n < textlen) {
+		if (!text[off + n])
+			break;
+		if (text[off + n] < ' ' || text[off + n] >= 0x7F ||
+		    text[off + n] == '"' || text[off + n] == '\\') {
+			fprintf(stderr, "%.*s%n", (int)n, &text[off], &len);
+			ret += len;
+			off += n;
+			n = 0;
+			switch (text[off]) {
+			case '\\': fprintf(stderr, "\\\\%n", &len); break;
+			case '\"': fprintf(stderr, "\\\"%n", &len); break;
+			case '\a': fprintf(stderr, "\\a%n", &len); break;
+			case '\b': fprintf(stderr, "\\b%n", &len); break;
+			case '\f': fprintf(stderr, "\\f%n", &len); break;
+			case '\n': fprintf(stderr, "\\n%n", &len); break;
+			case '\r': fprintf(stderr, "\\r%n", &len); break;
+			case '\t': fprintf(stderr, "\\t%n", &len); break;
+			case '\v': fprintf(stderr, "\\v%n", &len); break;
+			default:
+				fprintf(stderr, "\\x%02X%n", +(unsigned char)text[off], &len);
+				break;
+			}
+			/* Count the escape sequence too, otherwise the caller's column drifts */
+			ret += len;
+			off++;
+		} else if (n == 4096U) {
+			fprintf(stderr, "%.*s%n", (int)n, &text[off], &len);
+			ret += len;
+			off += n;
+			n = 0;
+		} else {
+			n++;
+		}
+	}
+	if (n) {
+		fprintf(stderr, "%.*s%n", (int)n, &text[off], &len);
+		ret += len;
+	}
+
+	return ret;
+}
+
+
 static int
 print_sentence(const union libparser_sentence *sentence, int indent)
 {
@@ -431,7 +492,9 @@ print_sentence(const union libparser_sentence *sentence, int indent)
 		break;
 
 	case LIBPARSER_SENTENCE_TYPE_STRING:
-		fprintf(stderr, "\"%.*s\"%n", (int)sentence->string.length, sentence->string.string, &len);
+		fprintf(stderr, "\"%n", &len);
+		indent += len + print_text(sentence->string.string, sentence->string.length);
+		fprintf(stderr, "\"%n", &len);
 		indent += len;
 		break;
 
@@ -1200,6 +1263,9 @@ libparser_parse_file(const struct libparser_rule *const rules[], const char *dat
 
 #if PRINT_ACTIONS
 	print_grammar(rules);
+	fprintf(stderr, "Input text: \"");
+	print_text(ctx.data, ctx.length);
+	fprintf(stderr, "\"\n");
 #endif
 
 	/* TODO guard against left-side recursion */
diff --git a/print-syntax.c b/print-syntax.c
index 07b3858..6a00fb8 100644
--- a/print-syntax.c
+++ b/print-syntax.c
@@ -14,6 +14,7 @@
 static int
 print_sentence(const union libparser_sentence *sentence, int indent)
 {
+	size_t off, n;
 	int len;
 
 	switch (sentence->type) {
@@ -79,7 +80,51 @@ print_sentence(const union libparser_sentence *sentence, int indent)
 		break;
 
 	case LIBPARSER_SENTENCE_TYPE_STRING:
-		printf("\"%.*s\"%n", (int)sentence->string.length, sentence->string.string, &len);
+		/* Print the string quoted, with special and non-printable bytes escaped */
+		printf("\"%n", &len);
+		indent += len;
+		off = 0;
+		n = 0;
+		while (off + n < sentence->string.length) {
+			if (sentence->string.string[off + n] < ' ' ||
+			    sentence->string.string[off + n] >= 0x7F ||
+			    sentence->string.string[off + n] == '"' ||
+			    sentence->string.string[off + n] == '\\') {
+				printf("%.*s%n", (int)n, &sentence->string.string[off], &len);
+				indent += len;
+				off += n;
+				n = 0;
+				switch (sentence->string.string[off]) {
+				case '\\': printf("\\\\%n", &len); break;
+				case '\"': printf("\\\"%n", &len); break;
+				case '\a': printf("\\a%n", &len); break;
+				case '\b': printf("\\b%n", &len); break;
+				case '\f': printf("\\f%n", &len); break;
+				case '\n': printf("\\n%n", &len); break;
+				case '\r': printf("\\r%n", &len); break;
+				case '\t': printf("\\t%n", &len); break;
+				case '\v': printf("\\v%n", &len); break;
+				default:
+					printf("\\x%02X%n", +(unsigned char)sentence->string.string[off], &len);
+					break;
+				}
+				/* Count the escape sequence too, so that the column stays accurate */
+				indent += len;
+				off++;
+			} else if (n == 4096U) {
+				printf("%.*s%n", (int)n, &sentence->string.string[off], &len);
+				indent += len;
+				off += n;
+				n = 0;
+			} else {
+				n++;
+			}
+		}
+		if (n) {
+			printf("%.*s%n", (int)n, &sentence->string.string[off], &len);
+			indent += len;
+		}
+		printf("\"%n", &len);
 		indent += len;
 		break;
 
-- 
cgit v1.2.3-70-g09d2