From ddf1fe9631ab241ffe855aca2b9589687bfc8db4 Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Thu, 26 Feb 2026 17:21:58 +0100 Subject: Add extras/libparser-syntax-highlighter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- extras/grammar | 134 +++++++++++++++++ extras/libparser-syntax-highlighter.c | 262 ++++++++++++++++++++++++++++++++++ 2 files changed, 396 insertions(+) create mode 100644 extras/grammar create mode 100644 extras/libparser-syntax-highlighter.c (limited to 'extras') diff --git a/extras/grammar b/extras/grammar new file mode 100644 index 0000000..8cc6022 --- /dev/null +++ b/extras/grammar @@ -0,0 +1,134 @@ +(* CHARACTER CLASSES *) + +_space = " " | "\n" | "\t"; +_alpha = <"a", "z"> | <"A", "Z">; +_octal = <"0", "7">; +_digit = <"0", "9">; +_xdigit = _digit | <"a", "f"> | <"A", "F">; +_nonascii = <128, 255>; + + +(* WHITESPACE/COMMENTS, THE GRAMMAR IS FREE-FORM *) + +_comment_str_esc = "\\", (_space | <"!", 255>); +_comment_str_char = _space | !"\"", <"!", 255>; +_comment_str = "\"", {_comment_str_esc | _comment_str_char}, ("\"" | -); +_comment_char = _space | !"*)", !"\"", <"!", 255>; +comment = "(*", {_comment_char | _comment_str}, ("*)" | -); + +_ = {_space | comment}; + + +(* IDENTIFIERS *) + +_identifier_head = _alpha | _digit | _nonascii | "_"; +_identifier_tail = _identifier_head | "-"; + +intrusive-identifier = _identifier_head, {_identifier_tail}; +discrete-identifier = "_", {_identifier_tail}; + +_identifier = discrete-identifier | intrusive-identifier; + + +(* STRINGS *) + +_escape_simple = "\\" | "\"" | "'" | "a" | "b" | "f" | "n" | "r" | "t" | "v"; +_escape_hex = ("x" | "X"), _xdigit, _xdigit; +_escape_octal = _octal, {_octal}; +escape-payload = _escape_simple | _escape_hex | _escape_octal | -; +ESCAPE = "\\"; +escape = ESCAPE, escape-payload; +_character = escape | !"\"", <" ", 255>; +_string = "\"", _character, {_character}, ("\"" | -); + +string = 
_string; +character = "\"", _character, ("\"" | -); + + +(* INTEGERS *) + +_decimal = _digit, {_digit}; +_hexadecimal = "0", ("x" | "X"), _xdigit, {_xdigit}; + +integer = _hexadecimal | _decimal; + + +(* GROUPINGS *) + +boundary = character | integer; +low = boundary; +high = boundary; + +NONDETERMINISTIC = "?"; + +COMMITTED = "+"; +committed = COMMITTED, _, operand; + +REJECTION = "!"; +rejection = REJECTION, _, operand; + +EXCEPTION = "-"; +exception = EXCEPTION; + +CONCATENATION = ","; +concatenation = operand, {_, CONCATENATION, _, operand}; + +ALTERNATION = "|"; +alternation = concatenation, {_, [NONDETERMINISTIC], ALTERNATION, _, concatenation}; + +CHAR-RANGE-START = "<"; +CHAR-RANGE-COMMA = ","; +CHAR-RANGE-END = ">"; +CHAR-RANGE-EDGE = CHAR-RANGE-START | CHAR-RANGE-END; +CHAR-RANGE = CHAR-RANGE-EDGE | CHAR-RANGE-COMMA; +char-range = !!CHAR-RANGE-START, CHAR-RANGE + , _, low, _ + , !!CHAR-RANGE-COMMA, CHAR-RANGE + , _, high, _ + , !!CHAR-RANGE-END, CHAR-RANGE; + +OPTIONAL-START = "["; +OPTIONAL-END = "]"; +OPTIONAL = OPTIONAL-START | OPTIONAL-END; +optional = [NONDETERMINISTIC] + , !!OPTIONAL-START, OPTIONAL + , _, _expression, _ + , !!OPTIONAL-END, OPTIONAL; + +REPEATED-START = "{"; +REPEATED-END = "}"; +REPEATED = REPEATED-START | REPEATED-END; +repeated = [NONDETERMINISTIC] + , !!REPEATED-START, REPEATED + , _, _expression, _ + , !!REPEATED-END, REPEATED; + +GROUP-START = "("; +GROUP-END = ")"; +GROUP = GROUP-START | GROUP-END; +group = !!GROUP-START, GROUP + , _, _expression, _ + , !!GROUP-END, GROUP; + +embedded-rule = _identifier; + +_literal = char-range | exception | string; +_group = optional | repeated | group | embedded-rule; +operand = _group | _literal | rejection | committed; + +_expression = alternation; + + +(* RULES *) + +DEFINITION = "="; +TERMINATION = ";"; +RULE = DEFINITION | TERMINATION; +rule-name = _identifier; +rule = rule-name, _ + , !!DEFINITION, RULE + , _, _expression, _ + , !!TERMINATION, RULE; + +(* This is the root rule of the 
grammar. *)
+grammar = _, {rule, _};
diff --git a/extras/libparser-syntax-highlighter.c b/extras/libparser-syntax-highlighter.c
new file mode 100644
index 0000000..f1dbddd
--- /dev/null
+++ b/extras/libparser-syntax-highlighter.c
@@ -0,0 +1,262 @@
+/* See LICENSE file for copyright and license details. */
+/*
+ * NOTE(review): the header names were missing from the rendered source;
+ * they are reconstructed from the identifiers used below. Confirm the
+ * original set and order.
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <libparser.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+
+/*
+ * Table of the grammar's rule names and how a parse-tree node produced
+ * by each rule is handled.
+ *
+ * X(RULE, ACTION, COLOUR) is expanded once per entry and D is placed
+ * between consecutive expansions:
+ *
+ *   RULE    rule name, compared against node->rule
+ *   ACTION  handler invoked for a matching node: ignore, colourise,
+ *           descend or colourise_and_descend
+ *   COLOUR  SGR parameter string; the dispatcher wraps it as
+ *           "\033[" COLOUR "m" before passing it to ACTION
+ */
+#define LIST_RULES(X, D)\
+	X("comment",              colourise,             "31")   D\
+	X("intrusive-identifier", colourise,             "33")   D\
+	X("discrete-identifier",  colourise,             "")     D\
+	X("escape-payload",       ignore,                "")     D\
+	X("ESCAPE",               colourise,             "2;34") D\
+	X("escape",               colourise_and_descend, "34")   D\
+	X("string",               colourise_and_descend, "32")   D\
+	X("character",            descend,               "")     D\
+	X("integer",              ignore,                "")     D\
+	X("boundary",             descend,               "")     D\
+	X("low",                  descend,               "")     D\
+	X("high",                 descend,               "")     D\
+	X("NONDETERMINISTIC",     colourise,             "1;33") D\
+	X("COMMITTED",            colourise,             "1;33") D\
+	X("committed",            descend,               "")     D\
+	X("REJECTION",            colourise,             "1;31") D\
+	X("rejection",            descend,               "")     D\
+	X("EXCEPTION",            colourise,             "1;31") D\
+	X("exception",            descend,               "")     D\
+	X("CONCATENATION",        colourise,             "2")    D\
+	X("concatenation",        descend,               "")     D\
+	X("ALTERNATION",          colourise,             "")     D\
+	X("alternation",          descend,               "")     D\
+	X("CHAR-RANGE-START",     ignore,                "")     D\
+	X("CHAR-RANGE-COMMA",     ignore,                "")     D\
+	X("CHAR-RANGE-END",       ignore,                "")     D\
+	X("CHAR-RANGE-EDGE",      ignore,                "")     D\
+	X("CHAR-RANGE",           ignore,                "")     D\
+	X("char-range",           colourise_and_descend, "35")   D\
+	X("OPTIONAL-START",       ignore,                "")     D\
+	X("OPTIONAL-END",         ignore,                "")     D\
+	X("OPTIONAL",             colourise,             "1;34") D\
+	X("optional",             descend,               "")     D\
+	X("REPEATED-START",       ignore,                "")     D\
+	X("REPEATED-END",         ignore,                "")     D\
+	X("REPEATED",             colourise,             "1;32") D\
+	X("repeated",             descend,               "")     D\
+	X("GROUP-START",          ignore,                "")     D\
+	X("GROUP-END",            ignore,                "")     D\
+	X("GROUP",                colourise,             "1")    D\
+	X("group",                descend,               "")     D\
+	X("embedded-rule",        descend,               "")     D\
+	X("operand",              descend,               "")     D\
+	X("DEFINITION",           ignore,                "")     D\
+	X("TERMINATION",          ignore,                "")     D\
+	X("RULE",                 colourise,             "1;36") D\
+	X("rule-name",            descend,               "")     D\
+	X("rule",                 descend,               "")     D\
+	X("grammar",              descend,               "")
+
+
+/* Program name, used as the prefix of every diagnostic */
+static const char *argv0;
+
+static char *text = NULL; /* the input read so far (not NUL-terminated) */
+static size_t size = 0;   /* number of bytes allocated for `text` */
+static size_t len = 0;    /* number of input bytes stored in `text` */
+static size_t off = 0;    /* number of bytes of `text` already written to stdout */
+
+
+/* Needed early because the handlers dispatch to each other recursively */
+static void colourise_and_descend(struct libparser_unit *tree, const char *colour);
+static void descend(struct libparser_unit *tree, const char *colour);
+
+
+/**
+ * Write exactly `n` bytes from `s` to standard output
+ *
+ * Short writes are continued and EINTR is retried; any other
+ * write error prints a diagnostic and terminates the process
+ * with exit status 1
+ *
+ * @param  s  The bytes to write
+ * @param  n  The number of bytes to write
+ */
+static void
+write_all(const char *s, size_t n)
+{
+	ssize_t r;
+
+	while (n) {
+		r = write(STDOUT_FILENO, s, n);
+		if (r < 0) {
+			if (errno == EINTR)
+				continue;
+			/* NOTE(review): the stream name was empty in the rendered source; "<stdout>" is inferred */
+			fprintf(stderr, "%s: write %s: %s\n", argv0, "<stdout>", strerror(errno));
+			exit(1);
+		}
+		s = &s[r];
+		n -= (size_t)r;
+	}
+}
+
+
+/**
+ * Write a NUL-terminated string to standard output
+ *
+ * @param  s  The string to write
+ */
+static void
+write_str(const char *s)
+{
+	write_all(s, strlen(s));
+}
+
+
+/**
+ * Write the not yet written part of the input, up to
+ * (but excluding) offset `to`, and remember how far the
+ * output has come; does nothing if `to` is not beyond
+ * what has already been written
+ *
+ * @param  to  Offset in `text` to write up to
+ */
+static void
+output(size_t to)
+{
+	if (to > off) {
+		write_all(&text[off], to - off);
+		off = to;
+	}
+}
+
+
+/**
+ * Handler that does nothing: the node's text is left to be
+ * written by a later call to `output`
+ *
+ * @param  tree    Unused
+ * @param  colour  Unused
+ */
+static void
+ignore(struct libparser_unit *tree, const char *colour)
+{
+	(void) tree;
+	(void) colour;
+}
+
+
+/**
+ * Handler that writes the node's text in one colour,
+ * without looking at the node's children
+ *
+ * @param  tree    The node whose text, [tree->start, tree->end), is coloured
+ * @param  colour  Escape sequence that selects the colour
+ */
+static void
+colourise(struct libparser_unit *tree, const char *colour)
+{
+	output(tree->start);  /* flush what precedes the node before switching colour */
+	write_str("\033[m");
+	write_str(colour);
+	output(tree->end);
+	write_str("\033[m");
+}
+
+
+/**
+ * Call the handler that `LIST_RULES` associates with the node's
+ * rule; nodes with a rule that is not listed are descended into
+ *
+ * @param  node  The node to handle
+ */
+static void
+dispatch(struct libparser_unit *node)
+{
+#define X(RULE, ACTION, COLOUR)\
+	if (!strcmp(node->rule, RULE)) ACTION(node, "\033["COLOUR"m");
+
+	LIST_RULES(X, else) else descend(node, "");
+
+#undef X
+}
+
+
+/**
+ * Handler that writes the node's text in one colour, but lets
+ * the node's children apply their own handling to their parts
+ *
+ * @param  tree    The node whose text, [tree->start, tree->end), is coloured
+ * @param  colour  Escape sequence that selects the colour
+ */
+static void
+colourise_and_descend(struct libparser_unit *tree, const char *colour)
+{
+	struct libparser_unit *node;
+
+	output(tree->start);
+	write_str("\033[m");
+	write_str(colour);
+
+	for (node = tree->in; node; node = node->next) {
+		dispatch(node);
+		/* A coloured child ends by resetting the attributes, so
+		 * reselect this node's colour after every child; otherwise
+		 * the text between two children would be written uncoloured */
+		write_str("\033[m");
+		write_str(colour);
+	}
+
+	output(tree->end);
+	write_str("\033[m");
+}
+
+
+/**
+ * Handler that applies no colour to the node itself
+ * and only handles the node's children
+ *
+ * @param  tree    The node whose children are handled
+ * @param  colour  Unused
+ */
+static void
+descend(struct libparser_unit *tree, const char *colour)
+{
+	struct libparser_unit *node;
+
+	(void) colour;
+
+	for (node = tree->in; node; node = node->next)
+		dispatch(node);
+}
+
+
+/**
+ * Deallocate a node and, recursively, all nodes below it
+ *
+ * @param  tree  The node to deallocate, may be `NULL`
+ */
+static void
+free_tree(struct libparser_unit *tree)
+{
+	struct libparser_unit *node;
+	struct libparser_unit *next;
+
+	if (!tree)
+		return;
+
+	for (node = tree->in; node; node = next) {
+		next = node->next; /* fetched first: `node` is gone after free_tree */
+		free_tree(node);
+	}
+
+	free(tree);
+}
+
+
+/**
+ * usage: libparser-syntax-highlighter [file]
+ *
+ * Read a grammar from `file`, or from standard input if `file`
+ * is omitted or is "-", parse it, and write it to standard
+ * output with colour escape sequences inserted
+ *
+ * @return  0 on success; on failure the process exits with status 1
+ */
+int
+main(int argc, char *argv[])
+{
+	struct libparser_unit *tree = NULL;
+	void *new_text;
+	ssize_t r;
+	int fd = STDIN_FILENO;
+	/* NOTE(review): this name was empty in the rendered source; "<stdin>" is inferred */
+	const char *path = "<stdin>";
+
+	/* From here on argv[0] is the first operand, not the program name */
+	argv0 = *argv++;
+	argc--;
+
+	if (argc && !strcmp(*argv, "--")) {
+		argv++;
+		argc--;
+	} else if (argc && argv[0][0] == '-' && argv[0][1]) {
+		/* no options are supported */
+		goto usage;
+	}
+	if (argc > 1)
+		goto usage;
+	if (argc && !strcmp(*argv, "-"))
+		argc--; /* "-" selects standard input, same as no operand */
+
+	if (argc) {
+		path = *argv;
+		fd = open(path, O_RDONLY);
+		if (fd < 0) {
+			fprintf(stderr, "%s: open %s O_RDONLY: %s\n", argv0, path, strerror(errno));
+			exit(1);
+		}
+	}
+
+	/* Read the entire input into `text`, growing the buffer when it is full */
+	for (;;) {
+		if (len == size) {
+			size += 8096u;
+			new_text = realloc(text, size);
+			if (!new_text) {
+				fprintf(stderr, "%s: realloc %zu: %s\n", argv0, size, strerror(errno));
+				exit(1);
+			}
+			text = new_text;
+		}
+		r = read(fd, &text[len], size - len);
+		if (!r)
+			break;
+		if (r < 0) {
+			if (errno == EINTR)
+				continue;
+			fprintf(stderr, "%s: read %s: %s\n", argv0, path, strerror(errno));
+			exit(1);
+		}
+		len += (size_t)r;
+	}
+
+	if (argc)
+		close(fd);
+
+	r = libparser_parse_file(libparser_rule_table, text, len, &tree);
+	if (r < 0) {
+		fprintf(stderr, "%s: libparser_parse_file: %s\n", argv0, strerror(errno));
+		exit(1);
+	} else if (!tree || tree->end != (size_t)len || !r) {
+		/* a tree that does not cover the whole input is treated as a failure too */
+		fprintf(stderr, "%s: failed to parse input\n", argv0);
+		exit(1);
+	}
+
+	descend(tree, "");
+	output(len); /* flush whatever follows the last handled node */
+
+	free(text);
+	free_tree(tree);
+	return 0;
+
+usage:
+	fprintf(stderr, "usage: %s [file]\n", argv0);
+	exit(1);
+}
-- cgit v1.2.3-70-g09d2