From f75673290768677c08135f6aada53298544a14f2 Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Thu, 26 Feb 2026 17:23:41 +0100 Subject: cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- README | 88 ++++++++++++++++++++++++++++++---------------------------- TODO | 2 -- libparser.7 | 88 ++++++++++++++++++++++++++++++---------------------------- print-syntax.c | 4 +-- 4 files changed, 92 insertions(+), 90 deletions(-) diff --git a/README b/README index b5b833e..0183879 100644 --- a/README +++ b/README @@ -25,83 +25,85 @@ EXTENDED DESCRIPTION (* CHARACTER CLASSES *) - _space = " " | "\n" | "\t"; - _alpha = <"a", "z"> | <"A", "Z">; - _octal = <"0", "7">; - _digit = <"0", "9">; - _xdigit = _digit | <"a", "f"> | <"A", "F">; - _nonascii = <128, 255>; + _space = " " | "\n" | "\t"; + _alpha = <"a", "z"> | <"A", "Z">; + _octal = <"0", "7">; + _digit = <"0", "9">; + _xdigit = _digit | <"a", "f"> | <"A", "F">; + _nonascii = <128, 255>; (* WHITESPACE/COMMENTS, THE GRAMMAR IS FREE-FORM *) - _comment_char = _space | !"*", !"\"", <"!", 0xFF>; - _comment_tail = [_comment_char], [_string], ("*)" | _comment_tail | -); - _comment = "(*", _comment_tail; + _comment_str_esc = "\\", (_space | <"!", 255>); + _comment_str_char = _space | !"\"", <"!", 255>; + _comment_str = "\"", {_comment_str_esc | _comment_str_char}, ("\"" | -); + _comment_char = _space | !"*)", !"\"", <"!", 0xFF>; + _comment = "(*", {_comment_char | _comment_str}, ("*)" | -); - _ = {_space | _comment}; + _ = {_space | _comment}; (* IDENTIFIERS *) - _identifier_head = _alpha | _digit | _nonascii | "_"; - _identifier_tail = _identifier_head | "-"; + _identifier_head = _alpha | _digit | _nonascii | "_"; + _identifier_tail = _identifier_head | "-"; - identifier = _identifier_head, {_identifier_tail}; + identifier = _identifier_head, {_identifier_tail}; (* STRINGS *) - _escape_simple = "\\" | "\"" | "'" | "a" | "b" | "f" | "n" | "r" | "t" | 
"v"; - _escape_hex = ("x" | "X"), _xdigit, _xdigit; - _escape_octal = _octal, {_octal}; (* May not exceed 255 in base 10 *) - _escape = _escape_simple | _escape_hex | _escape_octal | -; - _character = "\\", _escape | !"\"", <" ", 0xFF>; - _string = "\"", _character, {_character}, ("\"" | -); + _escape_simple = "\\" | "\"" | "'" | "a" | "b" | "f" | "n" | "r" | "t" | "v"; + _escape_hex = ("x" | "X"), _xdigit, _xdigit; + _escape_octal = _octal, {_octal}; (* May not exceed 255 in base 10 *) + _escape = _escape_simple | _escape_hex | _escape_octal | -; + _character = "\\", _escape | !"\"", <" ", 0xFF>; + _string = "\"", _character, {_character}, ("\"" | -); - string = _string - character = "\"", _character, ("\"" | -); + string = _string; + character = "\"", _character, ("\"" | -); (* INTEGERS *) - _decimal = _digit, {_digit}; - _hexadecimal = "0", ("x" | "X"), _xdigit, {_xdigit}; + _decimal = _digit, {_digit}; + _hexadecimal = "0", ("x" | "X"), _xdigit, {_xdigit}; - integer = _decimal | _hexadecimal; (* May not exceed 255. *) + integer = _hexadecimal | _decimal; (* May not exceed 255. 
*) (* GROUPINGS *) - _low = character | integer; - _high = character | integer; + _low = character | integer; + _high = character | integer; - nondeterministic = "?"; + nondeterministic = "?"; - committed = "+", _, _operand; - rejection = "!", _, _operand; - concatenation = _operand, {_, ",", _, _operand}; - alternation = concatenation, {_, [nondeterministic], "|", _, concatenation}; - optional = [nondeterministic], "[", _, _expression, _, "]"; - repeated = [nondeterministic], "{", _, _expression, _, "}"; - group = "(", _, _expression, _, ")"; - char-range = "<", _, _low, _, ",", _, _high, "_", ">"; - exception = "-"; - embedded-rule = identifier; + committed = "+", _, _operand; + rejection = "!", _, _operand; + concatenation = _operand, {_, ",", _, _operand}; + alternation = concatenation, {_, [nondeterministic], "|", _, concatenation}; + optional = [nondeterministic], "[", _, _expression, _, "]"; + repeated = [nondeterministic], "{", _, _expression, _, "}"; + group = "(", _, _expression, _, ")"; + char-range = "<", _, _low, _, ",", _, _high, _, ">"; + exception = "-"; + embedded-rule = identifier; - _literal = char-range | exception | string; - _group = optional | repeated | group | embedded-rule; - _operand = _group | _literal | rejection | committed; + _literal = char-range | exception | string; + _group = optional | repeated | group | embedded-rule; + _operand = _group | _literal | rejection | committed; - _expression = alternation; + _expression = alternation; (* RULES *) - rule = identifier, _, "=", _, _expression, _, ";"; + rule = identifier, _, "=", _, _expression, _, ";"; (* This is the root rule of the grammar. *) - grammar = _, {rules, _}; + grammar = _, {rule, _}; The file must be encoded in UTF-8, with LF as the line break (CR and FF are illegal just because). 
diff --git a/TODO b/TODO index 0989ca3..8f7c514 100644 --- a/TODO +++ b/TODO @@ -17,5 +17,3 @@ Add support for prelexed the application to take action on parsed rules and deallocate memory that is no longer needed after that. The hooks shall also be able to cause the parser to abort. - -Add tests diff --git a/libparser.7 b/libparser.7 index 174bac9..d06e5ab 100644 --- a/libparser.7 +++ b/libparser.7 @@ -37,83 +37,85 @@ input can be described in its own grammar: .nf (* CHARACTER CLASSES *) -_space = \(dq \(dq | \(dq\en\(dq | \(dq\et\(dq; -_alpha = <\(dqa\(dq, \(dqz\(dq> | <\(dqA\(dq, \(dqZ\(dq>; -_octal = <\(dq0\(dq, \(dq7\(dq>; -_digit = <\(dq0\(dq, \(dq9\(dq>; -_xdigit = _digit | <\(dqa\(dq, \(dqf\(dq> | <\(dqA\(dq, \(dqF\(dq>; -_nonascii = <128, 255>; +_space = \(dq \(dq | \(dq\en\(dq | \(dq\et\(dq; +_alpha = <\(dqa\(dq, \(dqz\(dq> | <\(dqA\(dq, \(dqZ\(dq>; +_octal = <\(dq0\(dq, \(dq7\(dq>; +_digit = <\(dq0\(dq, \(dq9\(dq>; +_xdigit = _digit | <\(dqa\(dq, \(dqf\(dq> | <\(dqA\(dq, \(dqF\(dq>; +_nonascii = <128, 255>; (* WHITESPACE/COMMENTS, THE GRAMMAR IS FREE-FORM *) -_comment_char = _space | !\(dq*\(dq, !\(dq\e\(dq\(dq, <\(dq!\(dq, 0xFF>; -_comment_tail = [_comment_char], [_string], (\(dq*)\(dq | [*], _comment_tail | -); -_comment = \(dq(*\(dq, _comment_tail; +_comment_str_esc = \(dq\e\e\(dq, (_space | <\(dq!\(dq, 0xFF>); +_comment_str_char = _space | !\(dq\e\(dq\(dq, <\(dq!\(dq, 0xFF>; +_comment_str = \(dq\e\(dq\(dq, {_comment_str_esc | _comment_str_char}, (\(dq\e\(dq\(dq | -); +_comment_char = _space | !\(dq*)\(dq, !\(dq\e\(dq\(dq, <\(dq!\(dq, 0xFF>; +_comment = \(dq(*\(dq, {_comment_char | _comment_str}, (\(dq*)\(dq | -); -_ = {_space | _comment}; +_ = {_space | _comment}; (* IDENTIFIERS *) -_identifier_head = _alpha | _digit | _nonascii | \(dq_\(dq; -_identifier_tail = _identifier_head | \(dq-\(dq; +_identifier_head = _alpha | _digit | _nonascii | \(dq_\(dq; +_identifier_tail = _identifier_head | \(dq-\(dq; -identifier = _identifier_head, 
{_identifier_tail}; +identifier = _identifier_head, {_identifier_tail}; (* STRINGS *) -_escape_simple = \(dq\e\e\(dq | \(dq\e\(dq\(dq | \(dq'\(dq | \(dqa\(dq | \(dqb\(dq | \(dqf\(dq | \(dqn\(dq | \(dqr\(dq | \(dqt\(dq | \(dqv\(dq; -_escape_hex = (\(dqx\(dq | \(dqX\(dq), _xdigit, _xdigit; -_escape_octal = _octal, {_octal}; (* May not exceed 255 in base 10 *) -_escape = _escape_simple | _escape_hex | _escape_octal | -; -_character = \(dq\e\e\(dq, _escape | !\(dq\e\(dq\(dq, <\(dq \(dq, 0xFF>; -_string = \(dq\e\(dq\(dq, _character, {_character}, (\(dq\e\(dq\(dq | -); +_escape_simple = \(dq\e\e\(dq | \(dq\e\(dq\(dq | \(dq'\(dq | \(dqa\(dq | \(dqb\(dq | \(dqf\(dq | \(dqn\(dq | \(dqr\(dq | \(dqt\(dq | \(dqv\(dq; +_escape_hex = (\(dqx\(dq | \(dqX\(dq), _xdigit, _xdigit; +_escape_octal = _octal, {_octal}; (* May not exceed 255 in base 10 *) +_escape = _escape_simple | _escape_hex | _escape_octal | -; +_character = \(dq\e\e\(dq, _escape | !\(dq\e\(dq\(dq, <\(dq \(dq, 0xFF>; +_string = \(dq\e\(dq\(dq, _character, {_character}, (\(dq\e\(dq\(dq | -); -string = _string; -character = \(dq\e\(dq\(dq, _character, (\(dq\e\(dq\(dq | -); +string = _string; +character = \(dq\e\(dq\(dq, _character, (\(dq\e\(dq\(dq | -); (* INTEGERS *) -_decimal = _digit, {_digit}; -_hexadecimal = \(dq0\(dq, (\(dqx\(dq | \(dqX\(dq), _xdigit, {_xdigit}; +_decimal = _digit, {_digit}; +_hexadecimal = \(dq0\(dq, (\(dqx\(dq | \(dqX\(dq), _xdigit, {_xdigit}; -integer = _decimal | _hexadecimal; (* May not exceed 255. *) +integer = _hexadecimal | _decimal; (* May not exceed 255. 
*) (* GROUPINGS *) -_low = character | integer; -_high = character | integer; +_low = character | integer; +_high = character | integer; -nondeterministic = \(dq?\(dq; +nondeterministic = \(dq?\(dq; -committed = \(dq+\(dq, _, _operand; -rejection = \(dq!\(dq, _, _operand; -concatenation = _operand, {_, \(dq,\(dq, _, _operand}; -alternation = concatenation, {_, [nondeterministic], \(dq|\(dq, _, concatenation}; -optional = [nondeterministic], \(dq[\(dq, _, _expression, _, \(dq]\(dq; -repeated = [nondeterministic], \(dq{\(dq, _, _expression, _, \(dq}\(dq; -group = \(dq(\(dq, _, _expression, _, \(dq)\(dq; -char-range = \(dq<\(dq, _, _low, _, \(dq,\(dq, _, _high, \(dq_\(dq, \(dq>\(dq; -exception = \(dq-\(dq; -embedded-rule = identifier; +committed = \(dq+\(dq, _, _operand; +rejection = \(dq!\(dq, _, _operand; +concatenation = _operand, {_, \(dq,\(dq, _, _operand}; +alternation = concatenation, {_, [nondeterministic], \(dq|\(dq, _, concatenation}; +optional = [nondeterministic], \(dq[\(dq, _, _expression, _, \(dq]\(dq; +repeated = [nondeterministic], \(dq{\(dq, _, _expression, _, \(dq}\(dq; +group = \(dq(\(dq, _, _expression, _, \(dq)\(dq; +char-range = \(dq<\(dq, _, _low, _, \(dq,\(dq, _, _high, _, \(dq>\(dq; +exception = \(dq-\(dq; +embedded-rule = identifier; -_literal = char-range | exception | string; -_group = optional | repeated | group | embedded-rule; -_operand = _group | _literal | rejection | committed; +_literal = char-range | exception | string; +_group = optional | repeated | group | embedded-rule; +_operand = _group | _literal | rejection | committed; -_expression = alternation; +_expression = alternation; (* RULES *) -rule = identifier, _, \(dq=\(dq, _, _expression, _, \(dq;\(dq; +rule = identifier, _, \(dq=\(dq, _, _expression, _, \(dq;\(dq; (* This is the root rule of the grammar. 
*) -grammar = _, {rules, _}; +grammar = _, {rule, _}; .fi .PP .RE diff --git a/print-syntax.c b/print-syntax.c index 6a00fb8..e4c202b 100644 --- a/print-syntax.c +++ b/print-syntax.c @@ -30,7 +30,7 @@ print_sentence(const union libparser_sentence *sentence, int indent) case LIBPARSER_SENTENCE_TYPE_ND_ALTERNATION: printf("("); print_sentence(sentence->binary.left, indent + 1); - printf(" ?| \n%*.s", indent + 1, ""); + printf(" ?|\n%*.s", indent + 1, ""); indent = print_sentence(sentence->binary.right, indent + 1); printf(")"); indent += 1; @@ -39,7 +39,7 @@ print_sentence(const union libparser_sentence *sentence, int indent) case LIBPARSER_SENTENCE_TYPE_ALTERNATION: printf("("); print_sentence(sentence->binary.left, indent + 1); - printf(" | \n%*.s", indent + 1, ""); + printf(" |\n%*.s", indent + 1, ""); indent = print_sentence(sentence->binary.right, indent + 1); printf(")"); indent += 1; -- cgit v1.2.3-70-g09d2