diff options
| author | Mattias Andrée <m@maandree.se> | 2026-02-26 17:23:41 +0100 |
|---|---|---|
| committer | Mattias Andrée <m@maandree.se> | 2026-02-26 17:23:41 +0100 |
| commit | f75673290768677c08135f6aada53298544a14f2 (patch) | |
| tree | 6b2fb2cbf4f380c400fcd4fb3ed5a594f2b400a9 | |
| parent | Add extras/libparser-syntax-highlighter (diff) | |
| download | libparser-f75673290768677c08135f6aada53298544a14f2.tar.gz libparser-f75673290768677c08135f6aada53298544a14f2.tar.bz2 libparser-f75673290768677c08135f6aada53298544a14f2.tar.xz | |
cleanup
Signed-off-by: Mattias Andrée <m@maandree.se>
Diffstat (limited to '')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | README | 88 |
| -rw-r--r-- | TODO | 2 |
| -rw-r--r-- | libparser.7 | 88 |
| -rw-r--r-- | print-syntax.c | 4 |
4 files changed, 92 insertions, 90 deletions
@@ -25,83 +25,85 @@ EXTENDED DESCRIPTION (* CHARACTER CLASSES *) - _space = " " | "\n" | "\t"; - _alpha = <"a", "z"> | <"A", "Z">; - _octal = <"0", "7">; - _digit = <"0", "9">; - _xdigit = _digit | <"a", "f"> | <"A", "F">; - _nonascii = <128, 255>; + _space = " " | "\n" | "\t"; + _alpha = <"a", "z"> | <"A", "Z">; + _octal = <"0", "7">; + _digit = <"0", "9">; + _xdigit = _digit | <"a", "f"> | <"A", "F">; + _nonascii = <128, 255>; (* WHITESPACE/COMMENTS, THE GRAMMAR IS FREE-FORM *) - _comment_char = _space | !"*", !"\"", <"!", 0xFF>; - _comment_tail = [_comment_char], [_string], ("*)" | _comment_tail | -); - _comment = "(*", _comment_tail; + _comment_str_esc = "\\", (_space | <"!", 255>); + _comment_str_char = _space | !"\"", <"!", 255>; + _comment_str = "\"", {_comment_str_esc | _comment_str_char}, ("\"" | -); + _comment_char = _space | !"*)", !"\"", <"!", 0xFF>; + _comment = "(*", {_comment_char | _comment_str}, ("*)" | -); - _ = {_space | _comment}; + _ = {_space | _comment}; (* IDENTIFIERS *) - _identifier_head = _alpha | _digit | _nonascii | "_"; - _identifier_tail = _identifier_head | "-"; + _identifier_head = _alpha | _digit | _nonascii | "_"; + _identifier_tail = _identifier_head | "-"; - identifier = _identifier_head, {_identifier_tail}; + identifier = _identifier_head, {_identifier_tail}; (* STRINGS *) - _escape_simple = "\\" | "\"" | "'" | "a" | "b" | "f" | "n" | "r" | "t" | "v"; - _escape_hex = ("x" | "X"), _xdigit, _xdigit; - _escape_octal = _octal, {_octal}; (* May not exceed 255 in base 10 *) - _escape = _escape_simple | _escape_hex | _escape_octal | -; - _character = "\\", _escape | !"\"", <" ", 0xFF>; - _string = "\"", _character, {_character}, ("\"" | -); + _escape_simple = "\\" | "\"" | "'" | "a" | "b" | "f" | "n" | "r" | "t" | "v"; + _escape_hex = ("x" | "X"), _xdigit, _xdigit; + _escape_octal = _octal, {_octal}; (* May not exceed 255 in base 10 *) + _escape = _escape_simple | _escape_hex | _escape_octal | -; + _character = "\\", _escape | !"\"", 
<" ", 0xFF>; + _string = "\"", _character, {_character}, ("\"" | -); - string = _string - character = "\"", _character, ("\"" | -); + string = _string; + character = "\"", _character, ("\"" | -); (* INTEGERS *) - _decimal = _digit, {_digit}; - _hexadecimal = "0", ("x" | "X"), _xdigit, {_xdigit}; + _decimal = _digit, {_digit}; + _hexadecimal = "0", ("x" | "X"), _xdigit, {_xdigit}; - integer = _decimal | _hexadecimal; (* May not exceed 255. *) + integer = _hexadecimal | _decimal; (* May not exceed 255. *) (* GROUPINGS *) - _low = character | integer; - _high = character | integer; + _low = character | integer; + _high = character | integer; - nondeterministic = "?"; + nondeterministic = "?"; - committed = "+", _, _operand; - rejection = "!", _, _operand; - concatenation = _operand, {_, ",", _, _operand}; - alternation = concatenation, {_, [nondeterministic], "|", _, concatenation}; - optional = [nondeterministic], "[", _, _expression, _, "]"; - repeated = [nondeterministic], "{", _, _expression, _, "}"; - group = "(", _, _expression, _, ")"; - char-range = "<", _, _low, _, ",", _, _high, "_", ">"; - exception = "-"; - embedded-rule = identifier; + committed = "+", _, _operand; + rejection = "!", _, _operand; + concatenation = _operand, {_, ",", _, _operand}; + alternation = concatenation, {_, [nondeterministic], "|", _, concatenation}; + optional = [nondeterministic], "[", _, _expression, _, "]"; + repeated = [nondeterministic], "{", _, _expression, _, "}"; + group = "(", _, _expression, _, ")"; + char-range = "<", _, _low, _, ",", _, _high, _, ">"; + exception = "-"; + embedded-rule = identifier; - _literal = char-range | exception | string; - _group = optional | repeated | group | embedded-rule; - _operand = _group | _literal | rejection | committed; + _literal = char-range | exception | string; + _group = optional | repeated | group | embedded-rule; + _operand = _group | _literal | rejection | committed; - _expression = alternation; + _expression = alternation; (* 
RULES *) - rule = identifier, _, "=", _, _expression, _, ";"; + rule = identifier, _, "=", _, _expression, _, ";"; (* This is the root rule of the grammar. *) - grammar = _, {rules, _}; + grammar = _, {rule, _}; The file must be encoded in UTF-8, with LF as the line break (CR and FF are illegal just because). @@ -17,5 +17,3 @@ Add support for prelexed the application to take action on parsed rules and deallocate memory that is no longer needed after that. The hooks shall also be able to cause the parser to abort. - -Add tests diff --git a/libparser.7 b/libparser.7 index 174bac9..d06e5ab 100644 --- a/libparser.7 +++ b/libparser.7 @@ -37,83 +37,85 @@ input can be described in its own grammar: .nf (* CHARACTER CLASSES *) -_space = \(dq \(dq | \(dq\en\(dq | \(dq\et\(dq; -_alpha = <\(dqa\(dq, \(dqz\(dq> | <\(dqA\(dq, \(dqZ\(dq>; -_octal = <\(dq0\(dq, \(dq7\(dq>; -_digit = <\(dq0\(dq, \(dq9\(dq>; -_xdigit = _digit | <\(dqa\(dq, \(dqf\(dq> | <\(dqA\(dq, \(dqF\(dq>; -_nonascii = <128, 255>; +_space = \(dq \(dq | \(dq\en\(dq | \(dq\et\(dq; +_alpha = <\(dqa\(dq, \(dqz\(dq> | <\(dqA\(dq, \(dqZ\(dq>; +_octal = <\(dq0\(dq, \(dq7\(dq>; +_digit = <\(dq0\(dq, \(dq9\(dq>; +_xdigit = _digit | <\(dqa\(dq, \(dqf\(dq> | <\(dqA\(dq, \(dqF\(dq>; +_nonascii = <128, 255>; (* WHITESPACE/COMMENTS, THE GRAMMAR IS FREE-FORM *) -_comment_char = _space | !\(dq*\(dq, !\(dq\e\(dq\(dq, <\(dq!\(dq, 0xFF>; -_comment_tail = [_comment_char], [_string], (\(dq*)\(dq | [*], _comment_tail | -); -_comment = \(dq(*\(dq, _comment_tail; +_comment_str_esc = \(dq\e\(dq\(dq, (_space | <\(dq!\(dq, 0xFF>); +_comment_str_char = _space | !\(dq\e\(dq\(dq, <\(dq!\(dq, 0xFF>; +_comment_str = \(dq\e\(dq\(dq, {_comment_str_esc | _comment_str_char}, (\(dq\e\(dq\(dq | -); +_comment_char = _space | !\(dq*)\(dq, !\(dq\e\(dq\(dq, <\(dq!\(dq, 0xFF>; +_comment = \(dq(*\(dq, {_comment_char | _comment_str}, (\(dq*)\(dq | -); -_ = {_space | _comment}; +_ = {_space | _comment}; (* IDENTIFIERS *) -_identifier_head = _alpha | _digit | 
_nonascii | \(dq_\(dq; -_identifier_tail = _identifier_head | \(dq-\(dq; +_identifier_head = _alpha | _digit | _nonascii | \(dq_\(dq; +_identifier_tail = _identifier_head | \(dq-\(dq; -identifier = _identifier_head, {_identifier_tail}; +identifier = _identifier_head, {_identifier_tail}; (* STRINGS *) -_escape_simple = \(dq\e\e\(dq | \(dq\e\(dq\(dq | \(dq'\(dq | \(dqa\(dq | \(dqb\(dq | \(dqf\(dq | \(dqn\(dq | \(dqr\(dq | \(dqt\(dq | \(dqv\(dq; -_escape_hex = (\(dqx\(dq | \(dqX\(dq), _xdigit, _xdigit; -_escape_octal = _octal, {_octal}; (* May not exceed 255 in base 10 *) -_escape = _escape_simple | _escape_hex | _escape_octal | -; -_character = \(dq\e\e\(dq, _escape | !\(dq\e\(dq\(dq, <\(dq \(dq, 0xFF>; -_string = \(dq\e\(dq\(dq, _character, {_character}, (\(dq\e\(dq\(dq | -); +_escape_simple = \(dq\e\e\(dq | \(dq\e\(dq\(dq | \(dq'\(dq | \(dqa\(dq | \(dqb\(dq | \(dqf\(dq | \(dqn\(dq | \(dqr\(dq | \(dqt\(dq | \(dqv\(dq; +_escape_hex = (\(dqx\(dq | \(dqX\(dq), _xdigit, _xdigit; +_escape_octal = _octal, {_octal}; (* May not exceed 255 in base 10 *) +_escape = _escape_simple | _escape_hex | _escape_octal | -; +_character = \(dq\e\e\(dq, _escape | !\(dq\e\(dq\(dq, <\(dq \(dq, 0xFF>; +_string = \(dq\e\(dq\(dq, _character, {_character}, (\(dq\e\(dq\(dq | -); -string = _string; -character = \(dq\e\(dq\(dq, _character, (\(dq\e\(dq\(dq | -); +string = _string; +character = \(dq\e\(dq\(dq, _character, (\(dq\e\(dq\(dq | -); (* INTEGERS *) -_decimal = _digit, {_digit}; -_hexadecimal = \(dq0\(dq, (\(dqx\(dq | \(dqX\(dq), _xdigit, {_xdigit}; +_decimal = _digit, {_digit}; +_hexadecimal = \(dq0\(dq, (\(dqx\(dq | \(dqX\(dq), _xdigit, {_xdigit}; -integer = _decimal | _hexadecimal; (* May not exceed 255. *) +integer = _hexadecimal | _decimal; (* May not exceed 255. 
*) (* GROUPINGS *) -_low = character | integer; -_high = character | integer; +_low = character | integer; +_high = character | integer; -nondeterministic = \(dq?\(dq; +nondeterministic = \(dq?\(dq; -committed = \(dq+\(dq, _, _operand; -rejection = \(dq!\(dq, _, _operand; -concatenation = _operand, {_, \(dq,\(dq, _, _operand}; -alternation = concatenation, {_, [nondeterministic], \(dq|\(dq, _, concatenation}; -optional = [nondeterministic], \(dq[\(dq, _, _expression, _, \(dq]\(dq; -repeated = [nondeterministic], \(dq{\(dq, _, _expression, _, \(dq}\(dq; -group = \(dq(\(dq, _, _expression, _, \(dq)\(dq; -char-range = \(dq<\(dq, _, _low, _, \(dq,\(dq, _, _high, \(dq_\(dq, \(dq>\(dq; -exception = \(dq-\(dq; -embedded-rule = identifier; +committed = \(dq+\(dq, _, _operand; +rejection = \(dq!\(dq, _, _operand; +concatenation = _operand, {_, \(dq,\(dq, _, _operand}; +alternation = concatenation, {_, [nondeterministic], \(dq|\(dq, _, concatenation}; +optional = [nondeterministic], \(dq[\(dq, _, _expression, _, \(dq]\(dq; +repeated = [nondeterministic], \(dq{\(dq, _, _expression, _, \(dq}\(dq; +group = \(dq(\(dq, _, _expression, _, \(dq)\(dq; +char-range = \(dq<\(dq, _, _low, _, \(dq,\(dq, _, _high, _, \(dq>\(dq; +exception = \(dq-\(dq; +embedded-rule = identifier; -_literal = char-range | exception | string; -_group = optional | repeated | group | embedded-rule; -_operand = _group | _literal | rejection | committed; +_literal = char-range | exception | string; +_group = optional | repeated | group | embedded-rule; +_operand = _group | _literal | rejection | committed; -_expression = alternation; +_expression = alternation; (* RULES *) -rule = identifier, _, \(dq=\(dq, _, _expression, _, \(dq;\(dq; +rule = identifier, _, \(dq=\(dq, _, _expression, _, \(dq;\(dq; (* This is the root rule of the grammar. 
*) -grammar = _, {rules, _}; +grammar = _, {rule, _}; .fi .PP .RE diff --git a/print-syntax.c b/print-syntax.c index 6a00fb8..e4c202b 100644 --- a/print-syntax.c +++ b/print-syntax.c @@ -30,7 +30,7 @@ print_sentence(const union libparser_sentence *sentence, int indent) case LIBPARSER_SENTENCE_TYPE_ND_ALTERNATION: printf("("); print_sentence(sentence->binary.left, indent + 1); - printf(" ?| \n%*.s", indent + 1, ""); + printf(" ?|\n%*.s", indent + 1, ""); indent = print_sentence(sentence->binary.right, indent + 1); printf(")"); indent += 1; @@ -39,7 +39,7 @@ print_sentence(const union libparser_sentence *sentence, int indent) case LIBPARSER_SENTENCE_TYPE_ALTERNATION: printf("("); print_sentence(sentence->binary.left, indent + 1); - printf(" | \n%*.s", indent + 1, ""); + printf(" |\n%*.s", indent + 1, ""); indent = print_sentence(sentence->binary.right, indent + 1); printf(")"); indent += 1; |
