aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMattias Andrée <m@maandree.se>2026-02-26 17:23:41 +0100
committerMattias Andrée <m@maandree.se>2026-02-26 17:23:41 +0100
commitf75673290768677c08135f6aada53298544a14f2 (patch)
tree6b2fb2cbf4f380c400fcd4fb3ed5a594f2b400a9
parentAdd extras/libparser-syntax-highlighter (diff)
downloadlibparser-f75673290768677c08135f6aada53298544a14f2.tar.gz
libparser-f75673290768677c08135f6aada53298544a14f2.tar.bz2
libparser-f75673290768677c08135f6aada53298544a14f2.tar.xz
cleanup
Signed-off-by: Mattias Andrée <m@maandree.se>
Diffstat (limited to '')
-rw-r--r--README88
-rw-r--r--TODO2
-rw-r--r--libparser.788
-rw-r--r--print-syntax.c4
4 files changed, 92 insertions, 90 deletions
diff --git a/README b/README
index b5b833e..0183879 100644
--- a/README
+++ b/README
@@ -25,83 +25,85 @@ EXTENDED DESCRIPTION
(* CHARACTER CLASSES *)
- _space = " " | "\n" | "\t";
- _alpha = <"a", "z"> | <"A", "Z">;
- _octal = <"0", "7">;
- _digit = <"0", "9">;
- _xdigit = _digit | <"a", "f"> | <"A", "F">;
- _nonascii = <128, 255>;
+ _space = " " | "\n" | "\t";
+ _alpha = <"a", "z"> | <"A", "Z">;
+ _octal = <"0", "7">;
+ _digit = <"0", "9">;
+ _xdigit = _digit | <"a", "f"> | <"A", "F">;
+ _nonascii = <128, 255>;
(* WHITESPACE/COMMENTS, THE GRAMMAR IS FREE-FORM *)
- _comment_char = _space | !"*", !"\"", <"!", 0xFF>;
- _comment_tail = [_comment_char], [_string], ("*)" | _comment_tail | -);
- _comment = "(*", _comment_tail;
+ _comment_str_esc = "\\", (_space | <"!", 0xFF>);
+ _comment_str_char = _space | !"\"", <"!", 0xFF>;
+ _comment_str = "\"", {_comment_str_esc | _comment_str_char}, ("\"" | -);
+ _comment_char = _space | !"*)", !"\"", <"!", 0xFF>;
+ _comment = "(*", {_comment_char | _comment_str}, ("*)" | -);
- _ = {_space | _comment};
+ _ = {_space | _comment};
(* IDENTIFIERS *)
- _identifier_head = _alpha | _digit | _nonascii | "_";
- _identifier_tail = _identifier_head | "-";
+ _identifier_head = _alpha | _digit | _nonascii | "_";
+ _identifier_tail = _identifier_head | "-";
- identifier = _identifier_head, {_identifier_tail};
+ identifier = _identifier_head, {_identifier_tail};
(* STRINGS *)
- _escape_simple = "\\" | "\"" | "'" | "a" | "b" | "f" | "n" | "r" | "t" | "v";
- _escape_hex = ("x" | "X"), _xdigit, _xdigit;
- _escape_octal = _octal, {_octal}; (* May not exceed 255 in base 10 *)
- _escape = _escape_simple | _escape_hex | _escape_octal | -;
- _character = "\\", _escape | !"\"", <" ", 0xFF>;
- _string = "\"", _character, {_character}, ("\"" | -);
+ _escape_simple = "\\" | "\"" | "'" | "a" | "b" | "f" | "n" | "r" | "t" | "v";
+ _escape_hex = ("x" | "X"), _xdigit, _xdigit;
+ _escape_octal = _octal, {_octal}; (* May not exceed 255 in base 10 *)
+ _escape = _escape_simple | _escape_hex | _escape_octal | -;
+ _character = "\\", _escape | !"\"", <" ", 0xFF>;
+ _string = "\"", _character, {_character}, ("\"" | -);
- string = _string
- character = "\"", _character, ("\"" | -);
+ string = _string;
+ character = "\"", _character, ("\"" | -);
(* INTEGERS *)
- _decimal = _digit, {_digit};
- _hexadecimal = "0", ("x" | "X"), _xdigit, {_xdigit};
+ _decimal = _digit, {_digit};
+ _hexadecimal = "0", ("x" | "X"), _xdigit, {_xdigit};
- integer = _decimal | _hexadecimal; (* May not exceed 255. *)
+ integer = _hexadecimal | _decimal; (* May not exceed 255. *)
(* GROUPINGS *)
- _low = character | integer;
- _high = character | integer;
+ _low = character | integer;
+ _high = character | integer;
- nondeterministic = "?";
+ nondeterministic = "?";
- committed = "+", _, _operand;
- rejection = "!", _, _operand;
- concatenation = _operand, {_, ",", _, _operand};
- alternation = concatenation, {_, [nondeterministic], "|", _, concatenation};
- optional = [nondeterministic], "[", _, _expression, _, "]";
- repeated = [nondeterministic], "{", _, _expression, _, "}";
- group = "(", _, _expression, _, ")";
- char-range = "<", _, _low, _, ",", _, _high, "_", ">";
- exception = "-";
- embedded-rule = identifier;
+ committed = "+", _, _operand;
+ rejection = "!", _, _operand;
+ concatenation = _operand, {_, ",", _, _operand};
+ alternation = concatenation, {_, [nondeterministic], "|", _, concatenation};
+ optional = [nondeterministic], "[", _, _expression, _, "]";
+ repeated = [nondeterministic], "{", _, _expression, _, "}";
+ group = "(", _, _expression, _, ")";
+ char-range = "<", _, _low, _, ",", _, _high, _, ">";
+ exception = "-";
+ embedded-rule = identifier;
- _literal = char-range | exception | string;
- _group = optional | repeated | group | embedded-rule;
- _operand = _group | _literal | rejection | committed;
+ _literal = char-range | exception | string;
+ _group = optional | repeated | group | embedded-rule;
+ _operand = _group | _literal | rejection | committed;
- _expression = alternation;
+ _expression = alternation;
(* RULES *)
- rule = identifier, _, "=", _, _expression, _, ";";
+ rule = identifier, _, "=", _, _expression, _, ";";
(* This is the root rule of the grammar. *)
- grammar = _, {rules, _};
+ grammar = _, {rule, _};
The file must be encoded in UTF-8, with LF as the line
break (CR and FF are illegal just because).
diff --git a/TODO b/TODO
index 0989ca3..8f7c514 100644
--- a/TODO
+++ b/TODO
@@ -17,5 +17,3 @@ Add support for prelexed
the application to take action on parsed rules and
deallocate memory that is no longer needed after that.
The hooks shall also be able to cause the parser to abort.
-
-Add tests
diff --git a/libparser.7 b/libparser.7
index 174bac9..d06e5ab 100644
--- a/libparser.7
+++ b/libparser.7
@@ -37,83 +37,85 @@ input can be described in its own grammar:
.nf
(* CHARACTER CLASSES *)
-_space = \(dq \(dq | \(dq\en\(dq | \(dq\et\(dq;
-_alpha = <\(dqa\(dq, \(dqz\(dq> | <\(dqA\(dq, \(dqZ\(dq>;
-_octal = <\(dq0\(dq, \(dq7\(dq>;
-_digit = <\(dq0\(dq, \(dq9\(dq>;
-_xdigit = _digit | <\(dqa\(dq, \(dqf\(dq> | <\(dqA\(dq, \(dqF\(dq>;
-_nonascii = <128, 255>;
+_space = \(dq \(dq | \(dq\en\(dq | \(dq\et\(dq;
+_alpha = <\(dqa\(dq, \(dqz\(dq> | <\(dqA\(dq, \(dqZ\(dq>;
+_octal = <\(dq0\(dq, \(dq7\(dq>;
+_digit = <\(dq0\(dq, \(dq9\(dq>;
+_xdigit = _digit | <\(dqa\(dq, \(dqf\(dq> | <\(dqA\(dq, \(dqF\(dq>;
+_nonascii = <128, 255>;
(* WHITESPACE/COMMENTS, THE GRAMMAR IS FREE-FORM *)
-_comment_char = _space | !\(dq*\(dq, !\(dq\e\(dq\(dq, <\(dq!\(dq, 0xFF>;
-_comment_tail = [_comment_char], [_string], (\(dq*)\(dq | [*], _comment_tail | -);
-_comment = \(dq(*\(dq, _comment_tail;
+_comment_str_esc = \(dq\e\e\(dq, (_space | <\(dq!\(dq, 0xFF>);
+_comment_str_char = _space | !\(dq\e\(dq\(dq, <\(dq!\(dq, 0xFF>;
+_comment_str = \(dq\e\(dq\(dq, {_comment_str_esc | _comment_str_char}, (\(dq\e\(dq\(dq | -);
+_comment_char = _space | !\(dq*)\(dq, !\(dq\e\(dq\(dq, <\(dq!\(dq, 0xFF>;
+_comment = \(dq(*\(dq, {_comment_char | _comment_str}, (\(dq*)\(dq | -);
-_ = {_space | _comment};
+_ = {_space | _comment};
(* IDENTIFIERS *)
-_identifier_head = _alpha | _digit | _nonascii | \(dq_\(dq;
-_identifier_tail = _identifier_head | \(dq-\(dq;
+_identifier_head = _alpha | _digit | _nonascii | \(dq_\(dq;
+_identifier_tail = _identifier_head | \(dq-\(dq;
-identifier = _identifier_head, {_identifier_tail};
+identifier = _identifier_head, {_identifier_tail};
(* STRINGS *)
-_escape_simple = \(dq\e\e\(dq | \(dq\e\(dq\(dq | \(dq'\(dq | \(dqa\(dq | \(dqb\(dq | \(dqf\(dq | \(dqn\(dq | \(dqr\(dq | \(dqt\(dq | \(dqv\(dq;
-_escape_hex = (\(dqx\(dq | \(dqX\(dq), _xdigit, _xdigit;
-_escape_octal = _octal, {_octal}; (* May not exceed 255 in base 10 *)
-_escape = _escape_simple | _escape_hex | _escape_octal | -;
-_character = \(dq\e\e\(dq, _escape | !\(dq\e\(dq\(dq, <\(dq \(dq, 0xFF>;
-_string = \(dq\e\(dq\(dq, _character, {_character}, (\(dq\e\(dq\(dq | -);
+_escape_simple = \(dq\e\e\(dq | \(dq\e\(dq\(dq | \(dq'\(dq | \(dqa\(dq | \(dqb\(dq | \(dqf\(dq | \(dqn\(dq | \(dqr\(dq | \(dqt\(dq | \(dqv\(dq;
+_escape_hex = (\(dqx\(dq | \(dqX\(dq), _xdigit, _xdigit;
+_escape_octal = _octal, {_octal}; (* May not exceed 255 in base 10 *)
+_escape = _escape_simple | _escape_hex | _escape_octal | -;
+_character = \(dq\e\e\(dq, _escape | !\(dq\e\(dq\(dq, <\(dq \(dq, 0xFF>;
+_string = \(dq\e\(dq\(dq, _character, {_character}, (\(dq\e\(dq\(dq | -);
-string = _string;
-character = \(dq\e\(dq\(dq, _character, (\(dq\e\(dq\(dq | -);
+string = _string;
+character = \(dq\e\(dq\(dq, _character, (\(dq\e\(dq\(dq | -);
(* INTEGERS *)
-_decimal = _digit, {_digit};
-_hexadecimal = \(dq0\(dq, (\(dqx\(dq | \(dqX\(dq), _xdigit, {_xdigit};
+_decimal = _digit, {_digit};
+_hexadecimal = \(dq0\(dq, (\(dqx\(dq | \(dqX\(dq), _xdigit, {_xdigit};
-integer = _decimal | _hexadecimal; (* May not exceed 255. *)
+integer = _hexadecimal | _decimal; (* May not exceed 255. *)
(* GROUPINGS *)
-_low = character | integer;
-_high = character | integer;
+_low = character | integer;
+_high = character | integer;
-nondeterministic = \(dq?\(dq;
+nondeterministic = \(dq?\(dq;
-committed = \(dq+\(dq, _, _operand;
-rejection = \(dq!\(dq, _, _operand;
-concatenation = _operand, {_, \(dq,\(dq, _, _operand};
-alternation = concatenation, {_, [nondeterministic], \(dq|\(dq, _, concatenation};
-optional = [nondeterministic], \(dq[\(dq, _, _expression, _, \(dq]\(dq;
-repeated = [nondeterministic], \(dq{\(dq, _, _expression, _, \(dq}\(dq;
-group = \(dq(\(dq, _, _expression, _, \(dq)\(dq;
-char-range = \(dq<\(dq, _, _low, _, \(dq,\(dq, _, _high, \(dq_\(dq, \(dq>\(dq;
-exception = \(dq-\(dq;
-embedded-rule = identifier;
+committed = \(dq+\(dq, _, _operand;
+rejection = \(dq!\(dq, _, _operand;
+concatenation = _operand, {_, \(dq,\(dq, _, _operand};
+alternation = concatenation, {_, [nondeterministic], \(dq|\(dq, _, concatenation};
+optional = [nondeterministic], \(dq[\(dq, _, _expression, _, \(dq]\(dq;
+repeated = [nondeterministic], \(dq{\(dq, _, _expression, _, \(dq}\(dq;
+group = \(dq(\(dq, _, _expression, _, \(dq)\(dq;
+char-range = \(dq<\(dq, _, _low, _, \(dq,\(dq, _, _high, _, \(dq>\(dq;
+exception = \(dq-\(dq;
+embedded-rule = identifier;
-_literal = char-range | exception | string;
-_group = optional | repeated | group | embedded-rule;
-_operand = _group | _literal | rejection | committed;
+_literal = char-range | exception | string;
+_group = optional | repeated | group | embedded-rule;
+_operand = _group | _literal | rejection | committed;
-_expression = alternation;
+_expression = alternation;
(* RULES *)
-rule = identifier, _, \(dq=\(dq, _, _expression, _, \(dq;\(dq;
+rule = identifier, _, \(dq=\(dq, _, _expression, _, \(dq;\(dq;
(* This is the root rule of the grammar. *)
-grammar = _, {rules, _};
+grammar = _, {rule, _};
.fi
.PP
.RE
diff --git a/print-syntax.c b/print-syntax.c
index 6a00fb8..e4c202b 100644
--- a/print-syntax.c
+++ b/print-syntax.c
@@ -30,7 +30,7 @@ print_sentence(const union libparser_sentence *sentence, int indent)
case LIBPARSER_SENTENCE_TYPE_ND_ALTERNATION:
printf("(");
print_sentence(sentence->binary.left, indent + 1);
- printf(" ?| \n%*.s", indent + 1, "");
+ printf(" ?|\n%*.s", indent + 1, "");
indent = print_sentence(sentence->binary.right, indent + 1);
printf(")");
indent += 1;
@@ -39,7 +39,7 @@ print_sentence(const union libparser_sentence *sentence, int indent)
case LIBPARSER_SENTENCE_TYPE_ALTERNATION:
printf("(");
print_sentence(sentence->binary.left, indent + 1);
- printf(" | \n%*.s", indent + 1, "");
+ printf(" |\n%*.s", indent + 1, "");
indent = print_sentence(sentence->binary.right, indent + 1);
printf(")");
indent += 1;