cleanup

Signed-off-by: Mattias Andrée <m@maandree.se>
author: Mattias Andrée <m@maandree.se> 2026-02-26 17:23:41 +0100
committer: Mattias Andrée <m@maandree.se> 2026-02-26 17:23:41 +0100
commit: f75673290768677c08135f6aada53298544a14f2 (patch)
tree: 6b2fb2cbf4f380c400fcd4fb3ed5a594f2b400a9
parent: Add extras/libparser-syntax-highlighter (diff)
download: libparser-f75673290768677c08135f6aada53298544a14f2.tar.gz
libparser-f75673290768677c08135f6aada53298544a14f2.tar.bz2
libparser-f75673290768677c08135f6aada53298544a14f2.tar.xz
4 files changed, 92 insertions, 90 deletions
diff --git a/README b/README
index b5b833e..0183879 100644
--- a/README
+++ b/README
@@ -25,83 +25,85 @@ EXTENDED DESCRIPTION
 
 		(* CHARACTER CLASSES *)
 
-		_space           = " " | "\n" | "\t";
-		_alpha           = <"a", "z"> | <"A", "Z">;
-		_octal           = <"0", "7">;
-		_digit           = <"0", "9">;
-		_xdigit          = _digit | <"a", "f"> | <"A", "F">;
-		_nonascii        = <128, 255>;
+		_space            = " " | "\n" | "\t";
+		_alpha            = <"a", "z"> | <"A", "Z">;
+		_octal            = <"0", "7">;
+		_digit            = <"0", "9">;
+		_xdigit           = _digit | <"a", "f"> | <"A", "F">;
+		_nonascii         = <128, 255>;
 
 
 		(* WHITESPACE/COMMENTS, THE GRAMMAR IS FREE-FORM *)
 
-		_comment_char    = _space | !"*", !"\"", <"!", 0xFF>;
-		_comment_tail    = [_comment_char], [_string], ("*)" | _comment_tail | -);
-		_comment         = "(*", _comment_tail;
+		_comment_str_esc  = "\\", (_space | <"!", 0xFF>);
+		_comment_str_char = _space | !"\"", <"!", 0xFF>;
+		_comment_str      = "\"", {_comment_str_esc | _comment_str_char}, ("\"" | -);
+		_comment_char     = _space | !"*)", !"\"", <"!", 0xFF>;
+		_comment          = "(*", {_comment_char | _comment_str}, ("*)" | -);
 
-		_                = {_space | _comment};
+		_                 = {_space | _comment};
 
 
 		(* IDENTIFIERS *)
 
-		_identifier_head = _alpha | _digit | _nonascii | "_";
-		_identifier_tail = _identifier_head | "-";
+		_identifier_head  = _alpha | _digit | _nonascii | "_";
+		_identifier_tail  = _identifier_head | "-";
 
-		identifier       = _identifier_head, {_identifier_tail};
+		identifier        = _identifier_head, {_identifier_tail};
 
 
 		(* STRINGS *)
 
-		_escape_simple   = "\\" | "\"" | "'" | "a" | "b" | "f" | "n" | "r" | "t" | "v";
-		_escape_hex      = ("x" | "X"), _xdigit, _xdigit;
-		_escape_octal    = _octal, {_octal}; (* May not exceed 255 in base 10 *)
-		_escape          = _escape_simple | _escape_hex | _escape_octal | -;
-		_character       = "\\", _escape | !"\"", <" ", 0xFF>;
-		_string          = "\"", _character, {_character}, ("\"" | -);
+		_escape_simple    = "\\" | "\"" | "'" | "a" | "b" | "f" | "n" | "r" | "t" | "v";
+		_escape_hex       = ("x" | "X"), _xdigit, _xdigit;
+		_escape_octal     = _octal, {_octal}; (* May not exceed 255 in base 10 *)
+		_escape           = _escape_simple | _escape_hex | _escape_octal | -;
+		_character        = "\\", _escape | !"\"", <" ", 0xFF>;
+		_string           = "\"", _character, {_character}, ("\"" | -);
 
-		string           = _string
-		character        = "\"", _character, ("\"" | -);
+		string            = _string;
+		character         = "\"", _character, ("\"" | -);
 
 
 		(* INTEGERS *)
 
-		_decimal         = _digit, {_digit};
-		_hexadecimal     = "0", ("x" | "X"), _xdigit, {_xdigit};
+		_decimal          = _digit, {_digit};
+		_hexadecimal      = "0", ("x" | "X"), _xdigit, {_xdigit};
 
-		integer          = _decimal | _hexadecimal; (* May not exceed 255. *)
+		integer           = _hexadecimal | _decimal; (* May not exceed 255. *)
 
 
 		(* GROUPINGS *)
 
-		_low             = character | integer;
-		_high            = character | integer;
+		_low              = character | integer;
+		_high             = character | integer;
 
-		nondeterministic = "?";
+		nondeterministic  = "?";
 
-		committed        = "+", _, _operand;
-		rejection        = "!", _, _operand;
-		concatenation    = _operand, {_, ",", _, _operand};
-		alternation      = concatenation, {_, [nondeterministic], "|", _, concatenation};
-		optional         = [nondeterministic], "[", _, _expression, _, "]";
-		repeated         = [nondeterministic], "{", _, _expression, _, "}";
-		group            = "(", _, _expression, _, ")";
-		char-range       = "<", _, _low, _, ",", _, _high, "_", ">";
-		exception        = "-";
-		embedded-rule    = identifier;
+		committed         = "+", _, _operand;
+		rejection         = "!", _, _operand;
+		concatenation     = _operand, {_, ",", _, _operand};
+		alternation       = concatenation, {_, [nondeterministic], "|", _, concatenation};
+		optional          = [nondeterministic], "[", _, _expression, _, "]";
+		repeated          = [nondeterministic], "{", _, _expression, _, "}";
+		group             = "(", _, _expression, _, ")";
+		char-range        = "<", _, _low, _, ",", _, _high, _, ">";
+		exception         = "-";
+		embedded-rule     = identifier;
 
-		_literal         = char-range | exception | string;
-		_group           = optional | repeated | group | embedded-rule;
-		_operand         = _group | _literal | rejection | committed;
+		_literal          = char-range | exception | string;
+		_group            = optional | repeated | group | embedded-rule;
+		_operand          = _group | _literal | rejection | committed;
 
-		_expression      = alternation;
+		_expression       = alternation;
 
 
 		(* RULES *)
 
-		rule             = identifier, _, "=", _, _expression, _, ";";
+		rule              = identifier, _, "=", _, _expression, _, ";";
 
 		(* This is the root rule of the grammar. *)
-		grammar          = _, {rules, _};
+		grammar           = _, {rule, _};
 
 	The file must be encoded in UTF-8, with LF as the line
 	break (CR and FF are illegal just because).
diff --git a/TODO b/TODO
index 0989ca3..8f7c514 100644
--- a/TODO
+++ b/TODO
@@ -17,5 +17,3 @@ Add support for prelexed
 	the application to take action on parsed rules and
 	deallocate memory that is no longer needed after that.
 	The hooks shall also be able to cause the parser to abort.
-
-Add tests
diff --git a/libparser.7 b/libparser.7
index 174bac9..d06e5ab 100644
--- a/libparser.7
+++ b/libparser.7
@@ -37,83 +37,85 @@ input can be described in its own grammar:
 .nf
 (* CHARACTER CLASSES *)
 
-_space           = \(dq \(dq | \(dq\en\(dq | \(dq\et\(dq;
-_alpha           = <\(dqa\(dq, \(dqz\(dq> | <\(dqA\(dq, \(dqZ\(dq>;
-_octal           = <\(dq0\(dq, \(dq7\(dq>;
-_digit           = <\(dq0\(dq, \(dq9\(dq>;
-_xdigit          = _digit | <\(dqa\(dq, \(dqf\(dq> | <\(dqA\(dq, \(dqF\(dq>;
-_nonascii        = <128, 255>;
+_space            = \(dq \(dq | \(dq\en\(dq | \(dq\et\(dq;
+_alpha            = <\(dqa\(dq, \(dqz\(dq> | <\(dqA\(dq, \(dqZ\(dq>;
+_octal            = <\(dq0\(dq, \(dq7\(dq>;
+_digit            = <\(dq0\(dq, \(dq9\(dq>;
+_xdigit           = _digit | <\(dqa\(dq, \(dqf\(dq> | <\(dqA\(dq, \(dqF\(dq>;
+_nonascii         = <128, 255>;
 
 
 (* WHITESPACE/COMMENTS, THE GRAMMAR IS FREE-FORM *)
 
-_comment_char    = _space | !\(dq*\(dq, !\(dq\e\(dq\(dq, <\(dq!\(dq, 0xFF>;
-_comment_tail    = [_comment_char], [_string], (\(dq*)\(dq | [*], _comment_tail | -);
-_comment         = \(dq(*\(dq, _comment_tail;
+_comment_str_esc  = \(dq\e\e\(dq, (_space | <\(dq!\(dq, 0xFF>);
+_comment_str_char = _space | !\(dq\e\(dq\(dq, <\(dq!\(dq, 0xFF>;
+_comment_str      = \(dq\e\(dq\(dq, {_comment_str_esc | _comment_str_char}, (\(dq\e\(dq\(dq | -);
+_comment_char     = _space | !\(dq*)\(dq, !\(dq\e\(dq\(dq, <\(dq!\(dq, 0xFF>;
+_comment          = \(dq(*\(dq, {_comment_char | _comment_str}, (\(dq*)\(dq | -);
 
-_                = {_space | _comment};
+_                 = {_space | _comment};
 
 
 (* IDENTIFIERS *)
 
-_identifier_head = _alpha | _digit | _nonascii | \(dq_\(dq;
-_identifier_tail = _identifier_head | \(dq-\(dq;
+_identifier_head  = _alpha | _digit | _nonascii | \(dq_\(dq;
+_identifier_tail  = _identifier_head | \(dq-\(dq;
 
-identifier       = _identifier_head, {_identifier_tail};
+identifier        = _identifier_head, {_identifier_tail};
 
 
 (* STRINGS *)
 
-_escape_simple   = \(dq\e\e\(dq | \(dq\e\(dq\(dq | \(dq'\(dq | \(dqa\(dq | \(dqb\(dq | \(dqf\(dq | \(dqn\(dq | \(dqr\(dq | \(dqt\(dq | \(dqv\(dq;
-_escape_hex      = (\(dqx\(dq | \(dqX\(dq), _xdigit, _xdigit;
-_escape_octal    = _octal, {_octal}; (* May not exceed 255 in base 10 *)
-_escape          = _escape_simple | _escape_hex | _escape_octal | -;
-_character       = \(dq\e\e\(dq, _escape | !\(dq\e\(dq\(dq, <\(dq \(dq, 0xFF>;
-_string          = \(dq\e\(dq\(dq, _character, {_character}, (\(dq\e\(dq\(dq | -);
+_escape_simple    = \(dq\e\e\(dq | \(dq\e\(dq\(dq | \(dq'\(dq | \(dqa\(dq | \(dqb\(dq | \(dqf\(dq | \(dqn\(dq | \(dqr\(dq | \(dqt\(dq | \(dqv\(dq;
+_escape_hex       = (\(dqx\(dq | \(dqX\(dq), _xdigit, _xdigit;
+_escape_octal     = _octal, {_octal}; (* May not exceed 255 in base 10 *)
+_escape           = _escape_simple | _escape_hex | _escape_octal | -;
+_character        = \(dq\e\e\(dq, _escape | !\(dq\e\(dq\(dq, <\(dq \(dq, 0xFF>;
+_string           = \(dq\e\(dq\(dq, _character, {_character}, (\(dq\e\(dq\(dq | -);
 
-string           = _string;
-character        = \(dq\e\(dq\(dq, _character, (\(dq\e\(dq\(dq | -);
+string            = _string;
+character         = \(dq\e\(dq\(dq, _character, (\(dq\e\(dq\(dq | -);
 
 
 (* INTEGERS *)
 
-_decimal         = _digit, {_digit};
-_hexadecimal     = \(dq0\(dq, (\(dqx\(dq | \(dqX\(dq), _xdigit, {_xdigit};
+_decimal          = _digit, {_digit};
+_hexadecimal      = \(dq0\(dq, (\(dqx\(dq | \(dqX\(dq), _xdigit, {_xdigit};
 
-integer          = _decimal | _hexadecimal; (* May not exceed 255. *)
+integer           = _hexadecimal | _decimal; (* May not exceed 255. *)
 
 
 (* GROUPINGS *)
 
-_low             = character | integer;
-_high            = character | integer;
+_low              = character | integer;
+_high             = character | integer;
 
-nondeterministic = \(dq?\(dq;
+nondeterministic  = \(dq?\(dq;
 
-committed        = \(dq+\(dq, _, _operand;
-rejection        = \(dq!\(dq, _, _operand;
-concatenation    = _operand, {_, \(dq,\(dq, _, _operand};
-alternation      = concatenation, {_, [nondeterministic], \(dq|\(dq, _, concatenation};
-optional         = [nondeterministic], \(dq[\(dq, _, _expression, _, \(dq]\(dq;
-repeated         = [nondeterministic], \(dq{\(dq, _, _expression, _, \(dq}\(dq;
-group            = \(dq(\(dq, _, _expression, _, \(dq)\(dq;
-char-range       = \(dq<\(dq, _, _low, _, \(dq,\(dq, _, _high, \(dq_\(dq, \(dq>\(dq;
-exception        = \(dq-\(dq;
-embedded-rule    = identifier;
+committed         = \(dq+\(dq, _, _operand;
+rejection         = \(dq!\(dq, _, _operand;
+concatenation     = _operand, {_, \(dq,\(dq, _, _operand};
+alternation       = concatenation, {_, [nondeterministic], \(dq|\(dq, _, concatenation};
+optional          = [nondeterministic], \(dq[\(dq, _, _expression, _, \(dq]\(dq;
+repeated          = [nondeterministic], \(dq{\(dq, _, _expression, _, \(dq}\(dq;
+group             = \(dq(\(dq, _, _expression, _, \(dq)\(dq;
+char-range        = \(dq<\(dq, _, _low, _, \(dq,\(dq, _, _high, _, \(dq>\(dq;
+exception         = \(dq-\(dq;
+embedded-rule     = identifier;
 
-_literal         = char-range | exception | string;
-_group           = optional | repeated | group | embedded-rule;
-_operand         = _group | _literal | rejection | committed;
+_literal          = char-range | exception | string;
+_group            = optional | repeated | group | embedded-rule;
+_operand          = _group | _literal | rejection | committed;
 
-_expression      = alternation;
+_expression       = alternation;
 
 
 (* RULES *)
 
-rule             = identifier, _, \(dq=\(dq, _, _expression, _, \(dq;\(dq;
+rule              = identifier, _, \(dq=\(dq, _, _expression, _, \(dq;\(dq;
 
 (* This is the root rule of the grammar. *)
-grammar          = _, {rules, _};
+grammar           = _, {rule, _};
 .fi
 .PP
 .RE
diff --git a/print-syntax.c b/print-syntax.c
index 6a00fb8..e4c202b 100644
--- a/print-syntax.c
+++ b/print-syntax.c
@@ -30,7 +30,7 @@ print_sentence(const union libparser_sentence *sentence, int indent)
 	case LIBPARSER_SENTENCE_TYPE_ND_ALTERNATION:
 		printf("(");
 		print_sentence(sentence->binary.left, indent + 1);
-		printf(" ?| \n%*.s", indent + 1, "");
+		printf(" ?|\n%*.s", indent + 1, "");
 		indent = print_sentence(sentence->binary.right, indent + 1);
 		printf(")");
 		indent += 1;
@@ -39,7 +39,7 @@ print_sentence(const union libparser_sentence *sentence, int indent)
 	case LIBPARSER_SENTENCE_TYPE_ALTERNATION:
 		printf("(");
 		print_sentence(sentence->binary.left, indent + 1);
-		printf(" | \n%*.s", indent + 1, "");
+		printf(" |\n%*.s", indent + 1, "");
 		indent = print_sentence(sentence->binary.right, indent + 1);
 		printf(")");
 		indent += 1;
author	Mattias Andrée <m@maandree.se>	2026-02-26 17:23:41 +0100
committer	Mattias Andrée <m@maandree.se>	2026-02-26 17:23:41 +0100
commit	f75673290768677c08135f6aada53298544a14f2 (patch)
tree	6b2fb2cbf4f380c400fcd4fb3ed5a594f2b400a9
parent	Add extras/libparser-syntax-highlighter (diff)
download	libparser-f75673290768677c08135f6aada53298544a14f2.tar.gz libparser-f75673290768677c08135f6aada53298544a14f2.tar.bz2 libparser-f75673290768677c08135f6aada53298544a14f2.tar.xz