From f75673290768677c08135f6aada53298544a14f2 Mon Sep 17 00:00:00 2001
From: Mattias Andrée <m@maandree.se>
Date: Thu, 26 Feb 2026 17:23:41 +0100
Subject: cleanup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Mattias Andrée <m@maandree.se>
---
 README         | 88 ++++++++++++++++++++++++++++++----------------------------
 TODO           |  2 --
 libparser.7    | 88 ++++++++++++++++++++++++++++++----------------------------
 print-syntax.c |  4 +--
 4 files changed, 92 insertions(+), 90 deletions(-)

diff --git a/README b/README
index b5b833e..0183879 100644
--- a/README
+++ b/README
@@ -25,83 +25,85 @@ EXTENDED DESCRIPTION
 
 		(* CHARACTER CLASSES *)
 
-		_space           = " " | "\n" | "\t";
-		_alpha           = <"a", "z"> | <"A", "Z">;
-		_octal           = <"0", "7">;
-		_digit           = <"0", "9">;
-		_xdigit          = _digit | <"a", "f"> | <"A", "F">;
-		_nonascii        = <128, 255>;
+		_space            = " " | "\n" | "\t";
+		_alpha            = <"a", "z"> | <"A", "Z">;
+		_octal            = <"0", "7">;
+		_digit            = <"0", "9">;
+		_xdigit           = _digit | <"a", "f"> | <"A", "F">;
+		_nonascii         = <128, 255>;
 
 
 		(* WHITESPACE/COMMENTS, THE GRAMMAR IS FREE-FORM *)
 
-		_comment_char    = _space | !"*", !"\"", <"!", 0xFF>;
-		_comment_tail    = [_comment_char], [_string], ("*)" | _comment_tail | -);
-		_comment         = "(*", _comment_tail;
+		_comment_str_esc  = "\\", (_space | <"!", 0xFF>);
+		_comment_str_char = _space | !"\"", <"!", 0xFF>;
+		_comment_str      = "\"", {_comment_str_esc | _comment_str_char}, ("\"" | -);
+		_comment_char     = _space | !"*)", !"\"", <"!", 0xFF>;
+		_comment          = "(*", {_comment_char | _comment_str}, ("*)" | -);
 
-		_                = {_space | _comment};
+		_                 = {_space | _comment};
 
 
 		(* IDENTIFIERS *)
 
-		_identifier_head = _alpha | _digit | _nonascii | "_";
-		_identifier_tail = _identifier_head | "-";
+		_identifier_head  = _alpha | _digit | _nonascii | "_";
+		_identifier_tail  = _identifier_head | "-";
 
-		identifier       = _identifier_head, {_identifier_tail};
+		identifier        = _identifier_head, {_identifier_tail};
 
 
 		(* STRINGS *)
 
-		_escape_simple   = "\\" | "\"" | "'" | "a" | "b" | "f" | "n" | "r" | "t" | "v";
-		_escape_hex      = ("x" | "X"), _xdigit, _xdigit;
-		_escape_octal    = _octal, {_octal}; (* May not exceed 255 in base 10 *)
-		_escape          = _escape_simple | _escape_hex | _escape_octal | -;
-		_character       = "\\", _escape | !"\"", <" ", 0xFF>;
-		_string          = "\"", _character, {_character}, ("\"" | -);
+		_escape_simple    = "\\" | "\"" | "'" | "a" | "b" | "f" | "n" | "r" | "t" | "v";
+		_escape_hex       = ("x" | "X"), _xdigit, _xdigit;
+		_escape_octal     = _octal, {_octal}; (* May not exceed 255 in base 10 *)
+		_escape           = _escape_simple | _escape_hex | _escape_octal | -;
+		_character        = "\\", _escape | !"\"", <" ", 0xFF>;
+		_string           = "\"", _character, {_character}, ("\"" | -);
 
-		string           = _string
-		character        = "\"", _character, ("\"" | -);
+		string            = _string;
+		character         = "\"", _character, ("\"" | -);
 
 
 		(* INTEGERS *)
 
-		_decimal         = _digit, {_digit};
-		_hexadecimal     = "0", ("x" | "X"), _xdigit, {_xdigit};
+		_decimal          = _digit, {_digit};
+		_hexadecimal      = "0", ("x" | "X"), _xdigit, {_xdigit};
 
-		integer          = _decimal | _hexadecimal; (* May not exceed 255. *)
+		integer           = _hexadecimal | _decimal; (* May not exceed 255. *)
 
 
 		(* GROUPINGS *)
 
-		_low             = character | integer;
-		_high            = character | integer;
+		_low              = character | integer;
+		_high             = character | integer;
 
-		nondeterministic = "?";
+		nondeterministic  = "?";
 
-		committed        = "+", _, _operand;
-		rejection        = "!", _, _operand;
-		concatenation    = _operand, {_, ",", _, _operand};
-		alternation      = concatenation, {_, [nondeterministic], "|", _, concatenation};
-		optional         = [nondeterministic], "[", _, _expression, _, "]";
-		repeated         = [nondeterministic], "{", _, _expression, _, "}";
-		group            = "(", _, _expression, _, ")";
-		char-range       = "<", _, _low, _, ",", _, _high, "_", ">";
-		exception        = "-";
-		embedded-rule    = identifier;
+		committed         = "+", _, _operand;
+		rejection         = "!", _, _operand;
+		concatenation     = _operand, {_, ",", _, _operand};
+		alternation       = concatenation, {_, [nondeterministic], "|", _, concatenation};
+		optional          = [nondeterministic], "[", _, _expression, _, "]";
+		repeated          = [nondeterministic], "{", _, _expression, _, "}";
+		group             = "(", _, _expression, _, ")";
+		char-range        = "<", _, _low, _, ",", _, _high, _, ">";
+		exception         = "-";
+		embedded-rule     = identifier;
 
-		_literal         = char-range | exception | string;
-		_group           = optional | repeated | group | embedded-rule;
-		_operand         = _group | _literal | rejection | committed;
+		_literal          = char-range | exception | string;
+		_group            = optional | repeated | group | embedded-rule;
+		_operand          = _group | _literal | rejection | committed;
 
-		_expression      = alternation;
+		_expression       = alternation;
 
 
 		(* RULES *)
 
-		rule             = identifier, _, "=", _, _expression, _, ";";
+		rule              = identifier, _, "=", _, _expression, _, ";";
 
 		(* This is the root rule of the grammar. *)
-		grammar          = _, {rules, _};
+		grammar           = _, {rule, _};
 
 	The file must be encoded in UTF-8, with LF as the line
 	break (CR and FF are illegal just because).
diff --git a/TODO b/TODO
index 0989ca3..8f7c514 100644
--- a/TODO
+++ b/TODO
@@ -17,5 +17,3 @@ Add support for prelexed
 	the application to take action on parsed rules and
 	deallocate memory that is no longer needed after that.
 	The hooks shall also be able to cause the parser to abort.
-
-Add tests
diff --git a/libparser.7 b/libparser.7
index 174bac9..d06e5ab 100644
--- a/libparser.7
+++ b/libparser.7
@@ -37,83 +37,85 @@ input can be described in its own grammar:
 .nf
 (* CHARACTER CLASSES *)
 
-_space           = \(dq \(dq | \(dq\en\(dq | \(dq\et\(dq;
-_alpha           = <\(dqa\(dq, \(dqz\(dq> | <\(dqA\(dq, \(dqZ\(dq>;
-_octal           = <\(dq0\(dq, \(dq7\(dq>;
-_digit           = <\(dq0\(dq, \(dq9\(dq>;
-_xdigit          = _digit | <\(dqa\(dq, \(dqf\(dq> | <\(dqA\(dq, \(dqF\(dq>;
-_nonascii        = <128, 255>;
+_space            = \(dq \(dq | \(dq\en\(dq | \(dq\et\(dq;
+_alpha            = <\(dqa\(dq, \(dqz\(dq> | <\(dqA\(dq, \(dqZ\(dq>;
+_octal            = <\(dq0\(dq, \(dq7\(dq>;
+_digit            = <\(dq0\(dq, \(dq9\(dq>;
+_xdigit           = _digit | <\(dqa\(dq, \(dqf\(dq> | <\(dqA\(dq, \(dqF\(dq>;
+_nonascii         = <128, 255>;
 
 
 (* WHITESPACE/COMMENTS, THE GRAMMAR IS FREE-FORM *)
 
-_comment_char    = _space | !\(dq*\(dq, !\(dq\e\(dq\(dq, <\(dq!\(dq, 0xFF>;
-_comment_tail    = [_comment_char], [_string], (\(dq*)\(dq | [*], _comment_tail | -);
-_comment         = \(dq(*\(dq, _comment_tail;
+_comment_str_esc  = \(dq\e\e\(dq, (_space | <\(dq!\(dq, 0xFF>);
+_comment_str_char = _space | !\(dq\e\(dq\(dq, <\(dq!\(dq, 0xFF>;
+_comment_str      = \(dq\e\(dq\(dq, {_comment_str_esc | _comment_str_char}, (\(dq\e\(dq\(dq | -);
+_comment_char     = _space | !\(dq*)\(dq, !\(dq\e\(dq\(dq, <\(dq!\(dq, 0xFF>;
+_comment          = \(dq(*\(dq, {_comment_char | _comment_str}, (\(dq*)\(dq | -);
 
-_                = {_space | _comment};
+_                 = {_space | _comment};
 
 
 (* IDENTIFIERS *)
 
-_identifier_head = _alpha | _digit | _nonascii | \(dq_\(dq;
-_identifier_tail = _identifier_head | \(dq-\(dq;
+_identifier_head  = _alpha | _digit | _nonascii | \(dq_\(dq;
+_identifier_tail  = _identifier_head | \(dq-\(dq;
 
-identifier       = _identifier_head, {_identifier_tail};
+identifier        = _identifier_head, {_identifier_tail};
 
 
 (* STRINGS *)
 
-_escape_simple   = \(dq\e\e\(dq | \(dq\e\(dq\(dq | \(dq'\(dq | \(dqa\(dq | \(dqb\(dq | \(dqf\(dq | \(dqn\(dq | \(dqr\(dq | \(dqt\(dq | \(dqv\(dq;
-_escape_hex      = (\(dqx\(dq | \(dqX\(dq), _xdigit, _xdigit;
-_escape_octal    = _octal, {_octal}; (* May not exceed 255 in base 10 *)
-_escape          = _escape_simple | _escape_hex | _escape_octal | -;
-_character       = \(dq\e\e\(dq, _escape | !\(dq\e\(dq\(dq, <\(dq \(dq, 0xFF>;
-_string          = \(dq\e\(dq\(dq, _character, {_character}, (\(dq\e\(dq\(dq | -);
+_escape_simple    = \(dq\e\e\(dq | \(dq\e\(dq\(dq | \(dq'\(dq | \(dqa\(dq | \(dqb\(dq | \(dqf\(dq | \(dqn\(dq | \(dqr\(dq | \(dqt\(dq | \(dqv\(dq;
+_escape_hex       = (\(dqx\(dq | \(dqX\(dq), _xdigit, _xdigit;
+_escape_octal     = _octal, {_octal}; (* May not exceed 255 in base 10 *)
+_escape           = _escape_simple | _escape_hex | _escape_octal | -;
+_character        = \(dq\e\e\(dq, _escape | !\(dq\e\(dq\(dq, <\(dq \(dq, 0xFF>;
+_string           = \(dq\e\(dq\(dq, _character, {_character}, (\(dq\e\(dq\(dq | -);
 
-string           = _string;
-character        = \(dq\e\(dq\(dq, _character, (\(dq\e\(dq\(dq | -);
+string            = _string;
+character         = \(dq\e\(dq\(dq, _character, (\(dq\e\(dq\(dq | -);
 
 
 (* INTEGERS *)
 
-_decimal         = _digit, {_digit};
-_hexadecimal     = \(dq0\(dq, (\(dqx\(dq | \(dqX\(dq), _xdigit, {_xdigit};
+_decimal          = _digit, {_digit};
+_hexadecimal      = \(dq0\(dq, (\(dqx\(dq | \(dqX\(dq), _xdigit, {_xdigit};
 
-integer          = _decimal | _hexadecimal; (* May not exceed 255. *)
+integer           = _hexadecimal | _decimal; (* May not exceed 255. *)
 
 
 (* GROUPINGS *)
 
-_low             = character | integer;
-_high            = character | integer;
+_low              = character | integer;
+_high             = character | integer;
 
-nondeterministic = \(dq?\(dq;
+nondeterministic  = \(dq?\(dq;
 
-committed        = \(dq+\(dq, _, _operand;
-rejection        = \(dq!\(dq, _, _operand;
-concatenation    = _operand, {_, \(dq,\(dq, _, _operand};
-alternation      = concatenation, {_, [nondeterministic], \(dq|\(dq, _, concatenation};
-optional         = [nondeterministic], \(dq[\(dq, _, _expression, _, \(dq]\(dq;
-repeated         = [nondeterministic], \(dq{\(dq, _, _expression, _, \(dq}\(dq;
-group            = \(dq(\(dq, _, _expression, _, \(dq)\(dq;
-char-range       = \(dq<\(dq, _, _low, _, \(dq,\(dq, _, _high, \(dq_\(dq, \(dq>\(dq;
-exception        = \(dq-\(dq;
-embedded-rule    = identifier;
+committed         = \(dq+\(dq, _, _operand;
+rejection         = \(dq!\(dq, _, _operand;
+concatenation     = _operand, {_, \(dq,\(dq, _, _operand};
+alternation       = concatenation, {_, [nondeterministic], \(dq|\(dq, _, concatenation};
+optional          = [nondeterministic], \(dq[\(dq, _, _expression, _, \(dq]\(dq;
+repeated          = [nondeterministic], \(dq{\(dq, _, _expression, _, \(dq}\(dq;
+group             = \(dq(\(dq, _, _expression, _, \(dq)\(dq;
+char-range        = \(dq<\(dq, _, _low, _, \(dq,\(dq, _, _high, _, \(dq>\(dq;
+exception         = \(dq-\(dq;
+embedded-rule     = identifier;
 
-_literal         = char-range | exception | string;
-_group           = optional | repeated | group | embedded-rule;
-_operand         = _group | _literal | rejection | committed;
+_literal          = char-range | exception | string;
+_group            = optional | repeated | group | embedded-rule;
+_operand          = _group | _literal | rejection | committed;
 
-_expression      = alternation;
+_expression       = alternation;
 
 
 (* RULES *)
 
-rule             = identifier, _, \(dq=\(dq, _, _expression, _, \(dq;\(dq;
+rule              = identifier, _, \(dq=\(dq, _, _expression, _, \(dq;\(dq;
 
 (* This is the root rule of the grammar. *)
-grammar          = _, {rules, _};
+grammar           = _, {rule, _};
 .fi
 .PP
 .RE
diff --git a/print-syntax.c b/print-syntax.c
index 6a00fb8..e4c202b 100644
--- a/print-syntax.c
+++ b/print-syntax.c
@@ -30,7 +30,7 @@ print_sentence(const union libparser_sentence *sentence, int indent)
 	case LIBPARSER_SENTENCE_TYPE_ND_ALTERNATION:
 		printf("(");
 		print_sentence(sentence->binary.left, indent + 1);
-		printf(" ?| \n%*.s", indent + 1, "");
+		printf(" ?|\n%*.s", indent + 1, "");
 		indent = print_sentence(sentence->binary.right, indent + 1);
 		printf(")");
 		indent += 1;
@@ -39,7 +39,7 @@ print_sentence(const union libparser_sentence *sentence, int indent)
 	case LIBPARSER_SENTENCE_TYPE_ALTERNATION:
 		printf("(");
 		print_sentence(sentence->binary.left, indent + 1);
-		printf(" | \n%*.s", indent + 1, "");
+		printf(" |\n%*.s", indent + 1, "");
 		indent = print_sentence(sentence->binary.right, indent + 1);
 		printf(")");
 		indent += 1;
-- 
cgit v1.3.1