Improve style, add comments, and fix so that all unused and undefined rules are listed

Signed-off-by: Mattias Andrée <m@maandree.se>
author: Mattias Andrée <m@maandree.se> 2026-01-03 03:25:13 +0100
committer: Mattias Andrée <m@maandree.se> 2026-01-03 03:25:13 +0100
commit: 6ae86e3453c2fa13f2e15d19a6f3a7e65cecbc0b (patch)
tree: 1a547ae55c9ec9fd6b9dc2cd5679dad072b99b2a
parent: Fixes to print-syntax.c (diff)
download: libparser-6ae86e3453c2fa13f2e15d19a6f3a7e65cecbc0b.tar.gz
libparser-6ae86e3453c2fa13f2e15d19a6f3a7e65cecbc0b.tar.bz2
libparser-6ae86e3453c2fa13f2e15d19a6f3a7e65cecbc0b.tar.xz
1 files changed, 240 insertions, 100 deletions
diff --git a/libparser-generate.c b/libparser-generate.c
index 3f544c3..a9a3d96 100644
--- a/libparser-generate.c
+++ b/libparser-generate.c
@@ -19,29 +19,37 @@ usage(void)
 }
 
 
-#define eprintf(...) (fprintf(stderr, __VA_ARGS__), exit(1))
+#define weprintf(...) (fprintf(stderr, __VA_ARGS__))
+#define eprintf(...) (weprintf(__VA_ARGS__), exit(1))
 
 
 struct token {
+	/* text position in file { */
 	size_t lineno;
 	size_t column;
 	size_t character;
-	char s[];
+	/* } */
+	char s[]; /* the text */
 };
 
 struct node {
-	struct token *token;
-	struct node *parent;
-	struct node *next;
-	struct node *data;
-	struct node **head;
+	struct token *token; /* node text */
+	struct node *parent; /* parent node in tree */
+	struct node *next; /* next element in list */
+	struct node *data; /* beginning of subsentence */
+	struct node **head; /* end of subsentence */
 };
 
 
+/* declared rules, used to detect duplicates,
+ * and compared with `want_rules` to detect
+ * unused rules */
 static char **rule_names = NULL;
 static size_t nrule_names = 0;
 static size_t rule_names_size = 0;
 
+/* used rules, compared with `rule_names`
+ * to detect used but undefined rules */
 static char **want_rules = NULL;
 static size_t nwant_rules = 0;
 static size_t want_rules_size = 0;
@@ -119,13 +127,13 @@ check_utf8(char *buf, size_t *ip, size_t len)
 	uint32_t cp;
 	if ((buf[*ip] & 0xE0) == 0xC0) {
 		cp = (uint32_t)(unsigned char)(buf[*ip] ^ 0xC0);
-		req = 2;
+		req = 2u;
 	} else if ((buf[*ip] & 0xF0) == 0xE0) {
 		cp = (uint32_t)(unsigned char)(buf[*ip] ^ 0xE0);
-		req = 3;
+		req = 3u;
 	} else if ((buf[*ip] & 0xF8) == 0xF0) {
 		cp = (uint32_t)(unsigned char)(buf[*ip] ^ 0xF0);
-		req = 4;
+		req = 4u;
 	} else {
 		return 0;
 	}
@@ -143,11 +151,11 @@ check_utf8(char *buf, size_t *ip, size_t len)
 	if (cp < (uint32_t)1 << (7 + 0 * 6))
 		return 0;
 	if (cp < (uint32_t)1 << (5 + 1 * 6))
-		return req == 2;
+		return req == 2u;
 	if (cp < (uint32_t)1 << (4 + 2 * 6))
-		return req == 3;
+		return req == 3u;
 	if (cp <= UINT32_C(0x10FFFF))
-		return req == 4;
+		return req == 4u;
 	return 0;
 }
 
@@ -171,14 +179,14 @@ readall_and_validate(int fd, const char *fname)
 		}
 	}
 
-	for (i = 0; i < len; i++) {
+	for (i = 0; i < len;) {
 		if (buf[i] == '\n') {
-			lineno += 1;
+			lineno += 1u;
 			column = 0;
 			character = 0;
 		} else if (buf[i] == '\t') {
-			column += 8 - column % 8;
-			character += 1;
+			column += 8u - column % 8u;
+			character += 1u;
 		} else if (buf[i] == '\r') {
 			eprintf("%s: %s contains a CR character on line %zu at column %zu (character %zu)\n",
 			        argv0, fname, lineno, column, character);
@@ -189,8 +197,8 @@ readall_and_validate(int fd, const char *fname)
 			eprintf("%s: %s contains a NUL byte on line %zu at column %zu (character %zu)\n",
 			        argv0, fname, lineno, column, character);
 		} else if (!(buf[i] & 0x80)) {
-			character += 1;
-			column += 1;
+			character += 1u;
+			column += 1u;
 		} else if ((buf[i] & 0xC0) == 0x80) {
 			eprintf("%s: %s contains a illegal byte on line %zu at column %zu (character %zu)\n",
 			        argv0, fname, lineno, column, character);
@@ -199,13 +207,14 @@ readall_and_validate(int fd, const char *fname)
 				eprintf("%s: %s contains a illegal byte sequence on line %zu at column %zu (character %zu)\n",
 				        argv0, fname, lineno, column, character);
 			}
-			i--;
-			character += 1;
-			column += 1;
+			character += 1u;
+			column += 1u;
+			continue;
 		}
+		i++;
 	}
 
-	buf = erealloc(buf, len + 1);
+	buf = erealloc(buf, len + 1u);
 	buf[len] = '\0';
 
 	return buf;
@@ -237,7 +246,7 @@ tokenise(const char *data)
 			token_column = column;
 			token_character = character;
 			if (token_len == token_size)
-				token = erealloc(token, token_size += 16);
+				token = erealloc(token, token_size += 16u);
 			token[token_len++] = data[i];
 			if (isidentifier(data[i])) {
 				state = IDENTIFIER;
@@ -252,10 +261,10 @@ tokenise(const char *data)
 			} else {
 			add_token:
 				if (token_len == token_size)
-					token = erealloc(token, token_size += 16);
+					token = erealloc(token, token_size += 16u);
 				token[token_len++] = '\0';
 				if (ntokens == tokens_size)
-					tokens = ereallocarray(tokens, tokens_size += 16, sizeof(*tokens));
+					tokens = ereallocarray(tokens, tokens_size += 16u, sizeof(*tokens));
 				tokens[ntokens] = emalloc(offsetof(struct token, s) + token_len);
 				tokens[ntokens]->lineno = token_lineno;
 				tokens[ntokens]->column = token_column;
@@ -270,15 +279,15 @@ tokenise(const char *data)
 			if (isidentifier(data[i]) || data[i] == '-') {
 			add_char:
 				if (token_len == token_size)
-					token = erealloc(token, token_size += 16);
+					token = erealloc(token, token_size += 16u);
 				token[token_len++] = data[i];
 			} else {
 			add_token_and_do_again:
 				if (token_len == token_size)
-					token = erealloc(token, token_size += 16);
+					token = erealloc(token, token_size += 16u);
 				token[token_len++] = '\0';
 				if (ntokens == tokens_size)
-					tokens = ereallocarray(tokens, tokens_size += 16, sizeof(*tokens));
+					tokens = ereallocarray(tokens, tokens_size += 16u, sizeof(*tokens));
 				tokens[ntokens] = emalloc(offsetof(struct token, s) + token_len);
 				tokens[ntokens]->lineno = token_lineno;
 				tokens[ntokens]->column = token_column;
@@ -310,7 +319,7 @@ tokenise(const char *data)
 				        argv0, lineno, column, character);
 			}
 			if (token_len == token_size)
-				token = erealloc(token, token_size += 16);
+				token = erealloc(token, token_size += 16u);
 			token[token_len++] = data[i];
 			state = STRING;
 			break;
@@ -327,24 +336,25 @@ tokenise(const char *data)
 		};
 
 		if (data[i] == '\n') {
-			lineno += 1;
+			lineno += 1u;
 			column = 0;
 			character = 0;
 		} else if (data[i] == '\t') {
-			column += 8 - column % 8;
-			character += 1;
+			column += 8u - column % 8u;
+			character += 1u;
 		} else {
-			character += (data[i] & 0xC0) != 0x80;
-			column += 1;
+			character += (size_t)((data[i] & 0xC0) != 0x80);
+			column += 1u;
 		}
 	}
 	if (state != NEW_TOKEN && state != SPACE)
 		eprintf("%s: premature end of file\n", argv0);
 
-	tokens = ereallocarray(tokens, ntokens + 1, sizeof(*tokens));
-	tokens[ntokens] = NULL;
 	free(token);
 
+	tokens = ereallocarray(tokens, ntokens + 1u, sizeof(*tokens));
+	tokens[ntokens] = NULL;
+
 	return tokens;
 }
 
@@ -354,6 +364,7 @@ emit_and_free_sentence(struct node *node, size_t *indexp)
 {
 	size_t index = (*indexp)++, left, right;
 	struct node *next, *low, *high;
+	const char *type;
 
 	for (; node->token->s[0] == '('; node = next) {
 		next = node->data;
@@ -361,14 +372,19 @@ emit_and_free_sentence(struct node *node, size_t *indexp)
 		free(node);
 	}
 
-	if (node->token->s[0] == '[' || node->token->s[0] == '{' || node->token->s[0] == '!') {
+	switch (node->token->s[0]) {
+	case '[': type = "OPTIONAL"; goto unary;
+	case '{': type = "REPEATED"; goto unary;
+	case '!': type = "REJECTION"; unary:
 		emit_and_free_sentence(node->data, indexp);
 		printf("static union libparser_sentence sentence_%zu_%zu = {.unary = {"
 		           ".type = LIBPARSER_SENTENCE_TYPE_%s, .sentence = &sentence_%zu_%zu"
 		       "}};\n",
-		       nrule_names, index, node->token->s[0] == '[' ? "OPTIONAL" :
-		                           node->token->s[0] == '{' ? "REPEATED" : "REJECTION", nrule_names, index + 1);
-	} else if (node->token->s[0] == '<') {
+		       nrule_names, index,
+		       type, nrule_names, index + 1u);
+		break;
+
+	case '<':
 		low = node->data;
 		high = node->data->next;
 		if ((unsigned char)low->token->s[0] > (unsigned char)high->token->s[0]) {
@@ -380,12 +396,16 @@ emit_and_free_sentence(struct node *node, size_t *indexp)
 		printf("static union libparser_sentence sentence_%zu_%zu = {.char_range = {"
 		           ".type = LIBPARSER_SENTENCE_TYPE_CHAR_RANGE, .low = %hhu, .high = %hhu"
 		       "}};\n",
-		       nrule_names, index, (unsigned char)low->token->s[0], (unsigned char)high->token->s[0]);
+		       nrule_names, index,
+		       (unsigned char)low->token->s[0], (unsigned char)high->token->s[0]);
 		free(low->token);
 		free(high->token);
 		free(low);
 		free(high);
-	} else if (node->token->s[0] == '|' || node->token->s[0] == ',') {
+		break;
+
+	case '|': type = "ALTERNATION"; goto binary;
+	case ',': type = "CONCATENATION"; binary:
 		right = *indexp;
 		emit_and_free_sentence(node->data->next, indexp);
 		left = *indexp;
@@ -394,25 +414,35 @@ emit_and_free_sentence(struct node *node, size_t *indexp)
 		           ".type = LIBPARSER_SENTENCE_TYPE_%s, "
 		           ".left = &sentence_%zu_%zu, .right = &sentence_%zu_%zu"
 		       "}};\n",
-		       nrule_names, index, node->token->s[0] == '|' ? "ALTERNATION" : "CONCATENATION",
+		       nrule_names, index,
+		       type,
 		       nrule_names, left, nrule_names, right);
-	} else if (node->token->s[0] == '"') {
+		break;
+
+	case '"':
 		printf("static union libparser_sentence sentence_%zu_%zu = {.string = {"
 		           ".type = LIBPARSER_SENTENCE_TYPE_STRING, "
-		           ".string = %s\", .length = sizeof(%s\") - 1"
+		           ".string = %s\", .length = sizeof(%s\") - 1U"
 		       "}};\n",
-		       nrule_names, index, node->token->s, node->token->s);
-	} else if (node->token->s[0] == '-') {
+		       nrule_names, index,
+		       node->token->s, node->token->s);
+		break;
+
+	case '-':
 		printf("static union libparser_sentence sentence_%zu_%zu = {.type = LIBPARSER_SENTENCE_TYPE_EXCEPTION};\n",
 		       nrule_names, index);
-	} else {
+		break;
+
+	default:
 		if (nwant_rules == want_rules_size)
-			want_rules = ereallocarray(want_rules, want_rules_size += 16, sizeof(*want_rules));
+			want_rules = ereallocarray(want_rules, want_rules_size += 16u, sizeof(*want_rules));
 		want_rules[nwant_rules++] = estrdup(node->token->s);
 		printf("static union libparser_sentence sentence_%zu_%zu = {.rule = {"
 		           ".type = LIBPARSER_SENTENCE_TYPE_RULE, .rule = \"%s\""
 		       "}};\n",
-		       nrule_names, index, node->token->s);
+		       nrule_names, index,
+		       node->token->s);
+		break;
 	}
 
 	free(node->token);
@@ -423,55 +453,85 @@ emit_and_free_sentence(struct node *node, size_t *indexp)
 static struct node *
 order_sentences(struct node *node)
 {
-	struct node *tail = NULL, **head = &tail;
-	struct node *stack = NULL;
+	struct node *tail = NULL, **head = &tail; /* output queue */
+	struct node *stack = NULL; /* reordering stack */
 	struct node *next, *prev;
 
+	/* Reorder symbols from infix order to postfix order */
 	for (; node; node = next) {
 		next = node->next;
-		if (node->token->s[0] == '(' || node->token->s[0] == '[' || node->token->s[0] == '{') {
-			node->data = order_sentences(node->data);
-			*head = node;
-			head = &node->next;
-		} else if (node->token->s[0] == '|' || node->token->s[0] == ',') {
+
+		switch (node->token->s[0]) {
+		case '|':
+		case ',':
 		again_operators:
 			if (!stack) {
-				node->next = stack;
-				stack = node;
+				/* If the stack is empty, we have to push our node onto it */
+				goto push_to_stack;
 			} else if (node->token->s[0] == ',' && stack->token->s[0] == '|') {
-				node->next = stack;
-				stack = node;
+				/* Likewise if we have a concatenation while there is
+				 * an alternation on the stack, since concatenation has
+				 * higher precedence */
+				goto push_to_stack;
 			} else if (node->token->s[0] == stack->token->s[0]) {
+				/* If however our operator is the same as on the top
+				 * of the stack (meaning they have the same precedence),
+				 * we pop the top of the stack onto the queue, and push
+				 * our operator to the stack */
 				*head = stack;
 				head = &stack->next;
 				stack = stack->next;
+			push_to_stack:
 				node->next = stack;
 				stack = node;
 			} else {
+				/* Otherwise, that is if we have an alternation but
+				 * the top of the stack is a concatenation (that is
+				 * our operator has lower precedence), we pop the
+				 * top of the stack into the queue, and compare
+				 * the operator again against the stack */
 				*head = stack;
 				head = &stack->next;
 				stack = stack->next;
 				goto again_operators;
 			}
-		} else {
-			if (node->token->s[0] == '!')
-				node->data = order_sentences(node->data);
+			break;
+
+		case '(':
+		case '[':
+		case '{':
+		case '!':
+			/* Everything else we immediately put into the queue,
+			 * but for brackets and unary operators, we simply
+			 * use recursion to order inner sentences */
+			node->data = order_sentences(node->data);
+			/* fall through */
+		default:
 			*head = node;
 			head = &node->next;
+			break;
 		}
 	}
 
+	/* Anything left on the stack is popped into the queue */
 	for (; stack; stack = next) {
 		next = stack->next;
 		*head = stack;
 		head = &stack->next;
 	}
 
+	/* Properly terminate the queue */
 	*head = NULL;
 
+	/* Convert the postfix notation into a tree */
 	for (stack = tail, prev = NULL; stack; prev = stack, stack = next) {
+		/* Reverse the queue (we need to look
+		 * backwards, but the list is singly linked) */
 		next = stack->next;
 		stack->next = prev;
+		/* But when a binary operator is encountered,
+		 * consume the two tokens in front of it in
+		 * the queue, and add them as its operands */
 		if (stack->token->s[0] == '|' || stack->token->s[0] == ',') {
 			prev = stack->next->next->next;
 			stack->data = stack->next->next;
@@ -493,10 +553,11 @@ emit_and_free_rule(struct node *rule)
 	rule->data = order_sentences(rule->data);
 	emit_and_free_sentence(rule->data, &index);
 
-	printf("static struct libparser_rule rule_%zu = {\"%s\", &sentence_%zu_0};\n", nrule_names, rule->token->s, nrule_names);
+	printf("static struct libparser_rule rule_%zu = {\"%s\", &sentence_%zu_0};\n",
+	       nrule_names, rule->token->s, nrule_names);
 
 	if (nrule_names == rule_names_size)
-		rule_names = ereallocarray(rule_names, rule_names_size += 16, sizeof(*rule_names));
+		rule_names = ereallocarray(rule_names, rule_names_size += 16u, sizeof(*rule_names));
 	rule_names[nrule_names++] = estrdup(rule->token->s);
 	free(rule->token);
 	free(rule);
@@ -553,14 +614,17 @@ main(int argc, char *argv[])
 	i = 0;
 again:
 	for (; tokens[i]; i++) {
-		if (tokens[i + 1] && tokens[i]->s[0] == '(' && tokens[i + 1]->s[0] == '*') {
+		/* Remove comments, they cannot be nested, but
+		 * they can contain strings (recognised during
+		 * tokenisation) which may contain comment-tokens */
+		if (tokens[i + 1u] && tokens[i]->s[0] == '(' && tokens[i + 1u]->s[0] == '*') {
 			free(tokens[i]);
-			free(tokens[i + 1]);
-			for (i += 2; tokens[i] && tokens[i + 1]; i++) {
-				if (tokens[i]->s[0] == '*' && tokens[i + 1]->s[0] == ')') {
+			free(tokens[i + 1u]);
+			for (i += 2u; tokens[i] && tokens[i + 1u]; i++) {
+				if (tokens[i]->s[0] == '*' && tokens[i + 1u]->s[0] == ')') {
 					free(tokens[i]);
-					free(tokens[i + 1]);
-					i += 2;
+					free(tokens[i + 1u]);
+					i += 2u;
 					goto again;
 				}
 				free(tokens[i]);
@@ -568,19 +632,29 @@ again:
 			eprintf("%s: premature end of file\n", argv0);
 		}
 
+		/* Also remove any whitespace (the tokeniser
+		 * is simple and does not recognise multisymbol
+		 * tokens (that is, apart from strings and
+		 * identifiers) so it cannot ignore whitespace) */
+		if (isspace(tokens[i]->s[0])) {
+			free(tokens[i]);
+			continue;
+		}
+
+		/* For the sake of code readability, identify
+		 * the token type */
 		if (tokens[i]->s[0] == '"') {
 			type = STRING;
 		} else if (isidentifier(tokens[i]->s[0])) {
 			type = IDENTIFIER;
-		} else if (isspace(tokens[i]->s[0])) {
-			free(tokens[i]);
-			continue;
 		} else {
 			type = SYMBOL;
 		}
 
 		switch (state) {
 		case NEW_RULE:
+			/* At the beginning of a new rule (which is the initial state),
+			 * we expect an identifier for the rule */
 			if (type != IDENTIFIER) {
 				eprintf("%s: expected an identifier on line %zu at column %zu (character %zu)\n",
 				        argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
@@ -588,7 +662,10 @@ again:
 			stack = calloc(1, sizeof(*stack));
 			stack->token = tokens[i];
 			stack->head = &stack->data;
+			/* and then we expect an equals sign */
 			state = EXPECT_EQUALS;
+			/* but we have to make sure the name isn't already
+			 * being used for another rule */
 			for (j = 0; j < nrule_names; j++) {
 				if (!strcmp(rule_names[j], tokens[i]->s)) {
 					eprintf("%s: duplicate definition of \"%s\" on line %zu at column %zu (character %zu)\n",
@@ -598,19 +675,40 @@ again:
 			break;
 
 		case EXPECT_EQUALS:
+			/* After the identifier for the rule, we make sure
+			 * we get an equals sign */
 			if (type != SYMBOL || tokens[i]->s[0] != '=') {
 				eprintf("%s: expected an '=' on line %zu at column %zu (character %zu)\n",
 				        argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
 			}
 			free(tokens[i]);
+			/* and then we expect to get an operand as the
+			 * next token */
 			state = EXPECT_OPERAND;
 			break;
 
 		case EXPECT_OPERAND:
+			/* An operand can be a bracketed sentence,
+			 * a value range, an exception, a rejection,
+			 * an identifier (an embedded rule) or a string */
 			if (type == SYMBOL) {
 				if (tokens[i]->s[0] == '(' || tokens[i]->s[0] == '[' || tokens[i]->s[0] == '{') {
+					/* If the operand is a bracketed sentence,
+					 * we push the opening bracket into the
+					 * stack to keep track of required matching
+					 * closing statements; and we still expect
+					 * the next token to be an operand */
+					goto push_stack;
+				} else if (tokens[i]->s[0] == '!') {
+					/* Likewise for rejections (it is added to
+					 * the stack but it is a unary operator
+					 * so no matching symbol will be expected) */
 					goto push_stack;
 				} else if (tokens[i]->s[0] == '<') {
+					/* Likewise for value ranges, but we expect
+					 * the next token to be a numerical value,
+					 * which will represent the range's lower
+					 * bound */
 					state = EXPECT_RANGE_LOW;
 				push_stack:
 					parent_node = stack;
@@ -619,15 +717,21 @@ again:
 					stack->token = tokens[i];
 					stack->head = &stack->data;
 				} else if (tokens[i]->s[0] == '-') {
+					/* If the operand is an exception, it is added
+					 * to the sentence, and the next token
+					 * is expected to be a binary operator or
+					 * the end of the sentence */
 					goto add;
-				} else if (tokens[i]->s[0] == '!') {
-					goto push_stack;
 				} else {
 				stray:
 					eprintf("%s: stray '%c' on line %zu at column %zu (character %zu)\n",
 					        argv0, tokens[i]->s[0], tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
 				}
 			} else {
+				/* If the token is an embedded rule or a
+				 * string, it is added to the sentence and
+				 * the next token is expected to be a binary
+				 * operator or the end of the sentence */
 			add:
 				state = EXPECT_OPERATOR;
 				goto add_singleton;
@@ -635,15 +739,21 @@ again:
 			break;
 
 		case EXPECT_OPERATOR:
+			/* When we get a binary operator, or the end
+			 * of a sentence, we have to pop out all unary
+			 * operators (rejections) from the stack */
 			while (stack->token->s[0] == '!') {
 				*stack->parent->head = stack;
 				stack->parent->head = &stack->next;
 				stack = stack->parent;
 			}
 			if (tokens[i]->s[0] == '|' || tokens[i]->s[0] == ',') {
+				/* If we have a binary operator, we add
+				 * it to the sentence and expect the next
+				 * token to be an operand */
 				state = EXPECT_OPERAND;
 			add_singleton:
-				node = calloc(1, sizeof(*node));
+				node = calloc(1u, sizeof(*node));
 				node->token = tokens[i];
 				*stack->head = node;
 				stack->head = &node->next;
@@ -656,6 +766,11 @@ again:
 					goto stray;
 				goto pop;
 			} else if (tokens[i]->s[0] == '}') {
+				/* If we have a closing bracket we verify
+				 * that it matches the top of the stack,
+				 * we then pop the top of the stack
+				 * (the bracketed sentence) into the end
+				 * of the sentence it appeared in */
 				if (stack->token->s[0] != '{')
 					goto stray;
 			pop:
@@ -664,6 +779,11 @@ again:
 				stack->parent->head = &stack->next;
 				stack = stack->parent;
 			} else if (tokens[i]->s[0] == ';') {
+				/* If we have a semicolon, we are
+				 * at the end of the rule, and expect
+				 * the stack to be free from brackets
+				 * (the stack will however contain the
+				 * rule, which is emitted and deallocated) */
 				if (stack->token->s[0] == ')' || stack->token->s[0] == ']' || stack->token->s[0] == '}')
 					eprintf("%s: premature end of rule on line %zu at column %zu (character %zu): "
 					        "'%s' on line %zu at column %zu (character %zu) not closed\n",
@@ -683,10 +803,15 @@ again:
 			break;
 
 		case EXPECT_RANGE_LOW:
+			/* After the "<" that begins a value range,
+			 * there is a numerical value, which is followed
+			 * by a comma, */
 			state = EXPECT_RANGE_DELIM;
 			goto add_range_bound;
 
 		case EXPECT_RANGE_DELIM:
+			/* the comma is followed by another value:
+			 * the upper boundary, */
 			if (type != SYMBOL || tokens[i]->s[0] != ',') {
 				eprintf("%s: expected an ',' on line %zu at column %zu (character %zu)\n",
 				        argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
@@ -696,12 +821,30 @@ again:
 			break;
 
 		case EXPECT_RANGE_HIGH:
+			/* which is followed by a ">", */
 			state = EXPECT_RANGE_CLOSE;
+			goto add_range_bound;
+
+		case EXPECT_RANGE_CLOSE:
+			/* after the ">", a binary operator
+			 * or the end of the sentence is expected */
+			if (type != SYMBOL || tokens[i]->s[0] != '>') {
+				eprintf("%s: expected an '>' on line %zu at column %zu (character %zu)\n",
+				        argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
+			}
+			state = EXPECT_OPERATOR;
+			/* Once we have the ">", we pop the "<"
+			 * from the stack and add the value range
+			 * to the sentence. */
+			goto pop;
+
 		add_range_bound:
 			if (type == IDENTIFIER) {
+				/* A value can be specified in hexadecimal format
+				 * (prefixed with "0x" or "0X") or in decimal format */
 				val = 0;
 				if (tokens[i]->s[0] == '0' && (tokens[i]->s[1] == 'x' || tokens[i]->s[1] == 'X')) {
-					for (j = 2; isxdigit(tokens[i]->s[j]) && val < 255; j++)
+					for (j = 2u; isxdigit(tokens[i]->s[j]) && val < 255; j++)
 						val = (val * 16) | ((tokens[i]->s[j] & 15) + (tokens[i]->s[j] > '9' ? 9 : 0));
 				} else {
 					for (j = 0; isdigit(tokens[i]->s[j]) && val < 255; j++)
@@ -712,11 +855,12 @@ again:
 				tokens[i]->s[0] = (char)val;
 				tokens[i]->s[1] = '\0';
 			} else if (type == STRING) {
-				/* tokens[i]->s[0] is '"' */
-				if (!tokens[i]->s[1]) {
+				/* or as a single character string */
+				if (!tokens[i]->s[1]) { /* tokens[i]->s[0] is '"' */
 					goto invalid_range;
 				} else if (tokens[i]->s[1] == '\\') {
-					j = 3;
+					/* The character can be specified using an escape sequence */
+					j = 3u;
 					if (tokens[i]->s[2] == '"') {
 						tokens[i]->s[1] = '"';
 					} else if (tokens[i]->s[2] == '\'') {
@@ -739,10 +883,10 @@ again:
 						val = ((tokens[i]->s[3] & 15) + (tokens[i]->s[3] > '9' ? 9 : 0)) * 16;
 						val |= (tokens[i]->s[4] & 15) + (tokens[i]->s[4] > '9' ? 9 : 0);
 						tokens[i]->s[0] = (char)val;
-						j = 5;
+						j = 5u;
 					} else if ('0' <= tokens[i]->s[2] && tokens[i]->s[2] <= '7') {
 						val = 0;
-						for (j = 2; '0' <= tokens[i]->s[j] && tokens[i]->s[j] <= '7' && val < 255; j++)
+						for (j = 2u; '0' <= tokens[i]->s[j] && tokens[i]->s[j] <= '7' && val < 255; j++)
 							val = (val * 8) | (tokens[i]->s[j] & 15);
 						if (val > 255)
 							goto invalid_range;
@@ -767,14 +911,6 @@ again:
 			}
 			goto add_singleton;
 
-		case EXPECT_RANGE_CLOSE:
-			if (type != SYMBOL || tokens[i]->s[0] != '>') {
-				eprintf("%s: expected an '>' on line %zu at column %zu (character %zu)\n",
-				        argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
-			}
-			state = EXPECT_OPERATOR;
-			goto pop;
-
 		default:
 			abort();
 		}
@@ -783,6 +919,7 @@ again:
 	if (state != NEW_RULE)
 		eprintf("%s: premature end of file\n", argv0);
 
+	/* Detect unused and undefined symbols */
 	err = 0;
 	qsort(rule_names, nrule_names, sizeof(*rule_names), strpcmp);
 	qsort(want_rules, nwant_rules, sizeof(*want_rules), strpcmp);
@@ -790,39 +927,41 @@ again:
 		cmp = strcmp(rule_names[i], want_rules[j]);
 		if (!cmp) {
 			i++;
-			for (j++; j < nwant_rules && !strcmp(want_rules[j - 1], want_rules[j]); j++);
+			for (j++; j < nwant_rules && !strcmp(want_rules[j - 1u], want_rules[j]); j++);
 		} else if (!strcmp(rule_names[i], argv[0])) {
 			i++;
 		} else if (cmp < 0) {
-			eprintf("%s: rule \"%s\" defined but not used\n", argv0, rule_names[i]);
+			weprintf("%s: rule \"%s\" defined but not used\n", argv0, rule_names[i]);
 			i++;
 			err = 1;
 		} else {
-			eprintf("%s: rule \"%s\" used but not defined\n", argv0, want_rules[j]);
+			weprintf("%s: rule \"%s\" used but not defined\n", argv0, want_rules[j]);
 			for (j++; j < nwant_rules && !strcmp(want_rules[j - 1], want_rules[j]); j++);
 			err = 1;
 		}
 	}
 	for (; i < nrule_names; i++) {
 		if (strcmp(rule_names[i], argv[0])) {
-			eprintf("%s: rule \"%s\" defined but not used\n", argv0, rule_names[i]);
+			weprintf("%s: rule \"%s\" defined but not used\n", argv0, rule_names[i]);
 			err = 1;
 		}
 	}
 	while (j < nwant_rules) {
-		eprintf("%s: rule \"%s\" used but not defined\n", argv0, want_rules[j]);
-		for (j++; j < nwant_rules && !strcmp(want_rules[j - 1], want_rules[j]); j++);
+		weprintf("%s: rule \"%s\" used but not defined\n", argv0, want_rules[j]);
+		for (j++; j < nwant_rules && !strcmp(want_rules[j - 1u], want_rules[j]); j++);
 		err = 1;
 	}
 	if (err)
 		exit(1);
 
+	/* Verify that the main rule actually exists */
 	for (i = 0; i < nrule_names; i++)
 		if (!strcmp(rule_names[i], argv[0]))
 			goto found_main;
 	eprintf("%s: specified main rule (\"%s\") was not defined\n", argv0, argv[0]);
-
 found_main:
+
+	/* Emit predefined rules */
 	printf("static union libparser_sentence noeof_sentence = {.type = LIBPARSER_SENTENCE_TYPE_EXCEPTION};\n");
 	printf("static struct libparser_rule noeof_rule = {\"@noeof\", &noeof_sentence};\n");
 	printf("static union libparser_sentence noeof_rule_sentence = {.rule = "
@@ -850,6 +989,7 @@ found_main:
 	       "}};\n");
 	printf("static struct libparser_rule main_rule = {\"@start\", &main_sentence};\n");
 
+	/* Emit the rule table */
 	printf("const struct libparser_rule *const libparser_rule_table[] = {\n");
 	for (i = 0; i < nrule_names; i++) {
 		printf("\t&rule_%zu,\n", i);
author	Mattias Andrée <m@maandree.se>	2026-01-03 03:25:13 +0100
committer	Mattias Andrée <m@maandree.se>	2026-01-03 03:25:13 +0100
commit	6ae86e3453c2fa13f2e15d19a6f3a7e65cecbc0b (patch)
tree	1a547ae55c9ec9fd6b9dc2cd5679dad072b99b2a
parent	Fixes to print-syntax.c (diff)
download	libparser-6ae86e3453c2fa13f2e15d19a6f3a7e65cecbc0b.tar.gz libparser-6ae86e3453c2fa13f2e15d19a6f3a7e65cecbc0b.tar.bz2 libparser-6ae86e3453c2fa13f2e15d19a6f3a7e65cecbc0b.tar.xz