aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMattias Andrée <m@maandree.se>2026-01-03 03:25:13 +0100
committerMattias Andrée <m@maandree.se>2026-01-03 03:25:13 +0100
commit6ae86e3453c2fa13f2e15d19a6f3a7e65cecbc0b (patch)
tree1a547ae55c9ec9fd6b9dc2cd5679dad072b99b2a
parentFixes to print-syntax.c (diff)
downloadlibparser-6ae86e3453c2fa13f2e15d19a6f3a7e65cecbc0b.tar.gz
libparser-6ae86e3453c2fa13f2e15d19a6f3a7e65cecbc0b.tar.bz2
libparser-6ae86e3453c2fa13f2e15d19a6f3a7e65cecbc0b.tar.xz
Improve style, add comments, and fix so that all unused and undefined rules are listed
Signed-off-by: Mattias Andrée <m@maandree.se>
-rw-r--r--libparser-generate.c340
1 files changed, 240 insertions, 100 deletions
diff --git a/libparser-generate.c b/libparser-generate.c
index 3f544c3..a9a3d96 100644
--- a/libparser-generate.c
+++ b/libparser-generate.c
@@ -19,29 +19,37 @@ usage(void)
}
-#define eprintf(...) (fprintf(stderr, __VA_ARGS__), exit(1))
+#define weprintf(...) (fprintf(stderr, __VA_ARGS__))
+#define eprintf(...) (weprintf(__VA_ARGS__), exit(1))
struct token {
+ /* text position in file { */
size_t lineno;
size_t column;
size_t character;
- char s[];
+ /* } */
+ char s[]; /* the text */
};
struct node {
- struct token *token;
- struct node *parent;
- struct node *next;
- struct node *data;
- struct node **head;
+ struct token *token; /* node text */
+ struct node *parent; /* parent node in tree */
+ struct node *next; /* next element in list */
+ struct node *data; /* beginning of subsentence */
+ struct node **head; /* end of subsentence */
};
+/* declared rules, used to detect duplicates,
+ * and compared with `want_rules` to detect
+ * unused rules */
static char **rule_names = NULL;
static size_t nrule_names = 0;
static size_t rule_names_size = 0;
+/* used rules, compared with `rule_names`
+ * to detect used but undefined rules */
static char **want_rules = NULL;
static size_t nwant_rules = 0;
static size_t want_rules_size = 0;
@@ -119,13 +127,13 @@ check_utf8(char *buf, size_t *ip, size_t len)
uint32_t cp;
if ((buf[*ip] & 0xE0) == 0xC0) {
cp = (uint32_t)(unsigned char)(buf[*ip] ^ 0xC0);
- req = 2;
+ req = 2u;
} else if ((buf[*ip] & 0xF0) == 0xE0) {
cp = (uint32_t)(unsigned char)(buf[*ip] ^ 0xE0);
- req = 3;
+ req = 3u;
} else if ((buf[*ip] & 0xF8) == 0xF0) {
cp = (uint32_t)(unsigned char)(buf[*ip] ^ 0xF0);
- req = 4;
+ req = 4u;
} else {
return 0;
}
@@ -143,11 +151,11 @@ check_utf8(char *buf, size_t *ip, size_t len)
if (cp < (uint32_t)1 << (7 + 0 * 6))
return 0;
if (cp < (uint32_t)1 << (5 + 1 * 6))
- return req == 2;
+ return req == 2u;
if (cp < (uint32_t)1 << (4 + 2 * 6))
- return req == 3;
+ return req == 3u;
if (cp <= UINT32_C(0x10FFFF))
- return req == 4;
+ return req == 4u;
return 0;
}
@@ -171,14 +179,14 @@ readall_and_validate(int fd, const char *fname)
}
}
- for (i = 0; i < len; i++) {
+ for (i = 0; i < len;) {
if (buf[i] == '\n') {
- lineno += 1;
+ lineno += 1u;
column = 0;
character = 0;
} else if (buf[i] == '\t') {
- column += 8 - column % 8;
- character += 1;
+ column += 8u - column % 8u;
+ character += 1u;
} else if (buf[i] == '\r') {
eprintf("%s: %s contains a CR character on line %zu at column %zu (character %zu)\n",
argv0, fname, lineno, column, character);
@@ -189,8 +197,8 @@ readall_and_validate(int fd, const char *fname)
eprintf("%s: %s contains a NUL byte on line %zu at column %zu (character %zu)\n",
argv0, fname, lineno, column, character);
} else if (!(buf[i] & 0x80)) {
- character += 1;
- column += 1;
+ character += 1u;
+ column += 1u;
} else if ((buf[i] & 0xC0) == 0x80) {
eprintf("%s: %s contains a illegal byte on line %zu at column %zu (character %zu)\n",
argv0, fname, lineno, column, character);
@@ -199,13 +207,14 @@ readall_and_validate(int fd, const char *fname)
eprintf("%s: %s contains a illegal byte sequence on line %zu at column %zu (character %zu)\n",
argv0, fname, lineno, column, character);
}
- i--;
- character += 1;
- column += 1;
+ character += 1u;
+ column += 1u;
+ continue;
}
+ i++;
}
- buf = erealloc(buf, len + 1);
+ buf = erealloc(buf, len + 1u);
buf[len] = '\0';
return buf;
@@ -237,7 +246,7 @@ tokenise(const char *data)
token_column = column;
token_character = character;
if (token_len == token_size)
- token = erealloc(token, token_size += 16);
+ token = erealloc(token, token_size += 16u);
token[token_len++] = data[i];
if (isidentifier(data[i])) {
state = IDENTIFIER;
@@ -252,10 +261,10 @@ tokenise(const char *data)
} else {
add_token:
if (token_len == token_size)
- token = erealloc(token, token_size += 16);
+ token = erealloc(token, token_size += 16u);
token[token_len++] = '\0';
if (ntokens == tokens_size)
- tokens = ereallocarray(tokens, tokens_size += 16, sizeof(*tokens));
+ tokens = ereallocarray(tokens, tokens_size += 16u, sizeof(*tokens));
tokens[ntokens] = emalloc(offsetof(struct token, s) + token_len);
tokens[ntokens]->lineno = token_lineno;
tokens[ntokens]->column = token_column;
@@ -270,15 +279,15 @@ tokenise(const char *data)
if (isidentifier(data[i]) || data[i] == '-') {
add_char:
if (token_len == token_size)
- token = erealloc(token, token_size += 16);
+ token = erealloc(token, token_size += 16u);
token[token_len++] = data[i];
} else {
add_token_and_do_again:
if (token_len == token_size)
- token = erealloc(token, token_size += 16);
+ token = erealloc(token, token_size += 16u);
token[token_len++] = '\0';
if (ntokens == tokens_size)
- tokens = ereallocarray(tokens, tokens_size += 16, sizeof(*tokens));
+ tokens = ereallocarray(tokens, tokens_size += 16u, sizeof(*tokens));
tokens[ntokens] = emalloc(offsetof(struct token, s) + token_len);
tokens[ntokens]->lineno = token_lineno;
tokens[ntokens]->column = token_column;
@@ -310,7 +319,7 @@ tokenise(const char *data)
argv0, lineno, column, character);
}
if (token_len == token_size)
- token = erealloc(token, token_size += 16);
+ token = erealloc(token, token_size += 16u);
token[token_len++] = data[i];
state = STRING;
break;
@@ -327,24 +336,25 @@ tokenise(const char *data)
};
if (data[i] == '\n') {
- lineno += 1;
+ lineno += 1u;
column = 0;
character = 0;
} else if (data[i] == '\t') {
- column += 8 - column % 8;
- character += 1;
+ column += 8u - column % 8u;
+ character += 1u;
} else {
- character += (data[i] & 0xC0) != 0x80;
- column += 1;
+ character += (size_t)((data[i] & 0xC0) != 0x80);
+ column += 1u;
}
}
if (state != NEW_TOKEN && state != SPACE)
eprintf("%s: premature end of file\n", argv0);
- tokens = ereallocarray(tokens, ntokens + 1, sizeof(*tokens));
- tokens[ntokens] = NULL;
free(token);
+ tokens = ereallocarray(tokens, ntokens + 1u, sizeof(*tokens));
+ tokens[ntokens] = NULL;
+
return tokens;
}
@@ -354,6 +364,7 @@ emit_and_free_sentence(struct node *node, size_t *indexp)
{
size_t index = (*indexp)++, left, right;
struct node *next, *low, *high;
+ const char *type;
for (; node->token->s[0] == '('; node = next) {
next = node->data;
@@ -361,14 +372,19 @@ emit_and_free_sentence(struct node *node, size_t *indexp)
free(node);
}
- if (node->token->s[0] == '[' || node->token->s[0] == '{' || node->token->s[0] == '!') {
+ switch (node->token->s[0]) {
+ case '[': type = "OPTIONAL"; goto unary;
+ case '{': type = "REPEATED"; goto unary;
+ case '!': type = "REJECTION"; unary:
emit_and_free_sentence(node->data, indexp);
printf("static union libparser_sentence sentence_%zu_%zu = {.unary = {"
".type = LIBPARSER_SENTENCE_TYPE_%s, .sentence = &sentence_%zu_%zu"
"}};\n",
- nrule_names, index, node->token->s[0] == '[' ? "OPTIONAL" :
- node->token->s[0] == '{' ? "REPEATED" : "REJECTION", nrule_names, index + 1);
- } else if (node->token->s[0] == '<') {
+ nrule_names, index,
+ type, nrule_names, index + 1u);
+ break;
+
+ case '<':
low = node->data;
high = node->data->next;
if ((unsigned char)low->token->s[0] > (unsigned char)high->token->s[0]) {
@@ -380,12 +396,16 @@ emit_and_free_sentence(struct node *node, size_t *indexp)
printf("static union libparser_sentence sentence_%zu_%zu = {.char_range = {"
".type = LIBPARSER_SENTENCE_TYPE_CHAR_RANGE, .low = %hhu, .high = %hhu"
"}};\n",
- nrule_names, index, (unsigned char)low->token->s[0], (unsigned char)high->token->s[0]);
+ nrule_names, index,
+ (unsigned char)low->token->s[0], (unsigned char)high->token->s[0]);
free(low->token);
free(high->token);
free(low);
free(high);
- } else if (node->token->s[0] == '|' || node->token->s[0] == ',') {
+ break;
+
+ case '|': type = "ALTERNATION"; goto binary;
+ case ',': type = "CONCATENATION"; binary:
right = *indexp;
emit_and_free_sentence(node->data->next, indexp);
left = *indexp;
@@ -394,25 +414,35 @@ emit_and_free_sentence(struct node *node, size_t *indexp)
".type = LIBPARSER_SENTENCE_TYPE_%s, "
".left = &sentence_%zu_%zu, .right = &sentence_%zu_%zu"
"}};\n",
- nrule_names, index, node->token->s[0] == '|' ? "ALTERNATION" : "CONCATENATION",
+ nrule_names, index,
+ type,
nrule_names, left, nrule_names, right);
- } else if (node->token->s[0] == '"') {
+ break;
+
+ case '"':
printf("static union libparser_sentence sentence_%zu_%zu = {.string = {"
".type = LIBPARSER_SENTENCE_TYPE_STRING, "
- ".string = %s\", .length = sizeof(%s\") - 1"
+ ".string = %s\", .length = sizeof(%s\") - 1U"
"}};\n",
- nrule_names, index, node->token->s, node->token->s);
- } else if (node->token->s[0] == '-') {
+ nrule_names, index,
+ node->token->s, node->token->s);
+ break;
+
+ case '-':
printf("static union libparser_sentence sentence_%zu_%zu = {.type = LIBPARSER_SENTENCE_TYPE_EXCEPTION};\n",
nrule_names, index);
- } else {
+ break;
+
+ default:
if (nwant_rules == want_rules_size)
- want_rules = ereallocarray(want_rules, want_rules_size += 16, sizeof(*want_rules));
+ want_rules = ereallocarray(want_rules, want_rules_size += 16u, sizeof(*want_rules));
want_rules[nwant_rules++] = estrdup(node->token->s);
printf("static union libparser_sentence sentence_%zu_%zu = {.rule = {"
".type = LIBPARSER_SENTENCE_TYPE_RULE, .rule = \"%s\""
"}};\n",
- nrule_names, index, node->token->s);
+ nrule_names, index,
+ node->token->s);
+ break;
}
free(node->token);
@@ -423,55 +453,85 @@ emit_and_free_sentence(struct node *node, size_t *indexp)
static struct node *
order_sentences(struct node *node)
{
- struct node *tail = NULL, **head = &tail;
- struct node *stack = NULL;
+ struct node *tail = NULL, **head = &tail; /* output queue */
+ struct node *stack = NULL; /* reordering stack */
struct node *next, *prev;
+ /* Reorder symbols from infix order to postfix order */
for (; node; node = next) {
next = node->next;
- if (node->token->s[0] == '(' || node->token->s[0] == '[' || node->token->s[0] == '{') {
- node->data = order_sentences(node->data);
- *head = node;
- head = &node->next;
- } else if (node->token->s[0] == '|' || node->token->s[0] == ',') {
+
+ switch (node->token->s[0]) {
+ case '|':
+ case ',':
again_operators:
if (!stack) {
- node->next = stack;
- stack = node;
+ /* if the stack is empty, we have to push our node onto it */
+ goto push_to_stack;
} else if (node->token->s[0] == ',' && stack->token->s[0] == '|') {
- node->next = stack;
- stack = node;
+ /* Likewise if we have a concatenation while there is
+ * an alternation on top of the stack, since concatenation
+ * has higher precedence */
+ goto push_to_stack;
} else if (node->token->s[0] == stack->token->s[0]) {
+ /* If however our operator is the same as on the top
+ * of the stack (meaning they have the same precedence),
+ * we pop the top of the stack onto the queue, and push
+ * our operator to the stack */
*head = stack;
head = &stack->next;
stack = stack->next;
+ push_to_stack:
node->next = stack;
stack = node;
} else {
+ /* Otherwise, that is if we have an alternation but
+ * the top of the stack is a concatenation (that is
+ * our operator has lower precedence), we pop the
+ * top of the stack into the queue, and compare
+ * the operator again against the stack */
*head = stack;
head = &stack->next;
stack = stack->next;
goto again_operators;
}
- } else {
- if (node->token->s[0] == '!')
- node->data = order_sentences(node->data);
+ break;
+
+ case '(':
+ case '[':
+ case '{':
+ case '!':
+ /* Everything else we immediately put into the queue,
+ * but for brackets and unary operators, we simply
+ * use recursion to order inner sentences */
+ node->data = order_sentences(node->data);
+ /* fall through */
+ default:
*head = node;
head = &node->next;
+ break;
}
}
+ /* Anything left on the stack is popped into the queue */
for (; stack; stack = next) {
next = stack->next;
*head = stack;
head = &stack->next;
}
+ /* Properly terminate the queue */
*head = NULL;
+ /* Convert the postfix notation into a tree */
for (stack = tail, prev = NULL; stack; prev = stack, stack = next) {
+ /* Reverse the queue (we need to look
+ * backwards, but the list is singly linked) */
next = stack->next;
stack->next = prev;
+ /* But when a binary operator is encountered,
+ * consume the two tokens in front of it in the
+ * queue, and add them as its operands */
if (stack->token->s[0] == '|' || stack->token->s[0] == ',') {
prev = stack->next->next->next;
stack->data = stack->next->next;
@@ -493,10 +553,11 @@ emit_and_free_rule(struct node *rule)
rule->data = order_sentences(rule->data);
emit_and_free_sentence(rule->data, &index);
- printf("static struct libparser_rule rule_%zu = {\"%s\", &sentence_%zu_0};\n", nrule_names, rule->token->s, nrule_names);
+ printf("static struct libparser_rule rule_%zu = {\"%s\", &sentence_%zu_0};\n",
+ nrule_names, rule->token->s, nrule_names);
if (nrule_names == rule_names_size)
- rule_names = ereallocarray(rule_names, rule_names_size += 16, sizeof(*rule_names));
+ rule_names = ereallocarray(rule_names, rule_names_size += 16u, sizeof(*rule_names));
rule_names[nrule_names++] = estrdup(rule->token->s);
free(rule->token);
free(rule);
@@ -553,14 +614,17 @@ main(int argc, char *argv[])
i = 0;
again:
for (; tokens[i]; i++) {
- if (tokens[i + 1] && tokens[i]->s[0] == '(' && tokens[i + 1]->s[0] == '*') {
+ /* Remove comments, they cannot be nested, but
+ * they can contain strings (recognised during
+ * tokenisation) which may contain comment-tokens */
+ if (tokens[i + 1u] && tokens[i]->s[0] == '(' && tokens[i + 1u]->s[0] == '*') {
free(tokens[i]);
- free(tokens[i + 1]);
- for (i += 2; tokens[i] && tokens[i + 1]; i++) {
- if (tokens[i]->s[0] == '*' && tokens[i + 1]->s[0] == ')') {
+ free(tokens[i + 1u]);
+ for (i += 2u; tokens[i] && tokens[i + 1u]; i++) {
+ if (tokens[i]->s[0] == '*' && tokens[i + 1u]->s[0] == ')') {
free(tokens[i]);
- free(tokens[i + 1]);
- i += 2;
+ free(tokens[i + 1u]);
+ i += 2u;
goto again;
}
free(tokens[i]);
@@ -568,19 +632,29 @@ again:
eprintf("%s: premature end of file\n", argv0);
}
+ /* Also remove any whitespace (the tokeniser is
+ * simple and does not recognise multisymbol
+ * tokens, apart from strings and identifiers,
+ * so it cannot ignore whitespace) */
+ if (isspace(tokens[i]->s[0])) {
+ free(tokens[i]);
+ continue;
+ }
+
+ /* For the sake of code readability, identify
+ * the token type */
if (tokens[i]->s[0] == '"') {
type = STRING;
} else if (isidentifier(tokens[i]->s[0])) {
type = IDENTIFIER;
- } else if (isspace(tokens[i]->s[0])) {
- free(tokens[i]);
- continue;
} else {
type = SYMBOL;
}
switch (state) {
case NEW_RULE:
+ /* At the beginning of a new rule (which is the initial state),
+ * we expect an identifier for the rule */
if (type != IDENTIFIER) {
eprintf("%s: expected an identifier on line %zu at column %zu (character %zu)\n",
argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
@@ -588,7 +662,10 @@ again:
stack = calloc(1, sizeof(*stack));
stack->token = tokens[i];
stack->head = &stack->data;
+ /* and then we expect an equals sign */
state = EXPECT_EQUALS;
+ /* but we have to make sure the name isn't already
+ * being used for another rule */
for (j = 0; j < nrule_names; j++) {
if (!strcmp(rule_names[j], tokens[i]->s)) {
eprintf("%s: duplicate definition of \"%s\" on line %zu at column %zu (character %zu)\n",
@@ -598,19 +675,40 @@ again:
break;
case EXPECT_EQUALS:
+ /* After the identifier for the rule, we make sure
+ * we get an equals sign */
if (type != SYMBOL || tokens[i]->s[0] != '=') {
eprintf("%s: expected an '=' on line %zu at column %zu (character %zu)\n",
argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
}
free(tokens[i]);
+ /* and then we expect to get an operand as the
+ * next token */
state = EXPECT_OPERAND;
break;
case EXPECT_OPERAND:
+ /* An operand can be a bracketed sentence,
+ * the value range, an exception, a rejection
+ * or an identifier (an embedded rule) */
if (type == SYMBOL) {
if (tokens[i]->s[0] == '(' || tokens[i]->s[0] == '[' || tokens[i]->s[0] == '{') {
+ /* If the operand is a bracketed sentence,
+ * we push the opening bracket into the
+ * stack to keep track of required matching
+ * closing statements; and we still expect
+ * the next token to be an operand */
+ goto push_stack;
+ } else if (tokens[i]->s[0] == '!') {
+ /* Likewise for rejections (it is added to
+ * the stack but it is a unary operator
+ * so no matching symbol will be expected) */
goto push_stack;
} else if (tokens[i]->s[0] == '<') {
+ /* Likewise for value ranges, but we expect
+ * the next token to be a numerical value,
+ * which will represent the range's lower
+ * bound */
state = EXPECT_RANGE_LOW;
push_stack:
parent_node = stack;
@@ -619,15 +717,21 @@ again:
stack->token = tokens[i];
stack->head = &stack->data;
} else if (tokens[i]->s[0] == '-') {
+ /* If the operand is an exception, it is added
+ * to the sentence, and the next token
+ * is expected to be a binary operator or
+ * the end of the sentence */
goto add;
- } else if (tokens[i]->s[0] == '!') {
- goto push_stack;
} else {
stray:
eprintf("%s: stray '%c' on line %zu at column %zu (character %zu)\n",
argv0, tokens[i]->s[0], tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
}
} else {
+ /* If the symbol is an embedded rule, it
+ * is added to the sentence and the next
+ * token is expected to be a binary
+ * operator or the end of the sentence */
add:
state = EXPECT_OPERATOR;
goto add_singleton;
@@ -635,15 +739,21 @@ again:
break;
case EXPECT_OPERATOR:
+ /* When we get a binary operator, or the end
+ * of a sentence, we have to pop out all unary
+ * operators (rejects) from the stack */
while (stack->token->s[0] == '!') {
*stack->parent->head = stack;
stack->parent->head = &stack->next;
stack = stack->parent;
}
if (tokens[i]->s[0] == '|' || tokens[i]->s[0] == ',') {
+ /* If we have a binary operator, we add
+ * it to the sentence and expect the next
+ * token to be an operand */
state = EXPECT_OPERAND;
add_singleton:
- node = calloc(1, sizeof(*node));
+ node = calloc(1u, sizeof(*node));
node->token = tokens[i];
*stack->head = node;
stack->head = &node->next;
@@ -656,6 +766,11 @@ again:
goto stray;
goto pop;
} else if (tokens[i]->s[0] == '}') {
+ /* If we have a closing bracket we verify
+ * that it matches the top of the stack,
+ * we then pop the top of the stack
+ * (the bracketed sentence) into the end
+ * of the sentence it appeared in */
if (stack->token->s[0] != '{')
goto stray;
pop:
@@ -664,6 +779,11 @@ again:
stack->parent->head = &stack->next;
stack = stack->parent;
} else if (tokens[i]->s[0] == ';') {
+ /* If we have a semicolon, we are
+ * at the end of the rule, and expect
+ * the stack to be free from brackets
+ * (the stack will however contain the
+ * rule, which is emitted and deallocated) */
if (stack->token->s[0] == ')' || stack->token->s[0] == ']' || stack->token->s[0] == '}')
eprintf("%s: premature end of rule on line %zu at column %zu (character %zu): "
"'%s' on line %zu at column %zu (character %zu) not closed\n",
@@ -683,10 +803,15 @@ again:
break;
case EXPECT_RANGE_LOW:
+ /* After the "<" that begins a value range,
+ * there is a numerical value, which is followed
+ * by a comma, */
state = EXPECT_RANGE_DELIM;
goto add_range_bound;
case EXPECT_RANGE_DELIM:
+ /* the comma is followed by another value:
+ * the upper boundary, */
if (type != SYMBOL || tokens[i]->s[0] != ',') {
eprintf("%s: expected an ',' on line %zu at column %zu (character %zu)\n",
argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
@@ -696,12 +821,30 @@ again:
break;
case EXPECT_RANGE_HIGH:
+ /* which is followed by a ">", */
state = EXPECT_RANGE_CLOSE;
+ goto add_range_bound;
+
+ case EXPECT_RANGE_CLOSE:
+ /* after the ">", a binary operator
+ * or the end of the sentence is expected */
+ if (type != SYMBOL || tokens[i]->s[0] != '>') {
+ eprintf("%s: expected an '>' on line %zu at column %zu (character %zu)\n",
+ argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
+ }
+ state = EXPECT_OPERATOR;
+ /* Once we have the ">", we pop the "<"
+ * from the stack and add the value range
+ * to the sentence. */
+ goto pop;
+
add_range_bound:
if (type == IDENTIFIER) {
+ /* A value can be specified in hexadecimal format
+ * (prefixed with "0x" or "0X") or in decimal format */
val = 0;
if (tokens[i]->s[0] == '0' && (tokens[i]->s[1] == 'x' || tokens[i]->s[1] == 'X')) {
- for (j = 2; isxdigit(tokens[i]->s[j]) && val < 255; j++)
+ for (j = 2u; isxdigit(tokens[i]->s[j]) && val < 255; j++)
val = (val * 16) | ((tokens[i]->s[j] & 15) + (tokens[i]->s[j] > '9' ? 9 : 0));
} else {
for (j = 0; isdigit(tokens[i]->s[j]) && val < 255; j++)
@@ -712,11 +855,12 @@ again:
tokens[i]->s[0] = (char)val;
tokens[i]->s[1] = '\0';
} else if (type == STRING) {
- /* tokens[i]->s[0] is '"' */
- if (!tokens[i]->s[1]) {
+ /* or as a single character string */
+ if (!tokens[i]->s[1]) { /* tokens[i]->s[0] is '"' */
goto invalid_range;
} else if (tokens[i]->s[1] == '\\') {
- j = 3;
+ /* The character can be specified using an escape sequence */
+ j = 3u;
if (tokens[i]->s[2] == '"') {
tokens[i]->s[1] = '"';
} else if (tokens[i]->s[2] == '\'') {
@@ -739,10 +883,10 @@ again:
val = ((tokens[i]->s[3] & 15) + (tokens[i]->s[3] > '9' ? 9 : 0)) * 16;
val |= (tokens[i]->s[4] & 15) + (tokens[i]->s[4] > '9' ? 9 : 0);
tokens[i]->s[0] = (char)val;
- j = 5;
+ j = 5u;
} else if ('0' <= tokens[i]->s[2] && tokens[i]->s[2] <= '7') {
val = 0;
- for (j = 2; '0' <= tokens[i]->s[j] && tokens[i]->s[j] <= '7' && val < 255; j++)
+ for (j = 2u; '0' <= tokens[i]->s[j] && tokens[i]->s[j] <= '7' && val < 255; j++)
val = (val * 8) | (tokens[i]->s[j] & 15);
if (val > 255)
goto invalid_range;
@@ -767,14 +911,6 @@ again:
}
goto add_singleton;
- case EXPECT_RANGE_CLOSE:
- if (type != SYMBOL || tokens[i]->s[0] != '>') {
- eprintf("%s: expected an '>' on line %zu at column %zu (character %zu)\n",
- argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
- }
- state = EXPECT_OPERATOR;
- goto pop;
-
default:
abort();
}
@@ -783,6 +919,7 @@ again:
if (state != NEW_RULE)
eprintf("%s: premature end of file\n", argv0);
+ /* Detect unused and undefined symbols */
err = 0;
qsort(rule_names, nrule_names, sizeof(*rule_names), strpcmp);
qsort(want_rules, nwant_rules, sizeof(*want_rules), strpcmp);
@@ -790,39 +927,41 @@ again:
cmp = strcmp(rule_names[i], want_rules[j]);
if (!cmp) {
i++;
- for (j++; j < nwant_rules && !strcmp(want_rules[j - 1], want_rules[j]); j++);
+ for (j++; j < nwant_rules && !strcmp(want_rules[j - 1u], want_rules[j]); j++);
} else if (!strcmp(rule_names[i], argv[0])) {
i++;
} else if (cmp < 0) {
- eprintf("%s: rule \"%s\" defined but not used\n", argv0, rule_names[i]);
+ weprintf("%s: rule \"%s\" defined but not used\n", argv0, rule_names[i]);
i++;
err = 1;
} else {
- eprintf("%s: rule \"%s\" used but not defined\n", argv0, want_rules[j]);
+ weprintf("%s: rule \"%s\" used but not defined\n", argv0, want_rules[j]);
for (j++; j < nwant_rules && !strcmp(want_rules[j - 1], want_rules[j]); j++);
err = 1;
}
}
for (; i < nrule_names; i++) {
if (strcmp(rule_names[i], argv[0])) {
- eprintf("%s: rule \"%s\" defined but not used\n", argv0, rule_names[i]);
+ weprintf("%s: rule \"%s\" defined but not used\n", argv0, rule_names[i]);
err = 1;
}
}
while (j < nwant_rules) {
- eprintf("%s: rule \"%s\" used but not defined\n", argv0, want_rules[j]);
- for (j++; j < nwant_rules && !strcmp(want_rules[j - 1], want_rules[j]); j++);
+ weprintf("%s: rule \"%s\" used but not defined\n", argv0, want_rules[j]);
+ for (j++; j < nwant_rules && !strcmp(want_rules[j - 1u], want_rules[j]); j++);
err = 1;
}
if (err)
exit(1);
+ /* Verify that the main rule actually exists */
for (i = 0; i < nrule_names; i++)
if (!strcmp(rule_names[i], argv[0]))
goto found_main;
eprintf("%s: specified main rule (\"%s\") was not defined\n", argv0, argv[0]);
-
found_main:
+
+ /* Emit predefined rules */
printf("static union libparser_sentence noeof_sentence = {.type = LIBPARSER_SENTENCE_TYPE_EXCEPTION};\n");
printf("static struct libparser_rule noeof_rule = {\"@noeof\", &noeof_sentence};\n");
printf("static union libparser_sentence noeof_rule_sentence = {.rule = "
@@ -850,6 +989,7 @@ found_main:
"}};\n");
printf("static struct libparser_rule main_rule = {\"@start\", &main_sentence};\n");
+ /* Emit the rule table */
printf("const struct libparser_rule *const libparser_rule_table[] = {\n");
for (i = 0; i < nrule_names; i++) {
printf("\t&rule_%zu,\n", i);