diff options
| author | Mattias Andrée <m@maandree.se> | 2026-01-03 03:25:13 +0100 |
|---|---|---|
| committer | Mattias Andrée <m@maandree.se> | 2026-01-03 03:25:13 +0100 |
| commit | 6ae86e3453c2fa13f2e15d19a6f3a7e65cecbc0b (patch) | |
| tree | 1a547ae55c9ec9fd6b9dc2cd5679dad072b99b2a | |
| parent | Fixes to print-syntax.c (diff) | |
| download | libparser-6ae86e3453c2fa13f2e15d19a6f3a7e65cecbc0b.tar.gz libparser-6ae86e3453c2fa13f2e15d19a6f3a7e65cecbc0b.tar.bz2 libparser-6ae86e3453c2fa13f2e15d19a6f3a7e65cecbc0b.tar.xz | |
Improve style, add comments, and fix so that all unused and undefined rules are listed
Signed-off-by: Mattias Andrée <m@maandree.se>
| -rw-r--r-- | libparser-generate.c | 340 |
1 files changed, 240 insertions, 100 deletions
diff --git a/libparser-generate.c b/libparser-generate.c index 3f544c3..a9a3d96 100644 --- a/libparser-generate.c +++ b/libparser-generate.c @@ -19,29 +19,37 @@ usage(void) } -#define eprintf(...) (fprintf(stderr, __VA_ARGS__), exit(1)) +#define weprintf(...) (fprintf(stderr, __VA_ARGS__)) +#define eprintf(...) (weprintf(__VA_ARGS__), exit(1)) struct token { + /* text position in file { */ size_t lineno; size_t column; size_t character; - char s[]; + /* } */ + char s[]; /* the text */ }; struct node { - struct token *token; - struct node *parent; - struct node *next; - struct node *data; - struct node **head; + struct token *token; /* node text */ + struct node *parent; /* parent node in tree */ + struct node *next; /* next element in list */ + struct node *data; /* beginning of subsentence */ + struct node **head; /* end of subsentence */ }; +/* declared rules, used to detect duplicates, + * and compared with `want_rules` to detected + * unused rules */ static char **rule_names = NULL; static size_t nrule_names = 0; static size_t rule_names_size = 0; +/* used rules, compared with `rule_names` + * to detect used but undefined rules */ static char **want_rules = NULL; static size_t nwant_rules = 0; static size_t want_rules_size = 0; @@ -119,13 +127,13 @@ check_utf8(char *buf, size_t *ip, size_t len) uint32_t cp; if ((buf[*ip] & 0xE0) == 0xC0) { cp = (uint32_t)(unsigned char)(buf[*ip] ^ 0xC0); - req = 2; + req = 2u; } else if ((buf[*ip] & 0xF0) == 0xE0) { cp = (uint32_t)(unsigned char)(buf[*ip] ^ 0xE0); - req = 3; + req = 3u; } else if ((buf[*ip] & 0xF8) == 0xF0) { cp = (uint32_t)(unsigned char)(buf[*ip] ^ 0xF0); - req = 4; + req = 4u; } else { return 0; } @@ -143,11 +151,11 @@ check_utf8(char *buf, size_t *ip, size_t len) if (cp < (uint32_t)1 << (7 + 0 * 6)) return 0; if (cp < (uint32_t)1 << (5 + 1 * 6)) - return req == 2; + return req == 2u; if (cp < (uint32_t)1 << (4 + 2 * 6)) - return req == 3; + return req == 3u; if (cp <= UINT32_C(0x10FFFF)) - return req == 4; + return req == 4u; return 0; } @@ -171,14 +179,14 @@ readall_and_validate(int fd, const char *fname) } } - for (i = 0; i < len; i++) { + for (i = 0; i < len;) { if (buf[i] == '\n') { - lineno += 1; + lineno += 1u; column = 0; character = 0; } else if (buf[i] == '\t') { - column += 8 - column % 8; - character += 1; + column += 8u - column % 8u; + character += 1u; } else if (buf[i] == '\r') { eprintf("%s: %s contains a CR character on line %zu at column %zu (character %zu)\n", argv0, fname, lineno, column, character); @@ -189,8 +197,8 @@ readall_and_validate(int fd, const char *fname) eprintf("%s: %s contains a NUL byte on line %zu at column %zu (character %zu)\n", argv0, fname, lineno, column, character); } else if (!(buf[i] & 0x80)) { - character += 1; - column += 1; + character += 1u; + column += 1u; } else if ((buf[i] & 0xC0) == 0x80) { eprintf("%s: %s contains a illegal byte on line %zu at column %zu (character %zu)\n", argv0, fname, lineno, column, character); @@ -199,13 +207,14 @@ readall_and_validate(int fd, const char *fname) eprintf("%s: %s contains a illegal byte sequence on line %zu at column %zu (character %zu)\n", argv0, fname, lineno, column, character); } - i--; - character += 1; - column += 1; + character += 1u; + column += 1u; + continue; } + i++; } - buf = erealloc(buf, len + 1); + buf = erealloc(buf, len + 1u); buf[len] = '\0'; return buf; @@ -237,7 +246,7 @@ tokenise(const char *data) token_column = column; token_character = character; if (token_len == token_size) - token = erealloc(token, token_size += 16); + token = erealloc(token, token_size += 16u); token[token_len++] = data[i]; if (isidentifier(data[i])) { state = IDENTIFIER; @@ -252,10 +261,10 @@ tokenise(const char *data) } else { add_token: if (token_len == token_size) - token = erealloc(token, token_size += 16); + token = erealloc(token, token_size += 16u); token[token_len++] = '\0'; if (ntokens == tokens_size) - tokens = ereallocarray(tokens, tokens_size += 16, sizeof(*tokens)); + tokens = ereallocarray(tokens, tokens_size += 16u, sizeof(*tokens)); tokens[ntokens] = emalloc(offsetof(struct token, s) + token_len); tokens[ntokens]->lineno = token_lineno; tokens[ntokens]->column = token_column; @@ -270,15 +279,15 @@ tokenise(const char *data) if (isidentifier(data[i]) || data[i] == '-') { add_char: if (token_len == token_size) - token = erealloc(token, token_size += 16); + token = erealloc(token, token_size += 16u); token[token_len++] = data[i]; } else { add_token_and_do_again: if (token_len == token_size) - token = erealloc(token, token_size += 16); + token = erealloc(token, token_size += 16u); token[token_len++] = '\0'; if (ntokens == tokens_size) - tokens = ereallocarray(tokens, tokens_size += 16, sizeof(*tokens)); + tokens = ereallocarray(tokens, tokens_size += 16u, sizeof(*tokens)); tokens[ntokens] = emalloc(offsetof(struct token, s) + token_len); tokens[ntokens]->lineno = token_lineno; tokens[ntokens]->column = token_column; @@ -310,7 +319,7 @@ tokenise(const char *data) argv0, lineno, column, character); } if (token_len == token_size) - token = erealloc(token, token_size += 16); + token = erealloc(token, token_size += 16u); token[token_len++] = data[i]; state = STRING; break; @@ -327,24 +336,25 @@ tokenise(const char *data) }; if (data[i] == '\n') { - lineno += 1; + lineno += 1u; column = 0; character = 0; } else if (data[i] == '\t') { - column += 8 - column % 8; - character += 1; + column += 8u - column % 8u; + character += 1u; } else { - character += (data[i] & 0xC0) != 0x80; - column += 1; + character += (size_t)((data[i] & 0xC0) != 0x80); + column += 1u; } } if (state != NEW_TOKEN && state != SPACE) eprintf("%s: premature end of file\n", argv0); - tokens = ereallocarray(tokens, ntokens + 1, sizeof(*tokens)); - tokens[ntokens] = NULL; free(token); + tokens = ereallocarray(tokens, ntokens + 1u, sizeof(*tokens)); + tokens[ntokens] = NULL; + return tokens; } @@ -354,6 +364,7 @@ emit_and_free_sentence(struct node *node, size_t *indexp) { size_t index = (*indexp)++, left, right; struct node *next, *low, *high; + const char *type; for (; node->token->s[0] == '('; node = next) { next = node->data; @@ -361,14 +372,19 @@ emit_and_free_sentence(struct node *node, size_t *indexp) free(node); } - if (node->token->s[0] == '[' || node->token->s[0] == '{' || node->token->s[0] == '!') { + switch (node->token->s[0]) { + case '[': type = "OPTIONAL"; goto unary; + case '{': type = "REPEATED"; goto unary; + case '!': type = "REJECTION"; unary: emit_and_free_sentence(node->data, indexp); printf("static union libparser_sentence sentence_%zu_%zu = {.unary = {" ".type = LIBPARSER_SENTENCE_TYPE_%s, .sentence = &sentence_%zu_%zu" "}};\n", - nrule_names, index, node->token->s[0] == '[' ? "OPTIONAL" : - node->token->s[0] == '{' ? "REPEATED" : "REJECTION", nrule_names, index + 1); - } else if (node->token->s[0] == '<') { + nrule_names, index, + type, nrule_names, index + 1u); + break; + + case '<': low = node->data; high = node->data->next; if ((unsigned char)low->token->s[0] > (unsigned char)high->token->s[0]) { @@ -380,12 +396,16 @@ emit_and_free_sentence(struct node *node, size_t *indexp) printf("static union libparser_sentence sentence_%zu_%zu = {.char_range = {" ".type = LIBPARSER_SENTENCE_TYPE_CHAR_RANGE, .low = %hhu, .high = %hhu" "}};\n", - nrule_names, index, (unsigned char)low->token->s[0], (unsigned char)high->token->s[0]); + nrule_names, index, + (unsigned char)low->token->s[0], (unsigned char)high->token->s[0]); free(low->token); free(high->token); free(low); free(high); - } else if (node->token->s[0] == '|' || node->token->s[0] == ',') { + break; + + case '|': type = "ALTERNATION"; goto binary; + case ',': type = "CONCATENATION"; binary: right = *indexp; emit_and_free_sentence(node->data->next, indexp); left = *indexp; @@ -394,25 +414,35 @@ emit_and_free_sentence(struct node *node, size_t *indexp) ".type = LIBPARSER_SENTENCE_TYPE_%s, " ".left = &sentence_%zu_%zu, .right = &sentence_%zu_%zu" "}};\n", - nrule_names, index, node->token->s[0] == '|' ? "ALTERNATION" : "CONCATENATION", + nrule_names, index, + type, nrule_names, left, nrule_names, right); - } else if (node->token->s[0] == '"') { + break; + + case '"': printf("static union libparser_sentence sentence_%zu_%zu = {.string = {" ".type = LIBPARSER_SENTENCE_TYPE_STRING, " - ".string = %s\", .length = sizeof(%s\") - 1" + ".string = %s\", .length = sizeof(%s\") - 1U" "}};\n", - nrule_names, index, node->token->s, node->token->s); - } else if (node->token->s[0] == '-') { + nrule_names, index, + node->token->s, node->token->s); + break; + + case '-': printf("static union libparser_sentence sentence_%zu_%zu = {.type = LIBPARSER_SENTENCE_TYPE_EXCEPTION};\n", nrule_names, index); - } else { + break; + + default: if (nwant_rules == want_rules_size) - want_rules = ereallocarray(want_rules, want_rules_size += 16, sizeof(*want_rules)); + want_rules = ereallocarray(want_rules, want_rules_size += 16u, sizeof(*want_rules)); want_rules[nwant_rules++] = estrdup(node->token->s); printf("static union libparser_sentence sentence_%zu_%zu = {.rule = {" ".type = LIBPARSER_SENTENCE_TYPE_RULE, .rule = \"%s\"" "}};\n", - nrule_names, index, node->token->s); + nrule_names, index, + node->token->s); + break; } free(node->token); @@ -423,55 +453,85 @@ emit_and_free_sentence(struct node *node, size_t *indexp) static struct node * order_sentences(struct node *node) { - struct node *tail = NULL, **head = &tail; - struct node *stack = NULL; + struct node *tail = NULL, **head = &tail; /* output queue */ + struct node *stack = NULL; /* reordering stack */ struct node *next, *prev; + /* Reorder symbols from infix order to postfix order */ for (; node; node = next) { next = node->next; - if (node->token->s[0] == '(' || node->token->s[0] == '[' || node->token->s[0] == '{') { - node->data = order_sentences(node->data); - *head = node; - head = &node->next; - } else if (node->token->s[0] == '|' || node->token->s[0] == ',') { + + switch (node->token->s[0]) { + case '|': + case ',': again_operators: if (!stack) { - node->next = stack; - stack = node; + /* if the queue is empty, we have to place our node on it */ + goto push_to_stack; } else if (node->token->s[0] == ',' && stack->token->s[0] == '|') { - node->next = stack; - stack = node; + /* Likewise if we have a concatenation while the have + * an alternation on the stack, since concatenation have + * higher precedence */ + goto push_to_stack; } else if (node->token->s[0] == stack->token->s[0]) { + /* If however our operator is the same as on the top + * of the stack (meaning they have the same precedence), + * we pop the top of the stack onto the queue, and push + * our operator to the stack */ *head = stack; head = &stack->next; stack = stack->next; + push_to_stack: node->next = stack; stack = node; } else { + /* Otherwise, that is if we have an alternation but + * the top of the stack is an concatenation (that is + * our operator has lower precedence), we pop the + * top of the stack into the queue, and compare + * the operator again against the stack */ *head = stack; head = &stack->next; stack = stack->next; goto again_operators; } - } else { - if (node->token->s[0] == '!') - node->data = order_sentences(node->data); + break; + + case '(': + case '[': + case '{': + case '!': + /* Everything else we immediately put into the queue, + * but for brackets and unary operators, we simply + * use recursion to order inner sentences */ + node->data = order_sentences(node->data); + /* fall through */ + default: *head = node; head = &node->next; + break; } } + /* Anything left on the stack is popped into the queue */ for (; stack; stack = next) { next = stack->next; *head = stack; head = &stack->next; } + /* Properly terminate the queue */ *head = NULL; + /* Convert the postfix notation into a tree */ for (stack = tail, prev = NULL; stack; prev = stack, stack = next) { + /* Reverse the queue (we need to look + * backwards, but the list is singly linked) */ next = stack->next; stack->next = prev; + /* But when a binary operator is encountered, + * consume the two tokens in front of it in + * queue, and add them as it's operands */ if (stack->token->s[0] == '|' || stack->token->s[0] == ',') { prev = stack->next->next->next; stack->data = stack->next->next; @@ -493,10 +553,11 @@ emit_and_free_rule(struct node *rule) rule->data = order_sentences(rule->data); emit_and_free_sentence(rule->data, &index); - printf("static struct libparser_rule rule_%zu = {\"%s\", &sentence_%zu_0};\n", nrule_names, rule->token->s, nrule_names); + printf("static struct libparser_rule rule_%zu = {\"%s\", &sentence_%zu_0};\n", + nrule_names, rule->token->s, nrule_names); if (nrule_names == rule_names_size) - rule_names = ereallocarray(rule_names, rule_names_size += 16, sizeof(*rule_names)); + rule_names = ereallocarray(rule_names, rule_names_size += 16u, sizeof(*rule_names)); rule_names[nrule_names++] = estrdup(rule->token->s); free(rule->token); free(rule); @@ -553,14 +614,17 @@ main(int argc, char *argv[]) i = 0; again: for (; tokens[i]; i++) { - if (tokens[i + 1] && tokens[i]->s[0] == '(' && tokens[i + 1]->s[0] == '*') { + /* Remove comments, they cannot be nested, but + * they can contain strings (recognised during + * tokenisation) which may contain comment-tokens */ + if (tokens[i + 1u] && tokens[i]->s[0] == '(' && tokens[i + 1u]->s[0] == '*') { free(tokens[i]); - free(tokens[i + 1]); - for (i += 2; tokens[i] && tokens[i + 1]; i++) { - if (tokens[i]->s[0] == '*' && tokens[i + 1]->s[0] == ')') { + free(tokens[i + 1u]); + for (i += 2u; tokens[i] && tokens[i + 1u]; i++) { + if (tokens[i]->s[0] == '*' && tokens[i + 1u]->s[0] == ')') { free(tokens[i]); - free(tokens[i + 1]); - i += 2; + free(tokens[i + 1u]); + i += 2u; goto again; } free(tokens[i]); @@ -568,19 +632,29 @@ again: eprintf("%s: premature end of file\n", argv0); } + /* Also remove any whitespace (the tokeniser + * simple and does not recognise mulltisymbol + * tokens (that is apart form strings and + * identifiers) so it cannot ignore whitespace. */ + if (isspace(tokens[i]->s[0])) { + free(tokens[i]); + continue; + } + + /* For the sake of code readability, identify + * the token type */ if (tokens[i]->s[0] == '"') { type = STRING; } else if (isidentifier(tokens[i]->s[0])) { type = IDENTIFIER; - } else if (isspace(tokens[i]->s[0])) { - free(tokens[i]); - continue; } else { type = SYMBOL; } switch (state) { case NEW_RULE: + /* At the beginning of a new rule (which is the initial state), + * we expect an identifier for the rule */ if (type != IDENTIFIER) { eprintf("%s: expected an identifier on line %zu at column %zu (character %zu)\n", argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character); @@ -588,7 +662,10 @@ again: stack = calloc(1, sizeof(*stack)); stack->token = tokens[i]; stack->head = &stack->data; + /* and then we expect an equals sign */ state = EXPECT_EQUALS; + /* but we have to make sure the name isn't already + * being used for another rule */ for (j = 0; j < nrule_names; j++) { if (!strcmp(rule_names[j], tokens[i]->s)) { eprintf("%s: duplicate definition of \"%s\" on line %zu at column %zu (character %zu)\n", @@ -598,19 +675,40 @@ again: break; case EXPECT_EQUALS: + /* After the identifier for the rule, we make sure + * we get and equals sign */ if (type != SYMBOL || tokens[i]->s[0] != '=') { eprintf("%s: expected an '=' on line %zu at column %zu (character %zu)\n", argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character); } free(tokens[i]); + /* and then we expect to get an operand as the + * next token */ state = EXPECT_OPERAND; break; case EXPECT_OPERAND: + /* An operand can be a bracketed sentence, + * the value range, an exception, a rejection + * or an identifier (an embedded rule) */ if (type == SYMBOL) { if (tokens[i]->s[0] == '(' || tokens[i]->s[0] == '[' || tokens[i]->s[0] == '{') { + /* If the operand is a bracketed sentence, + * we push the opening bracket into the + * stack to keep track of required matching + * closing statements; and we still expect + * the next token to be an operand */ + goto push_stack; + } else if (tokens[i]->s[0] == '!') { + /* Likewise for rejections (it is added to + * the stack but it is an unary operator + * so no matching symbol will be expected) */ goto push_stack; } else if (tokens[i]->s[0] == '<') { + /* Likewise for value ranges, but we expect + * the next token to be a numerical value, + * which will represent the range's lower + * bound */ state = EXPECT_RANGE_LOW; push_stack: parent_node = stack; @@ -619,15 +717,21 @@ again: stack->token = tokens[i]; stack->head = &stack->data; } else if (tokens[i]->s[0] == '-') { + /* If the operand is an exception, it added + * to the sentence and, and the next token + * is expected to be a binary operator or + * the end of the sentence */ goto add; - } else if (tokens[i]->s[0] == '!') { - goto push_stack; } else { stray: eprintf("%s: stray '%c' on line %zu at column %zu (character %zu)\n", argv0, tokens[i]->s[0], tokens[i]->lineno, tokens[i]->column, tokens[i]->character); } } else { + /* If the symbol is an embedded rule, it + * is added to the sentence and the next + * token is expected to be a binary + * operator or the end of the sentence */ add: state = EXPECT_OPERATOR; goto add_singleton; @@ -635,15 +739,21 @@ again: break; case EXPECT_OPERATOR: + /* When we get an binary operator, or the end + * of a sentence, we have to pop out all unary + * operators (rejects) from the stack */ while (stack->token->s[0] == '!') { *stack->parent->head = stack; stack->parent->head = &stack->next; stack = stack->parent; } if (tokens[i]->s[0] == '|' || tokens[i]->s[0] == ',') { + /* If we have an binary operator, we add + * it to the sentence and expect the next + * token to be an operand */ state = EXPECT_OPERAND; add_singleton: - node = calloc(1, sizeof(*node)); + node = calloc(1u, sizeof(*node)); node->token = tokens[i]; *stack->head = node; stack->head = &node->next; @@ -656,6 +766,11 @@ again: goto stray; goto pop; } else if (tokens[i]->s[0] == '}') { + /* If we have a closing bracket we verify + * that it matching the top of the stack, + * we then pop the top of the stack + * (the bracketed sentence) into the end + * of the sentence it appeared in */ if (stack->token->s[0] != '{') goto stray; pop: @@ -664,6 +779,11 @@ again: stack->parent->head = &stack->next; stack = stack->parent; } else if (tokens[i]->s[0] == ';') { + /* If are are have a semicolon, we are + * at the end of the rule, and expect + * the stack to be free from brackets + * (the stack will however contain the + * rule, which is emitted and deallocated) */ if (stack->token->s[0] == ')' || stack->token->s[0] == ']' || stack->token->s[0] == '}') eprintf("%s: premature end of rule on line %zu at column %zu (character %zu): " "'%s' on line %zu at column %zu (character %zu) not closed\n", @@ -683,10 +803,15 @@ again: break; case EXPECT_RANGE_LOW: + /* After the "<" that beginnins a value range, + * there is numberal value, which is followed + * by a comma, */ state = EXPECT_RANGE_DELIM; goto add_range_bound; case EXPECT_RANGE_DELIM: + /* the comma is followed by another value: + * the upper boundary, */ if (type != SYMBOL || tokens[i]->s[0] != ',') { eprintf("%s: expected an ',' on line %zu at column %zu (character %zu)\n", argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character); @@ -696,12 +821,30 @@ again: break; case EXPECT_RANGE_HIGH: + /* which is followed by a ">", */ state = EXPECT_RANGE_CLOSE; + goto add_range_bound; + + case EXPECT_RANGE_CLOSE: + /* after the ">", a binary operator + * or the end of the sentence is expected */ + if (type != SYMBOL || tokens[i]->s[0] != '>') { + eprintf("%s: expected an '>' on line %zu at column %zu (character %zu)\n", + argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character); + } + state = EXPECT_OPERATOR; + /* Once we have the ">", we pop the "<" + * from the stack and add the value range + * to the the sentence. */ + goto pop; + add_range_bound: if (type == IDENTIFIER) { + /* A value can be specified in hexadecimal format + * (prefixed with ("0x" or "0X") or in decimal format */ val = 0; if (tokens[i]->s[0] == '0' && (tokens[i]->s[1] == 'x' || tokens[i]->s[1] == 'X')) { - for (j = 2; isxdigit(tokens[i]->s[j]) && val < 255; j++) + for (j = 2u; isxdigit(tokens[i]->s[j]) && val < 255; j++) val = (val * 16) | ((tokens[i]->s[j] & 15) + (tokens[i]->s[j] > '9' ? 9 : 0)); } else { for (j = 0; isdigit(tokens[i]->s[j]) && val < 255; j++) @@ -712,11 +855,12 @@ again: tokens[i]->s[0] = (char)val; tokens[i]->s[1] = '\0'; } else if (type == STRING) { - /* tokens[i]->s[0] is '"' */ - if (!tokens[i]->s[1]) { + /* or as a single character string */ + if (!tokens[i]->s[1]) { /* tokens[i]->s[0] is '"' */ goto invalid_range; } else if (tokens[i]->s[1] == '\\') { - j = 3; + /* The character can be specified using an escape sequence */ + j = 3u; if (tokens[i]->s[2] == '"') { tokens[i]->s[1] = '"'; } else if (tokens[i]->s[2] == '\'') { @@ -739,10 +883,10 @@ again: val = ((tokens[i]->s[3] & 15) + (tokens[i]->s[3] > '9' ? 9 : 0)) * 16; val |= (tokens[i]->s[4] & 15) + (tokens[i]->s[4] > '9' ? 9 : 0); tokens[i]->s[0] = (char)val; - j = 5; + j = 5u; } else if ('0' <= tokens[i]->s[2] && tokens[i]->s[2] <= '7') { val = 0; - for (j = 2; '0' <= tokens[i]->s[j] && tokens[i]->s[j] <= '7' && val < 255; j++) + for (j = 2u; '0' <= tokens[i]->s[j] && tokens[i]->s[j] <= '7' && val < 255; j++) val = (val * 8) | (tokens[i]->s[j] & 15); if (val > 255) goto invalid_range; @@ -767,14 +911,6 @@ again: } goto add_singleton; - case EXPECT_RANGE_CLOSE: - if (type != SYMBOL || tokens[i]->s[0] != '>') { - eprintf("%s: expected an '>' on line %zu at column %zu (character %zu)\n", - argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character); - } - state = EXPECT_OPERATOR; - goto pop; - default: abort(); } @@ -783,6 +919,7 @@ again: if (state != NEW_RULE) eprintf("%s: premature end of file\n", argv0); + /* Detect unused and undefined symbols */ err = 0; qsort(rule_names, nrule_names, sizeof(*rule_names), strpcmp); qsort(want_rules, nwant_rules, sizeof(*want_rules), strpcmp); @@ -790,39 +927,41 @@ again: cmp = strcmp(rule_names[i], want_rules[j]); if (!cmp) { i++; - for (j++; j < nwant_rules && !strcmp(want_rules[j - 1], want_rules[j]); j++); + for (j++; j < nwant_rules && !strcmp(want_rules[j - 1u], want_rules[j]); j++); } else if (!strcmp(rule_names[i], argv[0])) { i++; } else if (cmp < 0) { - eprintf("%s: rule \"%s\" defined but not used\n", argv0, rule_names[i]); + weprintf("%s: rule \"%s\" defined but not used\n", argv0, rule_names[i]); i++; err = 1; } else { - eprintf("%s: rule \"%s\" used but not defined\n", argv0, want_rules[j]); + weprintf("%s: rule \"%s\" used but not defined\n", argv0, want_rules[j]); for (j++; j < nwant_rules && !strcmp(want_rules[j - 1], want_rules[j]); j++); err = 1; } } for (; i < nrule_names; i++) { if (strcmp(rule_names[i], argv[0])) { - eprintf("%s: rule \"%s\" defined but not used\n", argv0, rule_names[i]); + weprintf("%s: rule \"%s\" defined but not used\n", argv0, rule_names[i]); err = 1; } } while (j < nwant_rules) { - eprintf("%s: rule \"%s\" used but not defined\n", argv0, want_rules[j]); - for (j++; j < nwant_rules && !strcmp(want_rules[j - 1], want_rules[j]); j++); + weprintf("%s: rule \"%s\" used but not defined\n", argv0, want_rules[j]); + for (j++; j < nwant_rules && !strcmp(want_rules[j - 1u], want_rules[j]); j++); err = 1; } if (err) exit(1); + /* Verify that the main rule actually exist */ for (i = 0; i < nrule_names; i++) if (!strcmp(rule_names[i], argv[0])) goto found_main; eprintf("%s: specified main rule (\"%s\") was not defined\n", argv0, argv[0]); - found_main: + + /* Emit predefined rules */ printf("static union libparser_sentence noeof_sentence = {.type = LIBPARSER_SENTENCE_TYPE_EXCEPTION};\n"); printf("static struct libparser_rule noeof_rule = {\"@noeof\", &noeof_sentence};\n"); printf("static union libparser_sentence noeof_rule_sentence = {.rule = " @@ -850,6 +989,7 @@ found_main: "}};\n"); printf("static struct libparser_rule main_rule = {\"@start\", &main_sentence};\n"); + /* Emit the rule table */ printf("const struct libparser_rule *const libparser_rule_table[] = {\n"); for (i = 0; i < nrule_names; i++) { printf("\t&rule_%zu,\n", i); |
