/* See LICENSE file for copyright and license details. */
#include <ctype.h>
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>


/* Program name used as the prefix of every diagnostic;
 * replaced by argv[0] in main() when available */
static const char *argv0 = "libparser-generate";


/**
 * Print the command line synopsis to standard error
 * and terminate the process with exit status 1
 */
static void
usage(void)
{
	fprintf(stderr, "usage: %s main-rule\n", argv0);
	exit(1);
}


/* Print a formatted warning to standard error */
#define weprintf(...) (fprintf(stderr, __VA_ARGS__))

/* Print a formatted error to standard error and exit with status 1 */
#define eprintf(...) (weprintf(__VA_ARGS__), exit(1))


/**
 * A lexical token together with the position in
 * the input where its first byte was found
 */
struct token {
	/* text position in file { */
	size_t lineno;
	size_t column;
	size_t character;
	/* } */
	char s[]; /* the text */
};

/**
 * A node in the sentence list/tree that is
 * built while a rule is being parsed
 */
struct node {
	struct token *token; /* node text */
	struct node *parent; /* parent node in tree */
	struct node *next;   /* next element in list */
	struct node *data;   /* beginning of subsentence */
	struct node **head;  /* end of subsentence */
};


/* declared rules, used to detect duplicates,
 * and compared with `want_rules` to detect
 * unused rules */
static char **rule_names = NULL;
static size_t nrule_names = 0;
static size_t rule_names_size = 0;

/* used rules, compared with `rule_names`
 * to detect used but undefined rules */
static char **want_rules = NULL;
static size_t nwant_rules = 0;
static size_t want_rules_size = 0;


/**
 * malloc(3) wrapper that prints an error and exits on failure
 *
 * @param   n  Number of bytes to allocate
 * @return     The allocation, never `NULL`
 */
static void *
emalloc(size_t n)
{
	void *ret = malloc(n);
	if (!ret)
		eprintf("%s: malloc %zu: %s\n", argv0, n, strerror(errno));
	return ret;
}


/**
 * calloc(3) wrapper that prints an error and exits on failure
 *
 * @param   n  Number of elements
 * @param   m  Size of each element
 * @return     The zero-initialised allocation, never `NULL`
 */
static void *
ecalloc(size_t n, size_t m)
{
	void *ret = calloc(n, m);
	if (!ret)
		eprintf("%s: calloc %zu %zu: %s\n", argv0, n, m, strerror(errno));
	return ret;
}


/**
 * realloc(3) wrapper that prints an error and exits on failure
 *
 * @param   ptr  Allocation to resize, or `NULL`
 * @param   n    New size in bytes
 * @return       The resized allocation, never `NULL`
 */
static void *
erealloc(void *ptr, size_t n)
{
	void *ret = realloc(ptr, n);
	if (!ret)
		eprintf("%s: realloc %p %zu: %s\n", argv0, ptr, n, strerror(errno));
	return ret;
}


/**
 * Resize an array, printing an error and exiting on
 * failure or if `n * m` would overflow `size_t`
 *
 * @param   ptr  Allocation to resize, or `NULL`
 * @param   n    New number of elements
 * @param   m    Size of each element
 * @return       The resized allocation, never `NULL`
 */
static void *
ereallocarray(void *ptr, size_t n, size_t m)
{
	void *ret;
	if (n && m > SIZE_MAX / n)
		eprintf("%s: realloc %p %zu*%zu: %s\n", argv0, ptr, n, m, strerror(EOVERFLOW));
	ret = realloc(ptr, n * m);
	if (!ret)
		eprintf("%s: realloc %p %zu*%zu: %s\n", argv0, ptr, n, m, strerror(errno));
	return ret;
}


/**
 * Duplicate a string, exiting on allocation failure
 *
 * @param   s  The string to copy
 * @return     A newly allocated copy, to be released with free(3)
 */
static char *
estrdup(const char *s)
{
	size_t n = strlen(s) + 1;
	char *ret = emalloc(n);
	memcpy(ret, s, n);
	return ret;
}


/**
 * qsort(3) comparator for arrays of `char *`,
 * ordering the strings with strcmp(3)
 */
static int
strpcmp(const void *av, const void *bv)
{
	const char *const *a = av;
	const char *const *b = bv;
	return strcmp(*a, *b);
}


/**
 * Check whether a byte may be part of an identifier
 *
 * Alphanumerical characters, the underscore, and every
 * byte outside the ASCII range are accepted
 *
 * @param   c  The byte to test
 * @return     Non-zero if the byte is an identifier byte
 */
static int
isidentifier(char c)
{
	/* <ctype.h> functions are only defined for values
	 * representable as unsigned char (and EOF) */
	unsigned char uc = (unsigned char)c;
	return isalnum(uc) || uc >= 0x80 || c == '_';
}


/**
 * Validate one multibyte UTF-8 sequence
 *
 * @param   buf  The text being validated
 * @param   ip   Offset of the lead byte of the sequence; advanced
 *               past the sequence once all of its continuation
 *               bytes have been found (even if the decoded code
 *               point is subsequently rejected)
 * @param   len  Number of bytes in `buf`
 * @return       1 if the sequence is a valid, shortest-form encoding
 *               of a non-surrogate code point no greater than
 *               U+10FFFF, 0 otherwise
 */
static int
check_utf8(char *buf, size_t *ip, size_t len)
{
	size_t req, i;
	uint32_t cp;

	/* The lead byte determines the length of the sequence */
	if ((buf[*ip] & 0xE0) == 0xC0) {
		cp = (uint32_t)(unsigned char)(buf[*ip] ^ 0xC0);
		req = 2u;
	} else if ((buf[*ip] & 0xF0) == 0xE0) {
		cp = (uint32_t)(unsigned char)(buf[*ip] ^ 0xE0);
		req = 3u;
	} else if ((buf[*ip] & 0xF8) == 0xF0) {
		cp = (uint32_t)(unsigned char)(buf[*ip] ^ 0xF0);
		req = 4u;
	} else {
		return 0;
	}

	/* Truncated sequence */
	if (req > len - *ip)
		return 0;

	/* Each continuation byte contributes 6 bits */
	for (i = 1; i < req; i++) {
		cp <<= 6;
		if ((buf[*ip + i] & 0xC0) != 0x80)
			return 0;
		cp |= (uint32_t)(unsigned char)(buf[*ip + i] ^ 0x80);
	}

	*ip += req;

	/* Surrogates (U+D800 through U+DFFF) are not valid in UTF-8 */
	if ((cp & UINT32_C(0xFFFFF800)) == UINT32_C(0xD800))
		return 0;

	/* Reject overlong encodings and out of range code points */
	if (cp < (uint32_t)1 << (7 + 0 * 6))
		return 0;
	if (cp < (uint32_t)1 << (5 + 1 * 6))
		return req == 2u;
	if (cp < (uint32_t)1 << (4 + 2 * 6))
		return req == 3u;
	if (cp <= UINT32_C(0x10FFFF))
		return req == 4u;
	return 0;
}


/**
 * Read a file descriptor until end of file and verify that the
 * content is valid UTF-8 free from NUL bytes, CR characters and
 * other control characters (horizontal tab and line feed are
 * permitted); the process exits with a diagnostic otherwise
 *
 * @param   fd     The file descriptor to read from
 * @param   fname  Name of the file, used in diagnostics
 * @return         The content as a NUL-terminated string,
 *                 to be released with free(3)
 */
static char *
readall_and_validate(int fd, const char *fname)
{
	size_t lineno = 1, column = 0, character = 0;
	size_t size = 0, len = 0, i;
	char *buf = NULL;
	ssize_t r;

	for (;; len += (size_t)r) {
		if (len == size)
			buf = erealloc(buf, size += 1024);
		r = read(fd, &buf[len], size - len);
		if (r <= 0) {
			if (!r)
				break;
			eprintf("%s: read %s: %s\n", argv0, fname, strerror(errno));
		}
	}

	for (i = 0; i < len;) {
		if (buf[i] == '\n') {
			lineno += 1u;
			column = 0;
			character = 0;
		} else if (buf[i] == '\t') {
			/* advance to the next multiple of 8 */
			column += 8u - column % 8u;
			character += 1u;
		} else if (buf[i] == '\r') {
			eprintf("%s: %s contains a CR character on line %zu at column %zu (character %zu)\n",
			        argv0, fname, lineno, column, character);
		} else if ((0 < buf[i] && buf[i] < ' ') || buf[i] == 0x7F) {
			eprintf("%s: %s contains a illegal character on line %zu at column %zu (character %zu)\n",
			        argv0, fname, lineno, column, character);
		} else if (buf[i] == '\0') {
			eprintf("%s: %s contains a NUL byte on line %zu at column %zu (character %zu)\n",
			        argv0, fname, lineno, column, character);
		} else if (!(buf[i] & 0x80)) {
			character += 1u;
			column += 1u;
		} else if ((buf[i] & 0xC0) == 0x80) {
			/* continuation byte without a lead byte */
			eprintf("%s: %s contains a illegal byte on line %zu at column %zu (character %zu)\n",
			        argv0, fname, lineno, column, character);
		} else {
			if (!check_utf8(buf, &i, len)) {
				eprintf("%s: %s contains a illegal byte sequence on line %zu at column %zu (character %zu)\n",
				        argv0, fname, lineno, column, character);
			}
			character += 1u;
			column += 1u;
			/* check_utf8 has already advanced `i` */
			continue;
		}
		i++;
	}

	buf = erealloc(buf, len + 1u);
	buf[len] = '\0';

	return buf;
}


/**
 * Split the input into tokens
 *
 * The following tokens are produced: identifiers, strings (stored
 * with their opening but without their closing quote, and with
 * escape sequences left undecoded), runs of whitespace, and single
 * bytes for everything else. The process exits with a diagnostic
 * on empty strings, on line feeds or tabs inside strings, and if
 * the input ends inside an identifier or a string.
 *
 * @param   data  The NUL-terminated text to tokenise
 * @return        `NULL`-terminated array of tokens; each token and
 *                the array itself shall be released with free(3)
 */
static struct token **
tokenise(const char *data)
{
	enum {
		NEW_TOKEN,
		IDENTIFIER,
		STRING,
		STRING_ESC,
		SPACE
	} state = NEW_TOKEN;
	size_t lineno = 1, column = 0, character = 0;
	size_t token_lineno = 0, token_column = 0, token_character = 0;
	struct token **tokens = NULL;
	char *token = NULL;
	size_t i, ntokens = 0, tokens_size = 0;
	size_t token_len = 0, token_size = 0;

	for (i = 0; data[i]; i++) {
	again:
		switch (state) {
		case NEW_TOKEN:
			token_lineno = lineno;
			token_column = column;
			token_character = character;
			if (token_len == token_size)
				token = erealloc(token, token_size += 16u);
			token[token_len++] = data[i];
			if (isidentifier(data[i])) {
				state = IDENTIFIER;
			} else if (isspace((unsigned char)data[i])) {
				state = SPACE;
			} else if (data[i] == '"') {
				state = STRING;
				if (data[i + 1] == '"') {
					eprintf("%s: empty string token on line %zu at column %zu (character %zu)\n",
					        argv0, lineno, column, character);
				}
			} else {
			add_token:
				/* Finish the token; the current byte is either part
				 * of it (symbols) or consumed (closing quotes) */
				if (token_len == token_size)
					token = erealloc(token, token_size += 16u);
				token[token_len++] = '\0';
				if (ntokens == tokens_size)
					tokens = ereallocarray(tokens, tokens_size += 16u, sizeof(*tokens));
				tokens[ntokens] = emalloc(offsetof(struct token, s) + token_len);
				tokens[ntokens]->lineno = token_lineno;
				tokens[ntokens]->column = token_column;
				tokens[ntokens]->character = token_character;
				/* `token_len` includes the terminating NUL byte */
				memcpy(tokens[ntokens]->s, token, token_len);
				ntokens++;
				token_len = 0;
				state = NEW_TOKEN;
			}
			break;

		case IDENTIFIER:
			if (isidentifier(data[i]) || data[i] == '-') {
			add_char:
				if (token_len == token_size)
					token = erealloc(token, token_size += 16u);
				token[token_len++] = data[i];
			} else {
			add_token_and_do_again:
				/* Finish the token; the current byte is not part
				 * of it and must be examined again as a new token */
				if (token_len == token_size)
					token = erealloc(token, token_size += 16u);
				token[token_len++] = '\0';
				if (ntokens == tokens_size)
					tokens = ereallocarray(tokens, tokens_size += 16u, sizeof(*tokens));
				tokens[ntokens] = emalloc(offsetof(struct token, s) + token_len);
				tokens[ntokens]->lineno = token_lineno;
				tokens[ntokens]->column = token_column;
				tokens[ntokens]->character = token_character;
				/* `token_len` includes the terminating NUL byte */
				memcpy(tokens[ntokens]->s, token, token_len);
				ntokens++;
				token_len = 0;
				state = NEW_TOKEN;
				goto again;
			}
			break;

		case STRING:
			if (data[i] == '\n' || data[i] == '\t') {
				eprintf("%s: illegal whitespace on line %zu at column %zu (character %zu)\n",
				        argv0, lineno, column, character);
			} else if (data[i] == '"') {
				goto add_token;
			} else if (data[i] == '\\') {
				state = STRING_ESC;
				goto add_char;
			} else {
				goto add_char;
			}
			break;

		case STRING_ESC:
			/* The byte after a backslash is copied verbatim,
			 * so that an escaped quote does not end the string */
			if (data[i] == '\n' || data[i] == '\t') {
				eprintf("%s: illegal whitespace on line %zu at column %zu (character %zu)\n",
				        argv0, lineno, column, character);
			}
			if (token_len == token_size)
				token = erealloc(token, token_size += 16u);
			token[token_len++] = data[i];
			state = STRING;
			break;

		case SPACE:
			if (isspace((unsigned char)data[i]))
				goto add_char;
			else
				goto add_token_and_do_again;
			break;

		default:
			abort();
		}

		/* Keep track of the text position */
		if (data[i] == '\n') {
			lineno += 1u;
			column = 0;
			character = 0;
		} else if (data[i] == '\t') {
			column += 8u - column % 8u;
			character += 1u;
		} else {
			/* UTF-8 continuation bytes do not begin a character */
			character += (size_t)((data[i] & 0xC0) != 0x80);
			column += 1u;
		}
	}
	if (state != NEW_TOKEN && state != SPACE)
		eprintf("%s: premature end of file\n", argv0);

	free(token);

	tokens = ereallocarray(tokens, ntokens + 1u, sizeof(*tokens));
	tokens[ntokens] = NULL;
	return tokens;
}


/**
 * Write the C definition of a sentence, and of all sentences it
 * is made up of, to standard output, and deallocate its nodes
 *
 * Subsentences are written before the sentence that refers to
 * them. Each sentence is named `sentence_<rule>_<index>`, where
 * `<rule>` is the current value of `nrule_names`. Names of
 * embedded rules are recorded in `want_rules`.
 *
 * @param  node    The root of the sentence, as returned
 *                 by `order_sentences`
 * @param  indexp  Next unused sentence index within the rule;
 *                 incremented for each sentence written
 */
static void
emit_and_free_sentence(struct node *node, size_t *indexp)
{
	size_t index = (*indexp)++, left, right;
	struct node *next, *low, *high;
	const char *type;

	/* Parentheses only group, they are not sentences of their own */
	for (; node->token->s[0] == '('; node = next) {
		next = node->data;
		free(node->token);
		free(node);
	}

	switch (node->token->s[0]) {
	case '[':
		type = "OPTIONAL";
		goto unary;
	case '{':
		type = "REPEATED";
		goto unary;
	case '!':
		type = "REJECTION";
	unary:
		/* The operand gets the index directly after ours */
		emit_and_free_sentence(node->data, indexp);
		printf("static union libparser_sentence sentence_%zu_%zu = {.unary = {"
		           ".type = LIBPARSER_SENTENCE_TYPE_%s, .sentence = &sentence_%zu_%zu"
		       "}};\n",
		       nrule_names, index, type, nrule_names, index + 1u);
		break;

	case '<':
		/* The bounds have been reduced to single bytes by main() */
		low = node->data;
		high = node->data->next;
		if ((unsigned char)low->token->s[0] > (unsigned char)high->token->s[0]) {
			eprintf("%s: lower character range bound on line %zu at column %zu (character %zu) "
			        "is greater than upper bound on line %zu at column %zu (character %zu)\n",
			        argv0, low->token->lineno, low->token->column, low->token->character,
			        high->token->lineno, high->token->column, high->token->character);
		}
		printf("static union libparser_sentence sentence_%zu_%zu = {.char_range = {"
		           ".type = LIBPARSER_SENTENCE_TYPE_CHAR_RANGE, .low = %hhu, .high = %hhu"
		       "}};\n",
		       nrule_names, index, (unsigned char)low->token->s[0], (unsigned char)high->token->s[0]);
		free(low->token);
		free(high->token);
		free(low);
		free(high);
		break;

	case '|':
		type = "ALTERNATION";
		goto binary;
	case ',':
		type = "CONCATENATION";
	binary:
		/* The right operand must be emitted first: it is reached
		 * via the left operand, which is freed when emitted */
		right = *indexp;
		emit_and_free_sentence(node->data->next, indexp);
		left = *indexp;
		emit_and_free_sentence(node->data, indexp);
		printf("static union libparser_sentence sentence_%zu_%zu = {.binary = {"
		           ".type = LIBPARSER_SENTENCE_TYPE_%s, "
		           ".left = &sentence_%zu_%zu, .right = &sentence_%zu_%zu"
		       "}};\n",
		       nrule_names, index, type, nrule_names, left, nrule_names, right);
		break;

	case '"':
		/* The token holds the opening but not the closing quote */
		printf("static union libparser_sentence sentence_%zu_%zu = {.string = {"
		           ".type = LIBPARSER_SENTENCE_TYPE_STRING, "
		           ".string = %s\", .length = sizeof(%s\") - 1U"
		       "}};\n",
		       nrule_names, index, node->token->s, node->token->s);
		break;

	case '-':
		printf("static union libparser_sentence sentence_%zu_%zu = {.type = LIBPARSER_SENTENCE_TYPE_EXCEPTION};\n",
		       nrule_names, index);
		break;

	default:
		/* An embedded rule */
		if (nwant_rules == want_rules_size)
			want_rules = ereallocarray(want_rules, want_rules_size += 16u, sizeof(*want_rules));
		want_rules[nwant_rules++] = estrdup(node->token->s);
		printf("static union libparser_sentence sentence_%zu_%zu = {.rule = {"
		           ".type = LIBPARSER_SENTENCE_TYPE_RULE, .rule = \"%s\""
		       "}};\n",
		       nrule_names, index, node->token->s);
		break;
	}

	free(node->token);
	free(node);
}


/**
 * Convert a list of operands and binary operators, in infix
 * order, into a tree; bracketed sentences and rejections are
 * converted recursively
 *
 * Concatenation (",") has higher precedence than alternation
 * ("|"). In the result, the left operand of a binary operator
 * is its `.data`, and the right operand is `.data->next`.
 *
 * @param   node  The first node in the list
 * @return        The root of the tree
 */
static struct node *
order_sentences(struct node *node)
{
	struct node *tail = NULL, **head = &tail; /* output queue */
	struct node *stack = NULL;                /* reordering stack */
	struct node *next, *prev;

	/* Reorder symbols from infix order to postfix order */
	for (; node; node = next) {
		next = node->next;
		switch (node->token->s[0]) {
		case '|':
		case ',':
		again_operators:
			if (!stack) {
				/* If the stack is empty, we have to
				 * place our node on it */
				goto push_to_stack;
			} else if (node->token->s[0] == ',' && stack->token->s[0] == '|') {
				/* Likewise if we have a concatenation while there is
				 * an alternation on the stack, since concatenation has
				 * higher precedence */
				goto push_to_stack;
			} else if (node->token->s[0] == stack->token->s[0]) {
				/* If however our operator is the same as on the top
				 * of the stack (meaning they have the same precedence),
				 * we pop the top of the stack onto the queue, and push
				 * our operator to the stack */
				*head = stack;
				head = &stack->next;
				stack = stack->next;
			push_to_stack:
				node->next = stack;
				stack = node;
			} else {
				/* Otherwise, that is if we have an alternation but
				 * the top of the stack is a concatenation (that is
				 * our operator has lower precedence), we pop the
				 * top of the stack into the queue, and compare
				 * the operator again against the stack */
				*head = stack;
				head = &stack->next;
				stack = stack->next;
				goto again_operators;
			}
			break;

		case '(':
		case '[':
		case '{':
		case '!':
			/* Everything else we immediately put into the queue,
			 * but for brackets and unary operators, we simply
			 * use recursion to order inner sentences */
			node->data = order_sentences(node->data);
			/* fall through */

		default:
			*head = node;
			head = &node->next;
			break;
		}
	}

	/* Anything left on the stack is popped into the queue */
	for (; stack; stack = next) {
		next = stack->next;
		*head = stack;
		head = &stack->next;
	}

	/* Properly terminate the queue */
	*head = NULL;

	/* Convert the postfix notation into a tree */
	for (stack = tail, prev = NULL; stack; prev = stack, stack = next) {
		/* Reverse the queue (we need to look
		 * backwards, but the list is singly linked) */
		next = stack->next;
		stack->next = prev;

		/* But when a binary operator is encountered,
		 * consume the two tokens in front of it in the
		 * queue, and add them as its operands */
		if (stack->token->s[0] == '|' || stack->token->s[0] == ',') {
			prev = stack->next->next->next;
			stack->data = stack->next->next;
			stack->data->next = stack->next;
			stack->next->next = NULL; /* for debugging */
			stack->next = prev;
		}
	}

	return prev;
}


/**
 * Write the C definition of a rule and its sentences to
 * standard output, register the name of the rule in
 * `rule_names`, and deallocate the rule
 *
 * @param  rule  The rule node; its `.data` is the
 *               list of nodes making up its sentence
 */
static void
emit_and_free_rule(struct node *rule)
{
	size_t index = 0;

	rule->data = order_sentences(rule->data);
	emit_and_free_sentence(rule->data, &index);

	printf("static struct libparser_rule rule_%zu = {\"%s\", &sentence_%zu_0};\n",
	       nrule_names, rule->token->s, nrule_names);

	if (nrule_names == rule_names_size)
		rule_names = ereallocarray(rule_names, rule_names_size += 16u, sizeof(*rule_names));
	rule_names[nrule_names++] = estrdup(rule->token->s);

	free(rule->token);
	free(rule);
}


/**
 * Read a grammar from standard input and write the corresponding
 * C rule table to standard output
 *
 * The name of the main rule is given as the sole operand,
 * optionally preceded by "--".
 *
 * @return  0 on success; the process exits with status 1, after
 *          printing a diagnostic to standard error, on failure
 */
int
main(int argc, char *argv[])
{
	enum {
		IDENTIFIER,
		STRING,
		SYMBOL,
	} type;
	enum {
		NEW_RULE,
		EXPECT_EQUALS,
		EXPECT_OPERAND,
		EXPECT_OPERATOR,
		EXPECT_RANGE_LOW,
		EXPECT_RANGE_DELIM,
		EXPECT_RANGE_HIGH,
		EXPECT_RANGE_CLOSE
	} state = NEW_RULE;
	struct node *stack = NULL, *parent_node, *node;
	char *data;
	struct token **tokens;
	size_t i, j;
	int cmp, err, val;

	if (argc) {
		argv0 = *argv++;
		argc--;
	}
	if (argc && argv[0][0] == '-') {
		/* No options are supported, only "--" is accepted */
		if (argv[0][1] != '-' || argv[0][2])
			usage();
		argv++;
		argc--;
	}

	/* The main rule must be a valid identifier */
	if (argc != 1 || !isidentifier(argv[0][0]))
		usage();
	for (i = 0; argv[0][i]; i++)
		if (!isidentifier(argv[0][i]) && argv[0][i] != '-')
			usage();

	/* NOTE(review): the file name was an empty string in the reviewed
	 * copy, presumably because "<stdin>" had been stripped; confirm */
	data = readall_and_validate(STDIN_FILENO, "<stdin>");
	tokens = tokenise(data);
	free(data);

	/* The emitted definitions use the libparser types.
	 * NOTE(review): the header name was missing in the reviewed
	 * copy and has been restored as <libparser.h>; confirm */
	printf("#include <libparser.h>\n");

	i = 0;
again:
	for (; tokens[i]; i++) {
		/* Remove comments, they cannot be nested, but
		 * they can contain strings (recognised during
		 * tokenisation) which may contain comment-tokens */
		if (tokens[i + 1u] && tokens[i]->s[0] == '(' && tokens[i + 1u]->s[0] == '*') {
			free(tokens[i]);
			free(tokens[i + 1u]);
			for (i += 2u; tokens[i] && tokens[i + 1u]; i++) {
				if (tokens[i]->s[0] == '*' && tokens[i + 1u]->s[0] == ')') {
					free(tokens[i]);
					free(tokens[i + 1u]);
					i += 2u;
					goto again;
				}
				free(tokens[i]);
			}
			eprintf("%s: premature end of file\n", argv0);
		}

		/* Also remove any whitespace (the tokeniser is
		 * simple and does not recognise multisymbol
		 * tokens (that is apart from strings and
		 * identifiers) so it cannot ignore whitespace) */
		if (isspace((unsigned char)tokens[i]->s[0])) {
			free(tokens[i]);
			continue;
		}

		/* For the sake of code readability, identify
		 * the token type */
		if (tokens[i]->s[0] == '"') {
			type = STRING;
		} else if (isidentifier(tokens[i]->s[0])) {
			type = IDENTIFIER;
		} else {
			type = SYMBOL;
		}

		switch (state) {
		case NEW_RULE:
			/* At the beginning of a new rule (which is the initial state),
			 * we expect an identifier for the rule */
			if (type != IDENTIFIER) {
				eprintf("%s: expected an identifier on line %zu at column %zu (character %zu)\n",
				        argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
			}
			stack = ecalloc(1, sizeof(*stack));
			stack->token = tokens[i];
			stack->head = &stack->data;
			/* and then we expect an equals sign */
			state = EXPECT_EQUALS;
			/* but we have to make sure the name isn't already
			 * being used for another rule */
			for (j = 0; j < nrule_names; j++) {
				if (!strcmp(rule_names[j], tokens[i]->s)) {
					eprintf("%s: duplicate definition of \"%s\" on line %zu at column %zu (character %zu)\n",
					        argv0, tokens[i]->s, tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
				}
			}
			break;

		case EXPECT_EQUALS:
			/* After the identifier for the rule, we make sure
			 * we get an equals sign */
			if (type != SYMBOL || tokens[i]->s[0] != '=') {
				eprintf("%s: expected an '=' on line %zu at column %zu (character %zu)\n",
				        argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
			}
			free(tokens[i]);
			/* and then we expect to get an operand as the
			 * next token */
			state = EXPECT_OPERAND;
			break;

		case EXPECT_OPERAND:
			/* An operand can be a bracketed sentence,
			 * a value range, an exception, a rejection,
			 * a string or an identifier (an embedded rule) */
			if (type == SYMBOL) {
				if (tokens[i]->s[0] == '(' || tokens[i]->s[0] == '[' || tokens[i]->s[0] == '{') {
					/* If the operand is a bracketed sentence,
					 * we push the opening bracket onto the
					 * stack to keep track of required matching
					 * closing brackets; and we still expect
					 * the next token to be an operand */
					goto push_stack;
				} else if (tokens[i]->s[0] == '!') {
					/* Likewise for rejections (it is added to
					 * the stack but it is a unary operator
					 * so no matching symbol will be expected) */
					goto push_stack;
				} else if (tokens[i]->s[0] == '<') {
					/* Likewise for value ranges, but we expect
					 * the next token to be a numerical value,
					 * which will represent the range's lower
					 * bound */
					state = EXPECT_RANGE_LOW;
				push_stack:
					parent_node = stack;
					stack = ecalloc(1, sizeof(*stack));
					stack->parent = parent_node;
					stack->token = tokens[i];
					stack->head = &stack->data;
				} else if (tokens[i]->s[0] == '-') {
					/* If the operand is an exception, it is added
					 * to the sentence, and the next token
					 * is expected to be a binary operator or
					 * the end of the sentence */
					goto add;
				} else {
				stray:
					eprintf("%s: stray '%c' on line %zu at column %zu (character %zu)\n",
					        argv0, tokens[i]->s[0], tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
				}
			} else {
				/* If the token is a string or an embedded
				 * rule, it is added to the sentence and the
				 * next token is expected to be a binary
				 * operator or the end of the sentence */
			add:
				state = EXPECT_OPERATOR;
				goto add_singleton;
			}
			break;

		case EXPECT_OPERATOR:
			/* When we get a binary operator, or the end
			 * of a sentence, we have to pop out all unary
			 * operators (rejections) from the stack */
			while (stack->token->s[0] == '!') {
				*stack->parent->head = stack;
				stack->parent->head = &stack->next;
				stack = stack->parent;
			}
			if (tokens[i]->s[0] == '|' || tokens[i]->s[0] == ',') {
				/* If we have a binary operator, we add
				 * it to the sentence and expect the next
				 * token to be an operand */
				state = EXPECT_OPERAND;
			add_singleton:
				node = ecalloc(1u, sizeof(*node));
				node->token = tokens[i];
				*stack->head = node;
				stack->head = &node->next;
			} else if (tokens[i]->s[0] == ')') {
				if (stack->token->s[0] != '(')
					goto stray;
				goto pop;
			} else if (tokens[i]->s[0] == ']') {
				if (stack->token->s[0] != '[')
					goto stray;
				goto pop;
			} else if (tokens[i]->s[0] == '}') {
				/* If we have a closing bracket we verify
				 * that it matches the top of the stack,
				 * we then pop the top of the stack
				 * (the bracketed sentence) into the end
				 * of the sentence it appeared in */
				if (stack->token->s[0] != '{')
					goto stray;
			pop:
				free(tokens[i]);
				*stack->parent->head = stack;
				stack->parent->head = &stack->next;
				stack = stack->parent;
			} else if (tokens[i]->s[0] == ';') {
				/* If we have a semicolon, we are
				 * at the end of the rule, and expect
				 * the stack to be free from brackets
				 * (the stack will however contain the
				 * rule, which is emitted and deallocated);
				 * the stack holds the opening brackets */
				if (stack->token->s[0] == '(' || stack->token->s[0] == '[' || stack->token->s[0] == '{')
					eprintf("%s: premature end of rule on line %zu at column %zu (character %zu): "
					        "'%s' on line %zu at column %zu (character %zu) not closed\n",
					        argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character,
					        stack->token->s, stack->token->lineno,
					        stack->token->column, stack->token->character);
				emit_and_free_rule(stack);
				free(tokens[i]);
				state = NEW_RULE;
			} else {
				eprintf("%s: expected a '|', ',', or '%c' on line %zu at column %zu (character %zu)\n",
				        argv0,
				        stack->token->s[0] == '(' ? ')' :
				        stack->token->s[0] == '[' ? ']' :
				        stack->token->s[0] == '{' ? '}' : ';',
				        tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
			}
			break;

		case EXPECT_RANGE_LOW:
			/* After the "<" that begins a value range,
			 * there is a numeral value, which is followed
			 * by a comma, */
			state = EXPECT_RANGE_DELIM;
			goto add_range_bound;

		case EXPECT_RANGE_DELIM:
			/* the comma is followed by another value:
			 * the upper boundary, */
			if (type != SYMBOL || tokens[i]->s[0] != ',') {
				eprintf("%s: expected an ',' on line %zu at column %zu (character %zu)\n",
				        argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
			}
			free(tokens[i]);
			state = EXPECT_RANGE_HIGH;
			break;

		case EXPECT_RANGE_HIGH:
			/* which is followed by a ">", */
			state = EXPECT_RANGE_CLOSE;
			goto add_range_bound;

		case EXPECT_RANGE_CLOSE:
			/* after the ">", a binary operator
			 * or the end of the sentence is expected */
			if (type != SYMBOL || tokens[i]->s[0] != '>') {
				eprintf("%s: expected an '>' on line %zu at column %zu (character %zu)\n",
				        argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
			}
			state = EXPECT_OPERATOR;
			/* Once we have the ">", we pop the "<"
			 * from the stack and add the value range
			 * to the sentence. */
			goto pop;

		add_range_bound:
			/* The bound is reduced to a single byte,
			 * which is stored as the text of the token */
			if (type == IDENTIFIER) {
				/* A value can be specified in hexadecimal format
				 * (prefixed with "0x" or "0X") or in decimal format */
				val = 0;
				if (tokens[i]->s[0] == '0' && (tokens[i]->s[1] == 'x' || tokens[i]->s[1] == 'X')) {
					for (j = 2u; isxdigit((unsigned char)tokens[i]->s[j]) && val < 255; j++)
						val = (val * 16) | ((tokens[i]->s[j] & 15) + (tokens[i]->s[j] > '9' ? 9 : 0));
				} else {
					for (j = 0; isdigit((unsigned char)tokens[i]->s[j]) && val < 255; j++)
						val = val * 10 + (tokens[i]->s[j] & 15);
				}
				if (val > 255 || tokens[i]->s[j])
					goto invalid_range;
				tokens[i]->s[0] = (char)val;
				tokens[i]->s[1] = '\0';
			} else if (type == STRING) {
				/* or as a single character string */
				if (!tokens[i]->s[1]) {
					/* tokens[i]->s[0] is '"' */
					goto invalid_range;
				} else if (tokens[i]->s[1] == '\\') {
					/* The character can be specified using an escape
					 * sequence; `j` is set to the offset where the
					 * string must end, and the decoded byte is
					 * stored in tokens[i]->s[0] */
					j = 3u;
					if (tokens[i]->s[2] == '"') {
						tokens[i]->s[0] = '"';
					} else if (tokens[i]->s[2] == '\'') {
						tokens[i]->s[0] = '\'';
					} else if (tokens[i]->s[2] == '\\') {
						tokens[i]->s[0] = '\\';
					} else if (tokens[i]->s[2] == 'a') {
						tokens[i]->s[0] = '\a';
					} else if (tokens[i]->s[2] == 'b') {
						tokens[i]->s[0] = '\b';
					} else if (tokens[i]->s[2] == 'f') {
						tokens[i]->s[0] = '\f';
					} else if (tokens[i]->s[2] == 'n') {
						tokens[i]->s[0] = '\n';
					} else if (tokens[i]->s[2] == 'r') {
						tokens[i]->s[0] = '\r';
					} else if (tokens[i]->s[2] == 'v') {
						tokens[i]->s[0] = '\v';
					} else if (tokens[i]->s[2] == 'x' &&
					           isxdigit((unsigned char)tokens[i]->s[3]) &&
					           isxdigit((unsigned char)tokens[i]->s[4])) {
						val = ((tokens[i]->s[3] & 15) + (tokens[i]->s[3] > '9' ? 9 : 0)) * 16;
						val |= (tokens[i]->s[4] & 15) + (tokens[i]->s[4] > '9' ? 9 : 0);
						tokens[i]->s[0] = (char)val;
						j = 5u;
					} else if ('0' <= tokens[i]->s[2] && tokens[i]->s[2] <= '7') {
						val = 0;
						for (j = 2u; '0' <= tokens[i]->s[j] && tokens[i]->s[j] <= '7' && val < 255; j++)
							val = (val * 8) | (tokens[i]->s[j] & 15);
						if (val > 255)
							goto invalid_range;
						tokens[i]->s[0] = (char)val;
					} else {
						goto invalid_range;
					}
					if (tokens[i]->s[j])
						goto invalid_range;
					tokens[i]->s[1] = '\0';
				} else if (tokens[i]->s[2]) {
					goto invalid_range;
				} else {
					tokens[i]->s[0] = tokens[i]->s[1];
					tokens[i]->s[1] = '\0';
				}
			} else {
			invalid_range:
				eprintf("%s: expected a [0, 255] integer or single byte string "
				        "on line %zu at column %zu (character %zu)\n",
				        argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
			}
			goto add_singleton;

		default:
			abort();
		}
	}

	free(tokens);

	if (state != NEW_RULE)
		eprintf("%s: premature end of file\n", argv0);

	/* Detect unused and undefined symbols, by walking
	 * the two sorted lists in parallel; the main rule
	 * does not have to be used by any other rule */
	err = 0;
	qsort(rule_names, nrule_names, sizeof(*rule_names), strpcmp);
	qsort(want_rules, nwant_rules, sizeof(*want_rules), strpcmp);
	for (i = j = 0; i < nrule_names && j < nwant_rules;) {
		cmp = strcmp(rule_names[i], want_rules[j]);
		if (!cmp) {
			i++;
			/* a rule may be used multiple times */
			for (j++; j < nwant_rules && !strcmp(want_rules[j - 1u], want_rules[j]); j++);
		} else if (!strcmp(rule_names[i], argv[0])) {
			i++;
		} else if (cmp < 0) {
			weprintf("%s: rule \"%s\" defined but not used\n", argv0, rule_names[i]);
			i++;
			err = 1;
		} else {
			weprintf("%s: rule \"%s\" used but not defined\n", argv0, want_rules[j]);
			for (j++; j < nwant_rules && !strcmp(want_rules[j - 1], want_rules[j]); j++);
			err = 1;
		}
	}
	for (; i < nrule_names; i++) {
		if (strcmp(rule_names[i], argv[0])) {
			weprintf("%s: rule \"%s\" defined but not used\n", argv0, rule_names[i]);
			err = 1;
		}
	}
	while (j < nwant_rules) {
		weprintf("%s: rule \"%s\" used but not defined\n", argv0, want_rules[j]);
		for (j++; j < nwant_rules && !strcmp(want_rules[j - 1u], want_rules[j]); j++);
		err = 1;
	}
	if (err)
		exit(1);

	/* Verify that the main rule actually exists */
	for (i = 0; i < nrule_names; i++)
		if (!strcmp(rule_names[i], argv[0]))
			goto found_main;
	eprintf("%s: specified main rule (\"%s\") was not defined\n", argv0, argv[0]);

found_main:
	/* Emit predefined rules */
	printf("static union libparser_sentence noeof_sentence = {.type = LIBPARSER_SENTENCE_TYPE_EXCEPTION};\n");
	printf("static struct libparser_rule noeof_rule = {\"@noeof\", &noeof_sentence};\n");
	printf("static union libparser_sentence noeof_rule_sentence = {.rule = "
	           "{.type = LIBPARSER_SENTENCE_TYPE_RULE, .rule = \"@noeof\"}"
	       "};\n");
	printf("static union libparser_sentence eof_sentence = {.type = LIBPARSER_SENTENCE_TYPE_EOF};\n");
	printf("static struct libparser_rule eof_rule = {\"@eof\", &eof_sentence};\n");
	printf("static union libparser_sentence eof_rule_sentence = {.rule = "
	           "{.type = LIBPARSER_SENTENCE_TYPE_RULE, .rule = \"@eof\"}"
	       "};\n");
	printf("static union libparser_sentence end_sentence = {.binary = {"
	           ".type = LIBPARSER_SENTENCE_TYPE_ALTERNATION, "
	           ".left = &eof_rule_sentence, .right = &noeof_rule_sentence"
	       "}};\n");
	printf("static union libparser_sentence main_rule_sentence = {.rule = "
	           "{.type = LIBPARSER_SENTENCE_TYPE_RULE, .rule = \"%s\"}"
	       "};\n", argv[0]);
	printf("static union libparser_sentence main_sentence = {.binary = {"
	           ".type = LIBPARSER_SENTENCE_TYPE_CONCATENATION, "
	           ".left = &main_rule_sentence, .right = &end_sentence"
	       "}};\n");
	printf("static struct libparser_rule main_rule = {\"@start\", &main_sentence};\n");

	/* Emit the rule table */
	printf("const struct libparser_rule *const libparser_rule_table[] = {\n");
	for (i = 0; i < nrule_names; i++) {
		printf("\t&rule_%zu,\n", i);
		free(rule_names[i]);
	}
	printf("\t&eof_rule,\n");
	printf("\t&noeof_rule,\n");
	printf("\t&main_rule,\n");
	printf("\tNULL\n};\n");

	free(rule_names);
	for (i = 0; i < nwant_rules; i++)
		free(want_rules[i]);
	free(want_rules);

	/* Make sure that all output was actually written */
	if (ferror(stdout) || fflush(stdout) || fclose(stdout))
		eprintf("%s: printf: %s\n", argv0, strerror(errno));
	return 0;
}