diff options
Diffstat (limited to 'libparser.c')
-rw-r--r-- | libparser.c | 202 |
1 files changed, 202 insertions, 0 deletions
diff --git a/libparser.c b/libparser.c new file mode 100644 index 0000000..9ee22a1 --- /dev/null +++ b/libparser.c @@ -0,0 +1,202 @@ +/* See LICENSE file for copyright and license details. */ +#include "libparser.h" +#include <libsimple.h> + + +struct context { + const struct libparser_rule *const *rules; + struct libparser_unit *cache; + const char *data; + size_t length; + size_t position; + int done; + int exception; +}; + + +static void +free_unit(struct libparser_unit *unit, struct context *ctx) +{ + struct libparser_unit *prev; + while (unit) { + free_unit(unit->in, ctx); + prev = unit; + unit = unit->next; + prev->next = ctx->cache; + ctx->cache = prev; + } +} + + +static struct libparser_unit * +try_match(const char *rule, const union libparser_sentence *sentence, struct context *ctx) +{ + struct libparser_unit *unit, *next; + struct libparser_unit **head; + unsigned char c; + size_t i; + + if (!ctx->cache) { + unit = ecalloc(1, sizeof(*unit)); + } else { + unit = ctx->cache; + ctx->cache = unit->next; + unit->in = unit->next = NULL; + } + + unit->rule = rule; + unit->start = ctx->position; + + switch (sentence->type) { + case LIBPARSER_SENTENCE_TYPE_CONCATENATION: + unit->in = try_match(NULL, sentence->binary.left, ctx); + if (!unit->in) + goto mismatch; + if (ctx->done) + break; + unit->in->next = try_match(NULL, sentence->binary.right, ctx); + if (!unit->in->next) { + free_unit(unit->in, ctx); + goto mismatch; + } + if (!unit->in->next->rule || unit->in->next->rule[0] == '_') { + unit->in->next->next = ctx->cache; + ctx->cache = unit->in->next; + unit->in->next = unit->in->next->in; + } + if (!unit->in->rule || unit->in->rule[0] == '_') { + next = unit->in->next; + unit->in->next = ctx->cache; + ctx->cache = unit->in; + unit->in = unit->in->in; + if (unit->in) { + for (head = &unit->in->next; *head; head = &(*head)->next); + *head = next; + } else { + unit->in = next; + } + } + break; + + case LIBPARSER_SENTENCE_TYPE_ALTERNATION: + unit->in = try_match(NULL, sentence->binary.left, ctx); + if (!unit->in) { + unit->in = try_match(NULL, sentence->binary.right, ctx); + if (!unit->in) + goto mismatch; + } + prone: + if (unit->in && (!unit->in->rule || unit->in->rule[0] == '_')) { + unit->in->next = ctx->cache; + ctx->cache = unit->in; + unit->in = unit->in->in; + } + break; + + case LIBPARSER_SENTENCE_TYPE_OPTIONAL: + unit->in = try_match(NULL, sentence->unary.sentence, ctx); + goto prone; + + case LIBPARSER_SENTENCE_TYPE_REPEATED: + head = &unit->in; + while ((*head = try_match(NULL, sentence->unary.sentence, ctx))) { + if (!(*head)->rule || (*head)->rule[0] == '_') { + (*head)->next = ctx->cache; + ctx->cache = *head; + *head = (*head)->in; + while (*head) + head = &(*head)->next; + } else { + head = &(*head)->next; + } + if (ctx->done) + break; + } + break; + + case LIBPARSER_SENTENCE_TYPE_STRING: + if (sentence->string.length > ctx->length - ctx->position) + goto mismatch; + if (memcmp(&ctx->data[ctx->position], sentence->string.string, sentence->string.length)) + goto mismatch; + ctx->position += sentence->string.length; + break; + + case LIBPARSER_SENTENCE_TYPE_CHAR_RANGE: + if (ctx->position == ctx->length) + goto mismatch; + c = ((const unsigned char *)ctx->data)[ctx->position]; + if (sentence->char_range.low > c || c > sentence->char_range.high) + goto mismatch; + ctx->position += 1; + break; + + case LIBPARSER_SENTENCE_TYPE_RULE: + for (i = 0; ctx->rules[i]; i++) + if (!strcmp(ctx->rules[i]->name, sentence->rule.rule)) + break; + if (!ctx->rules[i]) + abort(); + unit->in = try_match(ctx->rules[i]->name, ctx->rules[i]->sentence, ctx); + if (!unit->in) + goto mismatch; + goto prone; + + case LIBPARSER_SENTENCE_TYPE_EXCEPTION: + ctx->done = 1; + ctx->exception = 1; + break; + + case LIBPARSER_SENTENCE_TYPE_EOF: + if (ctx->position != ctx->length) + goto mismatch; + ctx->done = 1; + break; + + default: + abort(); + } + + unit->end = ctx->position; + return unit; + +mismatch: + ctx->position = unit->start; + unit->next = ctx->cache; + ctx->cache = unit; + return NULL; +} + + +struct libparser_unit * +libparser_parse_file(const struct libparser_rule *const rules[], const char *data, size_t length, int *exceptionp) +{ + struct libparser_unit *ret, *t; + struct context ctx; + size_t i; + + ctx.rules = rules; + ctx.cache = NULL; + ctx.data = data; + ctx.length = length; + ctx.position = 0; + ctx.done = 0; + ctx.exception = 0; + + for (i = 0; rules[i]; i++) + if (!strcmp(rules[i]->name, "@start")) + break; + if (!rules[i]) + abort(); + + ret = try_match(rules[i]->name, rules[i]->sentence, &ctx); + *exceptionp = ctx.exception; + + while (ctx.cache) { + t = ctx.cache; + ctx.cache = t->next; + free(t); + } + + return ret; +} |