/* See LICENSE file for copyright and license details. */ #include "libparser.h" #include #include #define IS_HIDDEN(RULE) (!(RULE) || (RULE)[0] == '_') /* NULL is used when evaluating a subsentence, * and rule with the prefix "_" are embedded * as subsentences */ struct context { const struct libparser_rule *const *rules; /* rule table */ struct libparser_unit *cache; /* memory allocation cache */ const char *data; /* text being parsed */ size_t length; /* length of text */ size_t position; /* current position in the text */ char done; /* end reached or .exception or .error set */ char exception; /* exception-statement reached */ char error; /* error encountered */ }; /** * Recursively place move a unit into * the memory allocation cache * * @param unit Unit to place in the cache * @param ctx Parsing context (holds the cache) */ static void free_unit(struct libparser_unit *unit, struct context *ctx) { struct libparser_unit *prev; while (unit) { free_unit(unit->in, ctx); prev = unit; unit = unit->next; prev->next = ctx->cache; ctx->cache = prev; } } /** * Recursively deallocate a unit * * @param unit Unit to deallocate */ static void dealloc_unit(struct libparser_unit *unit) { struct libparser_unit *next; for (; unit; unit = next) { dealloc_unit(unit->in); next = unit->next; free(unit); } } /** * Allocate, without initialising * new unit, but first try to reuse * from the memory cache * * On allocation failure, the the * error marked in the parsing context * * @param ctx Parsing context * @return Newly allocated (or polled from the * cache) unit, `NULL` on failure */ static struct libparser_unit * alloc_unit(struct context *ctx) { struct libparser_unit *unit; if (!ctx->cache) { unit = malloc(sizeof(*unit)); if (!unit) { ctx->done = 1; ctx->error = 1; return NULL; } } else { unit = ctx->cache; ctx->cache = unit->next; } return unit; } /** * Embed a rule's or subsentence's matches * into where the rule or subsentence was * matched * * @param where The pointer to the match * @param ctx Parsing context */ static void embed_rule(struct libparser_unit **where, struct context *ctx) { /* remove matched unit */ (*where)->next = ctx->cache; ctx->cache = *where; /* insert interior where matched */ *where = (*where)->in; } static struct libparser_unit * try_match(const char *rule, const union libparser_sentence *sentence, struct context *ctx) { struct libparser_unit *unit, *next; struct libparser_unit **head; unsigned char c; size_t i; unit = alloc_unit(ctx); if (!unit) return NULL; unit->end = 0; unit->in = unit->next = NULL; unit->rule = rule; unit->start = ctx->position; switch (sentence->type) { case LIBPARSER_SENTENCE_TYPE_CONCATENATION: unit->in = try_match(NULL, sentence->binary.left, ctx); if (!unit->in) goto mismatch; if (ctx->done) break; unit->in->next = try_match(NULL, sentence->binary.right, ctx); if (!unit->in->next) { free_unit(unit->in, ctx); goto mismatch; } if (IS_HIDDEN(unit->in->next->rule)) embed_rule(&unit->in->next, ctx); if (IS_HIDDEN(unit->in->rule)) { next = unit->in->next; embed_rule(&unit->in, ctx); /* rejoin with right-hand */ if (unit->in) { for (head = &unit->in->next; *head; head = &(*head)->next); *head = next; } else { unit->in = next; } } break; case LIBPARSER_SENTENCE_TYPE_ALTERNATION: unit->in = try_match(NULL, sentence->binary.left, ctx); if (!unit->in) { unit->in = try_match(NULL, sentence->binary.right, ctx); if (!unit->in) goto mismatch; } prone: if (unit->in && IS_HIDDEN(unit->in->rule)) embed_rule(&unit->in, ctx); break; case LIBPARSER_SENTENCE_TYPE_REJECTION: unit->in = try_match(NULL, sentence->unary.sentence, ctx); if (unit->in) { free_unit(unit->in, ctx); if (!ctx->exception) goto mismatch; ctx->exception = 0; } ctx->position = unit->start; unit->rule = NULL; break; case LIBPARSER_SENTENCE_TYPE_OPTIONAL: unit->in = try_match(NULL, sentence->unary.sentence, ctx); goto prone; case LIBPARSER_SENTENCE_TYPE_REPEATED: head = &unit->in; while ((*head = try_match(NULL, sentence->unary.sentence, ctx))) { if (IS_HIDDEN((*head)->rule)) { embed_rule(head, ctx); while (*head) head = &(*head)->next; } else { head = &(*head)->next; } if (ctx->done) break; } break; case LIBPARSER_SENTENCE_TYPE_STRING: if (sentence->string.length > ctx->length - ctx->position) goto mismatch; if (memcmp(&ctx->data[ctx->position], sentence->string.string, sentence->string.length)) goto mismatch; ctx->position += sentence->string.length; break; case LIBPARSER_SENTENCE_TYPE_CHAR_RANGE: if (ctx->position == ctx->length) goto mismatch; c = ((const unsigned char *)ctx->data)[ctx->position]; if (sentence->char_range.low > c || c > sentence->char_range.high) goto mismatch; ctx->position += 1u; break; case LIBPARSER_SENTENCE_TYPE_RULE: for (i = 0; ctx->rules[i]; i++) if (!strcmp(ctx->rules[i]->name, sentence->rule.rule)) break; if (!ctx->rules[i]) abort(); unit->in = try_match(ctx->rules[i]->name, ctx->rules[i]->sentence, ctx); if (!unit->in) goto mismatch; goto prone; case LIBPARSER_SENTENCE_TYPE_EXCEPTION: ctx->done = 1; ctx->exception = 1; break; case LIBPARSER_SENTENCE_TYPE_EOF: if (ctx->position != ctx->length) goto mismatch; ctx->done = 1; break; default: abort(); } unit->end = ctx->position; return unit; mismatch: /* On mismatch, restore position in text */ ctx->position = unit->start; /* and place the unit in the memory cache */ unit->next = ctx->cache; ctx->cache = unit; return NULL; } int libparser_parse_file(const struct libparser_rule *const rules[], const char *data, size_t length, struct libparser_unit **rootp) { struct libparser_unit *ret, *t; struct context ctx; size_t i; ctx.rules = rules; ctx.cache = NULL; ctx.data = data; ctx.length = length; ctx.position = 0; ctx.done = 0; ctx.error = 0; ctx.exception = 0; for (i = 0; rules[i]; i++) if (!strcmp(rules[i]->name, "@start")) break; if (!rules[i]) abort(); ret = try_match(rules[i]->name, rules[i]->sentence, &ctx); while (ctx.cache) { t = ctx.cache; ctx.cache = t->next; free(t); } if (ctx.error) { dealloc_unit(ret); *rootp = NULL; return -1; } *rootp = ret; return !ctx.exception; }