aboutsummaryrefslogtreecommitdiffstats
path: root/libparser.c
diff options
context:
space:
mode:
Diffstat (limited to 'libparser.c')
-rw-r--r--libparser.c202
1 files changed, 202 insertions, 0 deletions
diff --git a/libparser.c b/libparser.c
new file mode 100644
index 0000000..9ee22a1
--- /dev/null
+++ b/libparser.c
@@ -0,0 +1,202 @@
+/* See LICENSE file for copyright and license details. */
+#include "libparser.h"
+#include <libsimple.h>
+
+
+struct context {
+ const struct libparser_rule *const *rules;
+ struct libparser_unit *cache;
+ const char *data;
+ size_t length;
+ size_t position;
+ int done;
+ int exception;
+};
+
+
+static void
+free_unit(struct libparser_unit *unit, struct context *ctx)
+{
+ struct libparser_unit *prev;
+ while (unit) {
+ free_unit(unit->in, ctx);
+ prev = unit;
+ unit = unit->next;
+ prev->next = ctx->cache;
+ ctx->cache = prev;
+ }
+}
+
+
+static struct libparser_unit *
+try_match(const char *rule, const union libparser_sentence *sentence, struct context *ctx)
+{
+ struct libparser_unit *unit, *next;
+ struct libparser_unit **head;
+ unsigned char c;
+ size_t i;
+
+ if (!ctx->cache) {
+ unit = ecalloc(1, sizeof(*unit));
+ } else {
+ unit = ctx->cache;
+ ctx->cache = unit->next;
+ unit->in = unit->next = NULL;
+ }
+
+ unit->rule = rule;
+ unit->start = ctx->position;
+
+ switch (sentence->type) {
+ case LIBPARSER_SENTENCE_TYPE_CONCATENATION:
+ unit->in = try_match(NULL, sentence->binary.left, ctx);
+ if (!unit->in)
+ goto mismatch;
+ if (ctx->done)
+ break;
+ unit->in->next = try_match(NULL, sentence->binary.right, ctx);
+ if (!unit->in->next) {
+ free_unit(unit->in, ctx);
+ goto mismatch;
+ }
+ if (!unit->in->next->rule || unit->in->next->rule[0] == '_') {
+ unit->in->next->next = ctx->cache;
+ ctx->cache = unit->in->next;
+ unit->in->next = unit->in->next->in;
+ }
+ if (!unit->in->rule || unit->in->rule[0] == '_') {
+ next = unit->in->next;
+ unit->in->next = ctx->cache;
+ ctx->cache = unit->in;
+ unit->in = unit->in->in;
+ if (unit->in) {
+ for (head = &unit->in->next; *head; head = &(*head)->next);
+ *head = next;
+ } else {
+ unit->in = next;
+ }
+ }
+ break;
+
+ case LIBPARSER_SENTENCE_TYPE_ALTERNATION:
+ unit->in = try_match(NULL, sentence->binary.left, ctx);
+ if (!unit->in) {
+ unit->in = try_match(NULL, sentence->binary.right, ctx);
+ if (!unit->in)
+ goto mismatch;
+ }
+ prone:
+ if (unit->in && (!unit->in->rule || unit->in->rule[0] == '_')) {
+ unit->in->next = ctx->cache;
+ ctx->cache = unit->in;
+ unit->in = unit->in->in;
+ }
+ break;
+
+ case LIBPARSER_SENTENCE_TYPE_OPTIONAL:
+ unit->in = try_match(NULL, sentence->unary.sentence, ctx);
+ goto prone;
+
+ case LIBPARSER_SENTENCE_TYPE_REPEATED:
+ head = &unit->in;
+ while ((*head = try_match(NULL, sentence->unary.sentence, ctx))) {
+ if (!(*head)->rule || (*head)->rule[0] == '_') {
+ (*head)->next = ctx->cache;
+ ctx->cache = *head;
+ *head = (*head)->in;
+ while (*head)
+ head = &(*head)->next;
+ } else {
+ head = &(*head)->next;
+ }
+ if (ctx->done)
+ break;
+ }
+ break;
+
+ case LIBPARSER_SENTENCE_TYPE_STRING:
+ if (sentence->string.length > ctx->length - ctx->position)
+ goto mismatch;
+ if (memcmp(&ctx->data[ctx->position], sentence->string.string, sentence->string.length))
+ goto mismatch;
+ ctx->position += sentence->string.length;
+ break;
+
+ case LIBPARSER_SENTENCE_TYPE_CHAR_RANGE:
+ if (ctx->position == ctx->length)
+ goto mismatch;
+ c = ((const unsigned char *)ctx->data)[ctx->position];
+ if (sentence->char_range.low > c || c > sentence->char_range.high)
+ goto mismatch;
+ ctx->position += 1;
+ break;
+
+ case LIBPARSER_SENTENCE_TYPE_RULE:
+ for (i = 0; ctx->rules[i]; i++)
+ if (!strcmp(ctx->rules[i]->name, sentence->rule.rule))
+ break;
+ if (!ctx->rules[i])
+ abort();
+ unit->in = try_match(ctx->rules[i]->name, ctx->rules[i]->sentence, ctx);
+ if (!unit->in)
+ goto mismatch;
+ goto prone;
+
+ case LIBPARSER_SENTENCE_TYPE_EXCEPTION:
+ ctx->done = 1;
+ ctx->exception = 1;
+ break;
+
+ case LIBPARSER_SENTENCE_TYPE_EOF:
+ if (ctx->position != ctx->length)
+ goto mismatch;
+ ctx->done = 1;
+ break;
+
+ default:
+ abort();
+ }
+
+ unit->end = ctx->position;
+ return unit;
+
+mismatch:
+ ctx->position = unit->start;
+ unit->next = ctx->cache;
+ ctx->cache = unit;
+ return NULL;
+}
+
+
+struct libparser_unit *
+libparser_parse_file(const struct libparser_rule *const rules[], const char *data, size_t length, int *exceptionp)
+{
+ struct libparser_unit *ret, *t;
+ struct context ctx;
+ size_t i;
+
+ ctx.rules = rules;
+ ctx.cache = NULL;
+ ctx.data = data;
+ ctx.length = length;
+ ctx.position = 0;
+ ctx.done = 0;
+ ctx.exception = 0;
+
+ for (i = 0; rules[i]; i++)
+ if (!strcmp(rules[i]->name, "@start"))
+ break;
+ if (!rules[i])
+ abort();
+
+ ret = try_match(rules[i]->name, rules[i]->sentence, &ctx);
+ *exceptionp = ctx.exception;
+
+ while (ctx.cache) {
+ t = ctx.cache;
+ ctx.cache = t->next;
+ free(t);
+ }
+
+ return ret;
+}