First commit

Signed-off-by: Mattias Andrée <maandree@kth.se>
author: Mattias Andrée <maandree@kth.se> 2021-04-17 12:09:12 +0200
committer: Mattias Andrée <maandree@kth.se> 2021-04-17 12:09:12 +0200
commit: a5218fce6b19591c9412a84a5088c260c0676f61 (patch)
tree: 038f8b8e1064b4f61c831907ac4f20b77711f289 /libparser.c
download: libparser-a5218fce6b19591c9412a84a5088c260c0676f61.tar.gz
libparser-a5218fce6b19591c9412a84a5088c260c0676f61.tar.bz2
libparser-a5218fce6b19591c9412a84a5088c260c0676f61.tar.xz
1 files changed, 202 insertions, 0 deletions
diff --git a/libparser.c b/libparser.c
new file mode 100644
index 0000000..9ee22a1
--- /dev/null
+++ b/libparser.c
@@ -0,0 +1,202 @@
+/* See LICENSE file for copyright and license details. */
+#include "libparser.h"
+#include <libsimple.h>
+
+
+struct context {
+	const struct libparser_rule *const *rules;
+	struct libparser_unit *cache;
+	const char *data;
+	size_t length;
+	size_t position;
+	int done;
+	int exception;
+};
+
+
+static void
+free_unit(struct libparser_unit *unit, struct context *ctx)
+{
+	struct libparser_unit *prev;
+	while (unit) {
+		free_unit(unit->in, ctx);
+		prev = unit;
+		unit = unit->next;
+		prev->next = ctx->cache;
+		ctx->cache = prev;
+	}
+}
+
+
+static struct libparser_unit *
+try_match(const char *rule, const union libparser_sentence *sentence, struct context *ctx)
+{
+	struct libparser_unit *unit, *next;
+	struct libparser_unit **head;
+	unsigned char c;
+	size_t i;
+
+	if (!ctx->cache) {
+		unit = ecalloc(1, sizeof(*unit));
+	} else {
+		unit = ctx->cache;
+		ctx->cache = unit->next;
+		unit->in = unit->next = NULL;
+	}
+
+	unit->rule = rule;
+	unit->start = ctx->position;
+
+	switch (sentence->type) {
+	case LIBPARSER_SENTENCE_TYPE_CONCATENATION:
+		unit->in = try_match(NULL, sentence->binary.left, ctx);
+		if (!unit->in)
+			goto mismatch;
+		if (ctx->done)
+			break;
+		unit->in->next = try_match(NULL, sentence->binary.right, ctx);
+		if (!unit->in->next) {
+			free_unit(unit->in, ctx);
+			goto mismatch;
+		}
+		if (!unit->in->next->rule || unit->in->next->rule[0] == '_') {
+			unit->in->next->next = ctx->cache;
+			ctx->cache = unit->in->next;
+			unit->in->next = unit->in->next->in;
+		}
+		if (!unit->in->rule || unit->in->rule[0] == '_') {
+			next = unit->in->next;
+			unit->in->next = ctx->cache;
+			ctx->cache = unit->in;
+			unit->in = unit->in->in;
+			if (unit->in) {
+				for (head = &unit->in->next; *head; head = &(*head)->next);
+				*head = next;
+			} else {
+				unit->in = next;
+			}
+		}
+		break;
+
+	case LIBPARSER_SENTENCE_TYPE_ALTERNATION:
+		unit->in = try_match(NULL, sentence->binary.left, ctx);
+		if (!unit->in) {
+			unit->in = try_match(NULL, sentence->binary.right, ctx);
+			if (!unit->in)
+				goto mismatch;
+		}
+	prone:
+		if (unit->in && (!unit->in->rule || unit->in->rule[0] == '_')) {
+			unit->in->next = ctx->cache;
+			ctx->cache = unit->in;
+			unit->in = unit->in->in;
+		}
+		break;
+
+	case LIBPARSER_SENTENCE_TYPE_OPTIONAL:
+		unit->in = try_match(NULL, sentence->unary.sentence, ctx);
+		goto prone;
+
+	case LIBPARSER_SENTENCE_TYPE_REPEATED:
+		head = &unit->in;
+		while ((*head = try_match(NULL, sentence->unary.sentence, ctx))) {
+			if (!(*head)->rule || (*head)->rule[0] == '_') {
+				(*head)->next = ctx->cache;
+				ctx->cache = *head;
+				*head = (*head)->in;
+				while (*head)
+					head = &(*head)->next;
+			} else {
+				head = &(*head)->next;
+			}
+			if (ctx->done)
+				break;
+		}
+		break;
+
+	case LIBPARSER_SENTENCE_TYPE_STRING:
+		if (sentence->string.length > ctx->length - ctx->position)
+			goto mismatch;
+		if (memcmp(&ctx->data[ctx->position], sentence->string.string, sentence->string.length))
+			goto mismatch;
+		ctx->position += sentence->string.length;
+		break;
+
+	case LIBPARSER_SENTENCE_TYPE_CHAR_RANGE:
+		if (ctx->position == ctx->length)
+			goto mismatch;
+		c = ((const unsigned char *)ctx->data)[ctx->position];
+		if (sentence->char_range.low > c || c > sentence->char_range.high)
+			goto mismatch;
+		ctx->position += 1;
+		break;
+
+	case LIBPARSER_SENTENCE_TYPE_RULE:
+		for (i = 0; ctx->rules[i]; i++)
+			if (!strcmp(ctx->rules[i]->name, sentence->rule.rule))
+				break;
+		if (!ctx->rules[i])
+			abort();
+		unit->in = try_match(ctx->rules[i]->name, ctx->rules[i]->sentence, ctx);
+		if (!unit->in)
+			goto mismatch;
+		goto prone;
+
+	case LIBPARSER_SENTENCE_TYPE_EXCEPTION:
+		ctx->done = 1;
+		ctx->exception = 1;
+		break;
+
+	case LIBPARSER_SENTENCE_TYPE_EOF:
+		if (ctx->position != ctx->length)
+			goto mismatch;
+		ctx->done = 1;
+		break;
+
+	default:
+		abort();
+	}
+
+	unit->end = ctx->position;
+	return unit;
+
+mismatch:
+	ctx->position = unit->start;
+	unit->next = ctx->cache;
+	ctx->cache = unit;
+	return NULL;
+}
+
+
+struct libparser_unit *
+libparser_parse_file(const struct libparser_rule *const rules[], const char *data, size_t length, int *exceptionp)
+{
+	struct libparser_unit *ret, *t;
+	struct context ctx;
+	size_t i;
+
+	ctx.rules = rules;
+	ctx.cache = NULL;
+	ctx.data = data;
+	ctx.length = length;
+	ctx.position = 0;
+	ctx.done = 0;
+	ctx.exception = 0;
+
+	for (i = 0; rules[i]; i++)
+		if (!strcmp(rules[i]->name, "@start"))
+			break;
+	if (!rules[i])
+		abort();
+
+	ret = try_match(rules[i]->name, rules[i]->sentence, &ctx);
+	*exceptionp = ctx.exception;
+
+	while (ctx.cache) {
+		t = ctx.cache;
+		ctx.cache = t->next;
+		free(t);
+	}
+
+	return ret;
+}
author	Mattias Andrée <maandree@kth.se>	2021-04-17 12:09:12 +0200
committer	Mattias Andrée <maandree@kth.se>	2021-04-17 12:09:12 +0200
commit	a5218fce6b19591c9412a84a5088c260c0676f61 (patch)
tree	038f8b8e1064b4f61c831907ac4f20b77711f289 /libparser.c
download	libparser-a5218fce6b19591c9412a84a5088c260c0676f61.tar.gz libparser-a5218fce6b19591c9412a84a5088c260c0676f61.tar.bz2 libparser-a5218fce6b19591c9412a84a5088c260c0676f61.tar.xz