/* See LICENSE file for copyright and license details. */
#include "libparser.h"
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define PRINT_ACTIONS 0

#if PRINT_ACTIONS
# include <ctype.h>
# include <stdio.h>
#endif


#define IS_ANONYMOUS(RULE) (!(RULE) || (RULE)[0] == '_')
/* NULL is used when evaluating a subsentence,
 * and rule with the prefix "_" are embedded
 * as subsentences */

#define IS_COMPLETION(FOLLOWUP) (!!(FOLLOWUP).unit)

#define EXHAUSTED SIZE_MAX


/**
 * Follow-up stack node
 * 
 * `.unit` decides whether it is a completion
 * marker or a subsequent sentence; it is a
 * completion marker if and only iff `.unit`
 * is non-`NULL`.
 */
struct followup {
	/**
	 * The unit being completed; if `NULL`
	 * it is a subsequent sentence rather
	 * than a completion marker and,
	 * `.sentence` and `.rule` should be used
	 * instead
	 */
	struct libparser_unit *unit;

	/**
	 * Set if and only if `.unit` is non-`NULL`:
	 * the index of the node
	 */
	size_t unit_index;

	/**
	 * Set if and only if `.unit` is non-`NULL`:
	 * the index of the node of the previous
	 * rejection, `SIZE_MAX` if none
	 */
	size_t previous_rejection;

	/* -- or -- */

	/**
	 * Set if and only if `.unit` is `NULL`:
	 * the `rule` argument `add_unit` should
	 * be called with
	 */
	const char *rule;

	/**
	 * Set if and only if `.unit` is `NULL`:
	 * the `sentence` argument `add_unit`
	 * should be called with
	 */
	const union libparser_sentence *sentence;
};


/**
 * Branching choice
 */
struct choice {
	/**
	 * The current choice value, `EXHAUSTED`
	 * when the choices are exhausted and the
	 * sentence cannot be matched
	 */
	size_t choice;

	/**
	 * The position in the input text
	 */
	size_t position;

	/**
	 * The `rule` argument `add_unit` was
	 * called with
	 */
	const char *rule;

	/**
	 * The `sentence` argument `add_unit`
	 * was called with
	 */
	const union libparser_sentence *sentence;

	/**
	 * The value `.units.count` of the `ctx`
	 * argument `add_unit` was called with
	 */
	size_t unit_index;

	/**
	 * The value `.previous_rejection` of the `ctx`
	 * argument `add_unit` was called with
	 */
	size_t previous_rejection;

	/**
	 * Save state of the follow-up stack
	 */
	struct {
		/**
		 * The follow-ups
		 */
		struct followup *followups; /* TODO add memory pool, maybe even use a memory stack */

		/**
		 * The number of follow-ups; this is
		 * also the number of allocated elements
		 */
		size_t count;
	} followups;
};


/**
 * Parsing state and context
 */
struct context {
	/**
	 * The rule table
	 */
	const struct libparser_rule *const *rules;

	/**
	 * Memory pool for parsing units
	 */
	struct libparser_unit *unit_pool;

	/**
	 * Current choices
	 */
	struct {
		/**
		 * The branching points and the choices made there
		 */
		struct choice *choices;

		/**
		 * Number of allocated choices
		 */
		size_t size;

		/**
		 * Number of made choices (it is be temporarily
		 * one more then .next when amending a choice)
		 */
		size_t count;

		/**
		 * The index of the next choice
		 */
		size_t next;
	} choices;

	/**
	 * Follow-up stack; it is popped any time
	 * a sentence (including literals and
	 * subsentences) has ended
	 */
	struct {
		/**
		 * The follow-ups
		 */
		struct followup *followups;

		/**
		 * The number of allocated slots
		 */
		size_t size;

		/**
		 * The number of used slots
		 */
		size_t count;
	} followups;

	/**
	 * List of units leading up to the end
	 * of the input
	 */
	struct {
		/**
		 * The units
		 */
		struct libparser_unit **units;

		/**
		 * The number of allocated slots
		 */
		size_t size;

		/**
		 * The number of used slots
		 */
		size_t count;
	} units;

	/**
	 * The index of the node of the previous
	 * rejection, `SIZE_MAX` if none
	 */
	size_t previous_rejection;

	/**
	 * The text being parsed
	 */
	const char *data;

	/**
	 * The length of the text
	 */
	size_t length;

	/**
	 * The current position in the text
	 */
	size_t position;

	/**
	 * Set when an exception-statement reached is encountered;
	 * cleared when rejected the exception is rejected
	 */
	char exception;
};


/**
 * Epsilon rule, used when en OPTIONAL is not take
 */
static union libparser_sentence epsilon = {.type = LIBPARSER_SENTENCE_TYPE_EPSILON};


#if PRINT_ACTIONS


static const char *
sentence_type_string(enum libparser_sentence_type type)
{
	switch (type) {
	case LIBPARSER_SENTENCE_TYPE_CONCATENATION: return "CONCATENATION";
	case LIBPARSER_SENTENCE_TYPE_ALTERNATION: return "ALTERNATION";
	case LIBPARSER_SENTENCE_TYPE_REJECTION: return "REJECTION";
	case LIBPARSER_SENTENCE_TYPE_OPTIONAL: return "OPTIONAL";
	case LIBPARSER_SENTENCE_TYPE_REPEATED: return "REPEATED";
	case LIBPARSER_SENTENCE_TYPE_STRING: return "STRING";
	case LIBPARSER_SENTENCE_TYPE_CHAR_RANGE: return "CHAR_RANGE";
	case LIBPARSER_SENTENCE_TYPE_RULE: return "RULE";
	case LIBPARSER_SENTENCE_TYPE_EXCEPTION: return "EXCEPTION";
	case LIBPARSER_SENTENCE_TYPE_EOF: return "EOF";
	case LIBPARSER_SENTENCE_TYPE_EPSILON: return "EPSILON";
	case LIBPARSER_SENTENCE_TYPE_COMMITTED: return "COMMITTED";
	case LIBPARSER_SENTENCE_TYPE_ND_OPTIONAL: return "ND_OPTIONAL";
	case LIBPARSER_SENTENCE_TYPE_ND_REPEATED: return "ND_REPEATED";
	case LIBPARSER_SENTENCE_TYPE_ND_ALTERNATION: return "ND_ALTERNATION";
	default: return "<invalid>";
	}
}


static void
print_tree(struct libparser_unit *unit, int indent)
{
	for (; unit; unit = unit->next) {
		fprintf(stderr, "%*.s", indent, "");
		fprintf(stderr, "%s: %s%s%s [%zu, %zu) {%p}\n",
		        unit->rule, sentence_type_string(unit->sentence->type),
		        unit->sentence->type == LIBPARSER_SENTENCE_TYPE_RULE ? " " : "",
		        unit->sentence->type == LIBPARSER_SENTENCE_TYPE_RULE
		            ? unit->sentence->rule.rule : "",
		        unit->start, unit->end, (void *)unit);
		print_tree(unit->in, indent + 2);
	}
}


static void
print_state(struct context *ctx)
{
	struct libparser_unit *unit;
	const union libparser_sentence *sentence;
	size_t i;

	fprintf(stderr, "Position in text: %zu of %zu\n", ctx->position, ctx->length);
	fprintf(stderr, "Exception state: %i\n", (int)ctx->exception);

	fprintf(stderr, "Have %zu units:\n", ctx->units.count);
	for (i = 0; i < ctx->units.count; i++) {
		if (ctx->units.units[i]->next)
			fprintf(stderr, "  (\n");
		print_tree(ctx->units.units[i], 4);
		if (ctx->units.units[i]->next)
			fprintf(stderr, "  )\n");
	}

	fprintf(stderr, "Have %zu choices:\n", ctx->choices.count);
	for (i = 0; i < ctx->choices.count; i++) {
		fprintf(stderr, "    For unit %zu with %zu followups at %zu: %s: %s, ",
		        ctx->choices.choices[i].unit_index,
		        ctx->choices.choices[i].followups.count,
		        ctx->choices.choices[i].position,
		        sentence_type_string(ctx->choices.choices[i].sentence->type),
		        ctx->choices.choices[i].rule);
		if (ctx->choices.choices[i].choice == EXHAUSTED)
			fprintf(stderr, "choices EXHAUSTED; ");
		else
			fprintf(stderr, "using choice %zu; ", ctx->choices.choices[i].choice);
		if (ctx->choices.choices[i].previous_rejection == SIZE_MAX) {
			fprintf(stderr, "no rejection stored\n");
		} else {
			fprintf(stderr, "previous rejection: %zu\n",
			        ctx->choices.choices[i].previous_rejection);
		}
	}

	fprintf(stderr, "Have %zu follow-ups pending (outermost at top):\n",
	        ctx->followups.count);
	for (i = 0; i < ctx->followups.count; i++) {
		if (IS_COMPLETION(ctx->followups.followups[i])) {
			unit = ctx->followups.followups[i].unit;
			sentence = unit->sentence;
			fprintf(stderr, "    Completion: unit %zu: %s: %s%s%s [%zu, %zu); ",
			        ctx->followups.followups[i].unit_index,
			        unit->rule, sentence_type_string(sentence->type),
			        sentence->type == LIBPARSER_SENTENCE_TYPE_RULE ? " " : "",
			        sentence->type == LIBPARSER_SENTENCE_TYPE_RULE
			            ? sentence->rule.rule : "",
			        unit->start, unit->end);
			if (ctx->followups.followups[i].previous_rejection == SIZE_MAX) {
				fprintf(stderr, "no rejection stored\n");
			} else {
				fprintf(stderr, "previous rejection: %zu\n",
				        ctx->followups.followups[i].previous_rejection);
			}
		} else {
			sentence = ctx->followups.followups[i].sentence;
			fprintf(stderr, "    (%s, %s%s%s)\n",
			        ctx->followups.followups[i].rule,
			        sentence_type_string(sentence->type),
			        sentence->type == LIBPARSER_SENTENCE_TYPE_RULE ? " " : "",
			        sentence->type == LIBPARSER_SENTENCE_TYPE_RULE
			            ? sentence->rule.rule : "");
		}
	}
}


static int
print_text(const char *text, size_t textlen)
{
	size_t off = 0, n = 0;
	int len, ret = 0;

	while (off + n < textlen) {
		if (!text[off + n])
			break;
		if (text[off + n] < ' ' || text[off + n] >= 0x7F ||
		    text[off + n] == '"' || text[off + n] == '\\') {
			fprintf(stderr, "%.*s%n", (int)n, &text[off], &len);
			ret += len;
			off += n;
			n = 0;
			switch (text[off]) {
			case '\\': fprintf(stderr, "\\\\%n", &len); break;
			case '\"': fprintf(stderr, "\\\"%n", &len); break;
			case '\a': fprintf(stderr, "\\a%n", &len); break;
			case '\b': fprintf(stderr, "\\b%n", &len); break;
			case '\f': fprintf(stderr, "\\f%n", &len); break;
			case '\n': fprintf(stderr, "\\n%n", &len); break;
			case '\r': fprintf(stderr, "\\r%n", &len); break;
			case '\t': fprintf(stderr, "\\t%n", &len); break;
			case '\v': fprintf(stderr, "\\v%n", &len); break;
			default:
				fprintf(stderr, "\\x%02X%n", +(unsigned char)text[off], &len);
				break;
			}
			off++;
		} else if (n == 4096U) {
			fprintf(stderr, "%.*s%n", (int)n, &text[off], &len);
			ret += len;
			off += n;
			n = 0;
		} else {
			n++;
		}
	}
	if (n) {
		fprintf(stderr, "%.*s%n", (int)n, &text[off], &len);
		ret += len;
	}

	return ret;
}


static int
print_sentence(const union libparser_sentence *sentence, int indent)
{
	unsigned char low, high;
	int len;

	switch (sentence->type) {
	case LIBPARSER_SENTENCE_TYPE_CONCATENATION:
		fprintf(stderr, "(");
		indent = print_sentence(sentence->binary.left, indent + 1);
		fprintf(stderr, ", ");
		indent = print_sentence(sentence->binary.right, indent + 2);
		fprintf(stderr, ")");
		indent += 1;
		break;

	case LIBPARSER_SENTENCE_TYPE_ND_ALTERNATION:
		fprintf(stderr, "(");
		print_sentence(sentence->binary.left, indent + 1);
		fprintf(stderr, " ?| \n%*.s", indent + 1, "");
		indent = print_sentence(sentence->binary.right, indent + 1);
		fprintf(stderr, ")");
		indent += 1;
		break;

	case LIBPARSER_SENTENCE_TYPE_ALTERNATION:
		fprintf(stderr, "(");
		print_sentence(sentence->binary.left, indent + 1);
		fprintf(stderr, " | \n%*.s", indent + 1, "");
		indent = print_sentence(sentence->binary.right, indent + 1);
		fprintf(stderr, ")");
		indent += 1;
		break;

	case LIBPARSER_SENTENCE_TYPE_REJECTION:
		fprintf(stderr, "!(");
		indent = print_sentence(sentence->unary.sentence, indent + 2);
		fprintf(stderr, ")");
		indent += 1;
		break;

	case LIBPARSER_SENTENCE_TYPE_COMMITTED:
		fprintf(stderr, "+(");
		indent = print_sentence(sentence->unary.sentence, indent + 2);
		fprintf(stderr, ")");
		indent += 1;
		break;

	case LIBPARSER_SENTENCE_TYPE_ND_OPTIONAL:
		fprintf(stderr, "?");
		/* fall through */
	case LIBPARSER_SENTENCE_TYPE_OPTIONAL:
		fprintf(stderr, "[");
		indent = print_sentence(sentence->unary.sentence, indent + 1);
		fprintf(stderr, "]");
		indent += 1;
		break;

	case LIBPARSER_SENTENCE_TYPE_ND_REPEATED:
		fprintf(stderr, "?");
		/* fall through */
	case LIBPARSER_SENTENCE_TYPE_REPEATED:
		fprintf(stderr, "{");
		indent = print_sentence(sentence->unary.sentence, indent + 1);
		fprintf(stderr, "}");
		indent += 1;
		break;

	case LIBPARSER_SENTENCE_TYPE_STRING:
		fprintf(stderr, "\"%n", &len);
		indent += len + print_text(sentence->string.string, sentence->string.length);
		fprintf(stderr, "\"%n", &len);
		indent += len;
		break;

	case LIBPARSER_SENTENCE_TYPE_CHAR_RANGE:
		low = sentence->char_range.low;
		high = sentence->char_range.high;
		if (isprint(low) && isprint(high))
			fprintf(stderr, "<\"%c\", \"%c\">%n", low, high, &len);
		else if (isprint(sentence->char_range.low))
			fprintf(stderr, "<\"%c\", 0x%02x>%n", low, high, &len);
		else if (isprint(sentence->char_range.high))
			fprintf(stderr, "<0x%02x, \"%c\">%n", low, high, &len);
		else
			fprintf(stderr, "<0x%02x, 0x%02x>%n", low, high, &len);
		indent += len;
		break;

	case LIBPARSER_SENTENCE_TYPE_RULE:
		fprintf(stderr, "%s%n", sentence->rule.rule, &len);
		indent += len;
		break;

	case LIBPARSER_SENTENCE_TYPE_EXCEPTION:
		fprintf(stderr, "-");
		indent += 1;
		break;

	case LIBPARSER_SENTENCE_TYPE_EOF:
		fprintf(stderr, "%s%n", "!<0x00, 0xFF>", &len);
		indent += len;
		break;

	case LIBPARSER_SENTENCE_TYPE_EPSILON:
		fprintf(stderr, "@EPSILON");
		indent += 8;
		break;

	default:
		abort();
	}

	return indent;
}


static void
print_grammar(const struct libparser_rule *const *rules)
{
	size_t i;
	int indent;
	fprintf(stderr, "Rules:\n");
	for (i = 0; rules[i]; i++) {
		fprintf(stderr, "    %s = %n", rules[i]->name, &indent);
		print_sentence(rules[i]->sentence, indent);
		fprintf(stderr, ";\n");
	}
}


#endif


/**
 * Embed a rule's or subsentence's matches
 * into where the rule or subsentence was
 * matched
 * 
 * @param  where  The pointer to the match
 * @param  last   Optional output paramter for the
 *                last embedded, top-level unit
 */
static void
embed_rule(struct libparser_unit **where, struct libparser_unit **last)
{
	struct libparser_unit *old = *where;

#if PRINT_ACTIONS
	fprintf(stderr, "Embedding %s: %s [%zu, %zu)\n", old->rule,
	        sentence_type_string(old->sentence->type), old->start, old->end);
#endif

	if (old->next)
		abort();
	if (old->in == old)
		abort();
	*where = old->in;
	free(old);
	if (last) {
		*last = *where;
		if (*last)
			while ((*last)->next)
				*last = (*last)->next;
	}
}


/**
 * Recursively move a unit into
 * the memory allocation cache
 * 
 * @param  unit  Unit to place in the cache
 * @param  ctx   Parsing context (holds the cache)
 */
static void
free_unit(struct libparser_unit *unit, struct context *ctx)
{
	struct libparser_unit *prev;
	while (unit) {
		free_unit(unit->in, ctx);
		prev = unit;
		unit = unit->next;
		prev->next = ctx->unit_pool;
		ctx->unit_pool = prev;
	}
}


/**
 * Recursively deallocate a unit
 *
 * @param  unit  Unit to deallocate
 */
static void
dealloc_unit(struct libparser_unit *unit)
{
	struct libparser_unit *next;
	for (; unit; unit = next) {
		dealloc_unit(unit->in);
		next = unit->next;
		free(unit);
	}
}


/**
 * Allocate, without initialising
 * new unit, but first try to reuse
 * from the memory cache
 * 
 * @param   ctx  Parsing context
 * @return       Newly allocated (or polled from the
 *               cache) unit, `NULL` on failure
 */
static struct libparser_unit *
alloc_unit(struct context *ctx)
{
	struct libparser_unit *unit;
	if (!ctx->unit_pool) {
		unit = malloc(sizeof(*unit));
		if (!unit)
			return NULL;
	} else {
		unit = ctx->unit_pool;
		ctx->unit_pool = unit->next;
	}
	return unit;
}


/**
 * Grow the unit list (if ncessary) and return a
 * pointer the next empty slot
 * 
 * @param   ctx  Parsing context
 * @return       Pointer to the unit slot
 */
static struct libparser_unit **
new_unit(struct context *ctx)
{
	void *new;
	if (ctx->units.count == ctx->units.size) {
		if (ctx->units.size > SIZE_MAX / sizeof(*ctx->units.units) - 64u) {
		enomem:
			errno = ENOMEM;
			return NULL;
		}
		ctx->units.size += 64u;
		new = realloc(ctx->units.units, ctx->units.size * sizeof(*ctx->units.units));
		if (!new)
			goto enomem;
		ctx->units.units = new;
	}
#if PRINT_ACTIONS
	fprintf(stderr, "New unit slot at %zu\n", ctx->units.count);
#endif
	return &ctx->units.units[ctx->units.count++];
}


/**
 * Create a new follow-up node and push it
 * into the follow-up stack
 * 
 * @param   ctx  Parsing context
 * @return       The follow-up node, `NULL` on failure
 */
static struct followup *
new_followup(struct context *ctx)
{
	void *new;
	if (ctx->followups.count == ctx->followups.size) {
		if (ctx->followups.size > SIZE_MAX / sizeof(*ctx->followups.followups) - 64u) {
		enomem:
			errno = ENOMEM;
			return NULL;
		}
		ctx->followups.size += 64u;
		new = realloc(ctx->followups.followups,
		              ctx->followups.size * sizeof(*ctx->followups.followups));
		if (!new)
			goto enomem;
		ctx->followups.followups = new;
	}

#if PRINT_ACTIONS
	fprintf(stderr, "New follow-up slot at %zu\n", ctx->followups.count);
#endif

	return &ctx->followups.followups[ctx->followups.count++];
}


/**
 * Push sentence into follow-up stack
 * 
 * @param   rule      The name of the rule, or `NULL` if it is an unnamed subsentence
 * @param   sentence  The sentence to push
 * @param   ctx       Parsing context
 * @return            0 on success, -1 on failure
 */
static int
push(const char *rule, const union libparser_sentence *sentence, struct context *ctx)
{
	struct followup *followup = new_followup(ctx);
	if (!followup)
		return -1;

	followup->sentence = sentence;
	followup->unit = NULL;
	followup->rule = rule;

#if PRINT_ACTIONS
	fprintf(stderr, "Pushing follow-up sentence: %s: %s\n",
	        sentence_type_string(sentence->type), rule);
#endif

	return 0;
}


/**
 * Create a unit for a sentence and push it into
 * the follow-up stack to mark the sentence as
 * completed when the follow-up node is processed
 * 
 * @param   rule      The name of the rule, or `NULL` if it is an unnamed subsentence
 * @param   sentence  The sentence to push
 * @param   start     The position in the input text where the sentence begins
 * @param   ctx       Parsing context
 * @return            0 on success, -1 on failure
 */
static int
incomplete(const char *rule, const union libparser_sentence *sentence, size_t start, struct context *ctx)
{
	struct followup *followup;
	struct libparser_unit *unit;
	struct libparser_unit **unit_slot;

	unit = alloc_unit(ctx);
	if (!unit)
		return -1;
	unit->in = unit->next = NULL;
	unit->rule = rule;
	unit->start = start;
	unit->end = start;
	unit->sentence = sentence;

	followup = new_followup(ctx);
	if (!followup) {
		free_unit(unit, ctx);
		return -1;
	}
	followup->unit = unit;
	followup->unit_index = ctx->units.count;
	followup->previous_rejection = ctx->previous_rejection;

#if PRINT_ACTIONS
	fprintf(stderr, "Pushing follow-up completion for unit %zu: %s: %s\n",
	        ctx->units.count, sentence_type_string(sentence->type), rule);
#endif

	unit_slot = new_unit(ctx);
	if (!unit_slot) {
		free_unit(unit, ctx);
		return -1;
	}
	*unit_slot = unit;
#if PRINT_ACTIONS
	fprintf(stderr, "Writing unit to slot: %s: %s, spanning [%zu, %zu)\n",
	        unit->rule, sentence_type_string(unit->sentence->type), unit->start, unit->end);
#endif

	return 0;
}


/**
 * Add branching point to list of choices,
 * or if already added, just the get the
 * current choice
 * 
 * @param   rule        The name of the rule that is a brachning
 *                      point, `NULL` if a subsentence
 * @param   sentence    The sentence that is brachning point
 * @param   start       The position in the input text where the
 *                      brachning begins
 * @param   choice_out  Output parameter for the current choice value
 * @return              0 on success, -1 on failure
 */
static int
braching(const char *rule, const union libparser_sentence *sentence, size_t start, struct context *ctx, size_t *choice_out)
{
	struct choice *choice;
	size_t size;
	void *new;
	if (ctx->choices.next == ctx->choices.count) {
		if (ctx->choices.count == ctx->choices.size) {
			size = ctx->choices.size;
			if (size > SIZE_MAX - 8u) {
			enomem:
				errno = ENOMEM;
				return -1;
			}
			size += 8u;
			if (size > SIZE_MAX / sizeof(*ctx->choices.choices))
				goto enomem;
			new = realloc(ctx->choices.choices, size * sizeof(*ctx->choices.choices));
			if (!new)
				goto enomem;
			ctx->choices.choices = new;
			ctx->choices.size = size;
		}
		choice = &ctx->choices.choices[ctx->choices.next];
		choice->choice = 0;
		choice->position = start;
		choice->rule = rule;
		choice->sentence = sentence;
		choice->unit_index = ctx->units.count;
		choice->previous_rejection = ctx->previous_rejection;
		choice->followups.followups = NULL;
		choice->followups.count = ctx->followups.count;
		if (choice->followups.count) {
			size = choice->followups.count * sizeof(*choice->followups.followups);
			choice->followups.followups = malloc(size);
			if (!choice->followups.followups)
				goto enomem;
			memcpy(choice->followups.followups, ctx->followups.followups, size);
		}
		ctx->choices.count++;

#if PRINT_ACTIONS
		fprintf(stderr, "Adding branching point at %zu for unit %zu: %s: %s, at %zu in text\n",
		        ctx->choices.next, ctx->units.count,
		        sentence_type_string(sentence->type), rule, start);
	} else {
		fprintf(stderr, "Revisiting branching point %zu at %zu in text for unit %zu: "
		                "%s: %s, at %zu in text; choice value: %zu; ",
		        ctx->choices.next, start,
		        ctx->choices.choices[ctx->choices.next].unit_index,
		        sentence_type_string(ctx->choices.choices[ctx->choices.next].sentence->type),
		        ctx->choices.choices[ctx->choices.next].rule,
		        ctx->choices.choices[ctx->choices.next].position,
		        ctx->choices.choices[ctx->choices.next].choice);
		if (ctx->choices.choices[ctx->choices.next].previous_rejection == SIZE_MAX) {
			fprintf(stderr, "no rejection stored\n");
		} else {
			fprintf(stderr, "previous rejection: %zu\n",
			        ctx->choices.choices[ctx->choices.next].previous_rejection);
		}
#endif
	}
	*choice_out = ctx->choices.choices[ctx->choices.next++].choice;
	return 0;
}


/**
 * Add a sentence, as a unit, to the list of units,
 * will adding to the follow-up stack, the subsentences
 * the the completion of the sentence unless it is
 * atomic, and for branching points, record the choice
 * with backtracking data
 * 
 * @param   rule      The name of the rule, `NULL` if it is a subsentence
 * @param   sentence  The sentence to add
 * @param   ctx       Parsing context
 * @return            1 on matched or incomplete, 0 on mismatch, -1 on failure
 */
static int
add_unit(const char *rule, const union libparser_sentence *sentence, struct context *ctx)
{
	struct libparser_unit *unit;
	struct libparser_unit **unit_slot;
	size_t start = ctx->position;
	unsigned char c;
	size_t i, choice;

#if PRINT_ACTIONS
	fprintf(stderr, "Visiting %s: %s, at %zu in text\n",
	        sentence_type_string(sentence->type), rule, start);
#endif

	switch (sentence->type) {
	case LIBPARSER_SENTENCE_TYPE_ND_ALTERNATION:
	case LIBPARSER_SENTENCE_TYPE_ALTERNATION:
		if (braching(rule, sentence, start, ctx, &choice))
			return -1;
		if (choice > 1u)
			goto exhausted;
		if (incomplete(rule, sentence, start, ctx) ||
		    push(NULL, choice ? sentence->binary.right : sentence->binary.left, ctx))
			return -1;
		return 1;

	case LIBPARSER_SENTENCE_TYPE_ND_REPEATED:
	case LIBPARSER_SENTENCE_TYPE_REPEATED:
		if (braching(rule, sentence, start, ctx, &choice))
			return -1;
		if (choice == 0u) {
			if (incomplete(rule, sentence, start, ctx) ||
			    push(NULL, sentence, ctx) ||
			    push(NULL, sentence->unary.sentence, ctx))
				return -1;
		} else if (choice == 1u) {
			if (incomplete(rule, sentence, start, ctx) ||
			    push("@epsilon", &epsilon, ctx) ||
			    push(NULL, sentence->unary.sentence, ctx))
				return -1;
		} else if (choice == 2u) {
			if (incomplete(rule, sentence, start, ctx) ||
			    push("@epsilon", &epsilon, ctx) ||
			    push("@epsilon", &epsilon, ctx))
				return -1;
		} else {
			goto exhausted;
		}
		return 1;

	case LIBPARSER_SENTENCE_TYPE_ND_OPTIONAL:
	case LIBPARSER_SENTENCE_TYPE_OPTIONAL:
		if (braching(rule, sentence, start, ctx, &choice))
			return -1;
		if (choice == 0u) {
			if (incomplete(rule, sentence, start, ctx) ||
			    push(NULL, sentence->unary.sentence, ctx))
				return -1;
		} else if (choice == 1u) {
			if (incomplete(rule, sentence, start, ctx) ||
			    push("@epsilon", &epsilon, ctx))
				return -1;
		} else {
			goto exhausted;
		}
		return 1;

	case LIBPARSER_SENTENCE_TYPE_CONCATENATION:
		if (incomplete(rule, sentence, start, ctx) ||
		    push(NULL, sentence->binary.right, ctx) ||
		    push(NULL, sentence->binary.left, ctx))
			return -1;
		return 1;

	case LIBPARSER_SENTENCE_TYPE_REJECTION:
		if (incomplete(NULL, sentence, start, ctx) ||
		    push(NULL, sentence->unary.sentence, ctx))
			return -1;
		ctx->previous_rejection = ctx->units.count - 1u;
		return 1;

	case LIBPARSER_SENTENCE_TYPE_COMMITTED:
		if (incomplete(NULL, sentence, start, ctx) ||
		    push(NULL, sentence->unary.sentence, ctx))
			return -1;
		return 1;

	case LIBPARSER_SENTENCE_TYPE_RULE:
		for (i = 0; ctx->rules[i]; i++)
			if (!strcmp(ctx->rules[i]->name, sentence->rule.rule))
				break;
		if (!ctx->rules[i])
			abort();
		if (incomplete(rule, sentence, start, ctx) ||
		    push(ctx->rules[i]->name, ctx->rules[i]->sentence, ctx))
			return -1;
		return 1;

	case LIBPARSER_SENTENCE_TYPE_STRING:
		if (sentence->string.length > ctx->length - ctx->position ||
		    memcmp(&ctx->data[ctx->position], sentence->string.string, sentence->string.length))
			return 0;
		ctx->position += sentence->string.length;
		break;

	case LIBPARSER_SENTENCE_TYPE_CHAR_RANGE:
		if (ctx->position == ctx->length)
			goto mismatch;
		c = ((const unsigned char *)ctx->data)[ctx->position];
		if (sentence->char_range.low > c || c > sentence->char_range.high)
			goto mismatch;
		ctx->position += 1u;
		break;

	case LIBPARSER_SENTENCE_TYPE_EXCEPTION:
		ctx->exception = 1;
		break;

	case LIBPARSER_SENTENCE_TYPE_EOF:
		if (ctx->position != ctx->length)
			goto mismatch;
		break;

	case LIBPARSER_SENTENCE_TYPE_EPSILON:
		break;

	default:
		abort();
	}

	/* for atomic sentences */
	unit = alloc_unit(ctx);
	if (!unit)
		return -1;
	unit->in = unit->next = NULL;
	unit->rule = rule;
	unit->start = start;
	unit->end = ctx->position;
	unit->sentence = sentence;
	unit_slot = new_unit(ctx);
	if (!unit_slot) {
		free_unit(unit, ctx);
		return -1;
	}
	*unit_slot = unit;
#if PRINT_ACTIONS
	fprintf(stderr, "Writing atomic unit to slot: %s: %s, spanning [%zu, %zu)\n",
	        sentence_type_string(unit->sentence->type), unit->rule, unit->start, unit->end);
#endif
	return 1;

exhausted:
	ctx->choices.choices[ctx->choices.next - 1U].choice = EXHAUSTED;
mismatch:
#if PRINT_ACTIONS
	fprintf(stderr, "Returning mismatch\n");
#endif
	return 0;
}


/**
 * Convert list of units into a tree
 * 
 * @param   ctx     Parsing context
 * @param   unit_i  Reference to current position in the unit list
 * @param   last    Output parameter for the last unit on the same
 *                  level as the returned node (used to deal with
 *                  embedding of anonymous sentences)
 * @return          The first top level node from the current
 *                  position in the list of units
 */
static struct libparser_unit *
make_tree(struct context *ctx, size_t *unit_i, struct libparser_unit **last)
{
	/* TODO try to flatten to call stack */
	struct libparser_unit *unit, *middle, **p;
	size_t start;

#if PRINT_ACTIONS
	static int indent = 0;
	fprintf(stderr, "%*.s", indent, "");
	fprintf(stderr, "{ at %zu of %zu\n", *unit_i, ctx->units.count);
	indent += 4;
	fprintf(stderr, "%*.s", indent, "");
#endif

	if (*unit_i == ctx->units.count)
		abort();

	unit = ctx->units.units[(*unit_i)++];
	unit->in = NULL;
	unit->next = NULL;

	switch (unit->sentence->type) {
	case LIBPARSER_SENTENCE_TYPE_RULE:
	case LIBPARSER_SENTENCE_TYPE_ND_OPTIONAL:
	case LIBPARSER_SENTENCE_TYPE_OPTIONAL:
	case LIBPARSER_SENTENCE_TYPE_ND_ALTERNATION:
	case LIBPARSER_SENTENCE_TYPE_ALTERNATION:
	case LIBPARSER_SENTENCE_TYPE_COMMITTED:
#if PRINT_ACTIONS
		fprintf(stderr, "Unary unit: %s: %s [%zu, %zu)\n",
		        sentence_type_string(unit->sentence->type),
		        unit->rule, unit->start, unit->end);
#endif
		unit->in = make_tree(ctx, unit_i, last);
		break;

	case LIBPARSER_SENTENCE_TYPE_REJECTION:
#if PRINT_ACTIONS
		fprintf(stderr, "Unary lookahead unit: %s: %s [%zu, %zu)\n",
		        sentence_type_string(unit->sentence->type),
		        unit->rule, unit->start, unit->end);
#endif
		unit->in = make_tree(ctx, unit_i, last);
		start = unit->start;
		dealloc_unit(unit);
		unit = malloc(sizeof(*unit));
		unit->rule = "@epsilon";
		unit->sentence = &epsilon;
		unit->in = unit->next = NULL;
		unit->start = unit->end = start;
		*last = unit;
		goto out;

	case LIBPARSER_SENTENCE_TYPE_ND_REPEATED:
	case LIBPARSER_SENTENCE_TYPE_REPEATED:
	case LIBPARSER_SENTENCE_TYPE_CONCATENATION:
#if PRINT_ACTIONS
		fprintf(stderr, "Binary unit: %s: %s [%zu, %zu)\n",
		        sentence_type_string(unit->sentence->type),
		        unit->rule, unit->start, unit->end);
#endif
		unit->in = make_tree(ctx, unit_i, &middle);
		if (IS_ANONYMOUS(unit->in->rule)) {
			embed_rule(&unit->in, &middle);
			if (!middle) {
				p = &unit->in;
				goto p_found;
			}
		}
		p = &middle->next;
		if (*p)
			while ((*p)->next)
				p = &(*p)->next;
	p_found:
		*p = make_tree(ctx, unit_i, last);
		if (IS_ANONYMOUS((*p)->rule))
			embed_rule(&*p, last);
		goto out;

	case LIBPARSER_SENTENCE_TYPE_STRING:
	case LIBPARSER_SENTENCE_TYPE_CHAR_RANGE:
	case LIBPARSER_SENTENCE_TYPE_EXCEPTION:
	case LIBPARSER_SENTENCE_TYPE_EOF:
	case LIBPARSER_SENTENCE_TYPE_EPSILON:
		*last = unit;
#if PRINT_ACTIONS
		fprintf(stderr, "Atomic unit: %s: %s [%zu, %zu)\n",
		        sentence_type_string(unit->sentence->type),
		        unit->rule, unit->start, unit->end);
#endif
		goto out;
	default:
		abort();
	}

	if (IS_ANONYMOUS(unit->in->rule))
		embed_rule(&unit->in, last);

out:
#if PRINT_ACTIONS
	indent -= 4;
	fprintf(stderr, "%*.s", indent, "");
	fprintf(stderr, "} at %zu of %zu\n", *unit_i, ctx->units.count);
#endif
	if (!unit)
		abort();
	return unit;
}


/**
 * Remove all epsilons from the tree
 * 
 * @param   unit  The tree
 * @return        The tree
 */
static struct libparser_unit *
remove_epsilons(struct libparser_unit *unit)
{
	struct libparser_unit **node, *deleted;

	if (!unit)
		return NULL;

	node = &unit;
	while (*node) {
		if ((*node)->sentence->type == LIBPARSER_SENTENCE_TYPE_EPSILON) {
			deleted = *node;
			*node = (*node)->next;
			free(deleted);
		} else {
			(*node)->in = remove_epsilons((*node)->in);
			node = &(*node)->next;
		}
	}

	return unit;
}


/**
 * Check whether the inner most branching-point
 * contains a rejection
 * 
 * @param   ctx  Parsing context
 * @reutrn       1 if branching-point contains rejection,
 *               0 otherwise
 */
#ifdef __GNUC__
__attribute__((__pure__))
#endif
static int
choice_contains_rejection(struct context *ctx)
{
	size_t i = ctx->choices.choices[ctx->choices.count - 1u].previous_rejection;
	return ctx->previous_rejection != i;
}


int
libparser_parse_file(const struct libparser_rule *const rules[], const char *data, size_t length, struct libparser_unit **rootp)
{
	const char *rule;
	const union libparser_sentence *sentence;
	struct libparser_unit *ret = NULL, *t, *unit;
	struct context ctx;
	size_t i, j, n;
	int match;

	ctx.rules = rules;
	ctx.choices.choices = NULL;
	ctx.choices.count = 0;
	ctx.choices.size = 0;
	ctx.choices.next = 0;
	ctx.followups.followups = NULL;
	ctx.followups.size = 0;
	ctx.followups.count = 0;
	ctx.units.units = NULL;
	ctx.units.size = 0;
	ctx.units.count = 0;
	ctx.unit_pool = NULL;
	ctx.previous_rejection = SIZE_MAX;
	ctx.data = data;
	ctx.length = length;
	ctx.position = 0;
	ctx.exception = 0;

	for (i = 0; rules[i]; i++)
		if (!strcmp(rules[i]->name, "@start"))
			break;
	if (!rules[i])
		abort();
	rule = rules[i]->name;
	sentence = rules[i]->sentence;

#if PRINT_ACTIONS
	print_grammar(rules);
	fprintf(stderr, "Input text: \"");
	print_text(ctx.data, ctx.length);
	fprintf(stderr, "\"\n");
#endif

	/* TODO guard against left-side recursion */
	/* TODO break REPEATED on empty match */
followup:
#if PRINT_ACTIONS
	print_state(&ctx);
#endif
	match = add_unit(rule, sentence, &ctx);
	if (match < 0) {
#if PRINT_ACTIONS
		fprintf(stderr, "Error encountered\n");
#endif
		goto out;
	}
	while (ctx.followups.count) {
		i = --ctx.followups.count;
		if (IS_COMPLETION(ctx.followups.followups[i])) {
			unit = ctx.followups.followups[i].unit;
			sentence = unit->sentence;
#if PRINT_ACTIONS
			fprintf(stderr, "Popping completion of sentence: %s: %s, "
			                "for unit %zu, at %zu, consuming %zu subunits\n",
			        sentence_type_string(sentence->type), unit->rule,
			        ctx.followups.followups[i].unit_index, ctx.position,
			        ctx.units.count - ctx.followups.followups[i].unit_index - 1U);
#endif
			if (sentence->type == LIBPARSER_SENTENCE_TYPE_REJECTION) {
#if PRINT_ACTIONS
				fprintf(stderr, "Checking REJECTION\n");
#endif
				if (!match) {
#if PRINT_ACTIONS
					fprintf(stderr, "Match set, replacing REJECTION with EPSILON\n");
					fprintf(stderr, "Before:\n");
					print_state(&ctx);
#endif
					match = 1;
					for (j = ctx.followups.followups[i].unit_index + 1u; j < ctx.units.count; j++)
						free_unit(ctx.units.units[j], &ctx);
					j = ctx.followups.followups[i].unit_index;
					ctx.units.count = j + 1u;
					if (unit != ctx.units.units[j])
						abort();
					if (unit->in)
						abort();
					if (unit->next)
						abort();
					unit->rule = "@epsilon";
					unit->sentence = &epsilon;
					while (ctx.choices.count && ctx.choices.choices[ctx.choices.count - 1u].unit_index >= j) {
						ctx.choices.count--;
						free(ctx.choices.choices[ctx.choices.count].followups.followups);
					}
#if PRINT_ACTIONS
					fprintf(stderr, "After:\n");
					print_state(&ctx);
				} else if (ctx.exception) {
					fprintf(stderr, "Match and exception cleared\n");
					match = 0;
#endif
				} else {
#if PRINT_ACTIONS
					fprintf(stderr, "Match cleared\n");
#endif
					match = 0;
				}
				ctx.exception = 0;
				ctx.position = unit->start;
			}
			if (!match) 
				goto mismatch;
			if (sentence->type == LIBPARSER_SENTENCE_TYPE_COMMITTED ||
			    sentence->type == LIBPARSER_SENTENCE_TYPE_ALTERNATION ||
			    sentence->type == LIBPARSER_SENTENCE_TYPE_OPTIONAL ||
			    sentence->type == LIBPARSER_SENTENCE_TYPE_REPEATED) {
#if PRINT_ACTIONS
				fprintf(stderr, "Removing interior choices for matched %s\n",
				        sentence_type_string(sentence->type));
#endif
				j = ctx.followups.followups[i].unit_index;
				while (ctx.choices.count &&
				       ctx.choices.choices[ctx.choices.count - 1u].unit_index > j) {
					ctx.choices.next = --ctx.choices.count;
					free(ctx.choices.choices[ctx.choices.count].followups.followups);
				}
			}
#if PRINT_ACTIONS
			fprintf(stderr, "Setting end of popped unit to %zu\n", ctx.position);
#endif
			unit->end = ctx.position;
			ctx.previous_rejection = ctx.followups.followups[i].previous_rejection;
		} else if (match) {
			rule = ctx.followups.followups[i].rule;
			sentence = ctx.followups.followups[i].sentence;
#if PRINT_ACTIONS
			fprintf(stderr, "Popping follow-up sentence: %s: %s\n",
			        sentence_type_string(sentence->type), rule);
#endif
			goto followup;
		} else mismatch: if (ctx.choices.count && !choice_contains_rejection(&ctx)) {
			i = ctx.choices.count - 1u;
			ctx.choices.next = i;
			n = ctx.units.count;
			ctx.units.count = ctx.choices.choices[i].unit_index;
			for (j = ctx.units.count; j < n; j++)
				free_unit(ctx.units.units[j], &ctx);
			if (ctx.choices.choices[i].followups.count > ctx.followups.size)
				abort();
			ctx.followups.count = ctx.choices.choices[i].followups.count;
			memcpy(ctx.followups.followups,
			       ctx.choices.choices[i].followups.followups,
			       ctx.followups.count * sizeof(*ctx.followups.followups));
			if (ctx.choices.choices[i].choice == EXHAUSTED) {
				free(ctx.choices.choices[i].followups.followups);
				ctx.choices.count--;
				goto mismatch;
			}
			ctx.choices.choices[i].choice++;
			ctx.position = ctx.choices.choices[i].position;
			ctx.previous_rejection = ctx.choices.choices[i].previous_rejection;
			rule = ctx.choices.choices[i].rule;
			sentence = ctx.choices.choices[i].sentence;
#if PRINT_ACTIONS
			fprintf(stderr, "Mismatch: retrying from branching point %zu (%s, %s), "
			                "and unit %zu, from %zu, now with choice %zu\n",
			        i, sentence_type_string(sentence->type), rule, ctx.position,
			        ctx.units.count, ctx.choices.choices[i].choice);
#endif
			goto followup;
#if PRINT_ACTIONS
		} else {
			fprintf(stderr, "Mismatch with %zu pending follow-ups and no choices, "
			        "continuing polling follow-ups\n",
			        ctx.followups.count);
			print_state(&ctx);
#endif
		}
	}

#if PRINT_ACTIONS
	fprintf(stderr, "Result determined: %s\n", match ? "match" : "mismatch");
	fprintf(stderr, "Due to exception?: %s\n", ctx.exception ? "yes" : "no");
	print_state(&ctx);
#endif

	if (match) {
		if (!ctx.units.count)
			abort();
#if PRINT_ACTIONS
		fprintf(stderr, "Converting into tree\n");
#endif
		i = 0;
		ret = make_tree(&ctx, &i, &(struct libparser_unit *){NULL});
		if (ret && IS_ANONYMOUS(ret->rule)) {
#if PRINT_ACTIONS
			fprintf(stderr, "Embedding at top level:\n    ");
#endif
			embed_rule(&ret, NULL);
		}
		if (i != ctx.units.count)
			abort();
		ctx.units.count = 0;

		ret = remove_epsilons(ret);

#if PRINT_ACTIONS
		fprintf(stderr, "Match tree:\n");
		print_tree(ret, 4);
#endif
	} else if (ctx.choices.count) {
#if PRINT_ACTIONS
		fprintf(stderr, "Result with mismatch but remaining choices, trying next alternative\n");
#endif
		ctx.exception = 0;
		goto mismatch;
	}

out:
	for (i = 0; i < ctx.choices.count; i++)
		free(ctx.choices.choices[i].followups.followups);
	free(ctx.choices.choices);
	free(ctx.followups.followups);
	for (i = 0; i < ctx.units.count; i++)
		dealloc_unit(ctx.units.units[i]);
	free(ctx.units.units);
	while (ctx.unit_pool) {
		t = ctx.unit_pool;
		ctx.unit_pool = t->next;
		free(t);
	}

	if (match < 0) {
		dealloc_unit(ret);
		*rootp = NULL;
		return -1;
	}

	*rootp = ret;
	return !ctx.exception;
}