aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile2
-rw-r--r--calc-example/calc.syntax3
-rw-r--r--libparser-generate.c2
-rw-r--r--libparser.76
-rw-r--r--libparser.c1293
-rw-r--r--libparser.h6
-rw-r--r--print-syntax.c8
7 files changed, 1175 insertions, 145 deletions
diff --git a/Makefile b/Makefile
index 686ddeb..543e021 100644
--- a/Makefile
+++ b/Makefile
@@ -41,7 +41,7 @@ calc-example/calc: calc-example/calc.o calc-example/calc-syntax.o libparser.a
$(CC) -o $@ calc-example/calc.o calc-example/calc-syntax.o libparser.a $(LDFLAGS)
calc-example/calc-syntax.c: libparser-generate calc-example/calc.syntax
- ./libparser-generate _expr < calc-example/calc.syntax > $@
+ ./libparser-generate _expr < calc-example/calc.syntax > $@ || (rm -f -- $@; false)
install: libparser.a libparser.$(LIBEXT) libparser-generate
mkdir -p -- "$(DESTDIR)$(PREFIX)/bin"
diff --git a/calc-example/calc.syntax b/calc-example/calc.syntax
index f739233..fb5a871 100644
--- a/calc-example/calc.syntax
+++ b/calc-example/calc.syntax
@@ -14,7 +14,8 @@ DIV = _, ("/" | "∕" | "÷"), _;
sign = ADD | SUB;
-unsigned = DIGIT, {DIGIT | _WHITESPACE | "_" | "'"};
+_digit = DIGIT | _WHITESPACE | "_" | "'";
+unsigned = DIGIT, {_digit}, !_digit;
_number = unsigned | "(", _expr, (")" | -);
diff --git a/libparser-generate.c b/libparser-generate.c
index a9a3d96..f72ed85 100644
--- a/libparser-generate.c
+++ b/libparser-generate.c
@@ -394,7 +394,7 @@ emit_and_free_sentence(struct node *node, size_t *indexp)
high->token->lineno, high->token->column, high->token->character);
}
printf("static union libparser_sentence sentence_%zu_%zu = {.char_range = {"
- ".type = LIBPARSER_SENTENCE_TYPE_CHAR_RANGE, .low = %hhu, .high = %hhu"
+ ".type = LIBPARSER_SENTENCE_TYPE_CHAR_RANGE, .low = %hhuU, .high = %hhuU"
"}};\n",
nrule_names, index,
(unsigned char)low->token->s[0], (unsigned char)high->token->s[0]);
diff --git a/libparser.7 b/libparser.7
index 5b5b9b3..7157769 100644
--- a/libparser.7
+++ b/libparser.7
@@ -123,8 +123,10 @@ out that it could not finish that branch. Whenever an
exception is reached, the parser will terminate there.
.PP
Repeated symbols may occur any number of times,
-including zero. The compiler is able to backtrack if it
-takes too much.
+including zero. The parser will try to take as much
+as possible.
+.PP
+Optional symbols are taken whenever possible.
.PP
Concatenation has higher precedence than alternation,
groups
diff --git a/libparser.c b/libparser.c
index 545aaee..56ce703 100644
--- a/libparser.c
+++ b/libparser.c
@@ -1,28 +1,480 @@
/* See LICENSE file for copyright and license details. */
#include "libparser.h"
+#include <errno.h>
+#include <stdint.h>
#include <stdlib.h>
#include <string.h>
-#define IS_HIDDEN(RULE) (!(RULE) || (RULE)[0] == '_')
+#define PRINT_ACTIONS 0
+
+#if PRINT_ACTIONS
+# include <ctype.h>
+# include <stdio.h>
+#endif
+
+
+#define IS_ANONYMOUS(RULE) (!(RULE) || (RULE)[0] == '_')
/* NULL is used when evaluating a subsentence,
* and rule with the prefix "_" are embedded
* as subsentences */
+#define IS_COMPLETION(FOLLOWUP) (!!(FOLLOWUP).unit)
+
+#define EXHAUSTED SIZE_MAX
+
+
+/**
+ * Follow-up stack node
+ *
+ * `.unit` decides whether it is a completion
+ * marker or a subsequent sentence; it is a
+ * completion marker if and only iff `.unit`
+ * is non-`NULL`.
+ */
+struct followup {
+ /**
+ * The unit being completed; if `NULL`
+ * it is a subsequent sentence rather
+ * than a completion marker and,
+ * `.sentence` and `.rule` should be used
+ * instead
+ */
+ struct libparser_unit *unit;
+
+ /**
+ * Set if and only if `.unit` is non-`NULL`:
+ * the
+ * index of the node
+ */
+ size_t unit_index;
+
+ /* -- or -- */
+
+ /**
+ * Set if and only if `.unit` is `NULL`:
+ * the `rule` argument `add_unit` should
+ * be called with
+ */
+ const char *rule;
+
+ /**
+ * Set if and only if `.unit` is `NULL`:
+ * the `sentence` argument `add_unit`
+ * should be called with
+ */
+ const union libparser_sentence *sentence;
+};
+
+
+/**
+ * Branching choice
+ */
+struct choice {
+ /**
+ * The current choice value, `EXHAUSTED`
+ * when the choices are exhausted and the
+ * sentence cannot be matched
+ */
+ size_t choice;
+
+ /**
+ * The position in the input text
+ */
+ size_t position;
+
+ /**
+ * The `rule` argument `add_unit` was
+ * called with
+ */
+ const char *rule;
+
+ /**
+ * The `sentence` argument `add_unit`
+ * was called with
+ */
+ const union libparser_sentence *sentence;
+ /**
+ * The value `.units.count` of the `ctx`
+ * argument `add_unit` was called with
+ */
+ size_t unit_index;
+
+ /**
+ * Save state of the follow-up stack
+ */
+ struct {
+ /**
+ * The follow-ups
+ */
+ struct followup *followups;
+
+ /**
+ * The number of follow-ups; this is
+ * also the number of allocated elements
+ */
+ size_t count;
+ } followups;
+};
+
+
+/**
+ * Parsing state and context
+ */
struct context {
- const struct libparser_rule *const *rules; /* rule table */
- struct libparser_unit *cache; /* memory allocation cache */
- const char *data; /* text being parsed */
- size_t length; /* length of text */
- size_t position; /* current position in the text */
- char done; /* end reached or .exception or .error set */
- char exception; /* exception-statement reached */
- char error; /* error encountered */
+ /**
+ * The rule table
+ */
+ const struct libparser_rule *const *rules;
+
+ /**
+ * Memory pool for parsing units
+ */
+ struct libparser_unit *unit_pool;
+
+ /**
+ * Current choices
+ */
+ struct {
+ /**
+ * The branching points and the choices made there
+ */
+ struct choice *choices;
+
+ /**
+ * Number of allocated choices
+ */
+ size_t size;
+
+ /**
+ * Number of made choices (it is be temporarily
+ * one more then .next when amending a choice)
+ */
+ size_t count;
+
+ /**
+ * The index of the next choice
+ */
+ size_t next;
+ } choices;
+
+ /**
+ * Follow-up stack; it is popped any time
+ * a sentence (including literals and
+ * subsentences) has ended
+ */
+ struct {
+ /**
+ * The follow-ups
+ */
+ struct followup *followups;
+
+ /**
+ * The number of allocated slots
+ */
+ size_t size;
+
+ /**
+ * The number of used slots
+ */
+ size_t count;
+ } followups;
+
+ /**
+ * List of units leading up to the end
+ * of the input
+ */
+ struct {
+ /**
+ * The units
+ */
+ struct libparser_unit **units;
+
+ /**
+ * The number of allocated slots
+ */
+ size_t size;
+
+ /**
+ * The number of used slots
+ */
+ size_t count;
+ } units;
+
+ /**
+ * The text being parsed
+ */
+ const char *data;
+
+ /**
+ * The length of the text
+ */
+ size_t length;
+
+ /**
+ * The current position in the text
+ */
+ size_t position;
+
+ /**
+ * Set when an exception-statement reached is encountered;
+ * cleared when rejected the exception is rejected
+ */
+ char exception;
};
/**
- * Recursively place move a unit into
+ * Epsilon rule, used when en OPTIONAL is not take
+ */
+static union libparser_sentence epsilon = {.type = LIBPARSER_SENTENCE_TYPE_EPSILON};
+
+
+#if PRINT_ACTIONS
+
+
+static const char *
+sentence_type_string(enum libparser_sentence_type type)
+{
+ switch (type) {
+ case LIBPARSER_SENTENCE_TYPE_CONCATENATION: return "CONCATENATION";
+ case LIBPARSER_SENTENCE_TYPE_ALTERNATION: return "ALTERNATION";
+ case LIBPARSER_SENTENCE_TYPE_REJECTION: return "REJECTION";
+ case LIBPARSER_SENTENCE_TYPE_OPTIONAL: return "OPTIONAL";
+ case LIBPARSER_SENTENCE_TYPE_REPEATED: return "REPEATED";
+ case LIBPARSER_SENTENCE_TYPE_STRING: return "STRING";
+ case LIBPARSER_SENTENCE_TYPE_CHAR_RANGE: return "CHAR_RANGE";
+ case LIBPARSER_SENTENCE_TYPE_RULE: return "RULE";
+ case LIBPARSER_SENTENCE_TYPE_EXCEPTION: return "EXCEPTION";
+ case LIBPARSER_SENTENCE_TYPE_EOF: return "EOF";
+ case LIBPARSER_SENTENCE_TYPE_EPSILON: return "EPSILON";
+ default: return "<invalid>";
+ }
+}
+
+
+static void
+print_tree(struct libparser_unit *unit, int indent)
+{
+ for (; unit; unit = unit->next) {
+ fprintf(stderr, "%*.s", indent, "");
+ fprintf(stderr, "%s: %s%s%s [%zu, %zu) {%p}\n",
+ unit->rule, sentence_type_string(unit->sentence->type),
+ unit->sentence->type == LIBPARSER_SENTENCE_TYPE_RULE ? " " : "",
+ unit->sentence->type == LIBPARSER_SENTENCE_TYPE_RULE
+ ? unit->sentence->rule.rule : "",
+ unit->start, unit->end, (void *)unit);
+ print_tree(unit->in, indent + 2);
+ }
+}
+
+
+static void
+print_state(struct context *ctx)
+{
+ struct libparser_unit *unit;
+ const union libparser_sentence *sentence;
+ size_t i;
+
+ fprintf(stderr, "Position in text: %zu of %zu\n", ctx->position, ctx->length);
+ fprintf(stderr, "Exception state: %i\n", (int)ctx->exception);
+
+ fprintf(stderr, "Have %zu units:\n", ctx->units.count);
+ for (i = 0; i < ctx->units.count; i++) {
+ if (ctx->units.units[i]->next)
+ fprintf(stderr, " (\n");
+ print_tree(ctx->units.units[i], 4);
+ if (ctx->units.units[i]->next)
+ fprintf(stderr, " )\n");
+ }
+
+ fprintf(stderr, "Have %zu choices:\n", ctx->choices.count);
+ for (i = 0; i < ctx->choices.count; i++) {
+ fprintf(stderr, " For unit %zu with %zu followups at %zu: %s: %s, ",
+ ctx->choices.choices[i].unit_index,
+ ctx->choices.choices[i].followups.count,
+ ctx->choices.choices[i].position,
+ sentence_type_string(ctx->choices.choices[i].sentence->type),
+ ctx->choices.choices[i].rule);
+ if (ctx->choices.choices[i].choice == EXHAUSTED) {
+ fprintf(stderr, "choices EXHAUSTED\n");
+ } else {
+ fprintf(stderr, "using choice %zu\n", ctx->choices.choices[i].choice);
+ }
+ }
+
+ fprintf(stderr, "Have %zu follow-ups pending (outermost at top):\n",
+ ctx->followups.count);
+ for (i = 0; i < ctx->followups.count; i++) {
+ if (IS_COMPLETION(ctx->followups.followups[i])) {
+ unit = ctx->followups.followups[i].unit;
+ sentence = unit->sentence;
+ fprintf(stderr, " Completion: unit %zu: %s: %s%s%s [%zu, %zu)\n",
+ ctx->followups.followups[i].unit_index,
+ unit->rule, sentence_type_string(sentence->type),
+ sentence->type == LIBPARSER_SENTENCE_TYPE_RULE ? " " : "",
+ sentence->type == LIBPARSER_SENTENCE_TYPE_RULE
+ ? sentence->rule.rule : "",
+ unit->start, unit->end);
+ } else {
+ sentence = ctx->followups.followups[i].sentence;
+ fprintf(stderr, " (%s, %s%s%s)\n",
+ ctx->followups.followups[i].rule,
+ sentence_type_string(sentence->type),
+ sentence->type == LIBPARSER_SENTENCE_TYPE_RULE ? " " : "",
+ sentence->type == LIBPARSER_SENTENCE_TYPE_RULE
+ ? sentence->rule.rule : "");
+ }
+ }
+}
+
+
+static int
+print_sentence(const union libparser_sentence *sentence, int indent)
+{
+ unsigned char low, high;
+ int len;
+
+ switch (sentence->type) {
+ case LIBPARSER_SENTENCE_TYPE_CONCATENATION:
+ fprintf(stderr, "(");
+ indent = print_sentence(sentence->binary.left, indent + 1);
+ fprintf(stderr, ", ");
+ indent = print_sentence(sentence->binary.right, indent + 2);
+ fprintf(stderr, ")");
+ indent += 1;
+ break;
+
+ case LIBPARSER_SENTENCE_TYPE_ALTERNATION:
+ fprintf(stderr, "(");
+ print_sentence(sentence->binary.left, indent + 1);
+ fprintf(stderr, " | \n%*.s", indent + 1, "");
+ indent = print_sentence(sentence->binary.right, indent + 1);
+ fprintf(stderr, ")");
+ indent += 1;
+ break;
+
+ case LIBPARSER_SENTENCE_TYPE_REJECTION:
+ fprintf(stderr, "!(");
+ indent = print_sentence(sentence->unary.sentence, indent + 2);
+ fprintf(stderr, ")");
+ indent += 1;
+ break;
+
+ case LIBPARSER_SENTENCE_TYPE_OPTIONAL:
+ fprintf(stderr, "[");
+ indent = print_sentence(sentence->unary.sentence, indent + 1);
+ fprintf(stderr, "]");
+ indent += 1;
+ break;
+
+ case LIBPARSER_SENTENCE_TYPE_REPEATED:
+ fprintf(stderr, "{");
+ indent = print_sentence(sentence->unary.sentence, indent + 1);
+ fprintf(stderr, "}");
+ indent += 1;
+ break;
+
+ case LIBPARSER_SENTENCE_TYPE_STRING:
+ fprintf(stderr, "\"%.*s\"%n", (int)sentence->string.length, sentence->string.string, &len);
+ indent += len;
+ break;
+
+ case LIBPARSER_SENTENCE_TYPE_CHAR_RANGE:
+ low = sentence->char_range.low;
+ high = sentence->char_range.high;
+ if (isprint(low) && isprint(high))
+ fprintf(stderr, "<\"%c\", \"%c\">%n", low, high, &len);
+ else if (isprint(sentence->char_range.low))
+ fprintf(stderr, "<\"%c\", 0x%02x>%n", low, high, &len);
+ else if (isprint(sentence->char_range.high))
+ fprintf(stderr, "<0x%02x, \"%c\">%n", low, high, &len);
+ else
+ fprintf(stderr, "<0x%02x, 0x%02x>%n", low, high, &len);
+ indent += len;
+ break;
+
+ case LIBPARSER_SENTENCE_TYPE_RULE:
+ fprintf(stderr, "%s%n", sentence->rule.rule, &len);
+ indent += len;
+ break;
+
+ case LIBPARSER_SENTENCE_TYPE_EXCEPTION:
+ fprintf(stderr, "-");
+ indent += 1;
+ break;
+
+ case LIBPARSER_SENTENCE_TYPE_EOF:
+ fprintf(stderr, "%s%n", "!<0x00, 0xFF>", &len);
+ indent += len;
+ break;
+
+ case LIBPARSER_SENTENCE_TYPE_EPSILON:
+ fprintf(stderr, "@EPSILON");
+ indent += 8;
+ break;
+
+ default:
+ abort();
+ }
+
+ return indent;
+}
+
+
+static void
+print_grammar(const struct libparser_rule *const *rules)
+{
+ size_t i;
+ int indent;
+ fprintf(stderr, "Rules:\n");
+ for (i = 0; rules[i]; i++) {
+ fprintf(stderr, " %s = %n", rules[i]->name, &indent);
+ print_sentence(rules[i]->sentence, indent);
+ fprintf(stderr, ";\n");
+ }
+}
+
+
+#endif
+
+
+/**
+ * Embed a rule's or subsentence's matches
+ * into where the rule or subsentence was
+ * matched
+ *
+ * @param where The pointer to the match
+ * @param last Optional output paramter for the
+ * last embedded, top-level unit
+ */
+static void
+embed_rule(struct libparser_unit **where, struct libparser_unit **last)
+{
+ struct libparser_unit *old = *where;
+
+#if PRINT_ACTIONS
+ fprintf(stderr, "Embedding %s: %s [%zu, %zu)\n", old->rule,
+ sentence_type_string(old->sentence->type), old->start, old->end);
+#endif
+
+ if (old->next)
+ abort();
+ if (old->in == old)
+ abort();
+ *where = old->in;
+ free(old);
+ if (last) {
+ *last = *where;
+ if (*last)
+ while ((*last)->next)
+ *last = (*last)->next;
+ }
+}
+
+
+/**
+ * Recursively move a unit into
* the memory allocation cache
*
* @param unit Unit to place in the cache
@@ -36,8 +488,8 @@ free_unit(struct libparser_unit *unit, struct context *ctx)
free_unit(unit->in, ctx);
prev = unit;
unit = unit->next;
- prev->next = ctx->cache;
- ctx->cache = prev;
+ prev->next = ctx->unit_pool;
+ ctx->unit_pool = prev;
}
}
@@ -64,9 +516,6 @@ dealloc_unit(struct libparser_unit *unit)
* new unit, but first try to reuse
* from the memory cache
*
- * On allocation failure, the the
- * error marked in the parsing context
- *
* @param ctx Parsing context
* @return Newly allocated (or polled from the
* cache) unit, `NULL` on failure
@@ -75,131 +524,341 @@ static struct libparser_unit *
alloc_unit(struct context *ctx)
{
struct libparser_unit *unit;
- if (!ctx->cache) {
+ if (!ctx->unit_pool) {
unit = malloc(sizeof(*unit));
- if (!unit) {
- ctx->done = 1;
- ctx->error = 1;
+ if (!unit)
return NULL;
- }
} else {
- unit = ctx->cache;
- ctx->cache = unit->next;
+ unit = ctx->unit_pool;
+ ctx->unit_pool = unit->next;
}
return unit;
}
/**
- * Embed a rule's or subsentence's matches
- * into where the rule or subsentence was
- * matched
+ * Grow the unit list (if ncessary) and return a
+ * pointer the next empty slot
*
- * @param where The pointer to the match
- * @param ctx Parsing context
+ * @param ctx Parsing context
+ * @return Pointer to the unit slot
*/
-static void
-embed_rule(struct libparser_unit **where, struct context *ctx)
+static struct libparser_unit **
+new_unit(struct context *ctx)
{
- /* remove matched unit */
- (*where)->next = ctx->cache;
- ctx->cache = *where;
- /* insert interior where matched */
- *where = (*where)->in;
+ void *new;
+ if (ctx->units.count == ctx->units.size) {
+ if (ctx->units.size > SIZE_MAX / sizeof(*ctx->units.units) - 64u) {
+ enomem:
+ errno = ENOMEM;
+ return NULL;
+ }
+ ctx->units.size += 64u;
+ new = realloc(ctx->units.units, ctx->units.size * sizeof(*ctx->units.units));
+ if (!new)
+ goto enomem;
+ ctx->units.units = new;
+ }
+#if PRINT_ACTIONS
+ fprintf(stderr, "New unit slot at %zu\n", ctx->units.count);
+#endif
+ return &ctx->units.units[ctx->units.count++];
}
-static struct libparser_unit *
-try_match(const char *rule, const union libparser_sentence *sentence, struct context *ctx)
+/**
+ * Create a new follow-up node and push it
+ * into the follow-up stack
+ *
+ * @param ctx Parsing context
+ * @return The follow-up node, `NULL` on failure
+ */
+static struct followup *
+new_followup(struct context *ctx)
{
- struct libparser_unit *unit, *next;
- struct libparser_unit **head;
- unsigned char c;
- size_t i;
+ void *new;
+ if (ctx->followups.count == ctx->followups.size) {
+ if (ctx->followups.size > SIZE_MAX / sizeof(*ctx->followups.followups) - 64u) {
+ enomem:
+ errno = ENOMEM;
+ return NULL;
+ }
+ ctx->followups.size += 64u;
+ new = realloc(ctx->followups.followups,
+ ctx->followups.size * sizeof(*ctx->followups.followups));
+ if (!new)
+ goto enomem;
+ ctx->followups.followups = new;
+ }
+
+#if PRINT_ACTIONS
+ fprintf(stderr, "New follow-up slot at %zu\n", ctx->followups.count);
+#endif
+
+ return &ctx->followups.followups[ctx->followups.count++];
+}
+
+
+/**
+ * Push sentence into follow-up stack
+ *
+ * @param rule The name of the rule, or `NULL` if it is an unnamed subsentence
+ * @param sentence The sentence to push
+ * @param ctx Parsing context
+ * @return 0 on success, -1 on failure
+ */
+static int
+push(const char *rule, const union libparser_sentence *sentence, struct context *ctx)
+{
+ struct followup *followup = new_followup(ctx);
+ if (!followup)
+ return -1;
+
+ followup->sentence = sentence;
+ followup->unit = NULL;
+ followup->rule = rule;
+
+#if PRINT_ACTIONS
+ fprintf(stderr, "Pushing follow-up sentence: %s: %s\n",
+ sentence_type_string(sentence->type), rule);
+#endif
+
+ return 0;
+}
+
+
+/**
+ * Create a unit for a sentence and push it into
+ * the follow-up stack to mark the sentence as
+ * completed when the follow-up node is processed
+ *
+ * @param rule The name of the rule, or `NULL` if it is an unnamed subsentence
+ * @param sentence The sentence to push
+ * @param start The position in the input text where the sentence begins
+ * @param ctx Parsing context
+ * @return 0 on success, -1 on failure
+ */
+static int
+incomplete(const char *rule, const union libparser_sentence *sentence, size_t start, struct context *ctx)
+{
+ struct followup *followup;
+ struct libparser_unit *unit;
+ struct libparser_unit **unit_slot;
unit = alloc_unit(ctx);
if (!unit)
- return NULL;
- unit->end = 0;
+ return -1;
unit->in = unit->next = NULL;
unit->rule = rule;
- unit->start = ctx->position;
+ unit->start = start;
+ unit->end = start;
+ unit->sentence = sentence;
- switch (sentence->type) {
- case LIBPARSER_SENTENCE_TYPE_CONCATENATION:
- unit->in = try_match(NULL, sentence->binary.left, ctx);
- if (!unit->in)
- goto mismatch;
- if (ctx->done)
- break;
- unit->in->next = try_match(NULL, sentence->binary.right, ctx);
- if (!unit->in->next) {
- free_unit(unit->in, ctx);
- goto mismatch;
- }
- if (IS_HIDDEN(unit->in->next->rule))
- embed_rule(&unit->in->next, ctx);
- if (IS_HIDDEN(unit->in->rule)) {
- next = unit->in->next;
- embed_rule(&unit->in, ctx);
- /* rejoin with right-hand */
- if (unit->in) {
- for (head = &unit->in->next; *head; head = &(*head)->next);
- *head = next;
- } else {
- unit->in = next;
+ followup = new_followup(ctx);
+ if (!followup) {
+ free_unit(unit, ctx);
+ return -1;
+ }
+ followup->unit = unit;
+ followup->unit_index = ctx->units.count;
+
+#if PRINT_ACTIONS
+ fprintf(stderr, "Pushing follow-up completion for unit %zu: %s: %s\n",
+ ctx->units.count, sentence_type_string(sentence->type), rule);
+#endif
+
+ unit_slot = new_unit(ctx);
+ if (!unit_slot) {
+ free_unit(unit, ctx);
+ return -1;
+ }
+ *unit_slot = unit;
+#if PRINT_ACTIONS
+ fprintf(stderr, "Writing unit to slot: %s: %s, spanning [%zu, %zu)\n",
+ unit->rule, sentence_type_string(unit->sentence->type), unit->start, unit->end);
+#endif
+
+ return 0;
+}
+
+
+/**
+ * Add branching point to list of choices,
+ * or if already added, just the get the
+ * current choice
+ *
+ * @param rule The name of the rule that is a brachning
+ * point, `NULL` if a subsentence
+ * @param sentence The sentence that is brachning point
+ * @param start The position in the input text where the
+ * brachning begins
+ * @param choice_out Output parameter for the current choice value
+ * @return 0 on success, -1 on failure
+ */
+static int
+braching(const char *rule, const union libparser_sentence *sentence, size_t start, struct context *ctx, size_t *choice_out)
+{
+ struct choice *choice;
+ size_t size;
+ void *new;
+ if (ctx->choices.next == ctx->choices.count) {
+ if (ctx->choices.count == ctx->choices.size) {
+ size = ctx->choices.size;
+ if (size > SIZE_MAX - 8u) {
+ enomem:
+ errno = ENOMEM;
+ return -1;
}
+ size += 8u;
+ if (size > SIZE_MAX / sizeof(*ctx->choices.choices))
+ goto enomem;
+ new = realloc(ctx->choices.choices, size * sizeof(*ctx->choices.choices));
+ if (!new)
+ goto enomem;
+ ctx->choices.choices = new;
+ ctx->choices.size = size;
}
- break;
+ choice = &ctx->choices.choices[ctx->choices.next];
+ choice->choice = 0;
+ choice->position = start;
+ choice->rule = rule;
+ choice->sentence = sentence;
+ choice->unit_index = ctx->units.count;
+ choice->followups.followups = NULL;
+ choice->followups.count = ctx->followups.count;
+ if (choice->followups.count) {
+ size = choice->followups.count * sizeof(*choice->followups.followups);
+ choice->followups.followups = malloc(size);
+ if (!choice->followups.followups)
+ goto enomem;
+ memcpy(choice->followups.followups, ctx->followups.followups, size);
+ }
+ ctx->choices.count++;
+
+#if PRINT_ACTIONS
+ fprintf(stderr, "Adding branching point at %zu for unit %zu: %s: %s, at %zu in text\n",
+ ctx->choices.next, ctx->units.count,
+ sentence_type_string(sentence->type), rule, start);
+ } else {
+ fprintf(stderr, "Revisiting branching point %zu at %zu in text for unit %zu: "
+ "%s: %s, at %zu in text; choice value: %zu\n",
+ ctx->choices.next, start,
+ ctx->choices.choices[ctx->choices.next].unit_index,
+ sentence_type_string(ctx->choices.choices[ctx->choices.next].sentence->type),
+ ctx->choices.choices[ctx->choices.next].rule,
+ ctx->choices.choices[ctx->choices.next].position,
+ ctx->choices.choices[ctx->choices.next].choice);
+#endif
+ }
+ *choice_out = ctx->choices.choices[ctx->choices.next++].choice;
+ return 0;
+}
+
+/**
+ * Add a sentence, as a unit, to the list of units,
+ * will adding to the follow-up stack, the subsentences
+ * the the completion of the sentence unless it is
+ * atomic, and for branching points, record the choice
+ * with backtracking data
+ *
+ * @param rule The name of the rule, `NULL` if it is a subsentence
+ * @param sentence The sentence to add
+ * @param ctx Parsing context
+ * @return 1 on matched or incomplete, 0 on mismatch, -1 on failure
+ */
+static int
+add_unit(const char *rule, const union libparser_sentence *sentence, struct context *ctx)
+{
+ struct libparser_unit *unit;
+ struct libparser_unit **unit_slot;
+ size_t start = ctx->position;
+ unsigned char c;
+ size_t i, choice;
+
+#if PRINT_ACTIONS
+ fprintf(stderr, "Visiting %s: %s, at %zu in text\n",
+ sentence_type_string(sentence->type), rule, start);
+#endif
+
+ switch (sentence->type) {
case LIBPARSER_SENTENCE_TYPE_ALTERNATION:
- unit->in = try_match(NULL, sentence->binary.left, ctx);
- if (!unit->in) {
- unit->in = try_match(NULL, sentence->binary.right, ctx);
- if (!unit->in)
- goto mismatch;
- }
- prone:
- if (unit->in && IS_HIDDEN(unit->in->rule))
- embed_rule(&unit->in, ctx);
- break;
+ if (braching(rule, sentence, start, ctx, &choice))
+ return -1;
+ if (choice > 1u)
+ goto exhausted;
+ if (incomplete(rule, sentence, start, ctx) ||
+ push(NULL, choice ? sentence->binary.right : sentence->binary.left, ctx))
+ return -1;
+ return 1;
- case LIBPARSER_SENTENCE_TYPE_REJECTION:
- unit->in = try_match(NULL, sentence->unary.sentence, ctx);
- if (unit->in) {
- free_unit(unit->in, ctx);
- if (!ctx->exception)
- goto mismatch;
- ctx->exception = 0;
+ case LIBPARSER_SENTENCE_TYPE_REPEATED:
+ if (braching(rule, sentence, start, ctx, &choice))
+ return -1;
+ if (choice == 0u) {
+ if (incomplete(rule, sentence, start, ctx) ||
+ push(NULL, sentence, ctx) ||
+ push(NULL, sentence->unary.sentence, ctx))
+ return -1;
+ } else if (choice == 1u) {
+ if (incomplete(rule, sentence, start, ctx) ||
+ push("@epsilon", &epsilon, ctx) ||
+ push(NULL, sentence->unary.sentence, ctx))
+ return -1;
+ } else if (choice == 2u) {
+ if (incomplete(rule, sentence, start, ctx) ||
+ push("@epsilon", &epsilon, ctx) ||
+ push("@epsilon", &epsilon, ctx))
+ return -1;
+ } else {
+ goto exhausted;
}
- ctx->position = unit->start;
- unit->rule = NULL;
- break;
+ return 1;
case LIBPARSER_SENTENCE_TYPE_OPTIONAL:
- unit->in = try_match(NULL, sentence->unary.sentence, ctx);
- goto prone;
+ if (braching(rule, sentence, start, ctx, &choice))
+ return -1;
+ if (choice == 0u) {
+ if (incomplete(rule, sentence, start, ctx) ||
+ push(NULL, sentence->unary.sentence, ctx))
+ return -1;
+ } else if (choice == 1u) {
+ if (incomplete(rule, sentence, start, ctx) ||
+ push("@epsilon", &epsilon, ctx))
+ return -1;
+ } else {
+ goto exhausted;
+ }
+ return 1;
- case LIBPARSER_SENTENCE_TYPE_REPEATED:
- head = &unit->in;
- while ((*head = try_match(NULL, sentence->unary.sentence, ctx))) {
- if (IS_HIDDEN((*head)->rule)) {
- embed_rule(head, ctx);
- while (*head)
- head = &(*head)->next;
- } else {
- head = &(*head)->next;
- }
- if (ctx->done)
+ case LIBPARSER_SENTENCE_TYPE_CONCATENATION:
+ if (incomplete(rule, sentence, start, ctx) ||
+ push(NULL, sentence->binary.right, ctx) ||
+ push(NULL, sentence->binary.left, ctx))
+ return -1;
+ return 1;
+
+ case LIBPARSER_SENTENCE_TYPE_REJECTION:
+ if (incomplete(NULL, sentence, start, ctx) ||
+ push(NULL, sentence->unary.sentence, ctx))
+ return -1;
+ return 1;
+
+ case LIBPARSER_SENTENCE_TYPE_RULE:
+ for (i = 0; ctx->rules[i]; i++)
+ if (!strcmp(ctx->rules[i]->name, sentence->rule.rule))
break;
- }
- break;
+ if (!ctx->rules[i])
+ abort();
+ if (incomplete(rule, sentence, start, ctx) ||
+ push(ctx->rules[i]->name, ctx->rules[i]->sentence, ctx))
+ return -1;
+ return 1;
case LIBPARSER_SENTENCE_TYPE_STRING:
- if (sentence->string.length > ctx->length - ctx->position)
- goto mismatch;
- if (memcmp(&ctx->data[ctx->position], sentence->string.string, sentence->string.length))
- goto mismatch;
+ if (sentence->string.length > ctx->length - ctx->position ||
+ memcmp(&ctx->data[ctx->position], sentence->string.string, sentence->string.length))
+ return 0;
ctx->position += sentence->string.length;
break;
@@ -212,59 +871,249 @@ try_match(const char *rule, const union libparser_sentence *sentence, struct con
ctx->position += 1u;
break;
- case LIBPARSER_SENTENCE_TYPE_RULE:
- for (i = 0; ctx->rules[i]; i++)
- if (!strcmp(ctx->rules[i]->name, sentence->rule.rule))
- break;
- if (!ctx->rules[i])
- abort();
- unit->in = try_match(ctx->rules[i]->name, ctx->rules[i]->sentence, ctx);
- if (!unit->in)
- goto mismatch;
- goto prone;
-
case LIBPARSER_SENTENCE_TYPE_EXCEPTION:
- ctx->done = 1;
ctx->exception = 1;
break;
case LIBPARSER_SENTENCE_TYPE_EOF:
if (ctx->position != ctx->length)
goto mismatch;
- ctx->done = 1;
+ break;
+
+ case LIBPARSER_SENTENCE_TYPE_EPSILON:
break;
default:
abort();
}
+ /* for atomic sentences */
+ unit = alloc_unit(ctx);
+ if (!unit)
+ return -1;
+ unit->in = unit->next = NULL;
+ unit->rule = rule;
+ unit->start = start;
unit->end = ctx->position;
- return unit;
+ unit->sentence = sentence;
+ unit_slot = new_unit(ctx);
+ if (!unit_slot) {
+ free_unit(unit, ctx);
+ return -1;
+ }
+ *unit_slot = unit;
+#if PRINT_ACTIONS
+ fprintf(stderr, "Writing atomic unit to slot: %s: %s, spanning [%zu, %zu)\n",
+ sentence_type_string(unit->sentence->type), unit->rule, unit->start, unit->end);
+#endif
+ return 1;
+exhausted:
+ ctx->choices.choices[ctx->choices.next - 1U].choice = EXHAUSTED;
mismatch:
- /* On mismatch, restore position in text */
- ctx->position = unit->start;
- /* and place the unit in the memory cache */
- unit->next = ctx->cache;
- ctx->cache = unit;
- return NULL;
+#if PRINT_ACTIONS
+ fprintf(stderr, "Returning mismatch\n");
+#endif
+ return 0;
+}
+
+
+/**
+ * Convert list of units into a tree
+ *
+ * @param ctx Parsing context
+ * @param unit_i Reference to current position in the unit list
+ * @param last Output parameter for the last unit on the same
+ * level as the returned node (used to deal with
+ * embedding of anonymous sentences)
+ * @return The first top level node from the current
+ * position in the list of units
+ */
+static struct libparser_unit *
+make_tree(struct context *ctx, size_t *unit_i, struct libparser_unit **last)
+{
+ /* TODO try to flatten to call stack */
+ struct libparser_unit *unit, *middle, **p;
+ size_t start;
+
+#if PRINT_ACTIONS
+ static int indent = 0;
+ fprintf(stderr, "%*.s", indent, "");
+ fprintf(stderr, "{ at %zu of %zu\n", *unit_i, ctx->units.count);
+ indent += 4;
+ fprintf(stderr, "%*.s", indent, "");
+#endif
+
+ if (*unit_i == ctx->units.count)
+ abort();
+
+ unit = ctx->units.units[(*unit_i)++];
+ unit->in = NULL;
+ unit->next = NULL;
+
+ switch (unit->sentence->type) {
+ case LIBPARSER_SENTENCE_TYPE_RULE:
+ case LIBPARSER_SENTENCE_TYPE_OPTIONAL:
+ case LIBPARSER_SENTENCE_TYPE_ALTERNATION:
+#if PRINT_ACTIONS
+ fprintf(stderr, "Unary unit: %s: %s [%zu, %zu)\n",
+ sentence_type_string(unit->sentence->type),
+ unit->rule, unit->start, unit->end);
+#endif
+ unit->in = make_tree(ctx, unit_i, last);
+ break;
+
+ case LIBPARSER_SENTENCE_TYPE_REJECTION:
+#if PRINT_ACTIONS
+ fprintf(stderr, "Unary lookahead unit: %s: %s [%zu, %zu)\n",
+ sentence_type_string(unit->sentence->type),
+ unit->rule, unit->start, unit->end);
+#endif
+ unit->in = make_tree(ctx, unit_i, last);
+ start = unit->start;
+ dealloc_unit(unit);
+ unit = malloc(sizeof(*unit));
+ unit->rule = "@epsilon";
+ unit->sentence = &epsilon;
+ unit->in = unit->next = NULL;
+ unit->start = unit->end = start;
+ *last = unit;
+ goto out;
+
+ case LIBPARSER_SENTENCE_TYPE_REPEATED:
+ case LIBPARSER_SENTENCE_TYPE_CONCATENATION:
+#if PRINT_ACTIONS
+ fprintf(stderr, "Binary unit: %s: %s [%zu, %zu)\n",
+ sentence_type_string(unit->sentence->type),
+ unit->rule, unit->start, unit->end);
+#endif
+ unit->in = make_tree(ctx, unit_i, &middle);
+ if (IS_ANONYMOUS(unit->in->rule)) {
+ embed_rule(&unit->in, &middle);
+ if (!middle) {
+ p = &unit->in;
+ goto p_found;
+ }
+ }
+ p = &middle->next;
+ if (*p)
+ while ((*p)->next)
+ p = &(*p)->next;
+ p_found:
+ *p = make_tree(ctx, unit_i, last);
+ if (IS_ANONYMOUS((*p)->rule))
+ embed_rule(&*p, last);
+ goto out;
+
+ case LIBPARSER_SENTENCE_TYPE_STRING:
+ case LIBPARSER_SENTENCE_TYPE_CHAR_RANGE:
+ case LIBPARSER_SENTENCE_TYPE_EXCEPTION:
+ case LIBPARSER_SENTENCE_TYPE_EOF:
+ case LIBPARSER_SENTENCE_TYPE_EPSILON:
+ *last = unit;
+#if PRINT_ACTIONS
+ fprintf(stderr, "Atomic unit: %s: %s [%zu, %zu)\n",
+ sentence_type_string(unit->sentence->type),
+ unit->rule, unit->start, unit->end);
+#endif
+ goto out;
+ default:
+ abort();
+ }
+
+ if (IS_ANONYMOUS(unit->in->rule))
+ embed_rule(&unit->in, last);
+
+out:
+#if PRINT_ACTIONS
+ indent -= 4;
+ fprintf(stderr, "%*.s", indent, "");
+ fprintf(stderr, "} at %zu of %zu\n", *unit_i, ctx->units.count);
+#endif
+ if (!unit)
+ abort();
+ return unit;
+}
+
+
+/**
+ * Remove all epsilons from the tree
+ *
+ * @param unit The tree
+ * @return The tree
+ */
+static struct libparser_unit *
+remove_epsilons(struct libparser_unit *unit)
+{
+ struct libparser_unit **node, *deleted;
+
+ if (!unit)
+ return NULL;
+
+ node = &unit;
+ while (*node) {
+ if ((*node)->sentence->type == LIBPARSER_SENTENCE_TYPE_EPSILON) {
+ deleted = *node;
+ *node = (*node)->next;
+ free(deleted);
+ } else {
+ (*node)->in = remove_epsilons((*node)->in);
+ node = &(*node)->next;
+ }
+ }
+
+ return unit;
+}
+
+
+/**
+ * Check whether the inner most branching-point
+ * contains a rejection
+ *
+ * @param ctx Parsing context
+ * @reutrn 1 if branching-point contains rejection,
+ * 0 otherwise
+ */
+#ifdef __GNUC__
+__attribute__((__pure__))
+#endif
+static int
+choice_contains_rejection(struct context *ctx)
+{
+ /* TODO we should keep track of our rejections so we can do this in constant time */
+ size_t i = ctx->choices.choices[ctx->choices.count - 1u].unit_index;
+ for (i += 1u; i < ctx->units.count; i++)
+ if (ctx->units.units[i]->sentence->type == LIBPARSER_SENTENCE_TYPE_REJECTION)
+ return 1;
+ return 0;
}
int
libparser_parse_file(const struct libparser_rule *const rules[], const char *data, size_t length, struct libparser_unit **rootp)
{
- struct libparser_unit *ret, *t;
+ const char *rule;
+ const union libparser_sentence *sentence;
+ struct libparser_unit *ret = NULL, *t, *unit;
struct context ctx;
- size_t i;
+ size_t i, j, n;
+ int match;
ctx.rules = rules;
- ctx.cache = NULL;
+ ctx.choices.choices = NULL;
+ ctx.choices.count = 0;
+ ctx.choices.size = 0;
+ ctx.choices.next = 0;
+ ctx.followups.followups = NULL;
+ ctx.followups.size = 0;
+ ctx.followups.count = 0;
+ ctx.units.units = NULL;
+ ctx.units.size = 0;
+ ctx.units.count = 0;
+ ctx.unit_pool = NULL;
ctx.data = data;
ctx.length = length;
ctx.position = 0;
- ctx.done = 0;
- ctx.error = 0;
ctx.exception = 0;
for (i = 0; rules[i]; i++)
@@ -272,16 +1121,188 @@ libparser_parse_file(const struct libparser_rule *const rules[], const char *dat
break;
if (!rules[i])
abort();
+ rule = rules[i]->name;
+ sentence = rules[i]->sentence;
+
+#if PRINT_ACTIONS
+ print_grammar(rules);
+#endif
- ret = try_match(rules[i]->name, rules[i]->sentence, &ctx);
+ /* TODO guard against left-side recursion */
+ /* TODO make branching opt-in ("?"), and add commit statements ("+") */
+ /* TODO break REPEATED on empty match */
+followup:
+#if PRINT_ACTIONS
+ print_state(&ctx);
+#endif
+ match = add_unit(rule, sentence, &ctx);
+ if (match < 0) {
+#if PRINT_ACTIONS
+ fprintf(stderr, "Error encountered\n");
+#endif
+ goto out;
+ }
+ while (ctx.followups.count) {
+ i = --ctx.followups.count;
+ if (IS_COMPLETION(ctx.followups.followups[i])) {
+ unit = ctx.followups.followups[i].unit;
+ sentence = unit->sentence;
+#if PRINT_ACTIONS
+ fprintf(stderr, "Popping completion of sentence: %s: %s, "
+ "for unit %zu, at %zu, consuming %zu subunits\n",
+ sentence_type_string(sentence->type), unit->rule,
+ ctx.followups.followups[i].unit_index, ctx.position,
+ ctx.units.count - ctx.followups.followups[i].unit_index - 1U);
+#endif
+ if (sentence->type == LIBPARSER_SENTENCE_TYPE_REJECTION) {
+#if PRINT_ACTIONS
+ fprintf(stderr, "Checking REJECTION\n");
+#endif
+ if (!match) {
+#if PRINT_ACTIONS
+ fprintf(stderr, "Match set, replacing REJECTION with EPSILON\n");
+ fprintf(stderr, "Before:\n");
+ print_state(&ctx);
+#endif
+ match = 1;
+ for (j = ctx.followups.followups[i].unit_index + 1u; j < ctx.units.count; j++)
+ free_unit(ctx.units.units[j], &ctx);
+ j = ctx.followups.followups[i].unit_index;
+ ctx.units.count = j + 1u;
+ if (unit != ctx.units.units[j])
+ abort();
+ if (unit->in)
+ abort();
+ if (unit->next)
+ abort();
+ unit->rule = "@epsilon";
+ unit->sentence = &epsilon;
+ while (ctx.choices.count && ctx.choices.choices[ctx.choices.count - 1u].unit_index >= j) {
+ ctx.choices.count--;
+ free(ctx.choices.choices[ctx.choices.count].followups.followups);
+ }
+#if PRINT_ACTIONS
+ fprintf(stderr, "After:\n");
+ print_state(&ctx);
+ } else if (ctx.exception) {
+ fprintf(stderr, "Match and exception cleared\n");
+ match = 0;
+#endif
+ } else {
+#if PRINT_ACTIONS
+ fprintf(stderr, "Match cleared\n");
+#endif
+ match = 0;
+ }
+ ctx.exception = 0;
+ ctx.position = unit->start;
+ }
+ if (!match)
+ goto mismatch;
+#if PRINT_ACTIONS
+ fprintf(stderr, "Setting end of popped unit to %zu\n", ctx.position);
+#endif
+ unit->end = ctx.position;
+ } else if (match) {
+ rule = ctx.followups.followups[i].rule;
+ sentence = ctx.followups.followups[i].sentence;
+#if PRINT_ACTIONS
+ fprintf(stderr, "Popping follow-up sentence: %s: %s\n",
+ sentence_type_string(sentence->type), rule);
+#endif
+ goto followup;
+ } else mismatch: if (ctx.choices.count && !choice_contains_rejection(&ctx)) {
+ i = ctx.choices.count - 1u;
+ ctx.choices.next = i;
+ n = ctx.units.count;
+ ctx.units.count = ctx.choices.choices[i].unit_index;
+ for (j = ctx.units.count; j < n; j++)
+ free_unit(ctx.units.units[j], &ctx);
+ if (ctx.choices.choices[i].followups.count > ctx.followups.size)
+ abort();
+ ctx.followups.count = ctx.choices.choices[i].followups.count;
+ memcpy(ctx.followups.followups,
+ ctx.choices.choices[i].followups.followups,
+ ctx.followups.count * sizeof(*ctx.followups.followups));
+ if (ctx.choices.choices[i].choice == EXHAUSTED) {
+ free(ctx.choices.choices[i].followups.followups);
+ ctx.choices.count--;
+ goto mismatch;
+ }
+ ctx.choices.choices[i].choice++;
+ ctx.position = ctx.choices.choices[i].position;
+ rule = ctx.choices.choices[i].rule;
+ sentence = ctx.choices.choices[i].sentence;
+#if PRINT_ACTIONS
+ fprintf(stderr, "Mismatch: retrying from branching point %zu (%s, %s), "
+ "and unit %zu, from %zu, now with choice %zu\n",
+ i, sentence_type_string(sentence->type), rule, ctx.position,
+ ctx.units.count, ctx.choices.choices[i].choice);
+#endif
+ goto followup;
+#if PRINT_ACTIONS
+ } else {
+ fprintf(stderr, "Mismatch with %zu pending follow-ups and no choices, "
+ "continuing polling follow-ups\n",
+ ctx.followups.count);
+ print_state(&ctx);
+#endif
+ }
+ }
+
+#if PRINT_ACTIONS
+ fprintf(stderr, "Result determined: %s\n", match ? "match" : "mismatch");
+ fprintf(stderr, "Due to exception?: %s\n", ctx.exception ? "yes" : "no");
+ print_state(&ctx);
+#endif
+
+ if (match) {
+ if (!ctx.units.count)
+ abort();
+#if PRINT_ACTIONS
+ fprintf(stderr, "Converting into tree\n");
+#endif
+ i = 0;
+ ret = make_tree(&ctx, &i, &(struct libparser_unit *){NULL});
+ if (ret && IS_ANONYMOUS(ret->rule)) {
+#if PRINT_ACTIONS
+ fprintf(stderr, "Embedding at top level:\n ");
+#endif
+ embed_rule(&ret, NULL);
+ }
+ if (i != ctx.units.count)
+ abort();
+ ctx.units.count = 0;
+
+ ret = remove_epsilons(ret);
+
+#if PRINT_ACTIONS
+ fprintf(stderr, "Match tree:\n");
+ print_tree(ret, 4);
+#endif
+ } else if (ctx.choices.count) {
+#if PRINT_ACTIONS
+ fprintf(stderr, "Result with mismatch but remaining choices, trying next alternative\n");
+#endif
+ ctx.exception = 0;
+ goto mismatch;
+ }
- while (ctx.cache) {
- t = ctx.cache;
- ctx.cache = t->next;
+out:
+ for (i = 0; i < ctx.choices.count; i++)
+ free(ctx.choices.choices[i].followups.followups);
+ free(ctx.choices.choices);
+ free(ctx.followups.followups);
+ for (i = 0; i < ctx.units.count; i++)
+ dealloc_unit(ctx.units.units[i]);
+ free(ctx.units.units);
+ while (ctx.unit_pool) {
+ t = ctx.unit_pool;
+ ctx.unit_pool = t->next;
free(t);
}
- if (ctx.error) {
+ if (match < 0) {
dealloc_unit(ret);
*rootp = NULL;
return -1;
diff --git a/libparser.h b/libparser.h
index d8b13d9..5f55b53 100644
--- a/libparser.h
+++ b/libparser.h
@@ -19,7 +19,8 @@ enum libparser_sentence_type {
LIBPARSER_SENTENCE_TYPE_CHAR_RANGE, /* .char_range */
LIBPARSER_SENTENCE_TYPE_RULE, /* .rule */
LIBPARSER_SENTENCE_TYPE_EXCEPTION, /* (none) */
- LIBPARSER_SENTENCE_TYPE_EOF /* (none) */
+ LIBPARSER_SENTENCE_TYPE_EOF, /* (none) */
+ LIBPARSER_SENTENCE_TYPE_EPSILON /* (none) */
};
struct libparser_sentence_binary {
@@ -73,6 +74,9 @@ struct libparser_unit {
struct libparser_unit *next;
size_t start;
size_t end;
+
+ /* internal: */
+ const union libparser_sentence *sentence;
};
diff --git a/print-syntax.c b/print-syntax.c
index f792348..1584e7b 100644
--- a/print-syntax.c
+++ b/print-syntax.c
@@ -88,6 +88,7 @@ print_sentence(const union libparser_sentence *sentence, int indent)
indent += len;
break;
+ case LIBPARSER_SENTENCE_TYPE_EPSILON:
default:
abort();
}
@@ -108,14 +109,15 @@ main(int argc, char *argv[])
}
for (i = 0; libparser_rule_table[i]; i++) {
+#if 1
if (libparser_rule_table[i]->name[0] == '@')
continue;
+#endif
- if (!first) {
+ if (!first)
printf("\n");
- } else {
+ else
first = 0;
- }
printf("%s = %n", libparser_rule_table[i]->name, &indent);
print_sentence(libparser_rule_table[i]->sentence, indent);