diff options
| author | Mattias Andrée <m@maandree.se> | 2026-01-03 04:43:28 +0100 |
|---|---|---|
| committer | Mattias Andrée <m@maandree.se> | 2026-01-03 05:14:22 +0100 |
| commit | d5e0d01eb24aab15b125da761618d8047a1afba8 (patch) | |
| tree | 0b681d3849b988c3f4fe715aad0487fc7a4460d1 | |
| parent | Portability-fix: NULL is not necessarily 0, so calloc cannot be used for assigning NULL (diff) | |
| download | libparser-master.tar.gz libparser-master.tar.bz2 libparser-master.tar.xz | |
Signed-off-by: Mattias Andrée <m@maandree.se>
| -rw-r--r-- | libparser.c | 113 |
1 files changed, 81 insertions, 32 deletions
diff --git a/libparser.c b/libparser.c index ce26273..545aaee 100644 --- a/libparser.c +++ b/libparser.c @@ -4,20 +4,30 @@ #include <string.h> #define IS_HIDDEN(RULE) (!(RULE) || (RULE)[0] == '_') +/* NULL is used when evaluating a subsentence, + * and rules with the prefix "_" are embedded + * as subsentences */ struct context { - const struct libparser_rule *const *rules; - struct libparser_unit *cache; - const char *data; - size_t length; - size_t position; - char done; - char exception; - char error; + const struct libparser_rule *const *rules; /* rule table */ + struct libparser_unit *cache; /* memory allocation cache */ + const char *data; /* text being parsed */ + size_t length; /* length of text */ + size_t position; /* current position in the text */ + char done; /* end reached or .exception or .error set */ + char exception; /* exception-statement reached */ + char error; /* error encountered */ }; +/** + * Recursively move a unit into + * the memory allocation cache + * + * @param unit Unit to place in the cache + * @param ctx Parsing context (holds the cache) + */ static void free_unit(struct libparser_unit *unit, struct context *ctx) { @@ -32,6 +42,11 @@ free_unit(struct libparser_unit *unit, struct context *ctx) } +/** + * Recursively deallocate a unit + * + * @param unit Unit to deallocate + */ static void dealloc_unit(struct libparser_unit *unit) { @@ -44,14 +59,22 @@ dealloc_unit(struct libparser_unit *unit) } +/** + * Allocate, without initialising, a + * new unit, but first try to reuse + * one from the memory cache + * + * On allocation failure, the + * error is marked in the parsing context + * + * @param ctx Parsing context + * @return Newly allocated (or polled from the + * cache) unit, `NULL` on failure + */ static struct libparser_unit * -try_match(const char *rule, const union libparser_sentence *sentence, struct context *ctx) +alloc_unit(struct context *ctx) { - struct libparser_unit *unit, *next; - struct libparser_unit **head; - unsigned 
char c; - size_t i; - + struct libparser_unit *unit; if (!ctx->cache) { unit = malloc(sizeof(*unit)); if (!unit) { @@ -59,12 +82,45 @@ try_match(const char *rule, const union libparser_sentence *sentence, struct con ctx->error = 1; return NULL; } - unit->end = 0; } else { unit = ctx->cache; ctx->cache = unit->next; } + return unit; +} + + +/** + * Embed a rule's or subsentence's matches + * into where the rule or subsentence was + * matched + * + * @param where The pointer to the match + * @param ctx Parsing context + */ +static void +embed_rule(struct libparser_unit **where, struct context *ctx) +{ + /* remove matched unit */ + (*where)->next = ctx->cache; + ctx->cache = *where; + /* insert interior where matched */ + *where = (*where)->in; +} + + +static struct libparser_unit * +try_match(const char *rule, const union libparser_sentence *sentence, struct context *ctx) +{ + struct libparser_unit *unit, *next; + struct libparser_unit **head; + unsigned char c; + size_t i; + unit = alloc_unit(ctx); + if (!unit) + return NULL; + unit->end = 0; unit->in = unit->next = NULL; unit->rule = rule; unit->start = ctx->position; @@ -81,16 +137,12 @@ try_match(const char *rule, const union libparser_sentence *sentence, struct con free_unit(unit->in, ctx); goto mismatch; } - if (IS_HIDDEN(unit->in->next->rule)) { - unit->in->next->next = ctx->cache; - ctx->cache = unit->in->next; - unit->in->next = unit->in->next->in; - } + if (IS_HIDDEN(unit->in->next->rule)) + embed_rule(&unit->in->next, ctx); if (IS_HIDDEN(unit->in->rule)) { next = unit->in->next; - unit->in->next = ctx->cache; - ctx->cache = unit->in; - unit->in = unit->in->in; + embed_rule(&unit->in, ctx); + /* rejoin with right-hand */ if (unit->in) { for (head = &unit->in->next; *head; head = &(*head)->next); *head = next; @@ -108,11 +160,8 @@ try_match(const char *rule, const union libparser_sentence *sentence, struct con goto mismatch; } prone: - if (unit->in && IS_HIDDEN(unit->in->rule)) { - unit->in->next = 
ctx->cache; - ctx->cache = unit->in; - unit->in = unit->in->in; - } + if (unit->in && IS_HIDDEN(unit->in->rule)) + embed_rule(&unit->in, ctx); break; case LIBPARSER_SENTENCE_TYPE_REJECTION: @@ -135,9 +184,7 @@ try_match(const char *rule, const union libparser_sentence *sentence, struct con head = &unit->in; while ((*head = try_match(NULL, sentence->unary.sentence, ctx))) { if (IS_HIDDEN((*head)->rule)) { - (*head)->next = ctx->cache; - ctx->cache = *head; - *head = (*head)->in; + embed_rule(head, ctx); while (*head) head = &(*head)->next; } else { @@ -162,7 +209,7 @@ try_match(const char *rule, const union libparser_sentence *sentence, struct con c = ((const unsigned char *)ctx->data)[ctx->position]; if (sentence->char_range.low > c || c > sentence->char_range.high) goto mismatch; - ctx->position += 1; + ctx->position += 1u; break; case LIBPARSER_SENTENCE_TYPE_RULE: @@ -195,7 +242,9 @@ try_match(const char *rule, const union libparser_sentence *sentence, struct con return unit; mismatch: + /* On mismatch, restore position in text */ ctx->position = unit->start; + /* and place the unit in the memory cache */ unit->next = ctx->cache; ctx->cache = unit; return NULL; |
