From e12b1cc372e0c64a7c2fb76bc08a72381808d89d Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Sun, 28 Dec 2025 14:00:33 +0100 Subject: Some fixes and generic automaton MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- Makefile | 6 ++++++ common.h | 1 + libautomata.h | 43 +++++++++++++++++++++++++++++++++++-- libautomata_clone_automaton.c | 28 ++++++++++++++++++++++++ libautomata_compile_kmp_automaton.c | 14 ++++++------ libautomata_compile_mp_automaton.c | 2 +- libautomata_destroy_automaton.c | 17 +++++++++++++++ libautomata_execute_kmp_automaton.c | 2 +- libautomata_execute_mp_automaton.c | 4 ++-- libautomata_free_automaton.c | 10 +++++++++ libautomata_grow_automaton.c | 27 +++++++++++++++++++++++ libautomata_reset_automaton.c | 5 +++++ test_kmp_automaton.c | 18 ++++++++-------- test_mp_automaton.c | 18 ++++++++-------- 14 files changed, 164 insertions(+), 31 deletions(-) create mode 100644 libautomata_clone_automaton.c create mode 100644 libautomata_destroy_automaton.c create mode 100644 libautomata_free_automaton.c create mode 100644 libautomata_grow_automaton.c create mode 100644 libautomata_reset_automaton.c diff --git a/Makefile b/Makefile index 285f70e..7a182bf 100644 --- a/Makefile +++ b/Makefile @@ -17,6 +17,12 @@ LIB_NAME = automata OBJ =\ + libautomata_grow_automaton.o\ + libautomata_reset_automaton.o\ + libautomata_clone_automaton.o\ + libautomata_destroy_automaton.o\ + libautomata_free_automaton.o\ + libautomata_execute_automaton.o\ libautomata_reset_kmp_automaton.o\ libautomata_clone_kmp_automaton.o\ libautomata_compile_kmp_automaton.o\ diff --git a/common.h b/common.h index 0e132a1..b720a03 100644 --- a/common.h +++ b/common.h @@ -1,6 +1,7 @@ /* See LICENSE file for copyright and license details. */ #include "libautomata.h" +#include #include #include #include diff --git a/libautomata.h b/libautomata.h index 4f6caeb..82d4cdd 100644 --- a/libautomata.h +++ b/libautomata.h @@ -3,6 +3,45 @@ #define LIBAUTOMATA_H #include +#include + + +/* Generic automaton */ +enum libautomata_automaton_state { + LIBAUTOMATA_AUTOMATON_RUNNING, + LIBAUTOMATA_AUTOMATON_ABORTED, + LIBAUTOMATA_AUTOMATON_FINISHED +}; +union libautomata_node_state { + intmax_t sint; + uintmax_t uint; + void *ptr; +}; +struct libautomata_node { + struct libautomata_node *(*execute)(struct libautomata_automaton *, const void *input, void **input_out, size_t *lengthp); + void (*reset)(struct libautomata_node *); + int (*clone)(const struct libautomata_node *, struct libautomata_node *clone); + void (*destory)(struct libautomata_node *); + union libautomata_node_state state; + size_t index; +}; +struct libautomata_automaton { + struct libautomata_node *start; + struct libautomata_node *current; + struct libautomata_node *nodes; + size_t nnodes; + enum libautomata_automaton_state state; +}; +int libautomata_grow_automaton(struct libautomata_automaton *automaton, size_t n); +void libautomata_reset_automaton(struct libautomata_automaton *automaton); +int libautomata_clone_automaton(const struct libautomata_automaton *automaton, struct libautomata_automaton *clone); +void libautomata_destroy_automaton(struct libautomata_automaton *automaton); +void libautomata_free_automaton(struct libautomata_automaton *automaton); +inline void +libautomata_execute_automaton(struct libautomata_automaton *automaton, const void *input, void **input_out, size_t *lengthp) +{ + automaton->current->execute(automaton, input, input_out, lengthp); +} /* Knuth–Morris–Pratt substring search (finds end of substring) */ @@ -10,7 +49,7 @@ typedef struct libautomata_kmp_automaton LIBAUTOMATA_KMP_AUTOMATON; void libautomata_reset_kmp_automaton(LIBAUTOMATA_KMP_AUTOMATON *automaton); LIBAUTOMATA_KMP_AUTOMATON *libautomata_clone_kmp_automaton(const LIBAUTOMATA_KMP_AUTOMATON *automaton); LIBAUTOMATA_KMP_AUTOMATON *libautomata_compile_kmp_automaton(const void *pattern, size_t length, size_t elemsize); -void *libautomata_execute_kmp_automaton(const void *haystack, size_t length, LIBAUTOMATA_KMP_AUTOMATON *automaton); +void *libautomata_execute_kmp_automaton(LIBAUTOMATA_KMP_AUTOMATON *automaton, const void *haystack, size_t length); /* Morris–Pratt substring search (finds end of substring) */ @@ -18,7 +57,7 @@ typedef struct libautomata_mp_automaton LIBAUTOMATA_MP_AUTOMATON; void libautomata_reset_mp_automaton(LIBAUTOMATA_MP_AUTOMATON *automaton); LIBAUTOMATA_MP_AUTOMATON *libautomata_clone_mp_automaton(const LIBAUTOMATA_MP_AUTOMATON *automaton); LIBAUTOMATA_MP_AUTOMATON *libautomata_compile_mp_automaton(const void *pattern, size_t length, size_t elemsize); -void *libautomata_execute_mp_automaton(const void *haystack, size_t length, LIBAUTOMATA_MP_AUTOMATON *automaton); +void *libautomata_execute_mp_automaton(,LIBAUTOMATA_MP_AUTOMATON *automaton, const void *haystack, size_t length); #endif diff --git a/libautomata_clone_automaton.c b/libautomata_clone_automaton.c new file mode 100644 index 0000000..eb772e4 --- /dev/null +++ b/libautomata_clone_automaton.c @@ -0,0 +1,28 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + + +int +libautomata_clone_automaton(const struct libautomata_automaton *automaton, struct libautomata_automaton *clone) +{ + clone->nnodes = 0; + if (libautomata_grow_automaton(clone, automaton->nnodes)) + goto fail; + + for (clone->nnodes = 0; clone->nnodes < automaton->nnodes; clone->nnodes++) { + if (automaton->nodes[clone->nnodes].clone) { + if ((*automaton->nodes[clone->nnodes].clone)(&automaton->nodes[clone->nnodes], + &clone->nodes[clone->nnodes])) + goto fail; + } else { + clone->nodes[clone->nnodes] = automaton->nodes[clone->nnodes]; + } + } + + clone->start = &clone->nodes[automaton->start->index]; + clone->current = &clone->nodes[automaton->current->index]; + return 0; +fail: + libautomata_destroy_automaton(clone); + return -1; +} diff --git a/libautomata_compile_kmp_automaton.c b/libautomata_compile_kmp_automaton.c index 3917b00..46b0430 100644 --- a/libautomata_compile_kmp_automaton.c +++ b/libautomata_compile_kmp_automaton.c @@ -24,15 +24,15 @@ libautomata_compile_kmp_automaton(const void *pattern, size_t length, size_t ele (!memcmp(&((const char *)pattern)[i * (WIDTH)], &((const char *)pattern)[j * (WIDTH)], (WIDTH))) #define IMPLEMENT(EQ, EQ_PARAM, CASE)\ - while (i < length) {\ - if (j != SIZE_MAX && !(EQ(EQ_PARAM)))\ - j = ret->next[j];\ - i++;\ - j++;\ - if (EQ(EQ_PARAM))\ + for (; i < length; i++, j++) {\ + if (EQ(EQ_PARAM)) {\ ret->next[i] = ret->next[j];\ - else CASE:\ + } else {\ + CASE:\ ret->next[i] = j;\ + while (j != SIZE_MAX && !(EQ(EQ_PARAM)))\ + j = ret->next[j];\ + }\ }\ return ret diff --git a/libautomata_compile_mp_automaton.c b/libautomata_compile_mp_automaton.c index b9e8739..49cd33d 100644 --- a/libautomata_compile_mp_automaton.c +++ b/libautomata_compile_mp_automaton.c @@ -25,7 +25,7 @@ libautomata_compile_mp_automaton(const void *pattern, size_t length, size_t elem #define IMPLEMENT(EQ, EQ_PARAM, CASE)\ while (i < length) {\ - if (j != SIZE_MAX && !(EQ(EQ_PARAM)))\ + while (j != SIZE_MAX && !(EQ(EQ_PARAM)))\ j = ret->next[j];\ i++;\ j++;\ diff --git a/libautomata_destroy_automaton.c b/libautomata_destroy_automaton.c new file mode 100644 index 0000000..ef27bc1 --- /dev/null +++ b/libautomata_destroy_automaton.c @@ -0,0 +1,17 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + + +void +libautomata_destroy_automaton(struct libautomata_automaton *automaton) +{ + size_t i; + if (automaton->nodes) + for (i = 0; i < automaton->nnodes; i++) + if (automaton->nodes[i] && automaton->nodes[i].destory) + (*automaton->nodes[i].destory)(&automaton->nodes[i]); + automaton->start = NULL; + automaton->current = NULL; + automaton->nodes = NULL; + automaton->nnodes = 0; +} diff --git a/libautomata_execute_kmp_automaton.c b/libautomata_execute_kmp_automaton.c index 69ffcce..0d447d9 100644 --- a/libautomata_execute_kmp_automaton.c +++ b/libautomata_execute_kmp_automaton.c @@ -3,7 +3,7 @@ void * -libautomata_execute_kmp_automaton(const void *haystack, size_t length, LIBAUTOMATA_KMP_AUTOMATON *automaton) +libautomata_execute_kmp_automaton(LIBAUTOMATA_KMP_AUTOMATON *automaton, const void *haystack, size_t length) { const void *pattern = (void *)&automaton->next[automaton->length + 1U]; size_t i = 0; diff --git a/libautomata_execute_mp_automaton.c b/libautomata_execute_mp_automaton.c index 13692dd..a38cbfd 100644 --- a/libautomata_execute_mp_automaton.c +++ b/libautomata_execute_mp_automaton.c @@ -3,7 +3,7 @@ void * -libautomata_execute_mp_automaton(const void *haystack, size_t length, LIBAUTOMATA_MP_AUTOMATON *automaton) +libautomata_execute_mp_automaton(LIBAUTOMATA_MP_AUTOMATON *automaton, const void *haystack, size_t length) { - return libautomata_execute_kmp_automaton(haystack, length, (void *)automaton); + return libautomata_execute_kmp_automaton((void *)automaton, haystack, length); } diff --git a/libautomata_free_automaton.c b/libautomata_free_automaton.c new file mode 100644 index 0000000..448ac39 --- /dev/null +++ b/libautomata_free_automaton.c @@ -0,0 +1,10 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + + +void +libautomata_free_automaton(struct libautomata_automaton *automaton) +{ + libautomata_destroy_automaton(automaton); + free(automaton); +} diff --git a/libautomata_grow_automaton.c b/libautomata_grow_automaton.c new file mode 100644 index 0000000..98ce12f --- /dev/null +++ b/libautomata_grow_automaton.c @@ -0,0 +1,27 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + + +int +libautomata_grow_automaton(struct libautomata_automaton *automaton, size_t n) +{ + struct libautomata_node *new; + size_t size; + + if (automaton->nnodes > SIZE_MAX - n) + goto enomem; + size = automaton->nnodes + n; + if (size > SIZE_MAX / sizeof(*automaton->nodes)) + goto enomem; + size *= sizeof(*automaton->nodes); + + new = realloc(automaton->nodes, size); + if (!new) { + enomem: + errno = ENOMEM; + return -1; + } + automaton->nodes = new; + + return 0; +} diff --git a/libautomata_reset_automaton.c b/libautomata_reset_automaton.c new file mode 100644 index 0000000..24cf924 --- /dev/null +++ b/libautomata_reset_automaton.c @@ -0,0 +1,5 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + + +extern inline void libautomata_execute_automaton(struct libautomata_automaton *, const void *, void **, size_t *); diff --git a/test_kmp_automaton.c b/test_kmp_automaton.c index fa824e4..0b7a3e2 100644 --- a/test_kmp_automaton.c +++ b/test_kmp_automaton.c @@ -11,35 +11,35 @@ main(void) EXPECT((a1 = libautomata_compile_kmp_automaton(MEM("es"), 1))); EXPECT((a2 = libautomata_clone_kmp_automaton(a1))); - r = libautomata_execute_kmp_automaton(MEM("test"), a1); + r = libautomata_execute_kmp_automaton(a1, MEM("test")); EXPECT(r); EXPECT(!strcmp(r, "t")); - r = libautomata_execute_kmp_automaton(MEM("test"), a1); + r = libautomata_execute_kmp_automaton(a1, MEM("test")); EXPECT(r); EXPECT(!strcmp(r, "t")); libautomata_reset_kmp_automaton(a1); - r = libautomata_execute_kmp_automaton(MEM("test"), a1); + r = libautomata_execute_kmp_automaton(a1, MEM("test")); EXPECT(r); EXPECT(!strcmp(r, "t")); - r = libautomata_execute_kmp_automaton(MEM("test"), a2); + r = libautomata_execute_kmp_automaton(a2, MEM("test")); EXPECT(r); EXPECT(!strcmp(r, "t")); - r = libautomata_execute_kmp_automaton(MEM("te"), a1); + r = libautomata_execute_kmp_automaton(a1, MEM("te")); EXPECT(!r); - r = libautomata_execute_kmp_automaton(MEM("sting"), a1); + r = libautomata_execute_kmp_automaton(a1, MEM("sting")); EXPECT(r); EXPECT(!strcmp(r, "ting")); - r = libautomata_execute_kmp_automaton(MEM("te"), a1); + r = libautomata_execute_kmp_automaton(a1, MEM("te")); EXPECT(!r); libautomata_reset_kmp_automaton(a1); - r = libautomata_execute_kmp_automaton(MEM("sting"), a1); + r = libautomata_execute_kmp_automaton(a1, MEM("sting")); EXPECT(!r); free(a1); free(a2); EXPECT((a1 = libautomata_compile_kmp_automaton(MEM("nano"), 1))); - r = libautomata_execute_kmp_automaton(MEM("searching for a nanostring"), a1); + r = libautomata_execute_kmp_automaton(a1, MEM("searching for a nanostring")); EXPECT(r); EXPECT(!strcmp(r, "string")); free(a1); diff --git a/test_mp_automaton.c b/test_mp_automaton.c index f6b41ff..29eef76 100644 --- a/test_mp_automaton.c +++ b/test_mp_automaton.c @@ -11,35 +11,35 @@ main(void) EXPECT((a1 = libautomata_compile_mp_automaton(MEM("es"), 1))); EXPECT((a2 = libautomata_clone_mp_automaton(a1))); - r = libautomata_execute_mp_automaton(MEM("test"), a1); + r = libautomata_execute_mp_automaton(a1, MEM("test")); EXPECT(r); EXPECT(!strcmp(r, "t")); - r = libautomata_execute_mp_automaton(MEM("test"), a1); + r = libautomata_execute_mp_automaton(a1, MEM("test")); EXPECT(r); EXPECT(!strcmp(r, "t")); libautomata_reset_mp_automaton(a1); - r = libautomata_execute_mp_automaton(MEM("test"), a1); + r = libautomata_execute_mp_automaton(a1, MEM("test")); EXPECT(r); EXPECT(!strcmp(r, "t")); - r = libautomata_execute_mp_automaton(MEM("test"), a2); + r = libautomata_execute_mp_automaton(a2, MEM("test")); EXPECT(r); EXPECT(!strcmp(r, "t")); - r = libautomata_execute_mp_automaton(MEM("te"), a1); + r = libautomata_execute_mp_automaton(a1, MEM("te")); EXPECT(!r); - r = libautomata_execute_mp_automaton(MEM("sting"), a1); + r = libautomata_execute_mp_automaton(a1, MEM("sting")); EXPECT(r); EXPECT(!strcmp(r, "ting")); - r = libautomata_execute_mp_automaton(MEM("te"), a1); + r = libautomata_execute_mp_automaton(a1, MEM("te")); EXPECT(!r); libautomata_reset_mp_automaton(a1); - r = libautomata_execute_mp_automaton(MEM("sting"), a1); + r = libautomata_execute_mp_automaton(a1, MEM("sting")); EXPECT(!r); free(a1); free(a2); EXPECT((a1 = libautomata_compile_mp_automaton(MEM("nano"), 1))); - r = libautomata_execute_mp_automaton(MEM("searching for a nanostring"), a1); + r = libautomata_execute_mp_automaton(a1, MEM("searching for a nanostring")); EXPECT(r); EXPECT(!strcmp(r, "string")); free(a1); -- cgit v1.2.3-70-g09d2