From 4e67edeb151aafb50099e628ea0158084cb5d8a8 Mon Sep 17 00:00:00 2001
From: Mattias Andrée
Date: Fri, 10 Sep 2021 16:30:15 +0200
Subject: Add numtext-strip
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Mattias Andrée
---
 .gitignore      |   4 +
 Makefile        |  31 +++++---
 TODO            |   1 -
 common.h        |   1 +
 numtext-strip.c | 214 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 238 insertions(+), 13 deletions(-)
 create mode 100644 numtext-strip.c

diff --git a/.gitignore b/.gitignore
index 0ee6f2e..c0404b1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,7 @@
 *.gcda
 *.test
 *.dylib
+/numtext-strip
+/num2text
+/text2num
+/card2ord
diff --git a/Makefile b/Makefile
index 95be8c0..4d963c0 100644
--- a/Makefile
+++ b/Makefile
@@ -16,15 +16,15 @@ LIB_VERSION = $(LIB_MAJOR).$(LIB_MINOR)
 LANG =\
 	swedish
 
-OBJ =\
+CMD =\
+	numtext-strip
+
+LIB_OBJ =\
 	libnumtext_card2ord.o\
 	libnumtext_num2text.o\
 	libnumtext_remove_separators.o\
 	$(LANG:=.o)
 
-LOBJ =\
-	$(OBJ:.o=.lo)
-
 HDR =\
 	libnumtext.h\
 	common.h
@@ -32,16 +32,23 @@ HDR =\
 TEST =\
 	$(LANG:=.test)
 
-TEST_OBJ =\
-	$(TEST:=.o)
+OBJ = $(LIB_OBJ) $(CMD_OBJ)
+LOBJ = $(LIB_LOBJ)
+CMD_OBJ = $(CMD:=.o)
+LIB_LOBJ = $(LIB_OBJ:.o=.lo)
+TEST_OBJ = $(TEST:=.o)
 
-all: libnumtext.a libnumtext.$(LIBEXT)
+all: libnumtext.a libnumtext.$(LIBEXT) $(CMD)
 
 $(OBJ): $(HDR)
 $(LOBJ): $(HDR)
 $(TEST_OBJ): $(HDR)
 $(TEST): libnumtext.a
+$(CMD): libnumtext.a
 
+.o:
+	$(CC) -o $@ $< libnumtext.a $(LDFLAGS)
+
 .c.o:
 	$(CC) -c -o $@ $< $(CFLAGS) $(CPPFLAGS)
 
@@ -51,12 +58,12 @@ $(TEST): libnumtext.a
 .test.o.test:
 	$(CC) -o $@ $< libnumtext.a $(LDFLAGS)
 
-libnumtext.a: $(OBJ)
+libnumtext.a: $(LIB_OBJ)
 	@rm -f -- $@
-	$(AR) rc $@ $(OBJ)
+	$(AR) rc $@ $(LIB_OBJ)
 
-libnumtext.$(LIBEXT): $(LOBJ)
-	$(CC) $(LIBFLAGS) -o $@ $(LOBJ) $(LDFLAGS)
+libnumtext.$(LIBEXT): $(LIB_LOBJ)
+	$(CC) $(LIBFLAGS) -o $@ $(LIB_LOBJ) $(LDFLAGS)
 
 check: $(TEST)
 	@set -e &&\
@@ -79,7 +86,7 @@ uninstall:
 	-rm -f -- "$(DESTDIR)$(PREFIX)/include/libnumtext.h"
 
 clean:
-	-rm -f -- *.o *.a *.lo *.su *.so *.so.* *.gch *.gcov *.gcno *.gcda *.test *.dylib
+	-rm -f -- *.o *.a *.lo *.su *.so *.so.* *.gch *.gcov *.gcno *.gcda *.test *.dylib $(CMD)
 
 .SUFFIXES:
 .SUFFIXES: .lo .o .c .test .test.o
diff --git a/TODO b/TODO
index dc1d21b..2b07738 100644
--- a/TODO
+++ b/TODO
@@ -1,7 +1,6 @@
 Add [libnumtext_]text2num: text to numerals, with analysis (possible flags from num2text), use support mixed in numerals
 Add card2ord: conversion utility that wraps libnumtext_card2ord
 Add num2text: conversion utility that wraps libnumtext_num2text
-Add numtext-strip: utility that wraps libnumtext_remove_separators
 Add opposite of libnumtext_remove_separators and numtext-strip
 Add man pages and README
 Add IPA output support?
diff --git a/common.h b/common.h
index 0a39799..1f99622 100644
--- a/common.h
+++ b/common.h
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include
 #include
 
 
diff --git a/numtext-strip.c b/numtext-strip.c
new file mode 100644
index 0000000..b9b453e
--- /dev/null
+++ b/numtext-strip.c
@@ -0,0 +1,214 @@
+#include "common.h"
+#include <libsimple-arg.h>
+
+USAGE("-l language");
+
+
+/* Language selected with the -l option, read by process() */
+static enum libnumtext_language lang;
+
+
+/*
+ * Convert one number and print the result on a line of its own
+ *
+ * The callback is first called with the output buffer as it is. If
+ * the size it reports exceeds the size of the buffer, the buffer is
+ * grown to that size and the callback is called once more.
+ *
+ * @param   callback      Conversion function, see run()
+ * @param   outbufp       Pointer to the output buffer, which is reused between calls
+ * @param   outbuf_sizep  Pointer to the allocation size of `*outbufp`
+ * @param   num           The number to convert
+ * @param   num_len       The length of `num`, in bytes
+ * @return                0 on success, 1 if the callback failed
+ */
+static int
+convert(ssize_t (*callback)(char *, size_t, const char *, size_t),
+        char **outbufp, size_t *outbuf_sizep, const char *num, size_t num_len)
+{
+	ssize_t len;
+	char *new;
+
+	len = callback(*outbufp, *outbuf_sizep, num, num_len);
+	if (len < 0)
+		return 1;
+
+	if ((size_t)len > *outbuf_sizep) {
+		new = realloc(*outbufp, (size_t)len);
+		if (!new) {
+			fprintf(stderr, "%s: realloc %zu: %s\n", argv0, (size_t)len, strerror(errno));
+			exit(1);
+		}
+		*outbufp = new;
+		*outbuf_sizep = (size_t)len;
+		/* The first call could not store all output, redo it with the larger buffer */
+		len = callback(*outbufp, *outbuf_sizep, num, num_len);
+		if (len < 0)
+			return 1;
+	}
+
+	printf("%s\n", *outbufp);
+	return 0;
+}
+
+
+/*
+ * Convert each number and print the results, one per line
+ *
+ * The numbers are the command line operands if there are any,
+ * otherwise one number is read from each non-empty line of the
+ * standard input. A number that cannot be converted is skipped
+ * and makes the function return 1.
+ *
+ * @param   argc      The number of operands
+ * @param   argv      The operands, terminated by a NULL pointer
+ * @param   callback  Function that converts the number in its third argument,
+ *                    whose length is given in the fourth argument, into the
+ *                    buffer in its first argument, whose size is given in the
+ *                    second argument, and returns the buffer size the output
+ *                    requires, or a negative value on failure
+ * @return            0 on success, 1 if any conversion or I/O operation failed
+ */
+static int
+run(int argc, char *argv[], ssize_t (*callback)(char *, size_t, const char *, size_t))
+{
+	char *line = NULL;
+	size_t size = 0;
+	ssize_t len;
+	int ret = 0;
+	char *outbuf = NULL;
+	size_t outbuf_size = 0;
+
+	if (argc) {
+		for (; *argv; argv++)
+			ret |= convert(callback, &outbuf, &outbuf_size, *argv, strlen(*argv));
+	} else {
+		for (;;) {
+			len = getline(&line, &size, stdin);
+			if (len == -1)
+				break;
+			/* Strip the line feed; the last line may lack one */
+			if (len && line[len - 1] == '\n')
+				line[--len] = '\0';
+			if (!len)
+				continue;
+			ret |= convert(callback, &outbuf, &outbuf_size, line, (size_t)len);
+		}
+		if (ferror(stdin)) {
+			fprintf(stderr, "%s: getline <stdin>: %s\n", argv0, strerror(errno));
+			ret = 1;
+		}
+		free(line);
+	}
+
+	if (fflush(stdout) || fclose(stdout)) {
+		fprintf(stderr, "%s: printf: %s\n", argv0, strerror(errno));
+		ret = 1;
+	}
+
+	free(outbuf);
+	return ret;
+}
+
+
+/*
+ * Parse the argument of the -l option
+ *
+ * If `arg` is "?", the supported languages are listed on the standard
+ * output and the process exits with status 0. If `arg` is not a
+ * recognised language, an error message is printed and the process
+ * exits with status 1.
+ *
+ * @param   arg         Language code or language name (case-insensitive), or "?"
+ * @param   langp       Output parameter for the selected language
+ * @param   have_langp  Flag that is set to 1 by the first call; used to
+ *                      detect that the option was specified more than once
+ * @return              1 on success, 0 if a language had already been specified
+ */
+static int
+get_language(const char *arg, enum libnumtext_language *langp, int *have_langp)
+{
+	static const struct language {
+		enum libnumtext_language value;
+		const char *code;
+		const char *name;
+	} languages[] = {
+		{LIBNUMTEXT_SWEDISH, "sv", "swedish"}
+	};
+
+	size_t i;
+
+	if (*have_langp)
+		return 0;
+	*have_langp = 1;
+
+	if (!strcmp(arg, "?")) {
+		/* Print the header once, not once per language */
+		printf("Languages:\n");
+		for (i = 0; i < sizeof(languages) / sizeof(*languages); i++)
+			printf("\t%s %s\n", languages[i].code, languages[i].name);
+		exit(0);
+	} else {
+		for (i = 0; i < sizeof(languages) / sizeof(*languages); i++) {
+			if (!strcasecmp(arg, languages[i].code) || !strcasecmp(arg, languages[i].name)) {
+				*langp = languages[i].value;
+				return 1;
+			}
+		}
+		fprintf(stderr, "%s: unrecognised language, use ? to list available languages: %s\n", argv0, arg);
+		exit(1);
+	}
+}
+
+
+/*
+ * Conversion callback for run(): remove the separators from a number
+ *
+ * @param   outbuf       Output buffer
+ * @param   outbuf_size  The size of `outbuf`, in bytes
+ * @param   num          The number to remove the separators from
+ * @param   num_len      The length of `num`, in bytes
+ * @return               The value returned by libnumtext_remove_separators()
+ *                       plus 1, to account for the NUL byte that is appended
+ *                       when it fits, or a negative value on failure
+ */
+static ssize_t
+process(char *outbuf, size_t outbuf_size, const char *num, size_t num_len)
+{
+	ssize_t ret;
+	ret = libnumtext_remove_separators(outbuf, outbuf_size, num, num_len, lang);
+	if (ret < 0) {
+		fprintf(stderr, "%s: libnumtext_remove_separators %s: %s\n", argv0, num, strerror(errno));
+	} else {
+		if ((size_t)ret < outbuf_size)
+			outbuf[ret] = '\0';
+		ret += 1;
+	}
+	return ret;
+}
+
+
+/*
+ * Parse the command line and strip the separators from each number
+ *
+ * Exactly one language must be selected with the -l option.
+ */
+int
+main(int argc, char *argv[])
+{
+	int have_lang = 0;
+
+	ARGBEGIN {
+	case 'l':
+		if (!get_language(ARG(), &lang, &have_lang))
+			usage();
+		break;
+	default:
+		usage();
+	} ARGEND;
+
+	if (!have_lang)
+		usage();
+
+	return run(argc, argv, process);
+}
-- 
cgit v1.2.3-70-g09d2