From 000dd684722d34b06ab1d77c826af7e4922d40ee Mon Sep 17 00:00:00 2001
From: Mattias Andrée
Date: Fri, 10 Sep 2021 18:03:52 +0200
Subject: Add num2text
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Mattias Andrée
---
 Makefile   |   1 +
 TODO       |   1 -
 common.h   |   1 +
 num2text.c | 188 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 numtext.c  |  11 +++--
 5 files changed, 198 insertions(+), 4 deletions(-)
 create mode 100644 num2text.c

diff --git a/Makefile b/Makefile
index eb187ed..ce1193a 100644
--- a/Makefile
+++ b/Makefile
@@ -18,6 +18,7 @@ LANG =\
 
 CMD =\
 	card2ord\
+	num2text\
 	numtext-strip
 
 LIB_OBJ =\
diff --git a/TODO b/TODO
index 8e94350..52c52b5 100644
--- a/TODO
+++ b/TODO
@@ -1,5 +1,4 @@
 Add [libnumtext_]text2num: text to numerals, with analysis (possible flags from num2text), use support mixed in numerals
-Add num2text: conversion utility that wraps libnumtext_num2text
 Add opposite of libnumtext_remove_separators and numtext-strip
 Add man pages and README
 Add IPA output support?
diff --git a/common.h b/common.h
index 1404b35..e51652e 100644
--- a/common.h
+++ b/common.h
@@ -63,6 +63,7 @@ void process_options(char **optss, size_t n_optss, struct option *options, uint3
 
 int numtext_strip_main(int argc, char *argv[]);
 int card2ord_main(int argc, char *argv[]);
+int num2text_main(int argc, char *argv[]);
 
 
 ssize_t libnumtext_num2text_swedish__(struct common_num2text_params *params, const char *num, size_t num_len, uint32_t flags);
diff --git a/num2text.c b/num2text.c
new file mode 100644
index 0000000..d9d4046
--- /dev/null
+++ b/num2text.c
@@ -0,0 +1,188 @@
+/* See LICENSE file for copyright and license details. */
+#define LIBSIMPLY_CONFIG_MULTICALL_BINARY
+#include "common.h"
+#include <libsimple-arg.h>
+
+USAGE("-l language [-o options ...] [cardinal ...]");
+
+
+/* Language selected with -l (stored by get_language() in num2text_main()) */
+static enum libnumtext_language lang;
+/* Conversion flags built from the -o options, passed to libnumtext_num2text() */
+static uint32_t flags = 0;
+
+
+/*
+ * Options accepted for Swedish. Each row is positional: presumably the
+ * usage text (NULL on rows that are alternative spellings of the option
+ * above them), then the option string to match, the flag it selects and
+ * the mask of the setting it belongs to. Rows sharing a mask are
+ * alternatives of one setting. The all-zero row terminates the table.
+ */
+static struct option swedish_options[] = {
+
+	{"[type=]c[ard[inal]]", "type=cardinal", LIBNUMTEXT_N2T_SWEDISH_CARDINAL, UINT32_C(0x00000001)},
+	{NULL, "type=card", LIBNUMTEXT_N2T_SWEDISH_CARDINAL, UINT32_C(0x00000001)},
+	{NULL, "type=c", LIBNUMTEXT_N2T_SWEDISH_CARDINAL, UINT32_C(0x00000001)},
+	{NULL, "cardinal", LIBNUMTEXT_N2T_SWEDISH_CARDINAL, UINT32_C(0x00000001)},
+	{NULL, "card", LIBNUMTEXT_N2T_SWEDISH_CARDINAL, UINT32_C(0x00000001)},
+	{NULL, "c", LIBNUMTEXT_N2T_SWEDISH_CARDINAL, UINT32_C(0x00000001)},
+	{"[type=]o[rd[inal]]", "type=ordinal", LIBNUMTEXT_N2T_SWEDISH_ORDINAL, UINT32_C(0x00000001)},
+	{NULL, "type=ord", LIBNUMTEXT_N2T_SWEDISH_ORDINAL, UINT32_C(0x00000001)},
+	{NULL, "type=o", LIBNUMTEXT_N2T_SWEDISH_ORDINAL, UINT32_C(0x00000001)},
+	{NULL, "ordinal", LIBNUMTEXT_N2T_SWEDISH_ORDINAL, UINT32_C(0x00000001)},
+	{NULL, "ord", LIBNUMTEXT_N2T_SWEDISH_ORDINAL, UINT32_C(0x00000001)},
+	{NULL, "o", LIBNUMTEXT_N2T_SWEDISH_ORDINAL, UINT32_C(0x00000001)},
+
+	{"numerator|denominator=n[o]", "denominator=no", LIBNUMTEXT_N2T_SWEDISH_NUMERATOR, UINT32_C(0x00000002)},
+	{NULL, "denominator=n", LIBNUMTEXT_N2T_SWEDISH_NUMERATOR, UINT32_C(0x00000002)},
+	{NULL, "numerator", LIBNUMTEXT_N2T_SWEDISH_NUMERATOR, UINT32_C(0x00000002)},
+	{"denominator[=y[es]]", "denominator=yes", LIBNUMTEXT_N2T_SWEDISH_DENOMINATOR, UINT32_C(0x00000002)},
+	{NULL, "denominator=y", LIBNUMTEXT_N2T_SWEDISH_DENOMINATOR, UINT32_C(0x00000002)},
+	{NULL, "denominator", LIBNUMTEXT_N2T_SWEDISH_DENOMINATOR, UINT32_C(0x00000002)},
+
+	{"singular|plural=n[o]", "plural=no", LIBNUMTEXT_N2T_SWEDISH_SINGULAR_FORM, UINT32_C(0x00000004)},
+	{NULL, "plural=n", LIBNUMTEXT_N2T_SWEDISH_SINGULAR_FORM, UINT32_C(0x00000004)},
+	{NULL, "singular", LIBNUMTEXT_N2T_SWEDISH_SINGULAR_FORM, UINT32_C(0x00000004)},
+	{"plural[=y[es]]", "plural=yes", LIBNUMTEXT_N2T_SWEDISH_PLURAL_FORM, UINT32_C(0x00000004)},
+	{NULL, "plural=y", LIBNUMTEXT_N2T_SWEDISH_PLURAL_FORM, UINT32_C(0x00000004)},
+	{NULL, "plural", LIBNUMTEXT_N2T_SWEDISH_PLURAL_FORM, UINT32_C(0x00000004)},
+
+	{"indefinite|definite=n[o]", "definite=no", LIBNUMTEXT_N2T_SWEDISH_INDEFINITE_FORM, UINT32_C(0x00000008)},
+	{NULL, "definite=n", LIBNUMTEXT_N2T_SWEDISH_INDEFINITE_FORM, UINT32_C(0x00000008)},
+	{NULL, "indefinite", LIBNUMTEXT_N2T_SWEDISH_INDEFINITE_FORM, UINT32_C(0x00000008)},
+	{"definite[=y[es]]", "definite=yes", LIBNUMTEXT_N2T_SWEDISH_DEFINITE_FORM, UINT32_C(0x00000008)},
+	{NULL, "definite=y", LIBNUMTEXT_N2T_SWEDISH_DEFINITE_FORM, UINT32_C(0x00000008)},
+	{NULL, "definite", LIBNUMTEXT_N2T_SWEDISH_DEFINITE_FORM, UINT32_C(0x00000008)},
+
+	{"gender=c[ommon]|u|t", "gender=common", LIBNUMTEXT_N2T_SWEDISH_COMMON_GENDER, UINT32_C(0x00000030)},
+	{NULL, "gender=c", LIBNUMTEXT_N2T_SWEDISH_COMMON_GENDER, UINT32_C(0x00000030)},
+	{NULL, "gender=u", LIBNUMTEXT_N2T_SWEDISH_COMMON_GENDER, UINT32_C(0x00000030)},
+	{NULL, "gender=t", LIBNUMTEXT_N2T_SWEDISH_COMMON_GENDER, UINT32_C(0x00000030)},
+	{"gender=n[euter]", "gender=neuter", LIBNUMTEXT_N2T_SWEDISH_NEUTER_GENDER, UINT32_C(0x00000030)},
+	{NULL, "gender=n", LIBNUMTEXT_N2T_SWEDISH_NEUTER_GENDER, UINT32_C(0x00000030)},
+	{"gender=m[asculine]", "gender=masculine", LIBNUMTEXT_N2T_SWEDISH_MASCULINE_GENDER, UINT32_C(0x00000030)},
+	{NULL, "gender=m", LIBNUMTEXT_N2T_SWEDISH_MASCULINE_GENDER, UINT32_C(0x00000030)},
+	{"gender=f[eminine]", "gender=feminine", LIBNUMTEXT_N2T_SWEDISH_FEMININE_GENDER, UINT32_C(0x00000030)},
+	{NULL, "gender=f", LIBNUMTEXT_N2T_SWEDISH_FEMININE_GENDER, UINT32_C(0x00000030)},
+
+	{"one=e[xplicit]", "one=explicit", LIBNUMTEXT_N2T_SWEDISH_EXPLICIT_ONE, UINT32_C(0x00000040)},
+	{NULL, "one=e", LIBNUMTEXT_N2T_SWEDISH_EXPLICIT_ONE, UINT32_C(0x00000040)},
+	{"one=i[mplicit]", "one=implicit", LIBNUMTEXT_N2T_SWEDISH_IMPLICIT_ONE, UINT32_C(0x00000040)},
+	{NULL, "one=i", LIBNUMTEXT_N2T_SWEDISH_IMPLICIT_ONE, UINT32_C(0x00000040)},
+
+	{"hyphenated=n[o]", "hyphenated=no", LIBNUMTEXT_N2T_SWEDISH_NOT_HYPHENATED, UINT32_C(0x00000080)},
+	{NULL, "hyphenated=n", LIBNUMTEXT_N2T_SWEDISH_NOT_HYPHENATED, UINT32_C(0x00000080)},
+	{"hyphenated[=y[es]]", "hyphenated=yes", LIBNUMTEXT_N2T_SWEDISH_HYPHENATED, UINT32_C(0x00000080)},
+	{NULL, "hyphenated=y", LIBNUMTEXT_N2T_SWEDISH_HYPHENATED, UINT32_C(0x00000080)},
+	{NULL, "hyphenated", LIBNUMTEXT_N2T_SWEDISH_HYPHENATED, UINT32_C(0x00000080)},
+
+	{"case=l[ower]", "case=lower", LIBNUMTEXT_N2T_SWEDISH_LOWER_CASE, UINT32_C(0x00000300)},
+	{NULL, "case=l", LIBNUMTEXT_N2T_SWEDISH_LOWER_CASE, UINT32_C(0x00000300)},
+	{"case=u[pper]", "case=upper", LIBNUMTEXT_N2T_SWEDISH_UPPER_CASE, UINT32_C(0x00000300)},
+	{NULL, "case=u", LIBNUMTEXT_N2T_SWEDISH_UPPER_CASE, UINT32_C(0x00000300)},
+	{"case=m[oney]", "case=money", LIBNUMTEXT_N2T_SWEDISH_MONEY_CASE, UINT32_C(0x00000300)},
+	{NULL, "case=m", LIBNUMTEXT_N2T_SWEDISH_MONEY_CASE, UINT32_C(0x00000300)},
+	{"case=s[entence]", "case=sentence", LIBNUMTEXT_N2T_SWEDISH_SENTENCE_CASE, UINT32_C(0x00000300)},
+	{NULL, "case=s", LIBNUMTEXT_N2T_SWEDISH_SENTENCE_CASE, UINT32_C(0x00000300)},
+
+	{"hyphenation=n[o[ne]]", "hyphenation=none", LIBNUMTEXT_N2T_SWEDISH_NO_HYPHENATION, UINT32_C(0x00000C00)},
+	{NULL, "hyphenation=no", LIBNUMTEXT_N2T_SWEDISH_NO_HYPHENATION, UINT32_C(0x00000C00)},
+	{NULL, "hyphenation=n", LIBNUMTEXT_N2T_SWEDISH_NO_HYPHENATION, UINT32_C(0x00000C00)},
+	{"hyphenation=c[omponent[s]]", "hyphenation=components", LIBNUMTEXT_N2T_SWEDISH_COMPONENT_HYPHENATION, UINT32_C(0x00000C00)},
+	{NULL, "hyphenation=component", LIBNUMTEXT_N2T_SWEDISH_COMPONENT_HYPHENATION, UINT32_C(0x00000C00)},
+	{NULL, "hyphenation=c", LIBNUMTEXT_N2T_SWEDISH_COMPONENT_HYPHENATION, UINT32_C(0x00000C00)},
+	{"hyphenation=sy[llable[s]]", "hyphenation=syllables", LIBNUMTEXT_N2T_SWEDISH_SYLLABLE_HYPHENATION, UINT32_C(0x00000C00)},
+	{NULL, "hyphenation=syllable", LIBNUMTEXT_N2T_SWEDISH_SYLLABLE_HYPHENATION, UINT32_C(0x00000C00)},
+	{NULL, "hyphenation=sy", LIBNUMTEXT_N2T_SWEDISH_SYLLABLE_HYPHENATION, UINT32_C(0x00000C00)},
+	{"hyphenation=se[condary]|2", "hyphenation=secondary", LIBNUMTEXT_N2T_SWEDISH_SECONDARY_HYPHENATION, UINT32_C(0x00000C00)},
+	{NULL, "hyphenation=se", LIBNUMTEXT_N2T_SWEDISH_SECONDARY_HYPHENATION, UINT32_C(0x00000C00)},
+	{NULL, "hyphenation=2", LIBNUMTEXT_N2T_SWEDISH_SECONDARY_HYPHENATION, UINT32_C(0x00000C00)},
+
+	{"triplets=r[educed]", "triplets=reduced", LIBNUMTEXT_N2T_SWEDISH_REDUCED_TRIPLETS, UINT32_C(0x00003000)},
+	{NULL, "triplets=r", LIBNUMTEXT_N2T_SWEDISH_REDUCED_TRIPLETS, UINT32_C(0x00003000)},
+	{"triplets=l[atex]", "triplets=latex", LIBNUMTEXT_N2T_SWEDISH_LATEX_TRIPLETS, UINT32_C(0x00003000)},
+	{NULL, "triplets=l", LIBNUMTEXT_N2T_SWEDISH_LATEX_TRIPLETS, UINT32_C(0x00003000)},
+	{"triplets=e[xplicit]", "triplets=explicit", LIBNUMTEXT_N2T_SWEDISH_EXPLICIT_TRIPLETS, UINT32_C(0x00003000)},
+	{NULL, "triplets=e", LIBNUMTEXT_N2T_SWEDISH_EXPLICIT_TRIPLETS, UINT32_C(0x00003000)},
+
+	{NULL, NULL, 0, 0}
+};
+
+/* Option tables indexed by language */
+static struct option *options[] = {
+	[LIBNUMTEXT_SWEDISH] = swedish_options
+};
+
+
+/*
+ * Convert one numeral to text using the selected language and flags.
+ * The arguments are forwarded unchanged to libnumtext_num2text().
+ *
+ * Returns the result of libnumtext_num2text(); when it is negative a
+ * diagnostic containing strerror(errno) has been printed to stderr.
+ */
+static ssize_t
+process(char *outbuf, size_t outbuf_size, const char *num, size_t num_len)
+{
+	ssize_t ret;
+	ret = libnumtext_num2text(outbuf, outbuf_size, num, num_len, lang, flags);
+	if (ret < 0)
+		fprintf(stderr, "%s: libnumtext_num2text %s: %s\n", argv0, num, strerror(errno));
+	return ret;
+}
+
+
+/*
+ * Entry point of the num2text command: parse -l and -o (calling usage()
+ * on an unknown option or a missing or rejected -l), validate the flag
+ * combination and hand the arguments to run() together with process().
+ *
+ * Returns 1 on allocation failure or on an invalid flag combination,
+ * otherwise the return value of run().
+ */
+int
+num2text_main(int argc, char *argv[])
+{
+	int have_lang = 0;
+	char **optionses;
+	size_t n_optionses = 0;
+	uint32_t flag_mask = 0;
+
+	/* argc slots suffice: there cannot be more -o options than arguments */
+	optionses = calloc((size_t)argc, sizeof(*optionses));
+	if (!optionses) {
+		fprintf(stderr, "%s: calloc %zu %zu: %s\n", argv0, (size_t)argc, sizeof(*optionses), strerror(errno));
+		return 1;
+	}
+
+	ARGBEGIN {
+	case 'l':
+		if (!get_language(ARG(), &lang, &have_lang))
+			usage();
+		break;
+	case 'o':
+		optionses[n_optionses++] = ARG();
+		break;
+	default:
+		usage();
+	} ARGEND;
+
+	if (!have_lang)
+		usage();
+
+	process_options(optionses, n_optionses, options[lang], &flags, &flag_mask);
+	free(optionses);
+
+	if (lang == LIBNUMTEXT_SWEDISH) {
+		/* flag_mask & 0x0C: singular/plural (0x04) or indefinite/definite (0x08) was given; */
+		/* no bit of 0x03 set in flags is what the message below calls a numerator cardinal */
+		if ((flag_mask & UINT32_C(0x0000000C)) && !(flags & UINT32_C(0x00000003))) {
+			fprintf(stderr, "%s: singular/plural and indefinite/definite"
+			                " cannot be used with numerator cardinals\n", argv0);
+			return 1;
+		}
+	}
+
+	return run(argc, argv, process);
+}
diff --git a/numtext.c b/numtext.c
index 3b78756..18d6c2c 100644
--- a/numtext.c
+++ b/numtext.c
@@ -20,6 +20,8 @@ main(int argc, char *argv[])
 		return numtext_strip_main(argc, argv);
 	else if (!strcmp(argv0, "card2ord"))
 		return card2ord_main(argc, argv);
+	else if (!strcmp(argv0, "num2text"))
+		return num2text_main(argc, argv);
 
 	fprintf(stderr, "%s: not a recognised command for numtext multicall binary\n", argv[0]);
 	return 1;
@@ -145,6 +147,7 @@ get_language(const char *arg, enum libnumtext_language *langp, int *have_langp)
 static char *
 process_option(char *opt, struct option *options, uint32_t *flagsp, uint32_t *maskedp)
 {
+	uint32_t flag, mask;
 	size_t len;
 
 	if (opt[0] == '?' && (!opt[1] || opt[1] == ',')) {
@@ -158,12 +161,14 @@ process_option(char *opt, struct option *options, uint32_t *flagsp, uint32_t *ma
 	for (; options->option; options++) {
 		len = strlen(options->option);
 		if (!strncmp(opt, options->option, len) && (!opt[len] || opt[len] == ',')) {
-			if (options->mask & *maskedp) {
+			flag = options->flag;
+			mask = options->mask ? options->mask : options->flag;
+			if ((mask & *maskedp) && (*flagsp & mask) != flag) {
 				fprintf(stderr, "%s: option conflicts with previously specified option: %.*s\n", argv0, (int)len, opt);
 				exit(1);
 			}
-			*flagsp |= options->flag;
-			*maskedp |= options->mask;
+			*flagsp |= flag;
+			*maskedp |= mask;
 			return &opt[len];
 		}
 	}
-- 
cgit v1.2.3-70-g09d2