diff options
author | Mattias Andrée <maandree@kth.se> | 2021-09-09 20:56:12 +0200 |
---|---|---|
committer | Mattias Andrée <maandree@kth.se> | 2021-09-09 20:56:12 +0200 |
commit | c70673f4e73c7a071cff12735789eb6d45380d74 (patch) | |
tree | d222c092b7695d3b7c6bb775c9f5696eb599df0b | |
parent | misc (diff) | |
download | libnumtext-c70673f4e73c7a071cff12735789eb6d45380d74.tar.gz libnumtext-c70673f4e73c7a071cff12735789eb6d45380d74.tar.bz2 libnumtext-c70673f4e73c7a071cff12735789eb6d45380d74.tar.xz |
Add libnumtext_card2ord
Signed-off-by: Mattias Andrée <maandree@kth.se>
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | TODO | 3 | ||||
-rw-r--r-- | common.h | 1 | ||||
-rw-r--r-- | libnumtext.h | 11 | ||||
-rw-r--r-- | libnumtext_card2ord.c | 18 | ||||
-rw-r--r-- | swedish.c | 98 | ||||
-rw-r--r-- | swedish.test.c | 14 |
7 files changed, 140 insertions, 6 deletions
@@ -17,6 +17,7 @@ LANG =\ swedish OBJ =\ + libnumtext_card2ord.o\ libnumtext_num2text.o\ libnumtext_remove_separators.o\ $(LANG:=.o) @@ -1,7 +1,8 @@ Add [libnumtext_]text2num: text to numerals, with analysis (possible flags from num2text), use support mixed in numerals -Add [libnumtext_]card2ord: convert cardinal in digits to ordinal in digits with necessary text (like superiors in English) +Add card2ord: conversion utility that wraps libnumtext_card2ord Add num2text: conversion utility that wraps libnumtext_num2text Add numtext-strip: utility that wraps libnumtext_remove_separators +Add opposite of libnumtext_remove_separators and numtext-strip Add man pages and README Add IPA output support? Add support for decimal points? @@ -49,3 +49,4 @@ struct common_num2text_params { ssize_t libnumtext_num2text_swedish__(struct common_num2text_params *params, const char *num, size_t num_len, uint32_t flags); +ssize_t libnumtext_card2ord_swedish__(char *outbuf, size_t outbuf_size, const char *num, size_t num_len, uint32_t flags); diff --git a/libnumtext.h b/libnumtext.h index 2db0a0d..7bfaf3f 100644 --- a/libnumtext.h +++ b/libnumtext.h @@ -53,6 +53,14 @@ enum libnumtext_language { /* [1] If used, also use LIBNUMTEXT_N2T_SWEDISH_IMPLICIT_ONE, otherwise the Swedish becomes odd, and arguably incorrect */ /* [2] Requires LIBNUMTEXT_N2T_SWEDISH_ORDINAL (no effect) or LIBNUMTEXT_N2T_SWEDISH_DENOMINATOR */ +#define LIBNUMTEXT_C2O_SWEDISH_COMMON_GENDER UINT32_C(0x00000000) /* 1:a, 2:a, 3:e, … */ +#define LIBNUMTEXT_C2O_SWEDISH_NEUTER_GENDER UINT32_C(0x00000001) /* 1:a, 2:a, 3:e, … */ +#define LIBNUMTEXT_C2O_SWEDISH_MASCULINE_GENDER UINT32_C(0x00000002) /* 1:e, 2:e, 3:e, … */ +#define LIBNUMTEXT_C2O_SWEDISH_FEMININE_GENDER UINT32_C(0x00000003) /* 1:a, 2:a, 3:e, … */ + +#define LIBNUMTEXT_C2O_SWEDISH_LOWER_CASE UINT32_C(0) /* 1:a, 2:a, 3:e, … */ +#define LIBNUMTEXT_C2O_SWEDISH_UPPER_CASE UINT32_C(0x00000004) /* 1:A, 2:A, 3:E, … */ + /* input to libnumtext_num2text may not contain separators */ ssize_t libnumtext_remove_separators(char *outbuf, size_t outbuf_size, const char *num, size_t num_len, @@ -61,5 +69,8 @@ ssize_t libnumtext_remove_separators(char *outbuf, size_t outbuf_size, const cha ssize_t libnumtext_num2text(char *outbuf, size_t outbuf_size, const char *num, size_t num_len, enum libnumtext_language lang, uint32_t flags, ...); +ssize_t libnumtext_card2ord(char *outbuf, size_t outbuf_size, const char *num, size_t num_len, + enum libnumtext_language lang, uint32_t flags, ...); + #endif diff --git a/libnumtext_card2ord.c b/libnumtext_card2ord.c new file mode 100644 index 0000000..eb99156 --- /dev/null +++ b/libnumtext_card2ord.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + + +ssize_t +libnumtext_card2ord(char *outbuf, size_t outbuf_size, const char *num, size_t num_len, + enum libnumtext_language lang, uint32_t flags, ...) +{ + switch (lang) { + + case LIBNUMTEXT_SWEDISH: + return libnumtext_card2ord_swedish__(outbuf, outbuf_size, num, num_len, flags); + + default: + errno = EINVAL; + return -1; + } +} @@ -552,3 +552,101 @@ einval: errno = EINVAL; return -1; } + +#undef TYPE_INDEX +#undef FORM_INDEX +#undef CARDINAL +#undef ORDINAL +#undef NUMERATOR +#undef DENOMINATOR +#undef SINGULAR_FORM +#undef PLURAL_FORM +#undef INDEFINITE_FORM +#undef DEFINITE_FORM +#undef GENDER +#undef COMMON_GENDER +#undef NEUTER_GENDER +#undef MASCULINE_GENDER +#undef FEMININE_GENDER +#undef EXPLICIT_ONE +#undef IMPLICIT_ONE +#undef NOT_HYPHENATED +#undef HYPHENATED +#undef CASE +#undef LOWER_CASE +#undef PASCAL_CASE +#undef UPPER_CASE +#undef SENTENCE_CASE +#undef HYPHENATION +#undef NO_HYPHENATION +#undef COMPONENT_HYPHENATION +#undef SYLLABLE_HYPHENATION +#undef SECONDARY_HYPHENATION +#undef TRIPLETS +#undef REDUCED_TRIPLETS +#undef EXPLICIT_TRIPLETS +#undef LATEX_TRIPLETS +#undef X_INVALID_TRIPLETS +#undef INVALID_BITS + + + +#define GENDER(F) ((F) & UINT32_C(0x00000003)) +#define MASCULINE_GENDER(F) (GENDER(F) == LIBNUMTEXT_C2O_SWEDISH_MASCULINE_GENDER) +#define UPPER_CASE(F) ((F) & LIBNUMTEXT_C2O_SWEDISH_UPPER_CASE) +#define INVALID_BITS(F) ((F) & (uint32_t)~UINT32_C(0x00000007)) + +ssize_t +libnumtext_card2ord_swedish__(char *outbuf, size_t outbuf_size, const char *num, size_t num_len, uint32_t flags) +{ + size_t i = 0, length = 0; + char last_digit; + + if (INVALID_BITS(flags)) + goto einval; + + if (num_len) { + if (num[0] == '+' || num[0] == '-') + i += 1; + else if (IS_UNICODE_MINUS(num, num_len)) + i += sizeof(UNICODE_MINUS) - 1; + } + + while (i < num_len) { + if (isdigit(num[i])) + length = i += 1; + else if (IS_UNICODE_NBSP(&num[i], num_len - i)) + i += sizeof(UNICODE_NBSP) - 1; + else if (num[i] != ' ' && num[i] != '\'' && num[i] != '.') + goto einval; + } + + if (!length) + goto einval; + + last_digit = num[length - 1]; + + memcpy(outbuf, num, length < outbuf_size ? length : outbuf_size); + if (length < outbuf_size) + outbuf[length] = ':'; + length += 1; + + if (length < outbuf_size) { + if (MASCULINE_GENDER(flags) || last_digit == '0' || last_digit > '2') + outbuf[length] = UPPER_CASE(flags) ? 'E' : 'e'; + else + outbuf[length] = UPPER_CASE(flags) ? 'A' : 'a'; + } + length += 1; + + return (ssize_t)length; + +einval: + errno = EINVAL; + return -1; +} + +#undef GENDER +#undef MASCULINE_GENDER +#undef UPPER_CASE +#undef INVALID_BITS diff --git a/swedish.test.c b/swedish.test.c index 4e2d00b..c1cddc5 100644 --- a/swedish.test.c +++ b/swedish.test.c @@ -49,13 +49,15 @@ {N, Z, E, (FLAGS) | F(MASCULINE_GENDER), ERR},\ {N, Z, E, (FLAGS) | F(FEMININE_GENDER), ERR} -static struct test { +struct test { const char *num; size_t zeroes; const char *expect; uint32_t flags; int errnum; -} tests[] = { +}; + +static struct test n2t_tests[] = { {"0", 0, "noll", 0, 0}, {"00", 0, "noll", 0, 0}, {"00000000", 0, "noll", 0, 0}, @@ -757,9 +759,11 @@ static struct test { {"+-0", 0, NULL, 0, EINVAL}, {"x0", 0, NULL, 0, EINVAL}, {"0x", 0, NULL, 0, EINVAL}, - {"0x0", 0, NULL, 0, EINVAL}, + {"0x0", 0, NULL, 0, EINVAL} }; +/* TODO test libnumtext_card2ord */ + int main(void) @@ -770,9 +774,9 @@ main(void) size_t i, j, z, numlen, failures = 0; struct test *t; - for (i = 0; i < sizeof(tests) / sizeof(*tests); i++) { + for (i = 0; i < sizeof(n2t_tests) / sizeof(*n2t_tests); i++) { j = i + 1; - t = &tests[i]; + t = &n2t_tests[i]; numlen = strlen(t->num); memcpy(numbuf, t->num, numlen); for (z = 0; z < t->zeroes; z++) |