From 84b8038689a1b289aeaf0efb2c0545dec79c1888 Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Wed, 8 Sep 2021 23:24:34 +0200 Subject: misc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- Makefile | 2 +- common.h | 8 +++++++- libnumtext_num2text.c | 10 ++++------ libnumtext_remove_separators.c | 5 +++++ swedish.test.c | 10 +++++----- 5 files changed, 22 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index f17e587..aee6e74 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ TEST_OBJ =\ all: libnumtext.a libnumtext.$(LIBEXT) $(OBJ): $(HDR) $(LOBJ): $(HDR) -$(TEST_OBJ): libnumtext.h +$(TEST_OBJ): $(HDR) $(TEST): libnumtext.a .c.o: diff --git a/common.h b/common.h index 7fb6784..51e7689 100644 --- a/common.h +++ b/common.h @@ -28,7 +28,13 @@ #endif -#define UNICODE_MINUS "−" +#define UNICODE_MINUS "−" /* \xe2\x88\x92 */ +#define UNICODE_NBSP " " /* \xc2\xa0 */ + +#define IS_CHAR__(S, LEN, C) ((LEN) >= sizeof(C) - 1 && !memcmp((S), (C), sizeof(C) - 1)) + +#define IS_UNICODE_MINUS(S, LEN) IS_CHAR__(S, LEN, UNICODE_MINUS) +#define IS_UNICODE_NBSP(S, LEN) IS_CHAR__(S, LEN, UNICODE_NBSP) #define IS_UTF8_CHAR_CONTINUATION(B) (((B) & 0xC0) == 0x80) diff --git a/libnumtext_num2text.c b/libnumtext_num2text.c index 50dc6ca..25c5429 100644 --- a/libnumtext_num2text.c +++ b/libnumtext_num2text.c @@ -14,13 +14,11 @@ libnumtext_num2text(char *outbuf, size_t outbuf_size, const char *num, size_t nu params.trailing_zeroes = 0; i = 0; - if (i < num_len) { - if (num[i] == '+' || num[i] == '-') { + if (num_len) { + if (num[0] == '+' || num[0] == '-') i += 1; - } else if (num_len >= sizeof(UNICODE_MINUS)) { - if (!strncmp(&num[0], UNICODE_MINUS, sizeof(UNICODE_MINUS) - 1)) - i += sizeof(UNICODE_MINUS) - 1; - } + else if (IS_UNICODE_MINUS(num, num_len)) + i += sizeof(UNICODE_MINUS) - 1; } params.sign_length = i; if (i == num_len) diff --git a/libnumtext_remove_separators.c b/libnumtext_remove_separators.c index e92c800..cbeb80a 100644 --- a/libnumtext_remove_separators.c +++ b/libnumtext_remove_separators.c @@ -13,6 +13,11 @@ libnumtext_remove_separators(char *outbuf, size_t outbuf_size, const char *num, case LIBNUMTEXT_SWEDISH: for (; num_len--; num++) { if (*num != ' ' && *num != '\'' && *num != '.') { + if (IS_UNICODE_NBSP(num, num_len)) { + num = &num[sizeof(UNICODE_NBSP) - 2]; + num_len -= sizeof(UNICODE_NBSP) - 2; + continue; + } if (outbuf_size) { *p++ = *num; outbuf_size--; diff --git a/swedish.test.c b/swedish.test.c index d59013d..4e2d00b 100644 --- a/swedish.test.c +++ b/swedish.test.c @@ -590,7 +590,7 @@ static struct test { {"100000", 0, "hundratusende", F(IMPLICIT_ONE) | ORD, 0}, {"101000", 0, "hundraettusen", F(IMPLICIT_ONE), 0}, {"101000", 0, "hundraettusende", F(IMPLICIT_ONE) | ORD, 0}, - {"300000", 0, "trehundratusen", F(IMPLICIT_ONE), 0}, + {"300"UNICODE_NBSP"000", 0, "trehundratusen", F(IMPLICIT_ONE), 0}, {"1""000000", 0, "enmiljon", F(IMPLICIT_ONE), 0}, {"1'000 000", 0, "enmiljonte", F(IMPLICIT_ONE) | ORD, 0}, {"1.200.000", 0, "enmiljontvåhundratusen", F(IMPLICIT_ONE), 0}, @@ -602,17 +602,17 @@ static struct test { CARD_DENOMS("1""000000""000000", 0, "enbiljon", F(IMPLICIT_ONE), 0), {"1", 4 * 6, "enkvadriljon", F(IMPLICIT_ONE), 0}, {"1", 100 * 6, "encentiljon", F(IMPLICIT_ONE), 0}, - {"+0", 0, "Plus Noll", F(PASCAL_CASE), 0}, + {"+0", 0, "Plus Noll", F(MONEY_CASE), 0}, {"+0", 0, "PLUS NOLL", F(UPPER_CASE), 0}, {"+0", 0, "Plus noll", F(SENTENCE_CASE), 0}, - {"-0", 0, "Minus Noll", F(PASCAL_CASE), 0}, + {"-0", 0, "Minus Noll", F(MONEY_CASE), 0}, {"-0", 0, "MINUS NOLL", F(UPPER_CASE), 0}, {"-0", 0, "Minus noll", F(SENTENCE_CASE), 0}, - {"54 248", 0, "FemtioFyratusenTvåhundraFyrtioÅtta", F(PASCAL_CASE), 0}, + {"54 248", 0, "FemtioFyratusenTvåhundraFyrtioÅtta", F(MONEY_CASE), 0}, {"54 248", 0, "FEMTIOFYRATUSENTVÅHUNDRAFYRTIOÅTTA", F(UPPER_CASE), 0}, {"54'248", 0, "Femtiofyratusentvåhundrafyrtioåtta", F(SENTENCE_CASE), 0}, {"54'248", 0, "femtio-fyratusen-tvåhundra-fyrtio-åtta", F(HYPHENATED), 0}, - {"54.248", 0, "Femtio-Fyratusen-Tvåhundra-Fyrtio-Åtta", F(HYPHENATED) | F(PASCAL_CASE), 0}, + {"54.248", 0, "Femtio-Fyratusen-Tvåhundra-Fyrtio-Åtta", F(HYPHENATED) | F(MONEY_CASE), 0}, {"54.248", 0, "FEMTIO-FYRATUSEN-TVÅHUNDRA-FYRTIO-ÅTTA", F(HYPHENATED) | F(UPPER_CASE), 0}, {"54248", 0, "Femtio-fyratusen-tvåhundra-fyrtio-åtta", F(HYPHENATED) | F(SENTENCE_CASE), 0}, {"100", 0, "etthundra", F(EXPLICIT_TRIPLETS), 0}, -- cgit v1.2.3-70-g09d2