about summary refs log tree commit diff stats
path: root/libcharconv_latin.c
diff options
context:
space:
mode:
author Mattias Andrée <m@maandree.se> 2026-01-24 17:27:23 +0100
committer Mattias Andrée <m@maandree.se> 2026-01-24 17:27:23 +0100
commit be3e7dec7e19a8ddc527a188306c097900a6da99 (patch)
tree 094d0c9bd936dc1c5853210f3ae94455b670748d /libcharconv_latin.c
parent Make UTF-8 decoding function available to the entire library (diff)
download charconv-be3e7dec7e19a8ddc527a188306c097900a6da99.tar.gz
charconv-be3e7dec7e19a8ddc527a188306c097900a6da99.tar.bz2
charconv-be3e7dec7e19a8ddc527a188306c097900a6da99.tar.xz
Add bracketed
Signed-off-by: Mattias Andrée <m@maandree.se>
Diffstat (limited to 'libcharconv_latin.c')
-rw-r--r-- libcharconv_latin.c 93
1 files changed, 90 insertions, 3 deletions
diff --git a/libcharconv_latin.c b/libcharconv_latin.c
index 052ab7b..25aae91 100644
--- a/libcharconv_latin.c
+++ b/libcharconv_latin.c
@@ -7,17 +7,21 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
{
enum libcharconv_result ret = LIBCHARCONV_CONVERTED;
uint_least32_t c;
- char c1, c2, c3;
+ char c1, c2, c3, c4;
size_t clen;
*n = 0;
- for (; slen; s++) {
+ while (slen) {
clen = libcharconv_decode_utf8_(s, slen, &c);
- if (clen > slen)
+ if (clen > slen) {
+ if (*n)
+ goto no_conv;
return LIBCHARCONV_INDETERMINATE;
+ }
if (!clen) {
*n += 1u;
slen -= 1u;
+ s = &s[1];
continue;
}
slen -= clen;
@@ -202,6 +206,73 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
c -= (uint_least32_t)UINT32_C(0x1D4EA) - (uint_least32_t)'a';
goto conv;
+ } else if (UINT32_C(0x1F110) <= c && c <= UINT32_C(0x1F129)) {
+ /* bracketed (parenthesised capital) */
+ c -= (uint_least32_t)UINT32_C(0x1F110) - (uint_least32_t)'A';
+ c1 = '(';
+ c2 = (char)c;
+ c3 = ')';
+ goto conv3;
+ } else if (UINT32_C(0x249C) <= c && c <= UINT32_C(0x24B5)) {
+ /* bracketed (parenthesised small) */
+ c -= (uint_least32_t)UINT32_C(0x249C) - (uint_least32_t)'a';
+ c1 = '(';
+ c2 = (char)c;
+ c3 = ')';
+ goto conv3;
+ } else if (UINT32_C(0x2474) <= c && c <= UINT32_C(0x247C)) {
+ /* bracketed (parenthesised number) */
+ c -= (uint_least32_t)UINT32_C(0x2474) - (uint_least32_t)'1';
+ c1 = '(';
+ c2 = (char)c;
+ c3 = ')';
+ goto conv3;
+ } else if (UINT32_C(0x247D) <= c && c <= UINT32_C(0x2486)) {
+ /* bracketed (parenthesised number) */
+ c -= (uint_least32_t)UINT32_C(0x247D) - (uint_least32_t)'0';
+ c1 = '(';
+ c2 = '1';
+ c3 = (char)c;
+ c4 = ')';
+ goto conv4;
+ } else if (c == UINT32_C(0x2487)) {
+ /* bracketed (parenthesised number) */
+ c1 = '(';
+ c2 = '2';
+ c3 = '0';
+ c4 = ')';
+ goto conv4;
+ } else if (UINT32_C(0x1F101) <= c && c <= UINT32_C(0x1F10A)) {
+ /* bracketed (number comma) */
+ c -= (uint_least32_t)UINT32_C(0x1F101) - (uint_least32_t)'0';
+ c1 = (char)c;
+ c2 = ',';
+ goto conv2;
+ } else if (c == UINT32_C(0x1F100)) {
+ /* bracketed (number full stop) */
+ c1 = '0';
+ c2 = '.';
+ goto conv2;
+ } else if (UINT32_C(0x2488) <= c && c <= UINT32_C(0x2490)) {
+ /* bracketed (number full stop) */
+ c -= (uint_least32_t)UINT32_C(0x2488) - (uint_least32_t)'1';
+ c1 = (char)c;
+ c2 = '.';
+ goto conv2;
+ } else if (UINT32_C(0x2491) <= c && c <= UINT32_C(0x249A)) {
+ /* bracketed (number full stop) */
+ c -= (uint_least32_t)UINT32_C(0x2491) - (uint_least32_t)'0';
+ c1 = '1';
+ c2 = (char)c;
+ c3 = '.';
+ goto conv3;
+ } else if (c == UINT32_C(0x249B)) {
+ /* bracketed (number full stop) */
+ c1 = '2';
+ c2 = '0';
+ c3 = '.';
+ goto conv3;
+
} else {
switch (c) {
/* shogi */
@@ -334,6 +405,7 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
default:
no_match:
*n += clen;
+ s = &s[clen];
break;
}
}
@@ -375,4 +447,19 @@ conv3_prechecked:
cp[2] = (uint_least32_t)c3;
*ncp = 3u;
return ret;
+
+conv4:
+ if (*n)
+ goto no_conv;
+ *n += clen;
+ if (*ncp >= 1u)
+ cp[0] = (uint_least32_t)c1;
+ if (*ncp >= 2u)
+ cp[1] = (uint_least32_t)c2;
+ if (*ncp >= 3u)
+ cp[2] = (uint_least32_t)c3;
+ if (*ncp >= 4u)
+ cp[3] = (uint_least32_t)c4;
+ *ncp = 4u;
+ return ret;
}