aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile6
-rw-r--r--convert-to-bracketed.c18
-rw-r--r--libcharconv.h32
-rw-r--r--libcharconv_bracketed.c141
-rw-r--r--libcharconv_decode_utf8_.c4
-rw-r--r--libcharconv_latin.c93
6 files changed, 289 insertions, 5 deletions
diff --git a/Makefile b/Makefile
index 026f4c0..adda3d0 100644
--- a/Makefile
+++ b/Makefile
@@ -42,7 +42,8 @@ BIN =\
convert-to-script\
convert-to-bold-script\
convert-to-buhid\
- convert-to-replacement
+ convert-to-replacement\
+ convert-to-bracketed
LIBOBJ =\
libcharconv_decode_utf8_.o\
@@ -72,7 +73,8 @@ LIBOBJ =\
libcharconv_script.o\
libcharconv_bold_script.o\
libcharconv_buhid.o\
- libcharconv_replacement.o
+ libcharconv_replacement.o\
+ libcharconv_bracketed.o
LOBJ = $(LIBOBJ:.o=.lo)
diff --git a/convert-to-bracketed.c b/convert-to-bracketed.c
new file mode 100644
index 0000000..1314677
--- /dev/null
+++ b/convert-to-bracketed.c
@@ -0,0 +1,18 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+USAGE("");
+
+/* Entry point: calls usage() for any option or leftover argument, else returns convert(&libcharconv_bracketed) */
+int
+main(int argc, char *argv[])
+{
+ ARGBEGIN { /* ARGBEGIN/ARGEND/usage() are provided by common.h; presumably they walk the options in argv -- TODO confirm */
+ default: /* no option is accepted */
+ usage();
+ } ARGEND;
+ if (argc) /* presumably argc now counts the remaining operands -- confirm against common.h */
+ usage();
+
+ return convert(&libcharconv_bracketed); /* the result of convert() is used as the exit status */
+}
diff --git a/libcharconv.h b/libcharconv.h
index 8e3c4bc..76bdfd7 100644
--- a/libcharconv.h
+++ b/libcharconv.h
@@ -901,4 +901,36 @@ enum libcharconv_result libcharconv_buhid(const char *s, size_t slen, size_t *n,
enum libcharconv_result libcharconv_replacement(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp);
+/**
+ * Convert alphanumerics to bracketed form
+ *
+ * @param s Text to convert
+ * @param slen The number of bytes available in `s`
+ * @param n Output parameter for the number of consumed bytes
+ * @param cp Output buffer for the codepoints
+ * @param ncp Input parameter for the number of codepoints that
+ * fit in `cp`, and output parameter for the number
+ * of output codepoints (if it exceeds the original
+ * value of `ncp`, a larger buffer is needed)
+ * @return LIBCHARCONV_NO_CONVERT:
+ * `*n` is the number of bytes from the beginning
+ * of `s` that cannot be converted
+ * LIBCHARCONV_CONVERTED:
+ * `*n` is the number of bytes from the beginning
+ * of `s` that was converted to a codepoint which
+ * is stored in `*cp`
+ * LIBCHARCONV_INDETERMINATE:
+ * If all text has been input, no more can be
+ * converted, otherwise more of the text must
+ * be made available before the function can
+ * determine whether the beginning of `s` can be
+ * converted or what it should be converted to
+ * LIBCHARCONV_CONVERT_IF_END:
+ * As LIBCHARCONV_CONVERTED if the entire text has
+ * been input, as LIBCHARCONV_INDETERMINATE
+ * otherwise
+ */
+enum libcharconv_result libcharconv_bracketed(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp);
+
+
#endif
diff --git a/libcharconv_bracketed.c b/libcharconv_bracketed.c
new file mode 100644
index 0000000..cff49c9
--- /dev/null
+++ b/libcharconv_bracketed.c
@@ -0,0 +1,141 @@
+/* See LICENSE file for copyright and license details. */
+#include "lib-common.h"
+
+
+/**
+ * Convert ASCII digit and letter sequences to single enclosed
+ * ("bracketed") codepoints
+ *
+ * Recognised sequences:
+ *   "0," .. "9,"      U+1F101 .. U+1F10A
+ *   "0."              U+1F100
+ *   "1." .. "9."      U+2488 .. U+2490
+ *   "10." .. "19."    U+2491 .. U+249A
+ *   "20."             U+249B
+ *   "(1)" .. "(9)"    U+2474 .. U+247C
+ *   "(10)" .. "(19)"  U+247D .. U+2486
+ *   "(20)"            U+2487
+ *   "(a)" .. "(z)"    U+249C .. U+24B5
+ *   "(A)" .. "(Z)"    U+1F110 .. U+1F129
+ *
+ * @param   s     Text to convert
+ * @param   slen  The number of bytes available in `s`
+ * @param   n     Output parameter for the number of consumed bytes
+ * @param   cp    Output buffer for the codepoint; only written
+ *                if `*ncp` is non-zero on entry
+ * @param   ncp   Input parameter for the capacity of `cp`; set
+ *                to 1 when a sequence is converted
+ * @return        LIBCHARCONV_CONVERTED: the first `*n` bytes
+ *                (2, 3 or 4) of `s` were converted to `*cp`;
+ *                LIBCHARCONV_NO_CONVERT: the first `*n` bytes
+ *                of `s` cannot be converted;
+ *                LIBCHARCONV_INDETERMINATE: `s` ends inside
+ *                what may be a convertible sequence
+ */
+enum libcharconv_result
+libcharconv_bracketed(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp)
+{
+	uint_least32_t c;
+	size_t len;
+
+	*n = 0;
+
+	/* The loop condition decrements `slen`, so inside the body
+	 * it is the number of bytes available after `s[0]` */
+	for (; slen--; s++) {
+		if ('0' <= s[0] && s[0] <= '9') {
+			if (!slen)
+				goto indeterminate;
+			if (s[1] == ',') {
+				/* "0," .. "9," */
+				c = UINT32_C(0x1F101) + (unsigned)(s[0] - '0');
+				len = 2u;
+				goto conv;
+			} else if (s[1] == '.') {
+				/* "0." is encoded apart from "1." .. "9." */
+				if (s[0] == '0')
+					c = UINT32_C(0x1F100);
+				else
+					c = UINT32_C(0x2488) + (unsigned)(s[0] - '1');
+				len = 2u;
+				goto conv;
+			} else if (s[0] == '1' && '0' <= s[1] && s[1] <= '9') {
+				/* "10." .. "19." */
+				c = UINT32_C(0x2491) + (unsigned)(s[1] - '0');
+			} else if (s[0] == '2' && s[1] == '0') {
+				/* "20." */
+				c = UINT32_C(0x249B);
+			} else {
+				goto no_match;
+			}
+			/* Two digits seen: a full stop must follow */
+			if (slen < 2u)
+				goto indeterminate;
+			if (s[2] != '.')
+				goto no_match;
+			len = 3u;
+			goto conv;
+
+		} else if (s[0] == '(') {
+			if (!slen)
+				goto indeterminate;
+			/* Single enclosed character; U+2474 is "(1)",
+			 * so digits are offset from '1', not '0' */
+			if ('1' <= s[1] && s[1] <= '9')
+				c = UINT32_C(0x2474) + (unsigned)(s[1] - '1');
+			else if ('a' <= s[1] && s[1] <= 'z')
+				c = UINT32_C(0x249C) + (unsigned)(s[1] - 'a');
+			else if ('A' <= s[1] && s[1] <= 'Z')
+				c = UINT32_C(0x1F110) + (unsigned)(s[1] - 'A');
+			else
+				goto no_match;
+			if (slen < 2u)
+				goto indeterminate;
+			if (s[2] == ')') {
+				len = 3u;
+				goto conv;
+			}
+			/* Otherwise only "(10)" .. "(19)" and "(20)" remain */
+			if (s[1] == '1' && '0' <= s[2] && s[2] <= '9')
+				c = UINT32_C(0x247D) + (unsigned)(s[2] - '0');
+			else if (s[1] == '2' && s[2] == '0')
+				c = UINT32_C(0x2487);
+			else
+				goto no_match;
+			if (slen < 3u)
+				goto indeterminate;
+			if (s[3] != ')')
+				goto no_match;
+			len = 4u;
+			goto conv;
+
+		} else {
+		no_match:
+			/* Skip one byte and keep scanning, so that a run of
+			 * unconvertible bytes is reported by a single call */
+			*n += 1u;
+		}
+	}
+
+no_conv:
+	return LIBCHARCONV_NO_CONVERT;
+
+indeterminate:
+	/* Unconvertible bytes preceding the sequence are reported first */
+	if (*n)
+		goto no_conv;
+	return LIBCHARCONV_INDETERMINATE;
+
+conv:
+	/* Unconvertible bytes preceding the sequence are reported first;
+	 * the caller is expected to call again from the sequence itself */
+	if (*n)
+		goto no_conv;
+	if (*ncp)
+		*cp = c;
+	*n = len;
+	*ncp = 1u;
+	return LIBCHARCONV_CONVERTED;
+}
diff --git a/libcharconv_decode_utf8_.c b/libcharconv_decode_utf8_.c
index db66040..7b488e3 100644
--- a/libcharconv_decode_utf8_.c
+++ b/libcharconv_decode_utf8_.c
@@ -1,5 +1,7 @@
/* See LICENSE file for copyright and license details. */
#include "lib-common.h"
+#include <stdlib.h>
+#include <stdio.h>
size_t
@@ -37,6 +39,8 @@ libcharconv_decode_utf8_(const char *s, size_t slen, uint_least32_t *cp)
return n;
for (i = 1u; i < n; i++) {
+ if ((s[i] & 0xC0) != 0x80)
+ return 0u;
*cp <<= 6;
*cp |= (uint_least32_t)s[i] & 0x3Fu;
}
diff --git a/libcharconv_latin.c b/libcharconv_latin.c
index 052ab7b..25aae91 100644
--- a/libcharconv_latin.c
+++ b/libcharconv_latin.c
@@ -7,17 +7,21 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
{
enum libcharconv_result ret = LIBCHARCONV_CONVERTED;
uint_least32_t c;
- char c1, c2, c3;
+ char c1, c2, c3, c4;
size_t clen;
*n = 0;
- for (; slen; s++) {
+ while (slen) {
clen = libcharconv_decode_utf8_(s, slen, &c);
- if (clen > slen)
+ if (clen > slen) {
+ if (*n)
+ goto no_conv;
return LIBCHARCONV_INDETERMINATE;
+ }
if (!clen) {
*n += 1u;
slen -= 1u;
+ s = &s[1];
continue;
}
slen -= clen;
@@ -202,6 +206,73 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
c -= (uint_least32_t)UINT32_C(0x1D4EA) - (uint_least32_t)'a';
goto conv;
+ } else if (UINT32_C(0x1F110) <= c && c <= UINT32_C(0x1F129)) {
+ /* bracketed (parenthesised capital) */
+ c -= (uint_least32_t)UINT32_C(0x1F110) - (uint_least32_t)'A';
+ c1 = '(';
+ c2 = (char)c;
+ c3 = ')';
+ goto conv3;
+ } else if (UINT32_C(0x249C) <= c && c <= UINT32_C(0x24B5)) {
+ /* bracketed (parenthesised small) */
+ c -= (uint_least32_t)UINT32_C(0x249C) - (uint_least32_t)'a';
+ c1 = '(';
+ c2 = (char)c;
+ c3 = ')';
+ goto conv3;
+ } else if (UINT32_C(0x2474) <= c && c <= UINT32_C(0x247C)) {
+ /* bracketed (parenthesised number) */
+ c -= (uint_least32_t)UINT32_C(0x2474) - (uint_least32_t)'1';
+ c1 = '(';
+ c2 = (char)c;
+ c3 = ')';
+ goto conv3;
+ } else if (UINT32_C(0x247D) <= c && c <= UINT32_C(0x2486)) {
+ /* bracketed (parenthesised number) */
+ c -= (uint_least32_t)UINT32_C(0x247D) - (uint_least32_t)'0';
+ c1 = '(';
+ c2 = '1';
+ c3 = (char)c;
+ c4 = ')';
+ goto conv4;
+ } else if (c == UINT32_C(0x2487)) {
+ /* bracketed (parenthesised number) */
+ c1 = '(';
+ c2 = '2';
+ c3 = '0';
+ c4 = ')';
+ goto conv4;
+ } else if (UINT32_C(0x1F101) <= c && c <= UINT32_C(0x1F10A)) {
+ /* bracketed (number comma) */
+ c -= (uint_least32_t)UINT32_C(0x1F101) - (uint_least32_t)'0';
+ c1 = (char)c;
+ c2 = ',';
+ goto conv2;
+ } else if (c == UINT32_C(0x1F100)) {
+ /* bracketed (number full stop) */
+ c1 = '0';
+ c2 = '.';
+ goto conv2;
+ } else if (UINT32_C(0x2488) <= c && c <= UINT32_C(0x2490)) {
+ /* bracketed (number full stop) */
+ c -= (uint_least32_t)UINT32_C(0x2488) - (uint_least32_t)'1';
+ c1 = (char)c;
+ c2 = '.';
+ goto conv2;
+ } else if (UINT32_C(0x2491) <= c && c <= UINT32_C(0x249A)) {
+ /* bracketed (number full stop) */
+ c -= (uint_least32_t)UINT32_C(0x2491) - (uint_least32_t)'0';
+ c1 = '1';
+ c2 = (char)c;
+ c3 = '.';
+ goto conv3;
+ } else if (c == UINT32_C(0x249B)) {
+ /* bracketed (number full stop) */
+ c1 = '2';
+ c2 = '0';
+ c3 = '.';
+ goto conv3;
+
} else {
switch (c) {
/* shogi */
@@ -334,6 +405,7 @@ libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, siz
default:
no_match:
*n += clen;
+ s = &s[clen];
break;
}
}
@@ -375,4 +447,19 @@ conv3_prechecked:
cp[2] = (uint_least32_t)c3;
*ncp = 3u;
return ret;
+
+conv4:
+ if (*n)
+ goto no_conv;
+ *n += clen;
+ if (*ncp >= 1u)
+ cp[0] = (uint_least32_t)c1;
+ if (*ncp >= 2u)
+ cp[1] = (uint_least32_t)c2;
+ if (*ncp >= 3u)
+ cp[2] = (uint_least32_t)c3;
+ if (*ncp >= 4u)
+ cp[3] = (uint_least32_t)c4;
+ *ncp = 4u;
+ return ret;
}