diff options
| author | Mattias Andrée <m@maandree.se> | 2026-01-24 14:11:01 +0100 |
|---|---|---|
| committer | Mattias Andrée <m@maandree.se> | 2026-01-24 14:11:01 +0100 |
| commit | c8b7fdc7294329dc5eaf9f089f83184ece7d098c (patch) | |
| tree | 1059fd3ab69fb9219ef5a6be3f53cd8208015c40 | |
| download | charconv-c8b7fdc7294329dc5eaf9f089f83184ece7d098c.tar.gz charconv-c8b7fdc7294329dc5eaf9f089f83184ece7d098c.tar.bz2 charconv-c8b7fdc7294329dc5eaf9f089f83184ece7d098c.tar.xz | |
First commit
Signed-off-by: Mattias Andrée <m@maandree.se>
61 files changed, 3218 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9bdfa88 --- /dev/null +++ b/.gitignore @@ -0,0 +1,16 @@ +*\#* +*~ +*.o +*.a +*.lo +*.su +*.so +*.so.* +*.dll +*.dylib +*.gch +*.gcov +*.gcno +*.gcda +/convert-to-* +!/convert-to-*.c @@ -0,0 +1,15 @@ +ISC License + +© 2026 Mattias Andrée <m@maandree.se> + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..61f9469 --- /dev/null +++ b/Makefile @@ -0,0 +1,136 @@ +.POSIX: + +CONFIGFILE = config.mk +include $(CONFIGFILE) + +OS = linux +# Linux: linux +# Mac OS: macos +# Windows: windows +include mk/$(OS).mk + + +LIB_MAJOR = 1 +LIB_MINOR = 0 +LIB_VERSION = $(LIB_MAJOR).$(LIB_MINOR) +LIB_NAME = charconv + + +BIN =\ + convert-to-shogi\ + convert-to-dice\ + convert-to-go\ + convert-to-draughts\ + convert-to-region-indicators\ + convert-to-tags\ + convert-to-gender-symbols\ + convert-to-latin\ + convert-to-cypriot\ + convert-to-bold\ + convert-to-italic\ + convert-to-bold-italic\ + convert-to-monospace\ + convert-to-segmented\ + convert-to-sans-serif\ + convert-to-sans-serif-bold\ + convert-to-sans-serif-italic\ + convert-to-sans-serif-bold-italic\ + convert-to-double-struck\ + convert-to-double-struck-italic\ + convert-to-fraktur\ + convert-to-bold-fraktur\ + convert-to-script\ + convert-to-bold-script\ + convert-to-buhid + +LIBOBJ =\ + libcharconv_shogi.o\ + libcharconv_dice.o\ + libcharconv_go_white.o\ + libcharconv_go_black.o\ + libcharconv_draughts.o\ + libcharconv_region_indicators.o\ + libcharconv_tags.o\ + libcharconv_gender_symbols.o\ + libcharconv_latin.o\ + libcharconv_cypriot.o\ + libcharconv_bold.o\ + libcharconv_italic.o\ + libcharconv_bold_italic.o\ + libcharconv_monospace.o\ + libcharconv_segmented.o\ + libcharconv_sans_serif.o\ + libcharconv_sans_serif_bold.o\ + libcharconv_sans_serif_italic.o\ + libcharconv_sans_serif_bold_italic.o\ + libcharconv_double_struck.o\ + libcharconv_double_struck_italic.o\ + libcharconv_fraktur.o\ + libcharconv_bold_fraktur.o\ + libcharconv_script.o\ + libcharconv_bold_script.o\ + libcharconv_buhid.o + +HDR =\ + libcharconv.h + +LOBJ = $(LIBOBJ:.o=.lo) + +BINOBJ = $(BIN:=.o) common.o + + +all: libcharconv.a libcharconv.$(LIBEXT) $(BIN) +common.o: $(HDR) +$(LIBOBJ): $(HDR) +$(BINOBJ): $(HDR) common.h +$(LOBJ): $(HDR) +$(BIN): common.o libcharconv.a + +.c.o: + 
$(CC) -c -o $@ $< $(CFLAGS) $(CPPFLAGS) + +.c.lo: + $(CC) -fPIC -c -o $@ $< $(CFLAGS) $(CPPFLAGS) + +.o: + $(CC) -fPIC -o $@ $< common.o libcharconv.a $(LDFLAGS) + +.c: + $(CC) -fPIC -o $@ $< common.o libcharconv.a $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) + +libcharconv.a: $(LIBOBJ) + @rm -f -- $@ + $(AR) rc $@ $(LIBOBJ) + $(AR) ts $@ > /dev/null + +libcharconv.$(LIBEXT): $(LOBJ) + $(CC) $(LIBFLAGS) -o $@ $(LOBJ) $(LDFLAGS) + +install: libcharconv.a libcharconv.$(LIBEXT) $(BIN) + mkdir -p -- "$(DESTDIR)$(PREFIX)/lib" + mkdir -p -- "$(DESTDIR)$(PREFIX)/bin" + mkdir -p -- "$(DESTDIR)$(PREFIX)/include" + cp -- libcharconv.a "$(DESTDIR)$(PREFIX)/lib/" + cp -- libcharconv.$(LIBEXT) "$(DESTDIR)$(PREFIX)/lib/libcharconv.$(LIBMINOREXT)" + $(FIX_INSTALL_NAME) "$(DESTDIR)$(PREFIX)/lib/libcharconv.$(LIBMINOREXT)" + ln -sf -- libcharconv.$(LIBMINOREXT) "$(DESTDIR)$(PREFIX)/lib/libcharconv.$(LIBMAJOREXT)" + ln -sf -- libcharconv.$(LIBMAJOREXT) "$(DESTDIR)$(PREFIX)/lib/libcharconv.$(LIBEXT)" + cp -- libcharconv.h "$(DESTDIR)$(PREFIX)/include/" + cp -- $(BIN) "$(DESTDIR)$(PREFIX)/bin/" + +uninstall: + -rm -f -- "$(DESTDIR)$(PREFIX)/lib/libcharconv.a" + -rm -f -- "$(DESTDIR)$(PREFIX)/lib/libcharconv.$(LIBMAJOREXT)" + -rm -f -- "$(DESTDIR)$(PREFIX)/lib/libcharconv.$(LIBMINOREXT)" + -rm -f -- "$(DESTDIR)$(PREFIX)/lib/libcharconv.$(LIBEXT)" + -rm -f -- "$(DESTDIR)$(PREFIX)/include/libcharconv.h" + -cd -- "$(DESTDIR)$(PREFIX)/bin/" && rm -f -- $(BIN) + +clean: + -rm -f -- *.o *.a *.lo *.su *.so *.so.* *.dll *.dylib + -rm -f -- *.gch *.gcov *.gcno *.gcda *.$(LIBEXT) $(BIN) + +.SUFFIXES: +.SUFFIXES: .lo .o .c + +.PHONY: all install uninstall clean diff --git a/common.c b/common.c new file mode 100644 index 0000000..11d72ac --- /dev/null +++ b/common.c @@ -0,0 +1,169 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "common.h" + + +void +writeall(const char *s, size_t n) +{ + ssize_t r; + while (n) { + r = write(STDOUT_FILENO, s, n); + if (r < 0) { + if (errno == EINTR) + continue; + fprintf(stderr, "%s: write <stdout>: %s\n", argv0, strerror(errno)); + exit(1); + } + s = &s[r]; + n -= (size_t)r; + } +} + + +void +writechar(uint_least32_t cp) +{ + char buf[4]; + + if (cp < UINT32_C(0x80)) { + buf[0] = (char)cp; + writeall(buf, 1u); + } else if (cp < UINT32_C(0x800)) { + buf[0] = (char)(( cp >> 6) | 0xC0u); + buf[1] = (char)(((cp >> 0) & 0x3Fu) | 0x80u); + writeall(buf, 2u); + } else if (cp < UINT32_C(0x10000)) { + buf[0] = (char)(( cp >> 12) | 0xE0u); + buf[1] = (char)(((cp >> 6) & 0x3Fu) | 0x80u); + buf[2] = (char)(((cp >> 0) & 0x3Fu) | 0x80u); + writeall(buf, 3u); + } else if (cp < UINT32_C(0x110000)) { + buf[0] = (char)(( cp >> 18) | 0xF0u); + buf[1] = (char)(((cp >> 12) & 0x3Fu) | 0x80u); + buf[2] = (char)(((cp >> 6) & 0x3Fu) | 0x80u); + buf[3] = (char)(((cp >> 0) & 0x3Fu) | 0x80u); + writeall(buf, 4u); + } else { + abort(); + } +} + + +int +convert(enum libcharconv_result (*conv)(const char *, size_t, size_t *, uint_least32_t *, size_t *)) +{ + char *buf = NULL; + size_t bufsize = 0; + size_t head = 0; + size_t tail = 0; + ssize_t r; + enum libcharconv_result res; + size_t n; + uint_least32_t *cps; + size_t ncps; + size_t cps_size = 16u; + size_t i; + + cps = malloc(cps_size * sizeof(*cps)); + if (!cps) { + fprintf(stderr, "%s: out of memory\n", argv0); + exit(1); + } + + for (;;) { + if (head == bufsize) { + if (tail) { + memmove(&buf[0], &buf[tail], head -= tail); + tail = 0; + } else { + buf = realloc(buf, bufsize += 8 << 10); + if (!buf) { + fprintf(stderr, "%s: out of memory\n", argv0); + exit(1); + } + } + } + r = read(STDIN_FILENO, &buf[head], bufsize - head); + if (r <= 0) { + if (!r) + break; + if (errno == EINTR) + continue; + fprintf(stderr, "%s: read <stdin>: %s\n", argv0, strerror(errno)); + exit(1); + } + head += (size_t)r; + conv_again: + ncps 
= cps_size; + res = (*conv)(&buf[tail], head - tail, &n, cps, &ncps); + if (ncps > cps_size) { + cps_size = ncps; + cps = realloc(cps, cps_size * sizeof(*cps)); + if (!cps) { + fprintf(stderr, "%s: out of memory\n", argv0); + exit(1); + } + goto conv_again; + } + switch (res) { + case LIBCHARCONV_NO_CONVERT: + writeall(&buf[tail], n); + tail += n; + if (tail != head) + goto conv_again; + break; + case LIBCHARCONV_CONVERTED: + for (i = 0u; i < ncps; i++) + writechar(cps[i]); + tail += n; + if (tail != head) + goto conv_again; + break; + case LIBCHARCONV_INDETERMINATE: + case LIBCHARCONV_CONVERT_IF_END: + break; + default: + abort(); + } + } + + while (tail < head) { + ncps = cps_size; + res = (*conv)(&buf[tail], head - tail, &n, cps, &ncps); + if (ncps > cps_size) { + cps_size = ncps; + cps = realloc(cps, cps_size * sizeof(*cps)); + if (!cps) { + fprintf(stderr, "%s: out of memory\n", argv0); + exit(1); + } + continue; + } + switch (res) { + case LIBCHARCONV_INDETERMINATE: + n = head - tail; + /* fall through */ + case LIBCHARCONV_NO_CONVERT: + writeall(&buf[tail], n); + tail += n; + break; + case LIBCHARCONV_CONVERTED: + case LIBCHARCONV_CONVERT_IF_END: + for (i = 0u; i < ncps; i++) + writechar(cps[i]); + tail += n; + break; + default: + abort(); + } + } + + if (close(STDOUT_FILENO)) { + fprintf(stderr, "%s: write <stdout>: %s\n", argv0, strerror(errno)); + exit(1); + } + + free(buf); + free(cps); + return 0; +} diff --git a/common.h b/common.h new file mode 100644 index 0000000..9b30011 --- /dev/null +++ b/common.h @@ -0,0 +1,13 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "libcharconv.h" +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <libsimple-arg.h> + + +void writeall(const char *s, size_t n); +void writechar(uint_least32_t cp); +int convert(enum libcharconv_result (*conv)(const char *, size_t, size_t *, uint_least32_t *, size_t *)); diff --git a/config.mk b/config.mk new file mode 100644 index 0000000..f4adf12 --- /dev/null +++ b/config.mk @@ -0,0 +1,8 @@ +PREFIX = /usr +MANPREFIX = $(PREFIX)/share/man + +CC = c99 + +CPPFLAGS = -D_DEFAULT_SOURCE -D_BSD_SOURCE -D_XOPEN_SOURCE=700 -D_GNU_SOURCE +CFLAGS = +LDFLAGS = diff --git a/convert-to-bold-fraktur.c b/convert-to-bold-fraktur.c new file mode 100644 index 0000000..b9a302e --- /dev/null +++ b/convert-to-bold-fraktur.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_bold_fraktur); +} diff --git a/convert-to-bold-italic.c b/convert-to-bold-italic.c new file mode 100644 index 0000000..95a8832 --- /dev/null +++ b/convert-to-bold-italic.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_bold_italic); +} diff --git a/convert-to-bold-script.c b/convert-to-bold-script.c new file mode 100644 index 0000000..f3a374e --- /dev/null +++ b/convert-to-bold-script.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_bold_script); +} diff --git a/convert-to-bold.c b/convert-to-bold.c new file mode 100644 index 0000000..fc91c7d --- /dev/null +++ b/convert-to-bold.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_bold); +} diff --git a/convert-to-buhid.c b/convert-to-buhid.c new file mode 100644 index 0000000..639943f --- /dev/null +++ b/convert-to-buhid.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_buhid); +} diff --git a/convert-to-cypriot.c b/convert-to-cypriot.c new file mode 100644 index 0000000..6a981a7 --- /dev/null +++ b/convert-to-cypriot.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_cypriot); +} diff --git a/convert-to-dice.c b/convert-to-dice.c new file mode 100644 index 0000000..02c84bd --- /dev/null +++ b/convert-to-dice.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_dice); +} diff --git a/convert-to-double-struck-italic.c b/convert-to-double-struck-italic.c new file mode 100644 index 0000000..2be3e61 --- /dev/null +++ b/convert-to-double-struck-italic.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_double_struck_italic); +} diff --git a/convert-to-double-struck.c b/convert-to-double-struck.c new file mode 100644 index 0000000..ef962cc --- /dev/null +++ b/convert-to-double-struck.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_double_struck); +} diff --git a/convert-to-draughts.c b/convert-to-draughts.c new file mode 100644 index 0000000..b93814e --- /dev/null +++ b/convert-to-draughts.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_draughts); +} diff --git a/convert-to-fraktur.c b/convert-to-fraktur.c new file mode 100644 index 0000000..9e5b72e --- /dev/null +++ b/convert-to-fraktur.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_fraktur); +} diff --git a/convert-to-gender-symbols.c b/convert-to-gender-symbols.c new file mode 100644 index 0000000..1ec89ab --- /dev/null +++ b/convert-to-gender-symbols.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_gender_symbols); +} diff --git a/convert-to-go.c b/convert-to-go.c new file mode 100644 index 0000000..4d449e8 --- /dev/null +++ b/convert-to-go.c @@ -0,0 +1,22 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE("[-b | -w]"); + + +int +main(int argc, char *argv[]) +{ + int black = 0; + + ARGBEGIN { + case 'b': black = 1; break; + case 'w': black = 0; break; + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(black ? &libcharconv_go_black : &libcharconv_go_white); +} diff --git a/convert-to-italic.c b/convert-to-italic.c new file mode 100644 index 0000000..88edcad --- /dev/null +++ b/convert-to-italic.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_italic); +} diff --git a/convert-to-latin.c b/convert-to-latin.c new file mode 100644 index 0000000..c992b5f --- /dev/null +++ b/convert-to-latin.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_latin); +} diff --git a/convert-to-monospace.c b/convert-to-monospace.c new file mode 100644 index 0000000..33e114c --- /dev/null +++ b/convert-to-monospace.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_monospace); +} diff --git a/convert-to-region-indicators.c b/convert-to-region-indicators.c new file mode 100644 index 0000000..38758db --- /dev/null +++ b/convert-to-region-indicators.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_region_indicators); +} diff --git a/convert-to-sans-serif-bold-italic.c b/convert-to-sans-serif-bold-italic.c new file mode 100644 index 0000000..9b711f5 --- /dev/null +++ b/convert-to-sans-serif-bold-italic.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_sans_serif_bold_italic); +} diff --git a/convert-to-sans-serif-bold.c b/convert-to-sans-serif-bold.c new file mode 100644 index 0000000..88e9627 --- /dev/null +++ b/convert-to-sans-serif-bold.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_sans_serif_bold); +} diff --git a/convert-to-sans-serif-italic.c b/convert-to-sans-serif-italic.c new file mode 100644 index 0000000..1b1013e --- /dev/null +++ b/convert-to-sans-serif-italic.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_sans_serif_italic); +} diff --git a/convert-to-sans-serif.c b/convert-to-sans-serif.c new file mode 100644 index 0000000..c3d822f --- /dev/null +++ b/convert-to-sans-serif.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_sans_serif); +} diff --git a/convert-to-script.c b/convert-to-script.c new file mode 100644 index 0000000..730efd4 --- /dev/null +++ b/convert-to-script.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_script); +} diff --git a/convert-to-segmented.c b/convert-to-segmented.c new file mode 100644 index 0000000..b3b783c --- /dev/null +++ b/convert-to-segmented.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_segmented); +} diff --git a/convert-to-shogi.c b/convert-to-shogi.c new file mode 100644 index 0000000..1b45b90 --- /dev/null +++ b/convert-to-shogi.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_shogi); +} diff --git a/convert-to-tags.c b/convert-to-tags.c new file mode 100644 index 0000000..000d4b9 --- /dev/null +++ b/convert-to-tags.c @@ -0,0 +1,18 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + +USAGE(""); + + +int +main(int argc, char *argv[]) +{ + ARGBEGIN { + default: + usage(); + } ARGEND; + if (argc) + usage(); + + return convert(&libcharconv_tags); +} diff --git a/libcharconv.h b/libcharconv.h new file mode 100644 index 0000000..aa86a16 --- /dev/null +++ b/libcharconv.h @@ -0,0 +1,870 @@ +/* See LICENSE file for copyright and license details. 
*/ +#ifndef LIBCHARCONV_H +#define LIBCHARCONV_H + +#include <stddef.h> +#include <stdint.h> + + +enum libcharconv_result { + LIBCHARCONV_NO_CONVERT, + LIBCHARCONV_CONVERTED, + LIBCHARCONV_INDETERMINATE, + LIBCHARCONV_CONVERT_IF_END +}; + + +/** + * Convert + * 'w' to WHITE SHOGI PIECE, + * 'b' to BLACK SHOGI PIECE, + * 'W' to TURNED WHITE SHOGI PIECE, and + * 'B' to TURNED BLACK SHOGI PIECE + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_shogi(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Convert '1' through '6' to DIE FACE-1 through DIE FACE-6 + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + 
* of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_dice(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Convert + * '0' to WHITE CIRCLE, + * '1' to WHITE CIRCLE WITH DOT RIGHT, and + * '2' to WHITE CIRCLE WITH TWO DOTS + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * 
LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_go_white(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Convert + * '0' to BLACK CIRCLE, + * '1' to BLACK CIRCLE WITH WHITE DOT RIGHT, and + * '2' to BLACK CIRCLE WITH TWO WHITE DOTS + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_go_black(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Convert + * 'm' and '1' to WHITE DRAUGHTS MAN, + * 'k' and '2' to WHITE DRAUGHTS KING, + * 'M' to BLACK DRAUGHTS MAN, and + * 'K' to BLACK DRAUGHTS KING + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp 
Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_draughts(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Convert 'a' through 'z' and 'A' through 'Z' + * to REGION INDICATOR SYMBOL LETTER A through + * REGION INDICATOR SYMBOL LETTER Z + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the 
function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_region_indicators(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Convert ' ' through '~' to + * TAG SPACE through TAG TILDE + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text most + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_tags(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Convert + * 'f', 'F', 'w', and 'W' to FEMALE SIGN, + * 'm' and 'M' to MALE SIGN, and + * 'i' and 'I' to MERCURY (intersex sign) + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the 
codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text most + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_gender_symbols(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Preforms convertion in opposite direction + * of the other functions + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text most + * be made available before the function can + * determine 
whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Preforms convertion from Latin to Cypriot + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text most + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_cypriot(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Preforms convertion from Latin to MATHEMATICAL BOLD + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number 
+ * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text most + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_bold(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Preforms convertion from Latin to MATHEMATICAL ITALIC + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text most + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED the 
entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_italic(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Preforms convertion from Latin to MATHEMATICAL BOLD ITALIC + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text most + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_bold_italic(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Preforms convertion from Latin to MATHEMATICAL MONOSPACE + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return 
LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text most + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_monospace(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Preforms convertion from DIGITs to SEGMENTED DIGITs + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text most + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result 
libcharconv_segmented(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Performs conversion from Latin to MATHEMATICAL SANS-SERIF + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_sans_serif(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Performs conversion from Latin to MATHEMATICAL SANS-SERIF BOLD + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be
converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_sans_serif_bold(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Performs conversion from Latin to MATHEMATICAL SANS-SERIF ITALIC + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_sans_serif_italic(const char *s, size_t slen, size_t *n, uint_least32_t *cp,
size_t *ncp); + + +/** + * Performs conversion from Latin to MATHEMATICAL SANS-SERIF BOLD ITALIC + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_sans_serif_bold_italic(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Performs conversion from Latin to MATHEMATICAL DOUBLE-STRUCK + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of
bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_double_struck(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Performs conversion from Latin to DOUBLE-STRUCK ITALIC + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_double_struck_italic(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Performs conversion from Latin to MATHEMATICAL
FRAKTUR + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_fraktur(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Performs conversion from Latin to MATHEMATICAL BOLD FRAKTUR + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + *
LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_bold_fraktur(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Performs conversion from Latin to MATHEMATICAL SCRIPT + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_script(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Performs conversion from Latin to MATHEMATICAL BOLD SCRIPT + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output
parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_bold_script(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +/** + * Performs conversion from Latin to Buhid + * + * @param s Text to convert + * @param slen The number of bytes available in `s` + * @param n Output parameter for the number of consumed bytes + * @param cp Output buffer for the codepoints + * @param ncp Input parameter for the number of codepoints that + * fit in `cp`, and output parameter for the number + * of output codepoints (if it exceeds the original + * value of `ncp`, a larger buffer is needed) + * @return LIBCHARCONV_NO_CONVERT: + * `*n` is the number of bytes from the beginning + * of `s` that cannot be converted + * LIBCHARCONV_CONVERTED: + * `*n` is the number of bytes from the beginning + * of `s` that was converted to a codepoint which + * is stored in `*cp` + * LIBCHARCONV_INDETERMINATE: + * If all text has been input, no more can be + * converted, otherwise more of the text must + * be made
available before the function can + * determine whether the beginning of `s` can be + * converted or what it should be converted to + * LIBCHARCONV_CONVERT_IF_END: + * As LIBCHARCONV_CONVERTED if the entire text has + * been input, as LIBCHARCONV_INDETERMINATE + * otherwise + */ +enum libcharconv_result libcharconv_buhid(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp); + + +#endif diff --git a/libcharconv_bold.c b/libcharconv_bold.c new file mode 100644 index 0000000..c405368 --- /dev/null +++ b/libcharconv_bold.c @@ -0,0 +1,35 @@ +/* See LICENSE file for copyright and license details. */ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_bold(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ /* Convert the first byte of `s` when it is an ASCII letter or digit; the contract is documented in libcharconv.h */ + uint_least32_t c; + *n = 0; /* counts leading bytes that cannot be converted */ + for (; slen--; s++) { + if ('A' <= *s && *s <= 'Z') { + c = (uint_least32_t)(UINT32_C(0x1D400) + (unsigned)(*s - 'A')); /* offset from 'A' added to U+1D400 */ + goto conv; + } else if ('a' <= *s && *s <= 'z') { + c = (uint_least32_t)(UINT32_C(0x1D41A) + (unsigned)(*s - 'a')); /* offset from 'a' added to U+1D41A */ + goto conv; + } else if ('0' <= *s && *s <= '9') { + c = (uint_least32_t)(UINT32_C(0x1D7CE) + (unsigned)(*s - '0')); /* offset from '0' added to U+1D7CE */ + goto conv; + } else { + *n += 1u; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; /* `*n` holds the number of leading unconvertible bytes */ + +conv: + if (*n) /* unconvertible bytes come first: report them before converting */ + goto no_conv; + if (*ncp) /* write only when the caller's buffer has room */ + *cp = c; + *n += 1u; /* one byte consumed */ + *ncp = 1u; /* one codepoint produced */ + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_bold_fraktur.c b/libcharconv_bold_fraktur.c new file mode 100644 index 0000000..614ff3d --- /dev/null +++ b/libcharconv_bold_fraktur.c @@ -0,0 +1,32 @@ +/* See LICENSE file for copyright and license details.
*/ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_bold_fraktur(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ /* Convert the first byte of `s` when it is an ASCII letter; the contract is documented in libcharconv.h */ + uint_least32_t c; + *n = 0; /* counts leading bytes that cannot be converted */ + for (; slen--; s++) { + if ('A' <= *s && *s <= 'Z') { + c = (uint_least32_t)(UINT32_C(0x1D56C) + (unsigned)(*s - 'A')); /* offset from 'A' added to U+1D56C */ + goto conv; + } else if ('a' <= *s && *s <= 'z') { + c = (uint_least32_t)(UINT32_C(0x1D586) + (unsigned)(*s - 'a')); /* offset from 'a' added to U+1D586 */ + goto conv; + } else { + *n += 1u; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; /* `*n` holds the number of leading unconvertible bytes */ + +conv: + if (*n) /* unconvertible bytes come first: report them before converting */ + goto no_conv; + if (*ncp) /* write only when the caller's buffer has room */ + *cp = c; + *n += 1u; /* one byte consumed */ + *ncp = 1u; /* one codepoint produced */ + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_bold_italic.c b/libcharconv_bold_italic.c new file mode 100644 index 0000000..e0bbe6e --- /dev/null +++ b/libcharconv_bold_italic.c @@ -0,0 +1,32 @@ +/* See LICENSE file for copyright and license details. */ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_bold_italic(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ /* Convert the first byte of `s` when it is an ASCII letter; the contract is documented in libcharconv.h */ + uint_least32_t c; + *n = 0; /* counts leading bytes that cannot be converted */ + for (; slen--; s++) { + if ('A' <= *s && *s <= 'Z') { + c = (uint_least32_t)(UINT32_C(0x1D468) + (unsigned)(*s - 'A')); /* offset from 'A' added to U+1D468 */ + goto conv; + } else if ('a' <= *s && *s <= 'z') { + c = (uint_least32_t)(UINT32_C(0x1D482) + (unsigned)(*s - 'a')); /* offset from 'a' added to U+1D482 */ + goto conv; + } else { + *n += 1u; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; /* `*n` holds the number of leading unconvertible bytes */ + +conv: + if (*n) /* unconvertible bytes come first: report them before converting */ + goto no_conv; + if (*ncp) /* write only when the caller's buffer has room */ + *cp = c; + *n += 1u; /* one byte consumed */ + *ncp = 1u; /* one codepoint produced */ + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_bold_script.c b/libcharconv_bold_script.c new file mode 100644 index 0000000..ba8b026 --- /dev/null +++ b/libcharconv_bold_script.c @@ -0,0 +1,32 @@ +/* See LICENSE file for copyright and license details.
*/ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_bold_script(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ /* Convert the first byte of `s` when it is an ASCII letter; the contract is documented in libcharconv.h */ + uint_least32_t c; + *n = 0; /* counts leading bytes that cannot be converted */ + for (; slen--; s++) { + if ('A' <= *s && *s <= 'Z') { + c = (uint_least32_t)(UINT32_C(0x1D4D0) + (unsigned)(*s - 'A')); /* offset from 'A' added to U+1D4D0 */ + goto conv; + } else if ('a' <= *s && *s <= 'z') { + c = (uint_least32_t)(UINT32_C(0x1D4EA) + (unsigned)(*s - 'a')); /* offset from 'a' added to U+1D4EA */ + goto conv; + } else { + *n += 1u; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; /* `*n` holds the number of leading unconvertible bytes */ + +conv: + if (*n) /* unconvertible bytes come first: report them before converting */ + goto no_conv; + if (*ncp) /* write only when the caller's buffer has room */ + *cp = c; + *n += 1u; /* one byte consumed */ + *ncp = 1u; /* one codepoint produced */ + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_buhid.c b/libcharconv_buhid.c new file mode 100644 index 0000000..812a1c0 --- /dev/null +++ b/libcharconv_buhid.c @@ -0,0 +1,108 @@ +/* See LICENSE file for copyright and license details. */ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_buhid(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ /* Convert a Latin syllable at the start of `s` to one or two codepoints in U+1740..U+1753; the contract is documented in libcharconv.h */ + uint_least32_t c; + *n = 0; /* counts leading bytes that cannot be converted */ + for (; slen--; s++) { /* inside the body `slen` is the number of bytes after s[0] */ + switch (s[0]) { + case 'A': case 'a': c = UINT32_C(0x1740); goto conv1; /* a, i, u on their own: one byte in, one codepoint out */ + case 'I': case 'i': c = UINT32_C(0x1741); goto conv1; + case 'U': case 'u': c = UINT32_C(0x1742); goto conv1; + case '^': /* '^' followed by i or u: two bytes in, U+1752 or U+1753 alone out */ + if (!slen) /* s[1] is not available yet */ /* NOTE(review): unlike `conv`, this returns before `*n` is checked - confirm callers accept INDETERMINATE with a non-zero `*n` */ + return LIBCHARCONV_INDETERMINATE; + switch (s[1]) { + case 'I': case 'i': c = UINT32_C(0x1752); goto conv2; + case 'U': case 'u': c = UINT32_C(0x1753); goto conv2; + default: + goto no_match; + } + goto no_match; /* not reached: every case of the inner switch jumps away */ + case 'B': case 'b': c = UINT32_C(0x174A); goto conv; /* from here on the byte after s[0] decides the output, see `conv` */ + case 'D': case 'd': c = UINT32_C(0x1747); goto conv; + case 'G': case 'g': c = UINT32_C(0x1744); goto conv; + case 'H': case 'h': c = UINT32_C(0x1751); goto conv; + case 'K': case 'k': c = UINT32_C(0x1743); goto conv; + case 'L': case 'l': c = UINT32_C(0x174E); goto conv; + case 'M': case 'm': c = UINT32_C(0x174B); goto conv; + case 'N': case 'n': c = UINT32_C(0x1748); goto conv; + case 'P': case 'p': c = UINT32_C(0x1749); goto conv; + case 'R': case 'r': c =
UINT32_C(0x174D); goto conv; + case 'S': case 's': c = UINT32_C(0x1750); goto conv; + case 'T': case 't': c = UINT32_C(0x1746); goto conv; + case 'Y': case 'y': c = UINT32_C(0x174C); goto conv; + case 'W': case 'w': c = UINT32_C(0x174F); goto conv; + default: + no_match: /* also entered by `goto no_match` from the switch under `conv_again`, which resumes the scan loop */ + *n += 1u; + break; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; /* `*n` holds the number of leading unconvertible bytes */ + +conv: + if (*n) /* unconvertible bytes come first: report them before converting */ + goto no_conv; +conv_again: + if (!slen) /* the byte after s[0] is needed to decide */ + return LIBCHARCONV_INDETERMINATE; + switch (s[1]) { + case 'A': case 'a': /* followed by a: `c` alone is output */ + if (*ncp >= 1u) + cp[0] = c; + *n += 2u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; + case 'I': case 'i': /* followed by i: `c` then U+1752 */ + if (*ncp >= 1u) + cp[0] = c; + if (*ncp >= 2u) + cp[1] = UINT32_C(0x1752); + *n += 2u; + *ncp = 2u; /* two codepoints; each is stored only if the caller's buffer had room for it */ + return LIBCHARCONV_CONVERTED; + case 'U': case 'u': /* followed by u: `c` then U+1753 */ + if (*ncp >= 1u) + cp[0] = c; + if (*ncp >= 2u) + cp[1] = UINT32_C(0x1753); + *n += 2u; + *ncp = 2u; + return LIBCHARCONV_CONVERTED; + case 'G': case 'g': + if (c == UINT32_C(0x1745)) /* "ng" was already folded once */ + goto no_match; + if (s[0] != 'N' && s[0] != 'n') /* 'g' is accepted as second byte only after 'n' */ + goto no_match; + c = UINT32_C(0x1745); /* "ng" selects U+1745; step past the 'n' and examine the byte after 'g' */ + *n += 1u; + slen--; + s++; + goto conv_again; + default: + goto no_match; + } + +conv1: /* one byte consumed, one codepoint produced */ + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; + +conv2: /* two bytes consumed, one codepoint produced */ + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 2u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_cypriot.c b/libcharconv_cypriot.c new file mode 100644 index 0000000..d7fa5af --- /dev/null +++ b/libcharconv_cypriot.c @@ -0,0 +1,177 @@ +/* See LICENSE file for copyright and license details.
*/ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_cypriot(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + switch (s[0]) { + case 'A': case 'a': c = UINT32_C(0x10800); goto conv1; + case 'E': case 'e': c = UINT32_C(0x10801); goto conv1; + case 'I': case 'i': c = UINT32_C(0x10802); goto conv1; + case 'O': case 'o': c = UINT32_C(0x10803); goto conv1; + case 'U': case 'u': c = UINT32_C(0x10804); goto conv1; + case 'J': case 'j': + if (!slen) + return LIBCHARCONV_INDETERMINATE; + switch (s[1]) { + case 'A': case 'a': c = UINT32_C(0x10805); goto conv2; + case 'O': case 'o': c = UINT32_C(0x10808); goto conv2; + default: + goto no_match; + } + case 'K': case 'k': + if (!slen) + return LIBCHARCONV_INDETERMINATE; + switch (s[1]) { + case 'A': case 'a': c = UINT32_C(0x1080A); goto conv2; + case 'E': case 'e': c = UINT32_C(0x1080B); goto conv2; + case 'I': case 'i': c = UINT32_C(0x1080C); goto conv2; + case 'O': case 'o': c = UINT32_C(0x1080D); goto conv2; + case 'U': case 'u': c = UINT32_C(0x1080E); goto conv2; + default: + goto no_match; + } + case 'L': case 'l': + if (!slen) + return LIBCHARCONV_INDETERMINATE; + switch (s[1]) { + case 'A': case 'a': c = UINT32_C(0x1080F); goto conv2; + case 'E': case 'e': c = UINT32_C(0x10810); goto conv2; + case 'I': case 'i': c = UINT32_C(0x10811); goto conv2; + case 'O': case 'o': c = UINT32_C(0x10812); goto conv2; + case 'U': case 'u': c = UINT32_C(0x10813); goto conv2; + default: + goto no_match; + } + case 'M': case 'm': + if (!slen) + return LIBCHARCONV_INDETERMINATE; + switch (s[1]) { + case 'A': case 'a': c = UINT32_C(0x10814); goto conv2; + case 'E': case 'e': c = UINT32_C(0x10815); goto conv2; + case 'I': case 'i': c = UINT32_C(0x10816); goto conv2; + case 'O': case 'o': c = UINT32_C(0x10817); goto conv2; + case 'U': case 'u': c = UINT32_C(0x10818); goto conv2; + default: + goto no_match; + } + case 'N': case 'n': + if (!slen) 
+ return LIBCHARCONV_INDETERMINATE; + switch (s[1]) { + case 'A': case 'a': c = UINT32_C(0x10819); goto conv2; + case 'E': case 'e': c = UINT32_C(0x1081A); goto conv2; + case 'I': case 'i': c = UINT32_C(0x1081B); goto conv2; + case 'O': case 'o': c = UINT32_C(0x1081C); goto conv2; + case 'U': case 'u': c = UINT32_C(0x1081D); goto conv2; + default: + goto no_match; + } + case 'P': case 'p': + if (!slen) + return LIBCHARCONV_INDETERMINATE; + switch (s[1]) { + case 'A': case 'a': c = UINT32_C(0x1081E); goto conv2; + case 'E': case 'e': c = UINT32_C(0x1081F); goto conv2; + case 'I': case 'i': c = UINT32_C(0x10820); goto conv2; + case 'O': case 'o': c = UINT32_C(0x10821); goto conv2; + case 'U': case 'u': c = UINT32_C(0x10822); goto conv2; + default: + goto no_match; + } + case 'R': case 'r': + if (!slen) + return LIBCHARCONV_INDETERMINATE; + switch (s[1]) { + case 'A': case 'a': c = UINT32_C(0x10823); goto conv2; + case 'E': case 'e': c = UINT32_C(0x10824); goto conv2; + case 'I': case 'i': c = UINT32_C(0x10825); goto conv2; + case 'O': case 'o': c = UINT32_C(0x10826); goto conv2; + case 'U': case 'u': c = UINT32_C(0x10827); goto conv2; + default: + goto no_match; + } + case 'S': case 's': + if (!slen) + return LIBCHARCONV_INDETERMINATE; + switch (s[1]) { + case 'A': case 'a': c = UINT32_C(0x10828); goto conv2; + case 'E': case 'e': c = UINT32_C(0x10829); goto conv2; + case 'I': case 'i': c = UINT32_C(0x1082A); goto conv2; + case 'O': case 'o': c = UINT32_C(0x1082B); goto conv2; + case 'U': case 'u': c = UINT32_C(0x1082C); goto conv2; + default: + goto no_match; + } + case 'T': case 't': + if (!slen) + return LIBCHARCONV_INDETERMINATE; + switch (s[1]) { + case 'A': case 'a': c = UINT32_C(0x1082D); goto conv2; + case 'E': case 'e': c = UINT32_C(0x1082E); goto conv2; + case 'I': case 'i': c = UINT32_C(0x1082F); goto conv2; + case 'O': case 'o': c = UINT32_C(0x10831); goto conv2; + case 'U': case 'u': c = UINT32_C(0x10830); goto conv2; + default: + goto no_match; + } + 
case 'W': case 'w': + if (!slen) + return LIBCHARCONV_INDETERMINATE; + switch (s[1]) { + case 'A': case 'a': c = UINT32_C(0x10832); goto conv2; + case 'E': case 'e': c = UINT32_C(0x10833); goto conv2; + case 'I': case 'i': c = UINT32_C(0x10834); goto conv2; + case 'O': case 'o': c = UINT32_C(0x10835); goto conv2; + default: + goto no_match; + } + case 'X': case 'x': + if (!slen) + return LIBCHARCONV_INDETERMINATE; + switch (s[1]) { + case 'A': case 'a': c = UINT32_C(0x10837); goto conv2; + case 'E': case 'e': c = UINT32_C(0x10838); goto conv2; + default: + goto no_match; + } + case 'Z': case 'z': + if (!slen) + return LIBCHARCONV_INDETERMINATE; + switch (s[1]) { + case 'A': case 'a': c = UINT32_C(0x1083C); goto conv2; + case 'O': case 'o': c = UINT32_C(0x1083F); goto conv2; + default: + goto no_match; + } + default: + no_match: + *n += 1u; + break; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv1: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; + +conv2: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 2u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_dice.c b/libcharconv_dice.c new file mode 100644 index 0000000..941a837 --- /dev/null +++ b/libcharconv_dice.c @@ -0,0 +1,34 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_dice(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + switch (*s) { + case '1': c = UINT32_C(0x2680); goto conv; + case '2': c = UINT32_C(0x2681); goto conv; + case '3': c = UINT32_C(0x2682); goto conv; + case '4': c = UINT32_C(0x2683); goto conv; + case '5': c = UINT32_C(0x2684); goto conv; + case '6': c = UINT32_C(0x2685); goto conv; + default: + *n += 1u; + break; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_double_struck.c b/libcharconv_double_struck.c new file mode 100644 index 0000000..de29727 --- /dev/null +++ b/libcharconv_double_struck.c @@ -0,0 +1,56 @@ +/* See LICENSE file for copyright and license details. */ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_double_struck(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + if (*s == 'C') { + c = (uint_least32_t)UINT32_C(0x2102); + goto conv; + } else if (*s == 'H') { + c = (uint_least32_t)UINT32_C(0x210D); + goto conv; + } else if (*s == 'N') { + c = (uint_least32_t)UINT32_C(0x2115); + goto conv; + } else if (*s == 'P') { + c = (uint_least32_t)UINT32_C(0x2119); + goto conv; + } else if (*s == 'Q') { + c = (uint_least32_t)UINT32_C(0x211A); + goto conv; + } else if (*s == 'R') { + c = (uint_least32_t)UINT32_C(0x211D); + goto conv; + } else if (*s == 'Z') { + c = (uint_least32_t)UINT32_C(0x2124); + goto conv; + } else if ('A' <= *s && *s <= 'Z') { + c = (uint_least32_t)(UINT32_C(0x1D538) + (unsigned)(*s - 'A')); + goto conv; + } else if ('a' <= *s && *s <= 'z') { + c = (uint_least32_t)(UINT32_C(0x1D552) + (unsigned)(*s - 'a')); + goto conv; + } else if ('0' <= *s && *s <= '9') { + c = 
(uint_least32_t)(UINT32_C(0x1D7D8) + (unsigned)(*s - '0')); + goto conv; + } else { + *n += 1u; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_double_struck_italic.c b/libcharconv_double_struck_italic.c new file mode 100644 index 0000000..5165362 --- /dev/null +++ b/libcharconv_double_struck_italic.c @@ -0,0 +1,33 @@ +/* See LICENSE file for copyright and license details. */ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_double_struck_italic(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + switch (*s) { + case 'D': c = UINT32_C(0x2145); goto conv; + case 'd': c = UINT32_C(0x2146); goto conv; + case 'e': c = UINT32_C(0x2147); goto conv; + case 'i': c = UINT32_C(0x2148); goto conv; + case 'j': c = UINT32_C(0x2149); goto conv; + default: + *n += 1u; + break; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_draughts.c b/libcharconv_draughts.c new file mode 100644 index 0000000..0f717e7 --- /dev/null +++ b/libcharconv_draughts.c @@ -0,0 +1,34 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_draughts(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + switch (*s) { + case '1': + case 'm': c = UINT32_C(0x26C0); goto conv; + case '2': + case 'k': c = UINT32_C(0x26C1); goto conv; + case 'M': c = UINT32_C(0x26C2); goto conv; + case 'K': c = UINT32_C(0x26C3); goto conv; + default: + *n += 1u; + break; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_fraktur.c b/libcharconv_fraktur.c new file mode 100644 index 0000000..4e9932f --- /dev/null +++ b/libcharconv_fraktur.c @@ -0,0 +1,47 @@ +/* See LICENSE file for copyright and license details. */ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_fraktur(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + if (*s == 'C') { + c = (uint_least32_t)UINT32_C(0x212D); + goto conv; + } else if (*s == 'H') { + c = (uint_least32_t)UINT32_C(0x210C); + goto conv; + } else if (*s == 'I') { + c = (uint_least32_t)UINT32_C(0x2111); + goto conv; + } else if (*s == 'R') { + c = (uint_least32_t)UINT32_C(0x211C); + goto conv; + } else if (*s == 'Z') { + c = (uint_least32_t)UINT32_C(0x2128); + goto conv; + } else if ('A' <= *s && *s <= 'Z') { + c = (uint_least32_t)(UINT32_C(0x1D504) + (unsigned)(*s - 'A')); + goto conv; + } else if ('a' <= *s && *s <= 'z') { + c = (uint_least32_t)(UINT32_C(0x1D51E) + (unsigned)(*s - 'a')); + goto conv; + } else { + *n += 1u; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_gender_symbols.c b/libcharconv_gender_symbols.c new file mode 100644 index 0000000..47dbd89 --- 
/dev/null +++ b/libcharconv_gender_symbols.c @@ -0,0 +1,36 @@ +/* See LICENSE file for copyright and license details. */ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_gender_symbols(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + switch (*s) { + case 'F': + case 'f': + case 'W': + case 'w': c = UINT32_C(0x2640); goto conv; + case 'M': + case 'm': c = UINT32_C(0x2642); goto conv; + case 'I': + case 'i': c = UINT32_C(0x263F); goto conv; + default: + *n += 1u; + break; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_go_black.c b/libcharconv_go_black.c new file mode 100644 index 0000000..e7bf8d5 --- /dev/null +++ b/libcharconv_go_black.c @@ -0,0 +1,31 @@ +/* See LICENSE file for copyright and license details. */ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_go_black(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + switch (*s) { + case '0': c = UINT32_C(0x25CF); goto conv; + case '1': c = UINT32_C(0x2688); goto conv; + case '2': c = UINT32_C(0x2689); goto conv; + default: + *n += 1u; + break; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_go_white.c b/libcharconv_go_white.c new file mode 100644 index 0000000..233d2bc --- /dev/null +++ b/libcharconv_go_white.c @@ -0,0 +1,31 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_go_white(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + switch (*s) { + case '0': c = UINT32_C(0x25CB); goto conv; + case '1': c = UINT32_C(0x2686); goto conv; + case '2': c = UINT32_C(0x2687); goto conv; + default: + *n += 1u; + break; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_italic.c b/libcharconv_italic.c new file mode 100644 index 0000000..e9fde22 --- /dev/null +++ b/libcharconv_italic.c @@ -0,0 +1,35 @@ +/* See LICENSE file for copyright and license details. */ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_italic(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + if ('A' <= *s && *s <= 'Z') { + c = (uint_least32_t)(UINT32_C(0x1D434) + (unsigned)(*s - 'A')); + goto conv; + } else if (*s == 'h') { + c = (uint_least32_t)UINT32_C(0x210E); + goto conv; + } else if ('a' <= *s && *s <= 'z') { + c = (uint_least32_t)(UINT32_C(0x1D44E) + (unsigned)(*s - 'a')); + goto conv; + } else { + *n += 1u; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_latin.c b/libcharconv_latin.c new file mode 100644 index 0000000..d682b17 --- /dev/null +++ b/libcharconv_latin.c @@ -0,0 +1,410 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "libcharconv.h" + + +static size_t +decode_utf8(const char *s, size_t slen, uint_least32_t *cp) +{ + uint_least32_t min, max; + size_t i, n; + + if (slen < 1u) + return 0u; + + if (!(s[0] & 0x80)) { + *cp = (uint_least32_t)s[0]; + return 1u; + } else if ((s[0] & 0xE0) == 0xC0) { + *cp = (uint_least32_t)s[0] & 0x3Fu; + n = 2u; + min = UINT32_C(0x80); + max = UINT32_C(0x800); + } else if ((s[0] & 0xF0) == 0xE0) { + *cp = (uint_least32_t)s[0] & 0x1Fu; + n = 3u; + min = UINT32_C(0x800); + max = UINT32_C(0x10000); + } else if ((s[0] & 0xF8) == 0xF0) { + *cp = (uint_least32_t)s[0] & 0x0Fu; + n = 4u; + min = UINT32_C(0x10000); + max = UINT32_C(0x110000); + } else { + return 0u; + } + + if (slen < n) + return n; + + for (i = 1u; i < n; i++) { + *cp <<= 6; + *cp |= (uint_least32_t)s[i] & 0x3Fu; + } + + if (min > *cp || *cp >= max) + return 0u; + + return n; +} + + +enum libcharconv_result +libcharconv_latin(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + enum libcharconv_result ret = LIBCHARCONV_CONVERTED; + uint_least32_t c; + char c1, c2, c3; + size_t clen; + + *n = 0; + for (; slen; s++) { + clen = decode_utf8(s, slen, &c); + if (clen > slen) + return LIBCHARCONV_INDETERMINATE; + if (!clen) { + *n += 1u; + slen -= 1u; + continue; + } + slen -= clen; + + if (UINT32_C(0x2680) <= c && c <= UINT32_C(0x2685)) { + /* dice */ + c -= (uint_least32_t)UINT32_C(0x2680) - (uint_least32_t)'1'; + goto conv; + + } else if (UINT32_C(0x1F1E6) <= c && c <= UINT32_C(0x1F1FF)) { + /* region indicators */ + c -= (uint_least32_t)UINT32_C(0x1F1E6) - (uint_least32_t)'A'; + goto conv; + + } else if (UINT32_C(0xE0020) <= c && c <= UINT32_C(0xE007E)) { + /* tags */ + c -= (uint_least32_t)UINT32_C(0xE0000); + goto conv; + + } else if (UINT32_C(0x10800) <= c && c <= UINT32_C(0x1083F)) { + /* cypriot */ + c -= UINT32_C(0x10800); + c1 = "_jklmnprstwxz"[c / 5]; + c2 = "aeiou"[c % 5]; + if (c1 == '_') { + c = (uint_least32_t)c2; + goto conv; + } + if (c1 == 'j' 
&& c2 != 'a' && c2 != 'o') + goto no_match; + if (c1 == 'w' && c2 == 'u') + goto no_match; + if (c1 == 'x' && c2 != 'a' && c2 != 'e') + goto no_match; + if (c1 == 'z' && c2 != 'a' && c2 != 'o') + goto no_match; + goto conv2; + + } else if (UINT32_C(0x1D400) <= c && c <= UINT32_C(0x1D419)) { + /* bold (captial) */ + c -= (uint_least32_t)UINT32_C(0x1D400) - (uint_least32_t)'A'; + goto conv; + } else if (UINT32_C(0x1D41A) <= c && c <= UINT32_C(0x1D433)) { + /* bold (small) */ + c -= (uint_least32_t)UINT32_C(0x1D41A) - (uint_least32_t)'a'; + goto conv; + } else if (UINT32_C(0x1D7CE) <= c && c <= UINT32_C(0x1D7D7)) { + /* bold (digit) */ + c -= (uint_least32_t)UINT32_C(0x1D7CE) - (uint_least32_t)'0'; + goto conv; + + } else if (UINT32_C(0x1D434) <= c && c <= UINT32_C(0x1D44D)) { + /* italic (captial) */ + c -= (uint_least32_t)UINT32_C(0x1D434) - (uint_least32_t)'A'; + goto conv; + } else if (UINT32_C(0x1D44E) <= c && c <= UINT32_C(0x1D467)) { + /* italic (small) */ + c -= (uint_least32_t)UINT32_C(0x1D44E) - (uint_least32_t)'a'; + goto conv; + } else if (c == UINT32_C(0x210E)) { + /* italic (small h) */ + c = (uint_least32_t)'h'; + goto conv; + + } else if (UINT32_C(0x1D468) <= c && c <= UINT32_C(0x1D481)) { + /* bold italic (captial) */ + c -= (uint_least32_t)UINT32_C(0x1D468) - (uint_least32_t)'A'; + goto conv; + } else if (UINT32_C(0x1D482) <= c && c <= UINT32_C(0x1D49B)) { + /* bold italic (small) */ + c -= (uint_least32_t)UINT32_C(0x1D482) - (uint_least32_t)'a'; + goto conv; + + } else if (UINT32_C(0x1D670) <= c && c <= UINT32_C(0x1D689)) { + /* monospace (captial) */ + c -= (uint_least32_t)UINT32_C(0x1D670) - (uint_least32_t)'A'; + goto conv; + } else if (UINT32_C(0x1D68A) <= c && c <= UINT32_C(0x1D6A3)) { + /* monospace (small) */ + c -= (uint_least32_t)UINT32_C(0x1D68A) - (uint_least32_t)'a'; + goto conv; + } else if (UINT32_C(0x1D7F6) <= c && c <= UINT32_C(0x1D7FF)) { + /* monospace (digit) */ + c -= (uint_least32_t)UINT32_C(0x1D7F6) - (uint_least32_t)'0'; + 
goto conv; + + } else if (UINT32_C(0x1FBF0) <= c && c <= UINT32_C(0x1FBF9)) { + /* segmented */ + c -= (uint_least32_t)UINT32_C(0x1FBF0) - (uint_least32_t)'0'; + goto conv; + + } else if (UINT32_C(0x1D5A0) <= c && c <= UINT32_C(0x1D5B9)) { + /* sans-serif (captial) */ + c -= (uint_least32_t)UINT32_C(0x1D5A0) - (uint_least32_t)'A'; + goto conv; + } else if (UINT32_C(0x1D5BA) <= c && c <= UINT32_C(0x1D5D3)) { + /* sans-serif (small) */ + c -= (uint_least32_t)UINT32_C(0x1D5BA) - (uint_least32_t)'a'; + goto conv; + } else if (UINT32_C(0x1D7E2) <= c && c <= UINT32_C(0x1D7EB)) { + /* sans-serif (digit) */ + c -= (uint_least32_t)UINT32_C(0x1D7E2) - (uint_least32_t)'0'; + goto conv; + + } else if (UINT32_C(0x1D5D4) <= c && c <= UINT32_C(0x1D5ED)) { + /* sans-serif bold (captial) */ + c -= (uint_least32_t)UINT32_C(0x1D5D4) - (uint_least32_t)'A'; + goto conv; + } else if (UINT32_C(0x1D5EE) <= c && c <= UINT32_C(0x1D607)) { + /* sans-serif bold (small) */ + c -= (uint_least32_t)UINT32_C(0x1D5EE) - (uint_least32_t)'a'; + goto conv; + } else if (UINT32_C(0x1D7EC) <= c && c <= UINT32_C(0x1D7F5)) { + /* sans-serif bold (digit) */ + c -= (uint_least32_t)UINT32_C(0x1D7EC) - (uint_least32_t)'0'; + goto conv; + + } else if (UINT32_C(0x1D608) <= c && c <= UINT32_C(0x1D621)) { + /* sans-serif italic (captial) */ + c -= (uint_least32_t)UINT32_C(0x1D608) - (uint_least32_t)'A'; + goto conv; + } else if (UINT32_C(0x1D622) <= c && c <= UINT32_C(0x1D63B)) { + /* sans-serif italic (small) */ + c -= (uint_least32_t)UINT32_C(0x1D622) - (uint_least32_t)'a'; + goto conv; + + } else if (UINT32_C(0x1D63C) <= c && c <= UINT32_C(0x1D655)) { + /* sans-serif bold italic (captial) */ + c -= (uint_least32_t)UINT32_C(0x1D63C) - (uint_least32_t)'A'; + goto conv; + } else if (UINT32_C(0x1D656) <= c && c <= UINT32_C(0x1D66F)) { + /* sans-serif bold italic (small) */ + c -= (uint_least32_t)UINT32_C(0x1D656) - (uint_least32_t)'a'; + goto conv; + + } else if (UINT32_C(0x1D538) <= c && c <= UINT32_C(0x1D551)) { 
+ /* double-struck (captial) */ + c -= (uint_least32_t)UINT32_C(0x1D538) - (uint_least32_t)'A'; + goto conv; + } else if (UINT32_C(0x1D552) <= c && c <= UINT32_C(0x1D56B)) { + /* double-struck (small) */ + c -= (uint_least32_t)UINT32_C(0x1D552) - (uint_least32_t)'a'; + goto conv; + } else if (UINT32_C(0x1D7D8) <= c && c <= UINT32_C(0x1D7E1)) { + /* double-struck (digit) */ + c -= (uint_least32_t)UINT32_C(0x1D7D8) - (uint_least32_t)'0'; + goto conv; + + } else if (UINT32_C(0x1D504) <= c && c <= UINT32_C(0x1D51D)) { + /* fraktur (captial) */ + c -= (uint_least32_t)UINT32_C(0x1D504) - (uint_least32_t)'A'; + goto conv; + } else if (UINT32_C(0x1D51E) <= c && c <= UINT32_C(0x1D537)) { + /* fraktur (small) */ + c -= (uint_least32_t)UINT32_C(0x1D51E) - (uint_least32_t)'a'; + goto conv; + + } else if (UINT32_C(0x1D56C) <= c && c <= UINT32_C(0x1D585)) { + /* bold fraktur (captial) */ + c -= (uint_least32_t)UINT32_C(0x1D56C) - (uint_least32_t)'A'; + goto conv; + } else if (UINT32_C(0x1D586) <= c && c <= UINT32_C(0x1D59F)) { + /* bold fraktur (small) */ + c -= (uint_least32_t)UINT32_C(0x1D586) - (uint_least32_t)'a'; + goto conv; + + } else if (UINT32_C(0x1D49C) <= c && c <= UINT32_C(0x1D4B5)) { + /* script (captial) */ + c -= (uint_least32_t)UINT32_C(0x1D49C) - (uint_least32_t)'A'; + goto conv; + } else if (UINT32_C(0x1D4B6) <= c && c <= UINT32_C(0x1D4CF)) { + /* script (small) */ + c -= (uint_least32_t)UINT32_C(0x1D4B6) - (uint_least32_t)'a'; + goto conv; + + } else if (UINT32_C(0x1D4D0) <= c && c <= UINT32_C(0x1D4E9)) { + /* bold script (captial) */ + c -= (uint_least32_t)UINT32_C(0x1D4D0) - (uint_least32_t)'A'; + goto conv; + } else if (UINT32_C(0x1D4EA) <= c && c <= UINT32_C(0x1D503)) { + /* bold script (small) */ + c -= (uint_least32_t)UINT32_C(0x1D4EA) - (uint_least32_t)'a'; + goto conv; + + } else { + switch (c) { + /* shogi */ + case UINT32_C(0x2616): c = (uint_least32_t)'w'; goto conv; + case UINT32_C(0x2617): c = (uint_least32_t)'b'; goto conv; + case 
UINT32_C(0x26C9): c = (uint_least32_t)'W'; goto conv; + case UINT32_C(0x26CA): c = (uint_least32_t)'B'; goto conv; + + /* go (white) */ + case UINT32_C(0x25CB): c = (uint_least32_t)'0'; goto conv; + case UINT32_C(0x2686): c = (uint_least32_t)'1'; goto conv; + case UINT32_C(0x2687): c = (uint_least32_t)'2'; goto conv; + + /* go (black) */ + case UINT32_C(0x25CF): c = (uint_least32_t)'0'; goto conv; + case UINT32_C(0x2688): c = (uint_least32_t)'1'; goto conv; + case UINT32_C(0x2689): c = (uint_least32_t)'2'; goto conv; + + /* draughts */ + case UINT32_C(0x26C0): c = (uint_least32_t)'m'; goto conv; + case UINT32_C(0x26C1): c = (uint_least32_t)'k'; goto conv; + case UINT32_C(0x26C2): c = (uint_least32_t)'M'; goto conv; + case UINT32_C(0x26C3): c = (uint_least32_t)'K'; goto conv; + + /* gender symbols */ + case UINT32_C(0x2640): c = (uint_least32_t)'f'; goto conv; + case UINT32_C(0x2642): c = (uint_least32_t)'m'; goto conv; + case UINT32_C(0x263F): c = (uint_least32_t)'i'; goto conv; + + /* double-struck */ + case UINT32_C(0x2102): c = (uint_least32_t)'C'; goto conv; + case UINT32_C(0x210D): c = (uint_least32_t)'H'; goto conv; + case UINT32_C(0x2115): c = (uint_least32_t)'N'; goto conv; + case UINT32_C(0x2119): c = (uint_least32_t)'P'; goto conv; + case UINT32_C(0x211A): c = (uint_least32_t)'Q'; goto conv; + case UINT32_C(0x211D): c = (uint_least32_t)'R'; goto conv; + case UINT32_C(0x2124): c = (uint_least32_t)'Z'; goto conv; + + /* double-struck italic */ + case UINT32_C(0x2145): c = (uint_least32_t)'D'; goto conv; + case UINT32_C(0x2146): c = (uint_least32_t)'d'; goto conv; + case UINT32_C(0x2147): c = (uint_least32_t)'e'; goto conv; + case UINT32_C(0x2148): c = (uint_least32_t)'i'; goto conv; + case UINT32_C(0x2149): c = (uint_least32_t)'j'; goto conv; + + /* fraktur */ + case UINT32_C(0x212D): c = (uint_least32_t)'C'; goto conv; + case UINT32_C(0x210C): c = (uint_least32_t)'H'; goto conv; + case UINT32_C(0x2111): c = (uint_least32_t)'I'; goto conv; + case 
UINT32_C(0x211C): c = (uint_least32_t)'R'; goto conv; + case UINT32_C(0x2128): c = (uint_least32_t)'Z'; goto conv; + + /* script */ + case UINT32_C(0x212C): c = (uint_least32_t)'B'; goto conv; + case UINT32_C(0x2130): c = (uint_least32_t)'E'; goto conv; + case UINT32_C(0x2131): c = (uint_least32_t)'F'; goto conv; + case UINT32_C(0x210B): c = (uint_least32_t)'H'; goto conv; + case UINT32_C(0x2110): c = (uint_least32_t)'I'; goto conv; + case UINT32_C(0x2112): c = (uint_least32_t)'L'; goto conv; + case UINT32_C(0x2133): c = (uint_least32_t)'M'; goto conv; + case UINT32_C(0x211B): c = (uint_least32_t)'R'; goto conv; + case UINT32_C(0x212F): c = (uint_least32_t)'e'; goto conv; + case UINT32_C(0x210A): c = (uint_least32_t)'g'; goto conv; + case UINT32_C(0x2134): c = (uint_least32_t)'o'; goto conv; + + /* buhid */ + case UINT32_C(0x1740): c = (uint_least32_t)'a'; goto conv; + case UINT32_C(0x1741): c = (uint_least32_t)'i'; goto conv; + case UINT32_C(0x1742): c = (uint_least32_t)'u'; goto conv; + case UINT32_C(0x1752): c2 = 'i'; goto budih_combining; + case UINT32_C(0x1753): c2 = 'u'; goto budih_combining; + budih_combining: + c1 = '^'; + goto conv2; + case UINT32_C(0x174A): c1 = 'b'; goto budih; + case UINT32_C(0x1747): c1 = 'd'; goto budih; + case UINT32_C(0x1744): c1 = 'g'; goto budih; + case UINT32_C(0x1751): c1 = 'h'; goto budih; + case UINT32_C(0x1743): c1 = 'k'; goto budih; + case UINT32_C(0x174E): c1 = 'l'; goto budih; + case UINT32_C(0x174B): c1 = 'm'; goto budih; + case UINT32_C(0x1748): c1 = 'n'; goto budih; + case UINT32_C(0x1749): c1 = 'p'; goto budih; + case UINT32_C(0x174D): c1 = 'r'; goto budih; + case UINT32_C(0x1750): c1 = 's'; goto budih; + case UINT32_C(0x1746): c1 = 't'; goto budih; + case UINT32_C(0x174C): c1 = 'y'; goto budih; + case UINT32_C(0x174F): c1 = 'w'; goto budih; + case UINT32_C(0x1745): c1 = '-'; goto budih; + budih: + if (*n) + goto no_conv; + c2 = 'a'; + s = &s[clen]; + *n += clen; + if (!slen) { + ret = LIBCHARCONV_CONVERT_IF_END; + 
goto budih_conv; + } + clen = decode_utf8(s, slen, &c); + if (clen > slen) + return LIBCHARCONV_INDETERMINATE; + if (!clen) + goto budih_conv; + switch (c) { + case UINT32_C(0x1752): c2 = 'i'; *n += clen; break; + case UINT32_C(0x1753): c2 = 'u'; *n += clen; break; + default: + break; + } + budih_conv: + if (c1 == '-') { + c3 = c2; + c2 = 'g'; + c1 = 'n'; + goto conv3_prechecked; + } + goto conv2_prechecked; + + default: + no_match: + *n += clen; + break; + } + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + *n += clen; + if (*ncp) + *cp = c; + *ncp = 1u; + return ret; + +conv2: + if (*n) + goto no_conv; + *n += clen; +conv2_prechecked: + if (*ncp >= 1u) + cp[0] = (uint_least32_t)c1; + if (*ncp >= 2u) + cp[1] = (uint_least32_t)c2; + *ncp = 2u; + return ret; + +conv3_prechecked: + if (*ncp >= 1u) + cp[0] = (uint_least32_t)c1; + if (*ncp >= 2u) + cp[1] = (uint_least32_t)c2; + if (*ncp >= 3u) + cp[2] = (uint_least32_t)c3; + *ncp = 3u; + return ret; +} diff --git a/libcharconv_monospace.c b/libcharconv_monospace.c new file mode 100644 index 0000000..1bd1405 --- /dev/null +++ b/libcharconv_monospace.c @@ -0,0 +1,35 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_monospace(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + if ('A' <= *s && *s <= 'Z') { + c = (uint_least32_t)(UINT32_C(0x1D670) + (unsigned)(*s - 'A')); + goto conv; + } else if ('a' <= *s && *s <= 'z') { + c = (uint_least32_t)(UINT32_C(0x1D68A) + (unsigned)(*s - 'a')); + goto conv; + } else if ('0' <= *s && *s <= '9') { + c = (uint_least32_t)(UINT32_C(0x1D7F6) + (unsigned)(*s - '0')); + goto conv; + } else { + *n += 1u; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_region_indicators.c b/libcharconv_region_indicators.c new file mode 100644 index 0000000..00a6ec8 --- /dev/null +++ b/libcharconv_region_indicators.c @@ -0,0 +1,32 @@ +/* See LICENSE file for copyright and license details. */ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_region_indicators(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + if ('a' <= *s && *s <= 'z') { + c = (uint_least32_t)(UINT32_C(0x1F1E6) + (unsigned)(*s - 'a')); + goto conv; + } else if ('A' <= *s && *s <= 'Z') { + c = (uint_least32_t)(UINT32_C(0x1F1E6) + (unsigned)(*s - 'A')); + goto conv; + } else { + *n += 1u; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_sans_serif.c b/libcharconv_sans_serif.c new file mode 100644 index 0000000..b24e0a7 --- /dev/null +++ b/libcharconv_sans_serif.c @@ -0,0 +1,35 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_sans_serif(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + if ('A' <= *s && *s <= 'Z') { + c = (uint_least32_t)(UINT32_C(0x1D5A0) + (unsigned)(*s - 'A')); + goto conv; + } else if ('a' <= *s && *s <= 'z') { + c = (uint_least32_t)(UINT32_C(0x1D5BA) + (unsigned)(*s - 'a')); + goto conv; + } else if ('0' <= *s && *s <= '9') { + c = (uint_least32_t)(UINT32_C(0x1D7E2) + (unsigned)(*s - '0')); + goto conv; + } else { + *n += 1u; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_sans_serif_bold.c b/libcharconv_sans_serif_bold.c new file mode 100644 index 0000000..527da1e --- /dev/null +++ b/libcharconv_sans_serif_bold.c @@ -0,0 +1,35 @@ +/* See LICENSE file for copyright and license details. */ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_sans_serif_bold(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + if ('A' <= *s && *s <= 'Z') { + c = (uint_least32_t)(UINT32_C(0x1D5D4) + (unsigned)(*s - 'A')); + goto conv; + } else if ('a' <= *s && *s <= 'z') { + c = (uint_least32_t)(UINT32_C(0x1D5EE) + (unsigned)(*s - 'a')); + goto conv; + } else if ('0' <= *s && *s <= '9') { + c = (uint_least32_t)(UINT32_C(0x1D7EC) + (unsigned)(*s - '0')); + goto conv; + } else { + *n += 1u; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_sans_serif_bold_italic.c b/libcharconv_sans_serif_bold_italic.c new file mode 100644 index 0000000..6018e77 --- /dev/null +++ b/libcharconv_sans_serif_bold_italic.c @@ -0,0 +1,32 @@ +/* See LICENSE file for copyright and 
license details. */ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_sans_serif_bold_italic(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + if ('A' <= *s && *s <= 'Z') { + c = (uint_least32_t)(UINT32_C(0x1D63C) + (unsigned)(*s - 'A')); + goto conv; + } else if ('a' <= *s && *s <= 'z') { + c = (uint_least32_t)(UINT32_C(0x1D656) + (unsigned)(*s - 'a')); + goto conv; + } else { + *n += 1u; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_sans_serif_italic.c b/libcharconv_sans_serif_italic.c new file mode 100644 index 0000000..b0661ab --- /dev/null +++ b/libcharconv_sans_serif_italic.c @@ -0,0 +1,32 @@ +/* See LICENSE file for copyright and license details. */ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_sans_serif_italic(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + if ('A' <= *s && *s <= 'Z') { + c = (uint_least32_t)(UINT32_C(0x1D608) + (unsigned)(*s - 'A')); + goto conv; + } else if ('a' <= *s && *s <= 'z') { + c = (uint_least32_t)(UINT32_C(0x1D622) + (unsigned)(*s - 'a')); + goto conv; + } else { + *n += 1u; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_script.c b/libcharconv_script.c new file mode 100644 index 0000000..5a5736b --- /dev/null +++ b/libcharconv_script.c @@ -0,0 +1,65 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_script(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + if (*s == 'B') { + c = (uint_least32_t)UINT32_C(0x212C); + goto conv; + } else if (*s == 'E') { + c = (uint_least32_t)UINT32_C(0x2130); + goto conv; + } else if (*s == 'F') { + c = (uint_least32_t)UINT32_C(0x2131); + goto conv; + } else if (*s == 'H') { + c = (uint_least32_t)UINT32_C(0x210B); + goto conv; + } else if (*s == 'I') { + c = (uint_least32_t)UINT32_C(0x2110); + goto conv; + } else if (*s == 'L') { + c = (uint_least32_t)UINT32_C(0x2112); + goto conv; + } else if (*s == 'M') { + c = (uint_least32_t)UINT32_C(0x2133); + goto conv; + } else if (*s == 'R') { + c = (uint_least32_t)UINT32_C(0x211B); + goto conv; + } else if (*s == 'e') { + c = (uint_least32_t)UINT32_C(0x212F); + goto conv; + } else if (*s == 'g') { + c = (uint_least32_t)UINT32_C(0x210A); + goto conv; + } else if (*s == 'o') { + c = (uint_least32_t)UINT32_C(0x2134); + goto conv; + } else if ('A' <= *s && *s <= 'Z') { + c = (uint_least32_t)(UINT32_C(0x1D49C) + (unsigned)(*s - 'A')); + goto conv; + } else if ('a' <= *s && *s <= 'z') { + c = (uint_least32_t)(UINT32_C(0x1D4B6) + (unsigned)(*s - 'a')); + goto conv; + } else { + *n += 1u; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_segmented.c b/libcharconv_segmented.c new file mode 100644 index 0000000..22dd7df --- /dev/null +++ b/libcharconv_segmented.c @@ -0,0 +1,29 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_segmented(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + if ('0' <= *s && *s <= '9') { + c = (uint_least32_t)(UINT32_C(0x1FBF0) + (unsigned)(*s - '0')); + goto conv; + } else { + *n += 1u; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_shogi.c b/libcharconv_shogi.c new file mode 100644 index 0000000..3c073ec --- /dev/null +++ b/libcharconv_shogi.c @@ -0,0 +1,32 @@ +/* See LICENSE file for copyright and license details. */ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_shogi(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + switch (*s) { + case 'w': c = UINT32_C(0x2616); goto conv; + case 'b': c = UINT32_C(0x2617); goto conv; + case 'W': c = UINT32_C(0x26C9); goto conv; + case 'B': c = UINT32_C(0x26CA); goto conv; + default: + *n += 1u; + break; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/libcharconv_tags.c b/libcharconv_tags.c new file mode 100644 index 0000000..f673406 --- /dev/null +++ b/libcharconv_tags.c @@ -0,0 +1,29 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "libcharconv.h" + + +enum libcharconv_result +libcharconv_tags(const char *s, size_t slen, size_t *n, uint_least32_t *cp, size_t *ncp) +{ + uint_least32_t c; + *n = 0; + for (; slen--; s++) { + if ('\x20' <= *s && *s <= '\x7E') { + c = (uint_least32_t)(UINT32_C(0xE0000) + (unsigned)*s); + goto conv; + } else { + *n += 1u; + } + } +no_conv: + return LIBCHARCONV_NO_CONVERT; + +conv: + if (*n) + goto no_conv; + if (*ncp) + *cp = c; + *n += 1u; + *ncp = 1u; + return LIBCHARCONV_CONVERTED; +} diff --git a/mk/linux.mk b/mk/linux.mk new file mode 100644 index 0000000..ad58f69 --- /dev/null +++ b/mk/linux.mk @@ -0,0 +1,6 @@ +LIBEXT = so +LIBFLAGS = -shared -Wl,-soname,lib$(LIB_NAME).$(LIBEXT).$(LIB_MAJOR) +LIBMAJOREXT = $(LIBEXT).$(LIB_MAJOR) +LIBMINOREXT = $(LIBEXT).$(LIB_VERSION) + +FIX_INSTALL_NAME = : diff --git a/mk/macos.mk b/mk/macos.mk new file mode 100644 index 0000000..b5e2fef --- /dev/null +++ b/mk/macos.mk @@ -0,0 +1,6 @@ +LIBEXT = dylib +LIBFLAGS = -dynamiclib -Wl,-compatibility_version,$(LIB_MAJOR) -Wl,-current_version,$(LIB_VERSION) +LIBMAJOREXT = $(LIB_MAJOR).$(LIBEXT) +LIBMINOREXT = $(LIB_VERSION).$(LIBEXT) + +FIX_INSTALL_NAME = install_name_tool -id "$(PREFIX)/lib/libcharconv.$(LIBMAJOREXT)" diff --git a/mk/windows.mk b/mk/windows.mk new file mode 100644 index 0000000..ed5ec8d --- /dev/null +++ b/mk/windows.mk @@ -0,0 +1,6 @@ +LIBEXT = dll +LIBFLAGS = -shared +LIBMAJOREXT = $(LIB_MAJOR).$(LIBEXT) +LIBMINOREXT = $(LIB_VERSION).$(LIBEXT) + +FIX_INSTALL_NAME = : |
