diff options
-rw-r--r-- | Makefile | 12 | ||||
-rw-r--r-- | common.h | 68 | ||||
-rw-r--r-- | libterminput_check_utf8_char__.c | 36 | ||||
-rw-r--r-- | libterminput_clear_flags.c | 11 | ||||
-rw-r--r-- | libterminput_encode_utf8__.c | 44 | ||||
-rw-r--r-- | libterminput_is_ready.c | 5 | ||||
-rw-r--r-- | libterminput_read.c (renamed from libterminput.c) | 287 | ||||
-rw-r--r-- | libterminput_read_bracketed_paste__.c | 108 | ||||
-rw-r--r-- | libterminput_set_flags.c | 10 | ||||
-rw-r--r-- | libterminput_utf8_decode__.c | 79 |
10 files changed, 380 insertions, 280 deletions
@@ -16,10 +16,18 @@ LIB_VERSION = $(LIB_MAJOR).$(LIB_MINOR) OBJ =\ - libterminput.o + libterminput_read.o\ + libterminput_is_ready.o\ + libterminput_set_flags.o\ + libterminput_clear_flags.o\ + libterminput_encode_utf8__.o\ + libterminput_check_utf8_char__.o\ + libterminput_utf8_decode__.o\ + libterminput_read_bracketed_paste__.o\ HDR =\ - libterminput.h + libterminput.h\ + common.h TESTS =\ interactive-test\ diff --git a/common.h b/common.h new file mode 100644 index 0000000..05b5269 --- /dev/null +++ b/common.h @@ -0,0 +1,68 @@ +/* See LICENSE file for copyright and license details. */ +#include "libterminput.h" + +#include <alloca.h> +#include <ctype.h> +#include <limits.h> +#include <string.h> +#include <unistd.h> + + +#if defined(__GNUC__) +# define HIDDEN __attribute__((__visibility__("hidden"))) +#else +# define HIDDEN +#endif + + +struct input { + enum libterminput_mod mods; + char symbol[7]; +}; + + +/** + * Encode a Unicode codepoint in UTF-8 + * + * @param codepoint The codepoint to encode + * @param buffer Output buffer for the NUL-byte terminated UTF-8 encoding of `codepoint` + */ +HIDDEN void libterminput_encode_utf8__(unsigned long long int codepoint, char buffer[7]); + +/** + * Validate an UTF-8 byte sequence, up to one codepoint encoding + * + * @param s The buffer to read from + * @param size The number of bytes available in `s` + * @param len_out Output parameter for the encoding length of the + * codepoint encoded at the beginning of `s` + * @return 1 if `s` begins with a valid codepoint, + * 0 if `size` is too small to determine the validity, + * -1 if the byte sequence is illegal + */ +HIDDEN int libterminput_check_utf8_char__(const char *s, size_t size, size_t *len_out); + +/** + * Decode a Unicode codepoint encoded in UTF-8 + * + * @param s The buffer to read from + * @param ip Pointer to the current position in `s`, will be updated + * @return The first encode codepoint, 0 if invalid (or if 0) + */ +HIDDEN unsigned long long int libterminput_utf8_decode__(const char *s, size_t *ip); + +/** + * Get input, from the terminal that, that appear after + * the start marker for a bracketed paste + * + * @param fd The file descriptor to the terminal + * @param input Output parameter for input + * @param ctx State for the terminal, parts of the state may be stored in `input` + * @return 1 normally, 0 on end of input, -1 on error + * + * @throws Any reason specified for read(3) + */ +HIDDEN int libterminput_read_bracketed_paste__(int fd, union libterminput_input *input, struct libterminput_state *ctx); + + +#undef HIDDEN diff --git a/libterminput_check_utf8_char__.c b/libterminput_check_utf8_char__.c new file mode 100644 index 0000000..d2884cf --- /dev/null +++ b/libterminput_check_utf8_char__.c @@ -0,0 +1,36 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + + +int +libterminput_check_utf8_char__(const char *s, size_t size, size_t *len_out) +{ + size_t i; + *len_out = 0; + if (!size) { + return 0; + } else if ((*s & 0x80) == 0) { + *len_out = 1U; + return 1; + } else if ((*s & 0xE0) == 0xC0) { + *len_out = 2U; + } else if ((*s & 0xF0) == 0xE0) { + *len_out = 3U; + } else if ((*s & 0xF8) == 0xF0) { + *len_out = 4U; + } else if ((*s & 0xFC) == 0xF8) { + *len_out = 5U; + } else if ((*s & 0xFE) == 0xFC) { + *len_out = 6U; + } else { + *len_out = 0U; + return -1; + } + for (i = 1; i < *len_out; i++) { + if (i == size) + return 0; + if ((s[i] & 0xC0) != 0x80) + return -1; + } + return 1; +} diff --git a/libterminput_clear_flags.c b/libterminput_clear_flags.c new file mode 100644 index 0000000..9eba361 --- /dev/null +++ b/libterminput_clear_flags.c @@ -0,0 +1,11 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + + +int +libterminput_clear_flags(struct libterminput_state *ctx, enum libterminput_flags flags) +{ + ctx->flags |= flags; + ctx->flags ^= flags; + return 0; +} diff --git a/libterminput_encode_utf8__.c b/libterminput_encode_utf8__.c new file mode 100644 index 0000000..7e83a04 --- /dev/null +++ b/libterminput_encode_utf8__.c @@ -0,0 +1,44 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + + +void +libterminput_encode_utf8__(unsigned long long int codepoint, char buffer[7]) +{ + static const char masks[6] = { + (char)0x00, /* 1 byte = 0 high set bits, */ + (char)0xC0, /* 2 bytes = 2 high set bits, */ + (char)0xE0, /* 3 bytes = 3 high set bits, ... */ + (char)0xF0, + (char)0xF8, + (char)0xFC /* 6 bytes = 3 high set bits */ + }; + static const unsigned long long int limits[6] = { + 1ULL << (7 + 0 * 6), /* 1 byte has room for 7 codepoint encoding bits, */ + 1ULL << (5 + 1 * 6), /* 2 bytes has room for 5 bits in the first by and 6 bits the rest, */ + 1ULL << (4 + 2 * 6), /* 3 bytes has room for 4 bits in the first by and 6 bits the rest, ... */ + 1ULL << (3 + 3 * 6), + 1ULL << (2 + 4 * 6), + 1ULL << (1 + 5 * 6) /* 6 bytes has room for 1 bits in the first by and 6 bits the rest */ + }; + + size_t len; + + /* Get encoding length for codepoint */ + for (len = 0; codepoint >= limits[len]; len++); + + /* Set the `len` (but 0 if 1) high bits in the first byte + * to encode the encoding length of the codepoint */ + buffer[0] = masks[len]; + + /* NUL terminate the encoding buffer, + * to mark the encode of the encoding */ + buffer[++len] = '\0'; + + /* Encode the bites representing the code point + * and the length continuation marker bits in + * the non-first bytes */ + for (; --len; codepoint >>= 6) + buffer[len] = (char)((codepoint & 0x3FULL) | 0x80ULL); + buffer[0] |= (char)codepoint; +} diff --git a/libterminput_is_ready.c b/libterminput_is_ready.c new file mode 100644 index 0000000..9c8af91 --- /dev/null +++ b/libterminput_is_ready.c @@ -0,0 +1,5 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + + +extern inline int libterminput_is_ready(const union libterminput_input *input, const struct libterminput_state *ctx); diff --git a/libterminput.c b/libterminput_read.c index 86666c5..50095bd 100644 --- a/libterminput.c +++ b/libterminput_read.c @@ -1,17 +1,5 @@ /* See LICENSE file for copyright and license details. */ -#include "libterminput.h" - -#include <alloca.h> -#include <ctype.h> -#include <limits.h> -#include <string.h> -#include <unistd.h> - - -struct input { - enum libterminput_mod mods; - char symbol[7]; -}; +#include "common.h" static int @@ -122,137 +110,6 @@ again: static void -encode_utf8(unsigned long long int codepoint, char buffer[7]) -{ - static const char masks[6] = {(char)0x00, (char)0xC0, (char)0xE0, (char)0xF0, (char)0xF8, (char)0xFC}; - static const unsigned long long int limits[6] = { - 1ULL << (7 + 0 * 6), - 1ULL << (5 + 1 * 6), - 1ULL << (4 + 2 * 6), - 1ULL << (3 + 3 * 6), - 1ULL << (2 + 4 * 6), - 1ULL << (1 + 5 * 6) - }; - size_t len; - for (len = 0; codepoint >= limits[len]; len++); - buffer[0] = masks[len]; - len += 1; - buffer[len] = '\0'; - for (; --len; codepoint >>= 6) - buffer[len] = (char)((codepoint & 0x3FULL) | 0x80ULL); - buffer[0] |= (char)codepoint; -} - - -static int -check_utf8_char(const char *s, size_t *lenp, size_t size) -{ - size_t i; - *lenp = 0; - if (!size) { - return 0; - } else if ((*s & 0x80) == 0) { - *lenp = 1; - return 1; - } else if ((*s & 0xE0) == 0xC0) { - *lenp = 2; - } else if ((*s & 0xF0) == 0xE0) { - *lenp = 3; - } else if ((*s & 0xF8) == 0xF0) { - *lenp = 4; - } else if ((*s & 0xFC) == 0xF8) { - *lenp = 5; - } else if ((*s & 0xFE) == 0xFC) { - *lenp = 6; - } else { - *lenp = 0; - return -1; - } - for (i = 1; i < *lenp; i++) { - if (i == size) - return 0; - if ((s[i] & 0xC0) != 0x80) - return -1; - } - return 1; -} - - -static unsigned long long int -utf8_decode(const char *s, size_t *ip) -{ - unsigned long long int cp = 0; - size_t len; - - if ((s[*ip] & 0x80) == 0) { - return (unsigned long long int)s[(*ip)++]; - } else if ((s[*ip] & 0xE0) == 0xC0) { - cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xC0U); - len = 2; - goto need_1; - } else if ((s[*ip] & 0xF0) == 0xE0) { - cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xE0U); - len = 3; - goto need_2; - } else if ((s[*ip] & 0xF8) == 0xF0) { - cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xF0U); - len = 4; - goto need_3; - } else if ((s[*ip] & 0xFC) == 0xF8) { - cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xF8U); - len = 5; - goto need_4; - } else if ((s[*ip] & 0xFE) == 0xFC) { - cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xFCU); - len = 6; - goto need_5; - } - -need_5: - if ((s[*ip] & 0xC0) != 0x80) return 0; - cp <<= 6; - cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U); - -need_4: - if ((s[*ip] & 0xC0) != 0x80) return 0; - cp <<= 6; - cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U); - -need_3: - if ((s[*ip] & 0xC0) != 0x80) return 0; - cp <<= 6; - cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U); - -need_2: - if ((s[*ip] & 0xC0) != 0x80) return 0; - cp <<= 6; - cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U); - -need_1: - if ((s[*ip] & 0xC0) != 0x80) return 0; - cp <<= 6; - cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U); - - /* Let's ignore the 0x10FFFF upper bound. */ - - if (cp < 1ULL << (7 + 0 * 6)) - return 0; - if (cp < 1ULL << (5 + 1 * 6)) - return len > 2 ? 0ULL : cp; - if (cp < 1ULL << (4 + 2 * 6)) - return len > 3 ? 0ULL : cp; - if (cp < 1ULL << (3 + 3 * 6)) - return len > 4 ? 0ULL : cp; - if (cp < 1ULL << (2 + 4 * 6)) - return len > 5 ? 0ULL : cp; - if (cp < 1ULL << (1 + 5 * 6)) - return len > 6 ? 0ULL : cp; - - return 0; -} - - -static void parse_sequence(union libterminput_input *input, struct libterminput_state *ctx) { unsigned long long int *nums, numsbuf[6]; @@ -348,9 +205,9 @@ parse_sequence(union libterminput_input *input, struct libterminput_state *ctx) ctx->mouse_tracking = 0; nums = numsbuf; pos = ctx->stored_tail; - if ((nums[0] = utf8_decode(ctx->stored, &ctx->stored_tail)) < 32 || - (nums[1] = utf8_decode(ctx->stored, &ctx->stored_tail)) < 32 || - (nums[2] = utf8_decode(ctx->stored, &ctx->stored_tail)) < 32) { + if ((nums[0] = libterminput_utf8_decode__(ctx->stored, &ctx->stored_tail)) < 32 || + (nums[1] = libterminput_utf8_decode__(ctx->stored, &ctx->stored_tail)) < 32 || + (nums[2] = libterminput_utf8_decode__(ctx->stored, &ctx->stored_tail)) < 32) { ctx->stored_tail = pos; input->keypress.key = LIBTERMINPUT_MACRO; return; @@ -471,7 +328,7 @@ parse_sequence(union libterminput_input *input, struct libterminput_state *ctx) input->type = LIBTERMINPUT_NONE; break; } - encode_utf8(nums[0], input->keypress.symbol); + libterminput_encode_utf8__(nums[0], input->keypress.symbol); input->keypress.times = 1; break; case '$': @@ -612,112 +469,6 @@ parse_sequence(union libterminput_input *input, struct libterminput_state *ctx) } -static int -read_bracketed_paste(int fd, union libterminput_input *input, struct libterminput_state *ctx) -{ - ssize_t r; - size_t n; - - /* Unfortunately there is no standard for how to handle pasted ESC's, - * not even ESC [201~ or ESC ESC. Terminates seem to just paste ESC as - * is, so we cannot do anything about them, however, a good terminal - * would stop the paste at the ~ in ESC [201~, send ~ as normal, and - * then continue the brackated paste mode. */ - - if (ctx->stored_head - ctx->stored_tail) { - ctx->paused = 0; - n = ctx->stored_head - ctx->stored_tail; - if (!strncmp(&ctx->stored[ctx->stored_tail], "\033[201~", n < 6 ? n : 6)) { - if (n >= 6) { - ctx->stored_tail += 6; - if (ctx->stored_tail == ctx->stored_head) - ctx->stored_tail = ctx->stored_head = 0; - ctx->bracketed_paste = 0; - input->type = LIBTERMINPUT_BRACKETED_PASTE_END; - return 1; - } - input->text.nbytes = ctx->stored_head - ctx->stored_tail; - memcpy(input->text.bytes, &ctx->stored[ctx->stored_tail], input->text.nbytes); - r = read(fd, &input->text.bytes[input->text.nbytes], sizeof(input->text.bytes) - input->text.nbytes); - if (r <= 0) - return (int)r; - input->text.nbytes += (size_t)r; - ctx->stored_head = ctx->stored_tail = 0; - goto normal; - } - input->text.nbytes = ctx->stored_head - ctx->stored_tail; - memcpy(input->text.bytes, &ctx->stored[ctx->stored_tail], input->text.nbytes); - ctx->stored_head = ctx->stored_tail = 0; - goto normal; - } - - r = read(fd, input->text.bytes, sizeof(input->text.bytes)); - if (r <= 0) - return (int)r; - input->text.nbytes = (size_t)r; - -normal: - for (n = 0; n + 5 < input->text.nbytes; n++) { - if (input->text.bytes[n + 0] == '\033' && input->text.bytes[n + 1] == '[' && input->text.bytes[n + 2] == '2' && - input->text.bytes[n + 3] == '0' && input->text.bytes[n + 4] == '1' && input->text.bytes[n + 5] == '~') - break; - } - do { - if (n + 4 < input->text.nbytes) { - if (input->text.bytes[n + 0] == '\033' && input->text.bytes[n + 1] == '[' && input->text.bytes[n + 2] == '2' && - input->text.bytes[n + 3] == '0' && input->text.bytes[n + 4] == '1') - break; - n += 1; - } - if (n + 3 < input->text.nbytes) { - if (input->text.bytes[n + 0] == '\033' && input->text.bytes[n + 1] == '[' && input->text.bytes[n + 2] == '2' && - input->text.bytes[n + 3] == '0') - break; - n += 1; - } - if (n + 2 < input->text.nbytes) { - if (input->text.bytes[n + 0] == '\033' && input->text.bytes[n + 1] == '[' && input->text.bytes[n + 2] == '2') - break; - n += 1; - } - if (n + 1 < input->text.nbytes) { - if (input->text.bytes[n + 0] == '\033' && input->text.bytes[n + 1] == '[') - break; - n += 1; - } - if (n + 0 < input->text.nbytes) { - if (input->text.bytes[n + 0] == '\033') - break; - n += 1; - } - } while (0); - if (!n) { - if (input->text.nbytes < 6) { - input->text.type = LIBTERMINPUT_NONE; - memcpy(ctx->stored, input->text.bytes, input->text.nbytes); - ctx->stored_tail = 0; - ctx->stored_head = input->text.nbytes; - ctx->paused = 1; - return 1; - } - ctx->stored_tail = 0; - ctx->stored_head = input->text.nbytes - 6; - memcpy(ctx->stored, &input->text.bytes[6], ctx->stored_head); - if (ctx->stored_tail == ctx->stored_head) - ctx->stored_tail = ctx->stored_head = 0; - ctx->bracketed_paste = 0; - input->type = LIBTERMINPUT_BRACKETED_PASTE_END; - return 1; - } - ctx->stored_tail = 0; - ctx->stored_head = input->text.nbytes - n; - memcpy(ctx->stored, &input->text.bytes[n], ctx->stored_head); - input->text.nbytes = n; - input->text.type = LIBTERMINPUT_TEXT; - return 1; -} - - int libterminput_read(int fd, union libterminput_input *input, struct libterminput_state *ctx) { @@ -736,7 +487,7 @@ libterminput_read(int fd, union libterminput_input *input, struct libterminput_s } if (ctx->bracketed_paste) - return read_bracketed_paste(fd, input, ctx); + return libterminput_read_bracketed_paste__(fd, input, ctx); if (!ctx->mouse_tracking) { r = read_input(fd, &ret, ctx); if (r <= 0) @@ -815,15 +566,15 @@ again: return 1; } n = ctx->stored_tail; - r = check_utf8_char(&ctx->stored[n], &m, ctx->stored_head - n); + r = libterminput_check_utf8_char__(&ctx->stored[n], ctx->stored_head - n, &m); if (r <= 0) goto fallback_to_none_or_macro; n += m; - r = check_utf8_char(&ctx->stored[n], &m, ctx->stored_head - n); + r = libterminput_check_utf8_char__(&ctx->stored[n], ctx->stored_head - n, &m); if (r <= 0) goto fallback_to_none_or_macro; n += m; - r = check_utf8_char(&ctx->stored[n], &m, ctx->stored_head - n); + r = libterminput_check_utf8_char__(&ctx->stored[n], ctx->stored_head - n, &m); if (r <= 0) { fallback_to_none_or_macro: if (!r) { @@ -900,23 +651,3 @@ again: return 1; } - - -int -libterminput_set_flags(struct libterminput_state *ctx, enum libterminput_flags flags) -{ - ctx->flags |= flags; - return 0; -} - - -int -libterminput_clear_flags(struct libterminput_state *ctx, enum libterminput_flags flags) -{ - ctx->flags |= flags; - ctx->flags ^= flags; - return 0; -} - - -extern inline int libterminput_is_ready(const union libterminput_input *input, const struct libterminput_state *ctx); diff --git a/libterminput_read_bracketed_paste__.c b/libterminput_read_bracketed_paste__.c new file mode 100644 index 0000000..0c7a81f --- /dev/null +++ b/libterminput_read_bracketed_paste__.c @@ -0,0 +1,108 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + + +int +libterminput_read_bracketed_paste__(int fd, union libterminput_input *input, struct libterminput_state *ctx) +{ + ssize_t r; + size_t n; + size_t i; + + /* Unfortunately there is no standard for how to handle pasted ESC's, + * not even ESC [201~ or ESC ESC. Terminates seem to just paste ESC as + * is, so we cannot do anything about them, however, a good terminal + * would stop the paste at the ~ in ESC [201~, send ~ as normal, and + * then continue the brackated paste mode. */ + + /* Check for bracketed paste end marker to output LIBTERMINPUT_BRACKETED_PASTE_END + * and stop, and read more if we don't have it; the marker will be at the + * beginning as the function will stop when it encounteres it and output the + * text pasted before it */ + if (ctx->stored_head - ctx->stored_tail) { + /* If we have input buffered, unpause and handle it */ + ctx->paused = 0; + n = ctx->stored_head - ctx->stored_tail; + if (!strncmp(&ctx->stored[ctx->stored_tail], "\033[201~", n < 6U ? n : 6U)) { + /* If starting with bracketed paste end marker, output LIBTERMINPUT_BRACKETED_PASTE_END, */ + if (n >= 6U) { + ctx->stored_tail += 6U; + if (ctx->stored_tail == ctx->stored_head) + ctx->stored_tail = ctx->stored_head = 0; + ctx->bracketed_paste = 0; + input->type = LIBTERMINPUT_BRACKETED_PASTE_END; + return 1; + } + /* otherwise, but if the buffered input is a truncating of the marker, + * move over the data from the stored input buffer to the input buffer + * and store continue reading input */ + input->text.nbytes = ctx->stored_head - ctx->stored_tail; + memcpy(input->text.bytes, &ctx->stored[ctx->stored_tail], input->text.nbytes); + r = read(fd, &input->text.bytes[input->text.nbytes], sizeof(input->text.bytes) - input->text.nbytes); + if (r <= 0) + return (int)r; + input->text.nbytes += (size_t)r; + ctx->stored_head = ctx->stored_tail = 0; + } else { + /* If the buffered input does not begin with the bracketed paste end marker, + * or a truncation of it, move over the data from the stored input buffer + * to the input buffer */ + input->text.nbytes = ctx->stored_head - ctx->stored_tail; + memcpy(input->text.bytes, &ctx->stored[ctx->stored_tail], input->text.nbytes); + ctx->stored_head = ctx->stored_tail = 0; + } + } else { + /* If we don't have any input buffered, read some */ + r = read(fd, input->text.bytes, sizeof(input->text.bytes)); + if (r <= 0) + return (int)r; + input->text.nbytes = (size_t)r; + } + + /* Count the number of bytes available before a bracketed paste end + * marker, or a truncation of it at the end of the input buffer */ + for (n = 0; n + 5U < input->text.nbytes; n++) { + if (!strncmp(&input->text.bytes[n], "\033[201~", 6U)) + break; + } + for (i = 5U; i--;) { + if (n + i < input->text.nbytes) { + if (!strncmp(&input->text.bytes[n], "\033[201~", i + 1U)) + break; + n += 1; + } + } + + /* Of there was pasted input, output it */ + if (n) { + ctx->stored_tail = 0; + ctx->stored_head = input->text.nbytes - n; + memcpy(ctx->stored, &input->text.bytes[n], ctx->stored_head); + input->text.nbytes = n; + input->text.type = LIBTERMINPUT_TEXT; + return 1; + } + + /* If the input is solely a truncation of the bracketed paste + * end marker, output that we do not have any complete input */ + if (input->text.nbytes < 6U) { + input->text.type = LIBTERMINPUT_NONE; + memcpy(ctx->stored, input->text.bytes, input->text.nbytes); + ctx->stored_tail = 0; + ctx->stored_head = input->text.nbytes; + ctx->paused = 1; + return 1; + } + + /* If the input starts with a bracketed paste end marker, + * output it and store the rest of the input buffer for + * later processing */ + ctx->stored_tail = 0; + ctx->stored_head = input->text.nbytes - 6U; + memcpy(ctx->stored, &input->text.bytes[6], ctx->stored_head); + if (ctx->stored_tail == ctx->stored_head) + ctx->stored_tail = ctx->stored_head = 0; + ctx->bracketed_paste = 0; + input->type = LIBTERMINPUT_BRACKETED_PASTE_END; + return 1; +} diff --git a/libterminput_set_flags.c b/libterminput_set_flags.c new file mode 100644 index 0000000..508d0d6 --- /dev/null +++ b/libterminput_set_flags.c @@ -0,0 +1,10 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + + +int +libterminput_set_flags(struct libterminput_state *ctx, enum libterminput_flags flags) +{ + ctx->flags |= flags; + return 0; +} diff --git a/libterminput_utf8_decode__.c b/libterminput_utf8_decode__.c new file mode 100644 index 0000000..e4d0e75 --- /dev/null +++ b/libterminput_utf8_decode__.c @@ -0,0 +1,79 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + + +unsigned long long int +libterminput_utf8_decode__(const char *s, size_t *ip) +{ + unsigned long long int cp = 0; + size_t len; + + /* Parse the first byte, to get the highest codepoint bits and the encoding length */ + if ((s[*ip] & 0x80) == 0) { + return (unsigned long long int)s[(*ip)++]; + } else if ((s[*ip] & 0xE0) == 0xC0) { + cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xC0U); + len = 2U; + goto need_1; + } else if ((s[*ip] & 0xF0) == 0xE0) { + cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xE0U); + len = 3U; + goto need_2; + } else if ((s[*ip] & 0xF8) == 0xF0) { + cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xF0U); + len = 4U; + goto need_3; + } else if ((s[*ip] & 0xFC) == 0xF8) { + cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xF8U); + len = 5U; + goto need_4; + } else if ((s[*ip] & 0xFE) == 0xFC) { + cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xFCU); + len = 6U; + goto need_5; + } + + /* Parse continuation bytes; check marked as continuation the get codepoint bits */ +need_5: + if ((s[*ip] & 0xC0) != 0x80) return 0; + cp <<= 6; + cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U); + +need_4: + if ((s[*ip] & 0xC0) != 0x80) return 0; + cp <<= 6; + cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U); + +need_3: + if ((s[*ip] & 0xC0) != 0x80) return 0; + cp <<= 6; + cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U); + +need_2: + if ((s[*ip] & 0xC0) != 0x80) return 0; + cp <<= 6; + cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U); + +need_1: + if ((s[*ip] & 0xC0) != 0x80) return 0; + cp <<= 6; + cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U); + + /* Check that encoded codepoint is encoded with the minimum possible length */ + if (cp < 1ULL << (7 + 0 * 6)) + return 0; + if (cp < 1ULL << (5 + 1 * 6)) + return len > 2U ? 0ULL : cp; + if (cp < 1ULL << (4 + 2 * 6)) + return len > 3U ? 0ULL : cp; + if (cp < 1ULL << (3 + 3 * 6)) + return len > 4U ? 0ULL : cp; + if (cp < 1ULL << (2 + 4 * 6)) + return len > 5U ? 0ULL : cp; + if (cp < 1ULL << (1 + 5 * 6)) + return len > 6U ? 0ULL : cp; + + /* (Let's ignore the 0x10FFFF upper bound.) */ + + return 0; +} |