aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMattias Andrée <m@maandree.se>2025-02-16 14:23:16 +0100
committerMattias Andrée <m@maandree.se>2025-02-16 14:23:16 +0100
commit16e00dd5f26ce342e9562bec08f529d98c23c01c (patch)
treea8389795475d59653930322e10e9f07025ff1e14
parentinteractive-test: add TEST_LIBTERMINPUT_PRINT_STATE (diff)
downloadlibterminput-16e00dd5f26ce342e9562bec08f529d98c23c01c.tar.gz
libterminput-16e00dd5f26ce342e9562bec08f529d98c23c01c.tar.bz2
libterminput-16e00dd5f26ce342e9562bec08f529d98c23c01c.tar.xz
Improve code organisation, documentation, and m code improvement
Signed-off-by: Mattias Andrée <m@maandree.se>
-rw-r--r--Makefile12
-rw-r--r--common.h68
-rw-r--r--libterminput_check_utf8_char__.c36
-rw-r--r--libterminput_clear_flags.c11
-rw-r--r--libterminput_encode_utf8__.c44
-rw-r--r--libterminput_is_ready.c5
-rw-r--r--libterminput_read.c (renamed from libterminput.c)287
-rw-r--r--libterminput_read_bracketed_paste__.c108
-rw-r--r--libterminput_set_flags.c10
-rw-r--r--libterminput_utf8_decode__.c79
10 files changed, 380 insertions, 280 deletions
diff --git a/Makefile b/Makefile
index 5295a38..e958335 100644
--- a/Makefile
+++ b/Makefile
@@ -16,10 +16,18 @@ LIB_VERSION = $(LIB_MAJOR).$(LIB_MINOR)
OBJ =\
- libterminput.o
+ libterminput_read.o\
+ libterminput_is_ready.o\
+ libterminput_set_flags.o\
+ libterminput_clear_flags.o\
+ libterminput_encode_utf8__.o\
+ libterminput_check_utf8_char__.o\
+ libterminput_utf8_decode__.o\
+ libterminput_read_bracketed_paste__.o\
HDR =\
- libterminput.h
+ libterminput.h\
+ common.h
TESTS =\
interactive-test\
diff --git a/common.h b/common.h
new file mode 100644
index 0000000..05b5269
--- /dev/null
+++ b/common.h
@@ -0,0 +1,68 @@
+/* See LICENSE file for copyright and license details. */
+#include "libterminput.h"
+
+#include <alloca.h>
+#include <ctype.h>
+#include <limits.h>
+#include <string.h>
+#include <unistd.h>
+
+
+#if defined(__GNUC__)
+# define HIDDEN __attribute__((__visibility__("hidden")))
+#else
+# define HIDDEN
+#endif
+
+
+struct input {
+ enum libterminput_mod mods; /* NOTE(review): presumably the modifiers active for this input — confirm against libterminput.h */
+ char symbol[7]; /* 7 bytes, the same size as the `buffer[7]` that libterminput_encode_utf8__ NUL-terminates */
+};
+
+
+/**
+ * Encode a Unicode codepoint in UTF-8
+ *
+ * @param codepoint The codepoint to encode
+ * @param buffer Output buffer for the NUL-byte terminated UTF-8 encoding of `codepoint`
+ */
+HIDDEN void libterminput_encode_utf8__(unsigned long long int codepoint, char buffer[7]);
+
+/**
+ * Validate a UTF-8 byte sequence, up to one codepoint encoding
+ *
+ * @param s The buffer to read from
+ * @param size The number of bytes available in `s`
+ * @param len_out Output parameter for the encoding length of the
+ * codepoint encoded at the beginning of `s`
+ * @return 1 if `s` begins with a valid codepoint,
+ * 0 if `size` is too small to determine the validity,
+ * -1 if the byte sequence is illegal
+ */
+HIDDEN int libterminput_check_utf8_char__(const char *s, size_t size, size_t *len_out);
+
+/**
+ * Decode a Unicode codepoint encoded in UTF-8
+ *
+ * @param s The buffer to read from
+ * @param ip Pointer to the current position in `s`, will be updated
+ * @return The first encoded codepoint, 0 if invalid (or if 0)
+ */
+HIDDEN unsigned long long int libterminput_utf8_decode__(const char *s, size_t *ip);
+
+/**
+ * Get input from the terminal that appears after
+ * the start marker for a bracketed paste
+ *
+ * @param fd The file descriptor to the terminal
+ * @param input Output parameter for input
+ * @param ctx State for the terminal, parts of the state may be stored in `input`
+ * @return 1 normally, 0 on end of input, -1 on error
+ *
+ * @throws Any reason specified for read(3)
+ */
+HIDDEN int libterminput_read_bracketed_paste__(int fd, union libterminput_input *input, struct libterminput_state *ctx);
+
+
+#undef HIDDEN
diff --git a/libterminput_check_utf8_char__.c b/libterminput_check_utf8_char__.c
new file mode 100644
index 0000000..d2884cf
--- /dev/null
+++ b/libterminput_check_utf8_char__.c
@@ -0,0 +1,36 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+int
+libterminput_check_utf8_char__(const char *s, size_t size, size_t *len_out)
+{
+ size_t i;
+ *len_out = 0;
+ if (!size) { /* nothing to inspect yet */
+ return 0;
+ } else if ((*s & 0x80) == 0) { /* 0xxxxxxx: single byte, no continuation bytes to check */
+ *len_out = 1U;
+ return 1;
+ } else if ((*s & 0xE0) == 0xC0) { /* 110xxxxx */
+ *len_out = 2U;
+ } else if ((*s & 0xF0) == 0xE0) { /* 1110xxxx */
+ *len_out = 3U;
+ } else if ((*s & 0xF8) == 0xF0) { /* 11110xxx */
+ *len_out = 4U;
+ } else if ((*s & 0xFC) == 0xF8) { /* 111110xx */
+ *len_out = 5U;
+ } else if ((*s & 0xFE) == 0xFC) { /* 1111110x */
+ *len_out = 6U;
+ } else { /* 10xxxxxx, 0xFE or 0xFF: not a valid first byte */
+ *len_out = 0U;
+ return -1;
+ }
+ for (i = 1; i < *len_out; i++) {
+ if (i == size) /* well-formed so far, but the rest of the sequence is not available */
+ return 0;
+ if ((s[i] & 0xC0) != 0x80) /* every byte after the first must be 10xxxxxx */
+ return -1;
+ }
+ return 1; /* byte structure only: the encoded value itself is never computed or checked here */
+}
diff --git a/libterminput_clear_flags.c b/libterminput_clear_flags.c
new file mode 100644
index 0000000..9eba361
--- /dev/null
+++ b/libterminput_clear_flags.c
@@ -0,0 +1,11 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+int
+libterminput_clear_flags(struct libterminput_state *ctx, enum libterminput_flags flags)
+{
+ ctx->flags |= flags; /* first force every bit in `flags` on... */
+ ctx->flags ^= flags; /* ...then toggle exactly those bits off; all other bits are unchanged */
+ return 0; /* no failure path: always returns 0 */
+}
diff --git a/libterminput_encode_utf8__.c b/libterminput_encode_utf8__.c
new file mode 100644
index 0000000..7e83a04
--- /dev/null
+++ b/libterminput_encode_utf8__.c
@@ -0,0 +1,44 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+void
+libterminput_encode_utf8__(unsigned long long int codepoint, char buffer[7])
+{
+ static const char masks[6] = {
+ (char)0x00, /* 1 byte = 0 high set bits, */
+ (char)0xC0, /* 2 bytes = 2 high set bits, */
+ (char)0xE0, /* 3 bytes = 3 high set bits, ... */
+ (char)0xF0,
+ (char)0xF8,
+ (char)0xFC /* 6 bytes = 6 high set bits */
+ };
+ static const unsigned long long int limits[6] = {
+ 1ULL << (7 + 0 * 6), /* 1 byte has room for 7 codepoint encoding bits, */
+ 1ULL << (5 + 1 * 6), /* 2 bytes have room for 5 bits in the first byte and 6 bits in each of the rest, */
+ 1ULL << (4 + 2 * 6), /* 3 bytes have room for 4 bits in the first byte and 6 bits in each of the rest, ... */
+ 1ULL << (3 + 3 * 6),
+ 1ULL << (2 + 4 * 6),
+ 1ULL << (1 + 5 * 6) /* 6 bytes have room for 1 bit in the first byte and 6 bits in each of the rest */
+ };
+
+ size_t len;
+
+ /* Get the encoding length for the codepoint, minus 1 (usable as index into the tables above) */
+ for (len = 0; codepoint >= limits[len]; len++); /* NOTE(review): `len` is unbounded; codepoint >= limits[5] (2^31) indexes past the 6-entry tables */
+
+ /* Set the high bits in the first byte that encode the encoding
+ * length: `len + 1` bits, or none for a 1-byte encoding */
+ buffer[0] = masks[len];
+
+ /* NUL terminate the encoding buffer,
+ * to mark the end of the encoding */
+ buffer[++len] = '\0';
+
+ /* Encode the bits representing the codepoint,
+ * together with the continuation marker bits, in
+ * the non-first bytes; the rest go in the first byte */
+ for (; --len; codepoint >>= 6)
+ buffer[len] = (char)((codepoint & 0x3FULL) | 0x80ULL);
+ buffer[0] |= (char)codepoint;
+}
diff --git a/libterminput_is_ready.c b/libterminput_is_ready.c
new file mode 100644
index 0000000..9c8af91
--- /dev/null
+++ b/libterminput_is_ready.c
@@ -0,0 +1,5 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+extern inline int libterminput_is_ready(const union libterminput_input *input, const struct libterminput_state *ctx);
diff --git a/libterminput.c b/libterminput_read.c
index 86666c5..50095bd 100644
--- a/libterminput.c
+++ b/libterminput_read.c
@@ -1,17 +1,5 @@
/* See LICENSE file for copyright and license details. */
-#include "libterminput.h"
-
-#include <alloca.h>
-#include <ctype.h>
-#include <limits.h>
-#include <string.h>
-#include <unistd.h>
-
-
-struct input {
- enum libterminput_mod mods;
- char symbol[7];
-};
+#include "common.h"
static int
@@ -122,137 +110,6 @@ again:
static void
-encode_utf8(unsigned long long int codepoint, char buffer[7])
-{
- static const char masks[6] = {(char)0x00, (char)0xC0, (char)0xE0, (char)0xF0, (char)0xF8, (char)0xFC};
- static const unsigned long long int limits[6] = {
- 1ULL << (7 + 0 * 6),
- 1ULL << (5 + 1 * 6),
- 1ULL << (4 + 2 * 6),
- 1ULL << (3 + 3 * 6),
- 1ULL << (2 + 4 * 6),
- 1ULL << (1 + 5 * 6)
- };
- size_t len;
- for (len = 0; codepoint >= limits[len]; len++);
- buffer[0] = masks[len];
- len += 1;
- buffer[len] = '\0';
- for (; --len; codepoint >>= 6)
- buffer[len] = (char)((codepoint & 0x3FULL) | 0x80ULL);
- buffer[0] |= (char)codepoint;
-}
-
-
-static int
-check_utf8_char(const char *s, size_t *lenp, size_t size)
-{
- size_t i;
- *lenp = 0;
- if (!size) {
- return 0;
- } else if ((*s & 0x80) == 0) {
- *lenp = 1;
- return 1;
- } else if ((*s & 0xE0) == 0xC0) {
- *lenp = 2;
- } else if ((*s & 0xF0) == 0xE0) {
- *lenp = 3;
- } else if ((*s & 0xF8) == 0xF0) {
- *lenp = 4;
- } else if ((*s & 0xFC) == 0xF8) {
- *lenp = 5;
- } else if ((*s & 0xFE) == 0xFC) {
- *lenp = 6;
- } else {
- *lenp = 0;
- return -1;
- }
- for (i = 1; i < *lenp; i++) {
- if (i == size)
- return 0;
- if ((s[i] & 0xC0) != 0x80)
- return -1;
- }
- return 1;
-}
-
-
-static unsigned long long int
-utf8_decode(const char *s, size_t *ip)
-{
- unsigned long long int cp = 0;
- size_t len;
-
- if ((s[*ip] & 0x80) == 0) {
- return (unsigned long long int)s[(*ip)++];
- } else if ((s[*ip] & 0xE0) == 0xC0) {
- cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xC0U);
- len = 2;
- goto need_1;
- } else if ((s[*ip] & 0xF0) == 0xE0) {
- cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xE0U);
- len = 3;
- goto need_2;
- } else if ((s[*ip] & 0xF8) == 0xF0) {
- cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xF0U);
- len = 4;
- goto need_3;
- } else if ((s[*ip] & 0xFC) == 0xF8) {
- cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xF8U);
- len = 5;
- goto need_4;
- } else if ((s[*ip] & 0xFE) == 0xFC) {
- cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xFCU);
- len = 6;
- goto need_5;
- }
-
-need_5:
- if ((s[*ip] & 0xC0) != 0x80) return 0;
- cp <<= 6;
- cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
-
-need_4:
- if ((s[*ip] & 0xC0) != 0x80) return 0;
- cp <<= 6;
- cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
-
-need_3:
- if ((s[*ip] & 0xC0) != 0x80) return 0;
- cp <<= 6;
- cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
-
-need_2:
- if ((s[*ip] & 0xC0) != 0x80) return 0;
- cp <<= 6;
- cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
-
-need_1:
- if ((s[*ip] & 0xC0) != 0x80) return 0;
- cp <<= 6;
- cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
-
- /* Let's ignore the 0x10FFFF upper bound. */
-
- if (cp < 1ULL << (7 + 0 * 6))
- return 0;
- if (cp < 1ULL << (5 + 1 * 6))
- return len > 2 ? 0ULL : cp;
- if (cp < 1ULL << (4 + 2 * 6))
- return len > 3 ? 0ULL : cp;
- if (cp < 1ULL << (3 + 3 * 6))
- return len > 4 ? 0ULL : cp;
- if (cp < 1ULL << (2 + 4 * 6))
- return len > 5 ? 0ULL : cp;
- if (cp < 1ULL << (1 + 5 * 6))
- return len > 6 ? 0ULL : cp;
-
- return 0;
-}
-
-
-static void
parse_sequence(union libterminput_input *input, struct libterminput_state *ctx)
{
unsigned long long int *nums, numsbuf[6];
@@ -348,9 +205,9 @@ parse_sequence(union libterminput_input *input, struct libterminput_state *ctx)
ctx->mouse_tracking = 0;
nums = numsbuf;
pos = ctx->stored_tail;
- if ((nums[0] = utf8_decode(ctx->stored, &ctx->stored_tail)) < 32 ||
- (nums[1] = utf8_decode(ctx->stored, &ctx->stored_tail)) < 32 ||
- (nums[2] = utf8_decode(ctx->stored, &ctx->stored_tail)) < 32) {
+ if ((nums[0] = libterminput_utf8_decode__(ctx->stored, &ctx->stored_tail)) < 32 ||
+ (nums[1] = libterminput_utf8_decode__(ctx->stored, &ctx->stored_tail)) < 32 ||
+ (nums[2] = libterminput_utf8_decode__(ctx->stored, &ctx->stored_tail)) < 32) {
ctx->stored_tail = pos;
input->keypress.key = LIBTERMINPUT_MACRO;
return;
@@ -471,7 +328,7 @@ parse_sequence(union libterminput_input *input, struct libterminput_state *ctx)
input->type = LIBTERMINPUT_NONE;
break;
}
- encode_utf8(nums[0], input->keypress.symbol);
+ libterminput_encode_utf8__(nums[0], input->keypress.symbol);
input->keypress.times = 1;
break;
case '$':
@@ -612,112 +469,6 @@ parse_sequence(union libterminput_input *input, struct libterminput_state *ctx)
}
-static int
-read_bracketed_paste(int fd, union libterminput_input *input, struct libterminput_state *ctx)
-{
- ssize_t r;
- size_t n;
-
- /* Unfortunately there is no standard for how to handle pasted ESC's,
- * not even ESC [201~ or ESC ESC. Terminates seem to just paste ESC as
- * is, so we cannot do anything about them, however, a good terminal
- * would stop the paste at the ~ in ESC [201~, send ~ as normal, and
- * then continue the brackated paste mode. */
-
- if (ctx->stored_head - ctx->stored_tail) {
- ctx->paused = 0;
- n = ctx->stored_head - ctx->stored_tail;
- if (!strncmp(&ctx->stored[ctx->stored_tail], "\033[201~", n < 6 ? n : 6)) {
- if (n >= 6) {
- ctx->stored_tail += 6;
- if (ctx->stored_tail == ctx->stored_head)
- ctx->stored_tail = ctx->stored_head = 0;
- ctx->bracketed_paste = 0;
- input->type = LIBTERMINPUT_BRACKETED_PASTE_END;
- return 1;
- }
- input->text.nbytes = ctx->stored_head - ctx->stored_tail;
- memcpy(input->text.bytes, &ctx->stored[ctx->stored_tail], input->text.nbytes);
- r = read(fd, &input->text.bytes[input->text.nbytes], sizeof(input->text.bytes) - input->text.nbytes);
- if (r <= 0)
- return (int)r;
- input->text.nbytes += (size_t)r;
- ctx->stored_head = ctx->stored_tail = 0;
- goto normal;
- }
- input->text.nbytes = ctx->stored_head - ctx->stored_tail;
- memcpy(input->text.bytes, &ctx->stored[ctx->stored_tail], input->text.nbytes);
- ctx->stored_head = ctx->stored_tail = 0;
- goto normal;
- }
-
- r = read(fd, input->text.bytes, sizeof(input->text.bytes));
- if (r <= 0)
- return (int)r;
- input->text.nbytes = (size_t)r;
-
-normal:
- for (n = 0; n + 5 < input->text.nbytes; n++) {
- if (input->text.bytes[n + 0] == '\033' && input->text.bytes[n + 1] == '[' && input->text.bytes[n + 2] == '2' &&
- input->text.bytes[n + 3] == '0' && input->text.bytes[n + 4] == '1' && input->text.bytes[n + 5] == '~')
- break;
- }
- do {
- if (n + 4 < input->text.nbytes) {
- if (input->text.bytes[n + 0] == '\033' && input->text.bytes[n + 1] == '[' && input->text.bytes[n + 2] == '2' &&
- input->text.bytes[n + 3] == '0' && input->text.bytes[n + 4] == '1')
- break;
- n += 1;
- }
- if (n + 3 < input->text.nbytes) {
- if (input->text.bytes[n + 0] == '\033' && input->text.bytes[n + 1] == '[' && input->text.bytes[n + 2] == '2' &&
- input->text.bytes[n + 3] == '0')
- break;
- n += 1;
- }
- if (n + 2 < input->text.nbytes) {
- if (input->text.bytes[n + 0] == '\033' && input->text.bytes[n + 1] == '[' && input->text.bytes[n + 2] == '2')
- break;
- n += 1;
- }
- if (n + 1 < input->text.nbytes) {
- if (input->text.bytes[n + 0] == '\033' && input->text.bytes[n + 1] == '[')
- break;
- n += 1;
- }
- if (n + 0 < input->text.nbytes) {
- if (input->text.bytes[n + 0] == '\033')
- break;
- n += 1;
- }
- } while (0);
- if (!n) {
- if (input->text.nbytes < 6) {
- input->text.type = LIBTERMINPUT_NONE;
- memcpy(ctx->stored, input->text.bytes, input->text.nbytes);
- ctx->stored_tail = 0;
- ctx->stored_head = input->text.nbytes;
- ctx->paused = 1;
- return 1;
- }
- ctx->stored_tail = 0;
- ctx->stored_head = input->text.nbytes - 6;
- memcpy(ctx->stored, &input->text.bytes[6], ctx->stored_head);
- if (ctx->stored_tail == ctx->stored_head)
- ctx->stored_tail = ctx->stored_head = 0;
- ctx->bracketed_paste = 0;
- input->type = LIBTERMINPUT_BRACKETED_PASTE_END;
- return 1;
- }
- ctx->stored_tail = 0;
- ctx->stored_head = input->text.nbytes - n;
- memcpy(ctx->stored, &input->text.bytes[n], ctx->stored_head);
- input->text.nbytes = n;
- input->text.type = LIBTERMINPUT_TEXT;
- return 1;
-}
-
-
int
libterminput_read(int fd, union libterminput_input *input, struct libterminput_state *ctx)
{
@@ -736,7 +487,7 @@ libterminput_read(int fd, union libterminput_input *input, struct libterminput_s
}
if (ctx->bracketed_paste)
- return read_bracketed_paste(fd, input, ctx);
+ return libterminput_read_bracketed_paste__(fd, input, ctx);
if (!ctx->mouse_tracking) {
r = read_input(fd, &ret, ctx);
if (r <= 0)
@@ -815,15 +566,15 @@ again:
return 1;
}
n = ctx->stored_tail;
- r = check_utf8_char(&ctx->stored[n], &m, ctx->stored_head - n);
+ r = libterminput_check_utf8_char__(&ctx->stored[n], ctx->stored_head - n, &m);
if (r <= 0)
goto fallback_to_none_or_macro;
n += m;
- r = check_utf8_char(&ctx->stored[n], &m, ctx->stored_head - n);
+ r = libterminput_check_utf8_char__(&ctx->stored[n], ctx->stored_head - n, &m);
if (r <= 0)
goto fallback_to_none_or_macro;
n += m;
- r = check_utf8_char(&ctx->stored[n], &m, ctx->stored_head - n);
+ r = libterminput_check_utf8_char__(&ctx->stored[n], ctx->stored_head - n, &m);
if (r <= 0) {
fallback_to_none_or_macro:
if (!r) {
@@ -900,23 +651,3 @@ again:
return 1;
}
-
-
-int
-libterminput_set_flags(struct libterminput_state *ctx, enum libterminput_flags flags)
-{
- ctx->flags |= flags;
- return 0;
-}
-
-
-int
-libterminput_clear_flags(struct libterminput_state *ctx, enum libterminput_flags flags)
-{
- ctx->flags |= flags;
- ctx->flags ^= flags;
- return 0;
-}
-
-
-extern inline int libterminput_is_ready(const union libterminput_input *input, const struct libterminput_state *ctx);
diff --git a/libterminput_read_bracketed_paste__.c b/libterminput_read_bracketed_paste__.c
new file mode 100644
index 0000000..0c7a81f
--- /dev/null
+++ b/libterminput_read_bracketed_paste__.c
@@ -0,0 +1,108 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+int
+libterminput_read_bracketed_paste__(int fd, union libterminput_input *input, struct libterminput_state *ctx)
+{
+ ssize_t r;
+ size_t n;
+ size_t i;
+
+ /* Unfortunately there is no standard for how to handle pasted ESC's,
+ * not even ESC [201~ or ESC ESC. Terminals seem to just paste ESC as
+ * is, so we cannot do anything about them, however, a good terminal
+ * would stop the paste at the ~ in ESC [201~, send ~ as normal, and
+ * then continue the bracketed paste mode. */
+
+ /* Check for bracketed paste end marker to output LIBTERMINPUT_BRACKETED_PASTE_END
+ * and stop, and read more if we don't have it; the marker will be at the
+ * beginning as the function will stop when it encounters it and output the
+ * text pasted before it */
+ if (ctx->stored_head - ctx->stored_tail) {
+ /* If we have input buffered, unpause and handle it */
+ ctx->paused = 0;
+ n = ctx->stored_head - ctx->stored_tail;
+ if (!strncmp(&ctx->stored[ctx->stored_tail], "\033[201~", n < 6U ? n : 6U)) {
+ /* If starting with the complete bracketed paste end marker, output LIBTERMINPUT_BRACKETED_PASTE_END, */
+ if (n >= 6U) {
+ ctx->stored_tail += 6U;
+ if (ctx->stored_tail == ctx->stored_head)
+ ctx->stored_tail = ctx->stored_head = 0;
+ ctx->bracketed_paste = 0;
+ input->type = LIBTERMINPUT_BRACKETED_PASTE_END;
+ return 1;
+ }
+ /* otherwise, the buffered input is a truncation of the marker, so
+ * move over the data from the stored input buffer to the input buffer
+ * and continue reading input after it */
+ input->text.nbytes = ctx->stored_head - ctx->stored_tail;
+ memcpy(input->text.bytes, &ctx->stored[ctx->stored_tail], input->text.nbytes);
+ r = read(fd, &input->text.bytes[input->text.nbytes], sizeof(input->text.bytes) - input->text.nbytes);
+ if (r <= 0)
+ return (int)r;
+ input->text.nbytes += (size_t)r;
+ ctx->stored_head = ctx->stored_tail = 0;
+ } else {
+ /* If the buffered input does not begin with the bracketed paste end marker,
+ * or a truncation of it, move over the data from the stored input buffer
+ * to the input buffer */
+ input->text.nbytes = ctx->stored_head - ctx->stored_tail;
+ memcpy(input->text.bytes, &ctx->stored[ctx->stored_tail], input->text.nbytes);
+ ctx->stored_head = ctx->stored_tail = 0;
+ }
+ } else {
+ /* If we don't have any input buffered, read some */
+ r = read(fd, input->text.bytes, sizeof(input->text.bytes));
+ if (r <= 0)
+ return (int)r;
+ input->text.nbytes = (size_t)r;
+ }
+
+ /* Count the number of bytes available before a bracketed paste end
+ * marker, or a truncation of it at the end of the input buffer */
+ for (n = 0; n + 5U < input->text.nbytes; n++) {
+ if (!strncmp(&input->text.bytes[n], "\033[201~", 6U))
+ break;
+ }
+ for (i = 5U; i--;) { /* i = 4, 3, 2, 1, 0: try marker prefixes of i + 1 bytes, longest first */
+ if (n + i < input->text.nbytes) {
+ if (!strncmp(&input->text.bytes[n], "\033[201~", i + 1U))
+ break;
+ n += 1;
+ }
+ }
+
+ /* If there was pasted input, output it and keep whatever follows it stored for the next call */
+ if (n) {
+ ctx->stored_tail = 0;
+ ctx->stored_head = input->text.nbytes - n;
+ memcpy(ctx->stored, &input->text.bytes[n], ctx->stored_head);
+ input->text.nbytes = n;
+ input->text.type = LIBTERMINPUT_TEXT;
+ return 1;
+ }
+
+ /* If the input is solely a truncation of the bracketed paste
+ * end marker, output that we do not have any complete input */
+ if (input->text.nbytes < 6U) {
+ input->text.type = LIBTERMINPUT_NONE;
+ memcpy(ctx->stored, input->text.bytes, input->text.nbytes);
+ ctx->stored_tail = 0;
+ ctx->stored_head = input->text.nbytes;
+ ctx->paused = 1;
+ return 1;
+ }
+
+ /* If the input starts with a bracketed paste end marker,
+ * output it and store the rest of the input buffer for
+ * later processing */
+ ctx->stored_tail = 0;
+ ctx->stored_head = input->text.nbytes - 6U;
+ memcpy(ctx->stored, &input->text.bytes[6], ctx->stored_head);
+ if (ctx->stored_tail == ctx->stored_head)
+ ctx->stored_tail = ctx->stored_head = 0;
+ ctx->bracketed_paste = 0;
+ input->type = LIBTERMINPUT_BRACKETED_PASTE_END;
+ return 1;
+}
diff --git a/libterminput_set_flags.c b/libterminput_set_flags.c
new file mode 100644
index 0000000..508d0d6
--- /dev/null
+++ b/libterminput_set_flags.c
@@ -0,0 +1,10 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+int
+libterminput_set_flags(struct libterminput_state *ctx, enum libterminput_flags flags)
+{
+ ctx->flags |= flags; /* turn on every bit in `flags`; bits already set stay set */
+ return 0; /* no failure path: always returns 0 */
+}
diff --git a/libterminput_utf8_decode__.c b/libterminput_utf8_decode__.c
new file mode 100644
index 0000000..e4d0e75
--- /dev/null
+++ b/libterminput_utf8_decode__.c
@@ -0,0 +1,79 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+unsigned long long int
+libterminput_utf8_decode__(const char *s, size_t *ip)
+{
+ unsigned long long int cp = 0;
+ size_t len;
+
+ /* Parse the first byte, to get the highest codepoint bits and the encoding length */
+ if ((s[*ip] & 0x80) == 0) {
+ return (unsigned long long int)s[(*ip)++];
+ } else if ((s[*ip] & 0xE0) == 0xC0) {
+ cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xC0U);
+ len = 2U;
+ goto need_1;
+ } else if ((s[*ip] & 0xF0) == 0xE0) {
+ cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xE0U);
+ len = 3U;
+ goto need_2;
+ } else if ((s[*ip] & 0xF8) == 0xF0) {
+ cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xF0U);
+ len = 4U;
+ goto need_3;
+ } else if ((s[*ip] & 0xFC) == 0xF8) {
+ cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xF8U);
+ len = 5U;
+ goto need_4;
+ } else if ((s[*ip] & 0xFE) == 0xFC) {
+ cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xFCU);
+ len = 6U;
+ goto need_5;
+ } /* NOTE(review): a first byte matching no branch (10xxxxxx, 0xFE, 0xFF) falls through to need_5 with `len` unassigned */
+
+ /* Parse continuation bytes; check that each is marked as a continuation, then get its codepoint bits */
+need_5:
+ if ((s[*ip] & 0xC0) != 0x80) return 0;
+ cp <<= 6;
+ cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
+
+need_4:
+ if ((s[*ip] & 0xC0) != 0x80) return 0;
+ cp <<= 6;
+ cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
+
+need_3:
+ if ((s[*ip] & 0xC0) != 0x80) return 0;
+ cp <<= 6;
+ cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
+
+need_2:
+ if ((s[*ip] & 0xC0) != 0x80) return 0;
+ cp <<= 6;
+ cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
+
+need_1:
+ if ((s[*ip] & 0xC0) != 0x80) return 0;
+ cp <<= 6;
+ cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
+
+ /* Check that the codepoint is encoded with the minimum possible length */
+ if (cp < 1ULL << (7 + 0 * 6))
+ return 0; /* fits in 1 byte, yet a multi-byte form was used */
+ if (cp < 1ULL << (5 + 1 * 6))
+ return len > 2U ? 0ULL : cp;
+ if (cp < 1ULL << (4 + 2 * 6))
+ return len > 3U ? 0ULL : cp;
+ if (cp < 1ULL << (3 + 3 * 6))
+ return len > 4U ? 0ULL : cp;
+ if (cp < 1ULL << (2 + 4 * 6))
+ return len > 5U ? 0ULL : cp;
+ if (cp < 1ULL << (1 + 5 * 6))
+ return len > 6U ? 0ULL : cp;
+
+ /* (Let's ignore the 0x10FFFF upper bound.) */
+
+ return 0;
+}