Improve code organisation, documentation, and minor code improvement

Signed-off-by: Mattias Andrée <m@maandree.se>
author: Mattias Andrée <m@maandree.se> 2025-02-16 14:23:16 +0100
committer: Mattias Andrée <m@maandree.se> 2025-02-16 14:23:16 +0100
commit: 16e00dd5f26ce342e9562bec08f529d98c23c01c (patch)
tree: a8389795475d59653930322e10e9f07025ff1e14
parent: interactive-test: add TEST_LIBTERMINPUT_PRINT_STATE (diff)
download: libterminput-16e00dd5f26ce342e9562bec08f529d98c23c01c.tar.gz
libterminput-16e00dd5f26ce342e9562bec08f529d98c23c01c.tar.bz2
libterminput-16e00dd5f26ce342e9562bec08f529d98c23c01c.tar.xz
10 files changed, 380 insertions, 280 deletions
diff --git a/Makefile b/Makefile
index 5295a38..e958335 100644
--- a/Makefile
+++ b/Makefile
@@ -16,10 +16,18 @@ LIB_VERSION = $(LIB_MAJOR).$(LIB_MINOR)
 
 
 OBJ =\
-	libterminput.o
+	libterminput_read.o\
+	libterminput_is_ready.o\
+	libterminput_set_flags.o\
+	libterminput_clear_flags.o\
+	libterminput_encode_utf8__.o\
+	libterminput_check_utf8_char__.o\
+	libterminput_utf8_decode__.o\
+	libterminput_read_bracketed_paste__.o\
 
 HDR =\
-	libterminput.h
+	libterminput.h\
+	common.h
 
 TESTS =\
 	interactive-test\
diff --git a/common.h b/common.h
new file mode 100644
index 0000000..05b5269
--- /dev/null
+++ b/common.h
@@ -0,0 +1,68 @@
+/* See LICENSE file for copyright and license details. */
+#include "libterminput.h"
+
+#include <alloca.h>
+#include <ctype.h>
+#include <limits.h>
+#include <string.h>
+#include <unistd.h>
+
+
+#if defined(__GNUC__)
+# define HIDDEN __attribute__((__visibility__("hidden")))
+#else
+# define HIDDEN
+#endif
+
+
+struct input {
+	enum libterminput_mod mods;
+	char symbol[7]; /* 7 bytes, like `buffer[7]` of libterminput_encode_utf8__; presumably a NUL-terminated UTF-8 symbol - TODO confirm */
+};
+
+
+/**
+ * Encode a Unicode codepoint in UTF-8
+ * 
+ * @param  codepoint  The codepoint to encode
+ * @param  buffer     Output buffer for the NUL-byte terminated UTF-8 encoding of `codepoint`
+ */
+HIDDEN void libterminput_encode_utf8__(unsigned long long int codepoint, char buffer[7]);
+
+/**
+ * Validate a UTF-8 byte sequence, up to one codepoint encoding
+ *
+ * @param   s        The buffer to read from
+ * @param   size     The number of bytes available in `s`
+ * @param   len_out  Output parameter for the encoding length of the
+ *                   codepoint encoded at the beginning of `s`
+ * @return           1 if `s` begins with a valid codepoint,
+ *                   0 if `size` is too small to determine the validity,
+ *                   -1 if the byte sequence is illegal
+ */
+HIDDEN int libterminput_check_utf8_char__(const char *s, size_t size, size_t *len_out);
+
+/**
+ * Decode a Unicode codepoint encoded in UTF-8
+ * 
+ * @param   s   The buffer to read from
+ * @param   ip  Pointer to the current position in `s`, will be updated
+ * @return      The first encoded codepoint, 0 if invalid (or if 0)
+ */
+HIDDEN unsigned long long int libterminput_utf8_decode__(const char *s, size_t *ip);
+
+/**
+ * Get input from the terminal that appears after
+ * the start marker for a bracketed paste
+ * 
+ * @param   fd     The file descriptor to the terminal
+ * @param   input  Output parameter for input
+ * @param   ctx    State for the terminal, parts of the state may be stored in `input`
+ * @return         1 normally, 0 on end of input, -1 on error
+ *
+ * @throws  Any reason specified for read(3)
+ */
+HIDDEN int libterminput_read_bracketed_paste__(int fd, union libterminput_input *input, struct libterminput_state *ctx);
+
+
+#undef HIDDEN
diff --git a/libterminput_check_utf8_char__.c b/libterminput_check_utf8_char__.c
new file mode 100644
index 0000000..d2884cf
--- /dev/null
+++ b/libterminput_check_utf8_char__.c
@@ -0,0 +1,36 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+int
+libterminput_check_utf8_char__(const char *s, size_t size, size_t *len_out)
+{
+	size_t i;
+	*len_out = 0; /* stays 0 unless a valid lead byte is recognised below */
+	if (!size) {
+		return 0; /* no bytes available: validity cannot be determined yet */
+	} else if ((*s & 0x80) == 0) {
+		*len_out = 1U; /* 0xxxxxxx: single byte, no continuation bytes to check */
+		return 1;
+	} else if ((*s & 0xE0) == 0xC0) {
+		*len_out = 2U; /* 110xxxxx */
+	} else if ((*s & 0xF0) == 0xE0) {
+		*len_out = 3U; /* 1110xxxx */
+	} else if ((*s & 0xF8) == 0xF0) {
+		*len_out = 4U; /* 11110xxx */
+	} else if ((*s & 0xFC) == 0xF8) {
+		*len_out = 5U; /* 111110xx */
+	} else if ((*s & 0xFE) == 0xFC) {
+		*len_out = 6U; /* 1111110x */
+	} else {
+		*len_out = 0U;
+		return -1; /* 10xxxxxx, 0xFE and 0xFF cannot start an encoding */
+	}
+	for (i = 1; i < *len_out; i++) { /* only bit patterns are checked, not overlong encodings */
+		if (i == size)
+			return 0; /* input ends before the encoding is complete */
+		if ((s[i] & 0xC0) != 0x80)
+			return -1; /* every byte after the first must be 10xxxxxx */
+	}
+	return 1;
+}
diff --git a/libterminput_clear_flags.c b/libterminput_clear_flags.c
new file mode 100644
index 0000000..9eba361
--- /dev/null
+++ b/libterminput_clear_flags.c
@@ -0,0 +1,11 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+int
+libterminput_clear_flags(struct libterminput_state *ctx, enum libterminput_flags flags)
+{
+	ctx->flags |= flags; /* first force every bit in `flags` on, ... */
+	ctx->flags ^= flags; /* ... so that toggling them turns exactly those bits off */
+	return 0; /* unconditionally returns 0 */
+}
diff --git a/libterminput_encode_utf8__.c b/libterminput_encode_utf8__.c
new file mode 100644
index 0000000..7e83a04
--- /dev/null
+++ b/libterminput_encode_utf8__.c
@@ -0,0 +1,44 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+void
+libterminput_encode_utf8__(unsigned long long int codepoint, char buffer[7])
+{
+	static const char masks[6] = {
+		(char)0x00, /* 1 byte = 0 high set bits, */
+		(char)0xC0, /* 2 bytes = 2 high set bits, */
+		(char)0xE0, /* 3 bytes = 3 high set bits, ... */
+		(char)0xF0,
+		(char)0xF8,
+		(char)0xFC  /* 6 bytes = 6 high set bits */
+	};
+	static const unsigned long long int limits[6] = {
+		1ULL << (7 + 0 * 6), /* 1 byte has room for 7 codepoint encoding bits, */
+		1ULL << (5 + 1 * 6), /* 2 bytes have room for 5 bits in the first byte and 6 bits in the rest, */
+		1ULL << (4 + 2 * 6), /* 3 bytes have room for 4 bits in the first byte and 6 bits in each of the rest, ... */
+		1ULL << (3 + 3 * 6),
+		1ULL << (2 + 4 * 6),
+		1ULL << (1 + 5 * 6)  /* 6 bytes have room for 1 bit in the first byte and 6 bits in each of the rest */
+	};
+
+	size_t len;
+
+	/* Get encoding length, minus 1, for codepoint (NOTE(review): unbounded, codepoint >= 2^31 reads past `limits`) */
+	for (len = 0; codepoint >= limits[len]; len++);
+
+	/* Set the `len + 1` (but 0 if `len` is 0) high bits in the first
+	 * byte to encode the encoding length of the codepoint */
+	buffer[0] = masks[len];
+
+	/* NUL terminate the encoding buffer, to mark the end of the
+	 * encoding; from here on `len` is the encoding length in bytes */
+	buffer[++len] = '\0';
+
+	/* Encode the bits representing the code point
+	 * and the length continuation marker bits in
+	 * the non-first bytes, from the last byte backwards */
+	for (; --len; codepoint >>= 6)
+		buffer[len] = (char)((codepoint & 0x3FULL) | 0x80ULL);
+	buffer[0] |= (char)codepoint; /* remaining high bits go below the length marker */
+}
diff --git a/libterminput_is_ready.c b/libterminput_is_ready.c
new file mode 100644
index 0000000..9c8af91
--- /dev/null
+++ b/libterminput_is_ready.c
@@ -0,0 +1,5 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+extern inline int libterminput_is_ready(const union libterminput_input *input, const struct libterminput_state *ctx);
diff --git a/libterminput.c b/libterminput_read.c
index 86666c5..50095bd 100644
--- a/libterminput.c
+++ b/libterminput_read.c
@@ -1,17 +1,5 @@
 /* See LICENSE file for copyright and license details. */
-#include "libterminput.h"
-
-#include <alloca.h>
-#include <ctype.h>
-#include <limits.h>
-#include <string.h>
-#include <unistd.h>
-
-
-struct input {
-	enum libterminput_mod mods;
-	char symbol[7];
-};
+#include "common.h"
 
 
 static int
@@ -122,137 +110,6 @@ again:
 
 
 static void
-encode_utf8(unsigned long long int codepoint, char buffer[7])
-{
-	static const char masks[6] = {(char)0x00, (char)0xC0, (char)0xE0, (char)0xF0, (char)0xF8, (char)0xFC};
-	static const unsigned long long int limits[6] = {
-		1ULL << (7 + 0 * 6),
-		1ULL << (5 + 1 * 6),
-		1ULL << (4 + 2 * 6),
-		1ULL << (3 + 3 * 6),
-		1ULL << (2 + 4 * 6),
-		1ULL << (1 + 5 * 6)
-	};
-	size_t len;
-	for (len = 0; codepoint >= limits[len]; len++);
-	buffer[0] = masks[len];
-	len += 1;
-	buffer[len] = '\0';
-	for (; --len; codepoint >>= 6)
-		buffer[len] = (char)((codepoint & 0x3FULL) | 0x80ULL);
-	buffer[0] |= (char)codepoint;
-}
-
-
-static int
-check_utf8_char(const char *s, size_t *lenp, size_t size)
-{
-	size_t i;
-	*lenp = 0;
-	if (!size) {
-		return 0;
-	} else if ((*s & 0x80) == 0) {
-		*lenp = 1;
-		return 1;
-	} else if ((*s & 0xE0) == 0xC0) {
-		*lenp = 2;
-	} else if ((*s & 0xF0) == 0xE0) {
-		*lenp = 3;
-	} else if ((*s & 0xF8) == 0xF0) {
-		*lenp = 4;
-	} else if ((*s & 0xFC) == 0xF8) {
-		*lenp = 5;
-	} else if ((*s & 0xFE) == 0xFC) {
-		*lenp = 6;
-	} else {
-		*lenp = 0;
-		return -1;
-	}
-	for (i = 1; i < *lenp; i++) {
-		if (i == size)
-			return 0;
-		if ((s[i] & 0xC0) != 0x80)
-			return -1;
-	}
-	return 1;
-}
-
-
-static unsigned long long int
-utf8_decode(const char *s, size_t *ip)
-{
-	unsigned long long int cp = 0;
-	size_t len;
-
-	if ((s[*ip] & 0x80) == 0) {
-		return (unsigned long long int)s[(*ip)++];
-	} else if ((s[*ip] & 0xE0) == 0xC0) {
-		cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xC0U);
-		len = 2;
-		goto need_1;
-	} else if ((s[*ip] & 0xF0) == 0xE0) {
-		cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xE0U);
-		len = 3;
-		goto need_2;
-	} else if ((s[*ip] & 0xF8) == 0xF0) {
-		cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xF0U);
-		len = 4;
-		goto need_3;
-	} else if ((s[*ip] & 0xFC) == 0xF8) {
-		cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xF8U);
-		len = 5;
-		goto need_4;
-	} else if ((s[*ip] & 0xFE) == 0xFC) {
-		cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xFCU);
-		len = 6;
-		goto need_5;
-	}
-
-need_5:
-	if ((s[*ip] & 0xC0) != 0x80) return 0;
-	cp <<= 6;
-	cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
-
-need_4:
-	if ((s[*ip] & 0xC0) != 0x80) return 0;
-	cp <<= 6;
-	cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
-
-need_3:
-	if ((s[*ip] & 0xC0) != 0x80) return 0;
-	cp <<= 6;
-	cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
-
-need_2:
-	if ((s[*ip] & 0xC0) != 0x80) return 0;
-	cp <<= 6;
-	cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
-
-need_1:
-	if ((s[*ip] & 0xC0) != 0x80) return 0;
-	cp <<= 6;
-	cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
-
-	/* Let's ignore the 0x10FFFF upper bound. */
-
-	if (cp < 1ULL << (7 + 0 * 6))
-		return 0;
-	if (cp < 1ULL << (5 + 1 * 6))
-		return len > 2 ? 0ULL : cp;
-	if (cp < 1ULL << (4 + 2 * 6))
-		return len > 3 ? 0ULL : cp;
-	if (cp < 1ULL << (3 + 3 * 6))
-		return len > 4 ? 0ULL : cp;
-	if (cp < 1ULL << (2 + 4 * 6))
-		return len > 5 ? 0ULL : cp;
-	if (cp < 1ULL << (1 + 5 * 6))
-		return len > 6 ? 0ULL : cp;
-
-	return 0;
-}
-
-
-static void
 parse_sequence(union libterminput_input *input, struct libterminput_state *ctx)
 {
 	unsigned long long int *nums, numsbuf[6];
@@ -348,9 +205,9 @@ parse_sequence(union libterminput_input *input, struct libterminput_state *ctx)
 					ctx->mouse_tracking = 0;
 					nums = numsbuf;
 					pos = ctx->stored_tail;
-					if ((nums[0] = utf8_decode(ctx->stored, &ctx->stored_tail)) < 32 ||
-					    (nums[1] = utf8_decode(ctx->stored, &ctx->stored_tail)) < 32 ||
-					    (nums[2] = utf8_decode(ctx->stored, &ctx->stored_tail)) < 32) {
+					if ((nums[0] = libterminput_utf8_decode__(ctx->stored, &ctx->stored_tail)) < 32 ||
+					    (nums[1] = libterminput_utf8_decode__(ctx->stored, &ctx->stored_tail)) < 32 ||
+					    (nums[2] = libterminput_utf8_decode__(ctx->stored, &ctx->stored_tail)) < 32) {
 						ctx->stored_tail = pos;
 						input->keypress.key = LIBTERMINPUT_MACRO;
 						return;
@@ -471,7 +328,7 @@ parse_sequence(union libterminput_input *input, struct libterminput_state *ctx)
 					input->type = LIBTERMINPUT_NONE;
 					break;
 				}
-				encode_utf8(nums[0], input->keypress.symbol);
+				libterminput_encode_utf8__(nums[0], input->keypress.symbol);
 				input->keypress.times = 1;
 				break;
 			case '$':
@@ -612,112 +469,6 @@ parse_sequence(union libterminput_input *input, struct libterminput_state *ctx)
 }
 
 
-static int
-read_bracketed_paste(int fd, union libterminput_input *input, struct libterminput_state *ctx)
-{
-	ssize_t r;
-	size_t n;
-
-	/* Unfortunately there is no standard for how to handle pasted ESC's,
-	 * not even ESC [201~ or ESC ESC. Terminates seem to just paste ESC as
-	 * is, so we cannot do anything about them, however, a good terminal
-	 * would stop the paste at the ~ in ESC [201~, send ~ as normal, and
-	 * then continue the brackated paste mode. */
-
-	if (ctx->stored_head - ctx->stored_tail) {
-		ctx->paused = 0;
-		n = ctx->stored_head - ctx->stored_tail;
-		if (!strncmp(&ctx->stored[ctx->stored_tail], "\033[201~", n < 6 ? n : 6)) {
-			if (n >= 6) {
-				ctx->stored_tail += 6;
-				if (ctx->stored_tail == ctx->stored_head)
-					ctx->stored_tail = ctx->stored_head = 0;
-				ctx->bracketed_paste = 0;
-				input->type = LIBTERMINPUT_BRACKETED_PASTE_END;
-				return 1;
-			}
-			input->text.nbytes = ctx->stored_head - ctx->stored_tail;
-			memcpy(input->text.bytes, &ctx->stored[ctx->stored_tail], input->text.nbytes);
-			r = read(fd, &input->text.bytes[input->text.nbytes], sizeof(input->text.bytes) - input->text.nbytes);
-			if (r <= 0)
-				return (int)r;
-			input->text.nbytes += (size_t)r;
-			ctx->stored_head = ctx->stored_tail = 0;
-			goto normal;
-		}
-		input->text.nbytes = ctx->stored_head - ctx->stored_tail;
-		memcpy(input->text.bytes, &ctx->stored[ctx->stored_tail], input->text.nbytes);
-		ctx->stored_head = ctx->stored_tail = 0;
-		goto normal;
-	}
-
-	r = read(fd, input->text.bytes, sizeof(input->text.bytes));
-	if (r <= 0)
-		return (int)r;
-	input->text.nbytes = (size_t)r;
-
-normal:
-	for (n = 0; n + 5 < input->text.nbytes; n++) {
-		if (input->text.bytes[n + 0] == '\033' && input->text.bytes[n + 1] == '[' && input->text.bytes[n + 2] == '2' &&
-		    input->text.bytes[n + 3] == '0'    && input->text.bytes[n + 4] == '1' && input->text.bytes[n + 5] == '~')
-			break;
-	}
-	do {
-		if (n + 4 < input->text.nbytes) {
-			if (input->text.bytes[n + 0] == '\033' && input->text.bytes[n + 1] == '[' && input->text.bytes[n + 2] == '2' &&
-			    input->text.bytes[n + 3] == '0'    && input->text.bytes[n + 4] == '1')
-				break;
-			n += 1;
-		}
-		if (n + 3 < input->text.nbytes) {
-			if (input->text.bytes[n + 0] == '\033' && input->text.bytes[n + 1] == '[' && input->text.bytes[n + 2] == '2' &&
-			    input->text.bytes[n + 3] == '0')
-				break;
-			n += 1;
-		}
-		if (n + 2 < input->text.nbytes) {
-			if (input->text.bytes[n + 0] == '\033' && input->text.bytes[n + 1] == '[' && input->text.bytes[n + 2] == '2')
-				break;
-			n += 1;
-		}
-		if (n + 1 < input->text.nbytes) {
-			if (input->text.bytes[n + 0] == '\033' && input->text.bytes[n + 1] == '[')
-				break;
-			n += 1;
-		}
-		if (n + 0 < input->text.nbytes) {
-			if (input->text.bytes[n + 0] == '\033')
-				break;
-			n += 1;
-		}
-	} while (0);
-	if (!n) {
-		if (input->text.nbytes < 6) {
-			input->text.type = LIBTERMINPUT_NONE;
-			memcpy(ctx->stored, input->text.bytes, input->text.nbytes);
-			ctx->stored_tail = 0;
-			ctx->stored_head = input->text.nbytes;
-			ctx->paused = 1;
-			return 1;
-		}
-		ctx->stored_tail = 0;
-		ctx->stored_head = input->text.nbytes - 6;
-		memcpy(ctx->stored, &input->text.bytes[6], ctx->stored_head);
-		if (ctx->stored_tail == ctx->stored_head)
-			ctx->stored_tail = ctx->stored_head = 0;
-		ctx->bracketed_paste = 0;
-		input->type = LIBTERMINPUT_BRACKETED_PASTE_END;
-		return 1;
-	}
-	ctx->stored_tail = 0;
-	ctx->stored_head = input->text.nbytes - n;
-	memcpy(ctx->stored, &input->text.bytes[n], ctx->stored_head);
-	input->text.nbytes = n;
-	input->text.type = LIBTERMINPUT_TEXT;
-	return 1;
-}
-
-
 int
 libterminput_read(int fd, union libterminput_input *input, struct libterminput_state *ctx)
 {
@@ -736,7 +487,7 @@ libterminput_read(int fd, union libterminput_input *input, struct libterminput_s
 	}
 
 	if (ctx->bracketed_paste)
-		return read_bracketed_paste(fd, input, ctx);
+		return libterminput_read_bracketed_paste__(fd, input, ctx);
 	if (!ctx->mouse_tracking) {
 		r = read_input(fd, &ret, ctx);
 		if (r <= 0)
@@ -815,15 +566,15 @@ again:
 				return 1;
 			}
 			n = ctx->stored_tail;
-			r = check_utf8_char(&ctx->stored[n], &m, ctx->stored_head - n);
+			r = libterminput_check_utf8_char__(&ctx->stored[n], ctx->stored_head - n, &m);
 			if (r <= 0)
 				goto fallback_to_none_or_macro;
 			n += m;
-			r = check_utf8_char(&ctx->stored[n], &m, ctx->stored_head - n);
+			r = libterminput_check_utf8_char__(&ctx->stored[n], ctx->stored_head - n, &m);
 			if (r <= 0)
 				goto fallback_to_none_or_macro;
 			n += m;
-			r = check_utf8_char(&ctx->stored[n], &m, ctx->stored_head - n);
+			r = libterminput_check_utf8_char__(&ctx->stored[n], ctx->stored_head - n, &m);
 			if (r <= 0) {
 			fallback_to_none_or_macro:
 				if (!r) {
@@ -900,23 +651,3 @@ again:
 
 	return 1;
 }
-
-
-int
-libterminput_set_flags(struct libterminput_state *ctx, enum libterminput_flags flags)
-{
-	ctx->flags |= flags;
-	return 0;
-}
-
-
-int
-libterminput_clear_flags(struct libterminput_state *ctx, enum libterminput_flags flags)
-{
-	ctx->flags |= flags;
-	ctx->flags ^= flags;
-	return 0;
-}
-
-
-extern inline int libterminput_is_ready(const union libterminput_input *input, const struct libterminput_state *ctx);
diff --git a/libterminput_read_bracketed_paste__.c b/libterminput_read_bracketed_paste__.c
new file mode 100644
index 0000000..0c7a81f
--- /dev/null
+++ b/libterminput_read_bracketed_paste__.c
@@ -0,0 +1,108 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+int
+libterminput_read_bracketed_paste__(int fd, union libterminput_input *input, struct libterminput_state *ctx)
+{
+	ssize_t r;
+	size_t n;
+	size_t i;
+
+	/* Unfortunately there is no standard for how to handle pasted ESC's,
+	 * not even ESC [201~ or ESC ESC. Terminals seem to just paste ESC as
+	 * is, so we cannot do anything about them, however, a good terminal
+	 * would stop the paste at the ~ in ESC [201~, send ~ as normal, and
+	 * then continue the bracketed paste mode. */
+
+	/* Check for bracketed paste end marker to output LIBTERMINPUT_BRACKETED_PASTE_END
+	 * and stop, and read more if we don't have it; the marker will be at the
+	 * beginning as the function will stop when it encounters it and output the
+	 * text pasted before it */
+	if (ctx->stored_head - ctx->stored_tail) {
+		/* If we have input buffered, unpause and handle it */
+		ctx->paused = 0;
+		n = ctx->stored_head - ctx->stored_tail;
+		if (!strncmp(&ctx->stored[ctx->stored_tail], "\033[201~", n < 6U ? n : 6U)) {
+			/* If starting with bracketed paste end marker, output LIBTERMINPUT_BRACKETED_PASTE_END, */
+			if (n >= 6U) {
+				ctx->stored_tail += 6U;
+				if (ctx->stored_tail == ctx->stored_head)
+					ctx->stored_tail = ctx->stored_head = 0;
+				ctx->bracketed_paste = 0;
+				input->type = LIBTERMINPUT_BRACKETED_PASTE_END;
+				return 1;
+			}
+			/* otherwise, if the buffered input is a truncation of the marker,
+			 * move over the data from the stored input buffer to the input buffer
+			 * and continue reading input, appending it to the input buffer */
+			input->text.nbytes = ctx->stored_head - ctx->stored_tail;
+			memcpy(input->text.bytes, &ctx->stored[ctx->stored_tail], input->text.nbytes);
+			r = read(fd, &input->text.bytes[input->text.nbytes], sizeof(input->text.bytes) - input->text.nbytes);
+			if (r <= 0)
+				return (int)r;
+			input->text.nbytes += (size_t)r;
+			ctx->stored_head = ctx->stored_tail = 0;
+		} else {
+			/* If the buffered input does not begin with the bracketed paste end marker,
+			 * or a truncation of it, move over the data from the stored input buffer
+			 * to the input buffer */
+			input->text.nbytes = ctx->stored_head - ctx->stored_tail;
+			memcpy(input->text.bytes, &ctx->stored[ctx->stored_tail], input->text.nbytes);
+			ctx->stored_head = ctx->stored_tail = 0;
+		}
+	} else {
+		/* If we don't have any input buffered, read some */
+		r = read(fd, input->text.bytes, sizeof(input->text.bytes));
+		if (r <= 0)
+			return (int)r;
+		input->text.nbytes = (size_t)r;
+	}
+
+	/* Count the number of bytes available before a bracketed paste end
+	 * marker, or a truncation of it at the end of the input buffer */
+	for (n = 0; n + 5U < input->text.nbytes; n++) {
+		if (!strncmp(&input->text.bytes[n], "\033[201~", 6U))
+			break;
+	}
+	for (i = 5U; i--;) { /* i = 4, 3, 2, 1, 0: try marker prefixes of 5 down to 1 bytes */
+		if (n + i < input->text.nbytes) {
+			if (!strncmp(&input->text.bytes[n], "\033[201~", i + 1U))
+				break;
+			n += 1;
+		}
+	}
+
+	/* If there was pasted input, output it */
+	if (n) {
+		ctx->stored_tail = 0;
+		ctx->stored_head = input->text.nbytes - n;
+		memcpy(ctx->stored, &input->text.bytes[n], ctx->stored_head);
+		input->text.nbytes = n;
+		input->text.type = LIBTERMINPUT_TEXT;
+		return 1;
+	}
+
+	/* If the input is solely a truncation of the bracketed paste
+	 * end marker, output that we do not have any complete input */
+	if (input->text.nbytes < 6U) {
+		input->text.type = LIBTERMINPUT_NONE;
+		memcpy(ctx->stored, input->text.bytes, input->text.nbytes);
+		ctx->stored_tail = 0;
+		ctx->stored_head = input->text.nbytes;
+		ctx->paused = 1;
+		return 1;
+	}
+
+	/* If the input starts with a bracketed paste end marker,
+	 * output it and store the rest of the input buffer for
+	 * later processing */
+	ctx->stored_tail = 0;
+	ctx->stored_head = input->text.nbytes - 6U;
+	memcpy(ctx->stored, &input->text.bytes[6], ctx->stored_head);
+	if (ctx->stored_tail == ctx->stored_head)
+		ctx->stored_tail = ctx->stored_head = 0;
+	ctx->bracketed_paste = 0;
+	input->type = LIBTERMINPUT_BRACKETED_PASTE_END;
+	return 1;
+}
diff --git a/libterminput_set_flags.c b/libterminput_set_flags.c
new file mode 100644
index 0000000..508d0d6
--- /dev/null
+++ b/libterminput_set_flags.c
@@ -0,0 +1,10 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+int
+libterminput_set_flags(struct libterminput_state *ctx, enum libterminput_flags flags)
+{
+	ctx->flags |= flags; /* turn on every bit in `flags`, leaving all other bits untouched */
+	return 0; /* unconditionally returns 0 */
+}
diff --git a/libterminput_utf8_decode__.c b/libterminput_utf8_decode__.c
new file mode 100644
index 0000000..e4d0e75
--- /dev/null
+++ b/libterminput_utf8_decode__.c
@@ -0,0 +1,79 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+unsigned long long int
+libterminput_utf8_decode__(const char *s, size_t *ip)
+{
+	unsigned long long int cp = 0;
+	size_t len;
+
+	/* Parse the first byte, to get the highest codepoint bits and the encoding length */
+	if ((s[*ip] & 0x80) == 0) {
+		return (unsigned long long int)s[(*ip)++];
+	} else if ((s[*ip] & 0xE0) == 0xC0) {
+		cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xC0U);
+		len = 2U;
+		goto need_1;
+	} else if ((s[*ip] & 0xF0) == 0xE0) {
+		cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xE0U);
+		len = 3U;
+		goto need_2;
+	} else if ((s[*ip] & 0xF8) == 0xF0) {
+		cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xF0U);
+		len = 4U;
+		goto need_3;
+	} else if ((s[*ip] & 0xFC) == 0xF8) {
+		cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xF8U);
+		len = 5U;
+		goto need_4;
+	} else if ((s[*ip] & 0xFE) == 0xFC) {
+		cp = (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0xFCU);
+		len = 6U;
+		goto need_5;
+	} /* NOTE(review): no else branch; a stray 10xxxxxx lead byte falls into need_5 with `len` uninitialised */
+
+	/* Parse continuation bytes; check marked as continuation then get codepoint bits */
+need_5:
+	if ((s[*ip] & 0xC0) != 0x80) return 0;
+	cp <<= 6;
+	cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
+
+need_4:
+	if ((s[*ip] & 0xC0) != 0x80) return 0;
+	cp <<= 6;
+	cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
+
+need_3:
+	if ((s[*ip] & 0xC0) != 0x80) return 0;
+	cp <<= 6;
+	cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
+
+need_2:
+	if ((s[*ip] & 0xC0) != 0x80) return 0;
+	cp <<= 6;
+	cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
+
+need_1:
+	if ((s[*ip] & 0xC0) != 0x80) return 0;
+	cp <<= 6;
+	cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
+
+	/* Check that encoded codepoint is encoded with the minimum possible length */
+	if (cp < 1ULL << (7 + 0 * 6))
+		return 0; /* a 7-bit value reached via a multi-byte encoding is overlong */
+	if (cp < 1ULL << (5 + 1 * 6))
+		return len > 2U ? 0ULL : cp;
+	if (cp < 1ULL << (4 + 2 * 6))
+		return len > 3U ? 0ULL : cp;
+	if (cp < 1ULL << (3 + 3 * 6))
+		return len > 4U ? 0ULL : cp;
+	if (cp < 1ULL << (2 + 4 * 6))
+		return len > 5U ? 0ULL : cp;
+	if (cp < 1ULL << (1 + 5 * 6))
+		return len > 6U ? 0ULL : cp;
+
+	/* (Let's ignore the 0x10FFFF upper bound.) */
+
+	return 0;
+}
author	Mattias Andrée <m@maandree.se>	2025-02-16 14:23:16 +0100
committer	Mattias Andrée <m@maandree.se>	2025-02-16 14:23:16 +0100
commit	16e00dd5f26ce342e9562bec08f529d98c23c01c (patch)
tree	a8389795475d59653930322e10e9f07025ff1e14
parent	interactive-test: add TEST_LIBTERMINPUT_PRINT_STATE (diff)
download	libterminput-16e00dd5f26ce342e9562bec08f529d98c23c01c.tar.gz libterminput-16e00dd5f26ce342e9562bec08f529d98c23c01c.tar.bz2 libterminput-16e00dd5f26ce342e9562bec08f529d98c23c01c.tar.xz