about summary refs log tree commit diff stats
path: root/libcharconv_decode_utf8_.c
diff options
context:
space:
mode:
Diffstat (limited to 'libcharconv_decode_utf8_.c')
-rw-r--r-- libcharconv_decode_utf8_.c 48
1 files changed, 48 insertions, 0 deletions
diff --git a/libcharconv_decode_utf8_.c b/libcharconv_decode_utf8_.c
new file mode 100644
index 0000000..db66040
--- /dev/null
+++ b/libcharconv_decode_utf8_.c
@@ -0,0 +1,48 @@
+/* See LICENSE file for copyright and license details. */
+#include "lib-common.h"
+
+
/**
 * Decode a single UTF-8 encoded code point from the start of a buffer
 *
 * @param   s     Buffer containing the UTF-8 encoded text
 * @param   slen  Number of bytes available in `s`
 * @param   cp    Output parameter for the decoded code point; only
 *                meaningful when the return value is non-zero and
 *                does not exceed `slen`
 * @return        0 if `slen` is 0 or the sequence is invalid (bad lead
 *                byte, bad continuation byte, overlong encoding, or a
 *                value of U+110000 or above); otherwise the length of
 *                the sequence as announced by its lead byte. If that
 *                length exceeds `slen`, the input is truncated and no
 *                complete code point has been stored in `*cp`.
 */
size_t
libcharconv_decode_utf8_(const char *s, size_t slen, uint_least32_t *cp)
{
	/* Inspect bytes as unsigned so that the bit tests and the
	 * conversions below do not depend on the signedness of `char` */
	const unsigned char *bytes = (const unsigned char *)s;
	uint_least32_t min, max;
	size_t i, n;

	if (slen < 1u)
		return 0u;

	if (!(bytes[0] & 0x80)) {
		/* 0xxxxxxx: ASCII, encoded as itself */
		*cp = (uint_least32_t)bytes[0];
		return 1u;
	} else if ((bytes[0] & 0xE0) == 0xC0) {
		/* 110xxxxx: 5 payload bits, 2-byte sequence */
		*cp = (uint_least32_t)bytes[0] & 0x1Fu;
		n = 2u;
		min = UINT32_C(0x80);
		max = UINT32_C(0x800);
	} else if ((bytes[0] & 0xF0) == 0xE0) {
		/* 1110xxxx: 4 payload bits, 3-byte sequence */
		*cp = (uint_least32_t)bytes[0] & 0x0Fu;
		n = 3u;
		min = UINT32_C(0x800);
		max = UINT32_C(0x10000);
	} else if ((bytes[0] & 0xF8) == 0xF0) {
		/* 11110xxx: 3 payload bits, 4-byte sequence */
		*cp = (uint_least32_t)bytes[0] & 0x07u;
		n = 4u;
		min = UINT32_C(0x10000);
		max = UINT32_C(0x110000);
	} else {
		/* Stray continuation byte or a lead byte for a sequence
		 * longer than 4 bytes */
		return 0u;
	}

	/* Truncated input: report the length the caller must provide */
	if (slen < n)
		return n;

	for (i = 1u; i < n; i++) {
		/* Every trailing byte must have the form 10xxxxxx, otherwise
		 * unrelated bytes would be silently merged into the result */
		if ((bytes[i] & 0xC0) != 0x80)
			return 0u;
		*cp <<= 6;
		*cp |= (uint_least32_t)bytes[i] & 0x3Fu;
	}

	/* Reject overlong encodings (value below the minimum for this
	 * length) and values beyond the last code point, U+10FFFF */
	if (min > *cp || *cp >= max)
		return 0u;

	/* NOTE(review): surrogates (U+D800 to U+DFFF) are still accepted,
	 * as before; confirm whether callers need them rejected */

	return n;
}