blob: e4d0e750a8d73634f6b9247ee4729de1c1d172b9 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
/* See LICENSE file for copyright and license details. */
#include "common.h"
/**
 * Decode one codepoint, encoded in the original (up to 6 byte,
 * up to 31 bit) form of UTF-8, from a byte string
 *
 * No length is passed in: parsing stops at the first byte that is
 * not acceptable in its position, so a NUL terminator (or any other
 * non-continuation byte) after a truncated sequence ends the parse
 *
 * @param   s   The byte string to read from
 * @param   ip  Pointer to the offset in `s` to start reading at;
 *              on success it is advanced past the decoded sequence,
 *              on failure it is left at the first byte that was
 *              rejected (bytes accepted before it have been skipped),
 *              except that when an otherwise well-formed sequence is
 *              rejected only for being overlong it has been advanced
 *              past the entire sequence
 * @return      The decoded codepoint, or 0 if the sequence is
 *              malformed: invalid lead byte, missing continuation
 *              byte, or encoded with more bytes than necessary.
 *              A NUL byte also decodes to 0, but advances `*ip` by 1.
 *              Neither the 0x10FFFF upper bound nor the surrogate
 *              range is checked.
 */
unsigned long long int
libterminput_utf8_decode__(const char *s, size_t *ip)
{
	/* Smallest codepoint that requires an encoding of the indexed length */
	static const unsigned long long int min_cp[] = {
		0, 0,
		1ULL << (7 + 0 * 6),
		1ULL << (5 + 1 * 6),
		1ULL << (4 + 2 * 6),
		1ULL << (3 + 3 * 6),
		1ULL << (2 + 4 * 6)
	};
	unsigned long long int cp;
	unsigned char lead = (unsigned char)s[*ip];
	size_t len, remaining;

	/* Parse the first byte, to get the highest codepoint bits and the encoding length */
	if ((lead & 0x80U) == 0) {
		(*ip)++;
		return (unsigned long long int)lead;
	} else if ((lead & 0xE0U) == 0xC0U) {
		cp = (unsigned long long int)(lead ^ 0xC0U);
		len = 2U;
	} else if ((lead & 0xF0U) == 0xE0U) {
		cp = (unsigned long long int)(lead ^ 0xE0U);
		len = 3U;
	} else if ((lead & 0xF8U) == 0xF0U) {
		cp = (unsigned long long int)(lead ^ 0xF0U);
		len = 4U;
	} else if ((lead & 0xFCU) == 0xF8U) {
		cp = (unsigned long long int)(lead ^ 0xF8U);
		len = 5U;
	} else if ((lead & 0xFEU) == 0xFCU) {
		cp = (unsigned long long int)(lead ^ 0xFCU);
		len = 6U;
	} else {
		/* Stray continuation byte (0x80-0xBF) or 0xFE/0xFF: not a valid
		 * lead byte. Reject it here rather than letting it be parsed as
		 * payload with no encoding length established. */
		return 0;
	}
	(*ip)++;

	/* Parse continuation bytes; check marked as continuation then get codepoint bits */
	for (remaining = len - 1U; remaining; remaining--) {
		if ((s[*ip] & 0xC0) != 0x80)
			return 0;
		cp <<= 6;
		cp |= (unsigned long long int)((unsigned char)s[(*ip)++] ^ 0x80U);
	}

	/* Check that encoded codepoint is encoded with the minimum possible length */
	if (cp < min_cp[len])
		return 0;

	/* (Let's ignore the 0x10FFFF upper bound.) */
	return cp;
}
|