1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
|
/* See LICENSE file for copyright and license details. */
#include "common.h"
/**
 * Encode a codepoint as a NUL-terminated UTF-8 byte sequence
 *
 * The extended (up to 6 bytes) UTF-8 scheme is used, so any
 * value below 2^31 can be represented. No check is made for
 * surrogates or for values above U+10FFFF; they are encoded
 * like any other value.
 *
 * @param  codepoint  The value to encode; values of 2^31 or
 *                    greater do not fit in 6 bytes and are
 *                    encoded as U+FFFD (REPLACEMENT CHARACTER)
 * @param  buffer     Output buffer with room for at least 7 bytes
 *                    (up to 6 encoding bytes plus the terminating
 *                    NUL byte); note that codepoint 0 is encoded
 *                    as a single NUL byte, which makes the result
 *                    indistinguishable from an empty string
 */
void
libterminput_encode_utf8__(unsigned long long int codepoint, char buffer[7])
{
	static const char masks[6] = {
		(char)0x00, /* 1 byte  = 0 high set bits, */
		(char)0xC0, /* 2 bytes = 2 high set bits, */
		(char)0xE0, /* 3 bytes = 3 high set bits, ... */
		(char)0xF0,
		(char)0xF8,
		(char)0xFC  /* 6 bytes = 6 high set bits */
	};
	static const unsigned long long int limits[6] = {
		1ULL << (7 + 0 * 6), /* 1 byte has room for 7 codepoint encoding bits, */
		1ULL << (5 + 1 * 6), /* 2 bytes have room for 5 bits in the first byte and 6 bits in the rest, */
		1ULL << (4 + 2 * 6), /* 3 bytes have room for 4 bits in the first byte and 6 bits in the rest, ... */
		1ULL << (3 + 3 * 6),
		1ULL << (2 + 4 * 6),
		1ULL << (1 + 5 * 6)  /* 6 bytes have room for 1 bit in the first byte and 6 bits in the rest */
	};
	size_t len;

	/* Values that do not fit in the longest (6 byte) encoding would
	 * make the length search below run past the end of `limits`,
	 * and subsequently overflow `buffer`, so substitute them with
	 * U+FFFD before encoding */
	if (codepoint >= limits[5])
		codepoint = 0xFFFDULL;

	/* Get encoding length for codepoint: after the loop, `len` is
	 * the number of continuation bytes (encoding length minus 1);
	 * the check above guarantees that `len` never exceeds 5 */
	for (len = 0; codepoint >= limits[len]; len++);

	/* Set the `len + 1` (but 0 if 1) high bits in the first byte
	 * to encode the encoding length of the codepoint */
	buffer[0] = masks[len];

	/* NUL terminate the encoding buffer,
	 * to mark the end of the encoding;
	 * `len` is now the full encoding length */
	buffer[++len] = '\0';

	/* Encode the bits representing the code point
	 * and the continuation marker bits in the
	 * non-first bytes, from the last byte backwards,
	 * consuming 6 bits of the codepoint per byte */
	for (; --len; codepoint >>= 6)
		buffer[len] = (char)((codepoint & 0x3FULL) | 0x80ULL);

	/* What remains of the codepoint fits in the payload
	 * bits of the first byte, below the length marker */
	buffer[0] |= (char)codepoint;
}
|