aboutsummaryrefslogtreecommitdiffstats
path: root/libterminput_encode_utf8__.c
blob: 7e83a04a2cead164f3c68f764bc262267d27c5fa (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
/* See LICENSE file for copyright and license details. */
#include "common.h"


/**
 * Encode a code point as a NUL-terminated byte sequence using the
 * original (up to 6 bytes, 31 bits) UTF-8 scheme
 * 
 * Code points that do not fit in 31 bits (that is, values of 1 << 31
 * and above) cannot be represented in 6 bytes; for such values
 * U+FFFD REPLACEMENT CHARACTER is encoded instead
 * 
 * @param  codepoint  The code point to encode
 * @param  buffer     Output buffer for the encoding, it must have room for
 *                    at least 7 bytes: up to 6 encoding bytes followed by
 *                    a terminating NUL byte; note that code point 0 is
 *                    encoded as a single NUL byte, giving an empty string
 */
void
libterminput_encode_utf8__(unsigned long long int codepoint, char buffer[7])
{
	static const char masks[6] = {
		(char)0x00, /* 1 byte = 0 high set bits, */
		(char)0xC0, /* 2 bytes = 2 high set bits, */
		(char)0xE0, /* 3 bytes = 3 high set bits, ... */
		(char)0xF0,
		(char)0xF8,
		(char)0xFC  /* 6 bytes = 6 high set bits */
	};
	static const unsigned long long int limits[6] = {
		1ULL << (7 + 0 * 6), /* 1 byte has room for 7 codepoint encoding bits, */
		1ULL << (5 + 1 * 6), /* 2 bytes have room for 5 bits in the first byte and 6 bits in each of the rest, */
		1ULL << (4 + 2 * 6), /* 3 bytes have room for 4 bits in the first byte and 6 bits in each of the rest, ... */
		1ULL << (3 + 3 * 6),
		1ULL << (2 + 4 * 6),
		1ULL << (1 + 5 * 6)  /* 6 bytes have room for 1 bit in the first byte and 6 bits in each of the rest */
	};

	size_t len;

	/* Anything at or above the last limit does not fit in 6 bytes;
	 * substitute U+FFFD so that the search below is guaranteed to
	 * stop within the tables and the output stays within `buffer` */
	if (codepoint >= limits[5])
		codepoint = 0xFFFDULL;

	/* Get the encoding length, minus 1, for the codepoint */
	len = 0;
	while (codepoint >= limits[len])
		len++;

	/* Set the length marker bits in the first byte: one high bit
	 * per byte in the encoding, except none for 1-byte encodings */
	buffer[0] = masks[len];

	/* NUL terminate the encoding buffer,
	 * to mark the end of the encoding */
	buffer[++len] = '\0';

	/* Encode the bits representing the code point, 6 bits at
	 * a time starting with the lowest ones in the last byte,
	 * together with the continuation marker bits, in the
	 * non-first bytes */
	for (; --len; codepoint >>= 6)
		buffer[len] = (char)((codepoint & 0x3FULL) | 0x80ULL);

	/* The bits that remain go in the first byte,
	 * below the length marker bits */
	buffer[0] |= (char)codepoint;
}