aboutsummaryrefslogtreecommitdiffstats
path: root/libparsepsf.c
diff options
context:
space:
mode:
author Mattias Andrée <maandree@kth.se> 2021-08-13 16:57:18 +0200
committer Mattias Andrée <maandree@kth.se> 2021-08-13 16:57:18 +0200
commit 513ee4f15567048b619848edb092cdc4f33fe1c0 (patch)
tree 46a58fa34f1bd5307e4e4267f53c8db1f62acfde /libparsepsf.c
download libparsepsf-513ee4f15567048b619848edb092cdc4f33fe1c0.tar.gz
libparsepsf-513ee4f15567048b619848edb092cdc4f33fe1c0.tar.bz2
libparsepsf-513ee4f15567048b619848edb092cdc4f33fe1c0.tar.xz
First commit1.0
Signed-off-by: Mattias Andrée <maandree@kth.se>
Diffstat (limited to '')
-rw-r--r-- libparsepsf.c 470
1 files changed, 470 insertions, 0 deletions
diff --git a/libparsepsf.c b/libparsepsf.c
new file mode 100644
index 0000000..7c7b655
--- /dev/null
+++ b/libparsepsf.c
@@ -0,0 +1,470 @@
+/* See LICENSE file for copyright and license details. */
+#include "libparsepsf.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <grapheme.h>
+
+
+/*
+ * Header of a PSF version 1 font file; the parser copies the first
+ * sizeof(struct psf1_header) bytes of the file into this structure
+ */
+struct psf1_header {
+ uint8_t magic_x36; /* first magic byte, must be 0x36 */
+ uint8_t magic_x04; /* second magic byte, must be 0x04 */
+ uint8_t mode; /* bitwise OR of the PSF1_MODE* flags below */
+#define PSF1_MODE512 0x01 /* the font has 512 glyphs rather than 256 */
+#define PSF1_MODEHASTAB 0x02 /* a Unicode table follows the glyph bitmaps */
+/* #define PSF1_MODEHASSEQ 0x04 // really used */
+ uint8_t height; /* glyph height; also the bytes per glyph since the width is always 8 */
+};
+#define PSF1_SEPARATOR 0xFFFF /* 16-bit table entry ending the description of one glyph */
+#define PSF1_STARTSEQ 0xFFFE /* 16-bit table entry starting a code point sequence */
+
+/*
+ * Header of a PSF version 2 font file; the parser copies it out of the
+ * file and then converts every 32-bit field with letoh32()
+ */
+struct psf2_header {
+ uint8_t magic_x72; /* magic byte 0, must be 0x72 */
+ uint8_t magic_xb5; /* magic byte 1, must be 0xb5 */
+ uint8_t magic_x4a; /* magic byte 2, must be 0x4a */
+ uint8_t magic_x86; /* magic byte 3, must be 0x86 */
+ uint32_t version; /* format version, compared against PSF2_MAXVERSION */
+#define PSF2_MAXVERSION 0
+ uint32_t header_size; /* used as the offset of the glyph bitmaps in the file */
+ uint32_t flags; /* bitwise OR of the PSF2_* flags below */
+#define PSF2_HAS_UNICODE_TABLE 0x01 /* a Unicode table follows the glyph bitmaps */
+ uint32_t num_glyphs; /* number of glyphs in the font */
+ uint32_t charsize; /* = height * ((width + 7) / 8) */
+ uint32_t height; /* glyph height */
+ uint32_t width; /* glyph width */
+};
+#define PSF2_SEPARATOR 0xFF /* table byte ending the description of one glyph */
+#define PSF2_STARTSEQ 0xFE /* table byte starting a code point sequence */
+
+
+/*
+ * Recursively deallocate a node of the byte-wise lookup tree
+ * together with every node reachable from it
+ *
+ * @param  node  The node to deallocate, must not be NULL
+ */
+static void
+free_map(struct libparsepsf_unimap *node)
+{
+	const size_t fanout = sizeof(node->nonterminal) / sizeof(node->nonterminal[0]);
+	size_t branch = 0;
+
+	while (branch < fanout) {
+		struct libparsepsf_unimap *child = node->nonterminal[branch++];
+		if (child != NULL)
+			free_map(child);
+	}
+
+	free(node);
+}
+
+/*
+ * Release the memory owned by a font and reset the owning
+ * pointers to NULL, the structure itself is not deallocated
+ *
+ * @param  font  The font whose glyph data and lookup tree shall be freed
+ */
+void
+libparsepsf_destroy_font(struct libparsepsf_font *font)
+{
+	struct libparsepsf_unimap *root = font->map;
+
+	free(font->glyph_data);
+	font->glyph_data = NULL;
+
+	/* free_map() dereferences its argument, so skip it when there is no tree */
+	if (root == NULL)
+		return;
+	free_map(root);
+	font->map = NULL;
+}
+
+
+/*
+ * Read a 16-bit little-endian integer from a byte buffer
+ *
+ * @param   le  Pointer to at least two readable bytes
+ * @return      The value in host byte order
+ */
+static uint16_t
+letoh16(const uint8_t *le)
+{
+	unsigned int low = le[0];
+	unsigned int high = le[1];
+
+	return (uint16_t)((high << 8) | low);
+}
+
+
+/*
+ * Convert a 32-bit integer whose in-memory bytes are in
+ * little-endian order into host byte order
+ *
+ * @param   le  The value as loaded verbatim from the file
+ * @return      The value in host byte order
+ */
+static uint32_t
+letoh32(uint32_t le)
+{
+	uint8_t bytes[sizeof(le)];
+	uint32_t value = 0;
+	size_t i;
+
+	memcpy(bytes, &le, sizeof(bytes));
+
+	/* The last byte in memory is the most significant one */
+	for (i = sizeof(bytes); i--;)
+		value = (uint32_t)(value << 8) | (uint32_t)bytes[i];
+
+	return value;
+}
+
+
+/*
+ * Combine a UTF-16 surrogate pair into the code point it encodes
+ *
+ * @param   high  The high surrogate (0xD800 to 0xDBFF); despite
+ *                its name it has the lower numerical value
+ * @param   low   The low surrogate (0xDC00 to 0xDFFF)
+ * @return        The code point, in the range 0x10000 to 0x10FFFF
+ */
+static uint32_t
+desurrogate(uint16_t high, uint16_t low)
+{
+	/* each surrogate carries 10 payload bits */
+	uint32_t h = (uint32_t)high & UINT32_C(0x03FF);
+	uint32_t l = (uint32_t)low & UINT32_C(0x03FF);
+
+	/* surrogate pairs encode the code point minus 0x10000,
+	 * so the offset must be added back */
+	return UINT32_C(0x10000) + ((h << 10) | l);
+}
+
+/*
+ * Add the bytes of one UTF-8 encoded code point to the lookup tree,
+ * without yet associating a glyph with it
+ *
+ * All bytes but the last are added as branches; the last byte and the
+ * node it belongs to are handed back so that the caller can either
+ * finish the mapping with put_map_finalise() or extend it with another
+ * code point by calling this function again with the same `savedp`
+ * and `nodep`
+ *
+ * @param   font    The font whose lookup tree shall be extended, the
+ *                  root node is allocated if it does not exist
+ * @param   seq     The bytes to add
+ * @param   seqlen  The number of bytes in `seq`, must not be 0
+ * @param   savedp  Input: the pending byte from the previous call (only
+ *                  read if `*nodep` is not NULL); output: the last byte of `seq`
+ * @param   nodep   Input: NULL to start a new mapping at the root, or the
+ *                  node output by the previous call to continue a sequence;
+ *                  output: the node in which `*savedp` shall be stored
+ * @return          0 on success, -1 with `errno` set to EBFONT
+ *                  (empty `seq`) or ENOMEM on failure
+ */
+static int
+put_map_incomplete(struct libparsepsf_font *font, const uint8_t *seq, size_t seqlen,
+		uint8_t *savedp, struct libparsepsf_unimap **nodep)
+{
+	struct libparsepsf_unimap **slot;
+	size_t i;
+
+	if (font->map == NULL) {
+		font->map = calloc(1, sizeof(*font->map));
+		if (!font->map)
+			goto enomem;
+	}
+	if (!seqlen)
+		goto ebfont;
+
+	if (*nodep) {
+		/* Continuing a sequence: the byte that was pending is no longer
+		 * the final byte, so it becomes a branch that we descend into,
+		 * otherwise the earlier code points would be dropped */
+		slot = &(*nodep)->nonterminal[*savedp];
+		if (!*slot) {
+			*slot = calloc(1, sizeof(**slot));
+			if (!*slot)
+				goto enomem;
+		}
+		*nodep = *slot;
+	} else {
+		*nodep = font->map;
+	}
+
+	/* The final byte is not inserted, it is left for the caller */
+	*savedp = seq[--seqlen];
+	for (i = 0; i < seqlen; i++) {
+		slot = &(*nodep)->nonterminal[seq[i]];
+		if (!*slot) {
+			*slot = calloc(1, sizeof(**slot));
+			if (!*slot)
+				goto enomem;
+		}
+		*nodep = *slot;
+	}
+
+	return 0;
+
+ebfont:
+	errno = EBFONT;
+	return -1;
+enomem:
+	errno = ENOMEM;
+	return -1;
+}
+
+/*
+ * Finish a mapping prepared with put_map_incomplete() by
+ * associating its final byte with a glyph
+ *
+ * @param   index  The index of the glyph, it is stored incremented
+ *                 by 1 so that 0 can mean "no glyph"
+ * @param   saved  The final byte, as output by put_map_incomplete()
+ * @param   node   The node output by put_map_incomplete(), or NULL
+ *                 if no code point was added for this mapping
+ * @return         0 on success, -1 with `errno` set to
+ *                 EBFONT if `node` is NULL
+ */
+static int
+put_map_finalise(size_t index, uint8_t saved, struct libparsepsf_unimap *node)
+{
+	/* Nothing is pending: the mapping is empty (e.g. a sequence
+	 * marker directly followed by another marker in a malformed
+	 * file), report it instead of dereferencing NULL */
+	if (!node) {
+		errno = EBFONT;
+		return -1;
+	}
+
+	/* An already mapped byte sequence is deliberately not treated as
+	 * an error: unfortunately this actually happens in the real
+	 * world, the latest mapping simply replaces the previous one */
+	node->terminal[saved] = index + 1;
+	return 0;
+}
+
+/*
+ * Map a complete byte sequence to a glyph, this is
+ * put_map_incomplete() directly followed by put_map_finalise()
+ *
+ * @param   font    The font whose lookup tree shall be extended
+ * @param   index   The index of the glyph
+ * @param   seq     The bytes that shall select the glyph
+ * @param   seqlen  The number of bytes in `seq`
+ * @return          0 on success, -1 on failure
+ */
+static int
+put_map(struct libparsepsf_font *font, size_t index, const uint8_t *seq, size_t seqlen)
+{
+	struct libparsepsf_unimap *leaf = NULL;
+	uint8_t last_byte = 0xFF;
+
+	if (put_map_incomplete(font, seq, seqlen, &last_byte, &leaf) != 0)
+		return -1;
+
+	return put_map_finalise(index, last_byte, leaf);
+}
+
+/*
+ * Decode and validate one UTF-8 encoded code point
+ *
+ * Overlong encodings, surrogates, code points above 0x10FFFF,
+ * encodings longer than 4 bytes, and encodings that do not fit
+ * in the buffer are rejected
+ *
+ * @param   data  The bytes to decode
+ * @param   size  The number of bytes available in `data`,
+ *                no byte beyond this limit is read
+ * @param   np    Output parameter for the number of bytes in the
+ *                encoding, only meaningful on success
+ * @param   cpp   Output parameter for the code point, may be NULL
+ * @return        0 on success, -1 if the input is invalid or truncated
+ */
+static int
+decode_utf8(const uint8_t *data, size_t size, size_t *np, uint32_t *cpp)
+{
+	uint8_t head;
+	uint32_t cp;
+
+	if (!size) {
+		*np = 0;
+		return -1;
+	}
+
+	head = *data;
+	*np = 1;
+	if (!(head & 0x80)) {
+		/* single byte (ASCII) */
+		if (cpp)
+			*cpp = (uint32_t)head;
+		return 0;
+	} else if (!(head & 0x40)) {
+		/* continuation byte without a lead byte */
+		return -1;
+	}
+	cp = (uint32_t)head;
+	head = (uint8_t)(head << 1);
+
+	/* each remaining leading 1-bit in the lead byte
+	 * announces one continuation byte */
+	while (head & 0x80) {
+		head = (uint8_t)(head << 1);
+		if (*np >= 4 || *np >= size)
+			return -1;
+		if ((data[*np] & 0xC0) != 0x80)
+			return -1;
+		cp <<= 6;
+		cp |= (uint32_t)(data[*np] & 0x3F);
+		*np += 1;
+	}
+
+	/* remove the length marker bits of the lead byte:
+	 * 11, 16, and 21 payload bits for 2, 3, and 4 bytes */
+	cp &= (UINT32_C(1) << (*np * 5 + 1)) - 1;
+	if ((cp & UINT32_C(0xFFF800)) == UINT32_C(0xD800) ||
+	    cp > UINT32_C(0x10FFFF) ||
+	    cp < UINT32_C(1) << (*np == 2 ? 7 : *np * 5 - 4))
+		return -1;
+
+	if (cpp)
+		*cpp = cp;
+	return 0;
+}
+
+/*
+ * Parse a PSF version 1 or PSF version 2 font that has been loaded
+ * into memory
+ *
+ * @param   data                   The content of the font file
+ * @param   size                   The number of bytes in `data`
+ * @param   fontp                  Output parameter for the font; on success
+ *                                 it shall be released with libparsepsf_destroy_font(),
+ *                                 on failure it has already been released
+ * @param   unrecognised_versionp  Set to 0 if the version of the file is
+ *                                 recognised, and otherwise (only possible
+ *                                 for PSF version 2) to the, non-zero, version
+ *                                 number stated in the file; the file is
+ *                                 parsed nonetheless
+ * @return                         0 on success, -1 on failure with `errno` set
+ *                                 to EBFONT if the file is malformed, or to
+ *                                 ENOMEM if memory could not be allocated
+ */
+int
+libparsepsf_parse_font(const void *data, size_t size, struct libparsepsf_font *fontp, uint32_t *unrecognised_versionp)
+{
+	union {
+		struct psf1_header psf1;
+		struct psf2_header psf2;
+	} header;
+	const uint8_t *udata = data;
+	size_t glyphs_offset;
+	size_t charsize;
+	size_t off;
+	size_t i;
+	size_t n;
+	uint32_t u32;
+	uint16_t u16, u16b;
+	uint8_t u8, utf8[4], utf8_saved;
+	struct libparsepsf_unimap *utf8_node;
+
+	*unrecognised_versionp = 0;
+	fontp->glyph_data = NULL;
+	fontp->map = NULL;
+
+	if (size < 4)
+		goto ebfont;
+
+	if (udata[0] == 0x36) { /* TODO untested */
+		if (size < sizeof(header.psf1))
+			goto ebfont;
+		memcpy(&header.psf1, udata, sizeof(header.psf1));
+		if (header.psf1.magic_x36 != 0x36 ||
+		    header.psf1.magic_x04 != 0x04)
+			goto ebfont;
+		fontp->num_glyphs = (header.psf1.mode & PSF1_MODE512) ? 512 : 256;
+		fontp->height = (size_t)header.psf1.height;
+		fontp->width = 8;
+		glyphs_offset = sizeof(header.psf1);
+		charsize = fontp->height;
+		/* division rather than multiplication, to avoid overflow */
+		if (glyphs_offset > size ||
+		    !fontp->num_glyphs ||
+		    charsize > (size - glyphs_offset) / fontp->num_glyphs)
+			goto ebfont;
+		if (header.psf1.mode & PSF1_MODEHASTAB) {
+			/* the table is stored directly after the glyphs */
+			off = glyphs_offset + fontp->num_glyphs * charsize;
+			for (i = 0; i < fontp->num_glyphs; i++) {
+				/* individual code points that select glyph `i` */
+				for (;;) {
+					if (size - off < 2)
+						goto ebfont;
+					u16 = letoh16(&udata[off]);
+					off += 2;
+					if (u16 == PSF1_STARTSEQ) {
+						break;
+					} else if (u16 == PSF1_SEPARATOR) {
+						goto next_char_psf1;
+					} else if ((u16 & UINT32_C(0xF800)) == UINT32_C(0xD800)) {
+						if (size - off < 2)
+							goto ebfont;
+						u16b = letoh16(&udata[off]);
+						off += 2;
+						/* both must be surrogates, and exactly one of
+						 * them high: the 6 top bits may only differ in
+						 * the bit that tells high and low apart */
+						if (((u16 ^ u16b) & 0xFC00) != 0x0400)
+							goto ebfont;
+						/* either order is accepted; the high
+						 * surrogate is the numerically lower one */
+						u32 = desurrogate(u16 < u16b ? u16 : u16b,
+						                  u16 < u16b ? u16b : u16);
+					} else {
+						u32 = (uint32_t)u16;
+					}
+					n = grapheme_cp_encode(u32, utf8, sizeof(utf8));
+					if (n > sizeof(utf8))
+						abort();
+					if (put_map(fontp, i, utf8, n))
+						goto fail;
+				}
+				/* code point sequences that select glyph `i` */
+				utf8_saved = 0xFF;
+				utf8_node = NULL;
+				for (;;) {
+					if (size - off < 2)
+						goto ebfont;
+					u16 = letoh16(&udata[off]);
+					off += 2;
+					if (u16 == PSF1_STARTSEQ || u16 == PSF1_SEPARATOR) {
+						/* a sequence must not be empty */
+						if (!utf8_node)
+							goto ebfont;
+						if (put_map_finalise(i, utf8_saved, utf8_node))
+							goto fail;
+						if (u16 == PSF1_SEPARATOR)
+							goto next_char_psf1;
+						utf8_saved = 0xFF;
+						utf8_node = NULL;
+						continue;
+					} else if ((u16 & UINT32_C(0xF800)) == UINT32_C(0xD800)) {
+						if (size - off < 2)
+							goto ebfont;
+						u16b = letoh16(&udata[off]);
+						off += 2;
+						if (((u16 ^ u16b) & 0xFC00) != 0x0400)
+							goto ebfont;
+						u32 = desurrogate(u16 < u16b ? u16 : u16b,
+						                  u16 < u16b ? u16b : u16);
+					} else {
+						u32 = (uint32_t)u16;
+					}
+					n = grapheme_cp_encode(u32, utf8, sizeof(utf8));
+					if (n > sizeof(utf8))
+						abort();
+					if (put_map_incomplete(fontp, utf8, n, &utf8_saved, &utf8_node))
+						goto fail;
+				}
+			next_char_psf1:;
+			}
+		}
+
+	} else {
+		if (size < sizeof(header.psf2))
+			goto ebfont;
+		memcpy(&header.psf2, udata, sizeof(header.psf2));
+		if (header.psf2.magic_x72 != 0x72 ||
+		    header.psf2.magic_xb5 != 0xb5 ||
+		    header.psf2.magic_x4a != 0x4a ||
+		    header.psf2.magic_x86 != 0x86)
+			goto ebfont;
+		header.psf2.version     = letoh32(header.psf2.version);
+		header.psf2.header_size = letoh32(header.psf2.header_size);
+		header.psf2.flags       = letoh32(header.psf2.flags);
+		header.psf2.num_glyphs  = letoh32(header.psf2.num_glyphs);
+		header.psf2.charsize    = letoh32(header.psf2.charsize);
+		header.psf2.height      = letoh32(header.psf2.height);
+		header.psf2.width       = letoh32(header.psf2.width);
+		/* calculated with 64 bits so that absurd dimensions cannot
+		 * wrap around and happen to match a small `charsize` */
+		if ((uint64_t)header.psf2.height * (((uint64_t)header.psf2.width + 7) / 8) !=
+		    (uint64_t)header.psf2.charsize)
+			goto ebfont;
+		if (header.psf2.version > PSF2_MAXVERSION)
+			*unrecognised_versionp = header.psf2.version;
+		fontp->num_glyphs = (size_t)header.psf2.num_glyphs;
+		fontp->height = (size_t)header.psf2.height;
+		fontp->width = (size_t)header.psf2.width;
+		glyphs_offset = (size_t)header.psf2.header_size;
+		charsize = (size_t)header.psf2.charsize;
+		/* division rather than multiplication, to avoid overflow */
+		if (glyphs_offset > size ||
+		    !fontp->num_glyphs ||
+		    charsize > (size - glyphs_offset) / fontp->num_glyphs)
+			goto ebfont;
+		if (header.psf2.flags & PSF2_HAS_UNICODE_TABLE) {
+			/* the table is stored directly after the glyphs */
+			off = glyphs_offset + fontp->num_glyphs * charsize;
+			for (i = 0; i < fontp->num_glyphs; i++) {
+				/* individual code points that select glyph `i` */
+				for (;;) {
+					if (off >= size)
+						goto ebfont;
+					u8 = udata[off];
+					if (u8 == PSF2_STARTSEQ) {
+						off += 1;
+						break;
+					} else if (u8 == PSF2_SEPARATOR) {
+						off += 1;
+						goto next_char_psf2;
+					}
+					if (decode_utf8(&udata[off], size - off, &n, NULL) || n > size - off)
+						goto ebfont;
+					if (put_map(fontp, i, &udata[off], n))
+						goto fail;
+					off += n;
+				}
+				/* code point sequences that select glyph `i` */
+				utf8_saved = 0xFF;
+				utf8_node = NULL;
+				for (;;) {
+					if (off >= size)
+						goto ebfont;
+					u8 = udata[off];
+					if (u8 == PSF2_STARTSEQ || u8 == PSF2_SEPARATOR) {
+						/* a sequence must not be empty */
+						if (!utf8_node)
+							goto ebfont;
+						if (put_map_finalise(i, utf8_saved, utf8_node))
+							goto fail;
+						off += 1;
+						if (u8 == PSF2_SEPARATOR)
+							goto next_char_psf2;
+						utf8_saved = 0xFF;
+						utf8_node = NULL;
+					} else {
+						if (decode_utf8(&udata[off], size - off, &n, NULL) || n > size - off)
+							goto ebfont;
+						if (put_map_incomplete(fontp, &udata[off], n, &utf8_saved, &utf8_node))
+							goto fail;
+						off += n;
+					}
+				}
+			next_char_psf2:;
+			}
+		}
+	}
+
+	/* glyphs of height 0 have no data, `glyph_data` is
+	 * left as NULL and nothing is copied in that case */
+	if (charsize) {
+		fontp->glyph_data = malloc(fontp->num_glyphs * charsize);
+		if (!fontp->glyph_data)
+			goto enomem;
+		memcpy(fontp->glyph_data, &udata[glyphs_offset], fontp->num_glyphs * charsize);
+	}
+
+	return 0;
+
+enomem:
+	errno = ENOMEM;
+	goto fail;
+ebfont:
+	errno = EBFONT;
+fail:
+	libparsepsf_destroy_font(fontp);
+	return -1;
+}
+
+
+/*
+ * Look up the glyph for the text at the beginning of a string
+ *
+ * If the font has a lookup tree, the longest prefix of the string
+ * that is mapped to a glyph is selected. If it does not, the first
+ * code point in the string is decoded and used as the glyph index.
+ *
+ * @param   font     The font
+ * @param   c        The text, encoded in UTF-8
+ * @param   remp     If NULL, `c` is NUL-terminated; otherwise `*remp`
+ *                   is the number of bytes available in `c`, and when
+ *                   a glyph is found it is updated to the number of
+ *                   bytes that follow the matched text
+ * @param   next_cp  Unless NULL, set to the first byte after the
+ *                   matched text when a glyph is found
+ * @return           The index of the glyph plus 1, or 0 if no glyph
+ *                   was found, in which case `*remp` and `*next_cp`
+ *                   are left unmodified; `errno` is set to EILSEQ if
+ *                   the text had to be decoded but is not valid UTF-8
+ */
+size_t
+libparsepsf_get_glyph(const struct libparsepsf_font *font, const char *c, size_t *remp, const char **next_cp)
+{
+	const struct libparsepsf_unimap *node = font->map;
+	size_t glyph = 0, found, rem, n;
+	uint32_t cp;
+	uint8_t byte;
+
+	/* end of text */
+	if (remp ? !*remp : !*c)
+		return 0;
+
+	if (!node) {
+		if (decode_utf8((const uint8_t *)c, remp ? *remp : SIZE_MAX, &n, &cp) ||
+		    (remp && n > *remp)) {
+			errno = EILSEQ;
+			return 0;
+		}
+		if ((size_t)cp >= font->num_glyphs)
+			return 0;
+		if (next_cp)
+			*next_cp = &c[n];
+		if (remp)
+			*remp -= n;
+		/* same convention as for the lookup tree: index + 1 */
+		return (size_t)cp + 1;
+	}
+
+	rem = remp ? *remp : 0;
+	for (;;) {
+		byte = *(const uint8_t *)c;
+
+		/* Remember every match on the way down, a longer match
+		 * replaces a shorter one, but a failed attempt to find
+		 * a longer match must not discard the shorter one */
+		found = node->terminal[byte];
+		if (found) {
+			glyph = found;
+			if (next_cp)
+				*next_cp = &c[1];
+			if (remp)
+				*remp = rem - 1;
+		}
+
+		/* stop at the last byte of the text */
+		if (remp ? rem == 1 : !c[1])
+			break;
+
+		node = node->nonterminal[byte];
+		if (!node)
+			break;
+		c = &c[1];
+		rem -= 1;
+	}
+
+	return glyph;
+}