diff options
Diffstat (limited to 'libparsepsf.c')
-rw-r--r-- | libparsepsf.c | 470 |
1 files changed, 470 insertions, 0 deletions
diff --git a/libparsepsf.c b/libparsepsf.c new file mode 100644 index 0000000..7c7b655 --- /dev/null +++ b/libparsepsf.c @@ -0,0 +1,470 @@ +/* See LICENSE file for copyright and license details. */ +#include "libparsepsf.h" + +#include <errno.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include <grapheme.h> + + +struct psf1_header { + uint8_t magic_x36; + uint8_t magic_x04; + uint8_t mode; +#define PSF1_MODE512 0x01 +#define PSF1_MODEHASTAB 0x02 +/* #define PSF1_MODEHASSEQ 0x04 // really used */ + uint8_t height; +}; +#define PSF1_SEPARATOR 0xFFFF +#define PSF1_STARTSEQ 0xFFFE + +struct psf2_header { + uint8_t magic_x72; + uint8_t magic_xb5; + uint8_t magic_x4a; + uint8_t magic_x86; + uint32_t version; +#define PSF2_MAXVERSION 0 + uint32_t header_size; + uint32_t flags; +#define PSF2_HAS_UNICODE_TABLE 0x01 + uint32_t num_glyphs; + uint32_t charsize; /* = height * ((width + 7) / 8) */ + uint32_t height; + uint32_t width; +}; +#define PSF2_SEPARATOR 0xFF +#define PSF2_STARTSEQ 0xFE + + +static void +free_map(struct libparsepsf_unimap *node) +{ + size_t i; + for (i = 0; i < sizeof(node->nonterminal) / sizeof(*node->nonterminal); i++) + if (node->nonterminal[i]) + free_map(node->nonterminal[i]); + free(node); +} + +void +libparsepsf_destroy_font(struct libparsepsf_font *font) +{ + free(font->glyph_data); + font->glyph_data = NULL; + if (font->map) { + free_map(font->map); + font->map = NULL; + } +} + + +static uint16_t +letoh16(const uint8_t *le) +{ + uint16_t b0 = (uint16_t)((uint16_t)le[0] << 0); + uint16_t b1 = (uint16_t)((uint16_t)le[1] << 8); + return (uint16_t)(b0 | b1); +} + + +static uint32_t +letoh32(uint32_t le) +{ + union { + uint32_t v; + uint8_t b[4]; + } u = {.v = le}; + uint32_t b0 = (uint32_t)((uint32_t)u.b[0] << 0); + uint32_t b1 = (uint32_t)((uint32_t)u.b[1] << 8); + uint32_t b2 = (uint32_t)((uint32_t)u.b[2] << 16); + uint32_t b3 = (uint32_t)((uint32_t)u.b[3] << 24); + return (uint32_t)(b0 | b1 | b2 | b3); +} + + +static uint32_t +desurrogate(uint16_t high, uint16_t low) +{ + /* high surrogate has lower value */ + uint32_t h = UINT32_C(0xD800) ^ (uint32_t)high; + uint32_t l = UINT32_C(0xDC00) ^ (uint32_t)low; + h <<= 10; + return (uint32_t)(h | l); +} + +static int +put_map_incomplete(struct libparsepsf_font *font, const uint8_t *seq, size_t seqlen, + uint8_t *savedp, struct libparsepsf_unimap **nodep) +{ + size_t i; + if (font->map == NULL) { + font->map = calloc(1, sizeof(*font->map)); + if (!font->map) + goto enomem; + } + if (!seqlen) + goto ebfont; + *nodep = font->map; + *savedp = seq[--seqlen]; + for (i = 0; i < seqlen; i++) { + if (!(*nodep)->nonterminal[seq[i]]) { + (*nodep)->nonterminal[seq[i]] = calloc(1, sizeof(*font->map)); + if (!(*nodep)->nonterminal[seq[i]]) + goto enomem; + } + *nodep = (*nodep)->nonterminal[seq[i]]; + } + + return 0; + +ebfont: + errno = EBFONT; + return -1; +enomem: + errno = ENOMEM; + return -1; +} + +static int +put_map_finalise(size_t index, uint8_t saved, struct libparsepsf_unimap *node) +{ + /* unfortunately this actually happens in the real world */ +#if 0 + if (node->terminal[saved]) { + return 0; +# if 0 + errno = EBFONT; + return -1; +# endif + } +#endif + + node->terminal[saved] = index + 1; + return 0; +} + +static int +put_map(struct libparsepsf_font *font, size_t index, const uint8_t *seq, size_t seqlen) +{ + uint8_t saved = 0xFF; + struct libparsepsf_unimap *node = NULL; + if (put_map_incomplete(font, seq, seqlen, &saved, &node)) + return -1; + return put_map_finalise(index, saved, node); +} + +static int +decode_utf8(const uint8_t *data, size_t size, size_t *np, uint32_t *cpp) +{ + uint8_t head; + uint32_t cp; + + head = *data; + *np = 1; + if (!(head & 0x80)) { + if (cpp) + *cpp = (uint32_t)head; + return 0; + } else if (!(head & 0x40)) { + return -1; + } + size--; + cp = (uint32_t)head; + head <<= 1; + + while (head & 0x80) { + head <<= 1; + if ((data[*np] & 0xC0) != 0x80) + return -1; + cp <<= 6; + cp |= (uint32_t)(data[*np] ^ 0x80); + *np += 1; + } + if (*np > 4) + return -1; + + cp &= (UINT32_C(1) << (*np * 5 + 1)) - 1; + if ((cp & UINT32_C(0xFFF800)) == UINT32_C(0xD800) || + cp > UINT32_C(0x10FFFF) || + cp < UINT32_C(1) << (*np == 2 ? 7 : *np * 5 - 4)) + return -1; + + if (cpp) + *cpp = cp; + return 0; +} + +int +libparsepsf_parse_font(const void *data, size_t size, struct libparsepsf_font *fontp, uint32_t *unrecognised_versionp) +{ + union { + struct psf1_header psf1; + struct psf2_header psf2; + } header; + const uint8_t *udata = data; + size_t glyphs_offset; + size_t charsize; + size_t off; + size_t i; + size_t n; + uint32_t u32; + uint16_t u16, u16b; + uint8_t u8, utf8[4], utf8_saved; + struct libparsepsf_unimap *utf8_node; + + *unrecognised_versionp = 0; + fontp->glyph_data = NULL; + fontp->map = NULL; + + if (size < 4) + goto ebfont; + + if (udata[0] == 0x36) { /* TODO untested */ + if (size < sizeof(header.psf1)) + goto ebfont; + memcpy(&header.psf1, udata, sizeof(header.psf1)); + if (header.psf1.magic_x36 != 0x36 || + header.psf1.magic_x04 != 0x04) + goto ebfont; + fontp->num_glyphs = (header.psf1.mode & PSF1_MODE512) ? 512 : 256; + fontp->height = (size_t)header.psf1.height; + fontp->width = 8; + glyphs_offset = sizeof(header.psf1); + charsize = fontp->height; + if (glyphs_offset > size || + !fontp->num_glyphs || + charsize > (size - glyphs_offset) / fontp->num_glyphs) + goto ebfont; + if (header.psf1.mode & PSF1_MODEHASTAB) { + off = glyphs_offset + fontp->num_glyphs * charsize; + for (i = 0; i < fontp->num_glyphs; i++) { + for (;;) { + if (off + 2 > size) + goto ebfont; + u16 = letoh16(&udata[off]); + off += 2; + if (u16 == PSF1_STARTSEQ) { + break; + } else if (u16 == PSF1_SEPARATOR) { + goto next_char_psf1; + } else if ((u16 & UINT32_C(0xF800)) == UINT32_C(0xD800)) { + if (off + 2 > size) + goto ebfont; + u16b = letoh16(&udata[off]); + off += 2; + if (((u16 ^ u16b) & 0xDC00) != 0x0400) + goto ebfont; + u32 = desurrogate(u16 < u16b ? u16 : u16b, + u16 < u16b ? u16b : u16); + } else { + u32 = (uint32_t)u16; + } + n = grapheme_cp_encode(u32, utf8, sizeof(utf8)); + if (n > sizeof(utf8)) + abort(); + if (put_map(fontp, i, utf8, n)) + goto fail; + } + utf8_saved = 0xFF; + utf8_node = NULL; + for (;;) { + if (off + 2 > size) + goto ebfont; + u16 = letoh16(&udata[off]); + off += 2; + if (u16 == PSF1_STARTSEQ || u16 == PSF1_SEPARATOR) { + if (put_map_finalise(i, utf8_saved, utf8_node)) + goto fail; + if (u16 == PSF1_SEPARATOR) + goto next_char_psf1; + utf8_saved = 0xFF; + utf8_node = NULL; + continue; + } else if ((u16 & UINT32_C(0xF800)) == UINT32_C(0xD800)) { + if (off + 2 > size) + goto ebfont; + u16b = letoh16(&udata[off]); + off += 2; + if (((u16 ^ u16b) & 0xDC00) != 0x0400) + goto ebfont; + u32 = desurrogate(u16 > u16b ? u16 : u16b, + u16 > u16b ? u16b : u16); + } else { + u32 = (uint32_t)u16; + } + n = grapheme_cp_encode(u32, utf8, sizeof(utf8)); + if (n > sizeof(utf8)) + abort(); + if (put_map_incomplete(fontp, utf8, n, &utf8_saved, &utf8_node)) + goto fail; + } + next_char_psf1:; + } + } + + } else { + if (size < sizeof(header.psf2)) + goto ebfont; + memcpy(&header.psf2, udata, sizeof(header.psf2)); + if (header.psf2.magic_x72 != 0x72 || + header.psf2.magic_xb5 != 0xb5 || + header.psf2.magic_x4a != 0x4a || + header.psf2.magic_x86 != 0x86) + goto ebfont; + header.psf2.version = letoh32(header.psf2.version); + header.psf2.header_size = letoh32(header.psf2.header_size); + header.psf2.flags = letoh32(header.psf2.flags); + header.psf2.num_glyphs = letoh32(header.psf2.num_glyphs); + header.psf2.charsize = letoh32(header.psf2.charsize); + header.psf2.height = letoh32(header.psf2.height); + header.psf2.width = letoh32(header.psf2.width); + if (header.psf2.height * ((header.psf2.width + 7) / 8) != header.psf2.charsize) + goto ebfont; + if (header.psf2.version > PSF2_MAXVERSION) + *unrecognised_versionp = 0; + fontp->num_glyphs = (size_t)header.psf2.num_glyphs; + fontp->height = (size_t)header.psf2.height; + fontp->width = (size_t)header.psf2.width; + glyphs_offset = (size_t)header.psf2.header_size; + charsize = (size_t)header.psf2.charsize; + if (glyphs_offset > size || + !fontp->num_glyphs || + charsize > (size - glyphs_offset) / fontp->num_glyphs) + goto ebfont; + if (header.psf2.flags & PSF2_HAS_UNICODE_TABLE) { + off = glyphs_offset + fontp->num_glyphs * charsize; + for (i = 0; i < fontp->num_glyphs; i++) { + for (;;) { + if (off == size) + goto ebfont; + u8 = udata[off]; + if (u8 == PSF2_STARTSEQ) { + off += 1; + break; + } else if (u8 == PSF2_SEPARATOR) { + off += 1; + goto next_char_psf2; + } + if (decode_utf8(&udata[off], size - off, &n, NULL)) + goto ebfont; + if (put_map(fontp, i, &udata[off], n)) + goto fail; + off += n; + } + utf8_saved = 0xFF; + utf8_node = NULL; + for (;;) { + if (off == size) + goto ebfont; + u8 = udata[off]; + if (u8 == 0xFE || u8 == 0xFF) { + if (put_map_finalise(i, utf8_saved, utf8_node)) + goto fail; + off += 1; + if (u8 == 0xFF) + goto next_char_psf2; + utf8_saved = 0xFF; + utf8_node = NULL; + } else { + if (decode_utf8(&udata[off], size - off, &n, NULL)) + goto ebfont; + if (put_map_incomplete(fontp, &udata[off], n, &utf8_saved, &utf8_node)) + goto fail; + off += n; + } + } + next_char_psf2:; + } + } + } + + if (charsize) { + fontp->glyph_data = malloc(fontp->num_glyphs * charsize); + if (!fontp->glyph_data) + goto enomem; + } + memcpy(fontp->glyph_data, &udata[glyphs_offset], fontp->num_glyphs * charsize); + + return 0; + +enomem: + errno = ENOMEM; + goto fail; +ebfont: + errno = EBFONT; +fail: + libparsepsf_destroy_font(fontp); + return -1; +} + + +size_t +libparsepsf_get_glyph(const struct libparsepsf_font *font, const char *c, size_t *remp, const char **next_cp) +{ + size_t glyph = 0, rem = 0, n; + uint32_t cp; + struct libparsepsf_unimap *node = font->map; + + if (!node) { + if (!remp && !*c) + return 0; + if (decode_utf8((const uint8_t *)c, remp ? *remp : SIZE_MAX, &n, &cp)) { + errno = EILSEQ; + return 0; + } + if (remp) + rem = *remp - n; + c = &c[n]; + glyph = (size_t)cp; + if (glyph >= font->num_glyphs) + return 0; + glyph -= 1; + goto out; + + } else if (remp) { + rem = *remp; + if (!rem) + return 0; + for (; rem > 1; c = &c[1], rem -= 1) { + if (node->terminal[*(const uint8_t *)c]) { + glyph = node->terminal[*(const uint8_t *)c]; + if (next_cp) + *next_cp = &c[1]; + if (remp) + *remp = rem - 1; + } + node = node->nonterminal[*(const uint8_t *)c]; + if (!node) + return glyph; + } + + } else { + if (!c[0]) + return 0; + for (; c[1]; c = &c[1]) { + if (node->terminal[*(const uint8_t *)c]) { + glyph = node->terminal[*(const uint8_t *)c]; + if (next_cp) + *next_cp = &c[1]; + } + node = node->nonterminal[*(const uint8_t *)c]; + if (!node) + return glyph; + } + } + + glyph = node->terminal[*(const uint8_t *)c]; +out: + if (glyph) { + if (next_cp) + *next_cp = &c[1]; + if (remp) + *remp = rem - 1; + } + return glyph; +} |