/* See LICENSE file for copyright and license details. */
#include "libparsepsf.h"

#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <grapheme.h>


/* On-disk header of a PSF version 1 font */
struct psf1_header {
	uint8_t magic_x36;
	uint8_t magic_x04;
	uint8_t mode;
#define PSF1_MODE512    0x01
#define PSF1_MODEHASTAB 0x02
/* #define PSF1_MODEHASSEQ 0x04 // really used */
	uint8_t height;
};
#define PSF1_SEPARATOR 0xFFFF
#define PSF1_STARTSEQ  0xFFFE

/* On-disk header of a PSF version 2 font (multi-byte fields are little-endian) */
struct psf2_header {
	uint8_t magic_x72;
	uint8_t magic_xb5;
	uint8_t magic_x4a;
	uint8_t magic_x86;
	uint32_t version;
#define PSF2_MAXVERSION 0
	uint32_t header_size;
	uint32_t flags;
#define PSF2_HAS_UNICODE_TABLE 0x01
	uint32_t num_glyphs;
	uint32_t charsize; /* = height * ((width + 7) / 8) */
	uint32_t height;
	uint32_t width;
};
#define PSF2_SEPARATOR 0xFF
#define PSF2_STARTSEQ  0xFE


/*
 * Recursively deallocate a node of the byte trie, including all
 * nodes reachable from it. `node` must not be NULL.
 */
static void
free_map(struct libparsepsf_unimap *node)
{
	size_t i;
	for (i = 0; i < sizeof(node->nonterminal) / sizeof(*node->nonterminal); i++)
		if (node->nonterminal[i])
			free_map(node->nonterminal[i]);
	free(node);
}


/*
 * Release the glyph data and the character map owned by `font`
 * and reset both pointers to NULL. The structure itself is not freed.
 */
void
libparsepsf_destroy_font(struct libparsepsf_font *font)
{
	free(font->glyph_data);
	font->glyph_data = NULL;
	if (font->map) {
		free_map(font->map);
		font->map = NULL;
	}
}


/* Read an unaligned little-endian 16-bit integer from `le` */
static uint16_t
letoh16(const uint8_t *le)
{
	uint16_t b0 = (uint16_t)((uint16_t)le[0] << 0);
	uint16_t b1 = (uint16_t)((uint16_t)le[1] << 8);
	return (uint16_t)(b0 | b1);
}


/* Convert a 32-bit integer whose in-memory bytes are little-endian to host order */
static uint32_t
letoh32(uint32_t le)
{
	union {
		uint32_t v;
		uint8_t b[4];
	} u = {.v = le};
	uint32_t b0 = (uint32_t)((uint32_t)u.b[0] << 0);
	uint32_t b1 = (uint32_t)((uint32_t)u.b[1] << 8);
	uint32_t b2 = (uint32_t)((uint32_t)u.b[2] << 16);
	uint32_t b3 = (uint32_t)((uint32_t)u.b[3] << 24);
	return (uint32_t)(b0 | b1 | b2 | b3);
}


/*
 * Combine a UTF-16 surrogate pair into the code point it encodes.
 *
 * `high` must be in [0xD800, 0xDBFF] and `low` in [0xDC00, 0xDFFF].
 */
static uint32_t
desurrogate(uint16_t high, uint16_t low)
{
	/* high surrogate has lower value */
	uint32_t h = UINT32_C(0xD800) ^ (uint32_t)high;
	uint32_t l = UINT32_C(0xDC00) ^ (uint32_t)low;
	h <<= 10;
	/* surrogate pairs address the planes above the BMP, hence the offset */
	return (uint32_t)(UINT32_C(0x10000) + (h | l));
}


/*
 * Return the child of `node` for `byte`, allocating it if it does
 * not exist yet. Returns NULL if the allocation fails.
 */
static struct libparsepsf_unimap *
map_descend(struct libparsepsf_unimap *node, uint8_t byte)
{
	if (!node->nonterminal[byte])
		node->nonterminal[byte] = calloc(1, sizeof(*node));
	return node->nonterminal[byte];
}


/*
 * Extend a path in the font's byte trie with the bytes in `seq`,
 * holding back the last byte so that the caller can either mark it
 * as terminal (put_map_finalise) or extend the path further.
 *
 * `*nodep` and `*savedp` carry the state between calls: when `*nodep`
 * is NULL a new path is started at the root; otherwise the path
 * continues from `*nodep` through the previously held back byte
 * `*savedp`. On success they are updated to the new position.
 *
 * Returns 0 on success, -1 with errno set to EBFONT (empty `seq`)
 * or ENOMEM on failure.
 */
static int
put_map_incomplete(struct libparsepsf_font *font, const uint8_t *seq, size_t seqlen,
                   uint8_t *savedp, struct libparsepsf_unimap **nodep)
{
	struct libparsepsf_unimap *node;
	size_t i;

	if (!font->map) {
		font->map = calloc(1, sizeof(*font->map));
		if (!font->map)
			goto enomem;
	}

	if (!seqlen)
		goto ebfont;

	node = *nodep;
	if (!node) {
		node = font->map;
	} else {
		/* the byte held back by the previous call becomes an interior byte */
		node = map_descend(node, *savedp);
		if (!node)
			goto enomem;
	}

	for (i = 0; i + 1 < seqlen; i++) {
		node = map_descend(node, seq[i]);
		if (!node)
			goto enomem;
	}

	*savedp = seq[seqlen - 1];
	*nodep = node;
	return 0;

ebfont:
	errno = EBFONT;
	return -1;

enomem:
	errno = ENOMEM;
	return -1;
}


/*
 * Mark the path ending in byte `saved` below `node` as mapping to
 * the glyph with index `index` (stored as `index + 1` so that 0
 * means "no glyph").
 *
 * Returns 0 on success, -1 with errno set to EBFONT if `node` is
 * NULL, which is the case when the font contains an empty sequence.
 */
static int
put_map_finalise(size_t index, uint8_t saved, struct libparsepsf_unimap *node)
{
	if (!node) {
		errno = EBFONT;
		return -1;
	}
	/* Duplicate entries unfortunately actually happen in the
	 * real world, so they are accepted and the later one wins */
	node->terminal[saved] = index + 1;
	return 0;
}


/*
 * Map the complete byte string `seq` to the glyph with index `index`.
 *
 * Returns 0 on success, -1 with errno set on failure.
 */
static int
put_map(struct libparsepsf_font *font, size_t index, const uint8_t *seq, size_t seqlen)
{
	uint8_t saved = 0xFF;
	struct libparsepsf_unimap *node = NULL;
	if (put_map_incomplete(font, seq, seqlen, &saved, &node))
		return -1;
	return put_map_finalise(index, saved, node);
}


/*
 * Decode one UTF-8 encoded code point from the start of `data`,
 * reading at most `size` bytes.
 *
 * On success 0 is returned, `*np` is set to the number of bytes
 * the code point occupies, and unless `cpp` is NULL, `*cpp` is set
 * to the code point. On failure -1 is returned; this happens if the
 * input is empty, truncated, longer than 4 bytes, overlong, a
 * surrogate, or above U+10FFFF.
 */
static int
decode_utf8(const uint8_t *data, size_t size, size_t *np, uint32_t *cpp)
{
	uint8_t head;
	uint32_t cp;

	if (!size) {
		*np = 0;
		return -1;
	}

	head = *data;
	*np = 1;
	if (!(head & 0x80)) {
		if (cpp)
			*cpp = (uint32_t)head;
		return 0;
	} else if (!(head & 0x40)) {
		/* stray continuation byte */
		return -1;
	}

	cp = (uint32_t)head;
	head = (uint8_t)(head << 1);
	/* each remaining leading 1-bit of the head byte announces one continuation byte */
	while (head & 0x80) {
		head = (uint8_t)(head << 1);
		if (*np == 4 || *np >= size)
			return -1;
		if ((data[*np] & 0xC0) != 0x80)
			return -1;
		cp <<= 6;
		cp |= (uint32_t)(data[*np] ^ 0x80);
		*np += 1;
	}

	/* strip the length marker of the head byte: 11, 16, or 21 payload bits remain */
	cp &= (UINT32_C(1) << (*np * 5 + 1)) - 1;

	if ((cp & UINT32_C(0xFFF800)) == UINT32_C(0xD800) ||
	    cp > UINT32_C(0x10FFFF) ||
	    cp < UINT32_C(1) << (*np == 2 ? 7 : *np * 5 - 4))
		return -1;

	if (cpp)
		*cpp = cp;
	return 0;
}


/*
 * Turn the 16-bit unit `unit`, already read from a PSF1 table, into
 * a code point. If `unit` is a surrogate, the other half of the pair
 * is read from `udata` at `*offp` (which is advanced); the halves are
 * accepted in either order.
 *
 * Returns 0 on success, -1 if the data is truncated or the
 * surrogate is not properly paired.
 */
static int
read_psf1_codepoint(const uint8_t *udata, size_t size, size_t *offp, uint16_t unit, uint32_t *cpp)
{
	uint16_t other;

	if ((unit & 0xF800) != 0xD800) {
		*cpp = (uint32_t)unit;
		return 0;
	}

	if (size - *offp < 2)
		return -1;
	other = letoh16(&udata[*offp]);
	*offp += 2;

	/* the top 6 bits must differ in exactly the bit that
	 * distinguishes high surrogates from low surrogates */
	if (((unit ^ other) & 0xFC00) != 0x0400)
		return -1;

	*cpp = unit < other ? desurrogate(unit, other) : desurrogate(other, unit);
	return 0;
}


/*
 * Parse the Unicode table of a PSF1 font, starting at offset `off`
 * (which must not exceed `size`), into `fontp->map`.
 *
 * Returns 0 on success, -1 with errno set on failure.
 */
static int
parse_psf1_unimap(struct libparsepsf_font *fontp, const uint8_t *udata, size_t size, size_t off)
{
	struct libparsepsf_unimap *node;
	uint8_t utf8[4], saved;
	uint16_t u16;
	uint32_t cp;
	size_t i, n;
	int in_seq;

	for (i = 0; i < fontp->num_glyphs; i++) {
		/* each entry is: single code points, then
		 * STARTSEQ-introduced sequences, then SEPARATOR */
		in_seq = 0;
		saved = 0xFF;
		node = NULL;
		for (;;) {
			if (size - off < 2)
				goto ebfont;
			u16 = letoh16(&udata[off]);
			off += 2;

			if (u16 == PSF1_STARTSEQ || u16 == PSF1_SEPARATOR) {
				if (in_seq) {
					if (put_map_finalise(i, saved, node))
						return -1;
					saved = 0xFF;
					node = NULL;
				}
				if (u16 == PSF1_SEPARATOR)
					break;
				in_seq = 1;
				continue;
			}

			if (read_psf1_codepoint(udata, size, &off, u16, &cp))
				goto ebfont;
			n = grapheme_encode_utf8(cp, (char *)utf8, sizeof(utf8));
			if (n > sizeof(utf8))
				abort();

			if (in_seq) {
				if (put_map_incomplete(fontp, utf8, n, &saved, &node))
					return -1;
			} else {
				if (put_map(fontp, i, utf8, n))
					return -1;
			}
		}
	}

	return 0;

ebfont:
	errno = EBFONT;
	return -1;
}


/*
 * Parse the Unicode table of a PSF2 font, starting at offset `off`
 * (which must not exceed `size`), into `fontp->map`.
 *
 * Returns 0 on success, -1 with errno set on failure.
 */
static int
parse_psf2_unimap(struct libparsepsf_font *fontp, const uint8_t *udata, size_t size, size_t off)
{
	struct libparsepsf_unimap *node;
	uint8_t u8, saved;
	size_t i, n;
	int in_seq;

	for (i = 0; i < fontp->num_glyphs; i++) {
		/* each entry is: single code points, then
		 * STARTSEQ-introduced sequences, then SEPARATOR */
		in_seq = 0;
		saved = 0xFF;
		node = NULL;
		for (;;) {
			if (off >= size)
				goto ebfont;
			u8 = udata[off];

			if (u8 == PSF2_STARTSEQ || u8 == PSF2_SEPARATOR) {
				off += 1;
				if (in_seq) {
					if (put_map_finalise(i, saved, node))
						return -1;
					saved = 0xFF;
					node = NULL;
				}
				if (u8 == PSF2_SEPARATOR)
					break;
				in_seq = 1;
				continue;
			}

			/* the table is already UTF-8, it only needs validation */
			if (decode_utf8(&udata[off], size - off, &n, NULL))
				goto ebfont;

			if (in_seq) {
				if (put_map_incomplete(fontp, &udata[off], n, &saved, &node))
					return -1;
			} else {
				if (put_map(fontp, i, &udata[off], n))
					return -1;
			}
			off += n;
		}
	}

	return 0;

ebfont:
	errno = EBFONT;
	return -1;
}


/*
 * Parse a PSF1 or PSF2 font from the `size` bytes at `data`.
 *
 * On success 0 is returned and `*fontp` is filled in; the glyph
 * data is copied, and `fontp->map` is NULL if the font does not
 * have a Unicode table. The result shall be released with
 * libparsepsf_destroy_font. `*unrecognised_versionp` is set to 1
 * if the font is a PSF2 font with a version above PSF2_MAXVERSION,
 * and to 0 otherwise.
 *
 * On failure -1 is returned, errno is set (EBFONT for malformed
 * input, ENOMEM on allocation failure), and `*fontp` owns nothing.
 */
int
libparsepsf_parse_font(const void *data, size_t size, struct libparsepsf_font *fontp, uint32_t *unrecognised_versionp)
{
	union {
		struct psf1_header psf1;
		struct psf2_header psf2;
	} header;
	const uint8_t *udata = data;
	size_t glyphs_offset;
	size_t charsize;
	size_t table_offset;
	int is_psf1;
	int has_table;

	*unrecognised_versionp = 0;
	fontp->glyph_data = NULL;
	fontp->map = NULL;

	if (size < 4)
		goto ebfont;

	is_psf1 = udata[0] == 0x36;
	if (is_psf1) {
		/* TODO untested */

		if (size < sizeof(header.psf1))
			goto ebfont;
		memcpy(&header.psf1, udata, sizeof(header.psf1));
		if (header.psf1.magic_x36 != 0x36 ||
		    header.psf1.magic_x04 != 0x04)
			goto ebfont;

		fontp->num_glyphs = (header.psf1.mode & PSF1_MODE512) ? 512 : 256;
		fontp->height = (size_t)header.psf1.height;
		fontp->width = 8;
		glyphs_offset = sizeof(header.psf1);
		charsize = fontp->height;
		has_table = !!(header.psf1.mode & PSF1_MODEHASTAB);
	} else {
		if (size < sizeof(header.psf2))
			goto ebfont;
		memcpy(&header.psf2, udata, sizeof(header.psf2));
		if (header.psf2.magic_x72 != 0x72 ||
		    header.psf2.magic_xb5 != 0xb5 ||
		    header.psf2.magic_x4a != 0x4a ||
		    header.psf2.magic_x86 != 0x86)
			goto ebfont;

		header.psf2.version     = letoh32(header.psf2.version);
		header.psf2.header_size = letoh32(header.psf2.header_size);
		header.psf2.flags       = letoh32(header.psf2.flags);
		header.psf2.num_glyphs  = letoh32(header.psf2.num_glyphs);
		header.psf2.charsize    = letoh32(header.psf2.charsize);
		header.psf2.height      = letoh32(header.psf2.height);
		header.psf2.width       = letoh32(header.psf2.width);

		/* the glyphs cannot begin inside the header */
		if (header.psf2.header_size < sizeof(header.psf2))
			goto ebfont;
		/* computed in 64 bits so that absurd dimensions cannot wrap around and pass */
		if ((uint64_t)header.psf2.height * (((uint64_t)header.psf2.width + 7) / 8) != header.psf2.charsize)
			goto ebfont;
		if (header.psf2.version > PSF2_MAXVERSION)
			*unrecognised_versionp = 1;

		fontp->num_glyphs = (size_t)header.psf2.num_glyphs;
		fontp->height = (size_t)header.psf2.height;
		fontp->width = (size_t)header.psf2.width;
		glyphs_offset = (size_t)header.psf2.header_size;
		charsize = (size_t)header.psf2.charsize;
		has_table = !!(header.psf2.flags & PSF2_HAS_UNICODE_TABLE);
	}

	/* all glyphs must fit in the file; written as a division to avoid overflow */
	if (glyphs_offset > size || !fontp->num_glyphs ||
	    charsize > (size - glyphs_offset) / fontp->num_glyphs)
		goto ebfont;

	if (has_table) {
		table_offset = glyphs_offset + fontp->num_glyphs * charsize;
		if (is_psf1) {
			if (parse_psf1_unimap(fontp, udata, size, table_offset))
				goto fail;
		} else {
			if (parse_psf2_unimap(fontp, udata, size, table_offset))
				goto fail;
		}
	}

	if (charsize) {
		fontp->glyph_data = malloc(fontp->num_glyphs * charsize);
		if (!fontp->glyph_data)
			goto enomem;
		memcpy(fontp->glyph_data, &udata[glyphs_offset], fontp->num_glyphs * charsize);
	}

	return 0;

enomem:
	errno = ENOMEM;
	goto fail;

ebfont:
	errno = EBFONT;
fail:
	libparsepsf_destroy_font(fontp);
	return -1;
}


/*
 * Look up the glyph for the longest mapped prefix of the text at `c`.
 *
 * If `remp` is non-NULL, `*remp` is the number of bytes available
 * at `c`; otherwise `c` is NUL-terminated. When a glyph is found,
 * `*next_cp` (unless `next_cp` is NULL) is set to the first byte
 * after the matched text and `*remp` (unless `remp` is NULL) is set
 * to the number of bytes remaining after it.
 *
 * If the font has no character map, the first code point in `c` is
 * used directly as the glyph index.
 *
 * Returns the index of the glyph plus 1, or 0 if there is no glyph
 * for the text or the text is empty. If the font has no character
 * map and the text is not valid UTF-8, 0 is returned and errno is
 * set to EILSEQ.
 */
size_t
libparsepsf_get_glyph(const struct libparsepsf_font *font, const char *c, size_t *remp, const char **next_cp)
{
	size_t glyph = 0, rem = 0, n;
	uint32_t cp;
	uint8_t byte;
	struct libparsepsf_unimap *node = font->map;

	if (!node) {
		if (remp ? !*remp : !*c)
			return 0;
		if (decode_utf8((const uint8_t *)c, remp ? *remp : SIZE_MAX, &n, &cp)) {
			errno = EILSEQ;
			return 0;
		}
		glyph = (size_t)cp;
		if (glyph >= font->num_glyphs)
			return 0;
		if (next_cp)
			*next_cp = &c[n];
		if (remp)
			*remp = *remp - n;
		return glyph + 1;
	}

	if (remp) {
		rem = *remp;
		if (!rem)
			return 0;
	} else if (!c[0]) {
		return 0;
	}

	for (;;) {
		byte = *(const uint8_t *)c;

		/* remember the match, but keep going in case a longer one exists */
		if (node->terminal[byte]) {
			glyph = node->terminal[byte];
			if (next_cp)
				*next_cp = &c[1];
			if (remp)
				*remp = rem - 1;
		}

		/* stop at the last byte of the input */
		if (remp ? rem == 1 : !c[1])
			break;

		node = node->nonterminal[byte];
		if (!node)
			break;

		c = &c[1];
		if (remp)
			rem -= 1;
	}

	return glyph;
}