aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMattias Andrée <m@maandree.se>2025-12-21 09:53:48 +0100
committerMattias Andrée <m@maandree.se>2025-12-21 09:53:48 +0100
commit15904ad51325426bcb3b8e87e714584e437417b7 (patch)
treeef41e227769654a5b28b6e55d040200313cc57ea
parentFirst commit (diff)
downloadlibcmap-15904ad51325426bcb3b8e87e714584e437417b7.tar.gz
libcmap-15904ad51325426bcb3b8e87e714584e437417b7.tar.bz2
libcmap-15904ad51325426bcb3b8e87e714584e437417b7.tar.xz
Implement script and block listing
Signed-off-by: Mattias Andrée <m@maandree.se>
-rw-r--r--.gitignore3
-rw-r--r--Blocks.parse.c136
-rw-r--r--LICENSE.UNICODE39
-rw-r--r--Makefile31
-rw-r--r--Scripts.parse.c260
-rw-r--r--config.mk2
-rw-r--r--libcmap.h4
-rw-r--r--libcmap_block_list.c5
-rw-r--r--libcmap_find_in_no_block.c20
-rw-r--r--libcmap_script_list.c5
-rw-r--r--parse-common.c123
11 files changed, 618 insertions, 10 deletions
diff --git a/.gitignore b/.gitignore
index a071ed4..52b97af 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,6 @@
*.gcov
*.gcno
*.gcda
+*.txt
+*.txt.c
+*.parse
diff --git a/Blocks.parse.c b/Blocks.parse.c
new file mode 100644
index 0000000..bb61704
--- /dev/null
+++ b/Blocks.parse.c
@@ -0,0 +1,136 @@
+/* See LICENSE file for copyright and license details. */
+#include "parse-common.c"
+
+
+struct block { /* one named block read from the input */
+ char *name; /* heap copy made by parse_line(), released by output() */
+ unsigned long int low, high; /* first and last code point of the block, both inclusive */
+};
+
+
+static struct block *blocks = NULL; /* grown by one element for every parsed line */
+static size_t nblocks = 0; /* number of elements in `blocks` */
+
+
+/**
+ * Parse one data line of the form "LOW[..HIGH]; Name" and append
+ * the described block to `blocks`
+ *
+ * LOW and HIGH are hexadecimal code points no greater than U+10FFFF;
+ * HIGH defaults to LOW when the ".." part is absent. A further
+ * ';'-separated column is ignored after a one-time warning. The
+ * process exits with status 1 if the line is malformatted or if
+ * memory cannot be allocated
+ *
+ * @param  text    The NUL-terminated line; it is modified in place
+ * @param  lineno  The number of the line, used in diagnostics
+ */
+static void
+parse_line(char *text, size_t lineno)
+{
+	unsigned long int low, high;
+	char *name, *end;
+	size_t i;
+
+	errno = 0;
+
+	/* <ctype.h> functions are only defined for unsigned char values and EOF */
+	if (!isxdigit((unsigned char)*text)) {
+	malformat:
+		fprintf(stderr, "%s: line %zu in <stdin> is malformatted\n", argv0, lineno);
+		exit(1);
+	}
+
+	high = low = strtoul(text, &text, 16);
+	if (errno || low > 0x10FFFFUL)
+		goto malformat;
+	if (text[0] == '.' && text[1] == '.') {
+		if (!isxdigit((unsigned char)text[2]))
+			goto malformat;
+		high = strtoul(&text[2], &text, 16);
+		if (errno || high > 0x10FFFFUL || high < low)
+			goto malformat;
+	}
+	while (isspace((unsigned char)*text))
+		text++;
+
+	if (*text++ != ';')
+		goto malformat;
+
+	while (isspace((unsigned char)*text))
+		text++;
+	name = text;
+	while (*text && *text != ';')
+		text++;
+	/* Strip trailing whitespace from the name, never stepping before its start */
+	for (end = text; end > name && isspace((unsigned char)end[-1]); end--)
+		end[-1] = '\0';
+
+	if (*text == ';') {
+		static int warned = 0;
+		if (!warned) {
+			warned = 1;
+			fprintf(stderr, "%s: unrecognised column detected in <stdin>\n", argv0);
+		}
+		/* Also terminates the name when no whitespace preceded the ';' */
+		*text++ = '\0';
+	} else if (*text) {
+		goto malformat;
+	}
+
+	i = nblocks++;
+	blocks = realloc(blocks, nblocks * sizeof(*blocks));
+	if (!blocks) {
+		fprintf(stderr, "%s: realloc %zu: %s\n", argv0, nblocks * sizeof(*blocks), strerror(errno));
+		exit(1);
+	}
+	blocks[i].name = strdup(name);
+	if (!blocks[i].name) {
+		fprintf(stderr, "%s: strdup: %s\n", argv0, strerror(errno));
+		exit(1);
+	}
+	blocks[i].low = low;
+	blocks[i].high = high;
+}
+
+
+/* qsort(3) comparator: orders blocks by name, in strcmp(3) order */
+static int
+blockcmp_name(const void *av, const void *bv)
+{
+	const struct block *lhs = av;
+	const struct block *rhs = bv;
+
+	return strcmp(lhs->name, rhs->name);
+}
+
+
+/*
+ * qsort(3) comparator: orders blocks by their first code point
+ *
+ * Returns 0 for equal keys so that the ordering is consistent,
+ * as qsort(3) requires of its comparison function
+ */
+static int
+blockcmp_range(const void *av, const void *bv)
+{
+	const struct block *a = av, *b = bv;
+	return (a->low > b->low) - (a->low < b->low);
+}
+
+
+/**
+ * Write the generated C source to standard output: the block table
+ * `list`, sorted by name, followed by `libcmap_no_block`, which lists
+ * the code point ranges up to U+10FFFF that lie between or after
+ * the parsed blocks
+ *
+ * Releases `blocks` and the names stored in it. Aborts if no block
+ * was parsed or if the lowest block does not start at U+0000
+ *
+ * @return  Negative if any printf(3) call failed
+ */
+static int
+output(void)
+{
+	const char *sep = "";
+	size_t i;
+	int x = 0;
+
+	qsort(blocks, nblocks, sizeof(*blocks), &blockcmp_name);
+
+	x |= printf("static const struct libcmap_block list[] = {\n");
+	for (i = 0; i < nblocks;) {
+		x |= printf("\t{\"%s\", {0x%04lX, 0x%04lX}}", blocks[i].name, blocks[i].low, blocks[i].high);
+		free(blocks[i].name);
+		x |= printf("%s\n", ++i < nblocks ? "," : "");
+	}
+	x |= printf("};\n\n");
+
+	qsort(blocks, nblocks, sizeof(*blocks), &blockcmp_range);
+	if (!nblocks || blocks[0].low)
+		abort();
+	/* Locate the first gap between consecutive blocks, if there is one */
+	for (i = 1U; i < nblocks; i++)
+		if (blocks[i].low > blocks[i - 1U].high + 1U)
+			break;
+	if (i == nblocks && blocks[nblocks - 1U].high == 0x10FFFFUL) {
+		x |= printf("const struct libcmap_script libcmap_no_block = {\"No Block\", NULL, 0};\n");
+	} else {
+		x |= printf("static const struct libcmap_range No_Block[] = {\n");
+		/* `i` equals `nblocks` when the only gap is after the last block,
+		 * so blocks[i] is only read under the loop condition */
+		for (; i < nblocks; i++) {
+			if (blocks[i].low > blocks[i - 1U].high + 1U) {
+				x |= printf("%s\t{0x%04lX, 0x%04lX}", sep, blocks[i - 1U].high + 1U, blocks[i].low - 1U);
+				sep = ",\n";
+			}
+		}
+		if (blocks[nblocks - 1U].high < 0x10FFFFUL)
+			x |= printf("%s\t{0x%04lX, 0x10FFFFUL}", sep, blocks[nblocks - 1U].high + 1U);
+		x |= printf("\n};\n");
+		x |= printf("const struct libcmap_script libcmap_no_block = ");
+		x |= printf("{\"No Block\", No_Block, sizeof(No_Block) / sizeof(*No_Block)};\n");
+	}
+
+	free(blocks);
+	return x;
+}
diff --git a/LICENSE.UNICODE b/LICENSE.UNICODE
new file mode 100644
index 0000000..c16d7cc
--- /dev/null
+++ b/LICENSE.UNICODE
@@ -0,0 +1,39 @@
+UNICODE LICENSE V3
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright © 1991-2025 Unicode, Inc.
+
+NOTICE TO USER: Carefully read the following legal agreement. BY
+DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
+SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
+TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
+DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of data files and any associated documentation (the "Data Files") or
+software and any associated documentation (the "Software") to deal in the
+Data Files or Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, and/or sell
+copies of the Data Files or Software, and to permit persons to whom the
+Data Files or Software are furnished to do so, provided that either (a)
+this copyright and permission notice appear with all copies of the Data
+Files or Software, or (b) this copyright and permission notice appear in
+associated Documentation.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
+THIRD PARTY RIGHTS.
+
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
+BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
+OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
+FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder shall
+not be used in advertising or otherwise to promote the sale, use or other
+dealings in these Data Files or Software without prior written
+authorization of the copyright holder.
diff --git a/Makefile b/Makefile
index e189657..5b9f09b 100644
--- a/Makefile
+++ b/Makefile
@@ -15,10 +15,13 @@ LIB_MINOR = 0
LIB_VERSION = $(LIB_MAJOR).$(LIB_MINOR)
LIB_NAME = cmap
+UNICODE_VERSION = 17.0.0
+
OBJ =\
libcmap_block_list.o\
libcmap_script_list.o\
+ libcmap_find_in_no_block.o\
libcmap_find_block.o\
libcmap_find_script.o
@@ -27,10 +30,18 @@ HDR =\
LOBJ = $(OBJ:.o=.lo)
+PARSERS =\
+ Scripts.parse\
+ Blocks.parse
+
all: libcmap.a libcmap.$(LIBEXT)
$(OBJ): $(HDR)
$(LOBJ): $(HDR)
+$(PARSERS:=.o): parse-common.c
+
+libcmap_script_list.o: Scripts.txt.c
+libcmap_block_list.o: Blocks.txt.c
.c.o:
$(CC) -c -o $@ $< $(CFLAGS) $(CPPFLAGS)
@@ -46,6 +57,18 @@ libcmap.a: $(OBJ)
libcmap.$(LIBEXT): $(LOBJ)
$(CC) $(LIBFLAGS) -o $@ $(LOBJ) $(LDFLAGS)
+# The URL follows UNICODE_VERSION so that the downloaded data always
+# matches the version named in the target file
+Scripts.$(UNICODE_VERSION).txt:
+	$(DOWNLOAD) 'https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/Scripts.txt' > $@
+
+Blocks.$(UNICODE_VERSION).txt:
+	$(DOWNLOAD) 'https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/Blocks.txt' > $@
+
+Scripts.txt.c: Scripts.$(UNICODE_VERSION).txt Scripts.parse
+ ./Scripts.parse < $(@:.txt.c=).$(UNICODE_VERSION).txt > $@
+
+Blocks.txt.c: Blocks.$(UNICODE_VERSION).txt Blocks.parse
+ ./Blocks.parse < $(@:.txt.c=).$(UNICODE_VERSION).txt > $@
+
install: libcmap.a libcmap.$(LIBEXT)
mkdir -p -- "$(DESTDIR)$(PREFIX)/lib"
mkdir -p -- "$(DESTDIR)$(PREFIX)/include"
@@ -66,8 +89,14 @@ uninstall:
clean:
-rm -f -- *.o *.a *.lo *.su *.so *.so.* *.dll *.dylib
-rm -f -- *.gch *.gcov *.gcno *.gcda *.$(LIBEXT)
+ -rm -f -- *.txt.c *.parse
+
+clean-downloads:
+ -rm -f -- *.txt
+
+clean-all: clean clean-downloads
.SUFFIXES:
.SUFFIXES: .lo .o .c
-.PHONY: all install uninstall clean
+.PHONY: all install uninstall clean clean-downloads clean-all
diff --git a/Scripts.parse.c b/Scripts.parse.c
new file mode 100644
index 0000000..d5d167b
--- /dev/null
+++ b/Scripts.parse.c
@@ -0,0 +1,260 @@
+/* See LICENSE file for copyright and license details. */
+#include "parse-common.c"
+
+
+struct range { /* interval of code points, both ends inclusive */
+ unsigned long int low, high;
+};
+
+struct script { /* one script together with every range assigned to it */
+ char *cname; /* name as spelled in the input; output() emits it as a C identifier */
+ char *hname; /* cname with '_' turned into ' ' and passed through fixed_script_name(); emitted as the string name */
+ struct range *ranges; /* heap array, grown by parse_line() */
+ size_t nranges; /* number of elements in `ranges` */
+};
+
+
+static struct script *scripts = NULL; /* grown by find_script() when a new name is seen */
+static size_t nscripts = 0; /* number of elements in `scripts` */
+
+
+/*
+ * Map a script name to the spelling used in the generated table
+ *
+ * "NKo" becomes "N'Ko"; every other name is returned as is
+ * (the very same pointer that was passed in)
+ */
+static const char *
+fixed_script_name(const char *hname)
+{
+	return strcmp(hname, "NKo") ? hname : "N'Ko";
+}
+
+
+/**
+ * Look up a script by its name as written in the input, appending
+ * a new entry without ranges to `scripts` if none exists yet
+ *
+ * When an entry is created, every '_' in `cname` is overwritten
+ * with ' ' once the original spelling has been copied. The process
+ * exits with status 1 if memory cannot be allocated
+ *
+ * @param   cname  The script name; modified as described above
+ * @return         Pointer into `scripts`; the array is reallocated
+ *                 whenever a later call adds an entry
+ */
+static struct script *
+find_script(char *cname)
+{
+	struct script *entry;
+	size_t idx;
+	char *p;
+
+	/* Search from the most recently added script backwards */
+	idx = nscripts;
+	while (idx--)
+		if (!strcmp(cname, scripts[idx].cname))
+			return &scripts[idx];
+
+	idx = nscripts++;
+	scripts = realloc(scripts, nscripts * sizeof(*scripts));
+	if (!scripts) {
+		fprintf(stderr, "%s: realloc %zu: %s\n", argv0, nscripts * sizeof(*scripts), strerror(errno));
+		exit(1);
+	}
+	entry = &scripts[idx];
+
+	entry->cname = strdup(cname);
+	if (!entry->cname) {
+		fprintf(stderr, "%s: strdup: %s\n", argv0, strerror(errno));
+		exit(1);
+	}
+	entry->ranges = NULL;
+	entry->nranges = 0;
+
+	/* Derive the human-readable name in the caller's buffer */
+	for (p = cname; *p; p++)
+		if (*p == '_')
+			*p = ' ';
+	entry->hname = strdup(fixed_script_name(cname));
+	if (!entry->hname) {
+		fprintf(stderr, "%s: strdup: %s\n", argv0, strerror(errno));
+		exit(1);
+	}
+
+	return entry;
+}
+
+
+/**
+ * Parse one data line of the form "LOW[..HIGH]; Script_Name" and
+ * add the range to the named script, creating the script if needed
+ *
+ * LOW and HIGH are hexadecimal code points no greater than U+10FFFF;
+ * HIGH defaults to LOW when the ".." part is absent. The name ends
+ * at the first whitespace or ';'. A further ';'-separated column is
+ * ignored after a one-time warning. The process exits with status 1
+ * if the line is malformatted or if memory cannot be allocated
+ *
+ * @param  text    The NUL-terminated line; it is modified in place
+ * @param  lineno  The number of the line, used in diagnostics
+ */
+static void
+parse_line(char *text, size_t lineno)
+{
+	unsigned long int low, high;
+	struct script *script;
+	char *name;
+	size_t i;
+
+	errno = 0;
+
+	/* <ctype.h> functions are only defined for unsigned char values and EOF */
+	if (!isxdigit((unsigned char)*text)) {
+	malformat:
+		fprintf(stderr, "%s: line %zu in <stdin> is malformatted\n", argv0, lineno);
+		exit(1);
+	}
+
+	high = low = strtoul(text, &text, 16);
+	if (errno || low > 0x10FFFFUL)
+		goto malformat;
+	if (text[0] == '.' && text[1] == '.') {
+		if (!isxdigit((unsigned char)text[2]))
+			goto malformat;
+		high = strtoul(&text[2], &text, 16);
+		if (errno || high > 0x10FFFFUL || high < low)
+			goto malformat;
+	}
+	while (isspace((unsigned char)*text))
+		text++;
+
+	if (*text++ != ';')
+		goto malformat;
+
+	while (isspace((unsigned char)*text))
+		text++;
+	name = text;
+	while (*text && !isspace((unsigned char)*text) && *text != ';')
+		text++;
+	/* Terminate the name and skip whatever whitespace follows it */
+	while (isspace((unsigned char)*text))
+		*text++ = '\0';
+
+	if (*text == ';') {
+		static int warned = 0;
+		if (!warned) {
+			warned = 1;
+			fprintf(stderr, "%s: unrecognised column detected in <stdin>\n", argv0);
+		}
+		/* Also terminates the name when no whitespace preceded the ';' */
+		*text++ = '\0';
+	} else if (*text) {
+		goto malformat;
+	}
+
+	script = find_script(name);
+	i = script->nranges++;
+	script->ranges = realloc(script->ranges, script->nranges * sizeof(*script->ranges));
+	if (!script->ranges) {
+		fprintf(stderr, "%s: realloc %zu: %s\n", argv0, script->nranges * sizeof(*script->ranges), strerror(errno));
+		exit(1);
+	}
+	script->ranges[i].low = low;
+	script->ranges[i].high = high;
+}
+
+
+/* qsort(3) comparator: orders scripts by human-readable name, in strcmp(3) order */
+static int
+scriptcmp(const void *av, const void *bv)
+{
+	const struct script *lhs = av;
+	const struct script *rhs = bv;
+
+	return strcmp(lhs->hname, rhs->hname);
+}
+
+
+/* qsort(3) comparator: orders ranges by `low`, with ties broken by `high` */
+static int
+rangecmp(const void *av, const void *bv)
+{
+	const struct range *lhs = av;
+	const struct range *rhs = bv;
+
+	if (lhs->low != rhs->low)
+		return lhs->low < rhs->low ? -1 : +1;
+	if (lhs->high != rhs->high)
+		return lhs->high < rhs->high ? -1 : +1;
+	return 0;
+}
+
+
+/**
+ * Sort an array of ranges and merge, in place, those that
+ * overlap or are directly adjacent
+ *
+ * Aborts if the array is empty
+ *
+ * @param   ranges  The ranges; reordered and overwritten
+ * @param   n       The number of elements in `ranges`, at least 1
+ * @return          The number of ranges left at the front of `ranges`
+ */
+static size_t
+join_ranges(struct range *ranges, size_t n)
+{
+	size_t r, w;
+
+	if (!n)
+		abort();
+
+	qsort(ranges, n, sizeof(*ranges), &rangecmp);
+	for (r = w = 1U; r < n; r++) {
+		if (ranges[r].low <= ranges[w - 1U].high + 1U) {
+			/* Extend but never shrink: the range read may lie
+			 * entirely inside the one already kept */
+			if (ranges[r].high > ranges[w - 1U].high)
+				ranges[w - 1U].high = ranges[r].high;
+		} else {
+			ranges[w++] = ranges[r];
+		}
+	}
+
+	return w;
+}
+
+/*
+ * Remove from the range array *rangesp every code point covered by xranges
+ *
+ * The array is edited in place and may be reallocated; the updated pointer
+ * and element count are stored back through `rangesp` and `np`. Both indices
+ * only ever move forward, so this presumably relies on both arrays being
+ * sorted and free of overlaps (output() passes join_ranges() results as
+ * `xranges`). Exits with status 1 if memory cannot be allocated
+ */
+static void
+range_minus(struct range **rangesp, size_t *np, const struct range *xranges, size_t xn)
+{
+ struct range *ranges = *rangesp;
+ size_t i, j, n = *np;
+
+ for (i = 0, j = 0; i < n && j < xn;) {
+ if (xranges[j].high < ranges[i].low) { /* x lies entirely before the range: next x */
+ j++;
+ } else if (xranges[j].low > ranges[i].high) { /* x lies entirely after the range: next range */
+ i++;
+ } else if (xranges[j].low <= ranges[i].low && xranges[j].high >= ranges[i].high) { /* x covers the whole range: delete it */
+ memmove(&ranges[i], &ranges[i + 1U], (--n - i) * sizeof(*ranges));
+ } else if (xranges[j].low <= ranges[i].low && xranges[j].high < ranges[i].high) { /* x cuts off the front of the range */
+ ranges[i].low = xranges[j++].high + 1U;
+ } else if (xranges[j].high >= ranges[i].high && xranges[j].low > ranges[i].low) { /* x cuts off the back of the range */
+ ranges[i++].high = xranges[j].low - 1U;
+ } else if (xranges[j].low > ranges[i].low && xranges[j].high < ranges[i].high) { /* x is strictly inside: split the range in two */
+ ranges = realloc(ranges, (n + 1U) * sizeof(*ranges));
+ if (!ranges) {
+ fprintf(stderr, "%s: realloc %zu: %s\n", argv0, (n + 1U) * sizeof(*ranges), strerror(errno));
+ exit(1);
+ }
+ memmove(&ranges[i + 1U], &ranges[i], (n++ - i) * sizeof(*ranges)); /* duplicate element i into i + 1 */
+ ranges[i].low = ranges[i + 1U].low;
+ ranges[i].high = xranges[j].low - 1U;
+ ranges[i + 1U].low = xranges[j].high + 1U;
+ i++; /* continue with the upper half; x is skipped by the first case next round */
+ } else {
+ abort(); /* the cases above are exhaustive */
+ }
+ }
+
+ *rangesp = ranges;
+ *np = n;
+}
+
+/*
+ * Write the generated C source to standard output: one libcmap_range array
+ * per script, named after the script's cname, followed by the table `list`
+ * that ties each human-readable name to its array
+ *
+ * Code points in U+0000..U+10FFFF that no script covers are attached to a
+ * script looked up, or created, under the name "Unknown". All parsed data
+ * is released. Returns a negative value if any printf(3) call failed
+ */
+static int
+output(void)
+{
+ size_t i, j;
+ int x = 0;
+ const char *prefix;
+ struct script *unknown;
+ struct range *ranges;
+ size_t nranges;
+
+ ranges = malloc(sizeof(*unknown->ranges)); /* sizeof does not evaluate `unknown`, which is still unset here */
+ if (!ranges) {
+ fprintf(stderr, "%s: malloc %zu: %s\n", argv0, sizeof(*unknown->ranges), strerror(errno));
+ exit(1);
+ }
+ nranges = 1U;
+ ranges[0].low = 0; /* start from the whole code space ... */
+ ranges[0].high = 0x10FFFF;
+
+ for (i = 0; i < nscripts; i++) {
+ scripts[i].nranges = join_ranges(scripts[i].ranges, scripts[i].nranges);
+ range_minus(&ranges, &nranges, scripts[i].ranges, scripts[i].nranges); /* ... and carve out each script */
+ }
+
+ qsort(scripts, nscripts, sizeof(*scripts), &scriptcmp);
+
+ if (nranges) {
+ unknown = find_script((char []){"Unknown"}); /* runs after the sort, so a newly created entry ends up last -- NOTE(review): confirm this is intended */
+ unknown->ranges = ranges; /* ownership of `ranges` moves to the entry and is freed below */
+ unknown->nranges = nranges;
+ } else {
+ free(ranges);
+ }
+
+ for (i = 0; i < nscripts; i++) {
+ x |= printf("static const struct libcmap_range %s[] = {", scripts[i].cname);
+ for (j = 0; j < scripts[i].nranges;) {
+ prefix = j % 5U /* no more than 93!! */ == 0U ? "\n\t" : " ";
+ x |= printf("%s{0x%04lX, 0x%04lX}", prefix, scripts[i].ranges[j].low, scripts[i].ranges[j].high);
+ if (++j < scripts[i].nranges)
+ x |= printf(",");
+ }
+ x |= printf("\n};\n");
+ }
+ x |= printf("\nstatic const struct libcmap_script list[] = {\n");
+ for (i = 0; i < nscripts;) {
+ x |= printf("\t{\"%s\", %s, %zu}", scripts[i].hname, scripts[i].cname, scripts[i].nranges);
+ free(scripts[i].cname); /* each entry is released as soon as it has been printed */
+ free(scripts[i].hname);
+ free(scripts[i].ranges);
+ x |= printf("%s\n", ++i < nscripts ? "," : "");
+ }
+ x |= printf("};\n");
+ free(scripts);
+
+ return x;
+}
diff --git a/config.mk b/config.mk
index f4adf12..c458d77 100644
--- a/config.mk
+++ b/config.mk
@@ -6,3 +6,5 @@ CC = c99
CPPFLAGS = -D_DEFAULT_SOURCE -D_BSD_SOURCE -D_XOPEN_SOURCE=700 -D_GNU_SOURCE
CFLAGS =
LDFLAGS =
+
+DOWNLOAD = curl
diff --git a/libcmap.h b/libcmap.h
index 54e60cc..7ee5d74 100644
--- a/libcmap.h
+++ b/libcmap.h
@@ -26,11 +26,12 @@ struct libcmap_block {
struct libcmap_script {
const char *name;
- struct libcmap_range *ranges;
+ const struct libcmap_range *ranges;
size_t nranges;
};
+extern const struct libcmap_script libcmap_no_block;
extern const struct libcmap_block *const libcmap_block_list;
extern const size_t libcmap_block_list_size;
@@ -38,6 +39,7 @@ extern const struct libcmap_script *const libcmap_script_list;
extern const size_t libcmap_script_list_size;
+int libcmap_find_in_no_block(uint32_t codepoint, size_t *offset_out, size_t *subrange_out);
const struct libcmap_block *libcmap_find_block(uint32_t codepoint, size_t *offset_out);
const struct libcmap_script *libcmap_find_script(uint32_t codepoint, size_t *offset_out, size_t *subrange_out);
diff --git a/libcmap_block_list.c b/libcmap_block_list.c
index e0d3b93..c0d9214 100644
--- a/libcmap_block_list.c
+++ b/libcmap_block_list.c
@@ -2,10 +2,7 @@
#include "libcmap.h"
-static const struct libcmap_block list[] = {
- {"Phony block", {0, 0}}
-};
-
+#include "Blocks.txt.c"
const struct libcmap_block *const libcmap_block_list = list;
const size_t libcmap_block_list_size = sizeof(list) / sizeof(*list);
diff --git a/libcmap_find_in_no_block.c b/libcmap_find_in_no_block.c
new file mode 100644
index 0000000..216aaa0
--- /dev/null
+++ b/libcmap_find_in_no_block.c
@@ -0,0 +1,20 @@
+/* See LICENSE file for copyright and license details. */
+#include "libcmap.h"
+
+
+/**
+ * Check whether a code point lies in one of the ranges
+ * listed in `libcmap_no_block`
+ *
+ * @param   codepoint     The code point to look up
+ * @param   offset_out    Unless NULL, set on success to the zero-based
+ *                        position of the code point counted across all
+ *                        ranges in the list
+ * @param   subrange_out  Unless NULL, set on success to the index of
+ *                        the range that contains the code point
+ * @return                1 if the code point was found, 0 otherwise
+ */
+int
+libcmap_find_in_no_block(uint32_t codepoint, size_t *offset_out, size_t *subrange_out)
+{
+	const struct libcmap_range *range;
+	size_t i, skipped = 0;
+
+	for (i = 0; i < libcmap_no_block.nranges; i++) {
+		range = &libcmap_no_block.ranges[i];
+		/* Both bounds are inclusive */
+		if (range->first <= codepoint && codepoint <= range->last) {
+			if (offset_out)
+				*offset_out = skipped + (size_t)(codepoint - range->first);
+			if (subrange_out)
+				*subrange_out = i;
+			return 1;
+		}
+		/* Code points in earlier ranges count towards the offset */
+		skipped += (size_t)(range->last - range->first) + 1U;
+	}
+	return 0;
+}
diff --git a/libcmap_script_list.c b/libcmap_script_list.c
index 7464f5c..b692073 100644
--- a/libcmap_script_list.c
+++ b/libcmap_script_list.c
@@ -2,10 +2,7 @@
#include "libcmap.h"
-static const struct libcmap_script list[] = {
- {"Phony script", NULL, 0}
-};
-
+#include "Scripts.txt.c"
const struct libcmap_script *const libcmap_script_list = list;
const size_t libcmap_script_list_size = sizeof(list) / sizeof(*list);
diff --git a/parse-common.c b/parse-common.c
new file mode 100644
index 0000000..2333a1c
--- /dev/null
+++ b/parse-common.c
@@ -0,0 +1,123 @@
+/* See LICENSE file for copyright and license details. */
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+
+static const char *argv0;
+
+
+static void parse_line(char *text, size_t lineno);
+static int output(void);
+
+/*
+ * Split buffered input into lines and pass every data line to parse_line()
+ *
+ * Blank lines and lines whose first non-blank character is '#' are skipped,
+ * and a '#' later on a line cuts the line off at that point. LF, CR and
+ * CR LF each end a line. Only lines whose terminator is already in the
+ * buffer are handled. The line number and the "previous line ended in CR"
+ * flag are static, so they carry over between calls. Exits with status 1
+ * if a NUL byte appears in a data line before any '#'
+ *
+ * Returns the number of bytes at the start of `buf` that are finished with;
+ * main() drops them and calls again once more input has been appended
+ */
+static size_t
+parse(char *buf, size_t len)
+{
+ static int prev_was_cr = 0;
+ static size_t lineno = 1U;
+ size_t ret = 0;
+ size_t off = 0;
+ size_t she; /* index of the '#' that starts a trailing comment */
+
+beginning:
+ if (prev_was_cr && off < len && buf[off] == '\n') /* second half of a CR LF pair */
+ off++;
+ while (off < len && buf[off] != '\n' && buf[off] != '\r' && isspace(buf[off])) /* leading blanks */
+ off++;
+ ret = off; /* everything before this point has been dealt with */
+ if (off == len)
+ return ret;
+ if (buf[off] == '#') { /* comment-only line */
+ while (off < len && buf[off] != '\n' && buf[off] != '\r')
+ off++;
+ if (off == len) /* terminator not read yet: retry from `ret` on the next call */
+ return ret;
+ goto newline;
+ } else if (buf[off] == '\n' || buf[off] == '\r') { /* empty line */
+ newline:
+ prev_was_cr = buf[off] == '\r';
+ off++;
+ lineno++;
+ goto beginning;
+ }
+
+ for (; off < len; off++) { /* find where the data part of the line ends */
+ if (buf[off] == '\0') {
+ fprintf(stderr, "%s: NUL byte found in <stdin> on line %zu\n", argv0, lineno);
+ exit(1);
+ } else if (buf[off] == '\n' || buf[off] == '\r' || buf[off] == '#') {
+ break;
+ }
+ }
+ if (off == len) /* incomplete line: leave it in the buffer */
+ return ret;
+
+ if (buf[off] == '\n' || buf[off] == '\r') {
+ prev_was_cr = buf[off] == '\r';
+ buf[off++] = '\0'; /* turn the line into a C string */
+ } else if (buf[off] == '#') {
+ she = off++;
+ while (off < len && buf[off] != '\n' && buf[off] != '\r')
+ off++;
+ if (off == len) /* the buffer is left untouched so that the line can be rescanned */
+ return ret;
+ buf[she] = '\0'; /* cut the line off where the comment begins */
+ prev_was_cr = buf[off++] == '\r';
+ } else {
+ abort(); /* the loop above only stops at LF, CR or '#' */
+ }
+
+ parse_line(&buf[ret], lineno);
+ lineno++;
+ goto beginning;
+}
+
+
+/**
+ * Read all of standard input, hand it to parse() as it arrives,
+ * and finally let output() write the result to standard output
+ *
+ * @param   argc  Unused
+ * @param   argv  argv[0] is used as the program name in diagnostics
+ * @return        0 on success; on failure the process exits with status 1
+ */
+int
+main(int argc, char *argv[])
+{
+	char *buf = NULL;
+	size_t capacity = 0;
+	size_t used = 0;
+	size_t consumed;
+	ssize_t nread;
+
+	(void) argc;
+	argv0 = argv[0];
+
+	for (;;) {
+		/* Grow only when full, so there is always room for at least one more byte */
+		if (used == capacity) {
+			capacity += 8192U;
+			buf = realloc(buf, capacity);
+			if (!buf) {
+				fprintf(stderr, "%s: realloc %zu: %s\n", argv0, capacity, strerror(errno));
+				exit(1);
+			}
+		}
+		nread = read(STDIN_FILENO, &buf[used], capacity - used);
+		if (!nread)
+			break;
+		if (nread < 0) {
+			if (errno == EINTR)
+				continue;
+			fprintf(stderr, "%s: read <stdin>: %s\n", argv0, strerror(errno));
+			exit(1);
+		}
+		used += (size_t)nread;
+		/* Drop what parse() is done with and keep the unfinished tail */
+		consumed = parse(buf, used);
+		used -= consumed;
+		memmove(&buf[0], &buf[consumed], used);
+	}
+	/* Terminate a last line that lacks a newline so that it gets parsed too */
+	buf[used++] = '\n';
+	parse(buf, used);
+	free(buf);
+
+	if (output() < 0 || fflush(stdout)) {
+		fprintf(stderr, "%s: failed to write to <stdout>\n", argv0);
+		exit(1);
+	}
+
+	return 0;
+}