diff options
-rw-r--r-- | LICENSE | 2 | ||||
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | common.h | 24 | ||||
-rw-r--r-- | config.mk | 2 | ||||
-rw-r--r-- | mklint.c | 121 | ||||
-rw-r--r-- | text.c | 143 | ||||
-rw-r--r-- | ui.c | 3 |
7 files changed, 199 insertions, 97 deletions
@@ -1,6 +1,6 @@ ISC License -© 2021 Mattias Andrée <maandree@kth.se> +© 2021, 2022 Mattias Andrée <maandree@kth.se> Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above @@ -5,6 +5,7 @@ include $(CONFIGFILE) OBJ =\ mklint.o\ + text.o\ ui.o HDR =\ @@ -1,6 +1,9 @@ /* See LICENSE file for copyright and license details. */ +#include <locale.h> + #include <libsimple.h> #include <libsimple-arg.h> +#include <grapheme.h> #define EXIT_STYLE 1 #define EXIT_WARNING 2 @@ -15,12 +18,15 @@ X(WC_MAKEFILE, "makefile", INFORM)\ X(WC_EXTRA_MAKEFILE, "extra-makefile", WARN)\ X(WC_CMDLINE, "cmdline", WARN)\ - X(WC_TEXT, "text", WARN) + X(WC_TEXT, "text", WARN)\ + X(WC_ENCODING, "encoding", WARN)\ + X(WC_LONG_LINE, "long-line", WARN_STYLE) enum action { IGNORE, INFORM, + WARN_STYLE, WARN }; @@ -41,16 +47,30 @@ struct line { size_t len; const char *path; size_t lineno; + int eof; + int nest_level; +}; + +struct style { + size_t max_line_length; }; extern int exit_status; +extern struct style style; + + +/* text.c */ +struct line *load_text_file(int fd, const char *fname, int nest_level, size_t *nlinesp); +void check_utf8_encoding(struct line *line); +void check_column_count(struct line *line); /* ui.c */ extern struct warning_class_data warning_classes[]; void xprintwarningf(enum warning_class class, int severity, const char *fmt, ...); -#define printinfof(CLASS, ...) xprintwarningf(CLASS, EXIT_STYLE, __VA_ARGS__) +#define printinfof(CLASS, ...) xprintwarningf(CLASS, 0, __VA_ARGS__) +#define warnf_style(CLASS, ...) xprintwarningf(CLASS, EXIT_STYLE, __VA_ARGS__) #define warnf_warning(CLASS, ...) xprintwarningf(CLASS, EXIT_WARNING, __VA_ARGS__) #define warnf_unspecified(CLASS, ...) xprintwarningf(CLASS, EXIT_UNSPECIFIED, __VA_ARGS__) #define warnf_undefined(CLASS, ...) 
xprintwarningf(CLASS, EXIT_UNDEFINED, __VA_ARGS__) @@ -5,4 +5,4 @@ CC = cc CPPFLAGS = -D_DEFAULT_SOURCE -D_BSD_SOURCE -D_XOPEN_SOURCE=700 -D_GNU_SOURCE CFLAGS = -std=c99 -Wall -g -LDFLAGS = -lsimple +LDFLAGS = -lsimple -lgrapheme @@ -4,9 +4,13 @@ NUSAGE(EXIT_ERROR, "[-f makefile]"); - int exit_status = 0; +struct style style = { + .max_line_length = 120 +}; + + static const char *const default_makefiles[] = { "makefile", "Makefile" @@ -69,86 +73,27 @@ cmdline_opt_f(const char *arg, const char **makefile_pathp) static struct line * -load_file(int fd, const char *fname, size_t *nlinesp) +load_makefile(const char *path, size_t *nlinesp) { struct line *lines; - char *buf = NULL, *p; - size_t size = 0; - size_t len = 0; - size_t i; - ssize_t r; - - /* getline(3) may seem like the best way to read line by line, - * however, it may terminate before the end of the line is - * reached, which we would have to deal with, additionally, - * we want to check for null bytes. Therefore we will keep - * this simple and use read(3) and scan manually; and as a - * bonus we can leave the file descriptor open, and let the - * caller than created it close it. 
- */ - - i = 0; - *nlinesp = 0; - for (;;) { - if (len == size) - buf = erealloc(buf, size += 2048); - r = read(fd, &buf[len], size - len); - if (r > 0) - len += (size_t)r; - else if (!r) - break; - else if (errno == EINTR) - continue; - else - eprintf("read %s:", fname); - - for (; i < len; i++) { - if (buf[i] == '\n') { - *nlinesp += 1; - buf[i] = '\0'; - } else if (buf[i] == '\0') { - /* https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_403 */ - warnf_undefined(WC_TEXT, "%s:%zu: file contains a NUL byte, this is disallowed, because " - "input files are text files, and causes undefined behaviour", - fname, *nlinesp + 1); - /* make(1) should probably just abort */ - printinfof(WC_TEXT, "this implementation will replace it with a <space>"); - buf[i] = ' '; - } - } - } - - if (len && buf[len - 1] != '\0') { /* LF has been converted to NUL above */ - /* https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_403 */ - warnf_undefined(WC_TEXT, "%s:%zu: is non-empty but does not end with a <newline>, which is " - "required because input files are text files, and omission of it " - "causes undefined behaviour", - fname, *nlinesp + 1); - /* make(1) should probably just abort */ - printinfof(WC_TEXT, "this implementation will add the missing <newline>"); - buf = erealloc(buf, len + 1); - buf[len++] = '\0'; - *nlinesp += 1; - } + int fd; - lines = *nlinesp ? 
ecalloc(*nlinesp, sizeof(*lines)) : NULL; - for (p = buf, i = 0; i < *nlinesp; i++) { - lines[i].lineno = i + 1; - lines[i].path = fname; - lines[i].len = strlen(p); - lines[i].data = ememdup(p, lines[i].len + 1); - if (lines[i].len + 1 > 2048) { - /* https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_403 */ - warnf_undefined(WC_TEXT, "%s:%zu: line is, including the <newline> character, longer than " - "2048 bytes which causes undefined behaviour as input files are " - "text files and POSIX only guarantees support for lines up to 2048 " - "bytes long including the <newline> character in text files", - fname, *nlinesp + 1); - } - p += lines[i].len + 1; + if (!path) { + fd = open_default_makefile(&path); + } else if (!strcmp(path, "-")) { + /* “A pathname of '-' shall denote the standard input” */ + fd = dup(STDIN_FILENO); + if (fd < 0) + eprintf("dup <stdin>:"); + path = "<stdin>"; + } else { + fd = open(path, O_RDONLY); + if (fd < 0) + eprintf("open %s O_RDONLY:", path); } - free(buf); + lines = load_text_file(fd, path, 0, nlinesp); + close(fd); return lines; } @@ -157,9 +102,9 @@ int main(int argc, char *argv[]) { const char *path = NULL; - int fd; struct line *lines; size_t nlines; + size_t i; libsimple_default_failure_exit = EXIT_ERROR; @@ -176,22 +121,14 @@ main(int argc, char *argv[]) if (argc) usage(); - if (!path) { - fd = open_default_makefile(&path); - } else if (!strcmp(path, "-")) { - /* “A pathname of '-' shall denote the standard input” */ - fd = dup(STDIN_FILENO); - if (fd < 0) - eprintf("dup <stdin>:"); - path = "<stdin>"; - } else { - fd = open(path, O_RDONLY); - if (fd < 0) - eprintf("open %s O_RDONLY:", path); - } + setlocale(LC_ALL, ""); /* Required by wcwidth(3) */ - lines = load_file(fd, path, &nlines); - close(fd); + lines = load_makefile(path, &nlines); + + for (i = 0; i < nlines; i++) { + check_utf8_encoding(&lines[i]); + check_column_count(&lines[i]); + } free(lines); return exit_status; @@ -0,0 +1,143 @@ +/* 
See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+/**
+ * Read a text file to its end and split it into lines
+ *
+ * Each <newline> terminates a line. NUL bytes are reported and
+ * replaced with <space>, a missing final <newline> is reported
+ * and added, and lines longer than 2048 bytes (including the
+ * <newline>) are reported.
+ *
+ * @param   fd          File descriptor to read from; it is left open
+ *                      for the caller to close
+ * @param   fname       File name used in diagnostics; the pointer itself
+ *                      (not a copy) is stored in each line's `.path`
+ * @param   nest_level  Value stored in each line's `.nest_level`
+ * @param   nlinesp     Output parameter for the number of lines
+ * @return              Array of `*nlinesp` lines, `NULL` if there are none;
+ *                      each `.data` is a separate NUL-terminated copy
+ *                      of the line without its <newline>
+ */
+struct line *
+load_text_file(int fd, const char *fname, int nest_level, size_t *nlinesp)
+{
+	struct line *lines;
+	char *buf = NULL, *p;
+	size_t size = 0;
+	size_t len = 0;
+	size_t i;
+	ssize_t r;
+
+	/* getline(3) may seem like the best way to read line by line,
+	 * however, it may terminate before the end of the line is
+	 * reached, which we would have to deal with, additionally,
+	 * we want to check for null bytes. Therefore we will keep
+	 * this simple and use read(3) and scan manually; and as a
+	 * bonus we can leave the file descriptor open, and let the
+	 * caller that created it close it.
+	 */
+
+	i = 0;
+	*nlinesp = 0;
+	for (;;) {
+		if (len == size)
+			buf = erealloc(buf, size += 2048);
+		r = read(fd, &buf[len], size - len);
+		if (r > 0)
+			len += (size_t)r;
+		else if (!r)
+			break;
+		else if (errno == EINTR)
+			continue;
+		else
+			eprintf("read %s:", fname);
+
+		/* Scan only the bytes that were just read; `i` persists between reads */
+		for (; i < len; i++) {
+			if (buf[i] == '\n') {
+				*nlinesp += 1;
+				buf[i] = '\0';
+			} else if (buf[i] == '\0') {
+				/* https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_403 */
+				warnf_undefined(WC_TEXT, "%s:%zu: file contains a NUL byte, this is disallowed, because "
+				                         "input files are text files, and causes undefined behaviour",
+				                fname, *nlinesp + 1);
+				/* make(1) should probably just abort */
+				printinfof(WC_TEXT, "this implementation will replace it with a <space>");
+				buf[i] = ' ';
+			}
+		}
+	}
+
+	if (len && buf[len - 1] != '\0') { /* LF has been converted to NUL above */
+		/* https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_403 */
+		warnf_undefined(WC_TEXT, "%s:%zu: is non-empty but does not end with a <newline>, which is "
+		                         "required because input files are text files, and omission of it "
+		                         "causes undefined behaviour",
+		                fname, *nlinesp + 1);
+		/* make(1) should probably just abort */
+		printinfof(WC_TEXT, "this implementation will add the missing <newline>");
+		buf = erealloc(buf, len + 1);
+		buf[len++] = '\0';
+		*nlinesp += 1;
+	}
+
+	lines = *nlinesp ? ecalloc(*nlinesp, sizeof(*lines)) : NULL;
+	for (p = buf, i = 0; i < *nlinesp; i++) {
+		lines[i].lineno = i + 1;
+		lines[i].path = fname;
+		lines[i].len = strlen(p);
+		lines[i].data = ememdup(p, lines[i].len + 1);
+		lines[i].eof = i + 1 == *nlinesp;
+		lines[i].nest_level = nest_level;
+
+		if (lines[i].len + 1 > 2048) {
+			/* https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_403 */
+			/* Report the offending line's own number, not the total line count */
+			warnf_undefined(WC_TEXT, "%s:%zu: line is, including the <newline> character, longer than "
+			                         "2048 bytes which causes undefined behaviour as input files are "
+			                         "text files and POSIX only guarantees support for lines up to 2048 "
+			                         "bytes long including the <newline> character in text files",
+			                fname, lines[i].lineno);
+		}
+		p += lines[i].len + 1;
+	}
+
+	free(buf);
+	return lines;
+}
+
+
+/**
+ * Warn about invalid UTF-8 in a line and replace each invalid
+ * sequence with the encoding of U+FFFD
+ *
+ * A U+FFFD that is already correctly encoded in the input is
+ * left alone and not reported.
+ *
+ * @param  line  The line to check; `.data` and `.len` are updated
+ *               when a replacement is made. `.data` is expected to
+ *               hold `.len` bytes followed by a NUL byte, which is
+ *               how load_text_file() creates it.
+ */
+void
+check_utf8_encoding(struct line *line)
+{
+	size_t off, r, remaining, new_len;
+	uint_least32_t codepoint;
+#if GRAPHEME_INVALID_CODEPOINT == 0xFFFD
+	unsigned char invalid_codepoint_encoding[] = {0xEF, 0xBF, 0xBD};
+#else
+# error "GRAPHEME_INVALID_CODEPOINT is expected to be U+FFFD"
+#endif
+	const size_t repl_len = ELEMSOF(invalid_codepoint_encoding);
+
+	for (off = 0; off < line->len; off += r) {
+		remaining = line->len - off;
+		r = grapheme_decode_utf8(&line->data[off], remaining, &codepoint);
+		/* Never consume more than what is left of the line, in case the
+		 * decoder reports the theoretical length of a truncated sequence
+		 * (TODO confirm against the libgrapheme documentation) */
+		if (r > remaining)
+			r = remaining;
+
+		if (codepoint != GRAPHEME_INVALID_CODEPOINT)
+			continue;
+		if (r == repl_len && !memcmp(&line->data[off], invalid_codepoint_encoding, repl_len))
+			continue; /* A properly encoded U+FFFD, not a decoding error */
+
+		warnf_unspecified(WC_ENCODING, "%s:%zu: line contains invalid UTF-8", line->path, line->lineno);
+		printinfof(WC_ENCODING, "this implementation will replace it with the "
+		                        "Unicode replacement character (U+FFFD)");
+
+		/* Grow before moving; when the replacement is shorter the buffer
+		 * is simply left slightly oversized. The "+ 1" keeps room for,
+		 * and moves, the terminating NUL byte. */
+		new_len = line->len - r + repl_len;
+		if (new_len > line->len)
+			line->data = erealloc(line->data, new_len + 1);
+		memmove(&line->data[off + repl_len], &line->data[off + r], remaining - r + 1);
+		memcpy(&line->data[off], invalid_codepoint_encoding, repl_len);
+		line->len = new_len;
+
+		/* Continue after the inserted replacement, not inside it */
+		r = repl_len;
+	}
+}
+
+
+/**
+ * Warn if a line is wider than `style.max_line_length` columns
+ *
+ * The width is the sum of wcwidth(3) over the line's codepoints,
+ * so setlocale(3) must have been called beforehand.
+ *
+ * @param  line  The line to measure; it is not modified
+ */
+void
+check_column_count(struct line *line)
+{
+	size_t columns = 0;
+	size_t off, r;
+	uint_least32_t codepoint;
+
+	if (line->len <= style.max_line_length) /* Column count cannot be more than byte count */
+		return;
+
+	for (off = 0; off < line->len; off += r) {
+		r = grapheme_decode_utf8(&line->data[off], line->len - off, &codepoint);
+		/* wcwidth(3) returns -1 for non-printable characters;
+		 * abs(3) makes those count as one column each.
+		 * NOTE(review): the cast assumes wchar_t holds Unicode codepoints */
+		columns += (size_t)abs(wcwidth((wchar_t)codepoint));
+	}
+
+	if (columns > style.max_line_length) {
+		/* Report the limit that was exceeded, which is what the message states */
+		warnf_style(WC_LONG_LINE, "%s:%zu: line is longer than %zu columns",
+		            line->path, line->lineno, style.max_line_length);
+	}
+}
@@ -15,7 +15,8 @@ vxprintwarningf(enum warning_class class, int severity, const char *fmt, va_list { if (warning_classes[class].action != IGNORE) { fprintf(stderr, "%s: [%s] ", argv0, - warning_classes[class].action == INFORM ? "info" : "warning"); + warning_classes[class].action == INFORM ? "info" : + warning_classes[class].action == WARN_STYLE ? "style" : "warning"); vfprintf(stderr, fmt, ap); fprintf(stderr, " (-w%s)\n", warning_classes[class].name); if (warning_classes[class].action != INFORM) |