From 32da76d4577b6508aaa2539625b6689f946bc95a Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Sun, 28 Feb 2021 00:56:09 +0100 Subject: Finish rewrite to C MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- README | 14 +-- config.mk | 4 +- gpp.1 | 5 +- gpp.c | 322 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- gpp.py | 230 -------------------------------------------- 5 files changed, 331 insertions(+), 244 deletions(-) delete mode 100755 gpp.py diff --git a/README b/README index ec06c25..9d39bdf 100644 --- a/README +++ b/README @@ -23,12 +23,12 @@ DESCRIPTION symbol for preprocessing directives) or @>, or is between a line starting with @< and a line starting with @>, is parsed as a line, written in Bash, that - is executed during preprocessing. - A @< line must have an associated @> line somewhere - after it, all lines between them are parsed as - preprocessing directives. A @> does however not need - an associated @< line somewhere before it, making - @> suitable for single line directives. + is executed during preprocessing. A @< line must have + an associated @> line somewhere after it, all lines + between them are parsed as preprocessing directives. + A @> does however not need an associated @< line + somewhere before it, making @> suitable for single + line directives. Preprocessing directives can also be inline. For this, use @(COMMAND) where COMMAND is the Bash code to run. @@ -41,7 +41,7 @@ DESCRIPTION for lowercase. Everything that is not a preprocessing directive is - echo verbatim. + echo verbatim, except all @@ are replaced by @. 
OPTIONS -D name=value diff --git a/config.mk b/config.mk index 2e90419..20f922d 100644 --- a/config.mk +++ b/config.mk @@ -2,5 +2,5 @@ PREFIX = /usr MANPREFIX = $(PREFIX)/share/man CPPFLAGS = -D_DEFAULT_SOURCE -D_BSD_SOURCE -D_XOPEN_SOURCE=700 -CFLAGS = -std=c99 -Wall -O2 -LDFLAGS = -s +CFLAGS = -std=c99 -Wall -Og -g +LDFLAGS = diff --git a/gpp.1 b/gpp.1 index 1b7bb69..8841a2e 100644 --- a/gpp.1 +++ b/gpp.1 @@ -85,7 +85,10 @@ or for lowercase. .PP Everything that is not a preprocessing directive is -echo verbatim. +echo verbatim, except all +.B @@ +are replaced by +.BR @ . .SH OPTIONS The diff --git a/gpp.c b/gpp.c index 569ae76..bc466af 100644 --- a/gpp.c +++ b/gpp.c @@ -1,8 +1,12 @@ /* See LICENSE file for copyright and license details. */ +#include #include #include #include #include +#include +#include +#include #include #include #include @@ -87,6 +91,29 @@ xcreate(const char *path) } +static void +append(char **restrict out_datap, size_t *restrict out_lenp, size_t *restrict out_sizep, const char *fmt, ...) 
+{ + va_list ap, ap2; + size_t len; + va_start(ap, fmt); + va_copy(ap2, ap); + len = (size_t)vsnprintf(NULL, 0, fmt, ap2); + va_end(ap2); + if (*out_lenp + len + 1 > *out_sizep) { + *out_sizep = *out_lenp + len + 1; + *out_datap = realloc(*out_datap, *out_sizep); + if (!*out_datap) { + fprintf(stderr, "%s: realloc: %s\n", argv0, strerror(errno)); + exit(1); + } + } + vsprintf(&(*out_datap)[*out_lenp], fmt, ap); + *out_lenp += len; + va_end(ap); +} + + int main(int argc, char *argv[]) { @@ -94,6 +121,7 @@ main(int argc, char *argv[]) const char *input_file = NULL; const char *output_file = NULL; const char *symbol = NULL; + size_t symlen = 1; int iterations = -1; int unshebang = 0; long int tmp; @@ -101,8 +129,16 @@ main(int argc, char *argv[]) char *in_data = NULL, *out_data = NULL; size_t in_size = 0, in_len = 0, in_off = 0; size_t out_size = 0, out_len = 0, out_off = 0; - int in_fd, out_fd, do_close; + int in_fd, out_fd, do_close, fds_in[2], fds_out[2]; + struct pollfd pfds[2]; + size_t npfds; + char buffer[4096], c, *quotes = NULL, quote; + size_t brackets, nquotes, quotes_size = 0; + int symb, esc, dollar; + size_t len, j, lineno, no = 0, cnt; + int i, n, status, state, entered; ssize_t r; + pid_t pid; ARGBEGIN { case 'D': @@ -154,6 +190,9 @@ main(int argc, char *argv[]) if (symbol) usage(); symbol = EARGF(usage()); + symlen = strlen(symbol); + if (!symlen) + usage(); break; case 'u': if (unshebang == 2) @@ -164,8 +203,11 @@ main(int argc, char *argv[]) usage(); } ARGEND; - if (argc) + if (argc) { + if (!**argv) + usage(); shell = (void *)argv; + } if (setenv("_GPP", argv0, 1)) fprintf(stderr, "%s: setenv _GPP %s 1: %s\n", argv0, argv0, strerror(errno)); @@ -217,17 +259,289 @@ main(int argc, char *argv[]) after_unshebang: while (iterations--) { - /* TODO parse: in -> out */ + entered = 0; + state = 0; + lineno = 0; + brackets = nquotes = 0; + symb = esc = dollar = 0; + while (in_off < in_len) { + if (state == 2) { + preprocess: + c = in_data[in_off++]; + if (c 
== '\n') { + state = 0; + brackets = nquotes = 0; + symb = esc = dollar = 0; + append(&out_data, &out_len, &out_size, "'\n"); + } else if (brackets) { + if (esc) { + esc = 0; + } else if (nquotes) { + if (dollar) { + dollar = 0; + if (c == '(') { + quote = ')'; + goto add_to_quotes; + } else if (c == '{') { + quote = '}'; + goto add_to_quotes; + } + } else if (c == quotes[nquotes - 1]) { + nquotes -= 1; + } else if ((quotes[nquotes - 1] == ')' || quotes[nquotes - 1] == '}') && + (c == '"' || c == '\'' || c == '`')) { + quote = c; + goto add_to_quotes; + } else if (c == '\\' && quotes[nquotes - 1] != '\'') { + esc = 1; + } else if (c == '$') { + dollar = 1; + } + } else if (c == '"' || c == '\'' || c == '`') { + quote = c; + add_to_quotes: + if (nquotes == quotes_size) { + quotes = realloc(quotes, quotes_size += 1); + if (!quotes) { + fprintf(stderr, "%s: realloc: %s\n", argv0, strerror(errno)); + return 1; + } + } + quotes[nquotes++] = quote; + } else if (c == ')' || c == '}') { + if (!--brackets) { + append(&out_data, &out_len, &out_size, "%c\"'", c); + continue; + } + } else if (c == '(' || c == '{') { /* either opener nests one level deeper; closers are handled by the branch above */ + brackets += 1; + } else if (c == '\\') { + esc = 1; + } + append(&out_data, &out_len, &out_size, "%c", c); + } else if (c == *symbol && in_len - (in_off - 1) >= symlen && + !memcmp(&in_data[in_off - 1], symbol, symlen)) { + if (symb) + append(&out_data, &out_len, &out_size, "%s", symbol); + symb ^= 1; + in_off += symlen - 1; + } else if (symb) { + symb = 0; + if (c == '(' || c == '{') { + brackets += 1; + append(&out_data, &out_len, &out_size, "'\"$%c", c); + } else if (c == '\'') { + append(&out_data, &out_len, &out_size, "%s'\\''", symbol); + } else { + append(&out_data, &out_len, &out_size, "%s%c", symbol, c); + } + } else if (c == '\'') { + append(&out_data, &out_len, &out_size, "'\\''"); + } else { + if (out_len == out_size) { + out_data = realloc(out_data, out_size += 4096); + if (!out_data) { + fprintf(stderr, "%s: realloc: %s\n", argv0, strerror(errno)); 
+ return 1; + } + } /* no else: c is already consumed from in_data, so it must be stored after growing too */ + if ((out_data[out_len++] = c) == '\n') + state = 0; + } + } else if (state == 1) { + append_char: + if (out_len == out_size) { + out_data = realloc(out_data, out_size += 4096); + if (!out_data) { + fprintf(stderr, "%s: realloc: %s\n", argv0, strerror(errno)); + return 1; + } + } else { + if ((out_data[out_len++] = in_data[in_off++]) == '\n') + state = 0; + } + } else { + lineno += 1; + if (in_len - in_off > symlen && !memcmp(&in_data[in_off], symbol, symlen) && + (in_data[in_off + symlen] == '<' || in_data[in_off + symlen] == '>')) { + state = 1; + entered = in_data[in_off + symlen] == '<'; + in_off += symlen + 1; + } else if (entered) { + state = 1; /* copy the rest of this line verbatim; staying in state 0 would redo the line-start logic per character */ + goto append_char; + } else { + append(&out_data, &out_len, &out_size, "printf '\\000%%zu\\000%%s\\n' %zu '", lineno); + state = 2; + goto preprocess; + } + } + } + if (state == 2) + append(&out_data, &out_len, &out_size, "'"); in_len = 0; in_off = 0; - /* TODO shell: out -> in */ + if (pipe(fds_in) || pipe(fds_out)) { + fprintf(stderr, "%s: pipe: %s\n", argv0, strerror(errno)); + return 1; + } + pid = fork(); + switch (pid) { + case -1: + fprintf(stderr, "%s: fork: %s\n", argv0, strerror(errno)); + return 1; + case 0: + close(fds_in[1]); + close(fds_out[0]); + if (dup2(fds_in[0], STDIN_FILENO) != STDIN_FILENO) { + fprintf(stderr, "%s: dup2 STDIN_FILENO: %s\n", argv0, strerror(errno)); + return 1; + } + if (dup2(fds_out[1], STDOUT_FILENO) != STDOUT_FILENO) { + fprintf(stderr, "%s: dup2 STDOUT_FILENO: %s\n", argv0, strerror(errno)); + return 1; + } + close(fds_in[0]); + close(fds_out[1]); + execvp(*shell, (void *)shell); + fprintf(stderr, "%s: execvp %s: %s\n", argv0, *shell, strerror(errno)); + return 1; + default: + close(fds_in[0]); + close(fds_out[1]); + break; + } + pfds[0].fd = fds_in[1]; + pfds[0].events = POLLOUT; + pfds[1].fd = fds_out[0]; + pfds[1].events = POLLIN; + npfds = 2; + lineno = 1; + state = 0; + while (npfds) { + n = poll(pfds, npfds, -1); + if (n < 0) { + fprintf(stderr, "%s: 
poll: %s\n", argv0, strerror(errno)); + return 1; + } + for (i = 0; i < (int)npfds; i++) { /* scan every entry: poll() returns how many are ready, not an index bound */ + if (!pfds[i].revents) + continue; + if (pfds[i].fd == fds_in[1]) { + if (out_off == out_len) { + if (close(fds_in[1])) { + fprintf(stderr, "%s: write : %s\n", argv0, strerror(errno)); + return 1; + } + pfds[i] = pfds[--npfds]; + continue; + } + r = write(fds_in[1], &out_data[out_off], out_len - out_off); + if (r <= 0) { + fprintf(stderr, "%s: write : %s\n", argv0, strerror(errno)); + return 1; + } + out_off += (size_t)r; + } else { + r = read(fds_out[0], buffer, sizeof(buffer)); + if (r <= 0) { + if (r < 0 || close(fds_out[0])) { + fprintf(stderr, "%s: read : %s\n", argv0, strerror(errno)); + return 1; + } + pfds[i] = pfds[--npfds]; + continue; + } + len = (size_t)r; + for (j = 0; j < len; j++) { + switch (state) { + case 0: + no = 0; + if (!buffer[j]) { + state = 1; + } else { + state = 3; + goto state3; + } + break; + case 1: + if (isdigit((unsigned char)buffer[j])) { /* plain char may be negative, which isdigit() does not accept */ + if (buffer[j] == '0') { + append(&in_data, &in_len, &in_size, "%c", 0); + state = 3; + goto state3; + } + state = 2; + } + /* fall through */ + case 2: + if (isdigit((unsigned char)buffer[j])) { + if (no > (SIZE_MAX - (buffer[j] & 15)) / 10) { /* guard the accumulator, not the digit, against overflow */ + append(&in_data, &in_len, &in_size, "%c%zu", 0, no); + state = 3; + goto state3; + } + no = no * 10 + (buffer[j] & 15); + } else if (!buffer[j]) { + if (no > lineno) { + cnt = no - lineno; + lineno = no; + if (in_len + cnt > in_size) { + in_size = in_len + cnt; + in_data = realloc(in_data, in_size); + if (!in_data) { + fprintf(stderr, "%s: realloc: %s\n", + argv0, strerror(errno)); + return 1; + } + } + while (cnt--) + in_data[in_len++] = '\n'; + } + state = 3; + } else { + append(&in_data, &in_len, &in_size, "%c%zu", 0, no); + state = 3; + goto state3; + } + break; + default: + state3: + if (in_len == in_size) { + in_size += 4096; + in_data = realloc(in_data, in_size); + if (!in_data) { + fprintf(stderr, "%s: realloc: %s\n", argv0, strerror(errno)); + return 1; + } + } + in_data[in_len++] = 
buffer[j]; + if (buffer[j] == '\n') { + lineno += 1; + state = 0; + } + break; + } + } + } + } + } + if (waitpid(pid, &status, 0) != pid) { + fprintf(stderr, "%s: waitpid %s <&status> 0: %s\n", argv0, *shell, strerror(errno)); + return 1; + } + if (status) + return WIFEXITED(status) ? WEXITSTATUS(status) : 1; out_len = 0; out_off = 0; + in_off = 0; } + free(quotes); + free(out_data); out_fd = xcreate(output_file); while (in_off < in_len) { diff --git a/gpp.py b/gpp.py deleted file mode 100755 index 68a317f..0000000 --- a/gpp.py +++ /dev/null @@ -1,230 +0,0 @@ -#!@{SHEBANG} -# -*- coding: utf-8 -*- - -import os -import sys -import shlex -from subprocess import Popen, PIPE - -if sys.version_info.major < 3: - def bytes(string): - r = bytearray(len(string)) - b = buffer(r) - r[:] = string - return r - -if sys.version_info.major < 3: - def bytelist(string): - return [ord(c) for c in string] -else: - bytelist = list - -symbol = '@' -encoding = sys.getdefaultencoding() -iterations = 1 -input_file = '/dev/stdin' -output_file = '/dev/stdout' -unshebang = 0 - -args = sys.argv - -# If the file is executed, the first command line argument will be the first argument in the shebang, -# the second will be the rest of the arguments in the command line as is and the third and final will -# be the executed file. 
-if len(args) == 3: - if os.path.exists(args[2]) and os.path.isfile(args[2]) and ((os.stat(args[2]).st_mode & 0o111) != 0): - args[1 : 2] = shlex.split(args[1]) - -for i in range(1, len(args)): - arg = args[i] - i += 1 - if arg in ('-s', '--symbol'): symbol = sys.argv[i] - elif arg in ('-e', '--encoding'): encoding = sys.argv[i] - elif arg in ('-n', '--iterations'): iterations = int(sys.argv[i]) - elif arg in ('-u', '--unshebang'): unshebang += 1; continue - elif arg in ('-i', '--input'): input_file = sys.argv[i] - elif arg in ('-o', '--output'): output_file = sys.argv[i] - elif arg in ('-f', '--file'): - input_file = sys.argv[i] - output_file = sys.argv[i] - elif arg in ('-D', '--export'): - export = sys.argv[i] - if '=' not in export: - export += '=1' - export = (export.split('=')[0], '='.join(export.split('=')[1:])) - os.putenv(export[0], export[1]) - else: - continue - i += 1 - -if input_file == '-': input_file = '/dev/stdin' -if output_file == '-': output_file = '/dev/stdout' - -symbol = bytelist(symbol.encode(encoding)) -symlen = len(symbol) - -if iterations < 1: - if input_file != output_file: - data = None - with open(input_file, 'rb') as file: - data = file.read() - with open(output_file, 'wb') as file: - file.write(data) - file.flush() - sys.exit(0) - -def linesplit(bs): - rc = [] - elem = [] - for b in bs: - if b == 10: - rc.append(elem) - elem = [] - else: - elem.append(b) - rc.append(elem) - return rc - -def linejoin(bss): - rc = [] - if len(bss) > 0: - rc += bss[0] - for bs in bss[1:]: - rc.append(10) - rc += bs - return rc - -data = None -with open(input_file, 'rb') as file: - data = file.read() -data = linesplit(bytelist(data)) - -if unshebang == 1: - if (len(data[0]) >= 2) and (data[0][0] == ord('#')) and (data[0][1] == ord('!')): - data[0] = [] - -if unshebang >= 2: - if (len(data[0]) >= 2) and (data[0][0] == ord('#')) and (data[0][1] == ord('!')): - data[0] = data[1] - data[1] = [] - -def pp(line): - rc = [] - symb = False - brackets = 0 - esc = 
False - dollar = False - quote = [] - n = len(line) - i = 0 - rc.append(ord('\'')) - while i < n: - c = line[i] - i += 1 - if brackets > 0: - if esc: - esc = False - elif len(quote) > 0: - if dollar: - dollar = False - if c == ord('('): - quote.append(ord(')')) - elif c == ord('{'): - quote.append(ord('}')) - elif c == quote[-1]: - quote[:] = quote[:-1] - elif (quote[-1] in (ord(')'), ord('}'))) and (c in (ord('"'), ord('\''), ord('`'))): - quote.append(c) - elif (c == ord('\\')) and (quote[-1] != ord('\'')): - esc = True - elif c == ord('$'): - dollar = True - elif c in (ord('"'), ord('\''), ord('`')): - quote.append(c) - elif c in (ord(')'), ord('}')): - brackets -= 1 - if brackets == 0: - rc.append(c) - rc.append(ord('"')) - rc.append(ord('\'')) - continue - elif c in (ord('('), ord('{')): - brackets += 1 - elif c == ord('\\'): - esc = True - rc.append(c) - elif line[i - 1 : i + symlen - 1] == symbol: - if symb: - rc += symbol - symb = not symb - i += symlen - 1 - elif symb: - symb = False - if c in (ord('('), ord('{')): - brackets += 1 - rc.append(ord('\'')) - rc.append(ord('"')) - rc.append(ord('$')) - else: - rc += symbol - if c == ord('\''): - rc.append(c) - rc.append(ord('\\')) - rc.append(c) - rc.append(c) - elif c == ord('\''): - rc.append(c) - rc.append(ord('\\')) - rc.append(c) - rc.append(c) - else: - rc.append(c) - rc.append(ord('\'')) - return rc - -for _ in range(iterations): - entered = False - bashed = [] - - for lineno in range(len(data)): - line = data[lineno] - if (len(line) > symlen) and (line[:symlen] == symbol) and (line[symlen] in (ord('<'), ord('>'))): - bashed.append(line[symlen + 1:]) - entered = line[symlen] == ord('<') - elif entered: - bashed.append(line) - else: - buf = bytelist(('echo $\'\\e%i\\e\'' % lineno).encode()) - bashed.append(buf + pp(line)) - - bashed = bytes(linejoin(bashed)) - bash = Popen(["bash"], stdin = PIPE, stdout = PIPE, stderr = sys.stderr) - bashed = bash.communicate(bashed)[0] - - if bash.returncode != 0: - 
sys.exit(bash.returncode) - - bashed = linesplit(bytelist(bashed)) - data = [] - lineno = -1 - - for line in bashed: - no = -1 - if (len(line) > 0) and (line[0] == 0o33): - no = 0 - for i in range(1, len(line)): - if line[i] == 0o33: - line = line[i + 1:] - break - no = no * 10 + (line[i] - ord('0')) - if no > lineno: - while no != lineno + 1: - data.append([]) - lineno += 1 - data.append(line) - lineno += 1 - -data = bytes(linejoin(data)) -with open(output_file, 'wb') as file: - file.write(data) - file.flush() -- cgit v1.2.3-70-g09d2