diff options
author Mattias Andrée <maandree@kth.se> 2021-02-28 00:56:09 +0100
committer Mattias Andrée <maandree@kth.se> 2021-02-28 00:56:09 +0100
commit 32da76d4577b6508aaa2539625b6689f946bc95a (patch)
parent remove todo (diff)
Finish rewrite to C
Signed-off-by: Mattias Andrée <maandree@kth.se>
Diffstat (limited to '')
5 files changed, 331 insertions, 244 deletions
diff --git a/README b/README
index ec06c25..9d39bdf 100644
--- a/README
+++ b/README
@@ -23,12 +23,12 @@ DESCRIPTION
symbol for preprocessing directives) or @>, or is
between a line starting with @< and a line starting
with @>, is parsed as a line, written in Bash, that
- is executed during preprocessing.
- A @< line must have an associated @> line somewhere
- after it, all lines between them are parsed as
- preprocessing directives. A @> does however not need
- an associated @< line somewhere before it, making
- @> suitable for single line directives.
+ is executed during preprocessing. A @< line must have
+ an associated @> line somewhere after it, all lines
+ between them are parsed as preprocessing directives.
+ A @> does however not need an associated @< line
+ somewhere before it, making @> suitable for single
+ line directives.
Preprocessing directives can also be inline. For this,
use @(COMMAND) where COMMAND is the Bash code to run.
@@ -41,7 +41,7 @@ DESCRIPTION
for lowercase.
Everything that is not a preprocessing directive is
- echo verbatim.
+ echo verbatim, except all @@ are replaced by @.
-D name=value
diff --git a/config.mk b/config.mk
index 2e90419..20f922d 100644
--- a/config.mk
+++ b/config.mk
@@ -2,5 +2,5 @@ PREFIX = /usr
MANPREFIX = $(PREFIX)/share/man
-CFLAGS = -std=c99 -Wall -O2
+CFLAGS = -std=c99 -Wall -Og -g
diff --git a/gpp.1 b/gpp.1
index 1b7bb69..8841a2e 100644
--- a/gpp.1
+++ b/gpp.1
@@ -85,7 +85,10 @@ or
for lowercase.
Everything that is not a preprocessing directive is
-echo verbatim.
+echo verbatim, except all
+.B @@
+are replaced by
+.BR @ .
diff --git a/gpp.c b/gpp.c
index 569ae76..bc466af 100644
--- a/gpp.c
+++ b/gpp.c
@@ -1,8 +1,12 @@
/* See LICENSE file for copyright and license details. */
+#include <sys/wait.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
+#include <poll.h>
+#include <stdarg.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -87,6 +91,29 @@ xcreate(const char *path)
+static void
+append(char **restrict out_datap, size_t *restrict out_lenp, size_t *restrict out_sizep, const char *fmt, ...)
+ va_list ap, ap2;
+ size_t len;
+ va_start(ap, fmt);
+ va_copy(ap2, ap);
+ len = (size_t)vsnprintf(NULL, 0, fmt, ap2);
+ va_end(ap2);
+ if (*out_lenp + len + 1 > *out_sizep) {
+ *out_sizep = *out_lenp + len + 1;
+ *out_datap = realloc(*out_datap, *out_sizep);
+ if (!*out_datap) {
+ fprintf(stderr, "%s: realloc: %s\n", argv0, strerror(errno));
+ exit(1);
+ }
+ }
+ vsprintf(&(*out_datap)[*out_lenp], fmt, ap);
+ *out_lenp += len;
+ va_end(ap);
main(int argc, char *argv[])
@@ -94,6 +121,7 @@ main(int argc, char *argv[])
const char *input_file = NULL;
const char *output_file = NULL;
const char *symbol = NULL;
+ size_t symlen = 1;
int iterations = -1;
int unshebang = 0;
long int tmp;
@@ -101,8 +129,16 @@ main(int argc, char *argv[])
char *in_data = NULL, *out_data = NULL;
size_t in_size = 0, in_len = 0, in_off = 0;
size_t out_size = 0, out_len = 0, out_off = 0;
- int in_fd, out_fd, do_close;
+ int in_fd, out_fd, do_close, fds_in[2], fds_out[2];
+ struct pollfd pfds[2];
+ size_t npfds;
+ char buffer[4096], c, *quotes = NULL, quote;
+ size_t brackets, nquotes, quotes_size = 0;
+ int symb, esc, dollar;
+ size_t len, j, lineno, no = 0, cnt;
+ int i, n, status, state, entered;
ssize_t r;
+ pid_t pid;
case 'D':
@@ -154,6 +190,9 @@ main(int argc, char *argv[])
if (symbol)
symbol = EARGF(usage());
+ symlen = strlen(symbol);
+ if (!symlen)
+ usage();
case 'u':
if (unshebang == 2)
@@ -164,8 +203,11 @@ main(int argc, char *argv[])
- if (argc)
+ if (argc) {
+ if (!**argv)
+ usage();
shell = (void *)argv;
+ }
if (setenv("_GPP", argv0, 1))
fprintf(stderr, "%s: setenv _GPP %s 1: %s\n", argv0, argv0, strerror(errno));
@@ -217,17 +259,289 @@ main(int argc, char *argv[])
while (iterations--) {
- /* TODO parse: in -> out */
+ entered = 0;
+ state = 0;
+ lineno = 0;
+ brackets = nquotes = 0;
+ symb = esc = dollar = 0;
+ while (in_off < in_len) {
+ if (state == 2) {
+ preprocess:
+ c = in_data[in_off++];
+ if (c == '\n') {
+ state = 0;
+ brackets = nquotes = 0;
+ symb = esc = dollar = 0;
+ append(&out_data, &out_len, &out_size, "'\n");
+ } else if (brackets) {
+ if (esc) {
+ esc = 0;
+ } else if (nquotes) {
+ if (dollar) {
+ dollar = 0;
+ if (c == '(') {
+ quote = ')';
+ goto add_to_quotes;
+ } else if (c == '{') {
+ quote = '}';
+ goto add_to_quotes;
+ }
+ } else if (c == quotes[nquotes - 1]) {
+ nquotes -= 1;
+ } else if ((quotes[nquotes - 1] == ')' || quotes[nquotes - 1] == '}') &&
+ (c == '"' || c == '\'' || c == '`')) {
+ quote = c;
+ goto add_to_quotes;
+ } else if (c == '\\' && quotes[nquotes - 1] != '\'') {
+ esc = 1;
+ } else if (c == '$') {
+ dollar = 1;
+ }
+ } else if (c == '"' || c == '\'' || c == '`') {
+ quote = c;
+ add_to_quotes:
+ if (nquotes == quotes_size) {
+ quotes = realloc(quotes, quotes_size += 1);
+ if (!quotes) {
+ fprintf(stderr, "%s: realloc: %s\n", argv0, strerror(errno));
+ return 1;
+ }
+ }
+ quotes[nquotes++] = quote;
+ } else if (c == ')' || c == '}') {
+ if (!--brackets) {
+ append(&out_data, &out_len, &out_size, "%c\"'", c);
+ continue;
+ }
+ } else if (c == '(' || c == '}') {
+ brackets += 1;
+ } else if (c == '\\') {
+ esc = 1;
+ }
+ append(&out_data, &out_len, &out_size, "%c", c);
+ } else if (c == *symbol && in_len - (in_off - 1) >= symlen &&
+ !memcmp(&in_data[in_off - 1], symbol, symlen)) {
+ if (symb)
+ append(&out_data, &out_len, &out_size, "%s", symbol);
+ symb ^= 1;
+ in_off += symlen - 1;
+ } else if (symb) {
+ symb = 0;
+ if (c == '(' || c == '{') {
+ brackets += 1;
+ append(&out_data, &out_len, &out_size, "'\"$%c", c);
+ } else if (c == '\'') {
+ append(&out_data, &out_len, &out_size, "%s'\\''", symbol);
+ } else {
+ append(&out_data, &out_len, &out_size, "%s%c", symbol, c);
+ }
+ } else if (c == '\'') {
+ append(&out_data, &out_len, &out_size, "'\\''");
+ } else {
+ if (out_len == out_size) {
+ out_data = realloc(out_data, out_size += 4096);
+ if (!out_data) {
+ fprintf(stderr, "%s: realloc: %s\n", argv0, strerror(errno));
+ return 1;
+ }
+ } else {
+ if ((out_data[out_len++] = c) == '\n')
+ state = 0;
+ }
+ }
+ } else if (state == 1) {
+ append_char:
+ if (out_len == out_size) {
+ out_data = realloc(out_data, out_size += 4096);
+ if (!out_data) {
+ fprintf(stderr, "%s: realloc: %s\n", argv0, strerror(errno));
+ return 1;
+ }
+ } else {
+ if ((out_data[out_len++] = in_data[in_off++]) == '\n')
+ state = 0;
+ }
+ } else {
+ lineno += 1;
+ if (in_len - in_off > symlen && !memcmp(&in_data[in_off], symbol, symlen) &&
+ (in_data[in_off + symlen] == '<' || in_data[in_off + symlen] == '>')) {
+ state = 1;
+ entered = in_data[in_off + symlen] == '<';
+ in_off += symlen + 1;
+ } else if (entered) {
+ goto append_char;
+ } else {
+ append(&out_data, &out_len, &out_size, "printf '\\000%%zu\\000%%s\\n' %zu '", lineno);
+ state = 2;
+ goto preprocess;
+ }
+ }
+ }
+ if (state == 2)
+ append(&out_data, &out_len, &out_size, "'");
in_len = 0;
in_off = 0;
- /* TODO shell: out -> in */
+ if (pipe(fds_in) || pipe(fds_out)) {
+ fprintf(stderr, "%s: pipe: %s\n", argv0, strerror(errno));
+ return 1;
+ }
+ pid = fork();
+ switch (pid) {
+ case -1:
+ fprintf(stderr, "%s: fork: %s\n", argv0, strerror(errno));
+ return 1;
+ case 0:
+ close(fds_in[1]);
+ close(fds_out[0]);
+ if (dup2(fds_in[0], STDIN_FILENO) != STDIN_FILENO) {
+ fprintf(stderr, "%s: dup2 <pipe> STDIN_FILENO: %s\n", argv0, strerror(errno));
+ return 1;
+ }
+ if (dup2(fds_out[1], STDOUT_FILENO) != STDOUT_FILENO) {
+ fprintf(stderr, "%s: dup2 <pipe> STDOUT_FILENO: %s\n", argv0, strerror(errno));
+ return 1;
+ }
+ close(fds_in[0]);
+ close(fds_out[1]);
+ execvp(*shell, (void *)shell);
+ fprintf(stderr, "%s: execvp %s: %s\n", argv0, *shell, strerror(errno));
+ return 1;
+ default:
+ close(fds_in[0]);
+ close(fds_out[1]);
+ break;
+ }
+ pfds[0].fd = fds_in[1];
+ pfds[0].events = POLLOUT;
+ pfds[1].fd = fds_out[0];
+ pfds[1].events = POLLIN;
+ npfds = 2;
+ lineno = 1;
+ state = 0;
+ while (npfds) {
+ n = poll(pfds, npfds, -1);
+ if (n < 0) {
+ fprintf(stderr, "%s: poll: %s\n", argv0, strerror(errno));
+ return 1;
+ }
+ for (i = 0; i < n; i++) {
+ if (!pfds[i].revents)
+ continue;
+ if (pfds[i].fd == fds_in[1]) {
+ if (out_off == out_len) {
+ if (close(fds_in[1])) {
+ fprintf(stderr, "%s: write <pipe>: %s\n", argv0, strerror(errno));
+ return 1;
+ }
+ pfds[i] = pfds[--npfds];
+ continue;
+ }
+ r = write(fds_in[1], &out_data[out_off], out_len - out_off);
+ if (r <= 0) {
+ fprintf(stderr, "%s: write <pipe>: %s\n", argv0, strerror(errno));
+ return 1;
+ }
+ out_off += (size_t)r;
+ } else {
+ r = read(fds_out[0], buffer, sizeof(buffer));
+ if (r <= 0) {
+ if (r < 0 || close(fds_out[0])) {
+ fprintf(stderr, "%s: read <pipe>: %s\n", argv0, strerror(errno));
+ return 1;
+ }
+ pfds[i] = pfds[--npfds];
+ continue;
+ }
+ len = (size_t)r;
+ for (j = 0; j < len; j++) {
+ switch (state) {
+ case 0:
+ no = 0;
+ if (!buffer[j]) {
+ state = 1;
+ } else {
+ state = 3;
+ goto state3;
+ }
+ break;
+ case 1:
+ if (isdigit(buffer[j])) {
+ if (buffer[j] == '0') {
+ append(&in_data, &in_len, &in_size, "%c", 0);
+ state = 3;
+ goto state3;
+ }
+ state = 2;
+ }
+ /* fall through */
+ case 2:
+ if (isdigit(buffer[j])) {
+ if (buffer[j] > (SIZE_MAX - (buffer[j] & 15)) / 10) {
+ append(&in_data, &in_len, &in_size, "%c%zu", 0, no);
+ state = 3;
+ goto state3;
+ }
+ no = no * 10 + (buffer[j] & 15);
+ } else if (!buffer[j]) {
+ if (no > lineno) {
+ cnt = no - lineno;
+ lineno = no;
+ if (in_len + cnt > in_size) {
+ in_size = in_len + cnt;
+ in_data = realloc(in_data, in_size);
+ if (!in_data) {
+ fprintf(stderr, "%s: realloc: %s\n",
+ argv0, strerror(errno));
+ return 1;
+ }
+ }
+ while (cnt--)
+ in_data[in_len++] = '\n';
+ }
+ state = 3;
+ } else {
+ append(&in_data, &in_len, &in_size, "%c%zu", 0, no);
+ state = 3;
+ goto state3;
+ }
+ break;
+ default:
+ state3:
+ if (in_len == in_size) {
+ in_size += 4096;
+ in_data = realloc(in_data, in_size);
+ if (!in_data) {
+ fprintf(stderr, "%s: realloc: %s\n", argv0, strerror(errno));
+ return 1;
+ }
+ }
+ in_data[in_len++] = buffer[j];
+ if (buffer[j] == '\n') {
+ lineno += 1;
+ state = 0;
+ }
+ break;
+ }
+ }
+ }
+ }
+ }
+ if (waitpid(pid, &status, 0) != pid) {
+ fprintf(stderr, "%s: waitpid %s <&status> 0: %s\n", argv0, *shell, strerror(errno));
+ return 1;
+ }
+ if (status)
+ return WIFEXITED(status) ? WEXITSTATUS(status) : 1;
out_len = 0;
out_off = 0;
+ in_off = 0;
+ free(quotes);
out_fd = xcreate(output_file);
while (in_off < in_len) {
diff --git a/gpp.py b/gpp.py
deleted file mode 100755
index 68a317f..0000000
--- a/gpp.py
+++ /dev/null
@@ -1,230 +0,0 @@
-# -*- coding: utf-8 -*-
-import os
-import sys
-import shlex
-from subprocess import Popen, PIPE
-if sys.version_info.major < 3:
- def bytes(string):
- r = bytearray(len(string))
- b = buffer(r)
- r[:] = string
- return r
-if sys.version_info.major < 3:
- def bytelist(string):
- return [ord(c) for c in string]
- bytelist = list
-symbol = '@'
-encoding = sys.getdefaultencoding()
-iterations = 1
-input_file = '/dev/stdin'
-output_file = '/dev/stdout'
-unshebang = 0
-args = sys.argv
-# If the file is executed, the first command line argument will be the first argument in the shebang,
-# the second will be the rest of the arguments in the command line as is and the third and final will
-# be the executed file.
-if len(args) == 3:
- if os.path.exists(args[2]) and os.path.isfile(args[2]) and ((os.stat(args[2]).st_mode & 0o111) != 0):
- args[1 : 2] = shlex.split(args[1])
-for i in range(1, len(args)):
- arg = args[i]
- i += 1
- if arg in ('-s', '--symbol'): symbol = sys.argv[i]
- elif arg in ('-e', '--encoding'): encoding = sys.argv[i]
- elif arg in ('-n', '--iterations'): iterations = int(sys.argv[i])
- elif arg in ('-u', '--unshebang'): unshebang += 1; continue
- elif arg in ('-i', '--input'): input_file = sys.argv[i]
- elif arg in ('-o', '--output'): output_file = sys.argv[i]
- elif arg in ('-f', '--file'):
- input_file = sys.argv[i]
- output_file = sys.argv[i]
- elif arg in ('-D', '--export'):
- export = sys.argv[i]
- if '=' not in export:
- export += '=1'
- export = (export.split('=')[0], '='.join(export.split('=')[1:]))
- os.putenv(export[0], export[1])
- else:
- continue
- i += 1
-if input_file == '-': input_file = '/dev/stdin'
-if output_file == '-': output_file = '/dev/stdout'
-symbol = bytelist(symbol.encode(encoding))
-symlen = len(symbol)
-if iterations < 1:
- if input_file != output_file:
- data = None
- with open(input_file, 'rb') as file:
- data = file.read()
- with open(output_file, 'wb') as file:
- file.write(data)
- file.flush()
- sys.exit(0)
-def linesplit(bs):
- rc = []
- elem = []
- for b in bs:
- if b == 10:
- rc.append(elem)
- elem = []
- else:
- elem.append(b)
- rc.append(elem)
- return rc
-def linejoin(bss):
- rc = []
- if len(bss) > 0:
- rc += bss[0]
- for bs in bss[1:]:
- rc.append(10)
- rc += bs
- return rc
-data = None
-with open(input_file, 'rb') as file:
- data = file.read()
-data = linesplit(bytelist(data))
-if unshebang == 1:
- if (len(data[0]) >= 2) and (data[0][0] == ord('#')) and (data[0][1] == ord('!')):
- data[0] = []
-if unshebang >= 2:
- if (len(data[0]) >= 2) and (data[0][0] == ord('#')) and (data[0][1] == ord('!')):
- data[0] = data[1]
- data[1] = []
-def pp(line):
- rc = []
- symb = False
- brackets = 0
- esc = False
- dollar = False
- quote = []
- n = len(line)
- i = 0
- rc.append(ord('\''))
- while i < n:
- c = line[i]
- i += 1
- if brackets > 0:
- if esc:
- esc = False
- elif len(quote) > 0:
- if dollar:
- dollar = False
- if c == ord('('):
- quote.append(ord(')'))
- elif c == ord('{'):
- quote.append(ord('}'))
- elif c == quote[-1]:
- quote[:] = quote[:-1]
- elif (quote[-1] in (ord(')'), ord('}'))) and (c in (ord('"'), ord('\''), ord('`'))):
- quote.append(c)
- elif (c == ord('\\')) and (quote[-1] != ord('\'')):
- esc = True
- elif c == ord('$'):
- dollar = True
- elif c in (ord('"'), ord('\''), ord('`')):
- quote.append(c)
- elif c in (ord(')'), ord('}')):
- brackets -= 1
- if brackets == 0:
- rc.append(c)
- rc.append(ord('"'))
- rc.append(ord('\''))
- continue
- elif c in (ord('('), ord('{')):
- brackets += 1
- elif c == ord('\\'):
- esc = True
- rc.append(c)
- elif line[i - 1 : i + symlen - 1] == symbol:
- if symb:
- rc += symbol
- symb = not symb
- i += symlen - 1
- elif symb:
- symb = False
- if c in (ord('('), ord('{')):
- brackets += 1
- rc.append(ord('\''))
- rc.append(ord('"'))
- rc.append(ord('$'))
- else:
- rc += symbol
- if c == ord('\''):
- rc.append(c)
- rc.append(ord('\\'))
- rc.append(c)
- rc.append(c)
- elif c == ord('\''):
- rc.append(c)
- rc.append(ord('\\'))
- rc.append(c)
- rc.append(c)
- else:
- rc.append(c)
- rc.append(ord('\''))
- return rc
-for _ in range(iterations):
- entered = False
- bashed = []
- for lineno in range(len(data)):
- line = data[lineno]
- if (len(line) > symlen) and (line[:symlen] == symbol) and (line[symlen] in (ord('<'), ord('>'))):
- bashed.append(line[symlen + 1:])
- entered = line[symlen] == ord('<')
- elif entered:
- bashed.append(line)
- else:
- buf = bytelist(('echo $\'\\e%i\\e\'' % lineno).encode())
- bashed.append(buf + pp(line))
- bashed = bytes(linejoin(bashed))
- bash = Popen(["bash"], stdin = PIPE, stdout = PIPE, stderr = sys.stderr)
- bashed = bash.communicate(bashed)[0]
- if bash.returncode != 0:
- sys.exit(bash.returncode)
- bashed = linesplit(bytelist(bashed))
- data = []
- lineno = -1
- for line in bashed:
- no = -1
- if (len(line) > 0) and (line[0] == 0o33):
- no = 0
- for i in range(1, len(line)):
- if line[i] == 0o33:
- line = line[i + 1:]
- break
- no = no * 10 + (line[i] - ord('0'))
- if no > lineno:
- while no != lineno + 1:
- data.append([])
- lineno += 1
- data.append(line)
- lineno += 1
-data = bytes(linejoin(data))
-with open(output_file, 'wb') as file:
- file.write(data)
- file.flush()