aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--Makefile15
-rw-r--r--README19
-rw-r--r--apsh.c719
-rw-r--r--common.h156
-rw-r--r--config.mk2
-rw-r--r--parser.c271
-rw-r--r--preparser.c51
-rw-r--r--tokeniser.c419
8 files changed, 952 insertions, 700 deletions
diff --git a/Makefile b/Makefile
index 16394bb..696478c 100644
--- a/Makefile
+++ b/Makefile
@@ -4,9 +4,13 @@ CONFIGFILE = config.mk
include $(CONFIGFILE)
OBJ =\
- apsh.o
+ apsh.o\
+ preparser.o\
+ tokeniser.o\
+ parser.o
HDR =\
+ common.h\
config.h
all: apsh
@@ -18,10 +22,17 @@ $(OBJ): $(@:.o=.c) $(HDR)
apsh: $(OBJ)
$(CC) -o $@ $(OBJ) $(LDFLAGS)
+install: apsh
+ mkdir -p -- "$(DESTDIR)$(PREFIX)/bin/"
+ cp -- apsh "$(DESTDIR)$(PREFIX)/bin/"
+
+uninstall:
+ -rm -f -- "$(DESTDIR)$(PREFIX)/bin/apsh"
+
clean:
-rm -f -- *.o *.su apsh
.SUFFIXES:
.SUFFIXES: .o .c
-.PHONY: all clean
+.PHONY: all install uninstall clean
diff --git a/README b/README
index aa00934..f8505c1 100644
--- a/README
+++ b/README
@@ -1,2 +1,21 @@
NAME
apsh — advanced piping shell
+
+DESCRIPTION
+ apsh is a shell designed to give the user the ability
+ to create advanced pipelines. To this end, all forks
+ apsh makes of itself share exported and unexported
+ variables, with the exception of when the ( ) syntax
+ is used to fork the shell, in which case they are
+ inherited but unshared.
+
+ apsh has support for <( ) and >( ), as well as <>( )
+ which creates a socket instead of a pipe and connects
+ both ends. Similarly <>| is like |, except it creates
+ a bidirectional socket instead of a pipe. apsh also
+ lets the user create pipes and sockets before they
+ are used.
+
+ Additionally, if ( ) or (( )) is used as an argument
+ to a command, the code is formatted and passed
+ as a string to the command as that argument.
diff --git a/apsh.c b/apsh.c
index f732b20..4e7183b 100644
--- a/apsh.c
+++ b/apsh.c
@@ -1,709 +1,32 @@
/* See LICENSE file for copyright and license details. */
-#include <libsimple.h>
-#include <libsimple-arg.h>
-#include "config.h"
+#include "common.h"
USAGE("");
-enum argument_type {
- VERBATIM,
- ESCAPED,
- SPECIAL,
- FUNCTION_MARK,
- SUBSHELL_INPUT, /* >(...) */
- SUBSHELL_OUTPUT, /* <(...) */
- SUBSHELL_INPUT_OUTPUT, /* <>(...) ## create socket for both input and output of subshell */
- SUBSHELL_SUBSTITUTION,
- SUBSHELL, /* (...) or ((...)) ## if non-first argument: format shell code into a string (can be used for a clean subshell) */
- REDIRECTION /* at beginning of argument, use next redirection and use reminder of argument as right-hand side */
-};
-enum redirection_type {
- REDIRECT_INPUT,
- REDIRECT_INPUT_TO_FD,
- REDIRECT_OUTPUT,
- REDIRECT_OUTPUT_APPEND,
- REDIRECT_OUTPUT_CLOBBERING,
- REDIRECT_OUTPUT_TO_FD,
- REDIRECT_INPUT_OUTPUT,
- REDIRECT_INPUT_OUTPUT_TO_FD,
- HERE_STRING,
- HERE_DOCUMENT,
- HERE_DOCUMENT_INDENTED
-};
-
-enum command_terminal {
- DOUBLE_SEMICOLON,
- SEMICOLON,
- AMPERSAND,
- SOCKET_PIPE,
- PIPE,
- PIPE_AMPERSAND,
- AND,
- OR
-};
-
-enum shell_terminator {
- END_OF_FILE,
- ROUND,
- ROUND_ROUND,
- SQUARE,
- BACKTICK,
-};
-
-struct parser_state;
-
-struct argument {
- enum argument_type type;
- union {
- struct { /* VERBATIM, ESCAPED */
- char *text;
- size_t length;
- };
- char symbol; /* SPECIAL */
- struct parser_state *root; /* SUBSHELL, SUBSHELL_* */
- }; /* none for FUNCTION_MARK, REDIRECTION */
- struct argument *next_part;
-};
-
-struct redirection {
- enum redirection_type type;
- struct argument *left_hand_side;
-};
-
-struct command {
- enum command_terminal terminal;
- struct argument **arguments;
- size_t narguments;
- struct redirection **redirections;
- size_t nredirections;
-};
-
-struct parser_state {
- struct parser_state *parent;
- struct command **commands;
- size_t ncommands;
- struct argument **arguments;
- size_t narguments;
- struct redirection **redirections;
- size_t nredirections;
- struct argument *current_argument;
- struct argument *current_argument_end;
- enum shell_terminator exit_on;
- char at_dollar;
- char is_expr_shell;
- char need_right_hand_side;
-};
-
-struct here_document {
- struct redirection *redirection;
- struct argument *argument;
- struct here_document *next;
-};
-
-static size_t line_number = 1;
-static int tty_input = 0;
-
-static struct parser_state *state;
-static struct here_document *here_documents_first = NULL;
-static struct here_document **here_documents_next = &here_documents_first;
-
-static void flush_dollar(void);
-static void verbatim(const char *text, size_t text_length, int from_quote);
-
-static void
-whitespace(int strict)
-{
- flush_dollar();
-
- if (state->need_right_hand_side) {
- if (strict)
- eprintf("premature end of command\n");
- return;
- }
-
- if (state->current_argument) {
- state->arguments = erealloc(state->arguments, (state->narguments + 1) * sizeof(*state->arguments));
- state->arguments[state->narguments++] = state->current_argument;
- state->current_argument = NULL;
- state->current_argument_end = NULL;
- }
-}
-
-static void
-terminate_command(enum command_terminal terminal)
-{
- whitespace(1);
-
- state->commands = erealloc(state->commands, (state->ncommands + 1) * sizeof(*state->commands));
- state->commands[state->ncommands] = ecalloc(1, sizeof(**state->commands));
- state->commands[state->ncommands]->terminal = terminal;
- state->commands[state->ncommands]->arguments = state->arguments;
- state->commands[state->ncommands]->narguments = state->narguments;
- state->commands[state->ncommands]->redirections = state->redirections;
- state->commands[state->ncommands]->nredirections = state->nredirections;
- state->ncommands += 1;
- state->arguments = NULL;
- state->narguments = 0;
- state->redirections = NULL;
- state->nredirections = 0;
-
- if (!state->parent) {
- if (terminal == DOUBLE_SEMICOLON || terminal == SEMICOLON || terminal == AMPERSAND) {
- /* TODO unless in a special construct such as while, case, for, if, or {, run and clear
- * also require that any here-document is specified (count them and run when given)
- */
- }
- }
-}
-
-static void
-semicolon(int maybe)
-{
- if (!maybe || state->narguments)
- terminate_command(SEMICOLON);
-}
-
-static void
-end_subshell(void)
-{
- semicolon(1);
- /* TODO validate subshell content */
- state = state->parent;
-}
-
-static void
-add_redirection(enum redirection_type type)
-{
- state->redirections = erealloc(state->redirections, (state->nredirections + 1) * sizeof(*state->redirections));
- state->redirections[state->nredirections] = ecalloc(1, sizeof(**state->redirections));
- state->redirections[state->nredirections]->type = type;
- if (state->current_argument) {
- if (state->current_argument->type == REDIRECTION) {
- whitespace(1);
- } else {
- state->redirections[state->nredirections]->left_hand_side = state->current_argument;
- state->current_argument = NULL;
- state->current_argument_end = NULL;
- }
- }
- state->current_argument_end = state->current_argument = calloc(1, sizeof(*state->current_argument));
- state->current_argument_end->type = REDIRECTION;
- if (type == HERE_DOCUMENT || type == HERE_DOCUMENT_INDENTED) {
- *here_documents_next = emalloc(sizeof(**here_documents_next));
- (*here_documents_next)->redirection = state->redirections[state->nredirections];
- (*here_documents_next)->argument = state->current_argument;
- (*here_documents_next)->next = NULL;
- here_documents_next = &(*here_documents_next)->next;
- }
- state->nredirections += 1;
- state->need_right_hand_side = 1;
-}
-
-static void
-add_shell_io(enum argument_type type, enum shell_terminator exit_on)
-{
- struct parser_state *new_state;
-
- state->need_right_hand_side = 0;
-
- if (!state->current_argument_end)
- state->current_argument_end = state->current_argument = ecalloc(1, sizeof(struct argument));
- else
- state->current_argument_end = state->current_argument_end->next_part = ecalloc(1, sizeof(struct argument));
-
- new_state = ecalloc(1, sizeof(*new_state));
- new_state->parent = state;
- new_state->exit_on = exit_on;
-
- state->current_argument_end->type = type;
- state->current_argument_end->root = state;
-
- state = new_state;
-}
-
-static void
-add_function_mark(void)
+void
+initialise_parser_context(struct parser_context *ctx)
{
- whitespace(1);
- if (!state->current_argument_end)
- state->current_argument_end = state->current_argument = ecalloc(1, sizeof(struct argument));
- else
- state->current_argument_end = state->current_argument_end->next_part = ecalloc(1, sizeof(struct argument));
- state->current_argument_end->type = FUNCTION_MARK;
- whitespace(1);
-}
-
-static void
-parse_symbol(const char *token, size_t token_length)
-{
- struct parser_state *old_state = state;
-
- while (token_length) {
- if (state->at_dollar) {
- state->at_dollar = 0;
- if (token_length >= 2 && token[0] == '(' && token[1] == '(') {
- add_shell_io(SUBSHELL_SUBSTITUTION, ROUND_ROUND);
- state->is_expr_shell = 1;
- token = &token[2];
- token_length -= 2;
- } else if (token_length >= 1 && token[0] == '(') {
- add_shell_io(SUBSHELL_SUBSTITUTION, ROUND);
- token = &token[1];
- token_length -= 1;
- } else if (token_length >= 1 && token[0] == '[') {
- add_shell_io(SUBSHELL_SUBSTITUTION, SQUARE);
- state->is_expr_shell = 1;
- token = &token[1];
- token_length -= 1;
- } else if (token_length >= 1 && token[0] == '{') { /* TODO */
- token = &token[1];
- token_length -= 1;
- } else {
- state->at_dollar = 1;
- flush_dollar();
- continue;
- }
- }
-
- if (token_length >= 3 && token[0] == '<' && token[1] == '<' && token[2] == '<') {
- add_redirection(HERE_STRING);
- token = &token[3];
- token_length -= 3;
-
- } else if (token_length >= 3 && token[0] == '<' && token[1] == '<' && token[2] == '-') {
- add_redirection(HERE_DOCUMENT_INDENTED);
- token = &token[3];
- token_length -= 3;
-
- } else if (token_length >= 3 && token[0] == '<' && token[1] == '>' && token[2] == '(') {
- add_shell_io(SUBSHELL_INPUT_OUTPUT, ROUND);
- token = &token[3];
- token_length -= 3;
-
- } else if (token_length >= 3 && token[0] == '<' && token[1] == '>' && token[2] == '|') {
- terminate_command(SOCKET_PIPE);
- token = &token[3];
- token_length -= 3;
-
- } else if (token_length >= 3 && token[0] == '<' && token[1] == '>' && token[2] == '&') {
- add_redirection(REDIRECT_INPUT_OUTPUT_TO_FD);
- token = &token[3];
- token_length -= 3;
-
- } else if (token_length >= 2 && token[0] == ')' && token[1] == ')') {
- if (state->exit_on == ROUND_ROUND)
- end_subshell();
- else
- eprintf("stray )) at line %zu\n", line_number);
- token = &token[2];
- token_length -= 2;
-
- } else if (token_length >= 2 && token[0] == '(' && token[1] == ')') {
- add_function_mark();
- token = &token[2];
- token_length -= 2;
-
- } else if (token_length >= 2 && token[0] == '(' && token[1] == '(') {
- add_shell_io(SUBSHELL, ROUND_ROUND);
- state->is_expr_shell = 1;
- token = &token[2];
- token_length -= 2;
-
- } else if (token_length >= 2 && token[0] == ';' && token[1] == ';') {
- terminate_command(DOUBLE_SEMICOLON);
- token = &token[2];
- token_length -= 2;
-
- } else if (token_length >= 2 && token[0] == '<' && token[1] == '(') {
- add_shell_io(SUBSHELL_OUTPUT, ROUND);
- token = &token[2];
- token_length -= 2;
-
- } else if (token_length >= 2 && token[0] == '<' && token[1] == '<') {
- add_redirection(HERE_DOCUMENT);
- token = &token[2];
- token_length -= 2;
-
- } else if (token_length >= 2 && token[0] == '<' && token[1] == '>') {
- add_redirection(REDIRECT_INPUT_OUTPUT);
- token = &token[2];
- token_length -= 2;
-
- } else if (token_length >= 2 && token[0] == '<' && token[1] == '&') {
- add_redirection(REDIRECT_INPUT_TO_FD);
- token = &token[2];
- token_length -= 2;
-
- } else if (token_length >= 2 && token[0] == '>' && token[1] == '(') {
- add_shell_io(SUBSHELL_INPUT, ROUND);
- token = &token[2];
- token_length -= 2;
-
- } else if (token_length >= 2 && token[0] == '>' && token[1] == '>') {
- add_redirection(REDIRECT_OUTPUT_APPEND);
- token = &token[2];
- token_length -= 2;
-
- } else if (token_length >= 2 && token[0] == '>' && token[1] == '&') {
- add_redirection(REDIRECT_OUTPUT_TO_FD);
- token = &token[2];
- token_length -= 2;
-
- } else if (token_length >= 2 && token[0] == '>' && token[1] == '|') {
- add_redirection(REDIRECT_OUTPUT_CLOBBERING);
- token = &token[2];
- token_length -= 2;
-
- } else if (token_length >= 2 && token[0] == '|' && token[1] == '&') {
- terminate_command(PIPE_AMPERSAND);
- token = &token[2];
- token_length -= 2;
-
- } else if (token_length >= 2 && token[0] == '|' && token[1] == '|') {
- terminate_command(OR);
- token = &token[2];
- token_length -= 2;
-
- } else if (token_length >= 2 && token[0] == '&' && token[1] == '&') {
- terminate_command(AND);
- token = &token[2];
- token_length -= 2;
-
- } else if (token_length >= 1 && token[0] == ')') {
- if (state->exit_on == ROUND)
- end_subshell();
- else
- eprintf("stray ) at line %zu\n", line_number);
- token = &token[1];
- token_length -= 1;
-
- } else if (token_length >= 1 && token[0] == ']') {
- if (state->exit_on == SQUARE)
- end_subshell();
- else
- verbatim(token, 1, 0);
- token = &token[1];
- token_length -= 1;
-
- } else if (token_length >= 1 && token[0] == '(') {
- add_shell_io(SUBSHELL, ROUND);
- state->is_expr_shell = old_state->is_expr_shell;
- token = &token[1];
- token_length -= 1;
-
- } else if (token_length >= 1 && token[0] == ';') {
- semicolon(0);
- token = &token[1];
- token_length -= 1;
-
- } else if (token_length >= 1 && token[0] == '<') {
- add_redirection(REDIRECT_INPUT);
- token = &token[1];
- token_length -= 1;
-
- } else if (token_length >= 1 && token[0] == '>') {
- add_redirection(REDIRECT_OUTPUT);
- token = &token[1];
- token_length -= 1;
-
- } else if (token_length >= 1 && token[0] == '|') {
- terminate_command(PIPE);
- token = &token[1];
- token_length -= 1;
-
- } else if (token_length >= 1 && token[0] == '&') {
- terminate_command(AMPERSAND);
- token = &token[1];
- token_length -= 1;
-
- } else {
- verbatim(token, 1, 0);
- token = &token[1];
- token_length -= 1;
- }
- }
-}
-
-static void
-symbol(char *token, size_t token_length, size_t escaped_newlines)
-{
- size_t new_length, r, w;
- if (escaped_newlines) {
- r = w = 0;
- new_length = token_length - 2 * escaped_newlines;
- while (escaped_newlines--) {
- if (token[r] == '\\')
- r += 2;
- else
- token[w++] = token[r++];
- }
- memcpy(&token[w], &token[r], token_length - r);
- token_length = new_length;
- }
- parse_symbol(token, token_length);
-}
-
-static void
-backtick(void)
-{
- flush_dollar();
- if (state->exit_on == BACKTICK)
- end_subshell();
- else
- add_shell_io(SUBSHELL_SUBSTITUTION, BACKTICK);
-}
-
-static void
-double_quote(void)
-{
- flush_dollar();
- /* TODO */
-}
-
-static void
-verbatim(const char *text, size_t text_length, int from_quote)
-{
- struct argument *argend;
-
- state->need_right_hand_side = 0;
-
- if (from_quote && state->at_dollar) {
- state->at_dollar = 0;
- if (!state->current_argument_end)
- state->current_argument_end = state->current_argument = ecalloc(1, sizeof(struct argument));
- else
- state->current_argument_end = state->current_argument_end->next_part = ecalloc(1, sizeof(struct argument));
- state->current_argument_end->type = ESCAPED;
- } else {
- flush_dollar();
- if (!state->current_argument_end) {
- state->current_argument_end = state->current_argument = ecalloc(1, sizeof(struct argument));
- state->current_argument_end->type = VERBATIM;
- } else if (state->current_argument_end->type != VERBATIM) {
- state->current_argument_end = state->current_argument_end->next_part = ecalloc(1, sizeof(struct argument));
- state->current_argument_end->type = VERBATIM;
- }
- }
-
- argend = state->current_argument_end;
-
- argend->text = erealloc(argend->text, argend->length + text_length + 1);
- memcpy(&argend->text[argend->length], text, text_length);
- argend->length += text_length;
- argend->text[argend->length] = '\0';
-}
-
-static void
-flush_dollar(void)
-{
- if (state->at_dollar) {
- state->at_dollar = 0;
- verbatim("$", 1, 0);
- }
-}
-
-static void
-append_special(char symbol)
-{
- state->need_right_hand_side = 0;
-
- if (!state->current_argument_end)
- state->current_argument_end = state->current_argument = ecalloc(1, sizeof(struct argument));
- else
- state->current_argument_end = state->current_argument_end->next_part = ecalloc(1, sizeof(struct argument));
-
- state->current_argument_end->type = SPECIAL;
- state->current_argument_end->symbol = symbol;
-}
-
-static void
-unverbatim(const char *text, size_t text_length)
-{
- size_t verbatim_length;
-
- /* TODO handle state->dollar */
-
- while (text_length) {
- for (verbatim_length = 0; verbatim_length < text_length; verbatim_length++)
- if (*text == '*' || *text == '?' || *text == '[' || *text == ']' ||
- *text == ',' || *text == '.' || *text == '{' || *text == '}' ||
- *text == '~' || *text == '!' || *text == '=')
- break;
- if (verbatim_length) {
- verbatim(text, verbatim_length, 0);
- text = &text[verbatim_length];
- text_length -= verbatim_length;
- } else {
- append_special(*text);
- text = &text[1];
- text_length -= 1;
- }
- }
-}
-
-static void
-dollar(void)
-{
- /* TODO forbid $ if giving argument to here-document */
- if (state->at_dollar)
- unverbatim("$", 1);
- else
- state->at_dollar = 1;
-}
-
-static int
-end_of_file(void)
-{
- semicolon(1);
- return !(state->parent || state->ncommands);
-}
-
-static size_t
-parse(char *code, size_t code_len, int end_of_file_reached)
-{
-#define IS_SYMBOL(C)\
- ((!state->is_expr_shell && (\
- (C) == '|' || (C) == '&' || (C) == ';' || \
- (C) == '<' || (C) == '>' || (C) == '-')) || \
- (C) == '{' || (C) == '}' || \
- (C) == '(' || (C) == ')' || \
- (C) == '[' || (C) == ']')
-
- static int she_is_comment = 1;
- static int in_comment = 0;
- static int at_line_beginning = 1;
-
- size_t read_bytes = 0;
- size_t token_len;
- size_t new_lines;
-
- for (; read_bytes < code_len; read_bytes += token_len, code = &code[token_len]) {
- if (at_line_beginning) {
- if (here_documents_first) {
- /* TODO read until terminator, remove indentation if <<- and then parse in "-mode but accept " */
- }
- at_line_beginning = 0;
- }
-
- if (in_comment) {
- if (*code == '\n') {
- in_comment = 0;
- } else {
- token_len = 1;
- continue;
- }
- }
-
- if (*code == '\0') {
- if (!tty_input)
- weprintf("ignoring NUL byte at line %zu\n", line_number);
-
- } else if (*code == '\n') {
- line_number += 1;
- she_is_comment = 1;
- whitespace(0);
- semicolon(1);
- token_len = 1;
- at_line_beginning = 1;
-
- } else if (isspace(*code)) {
- she_is_comment = 1;
- whitespace(0);
- for (token_len = 1; token_len < code_len - read_bytes; token_len++)
- if (!isspace(code[token_len]) || code[token_len] == '\n')
- break;
-
- } else if (*code == '#' && she_is_comment) {
- in_comment = 1;
- token_len = 1;
-
- } else if (IS_SYMBOL(*code)) {
- she_is_comment = 1;
- new_lines = 0;
- for (token_len = 1; token_len < code_len - read_bytes; token_len++) {
- if (code[token_len] == '\\' && token_len + 1 < code_len - read_bytes && code[token_len] == '\n') {
- new_lines += 1;
- } else if (!IS_SYMBOL(code[token_len])) {
- symbol(code, token_len, new_lines);
- line_number += new_lines;
- goto next;
- }
- }
- if (end_of_file_reached) {
- symbol(code, token_len, new_lines);
- line_number += new_lines;
- } else {
- break;
- }
-
- } else if (*code == '\\') {
- she_is_comment = 0;
- if (code_len - read_bytes < 2)
- break;
- token_len = 2;
- if (code[1] == '\n')
- line_number += 1;
- else
- verbatim(&code[1], 1, 0);
-
- } else if (*code == '$') {
- she_is_comment = 0;
- dollar();
- token_len = 1;
-
- } else if (*code == '`') {
- she_is_comment = 1;
- backtick();
- token_len = 1;
-
- } else if (*code == '"') {
- she_is_comment = 0;
- double_quote();
-
- } else if (*code == '\'') {
- she_is_comment = 0;
- new_lines = 0;
- for (token_len = 1; token_len < code_len - read_bytes; token_len++) {
- if (code[token_len] == '\'') {
- token_len += 1;
- if (!state->at_dollar || code[token_len - 2] != '\\') {
- verbatim(&code[1], token_len - 2, 1);
- line_number += new_lines;
- goto next;
- }
- } else if (code[token_len] == '\n') {
- new_lines += 1;
- }
- }
- break;
-
- } else {
- she_is_comment = 0;
- for (token_len = 1; token_len < code_len - read_bytes; token_len++)
- if (isspace(*code) || IS_SYMBOL(*code) || *code == '\\' ||
- *code == '$' || *code == '`' || *code == '"' || *code == '\'')
- break;
- unverbatim(code, token_len);
- }
-
- next:;
- }
-
- return read_bytes;
-
-#undef IS_SYMBOL
+ memset(ctx, 0, sizeof(*ctx));
+ ctx->preparser_line_number = 1;
+ ctx->tokeniser_line_number = 1;
+ ctx->mode_stack = ecalloc(1, sizeof(*ctx->mode_stack));
+ ctx->mode_stack->mode = NORMAL_MODE;
+ ctx->mode_stack->she_is_comment = 1;
+ ctx->parser_state = ecalloc(1, sizeof(*ctx->parser_state));
+ ctx->here_documents_next = &ctx->here_documents_first;
}
int
main(int argc, char *argv[])
{
+ struct parser_context ctx;
char *buffer = NULL;
size_t buffer_size = 0;
size_t buffer_head = 0;
size_t buffer_tail = 0;
ssize_t r;
- size_t n;
+ size_t n, nremoved;
ARGBEGIN {
default:
@@ -713,12 +36,11 @@ main(int argc, char *argv[])
if (argc)
usage();
- tty_input = isatty(STDIN_FILENO);
- if (tty_input)
+ initialise_parser_context(&ctx);
+ ctx.tty_input = isatty(STDIN_FILENO);
+ if (ctx.tty_input)
weprintf("apsh is currently not implemented to be interactive\n");
- state = ecalloc(1, sizeof(*state));
-
for (;;) {
if (buffer_size - buffer_head < PARSE_RINGBUFFER_MIN_AVAILABLE) {
if (buffer_tail && buffer_head - buffer_tail <= buffer_tail) {
@@ -739,11 +61,14 @@ main(int argc, char *argv[])
n = (size_t)r;
buffer_head += n;
- buffer_tail += parse(&buffer[buffer_tail], buffer_head - buffer_tail, 0);
+ buffer_tail += n = parse(&ctx, &buffer[buffer_tail], buffer_head - buffer_tail, &nremoved);
+ buffer_head -= nremoved;
}
- buffer_tail += parse(&buffer[buffer_tail], buffer_head - buffer_tail, 1);
- if (buffer_tail != buffer_head || !end_of_file())
+ ctx.end_of_file_reached = 1;
+ buffer_tail += parse(&ctx, &buffer[buffer_tail], buffer_head - buffer_tail, &nremoved);
+ buffer_head -= nremoved;
+ if (buffer_tail != buffer_head || ctx.premature_end_of_file)
eprintf("premature end of file reached\n");
free(buffer);
diff --git a/common.h b/common.h
new file mode 100644
index 0000000..b5def37
--- /dev/null
+++ b/common.h
@@ -0,0 +1,156 @@
+/* See LICENSE file for copyright and license details. */
+#include <libsimple.h>
+#include <libsimple-arg.h>
+#include "config.h"
+
+/*
+ * Type tag for one part of an argument (see struct argument);
+ * the group comments below name the struct argument members
+ * that are used for the types listed after them
+ */
+enum argument_type {
+ /* .text and .length */
+ QUOTED, /* \ or '…' or $'…' */
+ UNQUOTED, /* normal */
+ /* .child */
+ QUOTE_EXPRESSION, /* "…" */
+ BACKQUOTE_EXPRESSION, /* `…` */
+ ARITHMETIC_EXPRESSION, /* $((…)) */
+ VARIABLE_SUBSTITUTION, /* ${…} */
+ SUBSHELL_SUBSTITUTION, /* $(…) */
+ PROCESS_SUBSTITUTION_INPUT, /* >(…) */
+ PROCESS_SUBSTITUTION_OUTPUT, /* <(…) */
+ PROCESS_SUBSTITUTION_INPUT_OUTPUT, /* <>(…) */
+ SUBSHELL, /* (…) ## if non-first argument: format shell code into a string (can be used for a clean subshell) */
+ ARITHMETIC_SUBSHELL, /* ((…)) ## if non-first argument: format shell code into a string */
+ /* (none) */
+ REDIRECTION, /* at beginning of argument, use next redirection and use remainder of argument as right-hand side */
+ FUNCTION_MARK /* () */
+};
+/*
+ * Kind of redirection operator; the operator spelling that
+ * push_symbol (parser.c) maps to each value is noted beside it
+ */
+enum redirection_type {
+ REDIRECT_INPUT, /* < */
+ REDIRECT_INPUT_TO_FD, /* <& */
+ REDIRECT_OUTPUT, /* > */
+ REDIRECT_OUTPUT_APPEND, /* >> */
+ REDIRECT_OUTPUT_CLOBBER, /* >| */
+ REDIRECT_OUTPUT_TO_FD, /* >& */
+ REDIRECT_OUTPUT_AND_STDERR, /* &> */
+ REDIRECT_OUTPUT_AND_STDERR_APPEND, /* &>> */
+ REDIRECT_OUTPUT_AND_STDERR_CLOBBER, /* &>| */
+ REDIRECT_OUTPUT_AND_STDERR_TO_FD, /* &>& */
+ REDIRECT_INPUT_OUTPUT, /* <> */
+ REDIRECT_INPUT_OUTPUT_TO_FD, /* <>& */
+ HERE_STRING, /* <<< */
+ HERE_DOCUMENT, /* << */
+ HERE_DOCUMENT_INDENTED /* <<- */
+};
+/*
+ * Mode stored in each entry of the tokeniser's mode stack
+ * (struct mode_stack)
+ *
+ * NOTE(review): only NORMAL_MODE and RRB_QUOTE_MODE are used in
+ * parser.c; the meaning of the other modes is inferred from
+ * their names and must be confirmed against tokeniser.c
+ */
+enum tokeniser_mode {
+ NORMAL_MODE, /* initial mode; also pushed for (…), <(…), >(…), and <>(…) */
+ COMMENT_MODE, /* presumably inside a comment — TODO confirm */
+ BQ_QUOTE_MODE, /* presumably inside `…` — TODO confirm */
+ DQ_QUOTE_MODE, /* presumably inside "…" — TODO confirm */
+ RRB_QUOTE_MODE, /* pushed for ((…)) */
+ RB_QUOTE_MODE, /* presumably round-bracket quote — TODO confirm */
+ SB_QUOTE_MODE, /* presumably square-bracket quote — TODO confirm */
+ CB_QUOTE_MODE, /* presumably curly-bracket quote — TODO confirm */
+ HERE_DOCUMENT_MODE /* presumably reading a here-document body — TODO confirm */
+};
+/*
+ * Symbol that terminated a command; the spelling that
+ * push_symbol (parser.c) maps to each value is noted beside it
+ */
+enum command_terminal {
+ DOUBLE_SEMICOLON, /* ;; */
+ SEMICOLON, /* ; (also pushed via push_semicolon) */
+ AMPERSAND, /* & */
+ SOCKET_PIPE, /* <>| */
+ PIPE, /* | */
+ PIPE_AMPERSAND, /* |& or &| */
+ AND, /* && */
+ OR /* || */
+};
+
+struct parser_state;
+/*
+ * One part of an argument; an argument is a singly linked
+ * list of parts, chained through .next_part
+ */
+struct argument {
+ enum argument_type type; /* selects which union member, if any, is used */
+ union {
+ struct {
+ char *text; /* NUL-terminated text, grown by push_text */
+ size_t length; /* length of .text, excluding the NUL byte */
+ };
+ struct parser_state *child; /* nested state created by push_enter */
+ };
+ size_t line_number; /* ctx->tokeniser_line_number when the part was created */
+ struct argument *next_part; /* next part of the same argument, or NULL */
+};
+/*
+ * A redirection attached to a command; created by
+ * push_redirection in parser.c
+ */
+struct redirection {
+ enum redirection_type type; /* which operator was used */
+ struct argument *left_hand_side; /* argument directly before the operator, or NULL if none was taken */
+};
+/*
+ * A completed command; push_command_terminal moves the
+ * argument and redirection lists of the parser state into it
+ */
+struct command {
+ enum command_terminal terminal; /* symbol that ended the command */
+ struct argument **arguments; /* array of .narguments arguments */
+ size_t narguments;
+ struct redirection **redirections; /* array of .nredirections redirections */
+ size_t nredirections;
+};
+/*
+ * Parser state for one nesting level; push_enter creates a
+ * child state and push_leave returns to .parent
+ */
+struct parser_state {
+ struct parser_state *parent; /* enclosing state, NULL for the state created at initialisation */
+ struct command **commands; /* completed commands, .ncommands elements */
+ size_t ncommands;
+ struct argument **arguments; /* completed arguments of the command being built */
+ size_t narguments;
+ struct redirection **redirections; /* redirections of the command being built */
+ size_t nredirections;
+ struct argument *current_argument; /* first part of the argument being built, or NULL */
+ struct argument *current_argument_end; /* last part of the argument being built, or NULL */
+ char need_right_hand_side; /* set by push_redirection, cleared by push_text and push_enter */
+};
+/*
+ * Entry in the list of here-documents, appended by
+ * push_redirection for HERE_DOCUMENT and HERE_DOCUMENT_INDENTED
+ */
+struct here_document {
+ struct redirection *redirection; /* the << or <<- redirection */
+ struct argument *argument; /* the REDIRECTION argument created for it */
+ struct here_document *next; /* next entry, or NULL */
+};
+/*
+ * Entry in the tokeniser's mode stack; push_mode adds an
+ * entry on top of the stack
+ */
+struct mode_stack {
+ enum tokeniser_mode mode; /* mode of this entry */
+ int she_is_comment; /* initialised to 1; presumably whether '#' would start a comment — TODO confirm in tokeniser.c */
+ struct mode_stack *previous; /* entry below this one */
+};
+/*
+ * All state shared between the preparser, the tokeniser, and
+ * the parser; set up by initialise_parser_context
+ */
+struct parser_context {
+ int tty_input; /* set by main from isatty(STDIN_FILENO) */
+ int end_of_file_reached; /* set by main before the final call to parse */
+ int premature_end_of_file; /* set by push_end_of_file */
+ size_t preparser_offset; /* how far into the buffer passed to parse the preparser has scanned */
+ size_t preparser_line_number; /* starts at 1, incremented by parse for each <newline> it scans */
+ size_t line_continuations; /* incremented by parse for each removed <backslash><newline> */
+ size_t tokeniser_line_number; /* starts at 1; copied into each new argument part */
+ struct mode_stack *mode_stack; /* top of the tokeniser's mode stack */
+ struct parser_state *parser_state; /* state of the current nesting level */
+ struct here_document *here_documents_first; /* head of the here-document list */
+ struct here_document **here_documents_next; /* where the next here-document shall be linked in */
+};
+
+
+/* apsh.c */
+void initialise_parser_context(struct parser_context *ctx);
+
+/* preparser.c */
+size_t parse(struct parser_context *ctx, char *code, size_t code_len, size_t *nremovedp);
+
+/* tokeniser.c */
+void push_mode(struct parser_context *ctx, enum tokeniser_mode mode);
+void pop_mode(struct parser_context *ctx);
+size_t parse_preparsed(struct parser_context *ctx, char *code, size_t code_len);
+
+/* parser.c */
+void push_end_of_file(struct parser_context *ctx);
+void push_whitespace(struct parser_context *ctx, int strict);
+void push_semicolon(struct parser_context *ctx, int maybe);
+size_t push_symbol(struct parser_context *ctx, char *token, size_t token_len);
+void push_quoted(struct parser_context *ctx, char *text, size_t text_len);
+void push_escaped(struct parser_context *ctx, char *text, size_t text_len);
+void push_unquoted(struct parser_context *ctx, char *text, size_t text_len);
+void push_enter(struct parser_context *ctx, enum argument_type type);
+void push_leave(struct parser_context *ctx);
diff --git a/config.mk b/config.mk
index 15b1181..c6a635e 100644
--- a/config.mk
+++ b/config.mk
@@ -4,5 +4,5 @@ MANPREFIX = /usr/share/man
CC = cc
CPPFLAGS = -D_DEFAULT_SOURCE -D_BSD_SOURCE -D_XOPEN_SOURCE=700 -D_GNU_SOURCE
-CFLAGS = -std=c99 -Wall -g
+CFLAGS = -std=c11 -Wall -g
LDFLAGS = -lsimple
diff --git a/parser.c b/parser.c
new file mode 100644
index 0000000..c3da716
--- /dev/null
+++ b/parser.c
@@ -0,0 +1,271 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+/*
+ * Tell the parser that the end of the input has been reached
+ *
+ * The command being built, if it has any arguments, is terminated
+ * as with a semicolon; afterwards `ctx->premature_end_of_file` is
+ * set if the current parser state has a parent or holds any command
+ *
+ * @param  ctx  The parser context
+ */
+void
+push_end_of_file(struct parser_context *ctx)
+{
+	struct parser_state *state;
+
+	push_semicolon(ctx, 1);
+
+	state = ctx->parser_state;
+	if (state->parent != NULL || state->ncommands != 0)
+		ctx->premature_end_of_file = 1;
+}
+
+
+/*
+ * Mark the end of the argument being built, if any, and append
+ * it to the argument list of the command being built
+ *
+ * Nothing is appended while a redirection still lacks its
+ * right-hand side; in that case, if `strict` is non-zero, the
+ * error "premature end of command" is reported with eprintf
+ *
+ * @param  ctx     The parser context
+ * @param  strict  Whether a missing right-hand side is an error
+ */
+void
+push_whitespace(struct parser_context *ctx, int strict)
+{
+	struct parser_state *state = ctx->parser_state;
+	size_t index;
+
+	if (state->need_right_hand_side) {
+		if (strict)
+			eprintf("premature end of command\n");
+		return;
+	}
+
+	if (!state->current_argument)
+		return;
+
+	index = state->narguments;
+	state->arguments = erealloc(state->arguments, (index + 1) * sizeof(*state->arguments));
+	state->arguments[index] = state->current_argument;
+	state->narguments = index + 1;
+
+	/* Start over with a fresh argument */
+	state->current_argument = NULL;
+	state->current_argument_end = NULL;
+}
+
+
+/*
+ * Terminate the command being built and store it in the list
+ * of commands of the current parser state
+ *
+ * The argument and redirection lists are moved into the new
+ * command, leaving the parser state with empty lists
+ *
+ * @param  ctx       The parser context
+ * @param  terminal  The symbol that terminated the command
+ */
+static void
+push_command_terminal(struct parser_context *ctx, enum command_terminal terminal)
+{
+	struct parser_state *state;
+	struct command *cmd;
+
+	/* Finish the last argument; strict, so a dangling redirection is an error */
+	push_whitespace(ctx, 1);
+	state = ctx->parser_state;
+
+	state->commands = erealloc(state->commands, (state->ncommands + 1) * sizeof(*state->commands));
+	cmd = ecalloc(1, sizeof(*cmd));
+	state->commands[state->ncommands] = cmd;
+	state->ncommands += 1;
+
+	/* Hand over the lists to the command … */
+	cmd->terminal = terminal;
+	cmd->arguments = state->arguments;
+	cmd->narguments = state->narguments;
+	cmd->redirections = state->redirections;
+	cmd->nredirections = state->nredirections;
+
+	/* … and forget them here */
+	state->arguments = NULL;
+	state->narguments = 0;
+	state->redirections = NULL;
+	state->nredirections = 0;
+
+	if (!state->parent && (terminal == DOUBLE_SEMICOLON || terminal == SEMICOLON || terminal == AMPERSAND)) {
+		/* TODO unless in a special construct such as while, case, for, if, or {, run and clear
+		 *      also require that any here-document is specified (count them and run when given);
+		 *      if terminal == AMPERSAND: perform </dev/null first, and reset exit status to 0
+		 */
+	}
+}
+
+
+/*
+ * Terminate the command being built with a semicolon
+ *
+ * @param  ctx    The parser context
+ * @param  maybe  If non-zero, do nothing when the command being
+ *                built has no completed arguments
+ */
+void
+push_semicolon(struct parser_context *ctx, int maybe)
+{
+	if (maybe && !ctx->parser_state->narguments)
+		return;
+
+	push_command_terminal(ctx, SEMICOLON);
+}
+
+
+/*
+ * Allocate a new, zero-initialised, argument part and append
+ * it to the argument being built, starting a new argument if
+ * none is being built
+ *
+ * The part is stamped with the tokeniser's current line number
+ *
+ * @param  ctx   The parser context
+ * @param  type  The type of the new part
+ */
+static void
+push_new_argument_part(struct parser_context *ctx, enum argument_type type)
+{
+	struct parser_state *state = ctx->parser_state;
+	struct argument *part;
+
+	part = ecalloc(1, sizeof(*part));
+	part->line_number = ctx->tokeniser_line_number;
+	part->type = type;
+
+	if (state->current_argument_end)
+		state->current_argument_end->next_part = part;
+	else
+		state->current_argument = part;
+	state->current_argument_end = part;
+}
+
+
+/*
+ * Add a redirection to the command being built and begin a new
+ * argument, marked with a REDIRECTION part, that shall receive
+ * the right-hand side of the redirection
+ *
+ * If an argument is being built when the operator is found, it
+ * becomes the left-hand side of the redirection, unless it is
+ * itself a redirection, ends with quoted text, or the operator is
+ * one of the &> family; in those cases it is finished as an
+ * ordinary argument instead
+ *
+ * For << and <<- the redirection is also appended to the
+ * context's list of here-documents
+ *
+ * @param  ctx   The parser context
+ * @param  type  The redirection operator
+ */
+static void
+push_redirection(struct parser_context *ctx, enum redirection_type type)
+{
+	struct parser_state *state = ctx->parser_state;
+	struct redirection *new_redirection;
+	struct argument *new_argument;
+	struct here_document *new_here_document;
+
+	new_redirection = ecalloc(1, sizeof(*new_redirection));
+	new_redirection->type = type;
+
+	state->redirections = erealloc(state->redirections, (state->nredirections + 1) * sizeof(*state->redirections));
+	state->redirections[state->nredirections++] = new_redirection;
+
+	if (state->current_argument) {
+		if (state->current_argument->type == REDIRECTION ||
+		    state->current_argument_end->type == QUOTED ||
+		    state->current_argument_end->type == QUOTE_EXPRESSION ||
+		    type == REDIRECT_OUTPUT_AND_STDERR ||
+		    type == REDIRECT_OUTPUT_AND_STDERR_APPEND ||
+		    type == REDIRECT_OUTPUT_AND_STDERR_CLOBBER ||
+		    type == REDIRECT_OUTPUT_AND_STDERR_TO_FD) {
+			push_whitespace(ctx, 1);
+		} else {
+			new_redirection->left_hand_side = state->current_argument;
+		}
+	}
+
+	new_argument = ecalloc(1, sizeof(*new_argument));
+	new_argument->type = REDIRECTION;
+	new_argument->line_number = ctx->tokeniser_line_number;
+
+	/*
+	 * Both the head and the tail must refer to the new argument:
+	 * if the tail was left pointing into the left-hand side, the
+	 * right-hand side would be appended to the left-hand side, and
+	 * if it was left as NULL, the next part would replace the
+	 * REDIRECTION marker instead of following it
+	 */
+	state->current_argument = new_argument;
+	state->current_argument_end = new_argument;
+
+	if (type == HERE_DOCUMENT || type == HERE_DOCUMENT_INDENTED) {
+		new_here_document = emalloc(sizeof(*new_here_document));
+		new_here_document->redirection = new_redirection;
+		new_here_document->argument = new_argument;
+		new_here_document->next = NULL;
+		*ctx->here_documents_next = new_here_document;
+		ctx->here_documents_next = &new_here_document->next;
+	}
+
+	state->need_right_hand_side = 1;
+}
+
+/*
+ * Enter a nested construct: push `mode` onto the tokeniser's
+ * mode stack and then enter a new child parser state attached
+ * to a new argument part of type `type`
+ */
+static void
+push_shell_io(struct parser_context *ctx, enum argument_type type, enum tokeniser_mode mode)
+{
+ push_mode(ctx, mode);
+ push_enter(ctx, type);
+}
+
+/*
+ * Add a FUNCTION_MARK, for the symbol (), as an argument of its
+ * own: the argument being built is finished both before and
+ * after the mark is added
+ */
+static void
+push_function_mark(struct parser_context *ctx)
+{
+ push_whitespace(ctx, 1);
+ push_new_argument_part(ctx, FUNCTION_MARK);
+ push_whitespace(ctx, 1);
+}
+
+
+/*
+ * Interpret the symbol at the beginning of a run of symbol
+ * characters and perform the action associated with it
+ *
+ * The symbols are tested in the order they are listed, which
+ * is from longest to shortest so that, for example, <<< is
+ * preferred over <<; a character that does not begin any known
+ * symbol is added as unquoted text
+ *
+ * @param   ctx        The parser context
+ * @param   token      The run of symbol characters
+ * @param   token_len  The number of bytes in `token`
+ * @return             The number of bytes from the beginning of
+ *                     `token` that were consumed
+ */
+size_t
+push_symbol(struct parser_context *ctx, char *token, size_t token_len)
+{
+#define LIST_SYMBOLS(_)\
+	_("<<<", push_redirection(ctx, HERE_STRING))\
+	_("<<-", push_redirection(ctx, HERE_DOCUMENT_INDENTED))\
+	_("<>(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT_OUTPUT, NORMAL_MODE))\
+	_("<>|", push_command_terminal(ctx, SOCKET_PIPE))\
+	_("<>&", push_redirection(ctx, REDIRECT_INPUT_OUTPUT_TO_FD))\
+	_("&>>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_APPEND))\
+	_("&>&", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_TO_FD))\
+	_("&>|", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_CLOBBER))\
+	_("()", push_function_mark(ctx))\
+	_("((", push_shell_io(ctx, ARITHMETIC_SUBSHELL, RRB_QUOTE_MODE))\
+	_(";;", push_command_terminal(ctx, DOUBLE_SEMICOLON))\
+	_("<(", push_shell_io(ctx, PROCESS_SUBSTITUTION_OUTPUT, NORMAL_MODE))\
+	_("<<", push_redirection(ctx, HERE_DOCUMENT))\
+	_("<>", push_redirection(ctx, REDIRECT_INPUT_OUTPUT))\
+	_("<&", push_redirection(ctx, REDIRECT_INPUT_TO_FD))\
+	_(">(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT, NORMAL_MODE))\
+	_(">>", push_redirection(ctx, REDIRECT_OUTPUT_APPEND))\
+	_(">&", push_redirection(ctx, REDIRECT_OUTPUT_TO_FD))\
+	_(">|", push_redirection(ctx, REDIRECT_OUTPUT_CLOBBER))\
+	_("||", push_command_terminal(ctx, OR))\
+	_("|&", push_command_terminal(ctx, PIPE_AMPERSAND))\
+	_("&&", push_command_terminal(ctx, AND))\
+	_("&|", push_command_terminal(ctx, PIPE_AMPERSAND)) /* synonym for |& to match &> */\
+	_("&>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR))\
+	_("(", push_shell_io(ctx, SUBSHELL, NORMAL_MODE))\
+	_(";", push_semicolon(ctx, 0))\
+	_("<", push_redirection(ctx, REDIRECT_INPUT))\
+	_(">", push_redirection(ctx, REDIRECT_OUTPUT))\
+	_("|", push_command_terminal(ctx, PIPE))\
+	_("&", push_command_terminal(ctx, AMPERSAND))
+
+	/*
+	 * Only the matched symbol is consumed, not the whole run:
+	 * whatever follows it in the run is another symbol that
+	 * must be interpreted separately
+	 */
+#define X(SYMBOL, ACTION)\
+	if (token_len >= sizeof(SYMBOL) - 1 && !strncmp(token, SYMBOL, sizeof(SYMBOL) - 1)) {\
+		ACTION;\
+		return sizeof(SYMBOL) - 1;\
+	}
+	LIST_SYMBOLS(X)
+#undef X
+#undef LIST_SYMBOLS
+
+	push_unquoted(ctx, token, 1);
+	return 1;
+}
+
+
+/*
+ * Append text to the argument being built
+ *
+ * The text is added to the last part of the argument if that
+ * part has the same type and was created on the tokeniser's
+ * current line, otherwise a new part is created for it; the
+ * stored text is always kept NUL-terminated
+ *
+ * @param  ctx       The parser context
+ * @param  text      The text to append
+ * @param  text_len  The number of bytes in `text`
+ * @param  type      The type of the part: QUOTED or UNQUOTED
+ */
+static void
+push_text(struct parser_context *ctx, char *text, size_t text_len, enum argument_type type)
+{
+	struct parser_state *state = ctx->parser_state;
+	struct argument *tail = state->current_argument_end;
+	size_t new_length;
+
+	/* Any text satisfies a pending redirection's right-hand side */
+	state->need_right_hand_side = 0;
+
+	if (!tail || tail->type != type || tail->line_number != ctx->tokeniser_line_number) {
+		push_new_argument_part(ctx, type);
+		tail = state->current_argument_end;
+	}
+
+	new_length = tail->length + text_len;
+	tail->text = erealloc(tail->text, new_length + 1);
+	memcpy(&tail->text[tail->length], text, text_len);
+	tail->text[new_length] = '\0';
+	tail->length = new_length;
+}
+
+/*
+ * Append `text_len` bytes of `text` to the argument being
+ * built, as QUOTED text
+ */
+void
+push_quoted(struct parser_context *ctx, char *text, size_t text_len)
+{
+ push_text(ctx, text, text_len, QUOTED);
+}
+
+/*
+ * Append `text_len` bytes of `text` to the argument being
+ * built, as QUOTED text; backslashes in the text are not yet
+ * resolved, so this currently does the same as push_quoted
+ */
+void
+push_escaped(struct parser_context *ctx, char *text, size_t text_len)
+{
+ /* TODO resolve backslashes in text */
+ push_text(ctx, text, text_len, QUOTED);
+}
+
+/*
+ * Append `text_len` bytes of `text` to the argument being
+ * built, as UNQUOTED text
+ */
+void
+push_unquoted(struct parser_context *ctx, char *text, size_t text_len)
+{
+ push_text(ctx, text, text_len, UNQUOTED);
+}
+
+
+/*
+ * Enter a nested construct: add a new part of type `type` to
+ * the argument being built, attach a fresh parser state to it
+ * as its child, and make that child the current parser state
+ *
+ * @param  ctx   The parser context
+ * @param  type  The type of the argument part that holds the child
+ */
+void
+push_enter(struct parser_context *ctx, enum argument_type type)
+{
+	struct parser_state *outer = ctx->parser_state;
+	struct parser_state *inner;
+
+	/* A nested construct satisfies a pending redirection's right-hand side */
+	outer->need_right_hand_side = 0;
+	push_new_argument_part(ctx, type);
+
+	inner = ecalloc(1, sizeof(*inner));
+	inner->parent = outer;
+	outer->current_argument_end->child = inner;
+
+	ctx->parser_state = inner;
+}
+
+
+/*
+ * Leave the current nested construct and return to the parent
+ * parser state
+ *
+ * If the tokeniser's current mode is NORMAL_MODE, the command
+ * being built, if it has any arguments, is first terminated as
+ * with a semicolon
+ *
+ * @param  ctx  The parser context
+ */
+void
+push_leave(struct parser_context *ctx)
+{
+	switch (ctx->mode_stack->mode) {
+	case NORMAL_MODE:
+		push_semicolon(ctx, 1);
+		break;
+	default:
+		/* TODO if ctx->mode_stack->mode == BQ_QUOTE_MODE, parse content */
+		break;
+	}
+
+	/* TODO validate subshell content */
+	ctx->parser_state = ctx->parser_state->parent;
+}
diff --git a/preparser.c b/preparser.c
new file mode 100644
index 0000000..840209f
--- /dev/null
+++ b/preparser.c
@@ -0,0 +1,51 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+/*
+ * Preparse a buffer of shell code and feed it to the tokeniser.
+ *
+ * Scanning resumes at ctx->preparser_offset. NUL bytes are removed from
+ * the buffer (with a warning unless input is a terminal), and each
+ * <backslash><newline> pair is removed as a line continuation after the
+ * text before it has been handed to parse_preparsed. Any other
+ * backslash protects the byte following it from this scan. The
+ * end-of-file flag is hidden from the tokeniser until the final call.
+ *
+ * @param   ctx        Parser context
+ * @param   code       Buffer to parse; modified in place
+ * @param   code_len   Number of bytes in code
+ * @param   nremovedp  Output: number of bytes removed from code
+ * @return             Number of bytes consumed by the tokeniser
+ */
+size_t
+parse(struct parser_context *ctx, char *code, size_t code_len, size_t *nremovedp)
+{
+	size_t consumed = 0;
+	int saved_eof = ctx->end_of_file_reached;
+
+	/* Intermediate tokeniser calls must not see the end of file */
+	ctx->end_of_file_reached = 0;
+	*nremovedp = 0;
+
+	while (ctx->preparser_offset < code_len) {
+		switch (code[ctx->preparser_offset]) {
+		case '\0':
+			if (!ctx->tty_input)
+				weprintf("ignoring NUL byte at line %zu\n", ctx->preparser_line_number);
+			code_len -= 1;
+			memmove(&code[ctx->preparser_offset],
+			        &code[ctx->preparser_offset + 1],
+			        code_len - ctx->preparser_offset);
+			*nremovedp += 1;
+			continue;
+
+		case '\n':
+			ctx->preparser_line_number += 1;
+			ctx->preparser_offset += 1;
+			continue;
+
+		case '\\':
+			/* handled below the switch */
+			break;
+
+		default:
+			ctx->preparser_offset += 1;
+			continue;
+		}
+
+		/* Backslash as last byte: what it escapes is not yet known */
+		if (ctx->preparser_offset + 1 == code_len)
+			break;
+
+		if (code[ctx->preparser_offset + 1] != '\n') {
+			/* Ordinary escape: step over both bytes */
+			ctx->preparser_offset += 2;
+			continue;
+		}
+
+		/* Line continuation: tokenise what precedes it, then cut it out */
+		consumed += parse_preparsed(ctx, &code[consumed], ctx->preparser_offset - consumed);
+		code_len -= 2;
+		memmove(&code[ctx->preparser_offset],
+		        &code[ctx->preparser_offset + 2],
+		        code_len - ctx->preparser_offset);
+		*nremovedp += 2;
+		ctx->line_continuations += 1;
+	}
+
+	ctx->end_of_file_reached = saved_eof;
+	consumed += parse_preparsed(ctx, &code[consumed], ctx->preparser_offset - consumed);
+	/* Make the offset relative to the first unconsumed byte */
+	ctx->preparser_offset -= consumed;
+	return consumed;
+}
diff --git a/tokeniser.c b/tokeniser.c
new file mode 100644
index 0000000..63ff2fd
--- /dev/null
+++ b/tokeniser.c
@@ -0,0 +1,419 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+/*
+ * Push a new tokeniser mode onto the context's mode stack.
+ * The new entry starts with she_is_comment set.
+ */
+void
+push_mode(struct parser_context *ctx, enum tokeniser_mode mode)
+{
+	struct mode_stack *entry;
+
+	entry = emalloc(sizeof(*entry));
+	entry->previous = ctx->mode_stack;
+	entry->mode = mode;
+	entry->she_is_comment = 1;
+
+	ctx->mode_stack = entry;
+}
+
+
+/*
+ * Remove the top entry of the context's mode stack and free it,
+ * making the previous entry the current tokeniser mode.
+ */
+void
+pop_mode(struct parser_context *ctx)
+{
+	struct mode_stack *top;
+
+	top = ctx->mode_stack;
+	ctx->mode_stack = top->previous;
+	free(top);
+}
+
+
+/*
+ * Tokenise preparsed shell code and push the tokens to the parser.
+ *
+ * The function is a state machine driven by the mode on top of
+ * ctx->mode_stack. Each loop iteration recognises one token at code,
+ * pushes it with the push_* functions, and advances by token_len bytes.
+ * When a token cannot be delimited with the bytes available, the
+ * function returns early (need_more) without consuming that token, so
+ * the caller can call again once more input has been appended.
+ *
+ * @param   ctx       Parser context
+ * @param   code      Code to tokenise
+ * @param   code_len  Number of bytes available in code
+ * @return            Number of bytes consumed from code
+ */
+size_t
+parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
+{
+#define IS_SYMBOL(C) ((C) == '<' || (C) == '>' || (C) == '&' || (C) == '|' ||\
+                      (C) == '(' || (C) == ')' || (C) == ';' || (C) == '-')
+
+	size_t bytes_read = 0;
+	size_t token_len;
+
+	for (; bytes_read < code_len; bytes_read += token_len, code = &code[token_len]) {
+		switch (ctx->mode_stack->mode) {
+		case NORMAL_MODE:
+			if (*code == '#' && ctx->mode_stack->she_is_comment) {
+				token_len = 1;
+				push_mode(ctx, COMMENT_MODE);
+
+			} else if (*code == '\n') {
+				token_len = 1;
+				ctx->mode_stack->she_is_comment = 1;
+				push_whitespace(ctx, 0);
+				push_semicolon(ctx, 1);
+				ctx->tokeniser_line_number += 1;
+				if (ctx->here_documents_first)
+					push_mode(ctx, HERE_DOCUMENT_MODE);
+
+			/* <ctype.h> functions require an unsigned char value */
+			} else if (isspace((unsigned char)*code)) {
+				ctx->mode_stack->she_is_comment = 1;
+				push_whitespace(ctx, 0);
+				for (token_len = 1; token_len < code_len - bytes_read; token_len += 1)
+					if (!isspace((unsigned char)code[token_len]) || code[token_len] == '\n')
+						break;
+
+			} else if (*code == ')' && ctx->mode_stack->previous) {
+				token_len = 1;
+				ctx->mode_stack->she_is_comment = 1;
+				pop_mode(ctx);
+				push_leave(ctx);
+
+			} else if (IS_SYMBOL(*code)) {
+				ctx->mode_stack->she_is_comment = 1;
+				for (token_len = 1; token_len < code_len - bytes_read; token_len += 1)
+					if (!IS_SYMBOL(code[token_len]))
+						goto symbol_end;
+				/* The symbol run may continue in input not yet read */
+				if (!ctx->end_of_file_reached)
+					goto need_more;
+			symbol_end:
+				token_len = push_symbol(ctx, code, token_len);
+
+			} else if (*code == '\\') {
+				ctx->mode_stack->she_is_comment = 0;
+			backslash_mode:
+				if (code_len - bytes_read < 2)
+					goto need_more;
+				token_len = 2;
+				push_quoted(ctx, &code[1], 1);
+
+			} else if (*code == '\'') {
+				ctx->mode_stack->she_is_comment = 0;
+			sqoute_mode:
+				for (token_len = 1; token_len < code_len - bytes_read; token_len += 1)
+					if (code[token_len] == '\'')
+						goto squote_end;
+				goto need_more;
+			squote_end:
+				token_len += 1;
+				push_quoted(ctx, &code[1], token_len - 2);
+
+			} else if (*code == '"') {
+				ctx->mode_stack->she_is_comment = 0;
+			dquote_mode:
+				token_len = 1;
+				push_mode(ctx, DQ_QUOTE_MODE);
+				push_enter(ctx, QUOTE_EXPRESSION);
+
+			} else if (*code == '`') {
+				ctx->mode_stack->she_is_comment = 0;
+			bquote_mode:
+				token_len = 1;
+				push_mode(ctx, BQ_QUOTE_MODE);
+				push_enter(ctx, BACKQUOTE_EXPRESSION);
+
+			} else if (*code == '$') {
+				ctx->mode_stack->she_is_comment = 0;
+			dollar_mode:
+				if (code_len - bytes_read < 2) {
+					if (ctx->end_of_file_reached) {
+						token_len = 1;
+						push_unquoted(ctx, code, 1);
+					} else {
+						goto need_more;
+					}
+
+				} else if (code[1] == '(') {
+					if (code_len - bytes_read < 3) {
+						goto need_more;
+
+					} else if (code[2] == '(') {
+						token_len = 3;
+						push_mode(ctx, RRB_QUOTE_MODE);
+						push_enter(ctx, ARITHMETIC_EXPRESSION);
+
+					} else {
+						token_len = 2;
+						push_mode(ctx, NORMAL_MODE);
+						push_enter(ctx, SUBSHELL_SUBSTITUTION);
+					}
+
+				} else if (code[1] == '[') {
+					token_len = 2;
+					push_mode(ctx, SB_QUOTE_MODE);
+					push_enter(ctx, ARITHMETIC_EXPRESSION);
+
+				} else if (code[1] == '{') {
+					token_len = 2;
+					push_mode(ctx, CB_QUOTE_MODE);
+					push_enter(ctx, VARIABLE_SUBSTITUTION);
+
+				} else if (code[1] == '\'') {
+					/* $'...': find the closing ', a backslash protects the next byte */
+					for (token_len = 2; token_len < code_len - bytes_read; token_len += 1) {
+						if (code[token_len] == '\\') {
+							/* The escaped byte has not been read yet */
+							if (token_len + 1 == code_len - bytes_read)
+								goto need_more;
+							/* Skip the escaped byte so \' does not terminate */
+							token_len += 1;
+						} else if (code[token_len] == '\'') {
+							goto dollar_squote_end;
+						}
+					}
+					/* No closing ' in the available input */
+					goto need_more;
+				dollar_squote_end:
+					token_len += 1;
+					/* Exclude the leading $' and the trailing ' */
+					push_escaped(ctx, &code[2], token_len - 3);
+
+				} else {
+					token_len = 1;
+					push_unquoted(ctx, code, 1);
+				}
+
+			} else {
+				ctx->mode_stack->she_is_comment = 0;
+				for (token_len = 1; token_len < code_len - bytes_read; token_len += 1) {
+					if (isspace((unsigned char)code[token_len]) || IS_SYMBOL(code[token_len]) ||
+					    code[token_len] == '\'' || code[token_len] == '"' ||
+					    code[token_len] == '\\' || code[token_len] == '$' ||
+					    code[token_len] == '`')
+						break;
+				}
+				push_unquoted(ctx, code, token_len);
+			}
+			break;
+
+
+		case COMMENT_MODE:
+			if (*code == '\n') {
+				token_len = 0; /* do not consume */
+				pop_mode(ctx);
+			} else {
+				for (token_len = 1; token_len < code_len - bytes_read; token_len += 1)
+					if (code[token_len] == '\n')
+						break;
+			}
+			break;
+
+
+		case HERE_DOCUMENT_MODE:
+			/* TODO read until terminator, remove all <tab> (including on the
+			 *      line of the terminator) if <<- and then if terminator was
+			 *      unquoted, parse in " "-mode but accept " */
+			/* NOTE(review): token_len is not assigned on this path, so the
+			 * loop increment reads an indeterminate value; this must be
+			 * resolved when here-documents are implemented */
+			break;
+
+
+		case BQ_QUOTE_MODE:
+			if (*code == '\\') {
+				if (code_len - bytes_read < 2) {
+					goto need_more;
+				} else {
+					token_len = 2;
+					push_unquoted(ctx, code, 2);
+				}
+
+			} else if (*code == '`') {
+				token_len = 1;
+				pop_mode(ctx);
+				push_leave(ctx);
+
+			} else if (*code == '\n') {
+				token_len = 1;
+				ctx->tokeniser_line_number += 1;
+				push_unquoted(ctx, code, 1);
+
+			} else {
+				for (token_len = 1; token_len < code_len - bytes_read; token_len += 1)
+					if (code[token_len] == '\n' || code[token_len] == '\\' || code[token_len] == '`')
+						break;
+				push_unquoted(ctx, code, token_len);
+			}
+			break;
+
+
+		case DQ_QUOTE_MODE:
+			if (*code == '"') {
+				token_len = 1;
+				pop_mode(ctx);
+				push_leave(ctx);
+			} else {
+				goto common_quote_mode;
+			}
+			break;
+
+		case RRB_QUOTE_MODE:
+			if (*code == ')') {
+				if (code_len - bytes_read < 2) {
+					goto need_more;
+				} else if (code[1] == ')') {
+					token_len = 2;
+					pop_mode(ctx);
+					push_leave(ctx);
+				} else {
+					goto common_quote_mode;
+				}
+			} else {
+				goto common_quote_mode;
+			}
+			break;
+
+		case RB_QUOTE_MODE:
+			if (*code == ')') {
+				token_len = 1;
+				pop_mode(ctx);
+				push_leave(ctx);
+			} else {
+				goto common_quote_mode;
+			}
+			break;
+
+		case SB_QUOTE_MODE:
+			if (*code == ']') {
+				token_len = 1;
+				pop_mode(ctx);
+				push_leave(ctx);
+			} else {
+				goto common_quote_mode;
+			}
+			break;
+
+		/* Shared by the " ", (( )), ( ) and $[ ] modes once their
+		 * own terminator has been ruled out */
+		common_quote_mode:
+			if (*code == '(' && ctx->mode_stack->mode != DQ_QUOTE_MODE) {
+				if (code_len - bytes_read < 2) {
+					goto need_more;
+
+				} else if (code[1] == '(') {
+					token_len = 2;
+					push_mode(ctx, RRB_QUOTE_MODE);
+					push_enter(ctx, ARITHMETIC_EXPRESSION);
+
+				} else {
+					token_len = 1;
+					push_mode(ctx, RB_QUOTE_MODE);
+					push_enter(ctx, ARITHMETIC_EXPRESSION);
+				}
+
+			} else if (*code == '$') {
+				if (code_len - bytes_read < 2) {
+					if (ctx->end_of_file_reached) {
+						token_len = 1;
+						push_unquoted(ctx, code, 1);
+					} else {
+						goto need_more;
+					}
+
+				} else if (code[1] == '(') {
+					if (code_len - bytes_read < 3) {
+						goto need_more;
+
+					} else if (code[2] == '(') {
+						token_len = 3;
+						push_mode(ctx, RRB_QUOTE_MODE);
+						push_enter(ctx, ARITHMETIC_EXPRESSION);
+
+					} else {
+						token_len = 2;
+						push_mode(ctx, NORMAL_MODE);
+						push_enter(ctx, SUBSHELL_SUBSTITUTION);
+					}
+
+				} else if (code[1] == '[') {
+					token_len = 2;
+					push_mode(ctx, SB_QUOTE_MODE);
+					push_enter(ctx, ARITHMETIC_EXPRESSION);
+
+				} else if (code[1] == '{') {
+					token_len = 2;
+					push_mode(ctx, CB_QUOTE_MODE);
+					push_enter(ctx, VARIABLE_SUBSTITUTION);
+
+				} else {
+					token_len = 1;
+					push_unquoted(ctx, code, 1);
+				}
+
+			} else if (*code == '\\') {
+				if (code_len - bytes_read < 2) {
+					if (ctx->end_of_file_reached) {
+						token_len = 1;
+						push_unquoted(ctx, code, 1);
+					} else {
+						goto need_more;
+					}
+
+				} else if (code[1] == '$' || code[1] == '`' || code[1] == '"' || code[1] == '\\') {
+					/* Consume the backslash together with the byte it escapes,
+					 * otherwise the escaped byte would be tokenised again */
+					token_len = 2;
+					push_quoted(ctx, &code[1], 1);
+
+				} else {
+					/* Backslash is literal here; the next byte is handled normally */
+					token_len = 1;
+					push_unquoted(ctx, code, 1);
+				}
+
+			} else if (*code == '`') {
+				goto bquote_mode;
+
+			} else if (*code == '\n') {
+				token_len = 1;
+				ctx->tokeniser_line_number += 1;
+				push_unquoted(ctx, code, 1);
+
+			} else {
+				for (token_len = 1; token_len < code_len - bytes_read; token_len += 1) {
+					if (code[token_len] == '"' || code[token_len] == ')' ||
+					    code[token_len] == ']' || code[token_len] == '(' ||
+					    code[token_len] == '$' || code[token_len] == '\\' ||
+					    code[token_len] == '`' || code[token_len] == '\n')
+						break;
+				}
+				push_unquoted(ctx, code, token_len);
+			}
+			break;
+
+
+		case CB_QUOTE_MODE:
+			if (*code == '}') {
+				token_len = 1;
+				pop_mode(ctx);
+				push_leave(ctx);
+
+			} else if (*code == '\\') {
+				goto backslash_mode;
+
+			} else if (*code == '\'') {
+				goto sqoute_mode;
+
+			} else if (*code == '"') {
+				goto dquote_mode;
+
+			} else if (*code == '`') {
+				goto bquote_mode;
+
+			} else if (*code == '$') {
+				goto dollar_mode;
+
+			} else if (*code == '\n') {
+				token_len = 1;
+				ctx->tokeniser_line_number += 1;
+				push_unquoted(ctx, code, 1);
+
+			} else {
+				for (token_len = 1; token_len < code_len - bytes_read; token_len += 1) {
+					if (code[token_len] == '}' || code[token_len] == '\\' ||
+					    code[token_len] == '\'' || code[token_len] == '"' ||
+					    code[token_len] == '`' || code[token_len] == '$' ||
+					    code[token_len] == '\n')
+						break;
+				}
+				push_unquoted(ctx, code, token_len);
+			}
+			break;
+
+		default:
+			abort();
+		}
+
+		/* Line continuations removed by the preparser are counted
+		 * once the token they occurred after has been pushed */
+		if (ctx->line_continuations) {
+			ctx->tokeniser_line_number += ctx->line_continuations;
+			ctx->line_continuations = 0;
+		}
+	}
+
+	if (bytes_read == code_len && ctx->end_of_file_reached)
+		push_end_of_file(ctx);
+
+need_more:
+	return bytes_read;
+
+#undef IS_SYMBOL
+}