From 39c405d92483c2f70df361ecd5836ecef3cf5e7f Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Tue, 6 Jul 2021 02:34:22 +0200 Subject: Second commit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- Makefile | 15 +- README | 19 ++ apsh.c | 719 ++---------------------------------------------------------- common.h | 156 +++++++++++++ config.mk | 2 +- parser.c | 271 +++++++++++++++++++++++ preparser.c | 51 +++++ tokeniser.c | 419 +++++++++++++++++++++++++++++++++++ 8 files changed, 952 insertions(+), 700 deletions(-) create mode 100644 common.h create mode 100644 parser.c create mode 100644 preparser.c create mode 100644 tokeniser.c diff --git a/Makefile b/Makefile index 16394bb..696478c 100644 --- a/Makefile +++ b/Makefile @@ -4,9 +4,13 @@ CONFIGFILE = config.mk include $(CONFIGFILE) OBJ =\ - apsh.o + apsh.o\ + preparser.o\ + tokeniser.o\ + parser.o HDR =\ + common.h\ config.h all: apsh @@ -18,10 +22,17 @@ $(OBJ): $(@:.o=.c) $(HDR) apsh: $(OBJ) $(CC) -o $@ $(OBJ) $(LDFLAGS) +install: apsh + mkdir -p -- "$(DESTDIR)$(PREFIX)/bin/" + cp -- apsh "$(DESTDIR)$(PREFIX)/bin/" + +uninstall: + -rm -f -- "$(DESTDIR)$(PREFIX)/bin/apsh" + clean: -rm -f -- *.o *.su apsh .SUFFIXES: .SUFFIXES: .o .c -.PHONY: all clean +.PHONY: all install uninstall clean diff --git a/README b/README index aa00934..f8505c1 100644 --- a/README +++ b/README @@ -1,2 +1,21 @@ NAME apsh — advanced piping shell + +DESCRIPTION + apsh is a shell designed to give the user the ability + to create advanced pipelines. To this end, all forks + apsh makes of itself share exported and unexported + variables, with the exception of when the ( ) syntax + is used to fork the shell, in which case they are + inherited but unshared. + + apsh has support for <( ) and >( ), as well as <>( ) + which creates a socket instead of a pipe and connects + both ends. Similarly <>| is like |, except it creates + a bidirectional socket instead of a pipe. 
apsh also + lets the user create pipes and sockets before they + are used. + + Additionally, if ( ) or (( )) is used as an argument + to a command, the code is formatted and passed + as a string to the command as that argument. diff --git a/apsh.c b/apsh.c index f732b20..4e7183b 100644 --- a/apsh.c +++ b/apsh.c @@ -1,709 +1,32 @@ /* See LICENSE file for copyright and license details. */ -#include -#include -#include "config.h" +#include "common.h" USAGE(""); -enum argument_type { - VERBATIM, - ESCAPED, - SPECIAL, - FUNCTION_MARK, - SUBSHELL_INPUT, /* >(...) */ - SUBSHELL_OUTPUT, /* <(...) */ - SUBSHELL_INPUT_OUTPUT, /* <>(...) ## create socket for both input and output of subshell */ - SUBSHELL_SUBSTITUTION, - SUBSHELL, /* (...) or ((...)) ## if non-first argument: format shell code into a string (can be used for a clean subshell) */ - REDIRECTION /* at beginning of argument, use next redirection and use reminder of argument as right-hand side */ -}; -enum redirection_type { - REDIRECT_INPUT, - REDIRECT_INPUT_TO_FD, - REDIRECT_OUTPUT, - REDIRECT_OUTPUT_APPEND, - REDIRECT_OUTPUT_CLOBBERING, - REDIRECT_OUTPUT_TO_FD, - REDIRECT_INPUT_OUTPUT, - REDIRECT_INPUT_OUTPUT_TO_FD, - HERE_STRING, - HERE_DOCUMENT, - HERE_DOCUMENT_INDENTED -}; - -enum command_terminal { - DOUBLE_SEMICOLON, - SEMICOLON, - AMPERSAND, - SOCKET_PIPE, - PIPE, - PIPE_AMPERSAND, - AND, - OR -}; - -enum shell_terminator { - END_OF_FILE, - ROUND, - ROUND_ROUND, - SQUARE, - BACKTICK, -}; - -struct parser_state; - -struct argument { - enum argument_type type; - union { - struct { /* VERBATIM, ESCAPED */ - char *text; - size_t length; - }; - char symbol; /* SPECIAL */ - struct parser_state *root; /* SUBSHELL, SUBSHELL_* */ - }; /* none for FUNCTION_MARK, REDIRECTION */ - struct argument *next_part; -}; - -struct redirection { - enum redirection_type type; - struct argument *left_hand_side; -}; - -struct command { - enum command_terminal terminal; - struct argument **arguments; - size_t narguments; - 
struct redirection **redirections; - size_t nredirections; -}; - -struct parser_state { - struct parser_state *parent; - struct command **commands; - size_t ncommands; - struct argument **arguments; - size_t narguments; - struct redirection **redirections; - size_t nredirections; - struct argument *current_argument; - struct argument *current_argument_end; - enum shell_terminator exit_on; - char at_dollar; - char is_expr_shell; - char need_right_hand_side; -}; - -struct here_document { - struct redirection *redirection; - struct argument *argument; - struct here_document *next; -}; - -static size_t line_number = 1; -static int tty_input = 0; - -static struct parser_state *state; -static struct here_document *here_documents_first = NULL; -static struct here_document **here_documents_next = &here_documents_first; - -static void flush_dollar(void); -static void verbatim(const char *text, size_t text_length, int from_quote); - -static void -whitespace(int strict) -{ - flush_dollar(); - - if (state->need_right_hand_side) { - if (strict) - eprintf("premature end of command\n"); - return; - } - - if (state->current_argument) { - state->arguments = erealloc(state->arguments, (state->narguments + 1) * sizeof(*state->arguments)); - state->arguments[state->narguments++] = state->current_argument; - state->current_argument = NULL; - state->current_argument_end = NULL; - } -} - -static void -terminate_command(enum command_terminal terminal) -{ - whitespace(1); - - state->commands = erealloc(state->commands, (state->ncommands + 1) * sizeof(*state->commands)); - state->commands[state->ncommands] = ecalloc(1, sizeof(**state->commands)); - state->commands[state->ncommands]->terminal = terminal; - state->commands[state->ncommands]->arguments = state->arguments; - state->commands[state->ncommands]->narguments = state->narguments; - state->commands[state->ncommands]->redirections = state->redirections; - state->commands[state->ncommands]->nredirections = state->nredirections; - 
state->ncommands += 1; - state->arguments = NULL; - state->narguments = 0; - state->redirections = NULL; - state->nredirections = 0; - - if (!state->parent) { - if (terminal == DOUBLE_SEMICOLON || terminal == SEMICOLON || terminal == AMPERSAND) { - /* TODO unless in a special construct such as while, case, for, if, or {, run and clear - * also require that any here-document is specified (count them and run when given) - */ - } - } -} - -static void -semicolon(int maybe) -{ - if (!maybe || state->narguments) - terminate_command(SEMICOLON); -} - -static void -end_subshell(void) -{ - semicolon(1); - /* TODO validate subshell content */ - state = state->parent; -} - -static void -add_redirection(enum redirection_type type) -{ - state->redirections = erealloc(state->redirections, (state->nredirections + 1) * sizeof(*state->redirections)); - state->redirections[state->nredirections] = ecalloc(1, sizeof(**state->redirections)); - state->redirections[state->nredirections]->type = type; - if (state->current_argument) { - if (state->current_argument->type == REDIRECTION) { - whitespace(1); - } else { - state->redirections[state->nredirections]->left_hand_side = state->current_argument; - state->current_argument = NULL; - state->current_argument_end = NULL; - } - } - state->current_argument_end = state->current_argument = calloc(1, sizeof(*state->current_argument)); - state->current_argument_end->type = REDIRECTION; - if (type == HERE_DOCUMENT || type == HERE_DOCUMENT_INDENTED) { - *here_documents_next = emalloc(sizeof(**here_documents_next)); - (*here_documents_next)->redirection = state->redirections[state->nredirections]; - (*here_documents_next)->argument = state->current_argument; - (*here_documents_next)->next = NULL; - here_documents_next = &(*here_documents_next)->next; - } - state->nredirections += 1; - state->need_right_hand_side = 1; -} - -static void -add_shell_io(enum argument_type type, enum shell_terminator exit_on) -{ - struct parser_state *new_state; - - 
state->need_right_hand_side = 0; - - if (!state->current_argument_end) - state->current_argument_end = state->current_argument = ecalloc(1, sizeof(struct argument)); - else - state->current_argument_end = state->current_argument_end->next_part = ecalloc(1, sizeof(struct argument)); - - new_state = ecalloc(1, sizeof(*new_state)); - new_state->parent = state; - new_state->exit_on = exit_on; - - state->current_argument_end->type = type; - state->current_argument_end->root = state; - - state = new_state; -} - -static void -add_function_mark(void) +void +initialise_parser_context(struct parser_context *ctx) { - whitespace(1); - if (!state->current_argument_end) - state->current_argument_end = state->current_argument = ecalloc(1, sizeof(struct argument)); - else - state->current_argument_end = state->current_argument_end->next_part = ecalloc(1, sizeof(struct argument)); - state->current_argument_end->type = FUNCTION_MARK; - whitespace(1); -} - -static void -parse_symbol(const char *token, size_t token_length) -{ - struct parser_state *old_state = state; - - while (token_length) { - if (state->at_dollar) { - state->at_dollar = 0; - if (token_length >= 2 && token[0] == '(' && token[1] == '(') { - add_shell_io(SUBSHELL_SUBSTITUTION, ROUND_ROUND); - state->is_expr_shell = 1; - token = &token[2]; - token_length -= 2; - } else if (token_length >= 1 && token[0] == '(') { - add_shell_io(SUBSHELL_SUBSTITUTION, ROUND); - token = &token[1]; - token_length -= 1; - } else if (token_length >= 1 && token[0] == '[') { - add_shell_io(SUBSHELL_SUBSTITUTION, SQUARE); - state->is_expr_shell = 1; - token = &token[1]; - token_length -= 1; - } else if (token_length >= 1 && token[0] == '{') { /* TODO */ - token = &token[1]; - token_length -= 1; - } else { - state->at_dollar = 1; - flush_dollar(); - continue; - } - } - - if (token_length >= 3 && token[0] == '<' && token[1] == '<' && token[2] == '<') { - add_redirection(HERE_STRING); - token = &token[3]; - token_length -= 3; - - } else if 
(token_length >= 3 && token[0] == '<' && token[1] == '<' && token[2] == '-') { - add_redirection(HERE_DOCUMENT_INDENTED); - token = &token[3]; - token_length -= 3; - - } else if (token_length >= 3 && token[0] == '<' && token[1] == '>' && token[2] == '(') { - add_shell_io(SUBSHELL_INPUT_OUTPUT, ROUND); - token = &token[3]; - token_length -= 3; - - } else if (token_length >= 3 && token[0] == '<' && token[1] == '>' && token[2] == '|') { - terminate_command(SOCKET_PIPE); - token = &token[3]; - token_length -= 3; - - } else if (token_length >= 3 && token[0] == '<' && token[1] == '>' && token[2] == '&') { - add_redirection(REDIRECT_INPUT_OUTPUT_TO_FD); - token = &token[3]; - token_length -= 3; - - } else if (token_length >= 2 && token[0] == ')' && token[1] == ')') { - if (state->exit_on == ROUND_ROUND) - end_subshell(); - else - eprintf("stray )) at line %zu\n", line_number); - token = &token[2]; - token_length -= 2; - - } else if (token_length >= 2 && token[0] == '(' && token[1] == ')') { - add_function_mark(); - token = &token[2]; - token_length -= 2; - - } else if (token_length >= 2 && token[0] == '(' && token[1] == '(') { - add_shell_io(SUBSHELL, ROUND_ROUND); - state->is_expr_shell = 1; - token = &token[2]; - token_length -= 2; - - } else if (token_length >= 2 && token[0] == ';' && token[1] == ';') { - terminate_command(DOUBLE_SEMICOLON); - token = &token[2]; - token_length -= 2; - - } else if (token_length >= 2 && token[0] == '<' && token[1] == '(') { - add_shell_io(SUBSHELL_OUTPUT, ROUND); - token = &token[2]; - token_length -= 2; - - } else if (token_length >= 2 && token[0] == '<' && token[1] == '<') { - add_redirection(HERE_DOCUMENT); - token = &token[2]; - token_length -= 2; - - } else if (token_length >= 2 && token[0] == '<' && token[1] == '>') { - add_redirection(REDIRECT_INPUT_OUTPUT); - token = &token[2]; - token_length -= 2; - - } else if (token_length >= 2 && token[0] == '<' && token[1] == '&') { - add_redirection(REDIRECT_INPUT_TO_FD); - token = 
&token[2]; - token_length -= 2; - - } else if (token_length >= 2 && token[0] == '>' && token[1] == '(') { - add_shell_io(SUBSHELL_INPUT, ROUND); - token = &token[2]; - token_length -= 2; - - } else if (token_length >= 2 && token[0] == '>' && token[1] == '>') { - add_redirection(REDIRECT_OUTPUT_APPEND); - token = &token[2]; - token_length -= 2; - - } else if (token_length >= 2 && token[0] == '>' && token[1] == '&') { - add_redirection(REDIRECT_OUTPUT_TO_FD); - token = &token[2]; - token_length -= 2; - - } else if (token_length >= 2 && token[0] == '>' && token[1] == '|') { - add_redirection(REDIRECT_OUTPUT_CLOBBERING); - token = &token[2]; - token_length -= 2; - - } else if (token_length >= 2 && token[0] == '|' && token[1] == '&') { - terminate_command(PIPE_AMPERSAND); - token = &token[2]; - token_length -= 2; - - } else if (token_length >= 2 && token[0] == '|' && token[1] == '|') { - terminate_command(OR); - token = &token[2]; - token_length -= 2; - - } else if (token_length >= 2 && token[0] == '&' && token[1] == '&') { - terminate_command(AND); - token = &token[2]; - token_length -= 2; - - } else if (token_length >= 1 && token[0] == ')') { - if (state->exit_on == ROUND) - end_subshell(); - else - eprintf("stray ) at line %zu\n", line_number); - token = &token[1]; - token_length -= 1; - - } else if (token_length >= 1 && token[0] == ']') { - if (state->exit_on == SQUARE) - end_subshell(); - else - verbatim(token, 1, 0); - token = &token[1]; - token_length -= 1; - - } else if (token_length >= 1 && token[0] == '(') { - add_shell_io(SUBSHELL, ROUND); - state->is_expr_shell = old_state->is_expr_shell; - token = &token[1]; - token_length -= 1; - - } else if (token_length >= 1 && token[0] == ';') { - semicolon(0); - token = &token[1]; - token_length -= 1; - - } else if (token_length >= 1 && token[0] == '<') { - add_redirection(REDIRECT_INPUT); - token = &token[1]; - token_length -= 1; - - } else if (token_length >= 1 && token[0] == '>') { - 
add_redirection(REDIRECT_OUTPUT); - token = &token[1]; - token_length -= 1; - - } else if (token_length >= 1 && token[0] == '|') { - terminate_command(PIPE); - token = &token[1]; - token_length -= 1; - - } else if (token_length >= 1 && token[0] == '&') { - terminate_command(AMPERSAND); - token = &token[1]; - token_length -= 1; - - } else { - verbatim(token, 1, 0); - token = &token[1]; - token_length -= 1; - } - } -} - -static void -symbol(char *token, size_t token_length, size_t escaped_newlines) -{ - size_t new_length, r, w; - if (escaped_newlines) { - r = w = 0; - new_length = token_length - 2 * escaped_newlines; - while (escaped_newlines--) { - if (token[r] == '\\') - r += 2; - else - token[w++] = token[r++]; - } - memcpy(&token[w], &token[r], token_length - r); - token_length = new_length; - } - parse_symbol(token, token_length); -} - -static void -backtick(void) -{ - flush_dollar(); - if (state->exit_on == BACKTICK) - end_subshell(); - else - add_shell_io(SUBSHELL_SUBSTITUTION, BACKTICK); -} - -static void -double_quote(void) -{ - flush_dollar(); - /* TODO */ -} - -static void -verbatim(const char *text, size_t text_length, int from_quote) -{ - struct argument *argend; - - state->need_right_hand_side = 0; - - if (from_quote && state->at_dollar) { - state->at_dollar = 0; - if (!state->current_argument_end) - state->current_argument_end = state->current_argument = ecalloc(1, sizeof(struct argument)); - else - state->current_argument_end = state->current_argument_end->next_part = ecalloc(1, sizeof(struct argument)); - state->current_argument_end->type = ESCAPED; - } else { - flush_dollar(); - if (!state->current_argument_end) { - state->current_argument_end = state->current_argument = ecalloc(1, sizeof(struct argument)); - state->current_argument_end->type = VERBATIM; - } else if (state->current_argument_end->type != VERBATIM) { - state->current_argument_end = state->current_argument_end->next_part = ecalloc(1, sizeof(struct argument)); - 
state->current_argument_end->type = VERBATIM; - } - } - - argend = state->current_argument_end; - - argend->text = erealloc(argend->text, argend->length + text_length + 1); - memcpy(&argend->text[argend->length], text, text_length); - argend->length += text_length; - argend->text[argend->length] = '\0'; -} - -static void -flush_dollar(void) -{ - if (state->at_dollar) { - state->at_dollar = 0; - verbatim("$", 1, 0); - } -} - -static void -append_special(char symbol) -{ - state->need_right_hand_side = 0; - - if (!state->current_argument_end) - state->current_argument_end = state->current_argument = ecalloc(1, sizeof(struct argument)); - else - state->current_argument_end = state->current_argument_end->next_part = ecalloc(1, sizeof(struct argument)); - - state->current_argument_end->type = SPECIAL; - state->current_argument_end->symbol = symbol; -} - -static void -unverbatim(const char *text, size_t text_length) -{ - size_t verbatim_length; - - /* TODO handle state->dollar */ - - while (text_length) { - for (verbatim_length = 0; verbatim_length < text_length; verbatim_length++) - if (*text == '*' || *text == '?' || *text == '[' || *text == ']' || - *text == ',' || *text == '.' || *text == '{' || *text == '}' || - *text == '~' || *text == '!' 
|| *text == '=') - break; - if (verbatim_length) { - verbatim(text, verbatim_length, 0); - text = &text[verbatim_length]; - text_length -= verbatim_length; - } else { - append_special(*text); - text = &text[1]; - text_length -= 1; - } - } -} - -static void -dollar(void) -{ - /* TODO forbid $ if giving argument to here-document */ - if (state->at_dollar) - unverbatim("$", 1); - else - state->at_dollar = 1; -} - -static int -end_of_file(void) -{ - semicolon(1); - return !(state->parent || state->ncommands); -} - -static size_t -parse(char *code, size_t code_len, int end_of_file_reached) -{ -#define IS_SYMBOL(C)\ - ((!state->is_expr_shell && (\ - (C) == '|' || (C) == '&' || (C) == ';' || \ - (C) == '<' || (C) == '>' || (C) == '-')) || \ - (C) == '{' || (C) == '}' || \ - (C) == '(' || (C) == ')' || \ - (C) == '[' || (C) == ']') - - static int she_is_comment = 1; - static int in_comment = 0; - static int at_line_beginning = 1; - - size_t read_bytes = 0; - size_t token_len; - size_t new_lines; - - for (; read_bytes < code_len; read_bytes += token_len, code = &code[token_len]) { - if (at_line_beginning) { - if (here_documents_first) { - /* TODO read until terminator, remove indentation if <<- and then parse in "-mode but accept " */ - } - at_line_beginning = 0; - } - - if (in_comment) { - if (*code == '\n') { - in_comment = 0; - } else { - token_len = 1; - continue; - } - } - - if (*code == '\0') { - if (!tty_input) - weprintf("ignoring NUL byte at line %zu\n", line_number); - - } else if (*code == '\n') { - line_number += 1; - she_is_comment = 1; - whitespace(0); - semicolon(1); - token_len = 1; - at_line_beginning = 1; - - } else if (isspace(*code)) { - she_is_comment = 1; - whitespace(0); - for (token_len = 1; token_len < code_len - read_bytes; token_len++) - if (!isspace(code[token_len]) || code[token_len] == '\n') - break; - - } else if (*code == '#' && she_is_comment) { - in_comment = 1; - token_len = 1; - - } else if (IS_SYMBOL(*code)) { - she_is_comment = 1; - 
new_lines = 0; - for (token_len = 1; token_len < code_len - read_bytes; token_len++) { - if (code[token_len] == '\\' && token_len + 1 < code_len - read_bytes && code[token_len] == '\n') { - new_lines += 1; - } else if (!IS_SYMBOL(code[token_len])) { - symbol(code, token_len, new_lines); - line_number += new_lines; - goto next; - } - } - if (end_of_file_reached) { - symbol(code, token_len, new_lines); - line_number += new_lines; - } else { - break; - } - - } else if (*code == '\\') { - she_is_comment = 0; - if (code_len - read_bytes < 2) - break; - token_len = 2; - if (code[1] == '\n') - line_number += 1; - else - verbatim(&code[1], 1, 0); - - } else if (*code == '$') { - she_is_comment = 0; - dollar(); - token_len = 1; - - } else if (*code == '`') { - she_is_comment = 1; - backtick(); - token_len = 1; - - } else if (*code == '"') { - she_is_comment = 0; - double_quote(); - - } else if (*code == '\'') { - she_is_comment = 0; - new_lines = 0; - for (token_len = 1; token_len < code_len - read_bytes; token_len++) { - if (code[token_len] == '\'') { - token_len += 1; - if (!state->at_dollar || code[token_len - 2] != '\\') { - verbatim(&code[1], token_len - 2, 1); - line_number += new_lines; - goto next; - } - } else if (code[token_len] == '\n') { - new_lines += 1; - } - } - break; - - } else { - she_is_comment = 0; - for (token_len = 1; token_len < code_len - read_bytes; token_len++) - if (isspace(*code) || IS_SYMBOL(*code) || *code == '\\' || - *code == '$' || *code == '`' || *code == '"' || *code == '\'') - break; - unverbatim(code, token_len); - } - - next:; - } - - return read_bytes; - -#undef IS_SYMBOL + memset(ctx, 0, sizeof(*ctx)); + ctx->preparser_line_number = 1; + ctx->tokeniser_line_number = 1; + ctx->mode_stack = ecalloc(1, sizeof(*ctx->mode_stack)); + ctx->mode_stack->mode = NORMAL_MODE; + ctx->mode_stack->she_is_comment = 1; + ctx->parser_state = ecalloc(1, sizeof(*ctx->parser_state)); + ctx->here_documents_next = &ctx->here_documents_first; } int main(int 
argc, char *argv[]) { + struct parser_context ctx; char *buffer = NULL; size_t buffer_size = 0; size_t buffer_head = 0; size_t buffer_tail = 0; ssize_t r; - size_t n; + size_t n, nremoved; ARGBEGIN { default: @@ -713,12 +36,11 @@ main(int argc, char *argv[]) if (argc) usage(); - tty_input = isatty(STDIN_FILENO); - if (tty_input) + initialise_parser_context(&ctx); + ctx.tty_input = isatty(STDIN_FILENO); + if (ctx.tty_input) weprintf("apsh is currently not implemented to be interactive\n"); - state = ecalloc(1, sizeof(*state)); - for (;;) { if (buffer_size - buffer_head < PARSE_RINGBUFFER_MIN_AVAILABLE) { if (buffer_tail && buffer_head - buffer_tail <= buffer_tail) { @@ -739,11 +61,14 @@ main(int argc, char *argv[]) n = (size_t)r; buffer_head += n; - buffer_tail += parse(&buffer[buffer_tail], buffer_head - buffer_tail, 0); + buffer_tail += n = parse(&ctx, &buffer[buffer_tail], buffer_head - buffer_tail, &nremoved); + buffer_head -= nremoved; } - buffer_tail += parse(&buffer[buffer_tail], buffer_head - buffer_tail, 1); - if (buffer_tail != buffer_head || !end_of_file()) + ctx.end_of_file_reached = 1; + buffer_tail += parse(&ctx, &buffer[buffer_tail], buffer_head - buffer_tail, &nremoved); + buffer_head -= nremoved; + if (buffer_tail != buffer_head || ctx.premature_end_of_file) eprintf("premature end of file reached\n"); free(buffer); diff --git a/common.h b/common.h new file mode 100644 index 0000000..b5def37 --- /dev/null +++ b/common.h @@ -0,0 +1,156 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include +#include +#include "config.h" + + +enum argument_type { + /* .text and .length */ + QUOTED, /* \ or '…' or $'…' */ + UNQUOTED, /* normal */ + /* .child */ + QUOTE_EXPRESSION, /* "…" */ + BACKQUOTE_EXPRESSION, /* `…` */ + ARITHMETIC_EXPRESSION, /* $((…)) */ + VARIABLE_SUBSTITUTION, /* ${…} */ + SUBSHELL_SUBSTITUTION, /* $(…) */ + PROCESS_SUBSTITUTION_INPUT, /* >(…) */ + PROCESS_SUBSTITUTION_OUTPUT, /* <(…) */ + PROCESS_SUBSTITUTION_INPUT_OUTPUT, /* <>(…) */ + SUBSHELL, /* (…) ## if non-first argument: format shell code into a string (can be used for a clean subshell) */ + ARITHMETIC_SUBSHELL, /* ((…)) ## if non-first argument: format shell code into a string */ + /* (none) */ + REDIRECTION, /* at beginning of argument, use next redirection and use reminder of argument as right-hand side */ + FUNCTION_MARK /* () */ +}; + +enum redirection_type { + REDIRECT_INPUT, + REDIRECT_INPUT_TO_FD, + REDIRECT_OUTPUT, + REDIRECT_OUTPUT_APPEND, + REDIRECT_OUTPUT_CLOBBER, + REDIRECT_OUTPUT_TO_FD, + REDIRECT_OUTPUT_AND_STDERR, + REDIRECT_OUTPUT_AND_STDERR_APPEND, + REDIRECT_OUTPUT_AND_STDERR_CLOBBER, + REDIRECT_OUTPUT_AND_STDERR_TO_FD, + REDIRECT_INPUT_OUTPUT, + REDIRECT_INPUT_OUTPUT_TO_FD, + HERE_STRING, + HERE_DOCUMENT, + HERE_DOCUMENT_INDENTED +}; + +enum tokeniser_mode { + NORMAL_MODE, + COMMENT_MODE, + BQ_QUOTE_MODE, + DQ_QUOTE_MODE, + RRB_QUOTE_MODE, + RB_QUOTE_MODE, + SB_QUOTE_MODE, + CB_QUOTE_MODE, + HERE_DOCUMENT_MODE +}; + +enum command_terminal { + DOUBLE_SEMICOLON, + SEMICOLON, + AMPERSAND, + SOCKET_PIPE, + PIPE, + PIPE_AMPERSAND, + AND, + OR +}; + +struct parser_state; + +struct argument { + enum argument_type type; + union { + struct { + char *text; + size_t length; + }; + struct parser_state *child; + }; + size_t line_number; + struct argument *next_part; +}; + +struct redirection { + enum redirection_type type; + struct argument *left_hand_side; +}; + +struct command { + enum command_terminal terminal; + struct argument **arguments; + size_t narguments; 
+ struct redirection **redirections; + size_t nredirections; +}; + +struct parser_state { + struct parser_state *parent; + struct command **commands; + size_t ncommands; + struct argument **arguments; + size_t narguments; + struct redirection **redirections; + size_t nredirections; + struct argument *current_argument; + struct argument *current_argument_end; + char need_right_hand_side; +}; + +struct here_document { + struct redirection *redirection; + struct argument *argument; + struct here_document *next; +}; + +struct mode_stack { + enum tokeniser_mode mode; + int she_is_comment; + struct mode_stack *previous; +}; + +struct parser_context { + int tty_input; + int end_of_file_reached; + int premature_end_of_file; + size_t preparser_offset; + size_t preparser_line_number; + size_t line_continuations; + size_t tokeniser_line_number; + struct mode_stack *mode_stack; + struct parser_state *parser_state; + struct here_document *here_documents_first; + struct here_document **here_documents_next; +}; + + +/* apsh.c */ +void initialise_parser_context(struct parser_context *ctx); + +/* preparser.c */ +size_t parse(struct parser_context *ctx, char *code, size_t code_len, size_t *nremovedp); + +/* tokeniser.c */ +void push_mode(struct parser_context *ctx, enum tokeniser_mode mode); +void pop_mode(struct parser_context *ctx); +size_t parse_preparsed(struct parser_context *ctx, char *code, size_t code_len); + +/* parser.c */ +void push_end_of_file(struct parser_context *ctx); +void push_whitespace(struct parser_context *ctx, int strict); +void push_semicolon(struct parser_context *ctx, int maybe); +size_t push_symbol(struct parser_context *ctx, char *token, size_t token_len); +void push_quoted(struct parser_context *ctx, char *text, size_t text_len); +void push_escaped(struct parser_context *ctx, char *text, size_t text_len); +void push_unquoted(struct parser_context *ctx, char *text, size_t text_len); +void push_enter(struct parser_context *ctx, enum argument_type type); 
+void push_leave(struct parser_context *ctx); diff --git a/config.mk b/config.mk index 15b1181..c6a635e 100644 --- a/config.mk +++ b/config.mk @@ -4,5 +4,5 @@ MANPREFIX = /usr/share/man CC = cc CPPFLAGS = -D_DEFAULT_SOURCE -D_BSD_SOURCE -D_XOPEN_SOURCE=700 -D_GNU_SOURCE -CFLAGS = -std=c99 -Wall -g +CFLAGS = -std=c11 -Wall -g LDFLAGS = -lsimple diff --git a/parser.c b/parser.c new file mode 100644 index 0000000..c3da716 --- /dev/null +++ b/parser.c @@ -0,0 +1,271 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + + +void +push_end_of_file(struct parser_context *ctx) +{ + push_semicolon(ctx, 1); + if (ctx->parser_state->parent || ctx->parser_state->ncommands) + ctx->premature_end_of_file = 1; +} + + +void +push_whitespace(struct parser_context *ctx, int strict) +{ + if (ctx->parser_state->need_right_hand_side) { + if (strict) + eprintf("premature end of command\n"); + return; + } + + if (ctx->parser_state->current_argument) { + ctx->parser_state->arguments = erealloc(ctx->parser_state->arguments, + (ctx->parser_state->narguments + 1) * + sizeof(*ctx->parser_state->arguments)); + ctx->parser_state->arguments[ctx->parser_state->narguments++] = ctx->parser_state->current_argument; + ctx->parser_state->current_argument = NULL; + ctx->parser_state->current_argument_end = NULL; + } +} + + +static void +push_command_terminal(struct parser_context *ctx, enum command_terminal terminal) +{ + struct command *new_command; + + push_whitespace(ctx, 1); + + ctx->parser_state->commands = erealloc(ctx->parser_state->commands, + (ctx->parser_state->ncommands + 1) * + sizeof(*ctx->parser_state->commands)); + new_command = ecalloc(1, sizeof(*new_command)); + ctx->parser_state->commands[ctx->parser_state->ncommands++] = new_command; + new_command->terminal = terminal; + new_command->arguments = ctx->parser_state->arguments; + new_command->narguments = ctx->parser_state->narguments; + new_command->redirections = ctx->parser_state->redirections; + 
new_command->nredirections = ctx->parser_state->nredirections; + ctx->parser_state->arguments = NULL; + ctx->parser_state->narguments = 0; + ctx->parser_state->redirections = NULL; + ctx->parser_state->nredirections = 0; + + if (!ctx->parser_state->parent) { + if (terminal == DOUBLE_SEMICOLON || terminal == SEMICOLON || terminal == AMPERSAND) { + /* TODO unless in a special construct such as while, case, for, if, or {, run and clear + * also require that any here-document is specified (count them and run when given); + * if terminal == AMPERSAND: perform parser_state->narguments) + push_command_terminal(ctx, SEMICOLON); +} + + +static void +push_new_argument_part(struct parser_context *ctx, enum argument_type type) +{ + struct argument *new_part; + + new_part = ecalloc(1, sizeof(*new_part)); + new_part->type = type; + new_part->line_number = ctx->tokeniser_line_number; + + if (ctx->parser_state->current_argument_end) { + ctx->parser_state->current_argument_end->next_part = new_part; + ctx->parser_state->current_argument_end = new_part; + } else { + ctx->parser_state->current_argument = new_part; + ctx->parser_state->current_argument_end = new_part; + } +} + + +static void +push_redirection(struct parser_context *ctx, enum redirection_type type) +{ + struct redirection *new_redirection; + struct argument *new_argument; + struct here_document *new_here_document; + + new_redirection = ecalloc(1, sizeof(*new_redirection)); + new_redirection->type = type; + + ctx->parser_state->redirections = erealloc(ctx->parser_state->redirections, + (ctx->parser_state->nredirections + 1) * + sizeof(*ctx->parser_state->redirections)); + ctx->parser_state->redirections[ctx->parser_state->nredirections++] = new_redirection; + + if (ctx->parser_state->current_argument) { + if (ctx->parser_state->current_argument->type == REDIRECTION || + ctx->parser_state->current_argument_end->type == QUOTED || + ctx->parser_state->current_argument_end->type == QUOTE_EXPRESSION || + type == 
REDIRECT_OUTPUT_AND_STDERR || + type == REDIRECT_OUTPUT_AND_STDERR_APPEND || + type == REDIRECT_OUTPUT_AND_STDERR_CLOBBER || + type == REDIRECT_OUTPUT_AND_STDERR_TO_FD) { + push_whitespace(ctx, 1); + } else { + new_redirection->left_hand_side = ctx->parser_state->current_argument; + } + } + + new_argument = ecalloc(1, sizeof(*new_argument)); + new_argument->type = REDIRECTION; + new_argument->line_number = ctx->tokeniser_line_number; + ctx->parser_state->current_argument = new_argument; + + if (type == HERE_DOCUMENT || type == HERE_DOCUMENT_INDENTED) { + new_here_document = emalloc(sizeof(*new_here_document)); + new_here_document->redirection = new_redirection; + new_here_document->argument = new_argument; + new_here_document->next = NULL; + *ctx->here_documents_next = new_here_document; + ctx->here_documents_next = &new_here_document->next; + } + + ctx->parser_state->need_right_hand_side = 1; +} + + +static void +push_shell_io(struct parser_context *ctx, enum argument_type type, enum tokeniser_mode mode) +{ + push_mode(ctx, mode); + push_enter(ctx, type); +} + + +static void +push_function_mark(struct parser_context *ctx) +{ + push_whitespace(ctx, 1); + push_new_argument_part(ctx, FUNCTION_MARK); + push_whitespace(ctx, 1); +} + + +size_t +push_symbol(struct parser_context *ctx, char *token, size_t token_len) +{ +#define LIST_SYMBOLS(_)\ + _("<<<", push_redirection(ctx, HERE_STRING))\ + _("<<-", push_redirection(ctx, HERE_DOCUMENT_INDENTED))\ + _("<>(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT_OUTPUT, NORMAL_MODE))\ + _("<>|", push_command_terminal(ctx, SOCKET_PIPE))\ + _("<>&", push_redirection(ctx, REDIRECT_INPUT_OUTPUT_TO_FD))\ + _("&>>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_APPEND))\ + _("&>&", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_TO_FD))\ + _("&>|", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_CLOBBER))\ + _("()", push_function_mark(ctx))\ + _("((", push_shell_io(ctx, ARITHMETIC_SUBSHELL, RRB_QUOTE_MODE))\ + _(";;", 
push_command_terminal(ctx, DOUBLE_SEMICOLON))\ + _("<(", push_shell_io(ctx, PROCESS_SUBSTITUTION_OUTPUT, NORMAL_MODE))\ + _("<<", push_redirection(ctx, HERE_DOCUMENT))\ + _("<>", push_redirection(ctx, REDIRECT_INPUT_OUTPUT))\ + _("<&", push_redirection(ctx, REDIRECT_INPUT_TO_FD))\ + _(">(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT, NORMAL_MODE))\ + _(">>", push_redirection(ctx, REDIRECT_OUTPUT_APPEND))\ + _(">&", push_redirection(ctx, REDIRECT_OUTPUT_TO_FD))\ + _(">|", push_redirection(ctx, REDIRECT_OUTPUT_CLOBBER))\ + _("||", push_command_terminal(ctx, OR))\ + _("|&", push_command_terminal(ctx, PIPE_AMPERSAND))\ + _("&&", push_command_terminal(ctx, AND))\ + _("&|", push_command_terminal(ctx, PIPE_AMPERSAND)) /* synonym for |& to match &> */\ + _("&>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR))\ + _("(", push_shell_io(ctx, SUBSHELL, NORMAL_MODE))\ + _(";", push_semicolon(ctx, 0))\ + _("<", push_redirection(ctx, REDIRECT_INPUT))\ + _(">", push_redirection(ctx, REDIRECT_OUTPUT))\ + _("|", push_command_terminal(ctx, PIPE))\ + _("&", push_command_terminal(ctx, AMPERSAND)) + +#define X(SYMBOL, ACTION)\ + if (token_len >= sizeof(SYMBOL) - 1 && !strncmp(token, SYMBOL, sizeof(SYMBOL) - 1)) {\ + ACTION;\ + return token_len;\ + } + LIST_SYMBOLS(X) +#undef X + + push_unquoted(ctx, token, 1); + return 1; +} + + +static void +push_text(struct parser_context *ctx, char *text, size_t text_len, enum argument_type type) +{ + struct argument *arg_part; + + ctx->parser_state->need_right_hand_side = 0; + + if (!ctx->parser_state->current_argument_end || + ctx->parser_state->current_argument_end->type != type || + ctx->parser_state->current_argument_end->line_number != ctx->tokeniser_line_number) + push_new_argument_part(ctx, type); + arg_part = ctx->parser_state->current_argument_end; + + arg_part->text = erealloc(arg_part->text, arg_part->length + text_len + 1); + memcpy(&arg_part->text[arg_part->length], text, text_len); + arg_part->length += text_len; + 
arg_part->text[arg_part->length] = '\0'; +} + + +void +push_quoted(struct parser_context *ctx, char *text, size_t text_len) +{ + push_text(ctx, text, text_len, QUOTED); +} + + +void +push_escaped(struct parser_context *ctx, char *text, size_t text_len) +{ + /* TODO resolve backslashes in text */ + push_text(ctx, text, text_len, QUOTED); +} + + +void +push_unquoted(struct parser_context *ctx, char *text, size_t text_len) +{ + push_text(ctx, text, text_len, UNQUOTED); +} + + +void +push_enter(struct parser_context *ctx, enum argument_type type) +{ + struct parser_state *new_state; + + ctx->parser_state->need_right_hand_side = 0; + push_new_argument_part(ctx, type); + + new_state = ecalloc(1, sizeof(*new_state)); + new_state->parent = ctx->parser_state; + ctx->parser_state->current_argument_end->child = new_state; + ctx->parser_state = new_state; +} + + +void +push_leave(struct parser_context *ctx) +{ + if (ctx->mode_stack->mode == NORMAL_MODE) + push_semicolon(ctx, 1); + /* TODO else if (ctx->mode_stack->mode == BQ_QUOTE_MODE), parse content */ + /* TODO validate subshell content */ + ctx->parser_state = ctx->parser_state->parent; +} diff --git a/preparser.c b/preparser.c new file mode 100644 index 0000000..840209f --- /dev/null +++ b/preparser.c @@ -0,0 +1,51 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "common.h" + + +size_t +parse(struct parser_context *ctx, char *code, size_t code_len, size_t *nremovedp) +{ + int end_of_file_reached; + size_t bytes_parsed = 0; + + end_of_file_reached = ctx->end_of_file_reached; + ctx->end_of_file_reached = 0; + *nremovedp = 0; + + while (ctx->preparser_offset < code_len) { + if (code[ctx->preparser_offset] == '\0') { + if (!ctx->tty_input) + weprintf("ignoring NUL byte at line %zu\n", ctx->preparser_line_number); + memmove(&code[ctx->preparser_offset], + &code[ctx->preparser_offset + 1], + (code_len -= 1) - ctx->preparser_offset); + *nremovedp += 1; + + } else if (code[ctx->preparser_offset] == '\n') { + ctx->preparser_line_number += 1; + ctx->preparser_offset += 1; + + } else if (code[ctx->preparser_offset] == '\\') { + if (ctx->preparser_offset + 1 == code_len) + break; + if (code[ctx->preparser_offset + 1] == '\n') { + bytes_parsed += parse_preparsed(ctx, &code[bytes_parsed], ctx->preparser_offset - bytes_parsed); + memmove(&code[ctx->preparser_offset], + &code[ctx->preparser_offset + 2], + (code_len -= 2) - ctx->preparser_offset); + *nremovedp += 2; + ctx->line_continuations += 1; + } else { + ctx->preparser_offset += 2; + } + + } else { + ctx->preparser_offset += 1; + } + } + + ctx->end_of_file_reached = end_of_file_reached; + bytes_parsed += parse_preparsed(ctx, &code[bytes_parsed], ctx->preparser_offset - bytes_parsed); + ctx->preparser_offset -= bytes_parsed; + return bytes_parsed; +} diff --git a/tokeniser.c b/tokeniser.c new file mode 100644 index 0000000..63ff2fd --- /dev/null +++ b/tokeniser.c @@ -0,0 +1,419 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "common.h" + + +void +push_mode(struct parser_context *ctx, enum tokeniser_mode mode) +{ + struct mode_stack *new = emalloc(sizeof(*new)); + new->mode = mode; + new->she_is_comment = 1; + new->previous = ctx->mode_stack; + ctx->mode_stack = new; +} + + +void +pop_mode(struct parser_context *ctx) +{ + struct mode_stack *old = ctx->mode_stack; + ctx->mode_stack = ctx->mode_stack->previous; + free(old); +} + + +size_t +parse_preparsed(struct parser_context *ctx, char *code, size_t code_len) +{ +#define IS_SYMBOL(C) ((C) == '<' || (C) == '>' || (C) == '&' || (C) == '|' ||\ + (C) == '(' || (C) == ')' || (C) == ';' || (C) == '-') + + size_t bytes_read = 0; + size_t token_len; + + for (; bytes_read < code_len; bytes_read += token_len, code = &code[token_len]) { + switch (ctx->mode_stack->mode) { + case NORMAL_MODE: + if (*code == '#' && ctx->mode_stack->she_is_comment) { + token_len = 1; + push_mode(ctx, COMMENT_MODE); + + } else if (*code == '\n') { + token_len = 1; + ctx->mode_stack->she_is_comment = 1; + push_whitespace(ctx, 0); + push_semicolon(ctx, 1); + ctx->tokeniser_line_number += 1; + if (ctx->here_documents_first) + push_mode(ctx, HERE_DOCUMENT_MODE); + + } else if (isspace(*code)) { + ctx->mode_stack->she_is_comment = 1; + push_whitespace(ctx, 0); + for (token_len = 1; token_len < code_len - bytes_read; token_len += 1) + if (!isspace(code[token_len]) || code[token_len] == '\n') + break; + + } else if (*code == ')' && ctx->mode_stack->previous) { + token_len = 1; + ctx->mode_stack->she_is_comment = 1; + pop_mode(ctx); + push_leave(ctx); + + } else if (IS_SYMBOL(*code)) { + ctx->mode_stack->she_is_comment = 1; + for (token_len = 1; token_len < code_len - bytes_read; token_len += 1) + if (!IS_SYMBOL(code[token_len])) + goto symbol_end; + if (!ctx->end_of_file_reached) + goto need_more; + symbol_end: + token_len = push_symbol(ctx, code, token_len); + + } else if (*code == '\\') { + ctx->mode_stack->she_is_comment = 0; + backslash_mode: + if (code_len - 
bytes_read < 2) + goto need_more; + token_len = 2; + push_quoted(ctx, &code[1], 1); + + } else if (*code == '\'') { + ctx->mode_stack->she_is_comment = 0; + sqoute_mode: + for (token_len = 1; token_len < code_len - bytes_read; token_len += 1) + if (code[token_len] == '\'') + goto squote_end; + goto need_more; + squote_end: + token_len += 1; + push_quoted(ctx, &code[1], token_len - 2); + + } else if (*code == '"') { + ctx->mode_stack->she_is_comment = 0; + dquote_mode: + token_len = 1; + push_mode(ctx, DQ_QUOTE_MODE); + push_enter(ctx, QUOTE_EXPRESSION); + + } else if (*code == '`') { + ctx->mode_stack->she_is_comment = 0; + bquote_mode: + token_len = 1; + push_mode(ctx, BQ_QUOTE_MODE); + push_enter(ctx, BACKQUOTE_EXPRESSION); + + } else if (*code == '$') { + ctx->mode_stack->she_is_comment = 0; + dollar_mode: + if (code_len - bytes_read < 2) { + if (ctx->end_of_file_reached) { + token_len = 1; + push_unquoted(ctx, code, 1); + } else { + goto need_more; + } + + } else if (code[1] == '(') { + if (code_len - bytes_read < 3) { + goto need_more; + + } else if (code[2] == '(') { + token_len = 3; + push_mode(ctx, RRB_QUOTE_MODE); + push_enter(ctx, ARITHMETIC_EXPRESSION); + + } else { + token_len = 2; + push_mode(ctx, NORMAL_MODE); + push_enter(ctx, SUBSHELL_SUBSTITUTION); + } + + } else if (code[1] == '[') { + token_len = 2; + push_mode(ctx, SB_QUOTE_MODE); + push_enter(ctx, ARITHMETIC_EXPRESSION); + + } else if (code[1] == '{') { + token_len = 2; + push_mode(ctx, CB_QUOTE_MODE); + push_enter(ctx, VARIABLE_SUBSTITUTION); + + } else if (code[1] == '\'') { + for (token_len = 2; token_len < code_len - bytes_read; token_len += 1) { + if (code[token_len] == '\\') { + if (token_len + 1 == code_len - bytes_read) { + token_len += 1; + } else { + goto need_more; + } + } else if (code[token_len] == '\'') { + goto dollar_squote_end; + } + } + dollar_squote_end: + token_len += 1; + push_escaped(ctx, &code[2], token_len - 3); + + } else { + token_len = 1; + push_unquoted(ctx, code, 
1); + } + + } else { + ctx->mode_stack->she_is_comment = 0; + for (token_len = 1; token_len < code_len - bytes_read; token_len += 1) { + if (isspace(code[token_len]) || IS_SYMBOL(code[token_len]) || + code[token_len] == '\'' || code[token_len] == '"' || + code[token_len] == '\\' || code[token_len] == '$' || + code[token_len] == '`') + break; + } + push_unquoted(ctx, code, token_len); + } + break; + + + case COMMENT_MODE: + if (*code == '\n') { + token_len = 0; /* do not consume */ + pop_mode(ctx); + } else { + for (token_len = 1; token_len < code_len - bytes_read; token_len += 1) + if (code[token_len] == '\n') + break; + } + break; + + + case HERE_DOCUMENT_MODE: + /* TODO read until terminator, remove all (including on the + * line of the terminator) if <<- and then if terminator was + * unquoted, parse in " "-mode but accept " */ + break; + + + case BQ_QUOTE_MODE: + if (*code == '\\') { + if (code_len - bytes_read < 2) { + goto need_more; + } else { + token_len = 2; + push_unquoted(ctx, code, 2); + } + + } else if (*code == '`') { + token_len = 1; + pop_mode(ctx); + push_leave(ctx); + + } else if (*code == '\n') { + token_len = 1; + ctx->tokeniser_line_number += 1; + push_unquoted(ctx, code, 1); + + } else { + for (token_len = 1; token_len < code_len - bytes_read; token_len += 1) + if (code[token_len] == '\n' || code[token_len] == '\\' || code[token_len] == '`') + break; + push_unquoted(ctx, code, token_len); + } + break; + + + case DQ_QUOTE_MODE: + if (*code == '"') { + token_len = 1; + pop_mode(ctx); + push_leave(ctx); + } else { + goto common_quote_mode; + } + break; + + case RRB_QUOTE_MODE: + if (*code == ')') { + if (code_len - bytes_read < 2) { + goto need_more; + } else if (code[1] == ')') { + token_len = 2; + pop_mode(ctx); + push_leave(ctx); + } else { + goto common_quote_mode; + } + } else { + goto common_quote_mode; + } + break; + + case RB_QUOTE_MODE: + if (*code == ')') { + token_len = 1; + pop_mode(ctx); + push_leave(ctx); + } else { + goto 
common_quote_mode; + } + break; + + case SB_QUOTE_MODE: + if (*code == ']') { + token_len = 1; + pop_mode(ctx); + push_leave(ctx); + } else { + goto common_quote_mode; + } + break; + + common_quote_mode: + if (*code == '(' && ctx->mode_stack->mode != DQ_QUOTE_MODE) { + if (code_len - bytes_read < 2) { + goto need_more; + + } else if (code[1] == '(') { + token_len = 2; + push_mode(ctx, RRB_QUOTE_MODE); + push_enter(ctx, ARITHMETIC_EXPRESSION); + + } else { + token_len = 1; + push_mode(ctx, RB_QUOTE_MODE); + push_enter(ctx, ARITHMETIC_EXPRESSION); + } + + } else if (*code == '$') { + if (code_len - bytes_read < 2) { + if (ctx->end_of_file_reached) { + token_len = 1; + push_unquoted(ctx, code, 1); + } else { + goto need_more; + } + + } else if (code[1] == '(') { + if (code_len - bytes_read < 3) { + goto need_more; + + } else if (code[2] == '(') { + token_len = 3; + push_mode(ctx, RRB_QUOTE_MODE); + push_enter(ctx, ARITHMETIC_EXPRESSION); + + } else { + token_len = 2; + push_mode(ctx, NORMAL_MODE); + push_enter(ctx, SUBSHELL_SUBSTITUTION); + } + + } else if (code[1] == '[') { + token_len = 2; + push_mode(ctx, SB_QUOTE_MODE); + push_enter(ctx, ARITHMETIC_EXPRESSION); + + } else if (code[1] == '{') { + token_len = 2; + push_mode(ctx, CB_QUOTE_MODE); + push_enter(ctx, VARIABLE_SUBSTITUTION); + + } else { + token_len = 1; + push_unquoted(ctx, code, 1); + } + + } else if (*code == '\\') { + if (code_len - bytes_read < 2) { + if (ctx->end_of_file_reached) { + token_len = 1; + push_unquoted(ctx, code, 1); + } else { + goto need_more; + } + + } else if (code[1] == '$' || code[1] == '`' || code[1] == '"' || code[1] == '\\') { + token_len = 1; + push_quoted(ctx, &code[1], 1); + + } else { + token_len = 1; + push_unquoted(ctx, code, 1); + } + + } else if (*code == '`') { + goto bquote_mode; + + } else if (*code == '\n') { + token_len = 1; + ctx->tokeniser_line_number += 1; + push_unquoted(ctx, code, 1); + + } else { + for (token_len = 1; token_len < code_len - bytes_read; 
token_len += 1) { + if (code[token_len] == '"' || code[token_len] == ')' || + code[token_len] == ']' || code[token_len] == '(' || + code[token_len] == '$' || code[token_len] == '\\' || + code[token_len] == '`' || code[token_len] == '\n') + break; + } + push_unquoted(ctx, code, token_len); + } + break; + + + case CB_QUOTE_MODE: + if (*code == '}') { + token_len = 1; + pop_mode(ctx); + push_leave(ctx); + + } else if (*code == '\\') { + goto backslash_mode; + + } else if (*code == '\'') { + goto sqoute_mode; + + } else if (*code == '"') { + goto dquote_mode; + + } else if (*code == '`') { + goto bquote_mode; + + } else if (*code == '$') { + goto dollar_mode; + + } else if (*code == '\n') { + token_len = 1; + ctx->tokeniser_line_number += 1; + push_unquoted(ctx, code, 1); + + } else { + for (token_len = 1; token_len < code_len - bytes_read; token_len += 1) { + if (code[token_len] == '}' || code[token_len] == '\\' || + code[token_len] == '\'' || code[token_len] == '"' || + code[token_len] == '`' || code[token_len] == '$' || + code[token_len] == '\n') + break; + } + push_unquoted(ctx, code, token_len); + } + break; + + default: + abort(); + } + + if (ctx->line_continuations) { + ctx->tokeniser_line_number += ctx->line_continuations; + ctx->line_continuations = 0; + } + } + + if (bytes_read == code_len && ctx->end_of_file_reached) + push_end_of_file(ctx); + +need_more: + return bytes_read; + +#undef IS_SYMBOL +} -- cgit v1.2.3-70-g09d2