From bc9033fdf30424c34008e651fdbbba5da8c8fc40 Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Tue, 13 Jul 2021 02:44:18 +0200 Subject: Third commit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- Makefile | 5 +- apsh.c | 49 ++- common.h | 164 ++++++++- interpreter.c | 970 +++++++++++++++++++++++++++++++++++++++++++++++++++++ parser.c | 395 ++++++++++++++++++---- preparser.c | 2 +- regular_builtins.c | 67 ++++ special_builtins.c | 11 + tokeniser.c | 268 ++++++++++++++- 9 files changed, 1829 insertions(+), 102 deletions(-) create mode 100644 interpreter.c create mode 100644 regular_builtins.c create mode 100644 special_builtins.c diff --git a/Makefile b/Makefile index 696478c..bf1daeb 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,10 @@ OBJ =\ apsh.o\ preparser.o\ tokeniser.o\ - parser.o + parser.o\ + interpreter.o\ + special_builtins.o\ + regular_builtins.o HDR =\ common.h\ diff --git a/apsh.c b/apsh.c index 4e7183b..90743ad 100644 --- a/apsh.c +++ b/apsh.c @@ -4,19 +4,39 @@ USAGE(""); +int login_shell; +int posix_mode; + + void -initialise_parser_context(struct parser_context *ctx) +initialise_parser_context(struct parser_context *ctx, int need_tokeniser, int need_parser) { memset(ctx, 0, sizeof(*ctx)); - ctx->preparser_line_number = 1; - ctx->tokeniser_line_number = 1; - ctx->mode_stack = ecalloc(1, sizeof(*ctx->mode_stack)); - ctx->mode_stack->mode = NORMAL_MODE; - ctx->mode_stack->she_is_comment = 1; - ctx->parser_state = ecalloc(1, sizeof(*ctx->parser_state)); - ctx->here_documents_next = &ctx->here_documents_first; + if (need_tokeniser) { + ctx->preparser_line_number = 1; + ctx->tokeniser_line_number = 1; + ctx->mode_stack = ecalloc(1, sizeof(*ctx->mode_stack)); + ctx->mode_stack->she_is_comment = 1; + ctx->here_document_stack = ecalloc(1, sizeof(*ctx->here_document_stack)); + ctx->here_document_stack->next = &ctx->here_document_stack->first; + } + if (need_parser) { + 
ctx->parser_state = ecalloc(1, sizeof(*ctx->parser_state)); + } + ctx->interpreter_state = ecalloc(1, sizeof(*ctx->interpreter_state)); +} + + +static int +is_sh(char *name) +{ + if (!strcmp(name, "sh")) + return 1; + name = strrchr(name, '/'); + return name && !strcmp(name, "/sh"); } + int main(int argc, char *argv[]) { @@ -36,8 +56,11 @@ main(int argc, char *argv[]) if (argc) usage(); - initialise_parser_context(&ctx); - ctx.tty_input = isatty(STDIN_FILENO); + login_shell = (argv0[0] == '-'); + posix_mode = is_sh(&argv0[login_shell]); + + initialise_parser_context(&ctx, 1, 1); + ctx.tty_input = (char)isatty(STDIN_FILENO); if (ctx.tty_input) weprintf("apsh is currently not implemented to be interactive\n"); @@ -71,6 +94,12 @@ main(int argc, char *argv[]) if (buffer_tail != buffer_head || ctx.premature_end_of_file) eprintf("premature end of file reached\n"); + free(ctx.parser_state->commands); + free(ctx.parser_state->arguments); + free(ctx.parser_state->redirections); + free(ctx.parser_state); + free(ctx.here_document_stack); + free(ctx.interpreter_state); free(buffer); return 0; } diff --git a/common.h b/common.h index b5def37..52481c0 100644 --- a/common.h +++ b/common.h @@ -4,11 +4,35 @@ #include "config.h" +#if defined(__GNUC__) +# define CONST_FUNC __attribute__((__const__)) +# define PURE_FUNC __attribute__((__pure__)) +#else +# define CONST_FUNC +# define PURE_FUNC +#endif + + +#define BUILTIN_USAGE(FUNCTION_NAME, SYNOPSIS)\ + BUILTIN_NUSAGE(1, FUNCTION_NAME, SYNOPSIS) + +#define BUILTIN_NUSAGE(STATUS, FUNCTION_NAME, SYNOPSIS)\ + static void\ + FUNCTION_NAME(void)\ + {\ + const char *syn = SYNOPSIS ? SYNOPSIS : "";\ + fprintf(stderr, "usage: %s%s%s\n", argv0, *syn ? 
" " : "", syn);\ + exit(STATUS);\ + } + + enum argument_type { /* .text and .length */ QUOTED, /* \ or '…' or $'…' */ UNQUOTED, /* normal */ - /* .child */ + VARIABLE, /* used by interpreter, not parser */ + OPERATOR, /* used by interpreter for ${}, not parser */ + /* .child, but changed to .command by interpreter */ QUOTE_EXPRESSION, /* "…" */ BACKQUOTE_EXPRESSION, /* `…` */ ARITHMETIC_EXPRESSION, /* $((…)) */ @@ -19,27 +43,46 @@ enum argument_type { PROCESS_SUBSTITUTION_INPUT_OUTPUT, /* <>(…) */ SUBSHELL, /* (…) ## if non-first argument: format shell code into a string (can be used for a clean subshell) */ ARITHMETIC_SUBSHELL, /* ((…)) ## if non-first argument: format shell code into a string */ + /* .command */ + COMMAND, /* used by interpreter, not parser */ /* (none) */ REDIRECTION, /* at beginning of argument, use next redirection and use reminder of argument as right-hand side */ FUNCTION_MARK /* () */ }; +enum nesting_type { + MAIN_BODY, + CODE_ROOT, + TEXT_ROOT, + VARIABLE_SUBSTITUTION_BRACKET, + CURLY_NESTING, + IF_STATEMENT, + IF_CONDITIONAL, + IF_CLAUSE, + ELSE_CLAUSE, + UNTIL_STATEMENT, + WHILE_STATEMENT, + REPEAT_CONDITIONAL, + DO_CLAUSE, + FOR_STATEMENT +}; + enum redirection_type { REDIRECT_INPUT, - REDIRECT_INPUT_TO_FD, + REDIRECT_INPUT_TO_FD, /* but close if right-hand side is "-" */ REDIRECT_OUTPUT, REDIRECT_OUTPUT_APPEND, REDIRECT_OUTPUT_CLOBBER, - REDIRECT_OUTPUT_TO_FD, + REDIRECT_OUTPUT_TO_FD, /* ditto */ REDIRECT_OUTPUT_AND_STDERR, REDIRECT_OUTPUT_AND_STDERR_APPEND, REDIRECT_OUTPUT_AND_STDERR_CLOBBER, - REDIRECT_OUTPUT_AND_STDERR_TO_FD, + REDIRECT_OUTPUT_AND_STDERR_TO_FD, /* ditto */ REDIRECT_INPUT_OUTPUT, - REDIRECT_INPUT_OUTPUT_TO_FD, + REDIRECT_INPUT_OUTPUT_TO_FD, /* ditto */ HERE_STRING, - HERE_DOCUMENT, - HERE_DOCUMENT_INDENTED + HERE_DOCUMENT, /* eliminated during parse */ + HERE_DOCUMENT_INDENTED /* eliminated during parse */ }; enum tokeniser_mode { @@ -51,21 +94,45 @@ enum tokeniser_mode { RB_QUOTE_MODE, SB_QUOTE_MODE, CB_QUOTE_MODE, 
+ HERE_DOCUMENT_MODE_INITIALISATION, HERE_DOCUMENT_MODE }; enum command_terminal { DOUBLE_SEMICOLON, SEMICOLON, + NEWLINE, AMPERSAND, SOCKET_PIPE, PIPE, PIPE_AMPERSAND, + AMPERSAND_PIPE, /* synonym for |& to match &> */ AND, OR }; +enum interpreter_requirement { + NEED_COMMAND = 0, + NEED_COMMAND_END, + NO_REQUIREMENT, + NEED_FUNCTION_BODY, + NEED_VARIABLE_NAME, + NEED_IN_OR_DO, + NEED_DO, + NEED_VALUE, + NEED_PREFIX_OR_VARIABLE_NAME, + NEED_INDEX_OR_OPERATOR_OR_END, + NEED_INDEX_OR_SUFFIX_OR_END, + NEED_INDEX_OR_END, + NEED_OPERATOR_OR_END, + NEED_AT_OPERAND, + NEED_TEXT_OR_SLASH, + NEED_TEXT_OR_COLON, + NEED_END +}; + struct parser_state; +struct interpreter_state; struct argument { enum argument_type type; @@ -75,7 +142,13 @@ struct argument { size_t length; }; struct parser_state *child; + struct interpreter_state *command; }; + /* (TODO) need to be able to track locations of functions, dots, evals, and maybe aliases, + * as well as filenames, so a more complex tracking method is required, basically + * a reversed tree (stack with reference counted nodes) with filename and linenumber + * nodes, with type annotation; however for memory efficiency, .line_number shall + * still be used for the leaves */ size_t line_number; struct argument *next_part; }; @@ -83,19 +156,23 @@ struct argument { struct redirection { enum redirection_type type; struct argument *left_hand_side; + struct argument *right_hand_side; /* set by interpreter, not parser */ }; struct command { enum command_terminal terminal; + char have_bang; /* set by interpreter */ + size_t terminal_line_number; /* (TODO) same idea as in `struct argument` */ struct argument **arguments; size_t narguments; struct redirection **redirections; size_t nredirections; + size_t redirections_offset; /* used by interpreter */ }; struct parser_state { struct parser_state *parent; - struct command **commands; + struct command **commands; /* in text nodes, all text will be in at most one argument in a single dummy command 
*/ size_t ncommands; struct argument **arguments; size_t narguments; @@ -109,6 +186,9 @@ struct parser_state { struct here_document { struct redirection *redirection; struct argument *argument; + struct argument *argument_end; + char *terminator; + size_t terminator_length; struct here_document *next; }; @@ -118,23 +198,52 @@ struct mode_stack { struct mode_stack *previous; }; +struct here_document_stack { + char indented; + char verbatim; + char interpret_when_empty; + size_t line_offset; + struct here_document *first; + struct here_document **next; + struct here_document_stack *previous; +}; + +struct interpreter_state { + enum nesting_type dealing_with; + enum interpreter_requirement requirement; + char allow_newline; + char disallow_bang; /* disallow rather than allow, so that default value is 0 */ + char have_bang; + struct command **commands; /* normally the results are stored here */ + size_t ncommands; + struct argument **arguments; /* for TEXT_ROOT and VARIABLE_SUBSTITUTION_BRACKET, results are stored here */ + size_t narguments; + struct redirection **redirections; + size_t nredirections; + struct interpreter_state *parent; +}; + struct parser_context { - int tty_input; - int end_of_file_reached; - int premature_end_of_file; + char tty_input; + char end_of_file_reached; + char premature_end_of_file; + char do_not_run; size_t preparser_offset; size_t preparser_line_number; size_t line_continuations; size_t tokeniser_line_number; + size_t interpreter_offset; struct mode_stack *mode_stack; struct parser_state *parser_state; - struct here_document *here_documents_first; - struct here_document **here_documents_next; + struct here_document_stack *here_document_stack; + struct interpreter_state *interpreter_state; }; /* apsh.c */ -void initialise_parser_context(struct parser_context *ctx); +extern int login_shell; +extern int posix_mode; +void initialise_parser_context(struct parser_context *ctx, int need_tokeniser, int need_parser); /* preparser.c */ size_t 
parse(struct parser_context *ctx, char *code, size_t code_len, size_t *nremovedp); @@ -142,15 +251,40 @@ size_t parse(struct parser_context *ctx, char *code, size_t code_len, size_t *nr /* tokeniser.c */ void push_mode(struct parser_context *ctx, enum tokeniser_mode mode); void pop_mode(struct parser_context *ctx); +int check_extension(const char *token, size_t line_number); size_t parse_preparsed(struct parser_context *ctx, char *code, size_t code_len); /* parser.c */ +PURE_FUNC const char *get_redirection_token(enum redirection_type type); void push_end_of_file(struct parser_context *ctx); void push_whitespace(struct parser_context *ctx, int strict); -void push_semicolon(struct parser_context *ctx, int maybe); +void push_semicolon(struct parser_context *ctx, int actually_newline); size_t push_symbol(struct parser_context *ctx, char *token, size_t token_len); void push_quoted(struct parser_context *ctx, char *text, size_t text_len); void push_escaped(struct parser_context *ctx, char *text, size_t text_len); void push_unquoted(struct parser_context *ctx, char *text, size_t text_len); void push_enter(struct parser_context *ctx, enum argument_type type); void push_leave(struct parser_context *ctx); + +/* interpreter.c */ +void interpret_and_eliminate(struct parser_context *ctx); + +/* special_builtins.c */ +#define LIST_SPECIAL_BUILTINS(_)\ + _(":", colon_main, CONST_FUNC) + +/* regular_builtins.c */ +#define LIST_REGULAR_BUILTINS(_)\ + _("true", true_main, CONST_FUNC)\ + _("false", false_main, CONST_FUNC)\ + _("pwd", pwd_main,) +/* "true" and "false" are defined as regular built-in shell utilities + * (that must be searched before PATH), not as stand-alone utilities, + * in POSIX (but vice verse in LSB). "pwd" is defined both as regular + * built-in shell utility and as a stand-alone utility. 
*/ + +#define X(SH_NAME, C_FUNCTION, C_ATTRIBUTES)\ + C_ATTRIBUTES int C_FUNCTION(int argc, char **argv); +LIST_SPECIAL_BUILTINS(X) +LIST_REGULAR_BUILTINS(X) +#undef X diff --git a/interpreter.c b/interpreter.c new file mode 100644 index 0000000..e4bca1a --- /dev/null +++ b/interpreter.c @@ -0,0 +1,970 @@ +/* See LICENSE file for copyright and license details. */ +#include "common.h" + + +#define LIST_RESERVED_WORDS(_)\ + _("!", BANG)\ + _("{", OPEN_CURLY)\ + _("}", CLOSE_CURLY)\ + _("case", CASE) /* (TODO) case patterns requires update to tokeniser */\ + _("do", DO)\ + _("done", DONE)\ + _("elif", ELIF)\ + _("else", ELSE)\ + _("esac", ESAC)\ + _("fi", FI)\ + _("for", FOR)\ + _("if", IF)\ + _("in", IN)\ + _("then", THEN)\ + _("until", UNTIL)\ + _("while", WHILE) + +#define X(S, C) ,C +enum reserved_word { + NOT_A_RESERVED_WORD = 0 + LIST_RESERVED_WORDS(X) +}; +#undef X + + +PURE_FUNC +static enum reserved_word +get_reserved_word(struct argument *argument) +{ + if (argument->type != UNQUOTED || argument->next_part) + return NOT_A_RESERVED_WORD; +#define X(S, C)\ + if (argument->length == sizeof(S) - 1 && !strcmp(argument->text, S))\ + return C; + LIST_RESERVED_WORDS(X) +#undef X + return NOT_A_RESERVED_WORD; +} + + +static void +stray_command_terminal(struct command *command) +{ + switch (command->terminal) { + case DOUBLE_SEMICOLON: eprintf("stray ';;' at line %zu\n", command->terminal_line_number); return; + case SEMICOLON: eprintf("stray ';' at line %zu\n", command->terminal_line_number); return; + case NEWLINE: eprintf("stray at line %zu\n", command->terminal_line_number); return; + case AMPERSAND: eprintf("stray '&' at line %zu\n", command->terminal_line_number); return; + case SOCKET_PIPE: eprintf("stray '<>|' at line %zu\n", command->terminal_line_number); return; + case PIPE: eprintf("stray '|' at line %zu\n", command->terminal_line_number); return; + case PIPE_AMPERSAND: eprintf("stray '|&' at line %zu\n", command->terminal_line_number); return; + case 
AMPERSAND_PIPE: eprintf("stray '&|' at line %zu\n", command->terminal_line_number); return; + case AND: eprintf("stray '&&' at line %zu\n", command->terminal_line_number); return; + case OR: eprintf("stray '||' at line %zu\n", command->terminal_line_number); return; + default: + abort(); + } +} + + +static void +stray_reserved_word(struct argument *argument) +{ + eprintf("stray '%s' at line %zu\n", argument->text, argument->line_number); +} + + +static void +stray_redirection(struct command *command, struct argument *argument) +{ + enum redirection_type type = command->redirections[command->redirections_offset]->type; + eprintf("stray '%s' at line %zu\n", get_redirection_token(type), argument->line_number); +} + + +static void +free_text_argument(struct argument **argumentp) +{ + struct argument *argument = *argumentp; + *argumentp = argument->next_part; + free(argument->text); + free(argument); +} + + +static void +push_interpreted_argument(struct parser_context *ctx, struct argument *argument) +{ + ctx->interpreter_state->arguments = erealloc(ctx->interpreter_state->arguments, + (ctx->interpreter_state->narguments + 1) * + sizeof(*ctx->interpreter_state->arguments)); + ctx->interpreter_state->arguments[ctx->interpreter_state->narguments] = argument; + ctx->interpreter_state->narguments += 1; +} + + +/* Open a nested interpreter state: a COMMAND argument referring to the new state is appended to the current state's arguments, then the new state becomes current. */ +static void +push_state(struct parser_context *ctx, enum nesting_type dealing_with, size_t line_number) +{ + struct interpreter_state *new_state; + struct argument *new_argument; + new_state = ecalloc(1, sizeof(*new_state)); + new_state->parent = ctx->interpreter_state; + new_state->dealing_with = dealing_with; + new_argument = ecalloc(1, sizeof(*new_argument)); /* ecalloc, as for new_state: plain calloc may return NULL, which the next line would dereference */ + new_argument->type = COMMAND; + new_argument->command = new_state; + new_argument->line_number = line_number; + push_interpreted_argument(ctx, new_argument); + ctx->interpreter_state = new_state; +} + + +static void +pop_state(struct parser_context *ctx) +{ + ctx->interpreter_state = 
ctx->interpreter_state->parent; +} + + +static void +push_command(struct parser_context *ctx, struct command *command) +{ + free(command->redirections); + free(command->arguments); + command->redirections = ctx->interpreter_state->redirections; + command->nredirections = ctx->interpreter_state->nredirections; + command->arguments = ctx->interpreter_state->arguments; + command->narguments = ctx->interpreter_state->narguments; + command->have_bang = ctx->interpreter_state->have_bang; + ctx->interpreter_state->redirections = NULL; + ctx->interpreter_state->nredirections = 0; + ctx->interpreter_state->arguments = NULL; + ctx->interpreter_state->narguments = 0; + ctx->interpreter_state->have_bang = 0; + ctx->parser_state->commands[ctx->interpreter_offset] = NULL; + + ctx->interpreter_state->commands = erealloc(ctx->interpreter_state->commands, + (ctx->interpreter_state->ncommands + 1) * + sizeof(*ctx->interpreter_state->commands)); + ctx->interpreter_state->commands[ctx->interpreter_state->ncommands] = command; + ctx->interpreter_state->ncommands += 1; +} + + +static void +interpret_nested_code(struct argument *argument, enum nesting_type dealing_with, enum interpreter_requirement requirement) +{ + struct parser_state *code = argument->child; + struct parser_context ctx; + + initialise_parser_context(&ctx, 0, 0); + ctx.parser_state = code; + ctx.interpreter_state->dealing_with = dealing_with; + ctx.interpreter_state->requirement = requirement; + + interpret_and_eliminate(&ctx); + + if (ctx.parser_state->ncommands) + eprintf("premature end of subexpression at line %zu\n", argument->line_number); + + free(ctx.parser_state->commands); + free(ctx.parser_state->arguments); + free(ctx.parser_state->redirections); + + argument->command = ctx.interpreter_state; + free(code); +} + + +static void +validate_identifier_name(struct argument *argument, const char *type, const char *reserved_word) +{ + const char *s; + + if (!argument->text[0] || isdigit(argument->text[0])) + goto 
illegal; + + for (s = argument->text; *s; s++) + if (!isalpha(*s) && !isdigit(*s) && *s != '_') + goto illegal; + + return; + +illegal: + eprintf("illegal %s \"%s\" at line %zu for '%s'\n", + type, argument->text, argument->line_number, reserved_word); +} + + +/* Split the unquoted text of *argumentp at each '$' into a chain of UNQUOTED and VARIABLE parts, inserted after the original part; *argumentp is advanced to the last part produced from this text. Text without any '$' is left untouched. */ +static void +interpret_unquoted_text(struct argument **argumentp) +{ + struct argument *argument = *argumentp; + struct argument *new_argument; + char *text = argument->text; + char *beginning = text, *end = text; + size_t addendum_length; + int can_append = 1; + + while (*end && *end != '$') + end++; + + if (!*end) + return; + + /* Always give the first part its own copy of the (possibly empty) literal prefix: the original buffer is freed at the end of this function, and the append_text path reallocs argument->text */ + argument->length = (size_t)(end - beginning); + argument->text = emalloc(argument->length + 1); + memcpy(argument->text, beginning, argument->length); + argument->text[argument->length] = '\0'; + + do { + beginning = &end[1]; + switch (*beginning) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (isdigit(beginning[1])) { + weprintf("multiple digits found immediately after '$' at line %zu, " + "only taking one for position argument\n", argument->line_number); + } + /* fall through */ + case '@': + case '*': + case '?': + case '#': + case '-': + case '$': + case '!': + end = &beginning[1]; + break; + case '~': + if (check_extension("$~", argument->line_number)) { + /* Get user home, so you can use it in arguments (in the way Bash allows ~ to be used; + * but we cannot because we don't want to violate POSIX needlessly) that look like + * variable assignments. Instead of limiting usernames to [a-z_][a-z0-9_-]*[$]? + * we will limit them only to [a-zA-Z0-9_-]\+[$]? 
and accept $ at the end even though + * it is stupid */ + end = &beginning[1]; + if (isalpha(*end) || isdigit(*end) || *end == '_' || *end == '-') { + for (end = &end[1]; *end; end++) + if (!isalpha(*end) && !isdigit(*end) && *end != '_' && *end != '-') + break; + if (*end == '$') + end = &end[1]; + } + } else { + beginning--; + end++; /* keep this '$' as literal text; without stepping over it the loop never advances */ + goto append_text; + } + break; + default: + if (isalpha(*beginning) || *beginning == '_') { + for (end = &beginning[1]; isdigit(*end) || isalpha(*end) || *end == '_'; end++); + } else { + beginning--; + end++; /* keep this '$' as literal text; without stepping over it the loop never advances */ + goto append_text; + } + } + + new_argument = ecalloc(1, sizeof(*new_argument)); + new_argument->line_number = argument->line_number; + new_argument->next_part = argument->next_part; + argument = *argumentp = argument->next_part = new_argument; + argument->type = VARIABLE; + argument->length = (size_t)(end - beginning); + argument->text = emalloc(argument->length + 1); + memcpy(argument->text, beginning, argument->length); + argument->text[argument->length] = '\0'; + + beginning = end; + can_append = 0; + + append_text: + while (*end && *end != '$') + end++; + + if (end != beginning) { + if (can_append) { + addendum_length = (size_t)(end - beginning); + argument->text = erealloc(argument->text, argument->length + addendum_length + 1); + memcpy(&argument->text[argument->length], beginning, addendum_length); + argument->length += addendum_length; + argument->text[argument->length] = '\0'; + } else { + new_argument = ecalloc(1, sizeof(*new_argument)); + new_argument->line_number = argument->line_number; + new_argument->next_part = argument->next_part; + argument = *argumentp = argument->next_part = new_argument; + argument->type = UNQUOTED; + argument->length = (size_t)(end - beginning); + argument->text = emalloc(argument->length + 1); + memcpy(argument->text, beginning, argument->length); + argument->text[argument->length] = '\0'; + } + can_append = 1; + } + + } while (*end); + + free(text); +} + + +static void +translate_text_argument(struct argument *argument) +{ + struct interpreter_state *nested_state; + + for (; argument; argument = 
argument->next_part) { + switch (argument->type) { + case QUOTED: + /* keep as is */ + break; + + case UNQUOTED: + interpret_unquoted_text(&argument); + break; + + case QUOTE_EXPRESSION: + case ARITHMETIC_EXPRESSION: + case ARITHMETIC_SUBSHELL: + /* ARITHMETIC_EXPRESSION and ARITHMETIC_SUBSHELL can only be interpreted + * when evaluated as substitution can be used to insert operators */ + interpret_nested_code(argument, TEXT_ROOT, 0); + break; + + case VARIABLE_SUBSTITUTION: + interpret_nested_code(argument, VARIABLE_SUBSTITUTION_BRACKET, NEED_PREFIX_OR_VARIABLE_NAME); + nested_state = argument->command; + if (nested_state->requirement != NEED_INDEX_OR_OPERATOR_OR_END && + nested_state->requirement != NEED_INDEX_OR_END && + nested_state->requirement != NEED_OPERATOR_OR_END && + nested_state->requirement != NEED_END) { + eprintf("invalid variable substitution at line %zu\n", argument->line_number); + } + break; + + case BACKQUOTE_EXPRESSION: + case SUBSHELL_SUBSTITUTION: + case PROCESS_SUBSTITUTION_INPUT: + case PROCESS_SUBSTITUTION_OUTPUT: + case PROCESS_SUBSTITUTION_INPUT_OUTPUT: + case SUBSHELL: + interpret_nested_code(argument, CODE_ROOT, NEED_COMMAND); + break; + + default: + case COMMAND: + case REDIRECTION: + case FUNCTION_MARK: + case VARIABLE: + abort(); + } + } +} + + +static void +push_redirection(struct command *command, struct argument **argumentp) +{ + struct redirection *redirection; + struct argument *argument, *argument_end, *last_part; + + redirection = command->redirections[command->redirections_offset]; + command->redirections[command->redirections_offset] = NULL; + command->redirections_offset += 1; + + argument = *argumentp; + *argumentp = argument->next_part; + + redirection->right_hand_side = *argumentp; + last_part = NULL; + for (argument_end = redirection->right_hand_side; argument_end; argument_end = argument_end->next_part) { + if (argument_end->type != QUOTED && + argument_end->type != UNQUOTED && + argument_end->type != QUOTE_EXPRESSION 
&& + argument_end->type != BACKQUOTE_EXPRESSION && + argument_end->type != ARITHMETIC_EXPRESSION && + argument_end->type != VARIABLE_SUBSTITUTION && + argument_end->type != SUBSHELL_SUBSTITUTION) + break; + last_part = argument_end; + } + + if (!last_part) { + eprintf("missing right-hand side of '%s' at line %zu\n", + get_redirection_token(redirection->type), argument->line_number); + } + + *argumentp = last_part->next_part; + last_part->next_part = NULL; + free(argument); + + if (redirection->left_hand_side) + translate_text_argument(redirection->left_hand_side); + translate_text_argument(redirection->right_hand_side); +} + + +static void +push_argument(struct parser_context *ctx, struct argument **argumentp) +{ + struct argument *argument = *argumentp, *last_part; + + if (argument->type == REDIRECTION || argument->type == FUNCTION_MARK) { + *argumentp = argument->next_part; + argument->next_part = NULL; + + } else { + for (last_part = argument; last_part->next_part; last_part = last_part->next_part) + if (last_part->next_part->type == REDIRECTION || last_part->next_part->type == FUNCTION_MARK) + break; + *argumentp = last_part->next_part; + last_part->next_part = NULL; + + translate_text_argument(argument); + } + + push_interpreted_argument(ctx, argument); +} + + +/* Append a new argument of the given type holding a NUL-terminated copy of text[0..text_length); its line number is taken from `argument`. */ +static void +push_typed_text(struct parser_context *ctx, struct argument *argument, char *text, size_t text_length, enum argument_type type) +{ + struct argument *new_argument; + + new_argument = ecalloc(1, sizeof(*new_argument)); /* size of the struct, not of the pointer: the stores below would otherwise overflow the allocation */ + new_argument->type = type; + new_argument->line_number = argument->line_number; + new_argument->length = text_length; + new_argument->text = emalloc(text_length + 1); + memcpy(new_argument->text, text, text_length); + new_argument->text[text_length] = '\0'; + + push_interpreted_argument(ctx, new_argument); +} + + +static void +push_unquoted_segment(struct parser_context *ctx, struct argument *argument, char *text, size_t text_length) /* TODO (must handle $) */ +{ +} + + 
+static void +push_variable(struct parser_context *ctx, struct argument *argument, char *text, size_t text_length) +{ + push_typed_text(ctx, argument, text, text_length, VARIABLE); +} + + +static void +push_operator(struct parser_context *ctx, struct argument *argument, char *token, size_t token_length) +{ + push_typed_text(ctx, argument, token, token_length, OPERATOR); +} + + +/* Consume one argument part inside ${…}: unquoted text is split into VARIABLE and OPERATOR arguments according to the current requirement state; other parts are only accepted where operand text is allowed. *argumentp is advanced to the next part. */ +static void +push_variable_substitution_argument(struct parser_context *ctx, struct command *command, struct argument **argumentp) +{ +#define IS_SPECIAL_PARAMETER(C)\ + ((C) == '@' || (C) == '*' || (C) == '?' || (C) == '#' || (C) == '$' || (C) == '!') + + struct argument *argument; + size_t length, line_number; + char *s; + + argument = *argumentp; + *argumentp = argument->next_part; + argument->next_part = NULL; + + line_number = argument->line_number; + + if (argument->type == UNQUOTED) { + for (s = argument->text; *s;) { + if (ctx->interpreter_state->requirement == NEED_PREFIX_OR_VARIABLE_NAME) { + if (s[0] == '_' || isalnum(s[0]) || (s[0] == '~' && check_extension("~", line_number))) { + ctx->interpreter_state->requirement = NEED_INDEX_OR_OPERATOR_OR_END; + variable_or_tilde: + length = 1; + while (s[length] == '_' || isalnum(s[length]) || (s[0] == '~' && s[length] == '-')) + length += 1; + if (s[0] == '~' && s[length] == '$') + length += 1; + push_variable(ctx, argument, s, length); + s = &s[length]; + } else if (IS_SPECIAL_PARAMETER(s[1])) { + if (s[0] == '!' && check_extension("!", line_number)) + ctx->interpreter_state->requirement = NEED_INDEX_OR_SUFFIX_OR_END; + else if (s[0] == '#') + ctx->interpreter_state->requirement = NEED_INDEX_OR_END; + else + goto bad_syntax; + push_operator(ctx, argument, &s[0], 1); + push_variable(ctx, argument, &s[1], 1); + s = &s[2]; + } else if (s[1] == '_' || isalnum(s[1]) || (s[1] == '~' && check_extension("~", line_number))) { + if (s[0] == '!' 
&& check_extension("!", line_number)) + ctx->interpreter_state->requirement = NEED_INDEX_OR_SUFFIX_OR_END; + else if (s[0] == '#') + ctx->interpreter_state->requirement = NEED_INDEX_OR_END; + else + goto bad_syntax; + push_operator(ctx, argument, s, 1); + s = &s[1]; + goto variable_or_tilde; + } else if (IS_SPECIAL_PARAMETER(s[0])) { + ctx->interpreter_state->requirement = NEED_INDEX_OR_OPERATOR_OR_END; + push_variable(ctx, argument, s, 1); + s = &s[1]; + } else { + goto bad_syntax; + } + + } else if (ctx->interpreter_state->requirement == NEED_INDEX_OR_OPERATOR_OR_END) { + if (s[0] == '[') { + ctx->interpreter_state->requirement = NEED_OPERATOR_OR_END; + index: + ; /* TODO push INDEX substate that exits on ] (the null statement is needed: before C23 a label must be followed by a statement) */ + } else { + operator: + ctx->interpreter_state->requirement = NO_REQUIREMENT; + if (s[0] == ':' && (s[1] == '-' || s[1] == '=' || s[1] == '?' || s[1] == '+')) { + length = 2; + } else if (s[0] == '-' || s[0] == '=' || s[0] == '?' || s[0] == '+') { + length = 1; + } else if (s[0] == '%' || s[0] == '#' || + (s[0] == ',' && check_extension(s[1] == s[0] ? ",," : ",", line_number)) || + (s[0] == '^' && check_extension(s[1] == s[0] ? 
"^^" : "^", line_number))) { + if (s[1] == s[0]) + length = 2; + else + length = 1; + } else if (s[0] == '/' && check_extension("/", line_number)) { + ctx->interpreter_state->requirement = NEED_TEXT_OR_SLASH; + length = 1; + } else if (s[0] == ':' && check_extension(":", line_number)) { + ctx->interpreter_state->requirement = NEED_TEXT_OR_COLON; + length = 1; + } else if (s[0] == '@' && check_extension("@", line_number)) { + ctx->interpreter_state->requirement = NEED_AT_OPERAND; + length = 1; + } else { + goto bad_syntax; + } + push_operator(ctx, argument, s, length); /* the operator is `length` characters long (1 or 2), not always 2 */ + s = &s[length]; + } + + } else if (ctx->interpreter_state->requirement == NEED_INDEX_OR_SUFFIX_OR_END) { + ctx->interpreter_state->requirement = NEED_END; + if (s[0] == '[') { + goto index; + } else if (s[0] == '*' || s[0] == '@') { + push_operator(ctx, argument, s, 1); + s = &s[1]; + } else { + goto bad_syntax; + } + + } else if (ctx->interpreter_state->requirement == NEED_INDEX_OR_END) { + ctx->interpreter_state->requirement = NEED_END; + if (s[0] == '[') { + goto index; + } else { + goto bad_syntax; + } + + } else if (ctx->interpreter_state->requirement == NEED_OPERATOR_OR_END) { + if (s[0] == '[') + goto bad_syntax; + else + goto operator; + + } else if (ctx->interpreter_state->requirement == NEED_END) { + goto bad_syntax; + + } else if (ctx->interpreter_state->requirement == NEED_AT_OPERAND) { + if (*s == 'U' || *s == 'u' || *s == 'L' || *s == 'Q' || *s == 'E' || + *s == 'P' || *s == 'A' || *s == 'K' || *s == 'a') { + ctx->interpreter_state->requirement = NEED_END; + push_operator(ctx, argument, s, 1); + s = &s[1]; + } else { + goto bad_syntax; + } + + } else if (ctx->interpreter_state->requirement == NEED_TEXT_OR_SLASH) { + length = 0; + while (s[length] && s[length] != '/') + length += 1; + if (length) { + push_unquoted_segment(ctx, argument, s, length); + s = &s[length]; + } + if (s[0]) { + ctx->interpreter_state->requirement = NO_REQUIREMENT; + push_operator(ctx, argument, s, 1); + s = &s[1]; + 
} + + } else if (ctx->interpreter_state->requirement == NEED_TEXT_OR_COLON) { + length = 0; + while (s[length] && s[length] != ':') + length += 1; + if (length) { + push_unquoted_segment(ctx, argument, s, length); + s = &s[length]; + } + if (s[0]) { + ctx->interpreter_state->requirement = NO_REQUIREMENT; + push_operator(ctx, argument, s, 1); + s = &s[1]; + } + + } else { + /* operand text after an operator: consume the rest of this part, otherwise the loop never advances and length may be read uninitialised */ + length = strlen(s); + push_unquoted_segment(ctx, argument, s, length); + s = &s[length]; + } + } + free(argument->text); + free(argument); + } else { + if (ctx->interpreter_state->requirement != NO_REQUIREMENT && + ctx->interpreter_state->requirement != NEED_TEXT_OR_SLASH && + ctx->interpreter_state->requirement != NEED_TEXT_OR_COLON) { + /* reported here rather than via bad_syntax: s is only set for unquoted text, so there is no character to print */ + eprintf("stray quoted text or substitution in bracketed variable substitution at line %zu\n", line_number); + } else if (argument->type == QUOTED) { + push_interpreted_argument(ctx, argument); + } else { + push_argument(ctx, &argument); + } + } + + return; + +bad_syntax: + eprintf("stray '%c' in bracketed variable substitution at line %zu\n", *s, line_number); + +#undef IS_SPECIAL_PARAMETER +} + + +void +interpret_and_eliminate(struct parser_context *ctx) +{ + size_t interpreted = 0, arg_i; + struct command *command; + struct argument *argument, *next_argument; + enum reserved_word reserved_word; + + if (ctx->here_document_stack && ctx->here_document_stack->first) { + ctx->here_document_stack->interpret_when_empty = 1; + return; + } + + for (; ctx->interpreter_offset < ctx->parser_state->ncommands; ctx->interpreter_offset++) { + command = ctx->parser_state->commands[ctx->interpreter_offset]; + argument = NULL; + + if (ctx->interpreter_state->dealing_with == TEXT_ROOT) { + ctx->interpreter_state->requirement = NEED_VALUE; + } else if (ctx->interpreter_state->dealing_with != FOR_STATEMENT && + ctx->interpreter_state->dealing_with != VARIABLE_SUBSTITUTION_BRACKET) { + ctx->interpreter_state->requirement = NEED_COMMAND; + } + + for (arg_i = 0; argument || arg_i < command->narguments; arg_i += !argument) { + if (!argument) + argument = command->arguments[arg_i]; + + /* TODO Implement alias 
substitution + * + * Unless a word was quoted/backslashed, it is subject + * to alias substitution if it is the first argument + * of a command (after any previous alias substitution) + * or if it immediately follows an alias substitution + * resulting in an unquoted whitespace at the end. + * However, if the word is a reserved word (which may + * indeed the name of an alias) it shall not be subject + * to alias substitution if it has meaning in the context + * it appears in (for example: alias while=x be expanded + * for followed by the expansion of alias echo='echo ' + * but not if it is the first word in a command). Creating + * aliases named after reserved words is stupid and we + * should only allow it in POSIX mode. + * + * (Alias substitution occurs before the grammar is + * interpreted, meaning definition an alias does not + * modify already declared function that use a command + * with the same name as the alias.) + * + * The result of alias substition is subject to + * alias substition, however (to avoid infinite loop), + * already expanded aliases shall not be recognised. 
+ */ + + if (ctx->interpreter_state->requirement == NEED_COMMAND && + (reserved_word = get_reserved_word(argument))) { + switch (reserved_word) { + case BANG: + if (ctx->interpreter_state->disallow_bang) + stray_reserved_word(argument); + ctx->interpreter_state->disallow_bang = 1; + ctx->interpreter_state->have_bang = 1; + break; + + case OPEN_CURLY: + open_curly: + push_state(ctx, CURLY_NESTING, argument->line_number); + goto new_command; + + case CLOSE_CURLY: + if (ctx->interpreter_state->dealing_with != CURLY_NESTING) + stray_reserved_word(argument); + pop_state(ctx); + ctx->interpreter_state->requirement = NEED_COMMAND_END; + break; + + case CASE: /* (TODO) */ + eprintf("reserved word 'case' (at line %zu) has not been implemented yet\n", + argument->line_number); + /* NEWLINEs surrounding 'in' shall be ignored; ';' is not allowed */ + break; + + case DO: + if (ctx->interpreter_state->dealing_with != REPEAT_CONDITIONAL) + stray_reserved_word(argument); + pop_state(ctx); + do_keyword: + push_state(ctx, DO_CLAUSE, argument->line_number); + goto new_command; + + case DONE: + if (ctx->interpreter_state->dealing_with != DO_CLAUSE) + stray_reserved_word(argument); + pop_state(ctx); + pop_state(ctx); + ctx->interpreter_state->requirement = NEED_COMMAND_END; + break; + + case ELIF: + if (ctx->interpreter_state->dealing_with != IF_CLAUSE) + stray_reserved_word(argument); + pop_state(ctx); + push_state(ctx, IF_CONDITIONAL, argument->line_number); + goto new_command; + + case ELSE: + if (ctx->interpreter_state->dealing_with != IF_CLAUSE) + stray_reserved_word(argument); + pop_state(ctx); + push_state(ctx, ELSE_CLAUSE, argument->line_number); + goto new_command; + + case ESAC: + stray_reserved_word(argument); + break; + + case FI: + if (ctx->interpreter_state->dealing_with != IF_CLAUSE && + ctx->interpreter_state->dealing_with != ELSE_CLAUSE) + stray_reserved_word(argument); + pop_state(ctx); + pop_state(ctx); + ctx->interpreter_state->requirement = NEED_COMMAND_END; + 
break; + + case FOR: + push_state(ctx, FOR_STATEMENT, argument->line_number); + ctx->interpreter_state->requirement = NEED_VARIABLE_NAME; + free_text_argument(&argument); + ctx->interpreter_state->allow_newline = 1; + continue; + + case IF: + push_state(ctx, IF_STATEMENT, argument->line_number); + push_state(ctx, IF_CONDITIONAL, argument->line_number); + goto new_command; + + case IN: + stray_reserved_word(argument); + break; + + case THEN: + if (ctx->interpreter_state->dealing_with != IF_CONDITIONAL) + stray_reserved_word(argument); + pop_state(ctx); + push_state(ctx, IF_CLAUSE, argument->line_number); + goto new_command; + + case UNTIL: + push_state(ctx, UNTIL_STATEMENT, argument->line_number); + push_state(ctx, REPEAT_CONDITIONAL, argument->line_number); + goto new_command; + + case WHILE: + push_state(ctx, WHILE_STATEMENT, argument->line_number); + push_state(ctx, REPEAT_CONDITIONAL, argument->line_number); + goto new_command; + + default: + case NOT_A_RESERVED_WORD: + abort(); + } + + free_text_argument(&argument); + ctx->interpreter_state->allow_newline = 0; + continue; + + new_command: + ctx->interpreter_state->requirement = NEED_COMMAND; + free_text_argument(&argument); + ctx->interpreter_state->allow_newline = 1; + continue; + + } else if (ctx->interpreter_state->dealing_with == VARIABLE_SUBSTITUTION_BRACKET) { + push_variable_substitution_argument(ctx, command, &argument); + + } else if (argument->type == REDIRECTION) { + if (ctx->interpreter_state->dealing_with == FOR_STATEMENT) + stray_redirection(command, argument); + push_redirection(command, &argument); + if (ctx->interpreter_state->requirement != NEED_FUNCTION_BODY) + ctx->interpreter_state->requirement = NO_REQUIREMENT; /* e.g. 
"type == FUNCTION_MARK) { + if (ctx->interpreter_state->requirement == NEED_FUNCTION_BODY || + ctx->interpreter_state->requirement == NEED_COMMAND_END || + ctx->interpreter_state->narguments != 1 || + ctx->interpreter_state->dealing_with == FOR_STATEMENT) + eprintf("stray '()' at line %zu\n", argument->line_number); + + next_argument = argument->next_part; + argument->next_part = NULL; + push_argument(ctx, &argument); + + /* swap position of () and function name to make it easier to identify */ + argument = ctx->interpreter_state->arguments[0]; + ctx->interpreter_state->arguments[0] = ctx->interpreter_state->arguments[1]; + ctx->interpreter_state->arguments[1] = argument; + + argument = next_argument; + ctx->interpreter_state->requirement = NEED_FUNCTION_BODY; + ctx->interpreter_state->allow_newline = 1; + + } else if (ctx->interpreter_state->requirement == NEED_FUNCTION_BODY) { + reserved_word = get_reserved_word(argument); + if (reserved_word == OPEN_CURLY) { + goto open_curly; + } else if (argument->type == SUBSHELL) { + ctx->interpreter_state->requirement = NEED_COMMAND_END; + push_argument(ctx, &argument); + } else { + eprintf("required function body or redirection at line %zu;\n", argument->line_number); + } + ctx->interpreter_state->allow_newline = 0; + + } else if (ctx->interpreter_state->requirement == NEED_VARIABLE_NAME) { + if (ctx->interpreter_state->dealing_with == FOR_STATEMENT) { + if (argument->type != UNQUOTED) + eprintf("required variable name after 'for' at line %zu\n", argument->line_number); + validate_identifier_name(argument, "variable name", "for"); + argument->type = VARIABLE; + push_interpreted_argument(ctx, argument); + ctx->interpreter_state->requirement = NEED_IN_OR_DO; + ctx->interpreter_state->allow_newline = 1; + } else { + abort(); + } + + } else if (ctx->interpreter_state->requirement == NEED_DO) { + reserved_word = get_reserved_word(argument); + if (reserved_word != DO) + stray_reserved_word(argument); + goto do_keyword; + + } 
else if (ctx->interpreter_state->requirement == NEED_IN_OR_DO) { + reserved_word = get_reserved_word(argument); + if (reserved_word == DO) { + push_command(ctx, command); + goto do_keyword; + } else if (reserved_word == IN) { + ctx->interpreter_state->requirement = NEED_VALUE; + ctx->interpreter_state->allow_newline = 0; + } else { + stray_reserved_word(argument); + } + + } else { + if (ctx->interpreter_state->requirement == NEED_COMMAND_END) { + eprintf("required %s at line %zu after control statement\n", + "';', '&', '||', '&&', '|', '&|', '|&', '<>|', or redirection", + argument->line_number); + } + + if (ctx->interpreter_state->requirement != NEED_VALUE) + ctx->interpreter_state->requirement = NO_REQUIREMENT; + if (argument->type == SUBSHELL || argument->type == ARITHMETIC_SUBSHELL) + if (ctx->interpreter_state->narguments == 0) + ctx->interpreter_state->requirement = NEED_COMMAND_END; + + push_argument(ctx, &argument); + ctx->interpreter_state->allow_newline = 0; + } + } + + if (ctx->interpreter_state->dealing_with == TEXT_ROOT || + ctx->interpreter_state->dealing_with == VARIABLE_SUBSTITUTION_BRACKET) { + free(command->redirections); + free(command->arguments); + free(command); + continue; + } + + if (ctx->interpreter_state->allow_newline) { + ctx->interpreter_state->allow_newline = 0; + if (command->terminal == NEWLINE) { + free(command->redirections); + free(command->arguments); + free(command); + continue; + } + } + + if ((ctx->interpreter_state->requirement == NEED_COMMAND && command->narguments == arg_i) || + ctx->interpreter_state->requirement == NEED_FUNCTION_BODY || + ctx->interpreter_state->requirement == NEED_VARIABLE_NAME) + stray_command_terminal(command); + + if (ctx->interpreter_state->requirement == NEED_IN_OR_DO) { + ctx->interpreter_state->requirement = NEED_DO; + if (command->terminal != SEMICOLON && command->terminal != NEWLINE) + stray_command_terminal(command); + } + + push_command(ctx, command); + + if (command->terminal == SEMICOLON || 
+ command->terminal == NEWLINE || + command->terminal == AMPERSAND) { + ctx->interpreter_state->disallow_bang = 0; + if (ctx->interpreter_state->dealing_with == MAIN_BODY) { + /* TODO execute and destroy queued up commands (also destroy list) */ + interpreted = ctx->interpreter_offset + 1; + } + } else if (command->terminal == DOUBLE_SEMICOLON) { + stray_command_terminal(command); + } else { + ctx->interpreter_state->disallow_bang = 1; + } + } + + memmove(&ctx->parser_state->commands[0], + &ctx->parser_state->commands[interpreted], + ctx->parser_state->ncommands - interpreted); + ctx->parser_state->ncommands -= interpreted; + ctx->interpreter_offset -= interpreted; + + if (!ctx->parser_state->ncommands) { + free(ctx->parser_state->commands); + ctx->parser_state->commands = NULL; + } +} diff --git a/parser.c b/parser.c index c3da716..957ca96 100644 --- a/parser.c +++ b/parser.c @@ -2,6 +2,46 @@ #include "common.h" +const char * +get_redirection_token(enum redirection_type type) +{ + switch (type) { + case REDIRECT_INPUT: + return "<"; + case REDIRECT_INPUT_TO_FD: + return "<&"; + case REDIRECT_OUTPUT: + return ">"; + case REDIRECT_OUTPUT_APPEND: + return ">>"; + case REDIRECT_OUTPUT_CLOBBER: + return ">|"; + case REDIRECT_OUTPUT_TO_FD: + return ">&"; + case REDIRECT_OUTPUT_AND_STDERR: + return "&>"; + case REDIRECT_OUTPUT_AND_STDERR_APPEND: + return "&>>"; + case REDIRECT_OUTPUT_AND_STDERR_CLOBBER: + return "&>|"; + case REDIRECT_OUTPUT_AND_STDERR_TO_FD: + return "&>&"; + case REDIRECT_INPUT_OUTPUT: + return "<>"; + case REDIRECT_INPUT_OUTPUT_TO_FD: + return "<>&"; + case HERE_STRING: + return "<<<"; + case HERE_DOCUMENT: + return "<<"; + case HERE_DOCUMENT_INDENTED: + return "<<-"; + default: + abort(); + } +} + + void push_end_of_file(struct parser_context *ctx) { @@ -44,6 +84,7 @@ push_command_terminal(struct parser_context *ctx, enum command_terminal terminal new_command = ecalloc(1, sizeof(*new_command)); 
ctx->parser_state->commands[ctx->parser_state->ncommands++] = new_command; new_command->terminal = terminal; + new_command->terminal_line_number = ctx->tokeniser_line_number; new_command->arguments = ctx->parser_state->arguments; new_command->narguments = ctx->parser_state->narguments; new_command->redirections = ctx->parser_state->redirections; @@ -53,22 +94,17 @@ push_command_terminal(struct parser_context *ctx, enum command_terminal terminal ctx->parser_state->redirections = NULL; ctx->parser_state->nredirections = 0; - if (!ctx->parser_state->parent) { - if (terminal == DOUBLE_SEMICOLON || terminal == SEMICOLON || terminal == AMPERSAND) { - /* TODO unless in a special construct such as while, case, for, if, or {, run and clear - * also require that any here-document is specified (count them and run when given); - * if terminal == AMPERSAND: perform parser_state->parent && !ctx->do_not_run) + if (terminal == DOUBLE_SEMICOLON || terminal == SEMICOLON || terminal == NEWLINE || terminal == AMPERSAND) + interpret_and_eliminate(ctx); } void -push_semicolon(struct parser_context *ctx, int maybe) +push_semicolon(struct parser_context *ctx, int actually_newline) { - if (!maybe || ctx->parser_state->narguments) - push_command_terminal(ctx, SEMICOLON); + if (!actually_newline || ctx->parser_state->narguments) + push_command_terminal(ctx, actually_newline ? 
NEWLINE : SEMICOLON); } @@ -81,7 +117,10 @@ push_new_argument_part(struct parser_context *ctx, enum argument_type type) new_part->type = type; new_part->line_number = ctx->tokeniser_line_number; - if (ctx->parser_state->current_argument_end) { + if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) { + ctx->here_document_stack->first->argument_end->next_part = new_part; + ctx->here_document_stack->first->argument_end = new_part; + } else if (ctx->parser_state->current_argument_end) { ctx->parser_state->current_argument_end->next_part = new_part; ctx->parser_state->current_argument_end = new_part; } else { @@ -91,6 +130,49 @@ push_new_argument_part(struct parser_context *ctx, enum argument_type type) } +PURE_FUNC +static int +is_numeric_argument(struct argument *argument) +{ + char *p; + + do { + if (argument->type != UNQUOTED) + return 0; + + for (p = argument->text; *p; p++) + if (!isdigit(*p)) + return 0; + + } while ((argument = argument->next_part)); + + return 1; +} + + +PURE_FUNC +static int +is_variable_reference(struct argument *argument) +{ + char *p; + + if (argument->type != UNQUOTED || isdigit(argument->text[0]) || argument->text[0] == '$') + return 0; + + do { + if (argument->type != UNQUOTED) + return 0; + + for (p = argument->text; *p; p++) + if (!isalnum(*p) && *p != '_') + return p[0] == '$' && !p[1] && !argument->next_part; + + } while ((argument = argument->next_part)); + + return 0; +} + + static void push_redirection(struct parser_context *ctx, enum redirection_type type) { @@ -113,9 +195,19 @@ push_redirection(struct parser_context *ctx, enum redirection_type type) type == REDIRECT_OUTPUT_AND_STDERR || type == REDIRECT_OUTPUT_AND_STDERR_APPEND || type == REDIRECT_OUTPUT_AND_STDERR_CLOBBER || - type == REDIRECT_OUTPUT_AND_STDERR_TO_FD) { + type == REDIRECT_OUTPUT_AND_STDERR_TO_FD || + !is_numeric_argument(ctx->parser_state->current_argument)) { + if (is_variable_reference(ctx->parser_state->current_argument)) { + if (posix_mode) { + weprintf("the 
'$%s' token (at line %zu) is not portable, not parsing as it\n", + get_redirection_token(type), ctx->tokeniser_line_number); + } else { + goto argument_is_left_hand_side; + } + } push_whitespace(ctx, 1); } else { + argument_is_left_hand_side: new_redirection->left_hand_side = ctx->parser_state->current_argument; } } @@ -130,8 +222,8 @@ push_redirection(struct parser_context *ctx, enum redirection_type type) new_here_document->redirection = new_redirection; new_here_document->argument = new_argument; new_here_document->next = NULL; - *ctx->here_documents_next = new_here_document; - ctx->here_documents_next = &new_here_document->next; + *ctx->here_document_stack->next = new_here_document; + ctx->here_document_stack->next = &new_here_document->next; } ctx->parser_state->need_right_hand_side = 1; @@ -159,39 +251,41 @@ size_t push_symbol(struct parser_context *ctx, char *token, size_t token_len) { #define LIST_SYMBOLS(_)\ - _("<<<", push_redirection(ctx, HERE_STRING))\ - _("<<-", push_redirection(ctx, HERE_DOCUMENT_INDENTED))\ - _("<>(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT_OUTPUT, NORMAL_MODE))\ - _("<>|", push_command_terminal(ctx, SOCKET_PIPE))\ - _("<>&", push_redirection(ctx, REDIRECT_INPUT_OUTPUT_TO_FD))\ - _("&>>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_APPEND))\ - _("&>&", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_TO_FD))\ - _("&>|", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_CLOBBER))\ - _("()", push_function_mark(ctx))\ - _("((", push_shell_io(ctx, ARITHMETIC_SUBSHELL, RRB_QUOTE_MODE))\ - _(";;", push_command_terminal(ctx, DOUBLE_SEMICOLON))\ - _("<(", push_shell_io(ctx, PROCESS_SUBSTITUTION_OUTPUT, NORMAL_MODE))\ - _("<<", push_redirection(ctx, HERE_DOCUMENT))\ - _("<>", push_redirection(ctx, REDIRECT_INPUT_OUTPUT))\ - _("<&", push_redirection(ctx, REDIRECT_INPUT_TO_FD))\ - _(">(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT, NORMAL_MODE))\ - _(">>", push_redirection(ctx, REDIRECT_OUTPUT_APPEND))\ - _(">&", 
push_redirection(ctx, REDIRECT_OUTPUT_TO_FD))\ - _(">|", push_redirection(ctx, REDIRECT_OUTPUT_CLOBBER))\ - _("||", push_command_terminal(ctx, OR))\ - _("|&", push_command_terminal(ctx, PIPE_AMPERSAND))\ - _("&&", push_command_terminal(ctx, AND))\ - _("&|", push_command_terminal(ctx, PIPE_AMPERSAND)) /* synonym for |& to match &> */\ - _("&>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR))\ - _("(", push_shell_io(ctx, SUBSHELL, NORMAL_MODE))\ - _(";", push_semicolon(ctx, 0))\ - _("<", push_redirection(ctx, REDIRECT_INPUT))\ - _(">", push_redirection(ctx, REDIRECT_OUTPUT))\ - _("|", push_command_terminal(ctx, PIPE))\ - _("&", push_command_terminal(ctx, AMPERSAND)) - -#define X(SYMBOL, ACTION)\ - if (token_len >= sizeof(SYMBOL) - 1 && !strncmp(token, SYMBOL, sizeof(SYMBOL) - 1)) {\ + _(0, "<<<", push_redirection(ctx, HERE_STRING))\ + _(1, "<<-", push_redirection(ctx, HERE_DOCUMENT_INDENTED))\ + _(0, "<>(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT_OUTPUT, NORMAL_MODE))\ + _(0, "<>|", push_command_terminal(ctx, SOCKET_PIPE))\ + _(1, "<>&", push_redirection(ctx, REDIRECT_INPUT_OUTPUT_TO_FD))\ + _(0, "&>>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_APPEND))\ + _(0, "&>&", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_TO_FD))\ + _(0, "&>|", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_CLOBBER))\ + _(1, "()", push_function_mark(ctx))\ + _(0, "((", push_shell_io(ctx, ARITHMETIC_SUBSHELL, RRB_QUOTE_MODE))\ + _(1, ";;", push_command_terminal(ctx, DOUBLE_SEMICOLON))\ + _(0, "<(", push_shell_io(ctx, PROCESS_SUBSTITUTION_OUTPUT, NORMAL_MODE))\ + _(1, "<<", push_redirection(ctx, HERE_DOCUMENT))\ + _(1, "<>", push_redirection(ctx, REDIRECT_INPUT_OUTPUT))\ + _(1, "<&", push_redirection(ctx, REDIRECT_INPUT_TO_FD))\ + _(0, ">(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT, NORMAL_MODE))\ + _(1, ">>", push_redirection(ctx, REDIRECT_OUTPUT_APPEND))\ + _(1, ">&", push_redirection(ctx, REDIRECT_OUTPUT_TO_FD))\ + _(1, ">|", push_redirection(ctx, 
REDIRECT_OUTPUT_CLOBBER))\ + _(1, "||", push_command_terminal(ctx, OR))\ + _(0, "|&", push_command_terminal(ctx, PIPE_AMPERSAND))\ + _(1, "&&", push_command_terminal(ctx, AND))\ + _(0, "&|", push_command_terminal(ctx, AMPERSAND_PIPE))\ + _(0, "&>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR))\ + _(1, "(", push_shell_io(ctx, SUBSHELL, NORMAL_MODE))\ + _(1, ";", push_semicolon(ctx, 0))\ + _(1, "<", push_redirection(ctx, REDIRECT_INPUT))\ + _(1, ">", push_redirection(ctx, REDIRECT_OUTPUT))\ + _(1, "|", push_command_terminal(ctx, PIPE))\ + _(1, "&", push_command_terminal(ctx, AMPERSAND)) + +#define X(PORTABLE, SYMBOL, ACTION)\ + if (token_len >= sizeof(SYMBOL) - 1 &&\ + !strncmp(token, SYMBOL, sizeof(SYMBOL) - 1) &&\ + (PORTABLE || check_extension(SYMBOL, ctx->tokeniser_line_number))) {\ ACTION;\ return token_len;\ } @@ -208,13 +302,22 @@ push_text(struct parser_context *ctx, char *text, size_t text_len, enum argument { struct argument *arg_part; - ctx->parser_state->need_right_hand_side = 0; + if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) { + type = QUOTED; + if (ctx->here_document_stack->first->argument_end->type != type || + ctx->here_document_stack->first->argument_end->line_number != ctx->tokeniser_line_number) + push_new_argument_part(ctx, type); + arg_part = ctx->here_document_stack->first->argument_end; - if (!ctx->parser_state->current_argument_end || - ctx->parser_state->current_argument_end->type != type || - ctx->parser_state->current_argument_end->line_number != ctx->tokeniser_line_number) - push_new_argument_part(ctx, type); - arg_part = ctx->parser_state->current_argument_end; + } else { + ctx->parser_state->need_right_hand_side = 0; + + if (!ctx->parser_state->current_argument_end || + ctx->parser_state->current_argument_end->type != type || + ctx->parser_state->current_argument_end->line_number != ctx->tokeniser_line_number) + push_new_argument_part(ctx, type); + arg_part = ctx->parser_state->current_argument_end; + } arg_part->text = 
/*
 * Encode a code point as UTF-8
 *
 * Values above 0x10FFFF are encoded using the extended
 * (pre-RFC 3629) scheme with up to 6 bytes, and values
 * above 0x7FFFFFFF with a 7 byte sequence led by 0xFE
 *
 * @param   buf    Output buffer, must have room for 7 bytes;
 *                 no NUL terminator is written
 * @param   value  The code point to encode
 * @return         The number of bytes written to `buf`
 */
static size_t
encode_utf8(char *buf, uint32_t value)
{
	size_t i, len;

	if (value <= 0x7F) {
		buf[0] = (char)value;
		return 1;
	}

	if      (value <= 0x000007FFUL) len = 2;
	else if (value <= 0x0000FFFFUL) len = 3;
	else if (value <= 0x001FFFFFUL) len = 4;
	else if (value <= 0x03FFFFFFUL) len = 5;
	else if (value <= 0x7FFFFFFFUL) len = 6;
	else                            len = 7;

	/* Continuation bytes carry 6 bits each and are filled in from
	 * the last byte towards the first: least significant bits last */
	for (i = len - 1; i; i--) {
		buf[i] = (char)(unsigned char)((value & 0x3F) | 0x80);
		value >>= 6;
	}

	/* The lead byte has its `len` most significant bits set, followed
	 * by a cleared bit and then whatever bits remain of the value */
	buf[0] = (char)(unsigned char)((value | (0xFFU << (8 - len))) & 0xFF);

	return len;
}
& 15) > 255 - (int)value) + break; + value *= 8; + value |= (uint32_t)(text[r] & 15); + } + if (value) { + text[w++] = (char)value; + } else { + weprintf("ignoring NUL byte result from $''-expression at line %zu\n", + ctx->tokeniser_line_number); + } + } else if (text[r + 1] == 'x' && text_len - r >= 3 && isxdigit(text[r + 2])) { + value = 0; + for (r += 2, n = 0; n < 2 && isxdigit(text[r]); r += 1, n += 1) { + value *= 16; + value |= (uint32_t)((text[r] > '9' ? 9 : 0) + (text[r] & 15)); + } + if (value) { + text[w++] = (char)value; + } else { + weprintf("ignoring NUL byte result from $''-expression at line %zu\n", + ctx->tokeniser_line_number); + } + } else if (text[r + 1] == 'u' && text_len - r >= 3 && isxdigit(text[r + 2])) { + value = 0; + for (r += 2, n = 0; n < 4 && isxdigit(text[r]); r += 1, n += 1) { + value *= 16; + value |= (uint32_t)((text[r] > '9' ? 9 : 0) + (text[r] & 15)); + } + if (value) { + w += encode_utf8(&text[w], value); + } else { + weprintf("ignoring NUL byte result from $''-expression at line %zu\n", + ctx->tokeniser_line_number); + } + } else if (text[r + 1] == 'U') { + value = 0; + for (r += 2, n = 0; n < 8 && isxdigit(text[r]); r += 1, n += 1) { + value *= 16; + value |= (uint32_t)((text[r] > '9' ? 
9 : 0) + (text[r] & 15)); + } + if (value) { + w += encode_utf8(&text[w], value); + } else { + weprintf("ignoring NUL byte result from $''-expression at line %zu\n", + ctx->tokeniser_line_number); + } + } else if (text[r + 1] == 'c' && text_len - r >= 3) { + if (text[r + 2] & (' ' - 1)) { + text[w++] = (char)(text[r + 2] & (' ' - 1)); + } else { + weprintf("ignoring NUL byte result from $''-expression at line %zu\n", + ctx->tokeniser_line_number); + } + r += 3; + } else { + text[w++] = text[r++]; + } + } else { + text[w++] = text[r++]; + } + } + push_text(ctx, text, w, QUOTED); } @@ -250,7 +484,9 @@ push_enter(struct parser_context *ctx, enum argument_type type) { struct parser_state *new_state; - ctx->parser_state->need_right_hand_side = 0; + if (ctx->mode_stack->mode != HERE_DOCUMENT_MODE) + ctx->parser_state->need_right_hand_side = 0; + push_new_argument_part(ctx, type); new_state = ecalloc(1, sizeof(*new_state)); @@ -263,9 +499,48 @@ push_enter(struct parser_context *ctx, enum argument_type type) void push_leave(struct parser_context *ctx) { - if (ctx->mode_stack->mode == NORMAL_MODE) + struct parser_context subctx; + struct argument *argument; + char *code; + size_t code_length; + size_t parsed_length; + size_t arg_i; + + if (ctx->mode_stack->mode == NORMAL_MODE) { push_semicolon(ctx, 1); - /* TODO else if (ctx->mode_stack->mode == BQ_QUOTE_MODE), parse content */ - /* TODO validate subshell content */ + + } else if (ctx->mode_stack->mode == BQ_QUOTE_MODE) { + initialise_parser_context(&subctx, 1, 1); + subctx.do_not_run = 1; + subctx.end_of_file_reached = 1; + code = NULL; + code_length = 0; + for (arg_i = 0; arg_i < ctx->parser_state->narguments; arg_i++) { + argument = ctx->parser_state->arguments[arg_i]; + code = erealloc(code, code_length + argument->length); + memcpy(&code[code_length], argument->text, argument->length); + code_length += argument->length; + } + code = erealloc(code, code_length + 1); + code[code_length] = '\0'; + parsed_length = 
parse_preparsed(&subctx, code, code_length); + if (parsed_length < code_length || subctx.premature_end_of_file) { + eprintf("premature end of file backquote expression at line %zu\n", + ctx->parser_state->parent->current_argument_end->line_number); + } + free(code); + free(subctx.here_document_stack); + free(subctx.interpreter_state); + ctx->parser_state->parent->current_argument_end->child = subctx.parser_state; + + } else { + /* In quote modes we want everything in a dummy command + * to simplify the implementation of the interpreter. + * The command termination used here doesn't matter, + * neither does the line nummer (for it), the interpreter + * will only look at the argument list. */ + push_command_terminal(ctx, NEWLINE); + } + ctx->parser_state = ctx->parser_state->parent; } diff --git a/preparser.c b/preparser.c index 840209f..9ab8432 100644 --- a/preparser.c +++ b/preparser.c @@ -5,7 +5,7 @@ size_t parse(struct parser_context *ctx, char *code, size_t code_len, size_t *nremovedp) { - int end_of_file_reached; + char end_of_file_reached; size_t bytes_parsed = 0; end_of_file_reached = ctx->end_of_file_reached; diff --git a/regular_builtins.c b/regular_builtins.c new file mode 100644 index 0000000..5cdb4fe --- /dev/null +++ b/regular_builtins.c @@ -0,0 +1,67 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "common.h" + + +int +true_main(int argc, char **argv) +{ + (void) argc; + (void) argv; + return 0; +} + + +int +false_main(int argc, char **argv) +{ + (void) argc; + (void) argv; + return 1; +} + + +BUILTIN_USAGE(pwd_usage, "[-L | -P]") +int +pwd_main(int argc, char **argv) +{ + void (*usage)(void) = pwd_usage; + int physical = 0; + char *cwd = NULL; + size_t size = 64 / 2; + const char *pwd; + struct stat cst, pst; + + ARGBEGIN { + case 'L': + physical = 0; + break; + case 'P': + physical = 1; + break; + default: + usage(); + } ARGEND; + + if (argc) + weprintf("ignoring operands"); /* other implementations either warn or are silent, they don't fail */ + + for (;;) { + cwd = erealloc(cwd, size *= 2); + if (getcwd(cwd, size)) + break; + if (errno != ERANGE) + eprintf("getcwd %zu:", size); + } + + if (physical || !(pwd = getenv("PWD")) || *pwd != '/' || stat(pwd, &pst) || stat(cwd, &cst)) + puts(cwd); + else if (pst.st_dev == cst.st_dev && pst.st_ino == cst.st_ino) + puts(pwd); + else + puts(cwd); + + free(cwd); + if (fflush(stdout) || ferror(stdout)) + weprintf("fflush :"); + return 0; +} diff --git a/special_builtins.c b/special_builtins.c new file mode 100644 index 0000000..20e37ae --- /dev/null +++ b/special_builtins.c @@ -0,0 +1,11 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include "common.h" + + +int +colon_main(int argc, char **argv) +{ + (void) argc; + (void) argv; + return 0; +} diff --git a/tokeniser.c b/tokeniser.c index 63ff2fd..606726b 100644 --- a/tokeniser.c +++ b/tokeniser.c @@ -5,20 +5,169 @@ void push_mode(struct parser_context *ctx, enum tokeniser_mode mode) { - struct mode_stack *new = emalloc(sizeof(*new)); - new->mode = mode; - new->she_is_comment = 1; - new->previous = ctx->mode_stack; - ctx->mode_stack = new; + struct mode_stack *new_mode_stack; + struct here_document_stack *new_here_document_stack; + + if (mode == BQ_QUOTE_MODE) + weprintf("backquote expression found at line %zu, stop it!\n", ctx->tokeniser_line_number); + + if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) { + new_here_document_stack = ecalloc(1, sizeof(*new_here_document_stack)); + new_here_document_stack->next = &new_here_document_stack->first; + new_here_document_stack->previous = ctx->here_document_stack; + ctx->here_document_stack = new_here_document_stack; + } + + new_mode_stack = emalloc(sizeof(*new_mode_stack)); + new_mode_stack->mode = mode; + new_mode_stack->she_is_comment = 1; + new_mode_stack->previous = ctx->mode_stack; + ctx->mode_stack = new_mode_stack; } void pop_mode(struct parser_context *ctx) { - struct mode_stack *old = ctx->mode_stack; + struct mode_stack *old_mode_stack; + struct here_document_stack *old_here_document_stack; + struct here_document_stack *prev_here_document_stack; + + old_mode_stack = ctx->mode_stack; ctx->mode_stack = ctx->mode_stack->previous; - free(old); + free(old_mode_stack); + + if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) { + if (ctx->here_document_stack->first) { + if (posix_mode) { + eprintf("subshell expression closed at line %zu before here-documents, " + "this is non-portable\n", ctx->tokeniser_line_number); + } + prev_here_document_stack = ctx->here_document_stack->previous; + *ctx->here_document_stack->next = prev_here_document_stack->first; + ctx->here_document_stack->next = 
prev_here_document_stack->next; + ctx->here_document_stack->previous = prev_here_document_stack->previous; + ctx->here_document_stack->interpret_when_empty = prev_here_document_stack->interpret_when_empty; + free(prev_here_document_stack); + } else { + old_here_document_stack = ctx->here_document_stack; + ctx->here_document_stack = old_here_document_stack->previous; + free(old_here_document_stack); + } + } +} + + +static void +append_and_destroy_quote_to_here_document_terminator(struct here_document *here_document, struct parser_state *quote) +{ + struct argument *terminator, *part, *next_part; + size_t i; + + terminator = here_document->argument->next_part; + + for (i = 0; i < quote->narguments; i++) { + for (part = quote->arguments[i]; part; part = next_part) { + next_part = part->next_part; + if (part->type != QUOTED && part->type != UNQUOTED) { + eprintf("use of run-time evaluated expression as right-hand side " + "of %s operator (at line %zu) is illegal\n", + here_document->redirection->type == HERE_DOCUMENT_INDENTED ? "<<-" : "<<", + here_document->argument->line_number); + } + terminator->text = erealloc(terminator->text, terminator->length + part->length + 1); + memcpy(&terminator->text[terminator->length], part->text, part->length); + terminator->length += part->length; + terminator->text[terminator->length] = '\0'; + free(part->text); + free(part); + } + } + + free(quote->arguments); +} + +static void +get_here_document_terminator(struct parser_context *ctx) +{ + struct argument *terminator, *next_part; + struct parser_state *child; + + terminator = ctx->here_document_stack->first->argument->next_part; + if (!terminator || (terminator->type != QUOTED && terminator->type != UNQUOTED && terminator->type != QUOTE_EXPRESSION)) { + eprintf("missing right-hand side of %s operator at line %zu\n", + ctx->here_document_stack->first->redirection->type == HERE_DOCUMENT_INDENTED ? 
"<<-" : "<<", + ctx->here_document_stack->first->argument->line_number); + } else if (terminator->type == QUOTE_EXPRESSION) { + child = terminator->child; + terminator->type = QUOTED; + terminator->text = ecalloc(1, 1); + terminator->length = 0; + append_and_destroy_quote_to_here_document_terminator(ctx->here_document_stack->first, child); + free(child); + } + + while ((next_part = terminator->next_part)) { + switch (next_part->type) { + case QUOTED: + terminator->type = QUOTED; + /* fall through */ + case UNQUOTED: + terminator->text = erealloc(terminator->text, terminator->length + next_part->length + 1); + memcpy(&terminator->text[terminator->length], next_part->text, next_part->length); + terminator->length += next_part->length; + terminator->text[terminator->length] = '\0'; + free(next_part->text); + break; + + case QUOTE_EXPRESSION: + terminator->type = QUOTED; + append_and_destroy_quote_to_here_document_terminator(ctx->here_document_stack->first, next_part->child); + free(next_part->child); + break; + + case BACKQUOTE_EXPRESSION: + case ARITHMETIC_EXPRESSION: + case VARIABLE_SUBSTITUTION: + case SUBSHELL_SUBSTITUTION: + case PROCESS_SUBSTITUTION_INPUT: + case PROCESS_SUBSTITUTION_OUTPUT: + case PROCESS_SUBSTITUTION_INPUT_OUTPUT: + eprintf("use of run-time evaluated expression as right-hand side of %s operator (at line %zu) is illegal\n", + ctx->here_document_stack->first->redirection->type == HERE_DOCUMENT_INDENTED ? 
"<<-" : "<<", + ctx->here_document_stack->first->argument->line_number); + return; + + case REDIRECTION: + case FUNCTION_MARK: + case SUBSHELL: + case ARITHMETIC_SUBSHELL: + /* interpreter shall recognise these as new "arguments" */ + return; + + default: + case COMMAND: /* used by interpreter */ + case VARIABLE: /* ditto */ + abort(); + } + + if (ctx->parser_state->current_argument_end == next_part) + ctx->parser_state->current_argument_end = terminator; + terminator->next_part = next_part->next_part; + free(next_part); + } +} + + +int +check_extension(const char *token, size_t line_number) +{ + if (!posix_mode) { + return 1; + } else { + weprintf("the '%s' token (at line %zu) is not portable, not parsing as it\n", token, line_number); + return 0; + } } @@ -30,6 +179,8 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len) size_t bytes_read = 0; size_t token_len; + struct here_document *here_document; + struct here_document_stack *here_doc_stack; for (; bytes_read < code_len; bytes_read += token_len, code = &code[token_len]) { switch (ctx->mode_stack->mode) { @@ -44,8 +195,8 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len) push_whitespace(ctx, 0); push_semicolon(ctx, 1); ctx->tokeniser_line_number += 1; - if (ctx->here_documents_first) - push_mode(ctx, HERE_DOCUMENT_MODE); + if (ctx->here_document_stack->first) + push_mode(ctx, HERE_DOCUMENT_MODE_INITIALISATION); } else if (isspace(*code)) { ctx->mode_stack->she_is_comment = 1; @@ -129,7 +280,7 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len) push_enter(ctx, SUBSHELL_SUBSTITUTION); } - } else if (code[1] == '[') { + } else if (code[1] == '[' && check_extension("$[", ctx->tokeniser_line_number)) { token_len = 2; push_mode(ctx, SB_QUOTE_MODE); push_enter(ctx, ARITHMETIC_EXPRESSION); @@ -139,7 +290,7 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len) push_mode(ctx, CB_QUOTE_MODE); push_enter(ctx, 
VARIABLE_SUBSTITUTION); - } else if (code[1] == '\'') { + } else if (code[1] == '\'' && check_extension("$'", ctx->tokeniser_line_number)) { for (token_len = 2; token_len < code_len - bytes_read; token_len += 1) { if (code[token_len] == '\\') { if (token_len + 1 == code_len - bytes_read) { @@ -186,10 +337,87 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len) break; + case HERE_DOCUMENT_MODE_INITIALISATION: + here_doc_stack = ctx->here_document_stack; + here_doc_stack->indented = 0; + if (here_doc_stack->first->redirection->type == HERE_DOCUMENT_INDENTED) + here_doc_stack->indented = 1; + get_here_document_terminator(ctx); + here_doc_stack->verbatim = 0; + if (here_doc_stack->first->argument->next_part->type == QUOTED) + here_doc_stack->verbatim = 1; + here_doc_stack->first->terminator = here_doc_stack->first->argument->next_part->text; + here_doc_stack->first->terminator_length = here_doc_stack->first->argument->next_part->length; + here_doc_stack->first->argument->next_part->text = ecalloc(1, 1); + here_doc_stack->first->argument->next_part->length = 0; + here_doc_stack->first->argument->next_part->type = QUOTED; + here_doc_stack->first->argument_end = here_doc_stack->first->argument->next_part; + ctx->mode_stack->mode = HERE_DOCUMENT_MODE; + /* fall through */ + case HERE_DOCUMENT_MODE: - /* TODO read until terminator, remove all (including on the - * line of the terminator) if <<- and then if terminator was - * unquoted, parse in " "-mode but accept " */ + here_doc_stack = ctx->here_document_stack; + if (*code == '\t' && here_doc_stack->indented) { + token_len = 1; + } else { + token_len = here_doc_stack->line_offset; + for (; token_len < code_len - bytes_read; token_len += 1) { + if (code[token_len] == '\n') { + goto here_document_line_end; + } else if (!here_doc_stack->verbatim) { + if (code[token_len] == '\\') { + if (token_len + 1 == code_len - bytes_read) { + goto need_more; + } else if (code[token_len + 1] == '$' || code[token_len 
+ 1] == '`') { + here_doc_stack->line_offset = 0; + push_quoted(ctx, code, token_len); + push_quoted(ctx, &code[token_len + 1], 1); + goto next; + } + token_len += 1; + } else if (code[token_len] == '$') { + here_doc_stack->line_offset = 0; + push_quoted(ctx, code, token_len); + bytes_read += token_len; + code = &code[token_len]; + goto quote_mode_dollar_mode; + } else if (code[token_len] == '`') { + here_doc_stack->line_offset = 0; + push_quoted(ctx, code, token_len); + push_mode(ctx, BQ_QUOTE_MODE); + push_enter(ctx, BACKQUOTE_EXPRESSION); + goto next; + } + } + } + goto need_more; + + here_document_line_end: + token_len += 1; + ctx->tokeniser_line_number += 1; + here_doc_stack->line_offset = 0; + here_document = here_doc_stack->first; + + if (token_len - 1 == here_document->terminator_length && + !strncmp(code, here_document->terminator, token_len - 1)) { + here_document->redirection->type = HERE_STRING; + here_doc_stack->first = here_document->next; + free(here_document->terminator); + free(here_document); + if (here_doc_stack->first) { + ctx->mode_stack->mode = HERE_DOCUMENT_MODE_INITIALISATION; + } else { + here_doc_stack->next = &here_doc_stack->first; + pop_mode(ctx); + if (here_doc_stack->interpret_when_empty) { + here_doc_stack->interpret_when_empty = 0; + interpret_and_eliminate(ctx); + } + } + } else { + push_quoted(ctx, code, token_len); + } + } break; @@ -197,6 +425,14 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len) if (*code == '\\') { if (code_len - bytes_read < 2) { goto need_more; + } else if (code[1] == '\\' || code[1] == '`' || code[1] == '$') { + token_len = 2; + push_unquoted(ctx, &code[1], 1); + if (code[1] == '$') { + weprintf("meaningless \\ found before $ inside backquote expression at line " + "%zu, perhaps you mean to use \\\\$ instead to get a literal $\n", + ctx->tokeniser_line_number); + } } else { token_len = 2; push_unquoted(ctx, code, 2); @@ -284,6 +520,7 @@ parse_preparsed(struct parser_context *ctx, 
char *code, size_t code_len) } } else if (*code == '$') { + quote_mode_dollar_mode: if (code_len - bytes_read < 2) { if (ctx->end_of_file_reached) { token_len = 1; @@ -307,7 +544,7 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len) push_enter(ctx, SUBSHELL_SUBSTITUTION); } - } else if (code[1] == '[') { + } else if (code[1] == '[' && check_extension("$[", ctx->tokeniser_line_number)) { token_len = 2; push_mode(ctx, SB_QUOTE_MODE); push_enter(ctx, ARITHMETIC_EXPRESSION); @@ -403,6 +640,7 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len) abort(); } + next: if (ctx->line_continuations) { ctx->tokeniser_line_number += ctx->line_continuations; ctx->line_continuations = 0; -- cgit v1.2.3-70-g09d2