diff options
author | Mattias Andrée <maandree@kth.se> | 2021-07-13 02:44:18 +0200 |
---|---|---|
committer | Mattias Andrée <maandree@kth.se> | 2021-07-13 02:44:18 +0200 |
commit | bc9033fdf30424c34008e651fdbbba5da8c8fc40 (patch) | |
tree | 995bc6bbd067cf6bebe1a6e6f74e210b11df1a8a /parser.c | |
parent | Second commit (diff) | |
download | apsh-bc9033fdf30424c34008e651fdbbba5da8c8fc40.tar.gz apsh-bc9033fdf30424c34008e651fdbbba5da8c8fc40.tar.bz2 apsh-bc9033fdf30424c34008e651fdbbba5da8c8fc40.tar.xz |
Third commit
Signed-off-by: Mattias Andrée <maandree@kth.se>
Diffstat (limited to 'parser.c')
-rw-r--r-- | parser.c | 395 |
1 files changed, 335 insertions, 60 deletions
@@ -2,6 +2,46 @@ #include "common.h" +const char * +get_redirection_token(enum redirection_type type) +{ + switch (type) { + case REDIRECT_INPUT: + return "<"; + case REDIRECT_INPUT_TO_FD: + return "<&"; + case REDIRECT_OUTPUT: + return ">"; + case REDIRECT_OUTPUT_APPEND: + return ">>"; + case REDIRECT_OUTPUT_CLOBBER: + return ">|"; + case REDIRECT_OUTPUT_TO_FD: + return ">&"; + case REDIRECT_OUTPUT_AND_STDERR: + return "&>"; + case REDIRECT_OUTPUT_AND_STDERR_APPEND: + return "&>>"; + case REDIRECT_OUTPUT_AND_STDERR_CLOBBER: + return "&>|"; + case REDIRECT_OUTPUT_AND_STDERR_TO_FD: + return "&>&"; + case REDIRECT_INPUT_OUTPUT: + return "<>"; + case REDIRECT_INPUT_OUTPUT_TO_FD: + return "<>&"; + case HERE_STRING: + return "<<<"; + case HERE_DOCUMENT: + return "<<"; + case HERE_DOCUMENT_INDENTED: + return "<<-"; + default: + abort(); + } +} + + void push_end_of_file(struct parser_context *ctx) { @@ -44,6 +84,7 @@ push_command_terminal(struct parser_context *ctx, enum command_terminal terminal new_command = ecalloc(1, sizeof(*new_command)); ctx->parser_state->commands[ctx->parser_state->ncommands++] = new_command; new_command->terminal = terminal; + new_command->terminal_line_number = ctx->tokeniser_line_number; new_command->arguments = ctx->parser_state->arguments; new_command->narguments = ctx->parser_state->narguments; new_command->redirections = ctx->parser_state->redirections; @@ -53,22 +94,17 @@ push_command_terminal(struct parser_context *ctx, enum command_terminal terminal ctx->parser_state->redirections = NULL; ctx->parser_state->nredirections = 0; - if (!ctx->parser_state->parent) { - if (terminal == DOUBLE_SEMICOLON || terminal == SEMICOLON || terminal == AMPERSAND) { - /* TODO unless in a special construct such as while, case, for, if, or {, run and clear - * also require that any here-document is specified (count them and run when given); - * if terminal == AMPERSAND: perform </dev/null first, and reset exist status to 0 - */ - } - } + if (!ctx->parser_state->parent && !ctx->do_not_run) + if (terminal == DOUBLE_SEMICOLON || terminal == SEMICOLON || terminal == NEWLINE || terminal == AMPERSAND) + interpret_and_eliminate(ctx); } void -push_semicolon(struct parser_context *ctx, int maybe) +push_semicolon(struct parser_context *ctx, int actually_newline) { - if (!maybe || ctx->parser_state->narguments) - push_command_terminal(ctx, SEMICOLON); + if (!actually_newline || ctx->parser_state->narguments) + push_command_terminal(ctx, actually_newline ? NEWLINE : SEMICOLON); } @@ -81,7 +117,10 @@ push_new_argument_part(struct parser_context *ctx, enum argument_type type) new_part->type = type; new_part->line_number = ctx->tokeniser_line_number; - if (ctx->parser_state->current_argument_end) { + if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) { + ctx->here_document_stack->first->argument_end->next_part = new_part; + ctx->here_document_stack->first->argument_end = new_part; + } else if (ctx->parser_state->current_argument_end) { ctx->parser_state->current_argument_end->next_part = new_part; ctx->parser_state->current_argument_end = new_part; } else { @@ -91,6 +130,49 @@ push_new_argument_part(struct parser_context *ctx, enum argument_type type) } +PURE_FUNC +static int +is_numeric_argument(struct argument *argument) +{ + char *p; + + do { + if (argument->type != UNQUOTED) + return 0; + + for (p = argument->text; *p; p++) + if (!isdigit(*p)) + return 0; + + } while ((argument = argument->next_part)); + + return 1; +} + + +PURE_FUNC +static int +is_variable_reference(struct argument *argument) +{ + char *p; + + if (argument->type != UNQUOTED || isdigit(argument->text[0]) || argument->text[0] == '$') + return 0; + + do { + if (argument->type != UNQUOTED) + return 0; + + for (p = argument->text; *p; p++) + if (!isalnum(*p) && *p != '_') + return p[0] == '$' && !p[1] && !argument->next_part; + + } while ((argument = argument->next_part)); + + return 0; +} + + static void push_redirection(struct parser_context *ctx, enum redirection_type type) { @@ -113,9 +195,19 @@ push_redirection(struct parser_context *ctx, enum redirection_type type) type == REDIRECT_OUTPUT_AND_STDERR || type == REDIRECT_OUTPUT_AND_STDERR_APPEND || type == REDIRECT_OUTPUT_AND_STDERR_CLOBBER || - type == REDIRECT_OUTPUT_AND_STDERR_TO_FD) { + type == REDIRECT_OUTPUT_AND_STDERR_TO_FD || + !is_numeric_argument(ctx->parser_state->current_argument)) { + if (is_variable_reference(ctx->parser_state->current_argument)) { + if (posix_mode) { + weprintf("the '$%s' token (at line %zu) is not portable, not parsing as it\n", + get_redirection_token(type), ctx->tokeniser_line_number); + } else { + goto argument_is_left_hand_side; + } + } push_whitespace(ctx, 1); } else { + argument_is_left_hand_side: new_redirection->left_hand_side = ctx->parser_state->current_argument; } } @@ -130,8 +222,8 @@ push_redirection(struct parser_context *ctx, enum redirection_type type) new_here_document->redirection = new_redirection; new_here_document->argument = new_argument; new_here_document->next = NULL; - *ctx->here_documents_next = new_here_document; - ctx->here_documents_next = &new_here_document->next; + *ctx->here_document_stack->next = new_here_document; + ctx->here_document_stack->next = &new_here_document->next; } ctx->parser_state->need_right_hand_side = 1; @@ -159,39 +251,41 @@ size_t push_symbol(struct parser_context *ctx, char *token, size_t token_len) { #define LIST_SYMBOLS(_)\ - _("<<<", push_redirection(ctx, HERE_STRING))\ - _("<<-", push_redirection(ctx, HERE_DOCUMENT_INDENTED))\ - _("<>(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT_OUTPUT, NORMAL_MODE))\ - _("<>|", push_command_terminal(ctx, SOCKET_PIPE))\ - _("<>&", push_redirection(ctx, REDIRECT_INPUT_OUTPUT_TO_FD))\ - _("&>>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_APPEND))\ - _("&>&", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_TO_FD))\ - _("&>|", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_CLOBBER))\ - _("()", push_function_mark(ctx))\ - _("((", push_shell_io(ctx, ARITHMETIC_SUBSHELL, RRB_QUOTE_MODE))\ - _(";;", push_command_terminal(ctx, DOUBLE_SEMICOLON))\ - _("<(", push_shell_io(ctx, PROCESS_SUBSTITUTION_OUTPUT, NORMAL_MODE))\ - _("<<", push_redirection(ctx, HERE_DOCUMENT))\ - _("<>", push_redirection(ctx, REDIRECT_INPUT_OUTPUT))\ - _("<&", push_redirection(ctx, REDIRECT_INPUT_TO_FD))\ - _(">(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT, NORMAL_MODE))\ - _(">>", push_redirection(ctx, REDIRECT_OUTPUT_APPEND))\ - _(">&", push_redirection(ctx, REDIRECT_OUTPUT_TO_FD))\ - _(">|", push_redirection(ctx, REDIRECT_OUTPUT_CLOBBER))\ - _("||", push_command_terminal(ctx, OR))\ - _("|&", push_command_terminal(ctx, PIPE_AMPERSAND))\ - _("&&", push_command_terminal(ctx, AND))\ - _("&|", push_command_terminal(ctx, PIPE_AMPERSAND)) /* synonym for |& to match &> */\ - _("&>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR))\ - _("(", push_shell_io(ctx, SUBSHELL, NORMAL_MODE))\ - _(";", push_semicolon(ctx, 0))\ - _("<", push_redirection(ctx, REDIRECT_INPUT))\ - _(">", push_redirection(ctx, REDIRECT_OUTPUT))\ - _("|", push_command_terminal(ctx, PIPE))\ - _("&", push_command_terminal(ctx, AMPERSAND)) - -#define X(SYMBOL, ACTION)\ - if (token_len >= sizeof(SYMBOL) - 1 && !strncmp(token, SYMBOL, sizeof(SYMBOL) - 1)) {\ + _(0, "<<<", push_redirection(ctx, HERE_STRING))\ + _(1, "<<-", push_redirection(ctx, HERE_DOCUMENT_INDENTED))\ + _(0, "<>(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT_OUTPUT, NORMAL_MODE))\ + _(0, "<>|", push_command_terminal(ctx, SOCKET_PIPE))\ + _(1, "<>&", push_redirection(ctx, REDIRECT_INPUT_OUTPUT_TO_FD))\ + _(0, "&>>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_APPEND))\ + _(0, "&>&", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_TO_FD))\ + _(0, "&>|", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_CLOBBER))\ + _(1, "()", push_function_mark(ctx))\ + _(0, "((", push_shell_io(ctx, ARITHMETIC_SUBSHELL, RRB_QUOTE_MODE))\ + _(1, ";;", push_command_terminal(ctx, DOUBLE_SEMICOLON))\ + _(0, "<(", push_shell_io(ctx, PROCESS_SUBSTITUTION_OUTPUT, NORMAL_MODE))\ + _(1, "<<", push_redirection(ctx, HERE_DOCUMENT))\ + _(1, "<>", push_redirection(ctx, REDIRECT_INPUT_OUTPUT))\ + _(1, "<&", push_redirection(ctx, REDIRECT_INPUT_TO_FD))\ + _(0, ">(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT, NORMAL_MODE))\ + _(1, ">>", push_redirection(ctx, REDIRECT_OUTPUT_APPEND))\ + _(1, ">&", push_redirection(ctx, REDIRECT_OUTPUT_TO_FD))\ + _(1, ">|", push_redirection(ctx, REDIRECT_OUTPUT_CLOBBER))\ + _(1, "||", push_command_terminal(ctx, OR))\ + _(0, "|&", push_command_terminal(ctx, PIPE_AMPERSAND))\ + _(1, "&&", push_command_terminal(ctx, AND))\ + _(0, "&|", push_command_terminal(ctx, AMPERSAND_PIPE))\ + _(0, "&>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR))\ + _(1, "(", push_shell_io(ctx, SUBSHELL, NORMAL_MODE))\ + _(1, ";", push_semicolon(ctx, 0))\ + _(1, "<", push_redirection(ctx, REDIRECT_INPUT))\ + _(1, ">", push_redirection(ctx, REDIRECT_OUTPUT))\ + _(1, "|", push_command_terminal(ctx, PIPE))\ + _(1, "&", push_command_terminal(ctx, AMPERSAND)) + +#define X(PORTABLE, SYMBOL, ACTION)\ + if (token_len >= sizeof(SYMBOL) - 1 &&\ + !strncmp(token, SYMBOL, sizeof(SYMBOL) - 1) &&\ + (PORTABLE || check_extension(SYMBOL, ctx->tokeniser_line_number))) {\ ACTION;\ return token_len;\ } @@ -208,13 +302,22 @@ push_text(struct parser_context *ctx, char *text, size_t text_len, enum argument { struct argument *arg_part; - ctx->parser_state->need_right_hand_side = 0; + if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) { + type = QUOTED; + if (ctx->here_document_stack->first->argument_end->type != type || + ctx->here_document_stack->first->argument_end->line_number != ctx->tokeniser_line_number) + push_new_argument_part(ctx, type); + arg_part = ctx->here_document_stack->first->argument_end; - if (!ctx->parser_state->current_argument_end || - ctx->parser_state->current_argument_end->type != type || - ctx->parser_state->current_argument_end->line_number != ctx->tokeniser_line_number) - push_new_argument_part(ctx, type); - arg_part = ctx->parser_state->current_argument_end; + } else { + ctx->parser_state->need_right_hand_side = 0; + + if (!ctx->parser_state->current_argument_end || + ctx->parser_state->current_argument_end->type != type || + ctx->parser_state->current_argument_end->line_number != ctx->tokeniser_line_number) + push_new_argument_part(ctx, type); + arg_part = ctx->parser_state->current_argument_end; + } arg_part->text = erealloc(arg_part->text, arg_part->length + text_len + 1); memcpy(&arg_part->text[arg_part->length], text, text_len); @@ -230,11 +333,142 @@ push_quoted(struct parser_context *ctx, char *text, size_t text_len) } +static size_t +encode_utf8(char *buf, uint32_t value) +{ + size_t i, len; + + if (value <= 0x7F) { + buf[0] = (char)value; + return 1; + } + + if (value <= 0x000007FFUL) len = 2; + else if (value <= 0x0000FFFFUL) len = 3; + else if (value <= 0x001FFFFFUL) len = 4; + else if (value <= 0x03FFFFFFUL) len = 5; + else if (value <= 0x7FFFFFFFUL) len = 6; + else len = 7; + + for (i = len - 1; i; i--) { + buf[len - 1 - i] = (char)(((int)value & 0x3F) | 0x80); + value >>= 6; + } + + buf[0] |= (char)(0xFF << (8 - len)); + + return len; +} + void push_escaped(struct parser_context *ctx, char *text, size_t text_len) { - /* TODO resolve backslashes in text */ - push_text(ctx, text, text_len, QUOTED); + uint32_t value; + size_t r, w, n; + for (r = w = 0; r < text_len;) { + if (text[r] == '\\' && r + 1 < text_len) { + if (text[r + 1] == 'a') { + text[w++] = '\a'; + r += 2; + } else if (text[r + 1] == 'b') { + text[w++] = '\b'; + r += 2; + } else if (text[r + 1] == 'e' || text[r + 1] == 'E') { + text[w++] = '\033'; + r += 2; + } else if (text[r + 1] == 'f') { + text[w++] = '\f'; + r += 2; + } else if (text[r + 1] == 'n') { + text[w++] = '\n'; + r += 2; + } else if (text[r + 1] == 'r') { + text[w++] = '\r'; + r += 2; + } else if (text[r + 1] == 't') { + text[w++] = '\t'; + r += 2; + } else if (text[r + 1] == 'v') { + text[w++] = '\v'; + r += 2; + } else if (text[r + 1] == '\\') { + text[w++] = '\\'; + r += 2; + } else if (text[r + 1] == '\'') { + text[w++] = '\''; + r += 2; + } else if (text[r + 1] == '"') { + text[w++] = '\"'; + r += 2; + } else if (text[r + 1] == '?') { + text[w++] = '?'; + r += 2; + } else if ('0' <= text[r + 1] && text[r + 1] <= '7') { + value = 0; + for (r += 1, n = 0; n < 3 && '0' <= text[r + 1] && text[r + 1] <= '7'; r += 1, n += 1) { + if ((text[r] & 15) > 255 - (int)value) + break; + value *= 8; + value |= (uint32_t)(text[r] & 15); + } + if (value) { + text[w++] = (char)value; + } else { + weprintf("ignoring NUL byte result from $''-expression at line %zu\n", + ctx->tokeniser_line_number); + } + } else if (text[r + 1] == 'x' && text_len - r >= 3 && isxdigit(text[r + 2])) { + value = 0; + for (r += 2, n = 0; n < 2 && isxdigit(text[r]); r += 1, n += 1) { + value *= 16; + value |= (uint32_t)((text[r] > '9' ? 9 : 0) + (text[r] & 15)); + } + if (value) { + text[w++] = (char)value; + } else { + weprintf("ignoring NUL byte result from $''-expression at line %zu\n", + ctx->tokeniser_line_number); + } + } else if (text[r + 1] == 'u' && text_len - r >= 3 && isxdigit(text[r + 2])) { + value = 0; + for (r += 2, n = 0; n < 4 && isxdigit(text[r]); r += 1, n += 1) { + value *= 16; + value |= (uint32_t)((text[r] > '9' ? 9 : 0) + (text[r] & 15)); + } + if (value) { + w += encode_utf8(&text[w], value); + } else { + weprintf("ignoring NUL byte result from $''-expression at line %zu\n", + ctx->tokeniser_line_number); + } + } else if (text[r + 1] == 'U') { + value = 0; + for (r += 2, n = 0; n < 8 && isxdigit(text[r]); r += 1, n += 1) { + value *= 16; + value |= (uint32_t)((text[r] > '9' ? 9 : 0) + (text[r] & 15)); + } + if (value) { + w += encode_utf8(&text[w], value); + } else { + weprintf("ignoring NUL byte result from $''-expression at line %zu\n", + ctx->tokeniser_line_number); + } + } else if (text[r + 1] == 'c' && text_len - r >= 3) { + if (text[r + 2] & (' ' - 1)) { + text[w++] = (char)(text[r + 2] & (' ' - 1)); + } else { + weprintf("ignoring NUL byte result from $''-expression at line %zu\n", + ctx->tokeniser_line_number); + } + r += 3; + } else { + text[w++] = text[r++]; + } + } else { + text[w++] = text[r++]; + } + } + push_text(ctx, text, w, QUOTED); } @@ -250,7 +484,9 @@ push_enter(struct parser_context *ctx, enum argument_type type) { struct parser_state *new_state; - ctx->parser_state->need_right_hand_side = 0; + if (ctx->mode_stack->mode != HERE_DOCUMENT_MODE) + ctx->parser_state->need_right_hand_side = 0; + push_new_argument_part(ctx, type); new_state = ecalloc(1, sizeof(*new_state)); @@ -263,9 +499,48 @@ push_enter(struct parser_context *ctx, enum argument_type type) void push_leave(struct parser_context *ctx) { - if (ctx->mode_stack->mode == NORMAL_MODE) + struct parser_context subctx; + struct argument *argument; + char *code; + size_t code_length; + size_t parsed_length; + size_t arg_i; + + if (ctx->mode_stack->mode == NORMAL_MODE) { push_semicolon(ctx, 1); - /* TODO else if (ctx->mode_stack->mode == BQ_QUOTE_MODE), parse content */ - /* TODO validate subshell content */ + + } else if (ctx->mode_stack->mode == BQ_QUOTE_MODE) { + initialise_parser_context(&subctx, 1, 1); + subctx.do_not_run = 1; + subctx.end_of_file_reached = 1; + code = NULL; + code_length = 0; + for (arg_i = 0; arg_i < ctx->parser_state->narguments; arg_i++) { + argument = ctx->parser_state->arguments[arg_i]; + code = erealloc(code, code_length + argument->length); + memcpy(&code[code_length], argument->text, argument->length); + code_length += argument->length; + } + code = erealloc(code, code_length + 1); + code[code_length] = '\0'; + parsed_length = parse_preparsed(&subctx, code, code_length); + if (parsed_length < code_length || subctx.premature_end_of_file) { + eprintf("premature end of file backquote expression at line %zu\n", + ctx->parser_state->parent->current_argument_end->line_number); + } + free(code); + free(subctx.here_document_stack); + free(subctx.interpreter_state); + ctx->parser_state->parent->current_argument_end->child = subctx.parser_state; + + } else { + /* In quote modes we want everything in a dummy command + * to simplify the implementation of the interpreter. + * The command termination used here doesn't matter, + * neither does the line nummer (for it), the interpreter + * will only look at the argument list. */ + push_command_terminal(ctx, NEWLINE); + } + ctx->parser_state = ctx->parser_state->parent; } |