diff options
author | Mattias Andrée <maandree@kth.se> | 2021-07-13 02:44:18 +0200 |
---|---|---|
committer | Mattias Andrée <maandree@kth.se> | 2021-07-13 02:44:18 +0200 |
commit | bc9033fdf30424c34008e651fdbbba5da8c8fc40 (patch) | |
tree | 995bc6bbd067cf6bebe1a6e6f74e210b11df1a8a /tokeniser.c | |
parent | Second commit (diff) | |
download | apsh-bc9033fdf30424c34008e651fdbbba5da8c8fc40.tar.gz apsh-bc9033fdf30424c34008e651fdbbba5da8c8fc40.tar.bz2 apsh-bc9033fdf30424c34008e651fdbbba5da8c8fc40.tar.xz |
Third commit
Signed-off-by: Mattias Andrée <maandree@kth.se>
Diffstat (limited to '')
-rw-r--r-- | tokeniser.c | 268 |
1 files changed, 253 insertions, 15 deletions
diff --git a/tokeniser.c b/tokeniser.c
index 63ff2fd..606726b 100644
--- a/tokeniser.c
+++ b/tokeniser.c
@@ -5,20 +5,169 @@
 void
 push_mode(struct parser_context *ctx, enum tokeniser_mode mode)
 {
-	struct mode_stack *new = emalloc(sizeof(*new));
-	new->mode = mode;
-	new->she_is_comment = 1;
-	new->previous = ctx->mode_stack;
-	ctx->mode_stack = new;
+	struct mode_stack *new_mode_stack; /* push_mode: entry that becomes the new top of ctx->mode_stack */
+	struct here_document_stack *new_here_document_stack;
+	/* Entering backquote mode only emits a warning; the mode is still pushed below. */
+	if (mode == BQ_QUOTE_MODE)
+		weprintf("backquote expression found at line %zu, stop it!\n", ctx->tokeniser_line_number);
+	/* A mode pushed while the current top is HERE_DOCUMENT_MODE gets its own, initially empty, here-document stack level. */
+	if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) {
+		new_here_document_stack = ecalloc(1, sizeof(*new_here_document_stack));
+		new_here_document_stack->next = &new_here_document_stack->first; /* empty list: next refers to the head slot */
+		new_here_document_stack->previous = ctx->here_document_stack;
+		ctx->here_document_stack = new_here_document_stack;
+	}
+
+	new_mode_stack = emalloc(sizeof(*new_mode_stack));
+	new_mode_stack->mode = mode;
+	new_mode_stack->she_is_comment = 1;
+	new_mode_stack->previous = ctx->mode_stack;
+	ctx->mode_stack = new_mode_stack;
 }
 
 
 void
 pop_mode(struct parser_context *ctx)
 {
-	struct mode_stack *old = ctx->mode_stack;
+	struct mode_stack *old_mode_stack; /* pop_mode: drop the top of ctx->mode_stack; if that uncovers HERE_DOCUMENT_MODE, also retire one here-document stack level */
+	struct here_document_stack *old_here_document_stack;
+	struct here_document_stack *prev_here_document_stack;
+
+	old_mode_stack = ctx->mode_stack;
 	ctx->mode_stack = ctx->mode_stack->previous;
-	free(old);
+	free(old_mode_stack);
+	/* Mirror of push_mode: the mode now on top is HERE_DOCUMENT_MODE, so the level created for the popped mode must go. */
+	if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) {
+		if (ctx->here_document_stack->first) { /* here-documents are still pending on the level being closed */
+			if (posix_mode) { /* NOTE(review): check_extension reports non-portable input with weprintf; confirm eprintf (presumably fatal) is intended here */
+				eprintf("subshell expression closed at line %zu before here-documents, "
+				        "this is non-portable\n", ctx->tokeniser_line_number);
+			}
+			prev_here_document_stack = ctx->here_document_stack->previous; /* fold the enclosing level into the current one */
+			*ctx->here_document_stack->next = prev_here_document_stack->first; /* chain the enclosing list onto the slot that next refers to */
+			ctx->here_document_stack->next = prev_here_document_stack->next; /* NOTE(review): if prev->first were NULL, prev->next would presumably still point into prev (see push_mode), which is freed below - confirm this cannot happen */
+			ctx->here_document_stack->previous = prev_here_document_stack->previous;
+			ctx->here_document_stack->interpret_when_empty = prev_here_document_stack->interpret_when_empty;
+			free(prev_here_document_stack);
+		} else { /* nothing pending: discard the current level and return to the enclosing one */
+			old_here_document_stack = ctx->here_document_stack;
+			ctx->here_document_stack = old_here_document_stack->previous;
+			free(old_here_document_stack);
+		}
+	}
+}
+/* Append the text of every part of every argument in QUOTE to the terminator part of HERE_DOCUMENT (here_document->argument->next_part), freeing each part and finally quote->arguments. */
+/* Parts that are neither QUOTED nor UNQUOTED are reported through eprintf. The quote structure itself is not freed here; both callers in this patch free it afterwards. */
+static void
+append_and_destroy_quote_to_here_document_terminator(struct here_document *here_document, struct parser_state *quote)
+{
+	struct argument *terminator, *part, *next_part;
+	size_t i;
+
+	terminator = here_document->argument->next_part;
+
+	for (i = 0; i < quote->narguments; i++) {
+		for (part = quote->arguments[i]; part; part = next_part) {
+			next_part = part->next_part; /* read the link before part is freed at the end of the iteration */
+			if (part->type != QUOTED && part->type != UNQUOTED) { /* presumably eprintf does not return (TODO confirm); otherwise part->text would be appended regardless */
+				eprintf("use of run-time evaluated expression as right-hand side "
+				        "of %s operator (at line %zu) is illegal\n",
+				        here_document->redirection->type == HERE_DOCUMENT_INDENTED ? "<<-" : "<<",
+				        here_document->argument->line_number);
+			}
+			terminator->text = erealloc(terminator->text, terminator->length + part->length + 1); /* room for the appended bytes plus the NUL */
+			memcpy(&terminator->text[terminator->length], part->text, part->length);
+			terminator->length += part->length;
+			terminator->text[terminator->length] = '\0'; /* keep the accumulated text NUL-terminated */
+			free(part->text);
+			free(part);
+		}
+	}
+
+	free(quote->arguments);
+}
+/* Collapse the parts that follow the first pending here-document operator (ctx->here_document_stack->first->argument->next_part and its successors) into a single text part holding the terminator. The part ends up QUOTED if any contributing part was QUOTED or a QUOTE_EXPRESSION; the caller in parse_preparsed tests that type to select verbatim mode. */
+static void
+get_here_document_terminator(struct parser_context *ctx)
+{
+	struct argument *terminator, *next_part;
+	struct parser_state *child;
+
+	terminator = ctx->here_document_stack->first->argument->next_part;
+	if (!terminator || (terminator->type != QUOTED && terminator->type != UNQUOTED && terminator->type != QUOTE_EXPRESSION)) { /* NOTE(review): terminator is dereferenced by the loop below, so this relies on eprintf not returning - confirm */
+		eprintf("missing right-hand side of %s operator at line %zu\n",
+		        ctx->here_document_stack->first->redirection->type == HERE_DOCUMENT_INDENTED ? "<<-" : "<<",
+		        ctx->here_document_stack->first->argument->line_number);
+	} else if (terminator->type == QUOTE_EXPRESSION) {
+		child = terminator->child; /* saved before text and length are reset below */
+		terminator->type = QUOTED;
+		terminator->text = ecalloc(1, 1); /* start from an empty string, then append the quote contents */
+		terminator->length = 0;
+		append_and_destroy_quote_to_here_document_terminator(ctx->here_document_stack->first, child);
+		free(child);
+	}
+	/* Absorb the following parts one at a time until none remain or a part that is not plain text is met. */
+	while ((next_part = terminator->next_part)) {
+		switch (next_part->type) {
+		case QUOTED:
+			terminator->type = QUOTED;
+			/* fall through */
+		case UNQUOTED:
+			terminator->text = erealloc(terminator->text, terminator->length + next_part->length + 1);
+			memcpy(&terminator->text[terminator->length], next_part->text, next_part->length);
+			terminator->length += next_part->length;
+			terminator->text[terminator->length] = '\0';
+			free(next_part->text);
+			break;
+
+		case QUOTE_EXPRESSION:
+			terminator->type = QUOTED;
+			append_and_destroy_quote_to_here_document_terminator(ctx->here_document_stack->first, next_part->child);
+			free(next_part->child);
+			break;
+
+		case BACKQUOTE_EXPRESSION:
+		case ARITHMETIC_EXPRESSION:
+		case VARIABLE_SUBSTITUTION:
+		case SUBSHELL_SUBSTITUTION:
+		case PROCESS_SUBSTITUTION_INPUT:
+		case PROCESS_SUBSTITUTION_OUTPUT:
+		case PROCESS_SUBSTITUTION_INPUT_OUTPUT:
+			eprintf("use of run-time evaluated expression as right-hand side of %s operator (at line %zu) is illegal\n",
+			        ctx->here_document_stack->first->redirection->type == HERE_DOCUMENT_INDENTED ? "<<-" : "<<",
+			        ctx->here_document_stack->first->argument->line_number);
+			return;
+
+		case REDIRECTION:
+		case FUNCTION_MARK:
+		case SUBSHELL:
+		case ARITHMETIC_SUBSHELL:
+			/* interpreter shall recognise these as new "arguments" */
+			return;
+
+		default:
+		case COMMAND: /* used by interpreter */
+		case VARIABLE: /* ditto */
+			abort();
+		}
+		/* Only the QUOTED, UNQUOTED and QUOTE_EXPRESSION cases reach this point; the absorbed part is unlinked and freed. */
+		if (ctx->parser_state->current_argument_end == next_part)
+			ctx->parser_state->current_argument_end = terminator; /* do not leave current_argument_end referring to the part freed below */
+		terminator->next_part = next_part->next_part;
+		free(next_part);
+	}
+}
+/* Decide whether the non-portable token TOKEN, seen at LINE_NUMBER, may be parsed as an extension. */
+/* Returns 1 when posix_mode is unset; otherwise warns through weprintf and returns 0. */
+int
+check_extension(const char *token, size_t line_number)
+{
+	if (!posix_mode) {
+		return 1;
+	} else {
+		weprintf("the '%s' token (at line %zu) is not portable, not parsing as it\n", token, line_number);
+		return 0;
+	}
 }
 
 
@@ -30,6 +179,8 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
 	size_t bytes_read = 0;
 	size_t token_len;
 
+	struct here_document *here_document; /* here-document whose body line is being examined */
+	struct here_document_stack *here_doc_stack; /* shorthand for ctx->here_document_stack inside the here-document cases */
 
 	for (; bytes_read < code_len; bytes_read += token_len, code = &code[token_len]) {
 		switch (ctx->mode_stack->mode) {
@@ -44,8 +195,8 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
 				push_whitespace(ctx, 0);
 				push_semicolon(ctx, 1);
 				ctx->tokeniser_line_number += 1;
-				if (ctx->here_documents_first)
-					push_mode(ctx, HERE_DOCUMENT_MODE);
+				if (ctx->here_document_stack->first) /* at least one here-document is pending on the current level */
+					push_mode(ctx, HERE_DOCUMENT_MODE_INITIALISATION);
 
 			} else if (isspace(*code)) {
 				ctx->mode_stack->she_is_comment = 1;
@@ -129,7 +280,7 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
 					push_enter(ctx, SUBSHELL_SUBSTITUTION);
 				}
 
-			} else if (code[1] == '[') {
+			} else if (code[1] == '[' && check_extension("$[", ctx->tokeniser_line_number)) { /* in posix_mode check_extension warns and returns 0, so this branch is skipped */
 				token_len = 2;
 				push_mode(ctx, SB_QUOTE_MODE);
 				push_enter(ctx, ARITHMETIC_EXPRESSION);
@@ -139,7 +290,7 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
 				push_mode(ctx, CB_QUOTE_MODE);
 				push_enter(ctx, VARIABLE_SUBSTITUTION);
 
-			} else if (code[1] == '\'') {
+			} else if (code[1] == '\'' && check_extension("$'", ctx->tokeniser_line_number)) { /* same portability gate as for the square-bracket form */
 				for (token_len = 2; token_len < code_len - bytes_read; token_len += 1) {
 					if (code[token_len] == '\\') {
 						if (token_len + 1 == code_len - bytes_read) {
@@ -186,10 +337,87 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
 			break;
 
 
+		case HERE_DOCUMENT_MODE_INITIALISATION: /* set-up for the first pending here-document, then fall into HERE_DOCUMENT_MODE */
+			here_doc_stack = ctx->here_document_stack;
+			here_doc_stack->indented = 0;
+			if (here_doc_stack->first->redirection->type == HERE_DOCUMENT_INDENTED) /* the type that the diagnostics in this patch print as <<- */
+				here_doc_stack->indented = 1;
+			get_here_document_terminator(ctx);
+			here_doc_stack->verbatim = 0;
+			if (here_doc_stack->first->argument->next_part->type == QUOTED) /* a terminator with any quoted part disables the special characters below */
+				here_doc_stack->verbatim = 1;
+			here_doc_stack->first->terminator = here_doc_stack->first->argument->next_part->text; /* move the terminator string out of the argument part ... */
+			here_doc_stack->first->terminator_length = here_doc_stack->first->argument->next_part->length;
+			here_doc_stack->first->argument->next_part->text = ecalloc(1, 1); /* ... and leave an empty QUOTED part in its place */
+			here_doc_stack->first->argument->next_part->length = 0;
+			here_doc_stack->first->argument->next_part->type = QUOTED;
+			here_doc_stack->first->argument_end = here_doc_stack->first->argument->next_part; /* presumably the body pushed later is attached after this part - TODO confirm */
+			ctx->mode_stack->mode = HERE_DOCUMENT_MODE; /* rewrite the top mode in place rather than pushing another one */
+			/* fall through */
+
 		case HERE_DOCUMENT_MODE:
-			/* TODO read until terminator, remove all <tab> (including on the
-			 * line of the terminator) if <<- and then if terminator was
-			 * unquoted, parse in " "-mode but accept " */
+			here_doc_stack = ctx->here_document_stack;
+			if (*code == '\t' && here_doc_stack->indented) { /* consume one tab without pushing it; NOTE(review): this tests the start of the current token, which need not be the start of a line - confirm */
+				token_len = 1;
+			} else {
+				token_len = here_doc_stack->line_offset; /* resume at the saved offset; every assignment to line_offset visible in this patch stores 0 */
+				for (; token_len < code_len - bytes_read; token_len += 1) {
+					if (code[token_len] == '\n') {
+						goto here_document_line_end;
+					} else if (!here_doc_stack->verbatim) { /* only non-verbatim bodies treat backslash, dollar sign and backquote specially */
+						if (code[token_len] == '\\') {
+							if (token_len + 1 == code_len - bytes_read) { /* the backslash is the last available byte */
+								goto need_more;
+							} else if (code[token_len + 1] == '$' || code[token_len + 1] == '`') {
+								here_doc_stack->line_offset = 0;
+								push_quoted(ctx, code, token_len); /* text before the backslash */
+								push_quoted(ctx, &code[token_len + 1], 1); /* the escaped character without its backslash */
+								goto next; /* NOTE(review): token_len still covers only the text before the backslash; confirm the two escape bytes get consumed, otherwise they are scanned again */
+							}
+							token_len += 1; /* any other escaped byte is stepped over and stays in the text together with its backslash */
+						} else if (code[token_len] == '$') {
+							here_doc_stack->line_offset = 0;
+							push_quoted(ctx, code, token_len);
+							bytes_read += token_len; /* advance by hand: the dollar-sign code jumped to presumably assigns its own token_len */
+							code = &code[token_len];
+							goto quote_mode_dollar_mode;
+						} else if (code[token_len] == '`') {
+							here_doc_stack->line_offset = 0;
+							push_quoted(ctx, code, token_len);
+							push_mode(ctx, BQ_QUOTE_MODE);
+							push_enter(ctx, BACKQUOTE_EXPRESSION);
+							goto next; /* NOTE(review): token_len does not include the backquote itself; confirm the backquote mode is meant to start on it */
+						}
+					}
+				}
+				goto need_more; /* no newline or special character in the available input */
+
+			here_document_line_end: /* reached with code[token_len] being the newline that ends the line */
+				token_len += 1; /* include the newline */
+				ctx->tokeniser_line_number += 1;
+				here_doc_stack->line_offset = 0;
+				here_document = here_doc_stack->first;
+				/* The bytes from code up to the newline are compared with the terminator: same length and same content. */
+				if (token_len - 1 == here_document->terminator_length &&
+				    !strncmp(code, here_document->terminator, token_len - 1)) {
+					here_document->redirection->type = HERE_STRING; /* at the terminator line the redirection is retagged as HERE_STRING */
+					here_doc_stack->first = here_document->next;
+					free(here_document->terminator);
+					free(here_document);
+					if (here_doc_stack->first) { /* another here-document is pending: initialise it on the next iteration */
+						ctx->mode_stack->mode = HERE_DOCUMENT_MODE_INITIALISATION;
+					} else {
+						here_doc_stack->next = &here_doc_stack->first; /* restore the empty-list state that push_mode sets up */
+						pop_mode(ctx);
+						if (here_doc_stack->interpret_when_empty) {
+							here_doc_stack->interpret_when_empty = 0;
+							interpret_and_eliminate(ctx);
+						}
+					}
+				} else {
+					push_quoted(ctx, code, token_len); /* ordinary body text, newline included */
+				}
+			}
 			break;
 
 
@@ -197,6 +425,14 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
 			if (*code == '\\') {
 				if (code_len - bytes_read < 2) {
 					goto need_more;
+				} else if (code[1] == '\\' || code[1] == '`' || code[1] == '$') { /* backslash before backslash, backquote or dollar sign: push only the second character */
+					token_len = 2;
+					push_unquoted(ctx, &code[1], 1);
+					if (code[1] == '$') {
+						weprintf("meaningless \\ found before $ inside backquote expression at line "
+						         "%zu, perhaps you mean to use \\\\$ instead to get a literal $\n",
+						         ctx->tokeniser_line_number);
+					}
 				} else {
 					token_len = 2;
 					push_unquoted(ctx, code, 2);
@@ -284,6 +520,7 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
 				}
 
 			} else if (*code == '$') {
+			quote_mode_dollar_mode: /* also entered by goto from HERE_DOCUMENT_MODE when a dollar sign is met in a non-verbatim body */
 				if (code_len - bytes_read < 2) {
 					if (ctx->end_of_file_reached) {
 						token_len = 1;
@@ -307,7 +544,7 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
 					push_enter(ctx, SUBSHELL_SUBSTITUTION);
 				}
 
-			} else if (code[1] == '[') {
+			} else if (code[1] == '[' && check_extension("$[", ctx->tokeniser_line_number)) { /* in posix_mode check_extension warns and returns 0, so this branch is skipped */
 				token_len = 2;
 				push_mode(ctx, SB_QUOTE_MODE);
 				push_enter(ctx, ARITHMETIC_EXPRESSION);
@@ -403,6 +640,7 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
 			abort();
 		}
 
+	next: /* jump target used by the here-document code after it has pushed its tokens */
 		if (ctx->line_continuations) {
 			ctx->tokeniser_line_number += ctx->line_continuations;
 			ctx->line_continuations = 0; |