aboutsummaryrefslogtreecommitdiffstats
path: root/tokeniser.c
diff options
context:
space:
mode:
author Mattias Andrée <maandree@kth.se> 2021-07-13 02:44:18 +0200
committer Mattias Andrée <maandree@kth.se> 2021-07-13 02:44:18 +0200
commit bc9033fdf30424c34008e651fdbbba5da8c8fc40 (patch)
tree 995bc6bbd067cf6bebe1a6e6f74e210b11df1a8a /tokeniser.c
parent Second commit (diff)
download apsh-bc9033fdf30424c34008e651fdbbba5da8c8fc40.tar.gz
apsh-bc9033fdf30424c34008e651fdbbba5da8c8fc40.tar.bz2
apsh-bc9033fdf30424c34008e651fdbbba5da8c8fc40.tar.xz
Third commit
Signed-off-by: Mattias Andrée <maandree@kth.se>
Diffstat (limited to '')
-rw-r--r-- tokeniser.c 268
1 files changed, 253 insertions, 15 deletions
diff --git a/tokeniser.c b/tokeniser.c
index 63ff2fd..606726b 100644
--- a/tokeniser.c
+++ b/tokeniser.c
@@ -5,20 +5,169 @@
void
push_mode(struct parser_context *ctx, enum tokeniser_mode mode) /* push a new entry for `mode` onto ctx->mode_stack */
{
- struct mode_stack *new = emalloc(sizeof(*new));
- new->mode = mode;
- new->she_is_comment = 1;
- new->previous = ctx->mode_stack;
- ctx->mode_stack = new;
+ struct mode_stack *new_mode_stack;
+ struct here_document_stack *new_here_document_stack;
+
+ if (mode == BQ_QUOTE_MODE) /* entering a backquote expression only emits a message; the push below still happens */
+ weprintf("backquote expression found at line %zu, stop it!\n", ctx->tokeniser_line_number);
+
+ if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) { /* tested on the current top, i.e. before the new mode is pushed */
+ new_here_document_stack = ecalloc(1, sizeof(*new_here_document_stack)); /* presumably zero-filled (calloc-style wrapper) — confirm */
+ new_here_document_stack->next = &new_here_document_stack->first; /* `next` starts out pointing at this level's own `first` slot */
+ new_here_document_stack->previous = ctx->here_document_stack; /* remember the enclosing level so pop_mode can return to it */
+ ctx->here_document_stack = new_here_document_stack;
+ }
+
+ new_mode_stack = emalloc(sizeof(*new_mode_stack));
+ new_mode_stack->mode = mode;
+ new_mode_stack->she_is_comment = 1; /* every new mode entry starts with this flag set */
+ new_mode_stack->previous = ctx->mode_stack; /* link to the entry that becomes current again after pop_mode */
+ ctx->mode_stack = new_mode_stack;
}
void
pop_mode(struct parser_context *ctx) /* remove and free the top entry of ctx->mode_stack, then unwind the here-document level pushed with it */
{
- struct mode_stack *old = ctx->mode_stack;
+ struct mode_stack *old_mode_stack;
+ struct here_document_stack *old_here_document_stack;
+ struct here_document_stack *prev_here_document_stack;
+
+ old_mode_stack = ctx->mode_stack;
ctx->mode_stack = ctx->mode_stack->previous;
- free(old);
+ free(old_mode_stack);
+
+ if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) { /* tested on the entry uncovered by the pop; mirrors the check in push_mode */
+ if (ctx->here_document_stack->first) { /* the level being closed still has here-documents queued */
+ if (posix_mode) {
+ eprintf("subshell expression closed at line %zu before here-documents, " /* NOTE(review): eprintf presumably terminates the program — confirm */
+ "this is non-portable\n", ctx->tokeniser_line_number);
+ }
+ prev_here_document_stack = ctx->here_document_stack->previous;
+ *ctx->here_document_stack->next = prev_here_document_stack->first; /* store the enclosing level's list head into the slot this level's `next` points at */
+ ctx->here_document_stack->next = prev_here_document_stack->next; /* from here on this level takes over the enclosing level's fields */
+ ctx->here_document_stack->previous = prev_here_document_stack->previous;
+ ctx->here_document_stack->interpret_when_empty = prev_here_document_stack->interpret_when_empty;
+ free(prev_here_document_stack); /* the merged level survives; the enclosing level's node is released */
+ } else {
+ old_here_document_stack = ctx->here_document_stack; /* nothing queued on this level: drop it and return to the enclosing one */
+ ctx->here_document_stack = old_here_document_stack->previous;
+ free(old_here_document_stack);
+ }
+ }
+}
+
+
+static void /* append the text of every part of every argument in `quote` to the here-document's terminator part, freeing each part */
+append_and_destroy_quote_to_here_document_terminator(struct here_document *here_document, struct parser_state *quote)
+{
+ struct argument *terminator, *part, *next_part;
+ size_t i;
+
+ terminator = here_document->argument->next_part; /* the part whose text is extended below */
+
+ for (i = 0; i < quote->narguments; i++) {
+ for (part = quote->arguments[i]; part; part = next_part) {
+ next_part = part->next_part; /* read before `part` is freed at the end of the iteration */
+ if (part->type != QUOTED && part->type != UNQUOTED) { /* anything other than literal text is rejected */
+ eprintf("use of run-time evaluated expression as right-hand side "
+ "of %s operator (at line %zu) is illegal\n",
+ here_document->redirection->type == HERE_DOCUMENT_INDENTED ? "<<-" : "<<",
+ here_document->argument->line_number);
+ }
+ terminator->text = erealloc(terminator->text, terminator->length + part->length + 1); /* +1 for the NUL byte written below */
+ memcpy(&terminator->text[terminator->length], part->text, part->length);
+ terminator->length += part->length;
+ terminator->text[terminator->length] = '\0';
+ free(part->text);
+ free(part);
+ }
+ }
+
+ free(quote->arguments); /* only the array is released; `quote` itself is not freed by this function */
+}
+
+static void /* merge the parts that follow the first pending here-document's argument into one terminator part */
+get_here_document_terminator(struct parser_context *ctx)
+{
+ struct argument *terminator, *next_part;
+ struct parser_state *child;
+
+ terminator = ctx->here_document_stack->first->argument->next_part;
+ if (!terminator || (terminator->type != QUOTED && terminator->type != UNQUOTED && terminator->type != QUOTE_EXPRESSION)) { /* the first part must exist and be literal text or a quote expression */
+ eprintf("missing right-hand side of %s operator at line %zu\n", /* NOTE(review): relies on eprintf not returning; the loop below dereferences `terminator` — confirm */
+ ctx->here_document_stack->first->redirection->type == HERE_DOCUMENT_INDENTED ? "<<-" : "<<",
+ ctx->here_document_stack->first->argument->line_number);
+ } else if (terminator->type == QUOTE_EXPRESSION) {
+ child = terminator->child;
+ terminator->type = QUOTED; /* turn the quote expression into a QUOTED part that starts as an empty string */
+ terminator->text = ecalloc(1, 1);
+ terminator->length = 0;
+ append_and_destroy_quote_to_here_document_terminator(ctx->here_document_stack->first, child);
+ free(child); /* the helper frees the child's parts and array, not the child itself */
+ }
+
+ while ((next_part = terminator->next_part)) { /* absorb following parts one at a time until none remain or a case returns */
+ switch (next_part->type) {
+ case QUOTED:
+ terminator->type = QUOTED; /* a single quoted piece makes the whole terminator QUOTED */
+ /* fall through */
+ case UNQUOTED:
+ terminator->text = erealloc(terminator->text, terminator->length + next_part->length + 1); /* +1 for the NUL byte written below */
+ memcpy(&terminator->text[terminator->length], next_part->text, next_part->length);
+ terminator->length += next_part->length;
+ terminator->text[terminator->length] = '\0';
+ free(next_part->text); /* the part node itself is freed after the switch */
+ break;
+
+ case QUOTE_EXPRESSION:
+ terminator->type = QUOTED;
+ append_and_destroy_quote_to_here_document_terminator(ctx->here_document_stack->first, next_part->child);
+ free(next_part->child);
+ break;
+
+ case BACKQUOTE_EXPRESSION:
+ case ARITHMETIC_EXPRESSION:
+ case VARIABLE_SUBSTITUTION:
+ case SUBSHELL_SUBSTITUTION:
+ case PROCESS_SUBSTITUTION_INPUT:
+ case PROCESS_SUBSTITUTION_OUTPUT:
+ case PROCESS_SUBSTITUTION_INPUT_OUTPUT:
+ eprintf("use of run-time evaluated expression as right-hand side of %s operator (at line %zu) is illegal\n",
+ ctx->here_document_stack->first->redirection->type == HERE_DOCUMENT_INDENTED ? "<<-" : "<<",
+ ctx->here_document_stack->first->argument->line_number);
+ return;
+
+ case REDIRECTION:
+ case FUNCTION_MARK:
+ case SUBSHELL:
+ case ARITHMETIC_SUBSHELL:
+ /* interpreter shall recognise these as new "arguments" */
+ return; /* merging stops here; this part and any after it stay linked behind the terminator */
+
+ default:
+ case COMMAND: /* used by interpreter */
+ case VARIABLE: /* ditto */
+ abort();
+ }
+
+ if (ctx->parser_state->current_argument_end == next_part) /* do not leave the parser's end pointer on a node that is about to be freed */
+ ctx->parser_state->current_argument_end = terminator;
+ terminator->next_part = next_part->next_part; /* unlink the absorbed part before freeing it */
+ free(next_part);
+ }
+}
+
+
+int /* returns 1 when posix_mode is off; otherwise reports `token` as non-portable and returns 0 */
+check_extension(const char *token, size_t line_number)
+{
+ if (!posix_mode) {
+ return 1;
+ } else {
+ weprintf("the '%s' token (at line %zu) is not portable, not parsing as it\n", token, line_number); /* message only; the function still returns 0 so callers can `&&` it into a condition */
+ return 0;
+ }
}
@@ -30,6 +179,8 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
size_t bytes_read = 0;
size_t token_len;
+ struct here_document *here_document;
+ struct here_document_stack *here_doc_stack;
for (; bytes_read < code_len; bytes_read += token_len, code = &code[token_len]) {
switch (ctx->mode_stack->mode) {
@@ -44,8 +195,8 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
push_whitespace(ctx, 0);
push_semicolon(ctx, 1);
ctx->tokeniser_line_number += 1;
- if (ctx->here_documents_first)
- push_mode(ctx, HERE_DOCUMENT_MODE);
+ if (ctx->here_document_stack->first)
+ push_mode(ctx, HERE_DOCUMENT_MODE_INITIALISATION);
} else if (isspace(*code)) {
ctx->mode_stack->she_is_comment = 1;
@@ -129,7 +280,7 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
push_enter(ctx, SUBSHELL_SUBSTITUTION);
}
- } else if (code[1] == '[') {
+ } else if (code[1] == '[' && check_extension("$[", ctx->tokeniser_line_number)) {
token_len = 2;
push_mode(ctx, SB_QUOTE_MODE);
push_enter(ctx, ARITHMETIC_EXPRESSION);
@@ -139,7 +290,7 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
push_mode(ctx, CB_QUOTE_MODE);
push_enter(ctx, VARIABLE_SUBSTITUTION);
- } else if (code[1] == '\'') {
+ } else if (code[1] == '\'' && check_extension("$'", ctx->tokeniser_line_number)) {
for (token_len = 2; token_len < code_len - bytes_read; token_len += 1) {
if (code[token_len] == '\\') {
if (token_len + 1 == code_len - bytes_read) {
@@ -186,10 +337,87 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
break;
+ case HERE_DOCUMENT_MODE_INITIALISATION:
+ here_doc_stack = ctx->here_document_stack;
+ here_doc_stack->indented = 0;
+ if (here_doc_stack->first->redirection->type == HERE_DOCUMENT_INDENTED)
+ here_doc_stack->indented = 1;
+ get_here_document_terminator(ctx);
+ here_doc_stack->verbatim = 0;
+ if (here_doc_stack->first->argument->next_part->type == QUOTED)
+ here_doc_stack->verbatim = 1;
+ here_doc_stack->first->terminator = here_doc_stack->first->argument->next_part->text;
+ here_doc_stack->first->terminator_length = here_doc_stack->first->argument->next_part->length;
+ here_doc_stack->first->argument->next_part->text = ecalloc(1, 1);
+ here_doc_stack->first->argument->next_part->length = 0;
+ here_doc_stack->first->argument->next_part->type = QUOTED;
+ here_doc_stack->first->argument_end = here_doc_stack->first->argument->next_part;
+ ctx->mode_stack->mode = HERE_DOCUMENT_MODE;
+ /* fall through */
+
case HERE_DOCUMENT_MODE:
- /* TODO read until terminator, remove all <tab> (including on the
- * line of the terminator) if <<- and then if terminator was
- * unquoted, parse in " "-mode but accept " */
+ here_doc_stack = ctx->here_document_stack;
+ if (*code == '\t' && here_doc_stack->indented) {
+ token_len = 1;
+ } else {
+ token_len = here_doc_stack->line_offset;
+ for (; token_len < code_len - bytes_read; token_len += 1) {
+ if (code[token_len] == '\n') {
+ goto here_document_line_end;
+ } else if (!here_doc_stack->verbatim) {
+ if (code[token_len] == '\\') {
+ if (token_len + 1 == code_len - bytes_read) {
+ goto need_more;
+ } else if (code[token_len + 1] == '$' || code[token_len + 1] == '`') {
+ here_doc_stack->line_offset = 0;
+ push_quoted(ctx, code, token_len);
+ push_quoted(ctx, &code[token_len + 1], 1);
+ goto next;
+ }
+ token_len += 1;
+ } else if (code[token_len] == '$') {
+ here_doc_stack->line_offset = 0;
+ push_quoted(ctx, code, token_len);
+ bytes_read += token_len;
+ code = &code[token_len];
+ goto quote_mode_dollar_mode;
+ } else if (code[token_len] == '`') {
+ here_doc_stack->line_offset = 0;
+ push_quoted(ctx, code, token_len);
+ push_mode(ctx, BQ_QUOTE_MODE);
+ push_enter(ctx, BACKQUOTE_EXPRESSION);
+ goto next;
+ }
+ }
+ }
+ goto need_more;
+
+ here_document_line_end:
+ token_len += 1;
+ ctx->tokeniser_line_number += 1;
+ here_doc_stack->line_offset = 0;
+ here_document = here_doc_stack->first;
+
+ if (token_len - 1 == here_document->terminator_length &&
+ !strncmp(code, here_document->terminator, token_len - 1)) {
+ here_document->redirection->type = HERE_STRING;
+ here_doc_stack->first = here_document->next;
+ free(here_document->terminator);
+ free(here_document);
+ if (here_doc_stack->first) {
+ ctx->mode_stack->mode = HERE_DOCUMENT_MODE_INITIALISATION;
+ } else {
+ here_doc_stack->next = &here_doc_stack->first;
+ pop_mode(ctx);
+ if (here_doc_stack->interpret_when_empty) {
+ here_doc_stack->interpret_when_empty = 0;
+ interpret_and_eliminate(ctx);
+ }
+ }
+ } else {
+ push_quoted(ctx, code, token_len);
+ }
+ }
break;
@@ -197,6 +425,14 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
if (*code == '\\') {
if (code_len - bytes_read < 2) {
goto need_more;
+ } else if (code[1] == '\\' || code[1] == '`' || code[1] == '$') {
+ token_len = 2;
+ push_unquoted(ctx, &code[1], 1);
+ if (code[1] == '$') {
+ weprintf("meaningless \\ found before $ inside backquote expression at line "
+ "%zu, perhaps you mean to use \\\\$ instead to get a literal $\n",
+ ctx->tokeniser_line_number);
+ }
} else {
token_len = 2;
push_unquoted(ctx, code, 2);
@@ -284,6 +520,7 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
}
} else if (*code == '$') {
+ quote_mode_dollar_mode:
if (code_len - bytes_read < 2) {
if (ctx->end_of_file_reached) {
token_len = 1;
@@ -307,7 +544,7 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
push_enter(ctx, SUBSHELL_SUBSTITUTION);
}
- } else if (code[1] == '[') {
+ } else if (code[1] == '[' && check_extension("$[", ctx->tokeniser_line_number)) {
token_len = 2;
push_mode(ctx, SB_QUOTE_MODE);
push_enter(ctx, ARITHMETIC_EXPRESSION);
@@ -403,6 +640,7 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
abort();
}
+ next:
if (ctx->line_continuations) {
ctx->tokeniser_line_number += ctx->line_continuations;
ctx->line_continuations = 0;