/* See LICENSE file for copyright and license details. */
#include "common.h"
/*
 * Push `mode` on top of ctx->mode_stack.
 *
 * A warning is printed when the new mode is BQ_QUOTE_MODE. If the mode
 * that is current *before* the push is HERE_DOCUMENT_MODE, a fresh,
 * empty here-document stack level is also pushed onto
 * ctx->here_document_stack, so here-documents declared inside the
 * nested construct are collected separately from the enclosing ones.
 */
void
push_mode(struct parser_context *ctx, enum tokeniser_mode mode)
{
	struct here_document_stack *heredoc_level;
	struct mode_stack *entry;

	if (mode == BQ_QUOTE_MODE) {
		weprintf("backquote expression found at line %zu, stop it!\n", ctx->tokeniser_line_number);
	}

	if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) {
		heredoc_level = ecalloc(1, sizeof(*heredoc_level));
		/* empty list: the append position is the list head itself */
		heredoc_level->next = &heredoc_level->first;
		heredoc_level->previous = ctx->here_document_stack;
		ctx->here_document_stack = heredoc_level;
	}

	entry = emalloc(sizeof(*entry));
	entry->mode = mode;
	entry->she_is_comment = 1;
	entry->previous = ctx->mode_stack;
	ctx->mode_stack = entry;
}
void
pop_mode(struct parser_context *ctx)
{
struct mode_stack *old_mode_stack;
struct here_document_stack *old_here_document_stack;
struct here_document_stack *prev_here_document_stack;
old_mode_stack = ctx->mode_stack;
ctx->mode_stack = ctx->mode_stack->previous;
free(old_mode_stack);
if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) {
if (ctx->here_document_stack->first) {
if (posix_mode) {
eprintf("subshell expression closed at line %zu before here-documents, "
"this is non-portable\n", ctx->tokeniser_line_number);
}
prev_here_document_stack = ctx->here_document_stack->previous;
*ctx->here_document_stack->next = prev_here_document_stack->first;
ctx->here_document_stack->next = prev_here_document_stack->next;
ctx->here_document_stack->previous = prev_here_document_stack->previous;
ctx->here_document_stack->interpret_when_empty = prev_here_document_stack->interpret_when_empty;
free(prev_here_document_stack);
} else {
old_here_document_stack = ctx->here_document_stack;
ctx->here_document_stack = old_here_document_stack->previous;
free(old_here_document_stack);
}
}
}
/*
 * Append the text of every part of every argument in `quote` to the
 * here-document's terminator (here_document->argument->next_part),
 * keeping the terminator text NUL-terminated.
 *
 * Each part's text and the part itself are freed after being copied,
 * and finally quote->arguments is freed; `quote` itself is left for
 * the caller to free.
 *
 * A part that is neither QUOTED nor UNQUOTED is reported via eprintf
 * as an illegal run-time evaluated expression.
 */
static void
append_and_destroy_quote_to_here_document_terminator(struct here_document *here_document, struct parser_state *quote)
{
	struct argument *dest, *cur, *following;
	size_t idx;

	dest = here_document->argument->next_part;

	for (idx = 0; idx < quote->narguments; idx++) {
		cur = quote->arguments[idx];
		while (cur) {
			/* remember the successor now, `cur` is freed below */
			following = cur->next_part;

			if (!(cur->type == QUOTED || cur->type == UNQUOTED)) {
				eprintf("use of run-time evaluated expression as right-hand side "
				        "of %s operator (at line %zu) is illegal\n",
				        here_document->redirection->type == HERE_DOCUMENT_INDENTED ? "<<-" : "<<",
				        here_document->argument->line_number);
			}

			dest->text = erealloc(dest->text, dest->length + cur->length + 1);
			memcpy(&dest->text[dest->length], cur->text, cur->length);
			dest->length += cur->length;
			dest->text[dest->length] = '\0';

			free(cur->text);
			free(cur);
			cur = following;
		}
	}

	free(quote->arguments);
}
static void
get_here_document_terminator(struct parser_context *ctx)
{
struct argument *terminator, *next_part;
struct parser_state *child;
terminator = ctx->here_document_stack->first->argument->next_part;
if (!terminator || (terminator->type != QUOTED && terminator->type != UNQUOTED && terminator->type != QUOTE_EXPRESSION)) {
eprintf("missing right-hand side of %s operator at line %zu\n",
ctx->here_document_stack->first->redirection->type == HERE_DOCUMENT_INDENTED ? "<<-" : "<<",
ctx->here_document_stack->first->argument->line_number);
} else if (terminator->type == QUOTE_EXPRESSION) {
child = terminator->child;
terminator->type = QUOTED;
terminator->text = ecalloc(1, 1);
terminator->length = 0;
append_and_destroy_quote_to_here_document_terminator(ctx->here_document_stack->first, child);
free(child);
}
while ((next_part = terminator->next_part)) {
switch (next_part->type) {
case QUOTED:
terminator->type = QUOTED;
/* fall through */
case UNQUOTED:
terminator->text = erealloc(terminator->text, terminator->length + next_part->length + 1);
memcpy(&terminator->text[terminator->length], next_part->text, next_part->length);
terminator->length += next_part->length;
terminator->text[terminator->length] = '\0';
free(next_part->text);
break;
case QUOTE_EXPRESSION:
terminator->type = QUOTED;
append_and_destroy_quote_to_here_document_terminator(ctx->here_document_stack->first, next_part->child);
free(next_part->child);
break;
case BACKQUOTE_EXPRESSION:
case ARITHMETIC_EXPRESSION:
case VARIABLE_SUBSTITUTION:
case SUBSHELL_SUBSTITUTION:
case PROCESS_SUBSTITUTION_INPUT:
case PROCESS_SUBSTITUTION_OUTPUT:
case PROCESS_SUBSTITUTION_INPUT_OUTPUT:
eprintf("use of run-time evaluated expression as right-hand side of %s operator (at line %zu) is illegal\n",
ctx->here_document_stack->first->redirection->type == HERE_DOCUMENT_INDENTED ? "<<-" : "<<",
ctx->here_document_stack->first->argument->line_number);
return;
case REDIRECTION:
case FUNCTION_MARK:
case SUBSHELL:
case ARITHMETIC_SUBSHELL:
/* interpreter shall recognise these as new "arguments" */
return;
default:
case COMMAND: /* used by interpreter */
case VARIABLE: /* ditto */
abort();
}
if (ctx->parser_state->current_argument_end == next_part)
ctx->parser_state->current_argument_end = terminator;
terminator->next_part = next_part->next_part;
free(next_part);
}
}
/*
 * Tell whether a non-portable token may be parsed as an extension.
 *
 * Returns 1 when posix_mode is off. When posix_mode is on, a warning
 * naming `token` and `line_number` is printed and 0 is returned, so
 * the caller can fall back to treating the text literally.
 */
int
check_extension(const char *token, size_t line_number)
{
	if (posix_mode) {
		weprintf("the '%s' token (at line %zu) is not portable, not parsing as it\n", token, line_number);
		return 0;
	}

	return 1;
}
/*
 * Tokenise `code_len` bytes of `code` according to the mode on top of
 * ctx->mode_stack, emitting tokens through the push_*() helpers and
 * switching modes with push_mode()/pop_mode().
 *
 * Returns the number of bytes that were consumed. When the end of the
 * buffer is reached in the middle of a construct that cannot yet be
 * delimited (e.g. an unterminated quote, or a lone `$` or `\` before
 * end of file is known), the function stops early and returns only
 * what was consumed so far; the unconsumed tail is presumably meant to
 * be passed in again together with more input (confirm against the
 * caller). When everything was consumed and ctx->end_of_file_reached
 * is set, push_end_of_file() is called.
 *
 * Every path that reaches `next` (directly or via `break`) must have
 * set `token_len` to the number of bytes it consumed, since the loop
 * header advances `code` and `bytes_read` by exactly that amount.
 */
size_t
parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
{
#define IS_SYMBOL(C) ((C) == '<' || (C) == '>' || (C) == '&' || (C) == '|' ||\
                      (C) == '(' || (C) == ')' || (C) == ';' || (C) == '-')

	size_t bytes_read = 0;
	size_t token_len;
	struct here_document *here_document;
	struct here_document_stack *here_doc_stack;

	for (; bytes_read < code_len; bytes_read += token_len, code = &code[token_len]) {
		switch (ctx->mode_stack->mode) {

		case NORMAL_MODE:
			if (*code == '#' && ctx->mode_stack->she_is_comment) {
				/* `#` only starts a comment where she_is_comment allows it */
				token_len = 1;
				push_mode(ctx, COMMENT_MODE);

			} else if (*code == '\n') {
				token_len = 1;
				ctx->mode_stack->she_is_comment = 1;
				push_whitespace(ctx, 0);
				push_semicolon(ctx, 1);
				ctx->tokeniser_line_number += 1;
				/* pending here-documents start on the line after their operator */
				if (ctx->here_document_stack->first)
					push_mode(ctx, HERE_DOCUMENT_MODE_INITIALISATION);

			} else if (isspace((unsigned char)*code)) {
				/* <ctype.h> functions require an unsigned char value (or EOF) */
				ctx->mode_stack->she_is_comment = 1;
				push_whitespace(ctx, 0);
				for (token_len = 1; token_len < code_len - bytes_read; token_len += 1)
					if (!isspace((unsigned char)code[token_len]) || code[token_len] == '\n')
						break;

			} else if (*code == ')' && ctx->mode_stack->previous) {
				/* closes a nested NORMAL_MODE, e.g. one entered by `$(` */
				token_len = 1;
				ctx->mode_stack->she_is_comment = 1;
				pop_mode(ctx);
				push_leave(ctx);

			} else if (IS_SYMBOL(*code)) {
				ctx->mode_stack->she_is_comment = 1;
				for (token_len = 1; token_len < code_len - bytes_read; token_len += 1)
					if (!IS_SYMBOL(code[token_len]))
						goto symbol_end;
				/* run of symbols reaches the buffer end: it may continue in more input */
				if (!ctx->end_of_file_reached)
					goto need_more;
			symbol_end:
				/* push_symbol reports how much of the run it actually used */
				token_len = push_symbol(ctx, code, token_len);

			} else if (*code == '\\') {
				ctx->mode_stack->she_is_comment = 0;
			backslash_mode:
				if (code_len - bytes_read < 2)
					goto need_more;
				token_len = 2;
				push_quoted(ctx, &code[1], 1);

			} else if (*code == '\'') {
				ctx->mode_stack->she_is_comment = 0;
			squote_mode:
				for (token_len = 1; token_len < code_len - bytes_read; token_len += 1)
					if (code[token_len] == '\'')
						goto squote_end;
				goto need_more;
			squote_end:
				token_len += 1;
				push_quoted(ctx, &code[1], token_len - 2);

			} else if (*code == '"') {
				ctx->mode_stack->she_is_comment = 0;
			dquote_mode:
				token_len = 1;
				push_mode(ctx, DQ_QUOTE_MODE);
				push_enter(ctx, QUOTE_EXPRESSION);

			} else if (*code == '`') {
				ctx->mode_stack->she_is_comment = 0;
			bquote_mode:
				token_len = 1;
				push_mode(ctx, BQ_QUOTE_MODE);
				push_enter(ctx, BACKQUOTE_EXPRESSION);

			} else if (*code == '$') {
				ctx->mode_stack->she_is_comment = 0;
			dollar_mode:
				if (code_len - bytes_read < 2) {
					if (ctx->end_of_file_reached) {
						token_len = 1;
						push_unquoted(ctx, code, 1);
					} else {
						goto need_more;
					}
				} else if (code[1] == '(') {
					/* need a third byte to tell `$((` from `$(` */
					if (code_len - bytes_read < 3) {
						goto need_more;
					} else if (code[2] == '(') {
						token_len = 3;
						push_mode(ctx, RRB_QUOTE_MODE);
						push_enter(ctx, ARITHMETIC_EXPRESSION);
					} else {
						token_len = 2;
						push_mode(ctx, NORMAL_MODE);
						push_enter(ctx, SUBSHELL_SUBSTITUTION);
					}
				} else if (code[1] == '[' && check_extension("$[", ctx->tokeniser_line_number)) {
					token_len = 2;
					push_mode(ctx, SB_QUOTE_MODE);
					push_enter(ctx, ARITHMETIC_EXPRESSION);
				} else if (code[1] == '{') {
					token_len = 2;
					push_mode(ctx, CB_QUOTE_MODE);
					push_enter(ctx, VARIABLE_SUBSTITUTION);
				} else if (code[1] == '\'' && check_extension("$'", ctx->tokeniser_line_number)) {
					for (token_len = 2; token_len < code_len - bytes_read; token_len += 1) {
						if (code[token_len] == '\\') {
							/*
							 * Skip the escaped byte so that \' does not end
							 * the string; if the escaped byte is not in the
							 * buffer yet, wait for it.
							 */
							if (token_len + 1 < code_len - bytes_read) {
								token_len += 1;
							} else {
								goto need_more;
							}
						} else if (code[token_len] == '\'') {
							goto dollar_squote_end;
						}
					}
					/* closing quote not in the buffer: same handling as '...' */
					goto need_more;
				dollar_squote_end:
					token_len += 1;
					/* strip the leading $' and the trailing ' */
					push_escaped(ctx, &code[2], token_len - 3);
				} else {
					token_len = 1;
					push_unquoted(ctx, code, 1);
				}

			} else {
				/* plain word: runs up to the next byte that has its own branch above */
				ctx->mode_stack->she_is_comment = 0;
				for (token_len = 1; token_len < code_len - bytes_read; token_len += 1) {
					if (isspace((unsigned char)code[token_len]) || IS_SYMBOL(code[token_len]) ||
					    code[token_len] == '\'' || code[token_len] == '"' ||
					    code[token_len] == '\\' || code[token_len] == '$' ||
					    code[token_len] == '`')
						break;
				}
				push_unquoted(ctx, code, token_len);
			}
			break;

		case COMMENT_MODE:
			if (*code == '\n') {
				token_len = 0; /* do not consume */
				pop_mode(ctx);
			} else {
				for (token_len = 1; token_len < code_len - bytes_read; token_len += 1)
					if (code[token_len] == '\n')
						break;
			}
			break;

		case HERE_DOCUMENT_MODE_INITIALISATION:
			here_doc_stack = ctx->here_document_stack;
			here_doc_stack->indented = 0;
			if (here_doc_stack->first->redirection->type == HERE_DOCUMENT_INDENTED)
				here_doc_stack->indented = 1;
			get_here_document_terminator(ctx);
			/* a terminator with any quoted piece makes the body verbatim */
			here_doc_stack->verbatim = 0;
			if (here_doc_stack->first->argument->next_part->type == QUOTED)
				here_doc_stack->verbatim = 1;
			/* move the terminator text out of the argument, which will receive the body instead */
			here_doc_stack->first->terminator = here_doc_stack->first->argument->next_part->text;
			here_doc_stack->first->terminator_length = here_doc_stack->first->argument->next_part->length;
			here_doc_stack->first->argument->next_part->text = ecalloc(1, 1);
			here_doc_stack->first->argument->next_part->length = 0;
			here_doc_stack->first->argument->next_part->type = QUOTED;
			here_doc_stack->first->argument_end = here_doc_stack->first->argument->next_part;
			ctx->mode_stack->mode = HERE_DOCUMENT_MODE;
			/* fall through */

		case HERE_DOCUMENT_MODE:
			here_doc_stack = ctx->here_document_stack;
			/*
			 * NOTE(review): this branch and the terminator comparison below
			 * are also reached when resuming in the middle of a line after
			 * an expansion; confirm that is intended.
			 */
			if (*code == '\t' && here_doc_stack->indented) {
				token_len = 1;
			} else {
				token_len = here_doc_stack->line_offset;
				for (; token_len < code_len - bytes_read; token_len += 1) {
					if (code[token_len] == '\n') {
						goto here_document_line_end;
					} else if (!here_doc_stack->verbatim) {
						if (code[token_len] == '\\') {
							if (token_len + 1 == code_len - bytes_read) {
								goto need_more;
							} else if (code[token_len + 1] == '$' || code[token_len + 1] == '`') {
								here_doc_stack->line_offset = 0;
								push_quoted(ctx, code, token_len);
								push_quoted(ctx, &code[token_len + 1], 1);
								/*
								 * Consume the backslash and the escaped byte too;
								 * otherwise the same backslash is seen again with
								 * token_len == 0 and the loop never advances.
								 */
								token_len += 2;
								goto next;
							}
							token_len += 1;
						} else if (code[token_len] == '$') {
							here_doc_stack->line_offset = 0;
							push_quoted(ctx, code, token_len);
							/* advance manually so `code` points at the `$` for the shared handler */
							bytes_read += token_len;
							code = &code[token_len];
							goto quote_mode_dollar_mode;
						} else if (code[token_len] == '`') {
							here_doc_stack->line_offset = 0;
							push_quoted(ctx, code, token_len);
							push_mode(ctx, BQ_QUOTE_MODE);
							push_enter(ctx, BACKQUOTE_EXPRESSION);
							/*
							 * Consume the opening backquote as well; otherwise
							 * BQ_QUOTE_MODE would read it as the closing one.
							 */
							token_len += 1;
							goto next;
						}
					}
				}
				goto need_more;

			here_document_line_end:
				token_len += 1; /* include the newline */
				ctx->tokeniser_line_number += 1;
				here_doc_stack->line_offset = 0;
				here_document = here_doc_stack->first;
				if (token_len - 1 == here_document->terminator_length &&
				    !strncmp(code, here_document->terminator, token_len - 1)) {
					/* terminator line: this here-document is complete */
					here_document->redirection->type = HERE_STRING;
					here_doc_stack->first = here_document->next;
					free(here_document->terminator);
					free(here_document);
					if (here_doc_stack->first) {
						ctx->mode_stack->mode = HERE_DOCUMENT_MODE_INITIALISATION;
					} else {
						/* list is empty again: reset the append position */
						here_doc_stack->next = &here_doc_stack->first;
						pop_mode(ctx);
						if (here_doc_stack->interpret_when_empty) {
							here_doc_stack->interpret_when_empty = 0;
							interpret_and_eliminate(ctx);
						}
					}
				} else {
					push_quoted(ctx, code, token_len);
				}
			}
			break;

		case BQ_QUOTE_MODE:
			if (*code == '\\') {
				if (code_len - bytes_read < 2) {
					goto need_more;
				} else if (code[1] == '\\' || code[1] == '`' || code[1] == '$') {
					/* the backslash is dropped, the escaped byte is kept */
					token_len = 2;
					push_unquoted(ctx, &code[1], 1);
					if (code[1] == '$') {
						weprintf("meaningless \\ found before $ inside backquote expression at line "
						         "%zu, perhaps you mean to use \\\\$ instead to get a literal $\n",
						         ctx->tokeniser_line_number);
					}
				} else {
					/* any other backslash sequence is kept as is */
					token_len = 2;
					push_unquoted(ctx, code, 2);
				}
			} else if (*code == '`') {
				token_len = 1;
				pop_mode(ctx);
				push_leave(ctx);
			} else if (*code == '\n') {
				token_len = 1;
				ctx->tokeniser_line_number += 1;
				push_unquoted(ctx, code, 1);
			} else {
				for (token_len = 1; token_len < code_len - bytes_read; token_len += 1)
					if (code[token_len] == '\n' || code[token_len] == '\\' || code[token_len] == '`')
						break;
				push_unquoted(ctx, code, token_len);
			}
			break;

		case DQ_QUOTE_MODE:
			if (*code == '"') {
				token_len = 1;
				pop_mode(ctx);
				push_leave(ctx);
			} else {
				goto common_quote_mode;
			}
			break;

		case RRB_QUOTE_MODE:
			if (*code == ')') {
				/* only `))` closes; a single `)` is ordinary content */
				if (code_len - bytes_read < 2) {
					goto need_more;
				} else if (code[1] == ')') {
					token_len = 2;
					pop_mode(ctx);
					push_leave(ctx);
				} else {
					goto common_quote_mode;
				}
			} else {
				goto common_quote_mode;
			}
			break;

		case RB_QUOTE_MODE:
			if (*code == ')') {
				token_len = 1;
				pop_mode(ctx);
				push_leave(ctx);
			} else {
				goto common_quote_mode;
			}
			break;

		case SB_QUOTE_MODE:
			if (*code == ']') {
				token_len = 1;
				pop_mode(ctx);
				push_leave(ctx);
			} else {
				goto common_quote_mode;
			}
			break;

		common_quote_mode:
			/* shared by DQ, RRB, RB and SB modes once their own closer was ruled out */
			if (*code == '(' && ctx->mode_stack->mode != DQ_QUOTE_MODE) {
				if (code_len - bytes_read < 2) {
					goto need_more;
				} else if (code[1] == '(') {
					token_len = 2;
					push_mode(ctx, RRB_QUOTE_MODE);
					push_enter(ctx, ARITHMETIC_EXPRESSION);
				} else {
					token_len = 1;
					push_mode(ctx, RB_QUOTE_MODE);
					push_enter(ctx, ARITHMETIC_EXPRESSION);
				}
			} else if (*code == '$') {
			quote_mode_dollar_mode:
				/* also entered from HERE_DOCUMENT_MODE with `code` at the `$` */
				if (code_len - bytes_read < 2) {
					if (ctx->end_of_file_reached) {
						token_len = 1;
						push_unquoted(ctx, code, 1);
					} else {
						goto need_more;
					}
				} else if (code[1] == '(') {
					if (code_len - bytes_read < 3) {
						goto need_more;
					} else if (code[2] == '(') {
						token_len = 3;
						push_mode(ctx, RRB_QUOTE_MODE);
						push_enter(ctx, ARITHMETIC_EXPRESSION);
					} else {
						token_len = 2;
						push_mode(ctx, NORMAL_MODE);
						push_enter(ctx, SUBSHELL_SUBSTITUTION);
					}
				} else if (code[1] == '[' && check_extension("$[", ctx->tokeniser_line_number)) {
					token_len = 2;
					push_mode(ctx, SB_QUOTE_MODE);
					push_enter(ctx, ARITHMETIC_EXPRESSION);
				} else if (code[1] == '{') {
					token_len = 2;
					push_mode(ctx, CB_QUOTE_MODE);
					push_enter(ctx, VARIABLE_SUBSTITUTION);
				} else {
					token_len = 1;
					push_unquoted(ctx, code, 1);
				}
			} else if (*code == '\\') {
				if (code_len - bytes_read < 2) {
					if (ctx->end_of_file_reached) {
						token_len = 1;
						push_unquoted(ctx, code, 1);
					} else {
						goto need_more;
					}
				} else if (code[1] == '$' || code[1] == '`' || code[1] == '"' || code[1] == '\\') {
					/*
					 * Consume both the backslash and the escaped byte; if only
					 * the backslash were consumed, the escaped byte would be
					 * tokenised a second time (e.g. \" would close the quote).
					 */
					token_len = 2;
					push_quoted(ctx, &code[1], 1);
				} else {
					/* backslash is literal here; the next byte is handled on its own */
					token_len = 1;
					push_unquoted(ctx, code, 1);
				}
			} else if (*code == '`') {
				goto bquote_mode;
			} else if (*code == '\n') {
				token_len = 1;
				ctx->tokeniser_line_number += 1;
				push_unquoted(ctx, code, 1);
			} else {
				for (token_len = 1; token_len < code_len - bytes_read; token_len += 1) {
					if (code[token_len] == '"' || code[token_len] == ')' ||
					    code[token_len] == ']' || code[token_len] == '(' ||
					    code[token_len] == '$' || code[token_len] == '\\' ||
					    code[token_len] == '`' || code[token_len] == '\n')
						break;
				}
				push_unquoted(ctx, code, token_len);
			}
			break;

		case CB_QUOTE_MODE:
			/* inside ${...}: quoting constructs reuse the NORMAL_MODE handlers */
			if (*code == '}') {
				token_len = 1;
				pop_mode(ctx);
				push_leave(ctx);
			} else if (*code == '\\') {
				goto backslash_mode;
			} else if (*code == '\'') {
				goto squote_mode;
			} else if (*code == '"') {
				goto dquote_mode;
			} else if (*code == '`') {
				goto bquote_mode;
			} else if (*code == '$') {
				goto dollar_mode;
			} else if (*code == '\n') {
				token_len = 1;
				ctx->tokeniser_line_number += 1;
				push_unquoted(ctx, code, 1);
			} else {
				for (token_len = 1; token_len < code_len - bytes_read; token_len += 1) {
					if (code[token_len] == '}' || code[token_len] == '\\' ||
					    code[token_len] == '\'' || code[token_len] == '"' ||
					    code[token_len] == '`' || code[token_len] == '$' ||
					    code[token_len] == '\n')
						break;
				}
				push_unquoted(ctx, code, token_len);
			}
			break;

		default:
			abort();
		}

	next:
		/* account for line continuations recorded in ctx while this token was handled */
		if (ctx->line_continuations) {
			ctx->tokeniser_line_number += ctx->line_continuations;
			ctx->line_continuations = 0;
		}
	}

	if (bytes_read == code_len && ctx->end_of_file_reached)
		push_end_of_file(ctx);

need_more:
	return bytes_read;

#undef IS_SYMBOL
}