/* See LICENSE file for copyright and license details. */
#include "common.h"
const char *
get_redirection_token(enum redirection_type type)
{
switch (type) {
case REDIRECT_INPUT:
return "<";
case REDIRECT_INPUT_TO_FD:
return "<&";
case REDIRECT_OUTPUT:
return ">";
case REDIRECT_OUTPUT_APPEND:
return ">>";
case REDIRECT_OUTPUT_CLOBBER:
return ">|";
case REDIRECT_OUTPUT_TO_FD:
return ">&";
case REDIRECT_OUTPUT_AND_STDERR:
return "&>";
case REDIRECT_OUTPUT_AND_STDERR_APPEND:
return "&>>";
case REDIRECT_OUTPUT_AND_STDERR_CLOBBER:
return "&>|";
case REDIRECT_OUTPUT_AND_STDERR_TO_FD:
return "&>&";
case REDIRECT_INPUT_OUTPUT:
return "<>";
case REDIRECT_INPUT_OUTPUT_TO_FD:
return "<>&";
case HERE_STRING:
return "<<<";
case HERE_DOCUMENT:
return "<<";
case HERE_DOCUMENT_INDENTED:
return "<<-";
default:
abort();
}
}
void
push_end_of_file(struct parser_context *ctx)
{
push_semicolon(ctx, 1);
if (ctx->parser_state->parent || ctx->parser_state->ncommands)
ctx->premature_end_of_file = 1;
}
void
push_whitespace(struct parser_context *ctx, int strict)
{
if (ctx->parser_state->need_right_hand_side) {
if (strict)
eprintf("premature end of command\n");
return;
}
if (ctx->parser_state->current_argument) {
ctx->parser_state->arguments = erealloc(ctx->parser_state->arguments,
(ctx->parser_state->narguments + 1) *
sizeof(*ctx->parser_state->arguments));
ctx->parser_state->arguments[ctx->parser_state->narguments++] = ctx->parser_state->current_argument;
ctx->parser_state->current_argument = NULL;
ctx->parser_state->current_argument_end = NULL;
}
}
static void
push_command_terminal(struct parser_context *ctx, enum command_terminal terminal)
{
struct command *new_command;
push_whitespace(ctx, 1);
ctx->parser_state->commands = erealloc(ctx->parser_state->commands,
(ctx->parser_state->ncommands + 1) *
sizeof(*ctx->parser_state->commands));
new_command = ecalloc(1, sizeof(*new_command));
ctx->parser_state->commands[ctx->parser_state->ncommands++] = new_command;
new_command->terminal = terminal;
new_command->terminal_line_number = ctx->tokeniser_line_number;
new_command->arguments = ctx->parser_state->arguments;
new_command->narguments = ctx->parser_state->narguments;
new_command->redirections = ctx->parser_state->redirections;
new_command->nredirections = ctx->parser_state->nredirections;
ctx->parser_state->arguments = NULL;
ctx->parser_state->narguments = 0;
ctx->parser_state->redirections = NULL;
ctx->parser_state->nredirections = 0;
if (!ctx->parser_state->parent && !ctx->do_not_run)
if (terminal == DOUBLE_SEMICOLON || terminal == SEMICOLON || terminal == NEWLINE || terminal == AMPERSAND)
interpret_and_eliminate(ctx);
}
void
push_semicolon(struct parser_context *ctx, int actually_newline)
{
if (!actually_newline || ctx->parser_state->narguments)
push_command_terminal(ctx, actually_newline ? NEWLINE : SEMICOLON);
}
static void
push_new_argument_part(struct parser_context *ctx, enum argument_type type)
{
struct argument *new_part;
new_part = ecalloc(1, sizeof(*new_part));
new_part->type = type;
new_part->line_number = ctx->tokeniser_line_number;
if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) {
ctx->here_document_stack->first->argument_end->next_part = new_part;
ctx->here_document_stack->first->argument_end = new_part;
} else if (ctx->parser_state->current_argument_end) {
ctx->parser_state->current_argument_end->next_part = new_part;
ctx->parser_state->current_argument_end = new_part;
} else {
ctx->parser_state->current_argument = new_part;
ctx->parser_state->current_argument_end = new_part;
}
}
PURE_FUNC
static int
is_numeric_argument(struct argument *argument)
{
char *p;
do {
if (argument->type != UNQUOTED)
return 0;
for (p = argument->text; *p; p++)
if (!isdigit(*p))
return 0;
} while ((argument = argument->next_part));
return 1;
}
PURE_FUNC
static int
is_variable_reference(struct argument *argument)
{
char *p;
if (argument->type != UNQUOTED || isdigit(argument->text[0]) || argument->text[0] == '$')
return 0;
do {
if (argument->type != UNQUOTED)
return 0;
for (p = argument->text; *p; p++)
if (!isalnum(*p) && *p != '_')
return p[0] == '$' && !p[1] && !argument->next_part;
} while ((argument = argument->next_part));
return 0;
}
static void
push_redirection(struct parser_context *ctx, enum redirection_type type)
{
struct redirection *new_redirection;
struct argument *new_argument;
struct here_document *new_here_document;
new_redirection = ecalloc(1, sizeof(*new_redirection));
new_redirection->type = type;
ctx->parser_state->redirections = erealloc(ctx->parser_state->redirections,
(ctx->parser_state->nredirections + 1) *
sizeof(*ctx->parser_state->redirections));
ctx->parser_state->redirections[ctx->parser_state->nredirections++] = new_redirection;
if (ctx->parser_state->current_argument) {
if (ctx->parser_state->current_argument->type == REDIRECTION ||
ctx->parser_state->current_argument_end->type == QUOTED ||
ctx->parser_state->current_argument_end->type == QUOTE_EXPRESSION ||
type == REDIRECT_OUTPUT_AND_STDERR ||
type == REDIRECT_OUTPUT_AND_STDERR_APPEND ||
type == REDIRECT_OUTPUT_AND_STDERR_CLOBBER ||
type == REDIRECT_OUTPUT_AND_STDERR_TO_FD ||
!is_numeric_argument(ctx->parser_state->current_argument)) {
if (is_variable_reference(ctx->parser_state->current_argument)) {
if (posix_mode) {
weprintf("the '$%s' token (at line %zu) is not portable, not parsing as it\n",
get_redirection_token(type), ctx->tokeniser_line_number);
} else {
goto argument_is_left_hand_side;
}
}
push_whitespace(ctx, 1);
} else {
argument_is_left_hand_side:
new_redirection->left_hand_side = ctx->parser_state->current_argument;
}
}
new_argument = ecalloc(1, sizeof(*new_argument));
new_argument->type = REDIRECTION;
new_argument->line_number = ctx->tokeniser_line_number;
ctx->parser_state->current_argument = new_argument;
if (type == HERE_DOCUMENT || type == HERE_DOCUMENT_INDENTED) {
new_here_document = emalloc(sizeof(*new_here_document));
new_here_document->redirection = new_redirection;
new_here_document->argument = new_argument;
new_here_document->next = NULL;
*ctx->here_document_stack->next = new_here_document;
ctx->here_document_stack->next = &new_here_document->next;
}
ctx->parser_state->need_right_hand_side = 1;
}
static void
push_shell_io(struct parser_context *ctx, enum argument_type type, enum tokeniser_mode mode)
{
push_mode(ctx, mode);
push_enter(ctx, type);
}
static void
push_function_mark(struct parser_context *ctx)
{
push_whitespace(ctx, 1);
push_new_argument_part(ctx, FUNCTION_MARK);
push_whitespace(ctx, 1);
}
size_t
push_symbol(struct parser_context *ctx, char *token, size_t token_len)
{
#define LIST_SYMBOLS(_)\
_(0, "<<<", push_redirection(ctx, HERE_STRING))\
_(1, "<<-", push_redirection(ctx, HERE_DOCUMENT_INDENTED))\
_(0, "<>(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT_OUTPUT, NORMAL_MODE))\
_(0, "<>|", push_command_terminal(ctx, SOCKET_PIPE))\
_(1, "<>&", push_redirection(ctx, REDIRECT_INPUT_OUTPUT_TO_FD))\
_(0, "&>>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_APPEND))\
_(0, "&>&", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_TO_FD))\
_(0, "&>|", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_CLOBBER))\
_(1, "()", push_function_mark(ctx))\
_(0, "((", push_shell_io(ctx, ARITHMETIC_SUBSHELL, RRB_QUOTE_MODE))\
_(1, ";;", push_command_terminal(ctx, DOUBLE_SEMICOLON))\
_(0, "<(", push_shell_io(ctx, PROCESS_SUBSTITUTION_OUTPUT, NORMAL_MODE))\
_(1, "<<", push_redirection(ctx, HERE_DOCUMENT))\
_(1, "<>", push_redirection(ctx, REDIRECT_INPUT_OUTPUT))\
_(1, "<&", push_redirection(ctx, REDIRECT_INPUT_TO_FD))\
_(0, ">(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT, NORMAL_MODE))\
_(1, ">>", push_redirection(ctx, REDIRECT_OUTPUT_APPEND))\
_(1, ">&", push_redirection(ctx, REDIRECT_OUTPUT_TO_FD))\
_(1, ">|", push_redirection(ctx, REDIRECT_OUTPUT_CLOBBER))\
_(1, "||", push_command_terminal(ctx, OR))\
_(0, "|&", push_command_terminal(ctx, PIPE_AMPERSAND))\
_(1, "&&", push_command_terminal(ctx, AND))\
_(0, "&|", push_command_terminal(ctx, AMPERSAND_PIPE))\
_(0, "&>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR))\
_(1, "(", push_shell_io(ctx, SUBSHELL, NORMAL_MODE))\
_(1, ";", push_semicolon(ctx, 0))\
_(1, "<", push_redirection(ctx, REDIRECT_INPUT))\
_(1, ">", push_redirection(ctx, REDIRECT_OUTPUT))\
_(1, "|", push_command_terminal(ctx, PIPE))\
_(1, "&", push_command_terminal(ctx, AMPERSAND))
#define X(PORTABLE, SYMBOL, ACTION)\
if (token_len >= sizeof(SYMBOL) - 1 &&\
!strncmp(token, SYMBOL, sizeof(SYMBOL) - 1) &&\
(PORTABLE || check_extension(SYMBOL, ctx->tokeniser_line_number))) {\
ACTION;\
return token_len;\
}
LIST_SYMBOLS(X)
#undef X
push_unquoted(ctx, token, 1);
return 1;
}
static void
push_text(struct parser_context *ctx, char *text, size_t text_len, enum argument_type type)
{
struct argument *arg_part;
if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) {
type = QUOTED;
if (ctx->here_document_stack->first->argument_end->type != type ||
ctx->here_document_stack->first->argument_end->line_number != ctx->tokeniser_line_number)
push_new_argument_part(ctx, type);
arg_part = ctx->here_document_stack->first->argument_end;
} else {
ctx->parser_state->need_right_hand_side = 0;
if (!ctx->parser_state->current_argument_end ||
ctx->parser_state->current_argument_end->type != type ||
ctx->parser_state->current_argument_end->line_number != ctx->tokeniser_line_number)
push_new_argument_part(ctx, type);
arg_part = ctx->parser_state->current_argument_end;
}
arg_part->text = erealloc(arg_part->text, arg_part->length + text_len + 1);
memcpy(&arg_part->text[arg_part->length], text, text_len);
arg_part->length += text_len;
arg_part->text[arg_part->length] = '\0';
}
void
push_quoted(struct parser_context *ctx, char *text, size_t text_len)
{
push_text(ctx, text, text_len, QUOTED);
}
static size_t
encode_utf8(char *buf, uint32_t value)
{
size_t i, len;
if (value <= 0x7F) {
buf[0] = (char)value;
return 1;
}
if (value <= 0x000007FFUL) len = 2;
else if (value <= 0x0000FFFFUL) len = 3;
else if (value <= 0x001FFFFFUL) len = 4;
else if (value <= 0x03FFFFFFUL) len = 5;
else if (value <= 0x7FFFFFFFUL) len = 6;
else len = 7;
for (i = len - 1; i; i--) {
buf[len - 1 - i] = (char)(((int)value & 0x3F) | 0x80);
value >>= 6;
}
buf[0] |= (char)(0xFF << (8 - len));
return len;
}
void
push_escaped(struct parser_context *ctx, char *text, size_t text_len)
{
uint32_t value;
size_t r, w, n;
for (r = w = 0; r < text_len;) {
if (text[r] == '\\' && r + 1 < text_len) {
if (text[r + 1] == 'a') {
text[w++] = '\a';
r += 2;
} else if (text[r + 1] == 'b') {
text[w++] = '\b';
r += 2;
} else if (text[r + 1] == 'e' || text[r + 1] == 'E') {
text[w++] = '\033';
r += 2;
} else if (text[r + 1] == 'f') {
text[w++] = '\f';
r += 2;
} else if (text[r + 1] == 'n') {
text[w++] = '\n';
r += 2;
} else if (text[r + 1] == 'r') {
text[w++] = '\r';
r += 2;
} else if (text[r + 1] == 't') {
text[w++] = '\t';
r += 2;
} else if (text[r + 1] == 'v') {
text[w++] = '\v';
r += 2;
} else if (text[r + 1] == '\\') {
text[w++] = '\\';
r += 2;
} else if (text[r + 1] == '\'') {
text[w++] = '\'';
r += 2;
} else if (text[r + 1] == '"') {
text[w++] = '\"';
r += 2;
} else if (text[r + 1] == '?') {
text[w++] = '?';
r += 2;
} else if ('0' <= text[r + 1] && text[r + 1] <= '7') {
value = 0;
for (r += 1, n = 0; n < 3 && '0' <= text[r + 1] && text[r + 1] <= '7'; r += 1, n += 1) {
if ((text[r] & 15) > 255 - (int)value)
break;
value *= 8;
value |= (uint32_t)(text[r] & 15);
}
if (value) {
text[w++] = (char)value;
} else {
weprintf("ignoring NUL byte result from $''-expression at line %zu\n",
ctx->tokeniser_line_number);
}
} else if (text[r + 1] == 'x' && text_len - r >= 3 && isxdigit(text[r + 2])) {
value = 0;
for (r += 2, n = 0; n < 2 && isxdigit(text[r]); r += 1, n += 1) {
value *= 16;
value |= (uint32_t)((text[r] > '9' ? 9 : 0) + (text[r] & 15));
}
if (value) {
text[w++] = (char)value;
} else {
weprintf("ignoring NUL byte result from $''-expression at line %zu\n",
ctx->tokeniser_line_number);
}
} else if (text[r + 1] == 'u' && text_len - r >= 3 && isxdigit(text[r + 2])) {
value = 0;
for (r += 2, n = 0; n < 4 && isxdigit(text[r]); r += 1, n += 1) {
value *= 16;
value |= (uint32_t)((text[r] > '9' ? 9 : 0) + (text[r] & 15));
}
if (value) {
w += encode_utf8(&text[w], value);
} else {
weprintf("ignoring NUL byte result from $''-expression at line %zu\n",
ctx->tokeniser_line_number);
}
} else if (text[r + 1] == 'U') {
value = 0;
for (r += 2, n = 0; n < 8 && isxdigit(text[r]); r += 1, n += 1) {
value *= 16;
value |= (uint32_t)((text[r] > '9' ? 9 : 0) + (text[r] & 15));
}
if (value) {
w += encode_utf8(&text[w], value);
} else {
weprintf("ignoring NUL byte result from $''-expression at line %zu\n",
ctx->tokeniser_line_number);
}
} else if (text[r + 1] == 'c' && text_len - r >= 3) {
if (text[r + 2] & (' ' - 1)) {
text[w++] = (char)(text[r + 2] & (' ' - 1));
} else {
weprintf("ignoring NUL byte result from $''-expression at line %zu\n",
ctx->tokeniser_line_number);
}
r += 3;
} else {
text[w++] = text[r++];
}
} else {
text[w++] = text[r++];
}
}
push_text(ctx, text, w, QUOTED);
}
void
push_unquoted(struct parser_context *ctx, char *text, size_t text_len)
{
push_text(ctx, text, text_len, UNQUOTED);
}
void
push_enter(struct parser_context *ctx, enum argument_type type)
{
struct parser_state *new_state;
if (ctx->mode_stack->mode != HERE_DOCUMENT_MODE)
ctx->parser_state->need_right_hand_side = 0;
push_new_argument_part(ctx, type);
new_state = ecalloc(1, sizeof(*new_state));
new_state->parent = ctx->parser_state;
ctx->parser_state->current_argument_end->child = new_state;
ctx->parser_state = new_state;
}
void
push_leave(struct parser_context *ctx)
{
struct parser_context subctx;
struct argument *argument;
char *code;
size_t code_length;
size_t parsed_length;
size_t arg_i;
if (ctx->mode_stack->mode == NORMAL_MODE) {
push_semicolon(ctx, 1);
} else if (ctx->mode_stack->mode == BQ_QUOTE_MODE) {
initialise_parser_context(&subctx, 1, 1);
subctx.do_not_run = 1;
subctx.end_of_file_reached = 1;
code = NULL;
code_length = 0;
for (arg_i = 0; arg_i < ctx->parser_state->narguments; arg_i++) {
argument = ctx->parser_state->arguments[arg_i];
code = erealloc(code, code_length + argument->length);
memcpy(&code[code_length], argument->text, argument->length);
code_length += argument->length;
}
code = erealloc(code, code_length + 1);
code[code_length] = '\0';
parsed_length = parse_preparsed(&subctx, code, code_length);
if (parsed_length < code_length || subctx.premature_end_of_file) {
eprintf("premature end of file backquote expression at line %zu\n",
ctx->parser_state->parent->current_argument_end->line_number);
}
free(code);
free(subctx.here_document_stack);
free(subctx.interpreter_state);
ctx->parser_state->parent->current_argument_end->child = subctx.parser_state;
} else {
/* In quote modes we want everything in a dummy command
* to simplify the implementation of the interpreter.
* The command termination used here doesn't matter,
* neither does the line nummer (for it), the interpreter
* will only look at the argument list. */
push_command_terminal(ctx, NEWLINE);
}
ctx->parser_state = ctx->parser_state->parent;
}