aboutsummaryrefslogblamecommitdiffstats
path: root/parser.c
blob: 957ca969406ca442226ab4988350a6c8ce4a03ca (plain) (tree)
1
2
3
4



                                                         







































                                                 









































                                                                                                                    
                                                                       








                                                                      


                                                                                                                          



    
                                                                
 

                                                                                   











                                                                           



                                                                                    








                                                                              










































                                                                                                 





















                                                                                              









                                                                                                                      

                                                
                                           













                                                                                              

                                                                          


























                                                                                            


































                                                                                        















                                                                                           





                                                                                                             
 








                                                                                                       














                                                                                   


























                                                                      


                                                                     









































































































                                                                                                                        














                                                                      


                                                            











                                                                   







                                                   
                                       

































                                                                                              

                                                      
/* See LICENSE file for copyright and license details. */
#include "common.h"


/**
 * Get the operator text that corresponds to a redirection type
 *
 * The process is aborted if `type` is not one of the
 * redirection types listed in the function
 *
 * @param   type  The redirection type
 * @return        Statically allocated string containing the operator
 */
const char *
get_redirection_token(enum redirection_type type)
{
	const char *token = NULL;

	switch (type) {
	case REDIRECT_INPUT:                     token = "<";   break;
	case REDIRECT_INPUT_TO_FD:               token = "<&";  break;
	case REDIRECT_OUTPUT:                    token = ">";   break;
	case REDIRECT_OUTPUT_APPEND:             token = ">>";  break;
	case REDIRECT_OUTPUT_CLOBBER:            token = ">|";  break;
	case REDIRECT_OUTPUT_TO_FD:              token = ">&";  break;
	case REDIRECT_OUTPUT_AND_STDERR:         token = "&>";  break;
	case REDIRECT_OUTPUT_AND_STDERR_APPEND:  token = "&>>"; break;
	case REDIRECT_OUTPUT_AND_STDERR_CLOBBER: token = "&>|"; break;
	case REDIRECT_OUTPUT_AND_STDERR_TO_FD:   token = "&>&"; break;
	case REDIRECT_INPUT_OUTPUT:              token = "<>";  break;
	case REDIRECT_INPUT_OUTPUT_TO_FD:        token = "<>&"; break;
	case HERE_STRING:                        token = "<<<"; break;
	case HERE_DOCUMENT:                      token = "<<";  break;
	case HERE_DOCUMENT_INDENTED:             token = "<<-"; break;
	default:
		break;
	}

	/* Unknown type: this is a programming error */
	if (!token)
		abort();

	return token;
}


void
push_end_of_file(struct parser_context *ctx)
{
	push_semicolon(ctx, 1);
	if (ctx->parser_state->parent || ctx->parser_state->ncommands)
		ctx->premature_end_of_file = 1;
}


void
push_whitespace(struct parser_context *ctx, int strict)
{
	if (ctx->parser_state->need_right_hand_side) {
		if (strict)
			eprintf("premature end of command\n");
		return;
	}

	if (ctx->parser_state->current_argument) {
		ctx->parser_state->arguments = erealloc(ctx->parser_state->arguments,
		                                        (ctx->parser_state->narguments + 1) *
		                                        sizeof(*ctx->parser_state->arguments));
		ctx->parser_state->arguments[ctx->parser_state->narguments++] = ctx->parser_state->current_argument;
		ctx->parser_state->current_argument = NULL;
		ctx->parser_state->current_argument_end = NULL;
	}
}


static void
push_command_terminal(struct parser_context *ctx, enum command_terminal terminal)
{
	struct command *new_command;

	push_whitespace(ctx, 1);

	ctx->parser_state->commands = erealloc(ctx->parser_state->commands,
	                                       (ctx->parser_state->ncommands + 1) *
	                                       sizeof(*ctx->parser_state->commands));
	new_command = ecalloc(1, sizeof(*new_command));
	ctx->parser_state->commands[ctx->parser_state->ncommands++] = new_command;
	new_command->terminal = terminal;
	new_command->terminal_line_number = ctx->tokeniser_line_number;
	new_command->arguments = ctx->parser_state->arguments;
	new_command->narguments = ctx->parser_state->narguments;
	new_command->redirections = ctx->parser_state->redirections;
	new_command->nredirections = ctx->parser_state->nredirections;
	ctx->parser_state->arguments = NULL;
	ctx->parser_state->narguments = 0;
	ctx->parser_state->redirections = NULL;
	ctx->parser_state->nredirections = 0;

	if (!ctx->parser_state->parent && !ctx->do_not_run)
		if (terminal == DOUBLE_SEMICOLON || terminal == SEMICOLON || terminal == NEWLINE || terminal == AMPERSAND)
			interpret_and_eliminate(ctx);
}


/**
 * Push a semicolon or a newline as a command terminal
 *
 * A semicolon always terminates the command; a newline only
 * does so if the state has at least one finished argument
 *
 * @param  ctx               The parser context
 * @param  actually_newline  Non-zero if the terminal is a newline
 *                           rather than a semicolon
 */
void
push_semicolon(struct parser_context *ctx, int actually_newline)
{
	if (!actually_newline)
		push_command_terminal(ctx, SEMICOLON);
	else if (ctx->parser_state->narguments)
		push_command_terminal(ctx, NEWLINE);
}


/**
 * Create a new, empty, argument part and link it in
 *
 * In here-document mode the part is appended to the argument of the
 * first here-document on the here-document stack; otherwise it is
 * appended to the current argument, or starts a new current argument
 * if there is none.
 *
 * @param  ctx   The parser context
 * @param  type  The type of the new part
 */
static void
push_new_argument_part(struct parser_context *ctx, enum argument_type type)
{
	struct argument *part, **end_slot;

	part = ecalloc(1, sizeof(*part));
	part->line_number = ctx->tokeniser_line_number;
	part->type = type;

	if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) {
		end_slot = &ctx->here_document_stack->first->argument_end;
		(*end_slot)->next_part = part;
	} else {
		end_slot = &ctx->parser_state->current_argument_end;
		if (*end_slot)
			(*end_slot)->next_part = part;
		else
			ctx->parser_state->current_argument = part;
	}

	/* The new part is now the last one in its chain */
	*end_slot = part;
}


/**
 * Check whether an argument consists solely of unquoted decimal digits
 *
 * @param   argument  The first part of the argument, must not be `NULL`
 * @return            1 if every part is of type `UNQUOTED` and
 *                    contains nothing but digits, 0 otherwise
 */
PURE_FUNC
static int
is_numeric_argument(struct argument *argument)
{
	const char *p;

	do {
		if (argument->type != UNQUOTED)
			return 0;

		/* <ctype.h> functions require a value representable as
		 * unsigned char; a plain (possibly negative) char is
		 * undefined behaviour */
		for (p = argument->text; *p; p++)
			if (!isdigit((unsigned char)*p))
				return 0;

	} while ((argument = argument->next_part));

	return 1;
}


/**
 * Check whether an argument has the form of a name directly
 * followed by a single, final, `$`
 *
 * The argument must be made up of `UNQUOTED` parts only, must not
 * begin with a digit or with `$`, and may contain only alphanumerical
 * characters and `_` before the `$`, which must be the very last
 * character of the last part.
 *
 * @param   argument  The first part of the argument, must not be `NULL`
 * @return            1 if the argument has the described form, 0 otherwise
 */
PURE_FUNC
static int
is_variable_reference(struct argument *argument)
{
	const char *p;

	/* <ctype.h> functions require a value representable as unsigned
	 * char; a plain (possibly negative) char is undefined behaviour */
	if (argument->type != UNQUOTED ||
	    isdigit((unsigned char)argument->text[0]) ||
	    argument->text[0] == '$')
		return 0;

	do {
		if (argument->type != UNQUOTED)
			return 0;

		/* The first character that cannot be part of a
		 * name decides: it must be a terminating `$` */
		for (p = argument->text; *p; p++)
			if (!isalnum((unsigned char)*p) && *p != '_')
				return p[0] == '$' && !p[1] && !argument->next_part;

	} while ((argument = argument->next_part));

	/* No `$` was found */
	return 0;
}


/**
 * Push a redirection operator
 *
 * A new redirection is appended to the parser state's redirection
 * list. If an argument is being built when the operator is found,
 * it is either used as the redirection's left-hand side or finished
 * as an ordinary argument. A new argument of type `REDIRECTION` is
 * then made the current argument, and the state is marked as
 * needing a right-hand side.
 *
 * @param  ctx   The parser context
 * @param  type  The type of the redirection
 */
static void
push_redirection(struct parser_context *ctx, enum redirection_type type)
{
	struct redirection *new_redirection;
	struct argument *new_argument;
	struct here_document *new_here_document;

	new_redirection = ecalloc(1, sizeof(*new_redirection));
	new_redirection->type = type;

	ctx->parser_state->redirections = erealloc(ctx->parser_state->redirections,
	                                           (ctx->parser_state->nredirections + 1) *
	                                           sizeof(*ctx->parser_state->redirections));
	ctx->parser_state->redirections[ctx->parser_state->nredirections++] = new_redirection;

	if (ctx->parser_state->current_argument) {
		/* The current argument is not a plain number usable as left-hand
		 * side if it begins with a redirection part, ends with a quoted
		 * part, contains anything but unquoted digits, or if the operator
		 * is one of the `&>` family */
		if (ctx->parser_state->current_argument->type == REDIRECTION ||
		    ctx->parser_state->current_argument_end->type == QUOTED ||
		    ctx->parser_state->current_argument_end->type == QUOTE_EXPRESSION ||
		    type == REDIRECT_OUTPUT_AND_STDERR ||
		    type == REDIRECT_OUTPUT_AND_STDERR_APPEND ||
		    type == REDIRECT_OUTPUT_AND_STDERR_CLOBBER ||
		    type == REDIRECT_OUTPUT_AND_STDERR_TO_FD ||
		    !is_numeric_argument(ctx->parser_state->current_argument)) {
			/* A name followed by `$` is accepted as left-hand side
			 * unless `posix_mode` is set, in which case a warning
			 * is printed and it is treated as an ordinary argument */
			if (is_variable_reference(ctx->parser_state->current_argument)) {
				if (posix_mode) {
					weprintf("the '$%s' token (at line %zu) is not portable, not parsing as it\n",
					         get_redirection_token(type), ctx->tokeniser_line_number);
				} else {
					goto argument_is_left_hand_side;
				}
			}
			/* Finish the current argument as an ordinary argument */
			push_whitespace(ctx, 1);
		} else {
		argument_is_left_hand_side:
			new_redirection->left_hand_side = ctx->parser_state->current_argument;
		}
	}

	/* NOTE(review): `current_argument` is replaced here but
	 * `current_argument_end` is left as it was — confirm that
	 * this is intended */
	new_argument = ecalloc(1, sizeof(*new_argument));
	new_argument->type = REDIRECTION;
	new_argument->line_number = ctx->tokeniser_line_number;
	ctx->parser_state->current_argument = new_argument;

	if (type == HERE_DOCUMENT || type == HERE_DOCUMENT_INDENTED) {
		/* Append a record for the here-document to the end of the
		 * list in `ctx->here_document_stack`; `argument_end` is not
		 * set here (memory comes from `emalloc`) — presumably it is
		 * initialised elsewhere before use, TODO confirm */
		new_here_document = emalloc(sizeof(*new_here_document));
		new_here_document->redirection = new_redirection;
		new_here_document->argument = new_argument;
		new_here_document->next = NULL;
		*ctx->here_document_stack->next = new_here_document;
		ctx->here_document_stack->next = &new_here_document->next;
	}

	ctx->parser_state->need_right_hand_side = 1;
}


/**
 * Switch tokeniser mode and enter a nested parser state
 *
 * Note that the mode is pushed before the nested
 * state is entered
 *
 * @param  ctx   The parser context
 * @param  type  The argument type passed on to `push_enter`
 * @param  mode  The tokeniser mode passed on to `push_mode`
 */
static void
push_shell_io(struct parser_context *ctx, enum argument_type type, enum tokeniser_mode mode)
{
	push_mode(ctx, mode);
	push_enter(ctx, type);
}


/**
 * Push a `()` token as an argument of its own
 *
 * Any argument being built is finished first, then an argument
 * consisting of a single `FUNCTION_MARK` part is created and
 * immediately finished
 *
 * @param  ctx  The parser context
 */
static void
push_function_mark(struct parser_context *ctx)
{
	push_whitespace(ctx, 1);
	push_new_argument_part(ctx, FUNCTION_MARK);
	push_whitespace(ctx, 1);
}


size_t
push_symbol(struct parser_context *ctx, char *token, size_t token_len)
{
#define LIST_SYMBOLS(_)\
	_(0, "<<<", push_redirection(ctx, HERE_STRING))\
	_(1, "<<-", push_redirection(ctx, HERE_DOCUMENT_INDENTED))\
	_(0, "<>(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT_OUTPUT, NORMAL_MODE))\
	_(0, "<>|", push_command_terminal(ctx, SOCKET_PIPE))\
	_(1, "<>&", push_redirection(ctx, REDIRECT_INPUT_OUTPUT_TO_FD))\
	_(0, "&>>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_APPEND))\
	_(0, "&>&", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_TO_FD))\
	_(0, "&>|", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_CLOBBER))\
	_(1, "()", push_function_mark(ctx))\
	_(0, "((", push_shell_io(ctx, ARITHMETIC_SUBSHELL, RRB_QUOTE_MODE))\
	_(1, ";;", push_command_terminal(ctx, DOUBLE_SEMICOLON))\
	_(0, "<(", push_shell_io(ctx, PROCESS_SUBSTITUTION_OUTPUT, NORMAL_MODE))\
	_(1, "<<", push_redirection(ctx, HERE_DOCUMENT))\
	_(1, "<>", push_redirection(ctx, REDIRECT_INPUT_OUTPUT))\
	_(1, "<&", push_redirection(ctx, REDIRECT_INPUT_TO_FD))\
	_(0, ">(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT, NORMAL_MODE))\
	_(1, ">>", push_redirection(ctx, REDIRECT_OUTPUT_APPEND))\
	_(1, ">&", push_redirection(ctx, REDIRECT_OUTPUT_TO_FD))\
	_(1, ">|", push_redirection(ctx, REDIRECT_OUTPUT_CLOBBER))\
	_(1, "||", push_command_terminal(ctx, OR))\
	_(0, "|&", push_command_terminal(ctx, PIPE_AMPERSAND))\
	_(1, "&&", push_command_terminal(ctx, AND))\
	_(0, "&|", push_command_terminal(ctx, AMPERSAND_PIPE))\
	_(0, "&>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR))\
	_(1, "(", push_shell_io(ctx, SUBSHELL, NORMAL_MODE))\
	_(1, ";", push_semicolon(ctx, 0))\
	_(1, "<", push_redirection(ctx, REDIRECT_INPUT))\
	_(1, ">", push_redirection(ctx, REDIRECT_OUTPUT))\
	_(1, "|", push_command_terminal(ctx, PIPE))\
	_(1, "&", push_command_terminal(ctx, AMPERSAND))

#define X(PORTABLE, SYMBOL, ACTION)\
	if (token_len >= sizeof(SYMBOL) - 1 &&\
	    !strncmp(token, SYMBOL, sizeof(SYMBOL) - 1) &&\
	    (PORTABLE || check_extension(SYMBOL, ctx->tokeniser_line_number))) {\
		ACTION;\
		return token_len;\
	}
	LIST_SYMBOLS(X)
#undef X

	push_unquoted(ctx, token, 1);
	return 1;
}


/**
 * Append text to the argument that is currently being built
 *
 * In here-document mode the text is always stored as `QUOTED` in the
 * argument of the first here-document on the stack; otherwise it is
 * stored in the parser state's current argument, and the state is
 * marked as no longer needing a right-hand side. A new argument part
 * is started whenever the last part has another type or was started
 * on another line.
 *
 * @param  ctx       The parser context
 * @param  text      The text to append
 * @param  text_len  The number of bytes in `text`
 * @param  type      The type of the text (ignored in here-document mode)
 */
static void
push_text(struct parser_context *ctx, char *text, size_t text_len, enum argument_type type)
{
	struct argument *tail;
	char *dest;
	int in_here_document, start_new_part;

	in_here_document = ctx->mode_stack->mode == HERE_DOCUMENT_MODE;

	if (in_here_document) {
		type = QUOTED;
		tail = ctx->here_document_stack->first->argument_end;
		start_new_part = tail->type != type ||
		                 tail->line_number != ctx->tokeniser_line_number;
	} else {
		ctx->parser_state->need_right_hand_side = 0;
		tail = ctx->parser_state->current_argument_end;
		start_new_part = !tail ||
		                 tail->type != type ||
		                 tail->line_number != ctx->tokeniser_line_number;
	}

	if (start_new_part) {
		push_new_argument_part(ctx, type);
		/* Pick up the part that was just created */
		if (in_here_document)
			tail = ctx->here_document_stack->first->argument_end;
		else
			tail = ctx->parser_state->current_argument_end;
	}

	/* Grow the part's text, keeping it NUL-terminated */
	tail->text = erealloc(tail->text, tail->length + text_len + 1);
	dest = &tail->text[tail->length];
	memcpy(dest, text, text_len);
	dest[text_len] = '\0';
	tail->length += text_len;
}


/**
 * Append quoted text to the argument that is currently being built
 *
 * @param  ctx       The parser context
 * @param  text      The text to append
 * @param  text_len  The number of bytes in `text`
 */
void
push_quoted(struct parser_context *ctx, char *text, size_t text_len)
{
	push_text(ctx, text, text_len, QUOTED);
}


/**
 * Encode a value as a UTF-8 byte sequence
 *
 * Values above the Unicode range are encoded with the same
 * scheme extended to 5, 6 and 7 byte sequences, so that any
 * 32-bit value can be represented
 *
 * @param   buf    Output buffer, must have room for at least 7 bytes;
 *                 the output is not NUL-terminated
 * @param   value  The value to encode
 * @return         The number of bytes written to `buf`
 */
static size_t
encode_utf8(char *buf, uint32_t value)
{
	size_t i, len;

	if (value <= 0x7F) {
		buf[0] = (char)value;
		return 1;
	}

	if      (value <= 0x000007FFUL) len = 2;
	else if (value <= 0x0000FFFFUL) len = 3;
	else if (value <= 0x001FFFFFUL) len = 4;
	else if (value <= 0x03FFFFFFUL) len = 5;
	else if (value <= 0x7FFFFFFFUL) len = 6;
	else                            len = 7;

	/* Continuation bytes carry 6 bits each, least significant
	 * group last, so fill the buffer from its end */
	for (i = len - 1; i; i--) {
		buf[i] = (char)(unsigned char)((value & 0x3F) | 0x80);
		value >>= 6;
	}

	/* The lead byte has its `len` highest bits set,
	 * followed by whatever bits of the value remain */
	buf[0] = (char)(unsigned char)((0xFFU << (8 - len)) | value);

	return len;
}

/**
 * Decode the backslash escapes of a $''-expression and
 * push the result as quoted text
 *
 * Recognised escapes are \a \b \e \E \f \n \r \t \v \\ \' \" \?,
 * \NNN (1 to 3 octal digits, decoding stops before a digit that
 * would take the value above 255), \xHH (1 or 2 hexadecimal
 * digits), \uHHHH and \UHHHHHHHH (1 to 4 respectively 1 to 8
 * hexadecimal digits, stored as UTF-8) and \cX (X with all but
 * its 5 lowest bits cleared). An escape that would produce a NUL
 * byte is dropped with a warning. Any other backslash is kept
 * as is.
 *
 * @param  ctx       The parser context
 * @param  text      The text to decode; it is decoded in place
 *                   and therefore modified
 * @param  text_len  The number of bytes in `text`
 */
void
push_escaped(struct parser_context *ctx, char *text, size_t text_len)
{
	uint32_t value, digit;
	size_t r, w, n, max_digits;
	int is_code_point;
	char c;

	/* `r` is the read position and `w` the write position; a decoded
	 * escape is never longer than its source, so `w` never passes `r` */
	for (r = w = 0; r < text_len;) {
		if (text[r] != '\\' || r + 1 >= text_len) {
			text[w++] = text[r++];
			continue;
		}

		c = text[r + 1];
		value = 0;
		is_code_point = 0;

		switch (c) {
		case 'a':  value = '\a';   break;
		case 'b':  value = '\b';   break;
		case 'e':
		case 'E':  value = '\033'; break;
		case 'f':  value = '\f';   break;
		case 'n':  value = '\n';   break;
		case 'r':  value = '\r';   break;
		case 't':  value = '\t';   break;
		case 'v':  value = '\v';   break;
		case '\\': value = '\\';   break;
		case '\'': value = '\'';   break;
		case '"':  value = '\"';   break;
		case '?':  value = '?';    break;
		default:
			break;
		}

		if (value) {
			r += 2;

		} else if ('0' <= c && c <= '7') {
			/* Test the digit being consumed (not the one after it)
			 * and never look beyond the end of the text */
			for (r += 1, n = 0; n < 3 && r < text_len && '0' <= text[r] && text[r] <= '7'; r += 1, n += 1) {
				digit = (uint32_t)(text[r] - '0');
				if (value * 8 + digit > 255)
					break;
				value = value * 8 + digit;
			}

		} else if ((c == 'x' || c == 'u' || c == 'U') &&
		           r + 2 < text_len && isxdigit((unsigned char)text[r + 2])) {
			max_digits = c == 'x' ? 2 : c == 'u' ? 4 : 8;
			is_code_point = c != 'x';
			for (r += 2, n = 0; n < max_digits && r < text_len && isxdigit((unsigned char)text[r]); r += 1, n += 1) {
				/* The low nibble of '0'-'9' is the digit itself,
				 * that of 'a'-'f' and 'A'-'F' is 1 to 6 */
				value *= 16;
				value |= (uint32_t)((text[r] > '9' ? 9 : 0) + (text[r] & 15));
			}

		} else if (c == 'c' && r + 2 < text_len) {
			value = (uint32_t)(text[r + 2] & (' ' - 1));
			r += 3;

		} else {
			/* Not an escape: keep the backslash, the character after
			 * it is handled as ordinary text in the next iteration */
			text[w++] = text[r++];
			continue;
		}

		if (!value) {
			weprintf("ignoring NUL byte result from $''-expression at line %zu\n",
			          ctx->tokeniser_line_number);
		} else if (is_code_point) {
			w += encode_utf8(&text[w], value);
		} else {
			text[w++] = (char)value;
		}
	}

	push_text(ctx, text, w, QUOTED);
}


/**
 * Append unquoted text to the argument that is currently being built
 *
 * @param  ctx       The parser context
 * @param  text      The text to append
 * @param  text_len  The number of bytes in `text`
 */
void
push_unquoted(struct parser_context *ctx, char *text, size_t text_len)
{
	push_text(ctx, text, text_len, UNQUOTED);
}


void
push_enter(struct parser_context *ctx, enum argument_type type)
{
	struct parser_state *new_state;

	if (ctx->mode_stack->mode != HERE_DOCUMENT_MODE)
		ctx->parser_state->need_right_hand_side = 0;

	push_new_argument_part(ctx, type);

	new_state = ecalloc(1, sizeof(*new_state));
	new_state->parent = ctx->parser_state;
	ctx->parser_state->current_argument_end->child = new_state;
	ctx->parser_state = new_state;
}


/**
 * Leave the current nested parser state and return to its parent
 *
 * What is done before returning to the parent depends on
 * the current tokeniser mode, see the comments in the function
 *
 * @param  ctx  The parser context
 */
void
push_leave(struct parser_context *ctx)
{
	struct parser_context subctx;
	struct argument *argument;
	char *code;
	size_t code_length;
	size_t parsed_length;
	size_t arg_i;

	if (ctx->mode_stack->mode == NORMAL_MODE) {
		/* Terminate any unfinished command as if by a newline */
		push_semicolon(ctx, 1);

	} else if (ctx->mode_stack->mode == BQ_QUOTE_MODE) {
		/* The text collected in the state's arguments is joined
		 * into one string which is parsed, without being run, in a
		 * separate parser context; the resulting parser state then
		 * replaces the child of the parent's last argument part */
		initialise_parser_context(&subctx, 1, 1);
		subctx.do_not_run = 1;
		subctx.end_of_file_reached = 1;
		code = NULL;
		code_length = 0;
		/* Only `text` of the first part of each argument is used */
		for (arg_i = 0; arg_i < ctx->parser_state->narguments; arg_i++) {
			argument = ctx->parser_state->arguments[arg_i];
			code = erealloc(code, code_length + argument->length);
			memcpy(&code[code_length], argument->text, argument->length);
			code_length += argument->length;
		}
		code = erealloc(code, code_length + 1);
		code[code_length] = '\0';
		parsed_length = parse_preparsed(&subctx, code, code_length);
		/* All of the code must have been parsed,
		 * and it must not end in the middle of a construct */
		if (parsed_length < code_length || subctx.premature_end_of_file) {
			eprintf("premature end of file backquote expression at line %zu\n",
			        ctx->parser_state->parent->current_argument_end->line_number);
		}
		free(code);
		free(subctx.here_document_stack);
		free(subctx.interpreter_state);
		ctx->parser_state->parent->current_argument_end->child = subctx.parser_state;

	} else {
		/* In quote modes we want everything in a dummy command
		 * to simplify the implementation of the interpreter.
		 * The command termination used here doesn't matter,
		 * neither does the line number (for it), the interpreter
		 * will only look at the argument list. */
		push_command_terminal(ctx, NEWLINE);
	}

	/* Return to the enclosing parser state */
	ctx->parser_state = ctx->parser_state->parent;
}