aboutsummaryrefslogtreecommitdiffstats
path: root/parser.c
diff options
context:
space:
mode:
authorMattias Andrée <maandree@kth.se>2021-07-13 02:44:18 +0200
committerMattias Andrée <maandree@kth.se>2021-07-13 02:44:18 +0200
commitbc9033fdf30424c34008e651fdbbba5da8c8fc40 (patch)
tree995bc6bbd067cf6bebe1a6e6f74e210b11df1a8a /parser.c
parentSecond commit (diff)
downloadapsh-bc9033fdf30424c34008e651fdbbba5da8c8fc40.tar.gz
apsh-bc9033fdf30424c34008e651fdbbba5da8c8fc40.tar.bz2
apsh-bc9033fdf30424c34008e651fdbbba5da8c8fc40.tar.xz
Third commit
Signed-off-by: Mattias Andrée <maandree@kth.se>
Diffstat (limited to 'parser.c')
-rw-r--r--parser.c395
1 files changed, 335 insertions, 60 deletions
diff --git a/parser.c b/parser.c
index c3da716..957ca96 100644
--- a/parser.c
+++ b/parser.c
@@ -2,6 +2,46 @@
#include "common.h"
+const char *
+get_redirection_token(enum redirection_type type)
+{
+ switch (type) {
+ case REDIRECT_INPUT:
+ return "<";
+ case REDIRECT_INPUT_TO_FD:
+ return "<&";
+ case REDIRECT_OUTPUT:
+ return ">";
+ case REDIRECT_OUTPUT_APPEND:
+ return ">>";
+ case REDIRECT_OUTPUT_CLOBBER:
+ return ">|";
+ case REDIRECT_OUTPUT_TO_FD:
+ return ">&";
+ case REDIRECT_OUTPUT_AND_STDERR:
+ return "&>";
+ case REDIRECT_OUTPUT_AND_STDERR_APPEND:
+ return "&>>";
+ case REDIRECT_OUTPUT_AND_STDERR_CLOBBER:
+ return "&>|";
+ case REDIRECT_OUTPUT_AND_STDERR_TO_FD:
+ return "&>&";
+ case REDIRECT_INPUT_OUTPUT:
+ return "<>";
+ case REDIRECT_INPUT_OUTPUT_TO_FD:
+ return "<>&";
+ case HERE_STRING:
+ return "<<<";
+ case HERE_DOCUMENT:
+ return "<<";
+ case HERE_DOCUMENT_INDENTED:
+ return "<<-";
+ default:
+ abort();
+ }
+}
+
+
void
push_end_of_file(struct parser_context *ctx)
{
@@ -44,6 +84,7 @@ push_command_terminal(struct parser_context *ctx, enum command_terminal terminal
new_command = ecalloc(1, sizeof(*new_command));
ctx->parser_state->commands[ctx->parser_state->ncommands++] = new_command;
new_command->terminal = terminal;
+ new_command->terminal_line_number = ctx->tokeniser_line_number;
new_command->arguments = ctx->parser_state->arguments;
new_command->narguments = ctx->parser_state->narguments;
new_command->redirections = ctx->parser_state->redirections;
@@ -53,22 +94,17 @@ push_command_terminal(struct parser_context *ctx, enum command_terminal terminal
ctx->parser_state->redirections = NULL;
ctx->parser_state->nredirections = 0;
- if (!ctx->parser_state->parent) {
- if (terminal == DOUBLE_SEMICOLON || terminal == SEMICOLON || terminal == AMPERSAND) {
- /* TODO unless in a special construct such as while, case, for, if, or {, run and clear
- * also require that any here-document is specified (count them and run when given);
- * if terminal == AMPERSAND: perform </dev/null first, and reset exist status to 0
- */
- }
- }
+ if (!ctx->parser_state->parent && !ctx->do_not_run)
+ if (terminal == DOUBLE_SEMICOLON || terminal == SEMICOLON || terminal == NEWLINE || terminal == AMPERSAND)
+ interpret_and_eliminate(ctx);
}
void
-push_semicolon(struct parser_context *ctx, int maybe)
+push_semicolon(struct parser_context *ctx, int actually_newline)
{
- if (!maybe || ctx->parser_state->narguments)
- push_command_terminal(ctx, SEMICOLON);
+ if (!actually_newline || ctx->parser_state->narguments)
+ push_command_terminal(ctx, actually_newline ? NEWLINE : SEMICOLON);
}
@@ -81,7 +117,10 @@ push_new_argument_part(struct parser_context *ctx, enum argument_type type)
new_part->type = type;
new_part->line_number = ctx->tokeniser_line_number;
- if (ctx->parser_state->current_argument_end) {
+ if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) {
+ ctx->here_document_stack->first->argument_end->next_part = new_part;
+ ctx->here_document_stack->first->argument_end = new_part;
+ } else if (ctx->parser_state->current_argument_end) {
ctx->parser_state->current_argument_end->next_part = new_part;
ctx->parser_state->current_argument_end = new_part;
} else {
@@ -91,6 +130,49 @@ push_new_argument_part(struct parser_context *ctx, enum argument_type type)
}
+PURE_FUNC
+static int
+is_numeric_argument(struct argument *argument)
+{
+ char *p;
+
+ do {
+ if (argument->type != UNQUOTED)
+ return 0;
+
+ for (p = argument->text; *p; p++)
+ if (!isdigit(*p))
+ return 0;
+
+ } while ((argument = argument->next_part));
+
+ return 1;
+}
+
+
+PURE_FUNC
+static int
+is_variable_reference(struct argument *argument)
+{
+ char *p;
+
+ if (argument->type != UNQUOTED || isdigit(argument->text[0]) || argument->text[0] == '$')
+ return 0;
+
+ do {
+ if (argument->type != UNQUOTED)
+ return 0;
+
+ for (p = argument->text; *p; p++)
+ if (!isalnum(*p) && *p != '_')
+ return p[0] == '$' && !p[1] && !argument->next_part;
+
+ } while ((argument = argument->next_part));
+
+ return 0;
+}
+
+
static void
push_redirection(struct parser_context *ctx, enum redirection_type type)
{
@@ -113,9 +195,19 @@ push_redirection(struct parser_context *ctx, enum redirection_type type)
type == REDIRECT_OUTPUT_AND_STDERR ||
type == REDIRECT_OUTPUT_AND_STDERR_APPEND ||
type == REDIRECT_OUTPUT_AND_STDERR_CLOBBER ||
- type == REDIRECT_OUTPUT_AND_STDERR_TO_FD) {
+ type == REDIRECT_OUTPUT_AND_STDERR_TO_FD ||
+ !is_numeric_argument(ctx->parser_state->current_argument)) {
+ if (is_variable_reference(ctx->parser_state->current_argument)) {
+ if (posix_mode) {
+ weprintf("the '$%s' token (at line %zu) is not portable, not parsing as it\n",
+ get_redirection_token(type), ctx->tokeniser_line_number);
+ } else {
+ goto argument_is_left_hand_side;
+ }
+ }
push_whitespace(ctx, 1);
} else {
+ argument_is_left_hand_side:
new_redirection->left_hand_side = ctx->parser_state->current_argument;
}
}
@@ -130,8 +222,8 @@ push_redirection(struct parser_context *ctx, enum redirection_type type)
new_here_document->redirection = new_redirection;
new_here_document->argument = new_argument;
new_here_document->next = NULL;
- *ctx->here_documents_next = new_here_document;
- ctx->here_documents_next = &new_here_document->next;
+ *ctx->here_document_stack->next = new_here_document;
+ ctx->here_document_stack->next = &new_here_document->next;
}
ctx->parser_state->need_right_hand_side = 1;
@@ -159,39 +251,41 @@ size_t
push_symbol(struct parser_context *ctx, char *token, size_t token_len)
{
#define LIST_SYMBOLS(_)\
- _("<<<", push_redirection(ctx, HERE_STRING))\
- _("<<-", push_redirection(ctx, HERE_DOCUMENT_INDENTED))\
- _("<>(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT_OUTPUT, NORMAL_MODE))\
- _("<>|", push_command_terminal(ctx, SOCKET_PIPE))\
- _("<>&", push_redirection(ctx, REDIRECT_INPUT_OUTPUT_TO_FD))\
- _("&>>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_APPEND))\
- _("&>&", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_TO_FD))\
- _("&>|", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_CLOBBER))\
- _("()", push_function_mark(ctx))\
- _("((", push_shell_io(ctx, ARITHMETIC_SUBSHELL, RRB_QUOTE_MODE))\
- _(";;", push_command_terminal(ctx, DOUBLE_SEMICOLON))\
- _("<(", push_shell_io(ctx, PROCESS_SUBSTITUTION_OUTPUT, NORMAL_MODE))\
- _("<<", push_redirection(ctx, HERE_DOCUMENT))\
- _("<>", push_redirection(ctx, REDIRECT_INPUT_OUTPUT))\
- _("<&", push_redirection(ctx, REDIRECT_INPUT_TO_FD))\
- _(">(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT, NORMAL_MODE))\
- _(">>", push_redirection(ctx, REDIRECT_OUTPUT_APPEND))\
- _(">&", push_redirection(ctx, REDIRECT_OUTPUT_TO_FD))\
- _(">|", push_redirection(ctx, REDIRECT_OUTPUT_CLOBBER))\
- _("||", push_command_terminal(ctx, OR))\
- _("|&", push_command_terminal(ctx, PIPE_AMPERSAND))\
- _("&&", push_command_terminal(ctx, AND))\
- _("&|", push_command_terminal(ctx, PIPE_AMPERSAND)) /* synonym for |& to match &> */\
- _("&>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR))\
- _("(", push_shell_io(ctx, SUBSHELL, NORMAL_MODE))\
- _(";", push_semicolon(ctx, 0))\
- _("<", push_redirection(ctx, REDIRECT_INPUT))\
- _(">", push_redirection(ctx, REDIRECT_OUTPUT))\
- _("|", push_command_terminal(ctx, PIPE))\
- _("&", push_command_terminal(ctx, AMPERSAND))
-
-#define X(SYMBOL, ACTION)\
- if (token_len >= sizeof(SYMBOL) - 1 && !strncmp(token, SYMBOL, sizeof(SYMBOL) - 1)) {\
+ _(0, "<<<", push_redirection(ctx, HERE_STRING))\
+ _(1, "<<-", push_redirection(ctx, HERE_DOCUMENT_INDENTED))\
+ _(0, "<>(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT_OUTPUT, NORMAL_MODE))\
+ _(0, "<>|", push_command_terminal(ctx, SOCKET_PIPE))\
+ _(1, "<>&", push_redirection(ctx, REDIRECT_INPUT_OUTPUT_TO_FD))\
+ _(0, "&>>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_APPEND))\
+ _(0, "&>&", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_TO_FD))\
+ _(0, "&>|", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_CLOBBER))\
+ _(1, "()", push_function_mark(ctx))\
+ _(0, "((", push_shell_io(ctx, ARITHMETIC_SUBSHELL, RRB_QUOTE_MODE))\
+ _(1, ";;", push_command_terminal(ctx, DOUBLE_SEMICOLON))\
+ _(0, "<(", push_shell_io(ctx, PROCESS_SUBSTITUTION_OUTPUT, NORMAL_MODE))\
+ _(1, "<<", push_redirection(ctx, HERE_DOCUMENT))\
+ _(1, "<>", push_redirection(ctx, REDIRECT_INPUT_OUTPUT))\
+ _(1, "<&", push_redirection(ctx, REDIRECT_INPUT_TO_FD))\
+ _(0, ">(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT, NORMAL_MODE))\
+ _(1, ">>", push_redirection(ctx, REDIRECT_OUTPUT_APPEND))\
+ _(1, ">&", push_redirection(ctx, REDIRECT_OUTPUT_TO_FD))\
+ _(1, ">|", push_redirection(ctx, REDIRECT_OUTPUT_CLOBBER))\
+ _(1, "||", push_command_terminal(ctx, OR))\
+ _(0, "|&", push_command_terminal(ctx, PIPE_AMPERSAND))\
+ _(1, "&&", push_command_terminal(ctx, AND))\
+ _(0, "&|", push_command_terminal(ctx, AMPERSAND_PIPE))\
+ _(0, "&>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR))\
+ _(1, "(", push_shell_io(ctx, SUBSHELL, NORMAL_MODE))\
+ _(1, ";", push_semicolon(ctx, 0))\
+ _(1, "<", push_redirection(ctx, REDIRECT_INPUT))\
+ _(1, ">", push_redirection(ctx, REDIRECT_OUTPUT))\
+ _(1, "|", push_command_terminal(ctx, PIPE))\
+ _(1, "&", push_command_terminal(ctx, AMPERSAND))
+
+#define X(PORTABLE, SYMBOL, ACTION)\
+ if (token_len >= sizeof(SYMBOL) - 1 &&\
+ !strncmp(token, SYMBOL, sizeof(SYMBOL) - 1) &&\
+ (PORTABLE || check_extension(SYMBOL, ctx->tokeniser_line_number))) {\
ACTION;\
return token_len;\
}
@@ -208,13 +302,22 @@ push_text(struct parser_context *ctx, char *text, size_t text_len, enum argument
{
struct argument *arg_part;
- ctx->parser_state->need_right_hand_side = 0;
+ if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) {
+ type = QUOTED;
+ if (ctx->here_document_stack->first->argument_end->type != type ||
+ ctx->here_document_stack->first->argument_end->line_number != ctx->tokeniser_line_number)
+ push_new_argument_part(ctx, type);
+ arg_part = ctx->here_document_stack->first->argument_end;
- if (!ctx->parser_state->current_argument_end ||
- ctx->parser_state->current_argument_end->type != type ||
- ctx->parser_state->current_argument_end->line_number != ctx->tokeniser_line_number)
- push_new_argument_part(ctx, type);
- arg_part = ctx->parser_state->current_argument_end;
+ } else {
+ ctx->parser_state->need_right_hand_side = 0;
+
+ if (!ctx->parser_state->current_argument_end ||
+ ctx->parser_state->current_argument_end->type != type ||
+ ctx->parser_state->current_argument_end->line_number != ctx->tokeniser_line_number)
+ push_new_argument_part(ctx, type);
+ arg_part = ctx->parser_state->current_argument_end;
+ }
arg_part->text = erealloc(arg_part->text, arg_part->length + text_len + 1);
memcpy(&arg_part->text[arg_part->length], text, text_len);
@@ -230,11 +333,142 @@ push_quoted(struct parser_context *ctx, char *text, size_t text_len)
}
+static size_t
+encode_utf8(char *buf, uint32_t value)
+{
+ size_t i, len;
+
+ if (value <= 0x7F) {
+ buf[0] = (char)value;
+ return 1;
+ }
+
+ if (value <= 0x000007FFUL) len = 2;
+ else if (value <= 0x0000FFFFUL) len = 3;
+ else if (value <= 0x001FFFFFUL) len = 4;
+ else if (value <= 0x03FFFFFFUL) len = 5;
+ else if (value <= 0x7FFFFFFFUL) len = 6;
+ else len = 7;
+
+ for (i = len - 1; i; i--) {
+ buf[len - 1 - i] = (char)(((int)value & 0x3F) | 0x80);
+ value >>= 6;
+ }
+
+ buf[0] |= (char)(0xFF << (8 - len));
+
+ return len;
+}
+
void
push_escaped(struct parser_context *ctx, char *text, size_t text_len)
{
- /* TODO resolve backslashes in text */
- push_text(ctx, text, text_len, QUOTED);
+ uint32_t value;
+ size_t r, w, n;
+ for (r = w = 0; r < text_len;) {
+ if (text[r] == '\\' && r + 1 < text_len) {
+ if (text[r + 1] == 'a') {
+ text[w++] = '\a';
+ r += 2;
+ } else if (text[r + 1] == 'b') {
+ text[w++] = '\b';
+ r += 2;
+ } else if (text[r + 1] == 'e' || text[r + 1] == 'E') {
+ text[w++] = '\033';
+ r += 2;
+ } else if (text[r + 1] == 'f') {
+ text[w++] = '\f';
+ r += 2;
+ } else if (text[r + 1] == 'n') {
+ text[w++] = '\n';
+ r += 2;
+ } else if (text[r + 1] == 'r') {
+ text[w++] = '\r';
+ r += 2;
+ } else if (text[r + 1] == 't') {
+ text[w++] = '\t';
+ r += 2;
+ } else if (text[r + 1] == 'v') {
+ text[w++] = '\v';
+ r += 2;
+ } else if (text[r + 1] == '\\') {
+ text[w++] = '\\';
+ r += 2;
+ } else if (text[r + 1] == '\'') {
+ text[w++] = '\'';
+ r += 2;
+ } else if (text[r + 1] == '"') {
+ text[w++] = '\"';
+ r += 2;
+ } else if (text[r + 1] == '?') {
+ text[w++] = '?';
+ r += 2;
+ } else if ('0' <= text[r + 1] && text[r + 1] <= '7') {
+ value = 0;
+ for (r += 1, n = 0; n < 3 && '0' <= text[r + 1] && text[r + 1] <= '7'; r += 1, n += 1) {
+ if ((text[r] & 15) > 255 - (int)value)
+ break;
+ value *= 8;
+ value |= (uint32_t)(text[r] & 15);
+ }
+ if (value) {
+ text[w++] = (char)value;
+ } else {
+ weprintf("ignoring NUL byte result from $''-expression at line %zu\n",
+ ctx->tokeniser_line_number);
+ }
+ } else if (text[r + 1] == 'x' && text_len - r >= 3 && isxdigit(text[r + 2])) {
+ value = 0;
+ for (r += 2, n = 0; n < 2 && isxdigit(text[r]); r += 1, n += 1) {
+ value *= 16;
+ value |= (uint32_t)((text[r] > '9' ? 9 : 0) + (text[r] & 15));
+ }
+ if (value) {
+ text[w++] = (char)value;
+ } else {
+ weprintf("ignoring NUL byte result from $''-expression at line %zu\n",
+ ctx->tokeniser_line_number);
+ }
+ } else if (text[r + 1] == 'u' && text_len - r >= 3 && isxdigit(text[r + 2])) {
+ value = 0;
+ for (r += 2, n = 0; n < 4 && isxdigit(text[r]); r += 1, n += 1) {
+ value *= 16;
+ value |= (uint32_t)((text[r] > '9' ? 9 : 0) + (text[r] & 15));
+ }
+ if (value) {
+ w += encode_utf8(&text[w], value);
+ } else {
+ weprintf("ignoring NUL byte result from $''-expression at line %zu\n",
+ ctx->tokeniser_line_number);
+ }
+ } else if (text[r + 1] == 'U') {
+ value = 0;
+ for (r += 2, n = 0; n < 8 && isxdigit(text[r]); r += 1, n += 1) {
+ value *= 16;
+ value |= (uint32_t)((text[r] > '9' ? 9 : 0) + (text[r] & 15));
+ }
+ if (value) {
+ w += encode_utf8(&text[w], value);
+ } else {
+ weprintf("ignoring NUL byte result from $''-expression at line %zu\n",
+ ctx->tokeniser_line_number);
+ }
+ } else if (text[r + 1] == 'c' && text_len - r >= 3) {
+ if (text[r + 2] & (' ' - 1)) {
+ text[w++] = (char)(text[r + 2] & (' ' - 1));
+ } else {
+ weprintf("ignoring NUL byte result from $''-expression at line %zu\n",
+ ctx->tokeniser_line_number);
+ }
+ r += 3;
+ } else {
+ text[w++] = text[r++];
+ }
+ } else {
+ text[w++] = text[r++];
+ }
+ }
+ push_text(ctx, text, w, QUOTED);
}
@@ -250,7 +484,9 @@ push_enter(struct parser_context *ctx, enum argument_type type)
{
struct parser_state *new_state;
- ctx->parser_state->need_right_hand_side = 0;
+ if (ctx->mode_stack->mode != HERE_DOCUMENT_MODE)
+ ctx->parser_state->need_right_hand_side = 0;
+
push_new_argument_part(ctx, type);
new_state = ecalloc(1, sizeof(*new_state));
@@ -263,9 +499,48 @@ push_enter(struct parser_context *ctx, enum argument_type type)
void
push_leave(struct parser_context *ctx)
{
- if (ctx->mode_stack->mode == NORMAL_MODE)
+ struct parser_context subctx;
+ struct argument *argument;
+ char *code;
+ size_t code_length;
+ size_t parsed_length;
+ size_t arg_i;
+
+ if (ctx->mode_stack->mode == NORMAL_MODE) {
push_semicolon(ctx, 1);
- /* TODO else if (ctx->mode_stack->mode == BQ_QUOTE_MODE), parse content */
- /* TODO validate subshell content */
+
+ } else if (ctx->mode_stack->mode == BQ_QUOTE_MODE) {
+ initialise_parser_context(&subctx, 1, 1);
+ subctx.do_not_run = 1;
+ subctx.end_of_file_reached = 1;
+ code = NULL;
+ code_length = 0;
+ for (arg_i = 0; arg_i < ctx->parser_state->narguments; arg_i++) {
+ argument = ctx->parser_state->arguments[arg_i];
+ code = erealloc(code, code_length + argument->length);
+ memcpy(&code[code_length], argument->text, argument->length);
+ code_length += argument->length;
+ }
+ code = erealloc(code, code_length + 1);
+ code[code_length] = '\0';
+ parsed_length = parse_preparsed(&subctx, code, code_length);
+ if (parsed_length < code_length || subctx.premature_end_of_file) {
+ eprintf("premature end of file backquote expression at line %zu\n",
+ ctx->parser_state->parent->current_argument_end->line_number);
+ }
+ free(code);
+ free(subctx.here_document_stack);
+ free(subctx.interpreter_state);
+ ctx->parser_state->parent->current_argument_end->child = subctx.parser_state;
+
+ } else {
+ /* In quote modes we want everything in a dummy command
+ * to simplify the implementation of the interpreter.
+ * The command termination used here doesn't matter,
+ * neither does the line nummer (for it), the interpreter
+ * will only look at the argument list. */
+ push_command_terminal(ctx, NEWLINE);
+ }
+
ctx->parser_state = ctx->parser_state->parent;
}