aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMattias Andrée <maandree@kth.se>2021-07-13 02:44:18 +0200
committerMattias Andrée <maandree@kth.se>2021-07-13 02:44:18 +0200
commitbc9033fdf30424c34008e651fdbbba5da8c8fc40 (patch)
tree995bc6bbd067cf6bebe1a6e6f74e210b11df1a8a
parentSecond commit (diff)
downloadapsh-bc9033fdf30424c34008e651fdbbba5da8c8fc40.tar.gz
apsh-bc9033fdf30424c34008e651fdbbba5da8c8fc40.tar.bz2
apsh-bc9033fdf30424c34008e651fdbbba5da8c8fc40.tar.xz
Third commit
Signed-off-by: Mattias Andrée <maandree@kth.se>
-rw-r--r--Makefile5
-rw-r--r--apsh.c49
-rw-r--r--common.h164
-rw-r--r--interpreter.c970
-rw-r--r--parser.c395
-rw-r--r--preparser.c2
-rw-r--r--regular_builtins.c67
-rw-r--r--special_builtins.c11
-rw-r--r--tokeniser.c268
9 files changed, 1829 insertions, 102 deletions
diff --git a/Makefile b/Makefile
index 696478c..bf1daeb 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,10 @@ OBJ =\
apsh.o\
preparser.o\
tokeniser.o\
- parser.o
+ parser.o\
+ interpreter.o\
+ special_builtins.o\
+ regular_builtins.o
HDR =\
common.h\
diff --git a/apsh.c b/apsh.c
index 4e7183b..90743ad 100644
--- a/apsh.c
+++ b/apsh.c
@@ -4,19 +4,39 @@
USAGE("");
+int login_shell;
+int posix_mode;
+
+
void
-initialise_parser_context(struct parser_context *ctx)
+initialise_parser_context(struct parser_context *ctx, int need_tokeniser, int need_parser)
{
memset(ctx, 0, sizeof(*ctx));
- ctx->preparser_line_number = 1;
- ctx->tokeniser_line_number = 1;
- ctx->mode_stack = ecalloc(1, sizeof(*ctx->mode_stack));
- ctx->mode_stack->mode = NORMAL_MODE;
- ctx->mode_stack->she_is_comment = 1;
- ctx->parser_state = ecalloc(1, sizeof(*ctx->parser_state));
- ctx->here_documents_next = &ctx->here_documents_first;
+ if (need_tokeniser) {
+ ctx->preparser_line_number = 1;
+ ctx->tokeniser_line_number = 1;
+ ctx->mode_stack = ecalloc(1, sizeof(*ctx->mode_stack));
+ ctx->mode_stack->she_is_comment = 1;
+ ctx->here_document_stack = ecalloc(1, sizeof(*ctx->here_document_stack));
+ ctx->here_document_stack->next = &ctx->here_document_stack->first;
+ }
+ if (need_parser) {
+ ctx->parser_state = ecalloc(1, sizeof(*ctx->parser_state));
+ }
+ ctx->interpreter_state = ecalloc(1, sizeof(*ctx->interpreter_state));
+}
+
+
+static int
+is_sh(char *name)
+{
+ if (!strcmp(name, "sh"))
+ return 1;
+ name = strrchr(name, '/');
+ return name && !strcmp(name, "/sh");
}
+
int
main(int argc, char *argv[])
{
@@ -36,8 +56,11 @@ main(int argc, char *argv[])
if (argc)
usage();
- initialise_parser_context(&ctx);
- ctx.tty_input = isatty(STDIN_FILENO);
+ login_shell = (argv0[0] == '-');
+ posix_mode = is_sh(&argv0[login_shell]);
+
+ initialise_parser_context(&ctx, 1, 1);
+ ctx.tty_input = (char)isatty(STDIN_FILENO);
if (ctx.tty_input)
weprintf("apsh is currently not implemented to be interactive\n");
@@ -71,6 +94,12 @@ main(int argc, char *argv[])
if (buffer_tail != buffer_head || ctx.premature_end_of_file)
eprintf("premature end of file reached\n");
+ free(ctx.parser_state->commands);
+ free(ctx.parser_state->arguments);
+ free(ctx.parser_state->redirections);
+ free(ctx.parser_state);
+ free(ctx.here_document_stack);
+ free(ctx.interpreter_state);
free(buffer);
return 0;
}
diff --git a/common.h b/common.h
index b5def37..52481c0 100644
--- a/common.h
+++ b/common.h
@@ -4,11 +4,35 @@
#include "config.h"
+#if defined(__GNUC__)
+# define CONST_FUNC __attribute__((__const__))
+# define PURE_FUNC __attribute__((__pure__))
+#else
+# define CONST_FUNC
+# define PURE_FUNC
+#endif
+
+
+#define BUILTIN_USAGE(FUNCTION_NAME, SYNOPSIS)\
+ BUILTIN_NUSAGE(1, FUNCTION_NAME, SYNOPSIS)
+
+#define BUILTIN_NUSAGE(STATUS, FUNCTION_NAME, SYNOPSIS)\
+ static void\
+ FUNCTION_NAME(void)\
+ {\
+ const char *syn = SYNOPSIS ? SYNOPSIS : "";\
+ fprintf(stderr, "usage: %s%s%s\n", argv0, *syn ? " " : "", syn);\
+ exit(STATUS);\
+ }
+
+
enum argument_type {
/* .text and .length */
QUOTED, /* \ or '…' or $'…' */
UNQUOTED, /* normal */
- /* .child */
+ VARIABLE, /* used by interpreter, not parser */
+ OPERATOR, /* used by interpreter for ${}, not parser */
+ /* .child, but changed to .command by interpreter */
QUOTE_EXPRESSION, /* "…" */
BACKQUOTE_EXPRESSION, /* `…` */
ARITHMETIC_EXPRESSION, /* $((…)) */
@@ -19,27 +43,46 @@ enum argument_type {
PROCESS_SUBSTITUTION_INPUT_OUTPUT, /* <>(…) */
SUBSHELL, /* (…) ## if non-first argument: format shell code into a string (can be used for a clean subshell) */
ARITHMETIC_SUBSHELL, /* ((…)) ## if non-first argument: format shell code into a string */
+ /* .command */
+ COMMAND, /* used by interpreter, not parser */
/* (none) */
REDIRECTION, /* at beginning of argument, use next redirection and use reminder of argument as right-hand side */
FUNCTION_MARK /* () */
};
+enum nesting_type {
+ MAIN_BODY,
+ CODE_ROOT,
+ TEXT_ROOT,
+ VARIABLE_SUBSTITUTION_BRACKET,
+ CURLY_NESTING,
+ IF_STATEMENT,
+ IF_CONDITIONAL,
+ IF_CLAUSE,
+ ELSE_CLAUSE,
+ UNTIL_STATEMENT,
+ WHILE_STATEMENT,
+ REPEAT_CONDITIONAL,
+ DO_CLAUSE,
+ FOR_STATEMENT
+};
+
enum redirection_type {
REDIRECT_INPUT,
- REDIRECT_INPUT_TO_FD,
+ REDIRECT_INPUT_TO_FD, /* but close if right-hand side is "-" */
REDIRECT_OUTPUT,
REDIRECT_OUTPUT_APPEND,
REDIRECT_OUTPUT_CLOBBER,
- REDIRECT_OUTPUT_TO_FD,
+ REDIRECT_OUTPUT_TO_FD, /* ditto */
REDIRECT_OUTPUT_AND_STDERR,
REDIRECT_OUTPUT_AND_STDERR_APPEND,
REDIRECT_OUTPUT_AND_STDERR_CLOBBER,
- REDIRECT_OUTPUT_AND_STDERR_TO_FD,
+ REDIRECT_OUTPUT_AND_STDERR_TO_FD, /* ditto */
REDIRECT_INPUT_OUTPUT,
- REDIRECT_INPUT_OUTPUT_TO_FD,
+ REDIRECT_INPUT_OUTPUT_TO_FD, /* ditto */
HERE_STRING,
- HERE_DOCUMENT,
- HERE_DOCUMENT_INDENTED
+ HERE_DOCUMENT, /* eliminated during parse */
+ HERE_DOCUMENT_INDENTED /* eliminated during parse */
};
enum tokeniser_mode {
@@ -51,21 +94,45 @@ enum tokeniser_mode {
RB_QUOTE_MODE,
SB_QUOTE_MODE,
CB_QUOTE_MODE,
+ HERE_DOCUMENT_MODE_INITIALISATION,
HERE_DOCUMENT_MODE
};
enum command_terminal {
DOUBLE_SEMICOLON,
SEMICOLON,
+ NEWLINE,
AMPERSAND,
SOCKET_PIPE,
PIPE,
PIPE_AMPERSAND,
+ AMPERSAND_PIPE, /* synonym for |& to match &> */
AND,
OR
};
+enum interpreter_requirement {
+ NEED_COMMAND = 0,
+ NEED_COMMAND_END,
+ NO_REQUIREMENT,
+ NEED_FUNCTION_BODY,
+ NEED_VARIABLE_NAME,
+ NEED_IN_OR_DO,
+ NEED_DO,
+ NEED_VALUE,
+ NEED_PREFIX_OR_VARIABLE_NAME,
+ NEED_INDEX_OR_OPERATOR_OR_END,
+ NEED_INDEX_OR_SUFFIX_OR_END,
+ NEED_INDEX_OR_END,
+ NEED_OPERATOR_OR_END,
+ NEED_AT_OPERAND,
+ NEED_TEXT_OR_SLASH,
+ NEED_TEXT_OR_COLON,
+ NEED_END
+};
+
struct parser_state;
+struct interpreter_state;
struct argument {
enum argument_type type;
@@ -75,7 +142,13 @@ struct argument {
size_t length;
};
struct parser_state *child;
+ struct interpreter_state *command;
};
+ /* (TODO) need to be able to track locations of functions, dots, evals, and maybe aliases,
+ * as well as filenames, so a more complex tracking method is required, basically
+ * a reversed tree (stack with reference counted nodes) with filename and linenumber
+ * nodes, with type annotation; however for memory efficiency, .line_number shall
+ * still be used for the leaves */
size_t line_number;
struct argument *next_part;
};
@@ -83,19 +156,23 @@ struct argument {
struct redirection {
enum redirection_type type;
struct argument *left_hand_side;
+ struct argument *right_hand_side; /* set by interpreter, not parser */
};
struct command {
enum command_terminal terminal;
+ char have_bang; /* set by interpreter */
+ size_t terminal_line_number; /* (TODO) same idea as in `struct argument` */
struct argument **arguments;
size_t narguments;
struct redirection **redirections;
size_t nredirections;
+ size_t redirections_offset; /* used by interpreter */
};
struct parser_state {
struct parser_state *parent;
- struct command **commands;
+ struct command **commands; /* in text nodes, all text will be in at most one argument in a single dummy command */
size_t ncommands;
struct argument **arguments;
size_t narguments;
@@ -109,6 +186,9 @@ struct parser_state {
struct here_document {
struct redirection *redirection;
struct argument *argument;
+ struct argument *argument_end;
+ char *terminator;
+ size_t terminator_length;
struct here_document *next;
};
@@ -118,23 +198,52 @@ struct mode_stack {
struct mode_stack *previous;
};
+struct here_document_stack {
+ char indented;
+ char verbatim;
+ char interpret_when_empty;
+ size_t line_offset;
+ struct here_document *first;
+ struct here_document **next;
+ struct here_document_stack *previous;
+};
+
+struct interpreter_state {
+ enum nesting_type dealing_with;
+ enum interpreter_requirement requirement;
+ char allow_newline;
+ char disallow_bang; /* disallow rather than allow, so that default value is 0 */
+ char have_bang;
+ struct command **commands; /* normally the results are stored here */
+ size_t ncommands;
+ struct argument **arguments; /* for TEXT_ROOT and VARIABLE_SUBSTITUTION_BRACKET, results are stored here */
+ size_t narguments;
+ struct redirection **redirections;
+ size_t nredirections;
+ struct interpreter_state *parent;
+};
+
struct parser_context {
- int tty_input;
- int end_of_file_reached;
- int premature_end_of_file;
+ char tty_input;
+ char end_of_file_reached;
+ char premature_end_of_file;
+ char do_not_run;
size_t preparser_offset;
size_t preparser_line_number;
size_t line_continuations;
size_t tokeniser_line_number;
+ size_t interpreter_offset;
struct mode_stack *mode_stack;
struct parser_state *parser_state;
- struct here_document *here_documents_first;
- struct here_document **here_documents_next;
+ struct here_document_stack *here_document_stack;
+ struct interpreter_state *interpreter_state;
};
/* apsh.c */
-void initialise_parser_context(struct parser_context *ctx);
+extern int login_shell;
+extern int posix_mode;
+void initialise_parser_context(struct parser_context *ctx, int need_tokeniser, int need_parser);
/* preparser.c */
size_t parse(struct parser_context *ctx, char *code, size_t code_len, size_t *nremovedp);
@@ -142,15 +251,40 @@ size_t parse(struct parser_context *ctx, char *code, size_t code_len, size_t *nr
/* tokeniser.c */
void push_mode(struct parser_context *ctx, enum tokeniser_mode mode);
void pop_mode(struct parser_context *ctx);
+int check_extension(const char *token, size_t line_number);
size_t parse_preparsed(struct parser_context *ctx, char *code, size_t code_len);
/* parser.c */
+PURE_FUNC const char *get_redirection_token(enum redirection_type type);
void push_end_of_file(struct parser_context *ctx);
void push_whitespace(struct parser_context *ctx, int strict);
-void push_semicolon(struct parser_context *ctx, int maybe);
+void push_semicolon(struct parser_context *ctx, int actually_newline);
size_t push_symbol(struct parser_context *ctx, char *token, size_t token_len);
void push_quoted(struct parser_context *ctx, char *text, size_t text_len);
void push_escaped(struct parser_context *ctx, char *text, size_t text_len);
void push_unquoted(struct parser_context *ctx, char *text, size_t text_len);
void push_enter(struct parser_context *ctx, enum argument_type type);
void push_leave(struct parser_context *ctx);
+
+/* interpreter.c */
+void interpret_and_eliminate(struct parser_context *ctx);
+
+/* special_builtins.c */
+#define LIST_SPECIAL_BUILTINS(_)\
+ _(":", colon_main, CONST_FUNC)
+
+/* regular_builtins.c */
+#define LIST_REGULAR_BUILTINS(_)\
+ _("true", true_main, CONST_FUNC)\
+ _("false", false_main, CONST_FUNC)\
+ _("pwd", pwd_main,)
+/* "true" and "false" are defined as regular built-in shell utilities
+ * (that must be searched before PATH), not as stand-alone utilities,
+ * in POSIX (but vice versa in LSB). "pwd" is defined both as a regular
+ * built-in shell utility and as a stand-alone utility. */
+
+#define X(SH_NAME, C_FUNCTION, C_ATTRIBUTES)\
+ C_ATTRIBUTES int C_FUNCTION(int argc, char **argv);
+LIST_SPECIAL_BUILTINS(X)
+LIST_REGULAR_BUILTINS(X)
+#undef X
diff --git a/interpreter.c b/interpreter.c
new file mode 100644
index 0000000..e4bca1a
--- /dev/null
+++ b/interpreter.c
@@ -0,0 +1,970 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+/* X-macro table of the reserved words recognised by this file: each entry
+ * is _(spelling, enum-constant). It is expanded once below to declare
+ * `enum reserved_word` and once in get_reserved_word() to match spellings. */
+#define LIST_RESERVED_WORDS(_)\
+ _("!", BANG)\
+ _("{", OPEN_CURLY)\
+ _("}", CLOSE_CURLY)\
+ _("case", CASE) /* (TODO) case patterns requires update to tokeniser */\
+ _("do", DO)\
+ _("done", DONE)\
+ _("elif", ELIF)\
+ _("else", ELSE)\
+ _("esac", ESAC)\
+ _("fi", FI)\
+ _("for", FOR)\
+ _("if", IF)\
+ _("in", IN)\
+ _("then", THEN)\
+ _("until", UNTIL)\
+ _("while", WHILE)
+
+/* Each table entry contributes ",CONSTANT", so the constants follow
+ * NOT_A_RESERVED_WORD (which is 0 and therefore tests as false). */
+#define X(S, C) ,C
+enum reserved_word {
+ NOT_A_RESERVED_WORD = 0
+ LIST_RESERVED_WORDS(X)
+};
+#undef X
+
+
+/* Classify a word as a reserved word.
+ *
+ * Only a word consisting of a single UNQUOTED part can be reserved; its
+ * text is compared against every spelling in LIST_RESERVED_WORDS.
+ *
+ * Returns the matching enum constant, or NOT_A_RESERVED_WORD (0). */
+PURE_FUNC
+static enum reserved_word
+get_reserved_word(struct argument *argument)
+{
+	static const struct {
+		const char *spelling;
+		size_t length;
+		enum reserved_word word;
+	} table[] = {
+#define X(S, C) {S, sizeof(S) - 1, C},
+		LIST_RESERVED_WORDS(X)
+#undef X
+	};
+	size_t i;
+
+	if (argument->next_part || argument->type != UNQUOTED)
+		return NOT_A_RESERVED_WORD;
+
+	for (i = 0; i < sizeof(table) / sizeof(*table); i++) {
+		/* cheap length check first, then the actual comparison */
+		if (table[i].length != argument->length)
+			continue;
+		if (!strcmp(argument->text, table[i].spelling))
+			return table[i].word;
+	}
+
+	return NOT_A_RESERVED_WORD;
+}
+
+
+/* Report, through eprintf, that the terminal of `command` appeared where no
+ * command terminal is allowed. The message names the terminal's token and
+ * the line recorded in `command->terminal_line_number`. An unknown terminal
+ * value aborts the process.
+ * NOTE(review): eprintf is declared outside this file; presumably it does
+ * not return — confirm. */
+static void
+stray_command_terminal(struct command *command)
+{
+	size_t line = command->terminal_line_number;
+
+	switch (command->terminal) {
+	case DOUBLE_SEMICOLON:
+		eprintf("stray ';;' at line %zu\n", line);
+		break;
+	case SEMICOLON:
+		eprintf("stray ';' at line %zu\n", line);
+		break;
+	case NEWLINE:
+		eprintf("stray <newline> at line %zu\n", line);
+		break;
+	case AMPERSAND:
+		eprintf("stray '&' at line %zu\n", line);
+		break;
+	case SOCKET_PIPE:
+		eprintf("stray '<>|' at line %zu\n", line);
+		break;
+	case PIPE:
+		eprintf("stray '|' at line %zu\n", line);
+		break;
+	case PIPE_AMPERSAND:
+		eprintf("stray '|&' at line %zu\n", line);
+		break;
+	case AMPERSAND_PIPE:
+		eprintf("stray '&|' at line %zu\n", line);
+		break;
+	case AND:
+		eprintf("stray '&&' at line %zu\n", line);
+		break;
+	case OR:
+		eprintf("stray '||' at line %zu\n", line);
+		break;
+	default:
+		abort();
+	}
+}
+
+
+/* Report, through eprintf, that the reserved word held in `argument->text`
+ * appeared in a position where it is not allowed. */
+static void
+stray_reserved_word(struct argument *argument)
+{
+	const char *word = argument->text;
+	size_t line = argument->line_number;
+
+	eprintf("stray '%s' at line %zu\n", word, line);
+}
+
+
+/* Report, through eprintf, a misplaced redirection operator. The operator
+ * is the one at `command->redirections_offset`, i.e. the next redirection
+ * of `command` that has not been consumed yet; the line number is taken
+ * from `argument`. */
+static void
+stray_redirection(struct command *command, struct argument *argument)
+{
+	struct redirection *pending = command->redirections[command->redirections_offset];
+	const char *token = get_redirection_token(pending->type);
+
+	eprintf("stray '%s' at line %zu\n", token, argument->line_number);
+}
+
+
+/* Unlink and free the first part of the chain `*argumentp`, including its
+ * text; `*argumentp` is advanced to the following part (possibly NULL). */
+static void
+free_text_argument(struct argument **argumentp)
+{
+	struct argument *head = *argumentp;
+	struct argument *rest = head->next_part;
+
+	*argumentp = rest;
+	free(head->text);
+	free(head);
+}
+
+
+/* Append `argument` to the argument list of the current interpreter state,
+ * growing the list by one slot. */
+static void
+push_interpreted_argument(struct parser_context *ctx, struct argument *argument)
+{
+	struct interpreter_state *state = ctx->interpreter_state;
+	size_t count = state->narguments;
+
+	state->arguments = erealloc(state->arguments, (count + 1) * sizeof(*state->arguments));
+	state->arguments[count] = argument;
+	state->narguments = count + 1;
+}
+
+
+/* Enter a nested interpreter state.
+ *
+ * A fresh, zeroed state of kind `dealing_with` is created with the current
+ * state as its parent. A COMMAND argument referring to the new state is
+ * appended to the current (outer) state's arguments, and the new state then
+ * becomes the current one.
+ *
+ * `line_number` is recorded on the COMMAND argument. */
+static void
+push_state(struct parser_context *ctx, enum nesting_type dealing_with, size_t line_number)
+{
+	struct interpreter_state *new_state;
+	struct argument *new_argument;
+
+	new_state = ecalloc(1, sizeof(*new_state));
+	new_state->parent = ctx->interpreter_state;
+	new_state->dealing_with = dealing_with;
+
+	/* ecalloc rather than calloc: the result is dereferenced immediately,
+	 * so allocation failure must be handled like everywhere else in this file */
+	new_argument = ecalloc(1, sizeof(*new_argument));
+	new_argument->type = COMMAND;
+	new_argument->command = new_state;
+	new_argument->line_number = line_number;
+
+	/* must happen before switching state, so it lands in the outer state */
+	push_interpreted_argument(ctx, new_argument);
+	ctx->interpreter_state = new_state;
+}
+
+
+/* Leave the current interpreter state and make its parent current again.
+ * The state being left is not freed here. */
+static void
+pop_state(struct parser_context *ctx)
+{
+	struct interpreter_state *finished = ctx->interpreter_state;
+
+	ctx->interpreter_state = finished->parent;
+}
+
+
+/* Finish the command currently being interpreted.
+ *
+ * The parser's argument and redirection arrays in `command` are released
+ * and replaced by the ones accumulated in the current interpreter state
+ * (together with its have_bang flag); the accumulators are then reset.
+ * The command's slot in the parser state is cleared and the command is
+ * appended to the interpreter state's command list. */
+static void
+push_command(struct parser_context *ctx, struct command *command)
+{
+	struct interpreter_state *state = ctx->interpreter_state;
+	size_t count;
+
+	/* hand over the interpreted redirections */
+	free(command->redirections);
+	command->redirections = state->redirections;
+	command->nredirections = state->nredirections;
+	state->redirections = NULL;
+	state->nredirections = 0;
+
+	/* hand over the interpreted arguments */
+	free(command->arguments);
+	command->arguments = state->arguments;
+	command->narguments = state->narguments;
+	state->arguments = NULL;
+	state->narguments = 0;
+
+	command->have_bang = state->have_bang;
+	state->have_bang = 0;
+
+	/* the command now belongs to the interpreter state */
+	ctx->parser_state->commands[ctx->interpreter_offset] = NULL;
+
+	count = state->ncommands;
+	state->commands = erealloc(state->commands, (count + 1) * sizeof(*state->commands));
+	state->commands[count] = command;
+	state->ncommands = count + 1;
+}
+
+
+/* Interpret the parsed code stored in `argument->child` in a context of its
+ * own, and replace it with the resulting interpreter state, stored in
+ * `argument->command`.
+ *
+ * `dealing_with` and `requirement` seed the root interpreter state. If any
+ * commands are left in the parser state afterwards, a premature end of the
+ * subexpression is reported. The parser state itself is freed. */
+static void
+interpret_nested_code(struct argument *argument, enum nesting_type dealing_with, enum interpreter_requirement requirement)
+{
+	struct parser_context nested;
+	struct parser_state *code = argument->child;
+
+	/* no tokeniser or parser state needed: the code is already parsed */
+	initialise_parser_context(&nested, 0, 0);
+	nested.parser_state = code;
+	nested.interpreter_state->requirement = requirement;
+	nested.interpreter_state->dealing_with = dealing_with;
+
+	interpret_and_eliminate(&nested);
+
+	if (nested.parser_state->ncommands)
+		eprintf("premature end of subexpression at line %zu\n", argument->line_number);
+
+	free(nested.parser_state->commands);
+	free(nested.parser_state->arguments);
+	free(nested.parser_state->redirections);
+
+	/* `code` was read from `argument->child` before this store replaces it */
+	argument->command = nested.interpreter_state;
+	free(code);
+}
+
+
+/* Check that `argument->text` is a valid identifier: non-empty, not
+ * starting with a digit, and made up solely of alphanumerics and '_'.
+ *
+ * On failure an error is reported through eprintf; `type` describes what
+ * kind of name was expected and `reserved_word` names the construct the
+ * identifier belongs to (both only used in the message). */
+static void
+validate_identifier_name(struct argument *argument, const char *type, const char *reserved_word)
+{
+	/* <ctype.h> functions require values representable as unsigned char */
+	const unsigned char *s = (const unsigned char *)argument->text;
+
+	if (!*s || isdigit(*s))
+		goto illegal;
+
+	for (; *s; s++)
+		if (!isalnum(*s) && *s != '_')
+			goto illegal;
+
+	return;
+
+illegal:
+	eprintf("illegal %s \"%s\" at line %zu for '%s'\n",
+	        type, argument->text, argument->line_number, reserved_word);
+}
+
+
+static void
+interpret_unquoted_text(struct argument **argumentp)
+{
+ struct argument *argument = *argumentp;
+ struct argument *new_argument;
+ char *text = argument->text;
+ char *beginning = text, *end = text;
+ size_t addendum_length;
+ int can_append = 1;
+
+ while (*end && *end != '$')
+ end++;
+
+ if (!*end)
+ return;
+
+ if (end != beginning) {
+ argument->length = (size_t)(end - beginning);
+ argument->text = emalloc(argument->length + 1);
+ memcpy(argument->text, beginning, argument->length);
+ argument->text[argument->length] = '\0';
+ }
+
+ do {
+ beginning = &end[1];
+ switch (*beginning) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ if (isdigit(beginning[1])) {
+ weprintf("multiple digits found immediately after '$' at line %zu, "
+ "only taking one for position argument\n", argument->line_number);
+ }
+ /* fall through */
+ case '@':
+ case '*':
+ case '?':
+ case '#':
+ case '-':
+ case '$':
+ case '!':
+ end = &beginning[1];
+ break;
+ case '~':
+ if (check_extension("$~", argument->line_number)) {
+ /* Get user home, so you can use it in arguments (in the way Bash allows ~ to be used;
+ * be we cannot because we don't want to violate POSIX needlessly) that look like
+ * variable assignments. Instead of limiting usernames to [a-z_][a-z0-9_-]*[$]?
+ * we will limit them only to [a-zA-Z0-9_-]\+[$]? and accept $ at the end even though
+ * it is stupid */
+ end = &beginning[1];
+ if (isalpha(*end) || isdigit(*end) || *end == '0' || *end == '-') {
+ for (end = &end[1]; *end; end++)
+ if (!isalpha(*end) && !isdigit(*end) && *end != '0' && *end != '-')
+ break;
+ if (*end == '$')
+ end = &end[1];
+ }
+ } else {
+ beginning--;
+ goto append_text;
+ }
+ break;
+ default:
+ if (isalpha(*beginning) || *beginning == '_') {
+ for (end = &beginning[1]; isdigit(*end) || isalpha(*end) || *end == '_'; end++);
+ } else {
+ beginning--;
+ goto append_text;
+ }
+ }
+
+ new_argument = ecalloc(1, sizeof(*new_argument));
+ new_argument->next_part = argument->next_part;
+ argument = *argumentp = argument->next_part = new_argument;
+ argument->type = VARIABLE;
+ argument->length = (size_t)(end - beginning);
+ argument->text = emalloc(argument->length + 1);
+ memcpy(argument->text, beginning, argument->length);
+ argument->text[argument->length] = '\0';
+
+ beginning = end;
+ can_append = 0;
+
+ append_text:
+ while (*end && *end != '$')
+ end++;
+
+ if (end != beginning) {
+ if (can_append) {
+ addendum_length = (size_t)(end - beginning);
+ argument->text = erealloc(argument->text, argument->length + addendum_length + 1);
+ memcpy(&argument->text[argument->length], beginning, addendum_length);
+ argument->length += addendum_length;
+ argument->text[argument->length] = '\0';
+ } else {
+ new_argument = ecalloc(1, sizeof(*new_argument));
+ new_argument->next_part = argument->next_part;
+ argument = *argumentp = argument->next_part = new_argument;
+ argument->type = UNQUOTED;
+ argument->length = (size_t)(end - beginning);
+ argument->text = emalloc(argument->length + 1);
+ memcpy(argument->text, beginning, argument->length);
+ argument->text[argument->length] = '\0';
+ }
+ can_append = 1;
+ }
+
+ } while (*end);
+
+ free(text);
+}
+
+
+/* Interpret every part of a text argument (a chain linked by next_part).
+ *
+ * QUOTED parts are kept as is, UNQUOTED parts are split at parameter
+ * references, and nested expressions are interpreted recursively with a
+ * root state matching their kind. Part types that cannot occur in a text
+ * argument abort the process. */
+static void
+translate_text_argument(struct argument *argument)
+{
+	struct argument *part;
+
+	for (part = argument; part; part = part->next_part) {
+		switch (part->type) {
+		case QUOTED:
+			/* nothing to do */
+			break;
+
+		case UNQUOTED:
+			/* may insert parts; `part` is moved to the last of them */
+			interpret_unquoted_text(&part);
+			break;
+
+		case QUOTE_EXPRESSION:
+		case ARITHMETIC_EXPRESSION:
+		case ARITHMETIC_SUBSHELL:
+			/* Arithmetic can only be interpreted when it is evaluated,
+			 * because substitutions can be used to insert operators;
+			 * here it is treated as text */
+			interpret_nested_code(part, TEXT_ROOT, NEED_COMMAND);
+			break;
+
+		case VARIABLE_SUBSTITUTION:
+			interpret_nested_code(part, VARIABLE_SUBSTITUTION_BRACKET, NEED_PREFIX_OR_VARIABLE_NAME);
+			/* the substitution must have ended in a state where it may end */
+			switch (part->command->requirement) {
+			case NEED_INDEX_OR_OPERATOR_OR_END:
+			case NEED_INDEX_OR_END:
+			case NEED_OPERATOR_OR_END:
+			case NEED_END:
+				break;
+			default:
+				eprintf("invalid variable substitution at line %zu\n", part->line_number);
+				break;
+			}
+			break;
+
+		case BACKQUOTE_EXPRESSION:
+		case SUBSHELL_SUBSTITUTION:
+		case PROCESS_SUBSTITUTION_INPUT:
+		case PROCESS_SUBSTITUTION_OUTPUT:
+		case PROCESS_SUBSTITUTION_INPUT_OUTPUT:
+		case SUBSHELL:
+			interpret_nested_code(part, CODE_ROOT, NEED_COMMAND);
+			break;
+
+		case COMMAND:
+		case REDIRECTION:
+		case FUNCTION_MARK:
+		case VARIABLE:
+		default:
+			abort();
+		}
+	}
+}
+
+
+/* Attach a right-hand side to the next pending redirection of `command`.
+ *
+ * `*argumentp` is the REDIRECTION marker part. The text parts following it
+ * (up to, but not including, the first part that cannot be text) are cut
+ * out of the chain and become the redirection's right-hand side; the marker
+ * is freed and `*argumentp` is advanced to whatever follows. Both sides of
+ * the redirection are then interpreted as text. A missing right-hand side
+ * is reported through eprintf.
+ * NOTE(review): the code after the report assumes eprintf does not return. */
+static void
+push_redirection(struct command *command, struct argument **argumentp)
+{
+	struct redirection *redirection;
+	struct argument *marker, *cursor, *last_part = NULL;
+	int is_text;
+
+	/* take the next redirection out of the command */
+	redirection = command->redirections[command->redirections_offset];
+	command->redirections[command->redirections_offset++] = NULL;
+
+	marker = *argumentp;
+	*argumentp = marker->next_part;
+	redirection->right_hand_side = *argumentp;
+
+	/* find the last part that can belong to the right-hand side */
+	for (cursor = redirection->right_hand_side; cursor; cursor = cursor->next_part) {
+		switch (cursor->type) {
+		case QUOTED:
+		case UNQUOTED:
+		case QUOTE_EXPRESSION:
+		case BACKQUOTE_EXPRESSION:
+		case ARITHMETIC_EXPRESSION:
+		case VARIABLE_SUBSTITUTION:
+		case SUBSHELL_SUBSTITUTION:
+			is_text = 1;
+			break;
+		default:
+			is_text = 0;
+			break;
+		}
+		if (!is_text)
+			break;
+		last_part = cursor;
+	}
+
+	if (!last_part) {
+		eprintf("missing right-hand side of '%s' at line %zu\n",
+		        get_redirection_token(redirection->type), marker->line_number);
+	}
+
+	/* cut the right-hand side out of the chain */
+	*argumentp = last_part->next_part;
+	last_part->next_part = NULL;
+	free(marker);
+
+	if (redirection->left_hand_side)
+		translate_text_argument(redirection->left_hand_side);
+	translate_text_argument(redirection->right_hand_side);
+}
+
+
+/* Cut the next argument off the chain `*argumentp` and append it to the
+ * current interpreter state.
+ *
+ * A REDIRECTION or FUNCTION_MARK part is pushed on its own, untranslated.
+ * Otherwise all parts up to the next REDIRECTION or FUNCTION_MARK part are
+ * taken together and interpreted as text. `*argumentp` is advanced to the
+ * first part that was not taken (possibly NULL). */
+static void
+push_argument(struct parser_context *ctx, struct argument **argumentp)
+{
+	struct argument *head = *argumentp, *tail;
+	enum argument_type following;
+
+	if (head->type != REDIRECTION && head->type != FUNCTION_MARK) {
+		tail = head;
+		while (tail->next_part) {
+			following = tail->next_part->type;
+			if (following == REDIRECTION || following == FUNCTION_MARK)
+				break;
+			tail = tail->next_part;
+		}
+		*argumentp = tail->next_part;
+		tail->next_part = NULL;
+
+		translate_text_argument(head);
+	} else {
+		*argumentp = head->next_part;
+		head->next_part = NULL;
+	}
+
+	push_interpreted_argument(ctx, head);
+}
+
+
+/* Append a new single-part argument of type `type` to the current
+ * interpreter state.
+ *
+ * The new argument holds a NUL-terminated copy of the `text_length` bytes
+ * at `text` and takes its line number from `argument`. */
+static void
+push_typed_text(struct parser_context *ctx, struct argument *argument, char *text, size_t text_length, enum argument_type type)
+{
+	struct argument *new_argument;
+
+	/* size of the structure, not of the pointer to it */
+	new_argument = ecalloc(1, sizeof(*new_argument));
+	new_argument->type = type;
+	new_argument->line_number = argument->line_number;
+	new_argument->length = text_length;
+	new_argument->text = emalloc(text_length + 1);
+	memcpy(new_argument->text, text, text_length);
+	new_argument->text[text_length] = '\0';
+
+	push_interpreted_argument(ctx, new_argument);
+}
+
+
+/* Placeholder: intended to push a segment of unquoted text found inside a
+ * bracketed variable substitution. It currently does nothing, so such text
+ * is silently dropped by callers. */
+static void
+push_unquoted_segment(struct parser_context *ctx, struct argument *argument, char *text, size_t text_length) /* TODO (must handle $) */
+{
+}
+
+
+/* Push the `text_length` bytes at `text` as a VARIABLE argument; the line
+ * number is taken from `argument`. */
+static void
+push_variable(struct parser_context *ctx, struct argument *argument, char *text, size_t text_length)
+{
+ push_typed_text(ctx, argument, text, text_length, VARIABLE);
+}
+
+
+/* Push the `token_length` bytes at `token` as an OPERATOR argument; the
+ * line number is taken from `argument`. */
+static void
+push_operator(struct parser_context *ctx, struct argument *argument, char *token, size_t token_length)
+{
+ push_typed_text(ctx, argument, token, token_length, OPERATOR);
+}
+
+
+/* Interpret the next part inside a bracketed variable substitution (${…}).
+ *
+ * The part `*argumentp` is detached from its chain (`*argumentp` is
+ * advanced to its successor). An UNQUOTED part is scanned and split into
+ * VARIABLE and OPERATOR arguments, driven by the `requirement` field of the
+ * current interpreter state, which is updated as the scan progresses; the
+ * part itself is freed afterwards. Any other part is only accepted where
+ * free text is allowed, and is then pushed as an argument.
+ *
+ * Syntax errors are reported through eprintf. Indexing with '[' is not
+ * implemented yet and ends up being reported as a stray '['.
+ *
+ * `command` is currently unused. */
+static void
+push_variable_substitution_argument(struct parser_context *ctx, struct command *command, struct argument **argumentp)
+{
+#define IS_SPECIAL_PARAMETER(C)\
+	((C) == '@' || (C) == '*' || (C) == '?' || (C) == '#' || (C) == '$' || (C) == '!')
+/* <ctype.h> functions require values representable as unsigned char */
+#define IS_NAME_CHARACTER(C)\
+	((C) == '_' || isalnum((unsigned char)(C)))
+
+	struct argument *argument;
+	size_t length, line_number;
+	char *s;
+
+	argument = *argumentp;
+	*argumentp = argument->next_part;
+	argument->next_part = NULL;
+
+	line_number = argument->line_number;
+
+	if (argument->type == UNQUOTED) {
+		for (s = argument->text; *s;) {
+			if (ctx->interpreter_state->requirement == NEED_PREFIX_OR_VARIABLE_NAME) {
+				if (IS_NAME_CHARACTER(s[0]) || (s[0] == '~' && check_extension("~", line_number))) {
+					ctx->interpreter_state->requirement = NEED_INDEX_OR_OPERATOR_OR_END;
+				variable_or_tilde:
+					length = 1;
+					while (IS_NAME_CHARACTER(s[length]) || (s[0] == '~' && s[length] == '-'))
+						length += 1;
+					if (s[0] == '~' && s[length] == '$')
+						length += 1;
+					push_variable(ctx, argument, s, length);
+					s = &s[length];
+				} else if (IS_SPECIAL_PARAMETER(s[1])) {
+					/* prefix operator followed by a special parameter */
+					if (s[0] == '!' && check_extension("!", line_number))
+						ctx->interpreter_state->requirement = NEED_INDEX_OR_SUFFIX_OR_END;
+					else if (s[0] == '#')
+						ctx->interpreter_state->requirement = NEED_INDEX_OR_END;
+					else
+						goto bad_syntax;
+					push_operator(ctx, argument, &s[0], 1);
+					push_variable(ctx, argument, &s[1], 1);
+					s = &s[2];
+				} else if (IS_NAME_CHARACTER(s[1]) || (s[1] == '~' && check_extension("~", line_number))) {
+					/* prefix operator followed by a name */
+					if (s[0] == '!' && check_extension("!", line_number))
+						ctx->interpreter_state->requirement = NEED_INDEX_OR_SUFFIX_OR_END;
+					else if (s[0] == '#')
+						ctx->interpreter_state->requirement = NEED_INDEX_OR_END;
+					else
+						goto bad_syntax;
+					push_operator(ctx, argument, s, 1);
+					s = &s[1];
+					goto variable_or_tilde;
+				} else if (IS_SPECIAL_PARAMETER(s[0])) {
+					ctx->interpreter_state->requirement = NEED_INDEX_OR_OPERATOR_OR_END;
+					push_variable(ctx, argument, s, 1);
+					s = &s[1];
+				} else {
+					goto bad_syntax;
+				}
+
+			} else if (ctx->interpreter_state->requirement == NEED_INDEX_OR_OPERATOR_OR_END) {
+				if (s[0] == '[') {
+					ctx->interpreter_state->requirement = NEED_OPERATOR_OR_END;
+				index:
+					/* TODO push INDEX substate that exits on ] */
+					; /* a label must be followed by a statement */
+				} else {
+				operator:
+					ctx->interpreter_state->requirement = NO_REQUIREMENT;
+					if (s[0] == ':' && (s[1] == '-' || s[1] == '=' || s[1] == '?' || s[1] == '+')) {
+						length = 2;
+					} else if (s[0] == '-' || s[0] == '=' || s[0] == '?' || s[0] == '+') {
+						length = 1;
+					} else if (s[0] == '%' || s[0] == '#' ||
+					           (s[0] == ',' && check_extension(s[1] == s[0] ? ",," : ",", line_number)) ||
+					           (s[0] == '^' && check_extension(s[1] == s[0] ? "^^" : "^", line_number))) {
+						if (s[1] == s[0])
+							length = 2;
+						else
+							length = 1;
+					} else if (s[0] == '/' && check_extension("/", line_number)) {
+						ctx->interpreter_state->requirement = NEED_TEXT_OR_SLASH;
+						length = 1;
+					} else if (s[0] == ':' && check_extension(":", line_number)) {
+						ctx->interpreter_state->requirement = NEED_TEXT_OR_COLON;
+						length = 1;
+					} else if (s[0] == '@' && check_extension("@", line_number)) {
+						ctx->interpreter_state->requirement = NEED_AT_OPERAND;
+						length = 1;
+					} else {
+						goto bad_syntax;
+					}
+					/* push exactly the operator, which may be a single character */
+					push_operator(ctx, argument, s, length);
+					s = &s[length];
+				}
+
+			} else if (ctx->interpreter_state->requirement == NEED_INDEX_OR_SUFFIX_OR_END) {
+				ctx->interpreter_state->requirement = NEED_END;
+				if (s[0] == '[') {
+					goto index;
+				} else if (s[0] == '*' || s[0] == '@') {
+					push_operator(ctx, argument, s, 1);
+					s = &s[1];
+				} else {
+					goto bad_syntax;
+				}
+
+			} else if (ctx->interpreter_state->requirement == NEED_INDEX_OR_END) {
+				ctx->interpreter_state->requirement = NEED_END;
+				if (s[0] == '[') {
+					goto index;
+				} else {
+					goto bad_syntax;
+				}
+
+			} else if (ctx->interpreter_state->requirement == NEED_OPERATOR_OR_END) {
+				if (s[0] == '[')
+					goto bad_syntax;
+				else
+					goto operator;
+
+			} else if (ctx->interpreter_state->requirement == NEED_END) {
+				goto bad_syntax;
+
+			} else if (ctx->interpreter_state->requirement == NEED_AT_OPERAND) {
+				if (*s == 'U' || *s == 'u' || *s == 'L' || *s == 'Q' || *s == 'E' ||
+				    *s == 'P' || *s == 'A' || *s == 'K' || *s == 'a') {
+					ctx->interpreter_state->requirement = NEED_END;
+					push_operator(ctx, argument, s, 1);
+					s = &s[1];
+				} else {
+					goto bad_syntax;
+				}
+
+			} else if (ctx->interpreter_state->requirement == NEED_TEXT_OR_SLASH) {
+				length = 0;
+				while (s[length] && s[length] != '/')
+					length += 1;
+				if (length) {
+					push_unquoted_segment(ctx, argument, s, length);
+					s = &s[length];
+				}
+				if (s[0]) {
+					ctx->interpreter_state->requirement = NO_REQUIREMENT;
+					push_operator(ctx, argument, s, 1);
+					s = &s[1];
+				}
+
+			} else if (ctx->interpreter_state->requirement == NEED_TEXT_OR_COLON) {
+				length = 0;
+				while (s[length] && s[length] != ':')
+					length += 1;
+				if (length) {
+					push_unquoted_segment(ctx, argument, s, length);
+					s = &s[length];
+				}
+				if (s[0]) {
+					ctx->interpreter_state->requirement = NO_REQUIREMENT;
+					push_operator(ctx, argument, s, 1);
+					s = &s[1];
+				}
+
+			} else {
+				/* free text: the remainder of the part is one segment;
+				 * consume it so that the loop terminates */
+				length = strlen(s);
+				push_unquoted_segment(ctx, argument, s, length);
+				s = &s[length];
+			}
+		}
+		free(argument->text);
+		free(argument);
+	} else {
+		if (ctx->interpreter_state->requirement != NO_REQUIREMENT &&
+		    ctx->interpreter_state->requirement != NEED_TEXT_OR_SLASH &&
+		    ctx->interpreter_state->requirement != NEED_TEXT_OR_COLON) {
+			/* there is no offending character to show for a non-text part */
+			goto bad_part;
+		} else if (argument->type == QUOTED) {
+			push_interpreted_argument(ctx, argument);
+		} else {
+			push_argument(ctx, &argument);
+		}
+	}
+
+	return;
+
+bad_syntax:
+	eprintf("stray '%c' in bracketed variable substitution at line %zu\n", *s, line_number);
+	return;
+
+bad_part:
+	eprintf("stray quote or nested expression in bracketed variable substitution at line %zu\n", line_number);
+
+#undef IS_NAME_CHARACTER
+#undef IS_SPECIAL_PARAMETER
+}
+
+
+/**
+ * Interpret the commands the parser has queued up in `ctx->parser_state`
+ * and remove the ones that have been dealt with from the queue
+ *
+ * Commands are read from index `ctx->interpreter_offset` up to
+ * `ctx->parser_state->ncommands`. Each argument of a command is
+ * classified according to `ctx->interpreter_state->requirement` and
+ * `ctx->interpreter_state->dealing_with`: reserved words push or pop
+ * interpreter states, redirections and function marks are moved to
+ * where they belong, and anything else is pushed as a plain argument.
+ * Commands that terminate at the main-body level are counted in
+ * `interpreted` and dropped from the front of the command array before
+ * the function returns.
+ *
+ * If here-documents are still queued, nothing is interpreted; the
+ * here-document stack is instead flagged with `interpret_when_empty`.
+ *
+ * @param  ctx  The parser context whose queued commands shall be interpreted
+ */
+void
+interpret_and_eliminate(struct parser_context *ctx)
+{
+	size_t interpreted = 0, arg_i;
+	struct command *command;
+	struct argument *argument, *next_argument;
+	enum reserved_word reserved_word;
+
+	/* Pending here-documents: only record that interpretation is wanted later */
+	if (ctx->here_document_stack && ctx->here_document_stack->first) {
+		ctx->here_document_stack->interpret_when_empty = 1;
+		return;
+	}
+
+	for (; ctx->interpreter_offset < ctx->parser_state->ncommands; ctx->interpreter_offset++) {
+		command = ctx->parser_state->commands[ctx->interpreter_offset];
+		argument = NULL;
+
+		if (ctx->interpreter_state->dealing_with == TEXT_ROOT) {
+			ctx->interpreter_state->requirement = NEED_VALUE;
+		} else if (ctx->interpreter_state->dealing_with != FOR_STATEMENT &&
+		           ctx->interpreter_state->dealing_with != VARIABLE_SUBSTITUTION_BRACKET) {
+			ctx->interpreter_state->requirement = NEED_COMMAND;
+		}
+
+		/* `arg_i` only advances once `argument` is NULL again; a non-NULL
+		 * `argument` at the end of an iteration (e.g. the part following
+		 * a function mark) is processed before the next array element */
+		for (arg_i = 0; argument || arg_i < command->narguments; arg_i += !argument) {
+			if (!argument)
+				argument = command->arguments[arg_i];
+
+			/* TODO Implement alias substitution
+			 *
+			 * Unless a word was quoted/backslashed, it is subject
+			 * to alias substitution if it is the first argument
+			 * of a command (after any previous alias substitution)
+			 * or if it immediately follows an alias substitution
+			 * resulting in an unquoted whitespace at the end.
+			 * However, if the word is a reserved word (which may
+			 * indeed be the name of an alias) it shall not be subject
+			 * to alias substitution if it has meaning in the context
+			 * it appears in (for example: alias while=x shall be
+			 * expanded for a "while" that follows the expansion of
+			 * alias echo='echo ' but not if it is the first word in
+			 * a command). Creating aliases named after reserved words
+			 * is stupid and we should only allow it in POSIX mode.
+			 *
+			 * (Alias substitution occurs before the grammar is
+			 * interpreted, meaning defining an alias does not
+			 * modify already declared functions that use a command
+			 * with the same name as the alias.)
+			 *
+			 * The result of alias substitution is subject to
+			 * alias substitution, however (to avoid infinite loop),
+			 * already expanded aliases shall not be recognised.
+			 */
+
+			if (ctx->interpreter_state->requirement == NEED_COMMAND &&
+			    (reserved_word = get_reserved_word(argument))) {
+				switch (reserved_word) {
+				case BANG:
+					if (ctx->interpreter_state->disallow_bang)
+						stray_reserved_word(argument);
+					ctx->interpreter_state->disallow_bang = 1;
+					ctx->interpreter_state->have_bang = 1;
+					break;
+
+				case OPEN_CURLY:
+				open_curly:
+					push_state(ctx, CURLY_NESTING, argument->line_number);
+					goto new_command;
+
+				case CLOSE_CURLY:
+					if (ctx->interpreter_state->dealing_with != CURLY_NESTING)
+						stray_reserved_word(argument);
+					pop_state(ctx);
+					ctx->interpreter_state->requirement = NEED_COMMAND_END;
+					break;
+
+				case CASE: /* (TODO) */
+					eprintf("reserved word 'case' (at line %zu) has not been implemented yet\n",
+					        argument->line_number);
+					/* NEWLINEs surrounding 'in' shall be ignored; ';' is not allowed */
+					break;
+
+				case DO:
+					if (ctx->interpreter_state->dealing_with != REPEAT_CONDITIONAL)
+						stray_reserved_word(argument);
+					pop_state(ctx);
+				do_keyword:
+					push_state(ctx, DO_CLAUSE, argument->line_number);
+					goto new_command;
+
+				case DONE:
+					if (ctx->interpreter_state->dealing_with != DO_CLAUSE)
+						stray_reserved_word(argument);
+					/* leave both the do-clause and the statement that owns it */
+					pop_state(ctx);
+					pop_state(ctx);
+					ctx->interpreter_state->requirement = NEED_COMMAND_END;
+					break;
+
+				case ELIF:
+					if (ctx->interpreter_state->dealing_with != IF_CLAUSE)
+						stray_reserved_word(argument);
+					pop_state(ctx);
+					push_state(ctx, IF_CONDITIONAL, argument->line_number);
+					goto new_command;
+
+				case ELSE:
+					if (ctx->interpreter_state->dealing_with != IF_CLAUSE)
+						stray_reserved_word(argument);
+					pop_state(ctx);
+					push_state(ctx, ELSE_CLAUSE, argument->line_number);
+					goto new_command;
+
+				case ESAC:
+					stray_reserved_word(argument);
+					break;
+
+				case FI:
+					if (ctx->interpreter_state->dealing_with != IF_CLAUSE &&
+					    ctx->interpreter_state->dealing_with != ELSE_CLAUSE)
+						stray_reserved_word(argument);
+					/* leave both the clause and the if-statement that owns it */
+					pop_state(ctx);
+					pop_state(ctx);
+					ctx->interpreter_state->requirement = NEED_COMMAND_END;
+					break;
+
+				case FOR:
+					push_state(ctx, FOR_STATEMENT, argument->line_number);
+					ctx->interpreter_state->requirement = NEED_VARIABLE_NAME;
+					free_text_argument(&argument);
+					ctx->interpreter_state->allow_newline = 1;
+					continue;
+
+				case IF:
+					push_state(ctx, IF_STATEMENT, argument->line_number);
+					push_state(ctx, IF_CONDITIONAL, argument->line_number);
+					goto new_command;
+
+				case IN:
+					stray_reserved_word(argument);
+					break;
+
+				case THEN:
+					if (ctx->interpreter_state->dealing_with != IF_CONDITIONAL)
+						stray_reserved_word(argument);
+					pop_state(ctx);
+					push_state(ctx, IF_CLAUSE, argument->line_number);
+					goto new_command;
+
+				case UNTIL:
+					push_state(ctx, UNTIL_STATEMENT, argument->line_number);
+					push_state(ctx, REPEAT_CONDITIONAL, argument->line_number);
+					goto new_command;
+
+				case WHILE:
+					push_state(ctx, WHILE_STATEMENT, argument->line_number);
+					push_state(ctx, REPEAT_CONDITIONAL, argument->line_number);
+					goto new_command;
+
+				default:
+				case NOT_A_RESERVED_WORD:
+					abort();
+				}
+
+				/* the reserved word has been consumed, a newline is not accepted next */
+				free_text_argument(&argument);
+				ctx->interpreter_state->allow_newline = 0;
+				continue;
+
+			new_command:
+				/* the reserved word opened a construct: a command shall follow,
+				 * and it may be placed on the next line */
+				ctx->interpreter_state->requirement = NEED_COMMAND;
+				free_text_argument(&argument);
+				ctx->interpreter_state->allow_newline = 1;
+				continue;
+
+			} else if (ctx->interpreter_state->dealing_with == VARIABLE_SUBSTITUTION_BRACKET) {
+				push_variable_substitution_argument(ctx, command, &argument);
+
+			} else if (argument->type == REDIRECTION) {
+				if (ctx->interpreter_state->dealing_with == FOR_STATEMENT)
+					stray_redirection(command, argument);
+				push_redirection(command, &argument);
+				if (ctx->interpreter_state->requirement != NEED_FUNCTION_BODY)
+					ctx->interpreter_state->requirement = NO_REQUIREMENT; /* e.g. "<somefile;" is ok */
+
+			} else if (argument->type == FUNCTION_MARK) {
+				if (ctx->interpreter_state->requirement == NEED_FUNCTION_BODY ||
+				    ctx->interpreter_state->requirement == NEED_COMMAND_END ||
+				    ctx->interpreter_state->narguments != 1 ||
+				    ctx->interpreter_state->dealing_with == FOR_STATEMENT)
+					eprintf("stray '()' at line %zu\n", argument->line_number);
+
+				/* detach whatever follows the "()" so it is processed on its own */
+				next_argument = argument->next_part;
+				argument->next_part = NULL;
+				push_argument(ctx, &argument);
+
+				/* swap position of () and function name to make it easier to identify */
+				argument = ctx->interpreter_state->arguments[0];
+				ctx->interpreter_state->arguments[0] = ctx->interpreter_state->arguments[1];
+				ctx->interpreter_state->arguments[1] = argument;
+
+				argument = next_argument;
+				ctx->interpreter_state->requirement = NEED_FUNCTION_BODY;
+				ctx->interpreter_state->allow_newline = 1;
+
+			} else if (ctx->interpreter_state->requirement == NEED_FUNCTION_BODY) {
+				reserved_word = get_reserved_word(argument);
+				if (reserved_word == OPEN_CURLY) {
+					goto open_curly;
+				} else if (argument->type == SUBSHELL) {
+					ctx->interpreter_state->requirement = NEED_COMMAND_END;
+					push_argument(ctx, &argument);
+				} else {
+					eprintf("required function body or redirection at line %zu;\n", argument->line_number);
+				}
+				ctx->interpreter_state->allow_newline = 0;
+
+			} else if (ctx->interpreter_state->requirement == NEED_VARIABLE_NAME) {
+				if (ctx->interpreter_state->dealing_with == FOR_STATEMENT) {
+					if (argument->type != UNQUOTED)
+						eprintf("required variable name after 'for' at line %zu\n", argument->line_number);
+					validate_identifier_name(argument, "variable name", "for");
+					argument->type = VARIABLE;
+					/* NOTE(review): `argument` is passed by value here, so it
+					 * does not look like it is advanced before the next
+					 * iteration -- confirm against push_interpreted_argument */
+					push_interpreted_argument(ctx, argument);
+					ctx->interpreter_state->requirement = NEED_IN_OR_DO;
+					ctx->interpreter_state->allow_newline = 1;
+				} else {
+					abort();
+				}
+
+			} else if (ctx->interpreter_state->requirement == NEED_DO) {
+				reserved_word = get_reserved_word(argument);
+				if (reserved_word != DO)
+					stray_reserved_word(argument);
+				goto do_keyword;
+
+			} else if (ctx->interpreter_state->requirement == NEED_IN_OR_DO) {
+				reserved_word = get_reserved_word(argument);
+				if (reserved_word == DO) {
+					push_command(ctx, command);
+					goto do_keyword;
+				} else if (reserved_word == IN) {
+					/* NOTE(review): the "in" word is neither freed nor skipped
+					 * here, so it appears to be seen again as a value in the
+					 * next iteration -- confirm whether that is intended */
+					ctx->interpreter_state->requirement = NEED_VALUE;
+					ctx->interpreter_state->allow_newline = 0;
+				} else {
+					stray_reserved_word(argument);
+				}
+
+			} else {
+				if (ctx->interpreter_state->requirement == NEED_COMMAND_END) {
+					eprintf("required %s at line %zu after control statement\n",
+					        "';', '&', '||', '&&', '|', '&|', '|&', '<>|', or redirection",
+					        argument->line_number);
+				}
+
+				if (ctx->interpreter_state->requirement != NEED_VALUE)
+					ctx->interpreter_state->requirement = NO_REQUIREMENT;
+				/* a subshell as the first word is a complete command on its own */
+				if (argument->type == SUBSHELL || argument->type == ARITHMETIC_SUBSHELL)
+					if (ctx->interpreter_state->narguments == 0)
+						ctx->interpreter_state->requirement = NEED_COMMAND_END;
+
+				push_argument(ctx, &argument);
+				ctx->interpreter_state->allow_newline = 0;
+			}
+		}
+
+		/* In these states only the argument list is of interest,
+		 * the command wrapper itself is discarded */
+		if (ctx->interpreter_state->dealing_with == TEXT_ROOT ||
+		    ctx->interpreter_state->dealing_with == VARIABLE_SUBSTITUTION_BRACKET) {
+			free(command->redirections);
+			free(command->arguments);
+			free(command);
+			continue;
+		}
+
+		/* A newline directly after a word that permits one is swallowed */
+		if (ctx->interpreter_state->allow_newline) {
+			ctx->interpreter_state->allow_newline = 0;
+			if (command->terminal == NEWLINE) {
+				free(command->redirections);
+				free(command->arguments);
+				free(command);
+				continue;
+			}
+		}
+
+		if ((ctx->interpreter_state->requirement == NEED_COMMAND && command->narguments == arg_i) ||
+		    ctx->interpreter_state->requirement == NEED_FUNCTION_BODY ||
+		    ctx->interpreter_state->requirement == NEED_VARIABLE_NAME)
+			stray_command_terminal(command);
+
+		if (ctx->interpreter_state->requirement == NEED_IN_OR_DO) {
+			ctx->interpreter_state->requirement = NEED_DO;
+			if (command->terminal != SEMICOLON && command->terminal != NEWLINE)
+				stray_command_terminal(command);
+		}
+
+		push_command(ctx, command);
+
+		if (command->terminal == SEMICOLON ||
+		    command->terminal == NEWLINE ||
+		    command->terminal == AMPERSAND) {
+			ctx->interpreter_state->disallow_bang = 0;
+			if (ctx->interpreter_state->dealing_with == MAIN_BODY) {
+				/* TODO execute and destroy queued up commands (also destroy list) */
+				interpreted = ctx->interpreter_offset + 1;
+			}
+		} else if (command->terminal == DOUBLE_SEMICOLON) {
+			stray_command_terminal(command);
+		} else {
+			ctx->interpreter_state->disallow_bang = 1;
+		}
+	}
+
+	/* Drop the interpreted commands from the front of the queue;
+	 * memmove(3) counts bytes, so the element count must be scaled
+	 * by the size of one array element */
+	memmove(&ctx->parser_state->commands[0],
+	        &ctx->parser_state->commands[interpreted],
+	        (ctx->parser_state->ncommands - interpreted) * sizeof(*ctx->parser_state->commands));
+	ctx->parser_state->ncommands -= interpreted;
+	ctx->interpreter_offset -= interpreted;
+
+	if (!ctx->parser_state->ncommands) {
+		free(ctx->parser_state->commands);
+		ctx->parser_state->commands = NULL;
+	}
+}
diff --git a/parser.c b/parser.c
index c3da716..957ca96 100644
--- a/parser.c
+++ b/parser.c
@@ -2,6 +2,46 @@
#include "common.h"
+/**
+ * Get the textual token that a redirection type is written with
+ *
+ * @param   type  The redirection type
+ * @return        The token, as a statically allocated string;
+ *                the process is aborted for an unknown type
+ */
+const char *
+get_redirection_token(enum redirection_type type)
+{
+	const char *token;
+
+	switch (type) {
+	case REDIRECT_INPUT:                     token = "<";   break;
+	case REDIRECT_INPUT_TO_FD:               token = "<&";  break;
+	case REDIRECT_OUTPUT:                    token = ">";   break;
+	case REDIRECT_OUTPUT_APPEND:             token = ">>";  break;
+	case REDIRECT_OUTPUT_CLOBBER:            token = ">|";  break;
+	case REDIRECT_OUTPUT_TO_FD:              token = ">&";  break;
+	case REDIRECT_OUTPUT_AND_STDERR:         token = "&>";  break;
+	case REDIRECT_OUTPUT_AND_STDERR_APPEND:  token = "&>>"; break;
+	case REDIRECT_OUTPUT_AND_STDERR_CLOBBER: token = "&>|"; break;
+	case REDIRECT_OUTPUT_AND_STDERR_TO_FD:   token = "&>&"; break;
+	case REDIRECT_INPUT_OUTPUT:              token = "<>";  break;
+	case REDIRECT_INPUT_OUTPUT_TO_FD:        token = "<>&"; break;
+	case HERE_STRING:                        token = "<<<"; break;
+	case HERE_DOCUMENT:                      token = "<<";  break;
+	case HERE_DOCUMENT_INDENTED:             token = "<<-"; break;
+	default:
+		abort();
+	}
+
+	return token;
+}
+
+
void
push_end_of_file(struct parser_context *ctx)
{
@@ -44,6 +84,7 @@ push_command_terminal(struct parser_context *ctx, enum command_terminal terminal
new_command = ecalloc(1, sizeof(*new_command));
ctx->parser_state->commands[ctx->parser_state->ncommands++] = new_command;
new_command->terminal = terminal;
+ new_command->terminal_line_number = ctx->tokeniser_line_number;
new_command->arguments = ctx->parser_state->arguments;
new_command->narguments = ctx->parser_state->narguments;
new_command->redirections = ctx->parser_state->redirections;
@@ -53,22 +94,17 @@ push_command_terminal(struct parser_context *ctx, enum command_terminal terminal
ctx->parser_state->redirections = NULL;
ctx->parser_state->nredirections = 0;
- if (!ctx->parser_state->parent) {
- if (terminal == DOUBLE_SEMICOLON || terminal == SEMICOLON || terminal == AMPERSAND) {
- /* TODO unless in a special construct such as while, case, for, if, or {, run and clear
- * also require that any here-document is specified (count them and run when given);
- * if terminal == AMPERSAND: perform </dev/null first, and reset exist status to 0
- */
- }
- }
+ if (!ctx->parser_state->parent && !ctx->do_not_run)
+ if (terminal == DOUBLE_SEMICOLON || terminal == SEMICOLON || terminal == NEWLINE || terminal == AMPERSAND)
+ interpret_and_eliminate(ctx);
}
+/* Terminate the command that is being built: with SEMICOLON normally,
+ * or with NEWLINE when `actually_newline` is non-zero. A newline is
+ * silently ignored when no arguments have been collected for the command. */
void
-push_semicolon(struct parser_context *ctx, int maybe)
+push_semicolon(struct parser_context *ctx, int actually_newline)
{
- if (!maybe || ctx->parser_state->narguments)
- push_command_terminal(ctx, SEMICOLON);
+ if (!actually_newline || ctx->parser_state->narguments)
+ push_command_terminal(ctx, actually_newline ? NEWLINE : SEMICOLON);
}
@@ -81,7 +117,10 @@ push_new_argument_part(struct parser_context *ctx, enum argument_type type)
new_part->type = type;
new_part->line_number = ctx->tokeniser_line_number;
- if (ctx->parser_state->current_argument_end) {
+ if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) {
+ ctx->here_document_stack->first->argument_end->next_part = new_part;
+ ctx->here_document_stack->first->argument_end = new_part;
+ } else if (ctx->parser_state->current_argument_end) {
ctx->parser_state->current_argument_end->next_part = new_part;
ctx->parser_state->current_argument_end = new_part;
} else {
@@ -91,6 +130,49 @@ push_new_argument_part(struct parser_context *ctx, enum argument_type type)
}
+/**
+ * Check whether an argument consists solely of unquoted decimal digits
+ *
+ * Every part of the argument (following `next_part`) must be of
+ * type UNQUOTED and contain nothing but digits; a part with empty
+ * text is accepted
+ *
+ * @param   argument  The first part of the argument, must not be NULL
+ * @return            1 if the argument is numeric, 0 otherwise
+ */
+PURE_FUNC
+static int
+is_numeric_argument(struct argument *argument)
+{
+	const char *p;
+
+	do {
+		if (argument->type != UNQUOTED)
+			return 0;
+
+		/* <ctype.h> functions are only defined for values representable
+		 * as unsigned char (or EOF), a plain char may be negative */
+		for (p = argument->text; *p; p++)
+			if (!isdigit((unsigned char)*p))
+				return 0;
+
+	} while ((argument = argument->next_part));
+
+	return 1;
+}
+
+
+/**
+ * Check whether an argument has the form of an identifier directly
+ * followed by a terminating '$'
+ *
+ * The argument must be made up of UNQUOTED parts only, must not start
+ * with a digit or a '$', and the first character that is neither
+ * alphanumeric nor '_' must be a '$' that is the very last character
+ * of the last part. An argument without any such character is not
+ * a variable reference.
+ *
+ * @param   argument  The first part of the argument, must not be NULL
+ * @return            1 if the argument has that form, 0 otherwise
+ */
+PURE_FUNC
+static int
+is_variable_reference(struct argument *argument)
+{
+	const char *p;
+
+	/* the type is tested first so that `text` is only read for unquoted parts;
+	 * <ctype.h> functions need their argument converted to unsigned char */
+	if (argument->type != UNQUOTED ||
+	    isdigit((unsigned char)argument->text[0]) ||
+	    argument->text[0] == '$')
+		return 0;
+
+	do {
+		if (argument->type != UNQUOTED)
+			return 0;
+
+		for (p = argument->text; *p; p++)
+			if (!isalnum((unsigned char)*p) && *p != '_')
+				return p[0] == '$' && !p[1] && !argument->next_part;
+
+	} while ((argument = argument->next_part));
+
+	return 0;
+}
+
+
static void
push_redirection(struct parser_context *ctx, enum redirection_type type)
{
@@ -113,9 +195,19 @@ push_redirection(struct parser_context *ctx, enum redirection_type type)
type == REDIRECT_OUTPUT_AND_STDERR ||
type == REDIRECT_OUTPUT_AND_STDERR_APPEND ||
type == REDIRECT_OUTPUT_AND_STDERR_CLOBBER ||
- type == REDIRECT_OUTPUT_AND_STDERR_TO_FD) {
+ type == REDIRECT_OUTPUT_AND_STDERR_TO_FD ||
+ !is_numeric_argument(ctx->parser_state->current_argument)) {
+ if (is_variable_reference(ctx->parser_state->current_argument)) {
+ if (posix_mode) {
+ weprintf("the '$%s' token (at line %zu) is not portable, not parsing as it\n",
+ get_redirection_token(type), ctx->tokeniser_line_number);
+ } else {
+ goto argument_is_left_hand_side;
+ }
+ }
push_whitespace(ctx, 1);
} else {
+ argument_is_left_hand_side:
new_redirection->left_hand_side = ctx->parser_state->current_argument;
}
}
@@ -130,8 +222,8 @@ push_redirection(struct parser_context *ctx, enum redirection_type type)
new_here_document->redirection = new_redirection;
new_here_document->argument = new_argument;
new_here_document->next = NULL;
- *ctx->here_documents_next = new_here_document;
- ctx->here_documents_next = &new_here_document->next;
+ *ctx->here_document_stack->next = new_here_document;
+ ctx->here_document_stack->next = &new_here_document->next;
}
ctx->parser_state->need_right_hand_side = 1;
@@ -159,39 +251,41 @@ size_t
push_symbol(struct parser_context *ctx, char *token, size_t token_len)
{
#define LIST_SYMBOLS(_)\
- _("<<<", push_redirection(ctx, HERE_STRING))\
- _("<<-", push_redirection(ctx, HERE_DOCUMENT_INDENTED))\
- _("<>(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT_OUTPUT, NORMAL_MODE))\
- _("<>|", push_command_terminal(ctx, SOCKET_PIPE))\
- _("<>&", push_redirection(ctx, REDIRECT_INPUT_OUTPUT_TO_FD))\
- _("&>>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_APPEND))\
- _("&>&", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_TO_FD))\
- _("&>|", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_CLOBBER))\
- _("()", push_function_mark(ctx))\
- _("((", push_shell_io(ctx, ARITHMETIC_SUBSHELL, RRB_QUOTE_MODE))\
- _(";;", push_command_terminal(ctx, DOUBLE_SEMICOLON))\
- _("<(", push_shell_io(ctx, PROCESS_SUBSTITUTION_OUTPUT, NORMAL_MODE))\
- _("<<", push_redirection(ctx, HERE_DOCUMENT))\
- _("<>", push_redirection(ctx, REDIRECT_INPUT_OUTPUT))\
- _("<&", push_redirection(ctx, REDIRECT_INPUT_TO_FD))\
- _(">(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT, NORMAL_MODE))\
- _(">>", push_redirection(ctx, REDIRECT_OUTPUT_APPEND))\
- _(">&", push_redirection(ctx, REDIRECT_OUTPUT_TO_FD))\
- _(">|", push_redirection(ctx, REDIRECT_OUTPUT_CLOBBER))\
- _("||", push_command_terminal(ctx, OR))\
- _("|&", push_command_terminal(ctx, PIPE_AMPERSAND))\
- _("&&", push_command_terminal(ctx, AND))\
- _("&|", push_command_terminal(ctx, PIPE_AMPERSAND)) /* synonym for |& to match &> */\
- _("&>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR))\
- _("(", push_shell_io(ctx, SUBSHELL, NORMAL_MODE))\
- _(";", push_semicolon(ctx, 0))\
- _("<", push_redirection(ctx, REDIRECT_INPUT))\
- _(">", push_redirection(ctx, REDIRECT_OUTPUT))\
- _("|", push_command_terminal(ctx, PIPE))\
- _("&", push_command_terminal(ctx, AMPERSAND))
-
-#define X(SYMBOL, ACTION)\
- if (token_len >= sizeof(SYMBOL) - 1 && !strncmp(token, SYMBOL, sizeof(SYMBOL) - 1)) {\
+ _(0, "<<<", push_redirection(ctx, HERE_STRING))\
+ _(1, "<<-", push_redirection(ctx, HERE_DOCUMENT_INDENTED))\
+ _(0, "<>(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT_OUTPUT, NORMAL_MODE))\
+ _(0, "<>|", push_command_terminal(ctx, SOCKET_PIPE))\
+ _(1, "<>&", push_redirection(ctx, REDIRECT_INPUT_OUTPUT_TO_FD))\
+ _(0, "&>>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_APPEND))\
+ _(0, "&>&", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_TO_FD))\
+ _(0, "&>|", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR_CLOBBER))\
+ _(1, "()", push_function_mark(ctx))\
+ _(0, "((", push_shell_io(ctx, ARITHMETIC_SUBSHELL, RRB_QUOTE_MODE))\
+ _(1, ";;", push_command_terminal(ctx, DOUBLE_SEMICOLON))\
+ _(0, "<(", push_shell_io(ctx, PROCESS_SUBSTITUTION_OUTPUT, NORMAL_MODE))\
+ _(1, "<<", push_redirection(ctx, HERE_DOCUMENT))\
+ _(1, "<>", push_redirection(ctx, REDIRECT_INPUT_OUTPUT))\
+ _(1, "<&", push_redirection(ctx, REDIRECT_INPUT_TO_FD))\
+ _(0, ">(", push_shell_io(ctx, PROCESS_SUBSTITUTION_INPUT, NORMAL_MODE))\
+ _(1, ">>", push_redirection(ctx, REDIRECT_OUTPUT_APPEND))\
+ _(1, ">&", push_redirection(ctx, REDIRECT_OUTPUT_TO_FD))\
+ _(1, ">|", push_redirection(ctx, REDIRECT_OUTPUT_CLOBBER))\
+ _(1, "||", push_command_terminal(ctx, OR))\
+ _(0, "|&", push_command_terminal(ctx, PIPE_AMPERSAND))\
+ _(1, "&&", push_command_terminal(ctx, AND))\
+ _(0, "&|", push_command_terminal(ctx, AMPERSAND_PIPE))\
+ _(0, "&>", push_redirection(ctx, REDIRECT_OUTPUT_AND_STDERR))\
+ _(1, "(", push_shell_io(ctx, SUBSHELL, NORMAL_MODE))\
+ _(1, ";", push_semicolon(ctx, 0))\
+ _(1, "<", push_redirection(ctx, REDIRECT_INPUT))\
+ _(1, ">", push_redirection(ctx, REDIRECT_OUTPUT))\
+ _(1, "|", push_command_terminal(ctx, PIPE))\
+ _(1, "&", push_command_terminal(ctx, AMPERSAND))
+
+#define X(PORTABLE, SYMBOL, ACTION)\
+ if (token_len >= sizeof(SYMBOL) - 1 &&\
+ !strncmp(token, SYMBOL, sizeof(SYMBOL) - 1) &&\
+ (PORTABLE || check_extension(SYMBOL, ctx->tokeniser_line_number))) {\
ACTION;\
return token_len;\
}
@@ -208,13 +302,22 @@ push_text(struct parser_context *ctx, char *text, size_t text_len, enum argument
{
struct argument *arg_part;
- ctx->parser_state->need_right_hand_side = 0;
+ if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) {
+ type = QUOTED;
+ if (ctx->here_document_stack->first->argument_end->type != type ||
+ ctx->here_document_stack->first->argument_end->line_number != ctx->tokeniser_line_number)
+ push_new_argument_part(ctx, type);
+ arg_part = ctx->here_document_stack->first->argument_end;
- if (!ctx->parser_state->current_argument_end ||
- ctx->parser_state->current_argument_end->type != type ||
- ctx->parser_state->current_argument_end->line_number != ctx->tokeniser_line_number)
- push_new_argument_part(ctx, type);
- arg_part = ctx->parser_state->current_argument_end;
+ } else {
+ ctx->parser_state->need_right_hand_side = 0;
+
+ if (!ctx->parser_state->current_argument_end ||
+ ctx->parser_state->current_argument_end->type != type ||
+ ctx->parser_state->current_argument_end->line_number != ctx->tokeniser_line_number)
+ push_new_argument_part(ctx, type);
+ arg_part = ctx->parser_state->current_argument_end;
+ }
arg_part->text = erealloc(arg_part->text, arg_part->length + text_len + 1);
memcpy(&arg_part->text[arg_part->length], text, text_len);
@@ -230,11 +333,142 @@ push_quoted(struct parser_context *ctx, char *text, size_t text_len)
}
+/**
+ * Encode a code point as a UTF-8 style multi-byte sequence
+ *
+ * The original, unrestricted scheme is used, so values above
+ * U+10FFFF are encoded with 5 to 7 bytes rather than rejected
+ *
+ * @param   buf    Output buffer, must have room for at least 7 bytes;
+ *                 no NUL byte is appended
+ * @param   value  The value to encode
+ * @return         The number of bytes written to `buf`
+ */
+static size_t
+encode_utf8(char *buf, uint32_t value)
+{
+	size_t i, len;
+
+	if (value <= 0x7F) {
+		buf[0] = (char)value;
+		return 1;
+	}
+
+	if      (value <= 0x000007FFUL) len = 2;
+	else if (value <= 0x0000FFFFUL) len = 3;
+	else if (value <= 0x001FFFFFUL) len = 4;
+	else if (value <= 0x03FFFFFFUL) len = 5;
+	else if (value <= 0x7FFFFFFFUL) len = 6;
+	else                            len = 7;
+
+	/* Continuation bytes carry 6 bits each; the least significant
+	 * bits belong in the last byte, so fill from the end */
+	for (i = len - 1; i; i--) {
+		buf[i] = (char)((value & 0x3F) | 0x80);
+		value >>= 6;
+	}
+
+	/* The lead byte holds the remaining high bits below `len` set marker bits */
+	buf[0] = (char)((value | (0xFFU << (8 - len))) & 0xFF);
+
+	return len;
+}
+
+/**
+ * Resolve the backslash escapes of a $''-expression and push the
+ * result as quoted text
+ *
+ * The escapes are resolved in place, overwriting `text`; this is safe
+ * because no escape produces more bytes than it occupies in the input.
+ *
+ * Recognised escapes: \a \b \e \E \f \n \r \t \v \\ \' \" \?,
+ * \NNN (1 to 3 octal digits, stopping before a digit that would take
+ * the value above 255), \xHH, \uHHHH, \UHHHHHHHH (hexadecimal, the
+ * latter two encoded with encode_utf8) and \cX (control character).
+ * Any other backslash is kept literally. An escape that evaluates
+ * to a NUL byte is dropped with a warning.
+ *
+ * @param  ctx       The parser context
+ * @param  text      The text between the quotes, will be modified
+ * @param  text_len  The number of bytes in `text`
+ */
void
push_escaped(struct parser_context *ctx, char *text, size_t text_len)
{
- /* TODO resolve backslashes in text */
- push_text(ctx, text, text_len, QUOTED);
+	uint32_t value, digit;
+	size_t r, w, n;
+
+	/* `r` is the read offset, `w` the write offset; `w` never overtakes `r` */
+	for (r = w = 0; r < text_len;) {
+		if (text[r] == '\\' && r + 1 < text_len) {
+			if (text[r + 1] == 'a') {
+				text[w++] = '\a';
+				r += 2;
+			} else if (text[r + 1] == 'b') {
+				text[w++] = '\b';
+				r += 2;
+			} else if (text[r + 1] == 'e' || text[r + 1] == 'E') {
+				text[w++] = '\033';
+				r += 2;
+			} else if (text[r + 1] == 'f') {
+				text[w++] = '\f';
+				r += 2;
+			} else if (text[r + 1] == 'n') {
+				text[w++] = '\n';
+				r += 2;
+			} else if (text[r + 1] == 'r') {
+				text[w++] = '\r';
+				r += 2;
+			} else if (text[r + 1] == 't') {
+				text[w++] = '\t';
+				r += 2;
+			} else if (text[r + 1] == 'v') {
+				text[w++] = '\v';
+				r += 2;
+			} else if (text[r + 1] == '\\') {
+				text[w++] = '\\';
+				r += 2;
+			} else if (text[r + 1] == '\'') {
+				text[w++] = '\'';
+				r += 2;
+			} else if (text[r + 1] == '"') {
+				text[w++] = '\"';
+				r += 2;
+			} else if (text[r + 1] == '?') {
+				text[w++] = '?';
+				r += 2;
+			} else if ('0' <= text[r + 1] && text[r + 1] <= '7') {
+				value = 0;
+				/* test the digit that is about to be consumed (text[r]), not the
+				 * one after it, and never read beyond the end of the text */
+				for (r += 1, n = 0; n < 3 && r < text_len && '0' <= text[r] && text[r] <= '7'; r += 1, n += 1) {
+					digit = (uint32_t)(text[r] - '0');
+					if (value * 8 + digit > 255)
+						break;
+					value = value * 8 + digit;
+				}
+				if (value) {
+					text[w++] = (char)value;
+				} else {
+					weprintf("ignoring NUL byte result from $''-expression at line %zu\n",
+					         ctx->tokeniser_line_number);
+				}
+			} else if (text[r + 1] == 'x' && text_len - r >= 3 && isxdigit((unsigned char)text[r + 2])) {
+				value = 0;
+				for (r += 2, n = 0; n < 2 && r < text_len && isxdigit((unsigned char)text[r]); r += 1, n += 1) {
+					value *= 16;
+					/* letters have 1..6 in their low nibble, hence the offset of 9 */
+					value |= (uint32_t)((text[r] > '9' ? 9 : 0) + (text[r] & 15));
+				}
+				if (value) {
+					text[w++] = (char)value;
+				} else {
+					weprintf("ignoring NUL byte result from $''-expression at line %zu\n",
+					         ctx->tokeniser_line_number);
+				}
+			} else if (text[r + 1] == 'u' && text_len - r >= 3 && isxdigit((unsigned char)text[r + 2])) {
+				value = 0;
+				for (r += 2, n = 0; n < 4 && r < text_len && isxdigit((unsigned char)text[r]); r += 1, n += 1) {
+					value *= 16;
+					value |= (uint32_t)((text[r] > '9' ? 9 : 0) + (text[r] & 15));
+				}
+				if (value) {
+					w += encode_utf8(&text[w], value);
+				} else {
+					weprintf("ignoring NUL byte result from $''-expression at line %zu\n",
+					         ctx->tokeniser_line_number);
+				}
+			} else if (text[r + 1] == 'U' && text_len - r >= 3 && isxdigit((unsigned char)text[r + 2])) {
+				/* like \x and \u, \U is only an escape when a hexadecimal digit follows */
+				value = 0;
+				for (r += 2, n = 0; n < 8 && r < text_len && isxdigit((unsigned char)text[r]); r += 1, n += 1) {
+					value *= 16;
+					value |= (uint32_t)((text[r] > '9' ? 9 : 0) + (text[r] & 15));
+				}
+				if (value) {
+					w += encode_utf8(&text[w], value);
+				} else {
+					weprintf("ignoring NUL byte result from $''-expression at line %zu\n",
+					         ctx->tokeniser_line_number);
+				}
+			} else if (text[r + 1] == 'c' && text_len - r >= 3) {
+				/* control character: keep the low 5 bits of the following byte */
+				if (text[r + 2] & (' ' - 1)) {
+					text[w++] = (char)(text[r + 2] & (' ' - 1));
+				} else {
+					weprintf("ignoring NUL byte result from $''-expression at line %zu\n",
+					         ctx->tokeniser_line_number);
+				}
+				r += 3;
+			} else {
+				/* unrecognised escape: the backslash is kept as it is */
+				text[w++] = text[r++];
+			}
+		} else {
+			text[w++] = text[r++];
+		}
+	}
+	push_text(ctx, text, w, QUOTED);
}
@@ -250,7 +484,9 @@ push_enter(struct parser_context *ctx, enum argument_type type)
{
struct parser_state *new_state;
- ctx->parser_state->need_right_hand_side = 0;
+ if (ctx->mode_stack->mode != HERE_DOCUMENT_MODE)
+ ctx->parser_state->need_right_hand_side = 0;
+
push_new_argument_part(ctx, type);
new_state = ecalloc(1, sizeof(*new_state));
@@ -263,9 +499,48 @@ push_enter(struct parser_context *ctx, enum argument_type type)
+/* Leave the nested parser state that is currently active and return
+ * to its parent (`ctx->parser_state->parent`). What is done with the
+ * content collected in the nested state depends on the tokeniser mode:
+ * in NORMAL_MODE a pending command is terminated as if by a newline;
+ * in BQ_QUOTE_MODE the collected text is parsed as shell code in a
+ * separate context; in every other mode the arguments are wrapped in
+ * a dummy command. */
void
push_leave(struct parser_context *ctx)
{
- if (ctx->mode_stack->mode == NORMAL_MODE)
+ struct parser_context subctx;
+ struct argument *argument;
+ char *code;
+ size_t code_length;
+ size_t parsed_length;
+ size_t arg_i;
+
+ if (ctx->mode_stack->mode == NORMAL_MODE) {
push_semicolon(ctx, 1);
- /* TODO else if (ctx->mode_stack->mode == BQ_QUOTE_MODE), parse content */
- /* TODO validate subshell content */
+
+ } else if (ctx->mode_stack->mode == BQ_QUOTE_MODE) {
+ /* Parse the content of the backquote expression in a context of
+ * its own, which is marked as complete input that shall not be run */
+ initialise_parser_context(&subctx, 1, 1);
+ subctx.do_not_run = 1;
+ subctx.end_of_file_reached = 1;
+ code = NULL;
+ code_length = 0;
+ /* Concatenate the text of the collected arguments into one buffer
+ * (only the first part of each argument is read, `next_part` is not followed) */
+ for (arg_i = 0; arg_i < ctx->parser_state->narguments; arg_i++) {
+ argument = ctx->parser_state->arguments[arg_i];
+ code = erealloc(code, code_length + argument->length);
+ memcpy(&code[code_length], argument->text, argument->length);
+ code_length += argument->length;
+ }
+ code = erealloc(code, code_length + 1);
+ code[code_length] = '\0';
+ parsed_length = parse_preparsed(&subctx, code, code_length);
+ if (parsed_length < code_length || subctx.premature_end_of_file) {
+ eprintf("premature end of file backquote expression at line %zu\n",
+ ctx->parser_state->parent->current_argument_end->line_number);
+ }
+ free(code);
+ /* NOTE(review): `subctx.mode_stack` does not appear to be freed here,
+ * and neither are the arguments that were concatenated above -- confirm */
+ free(subctx.here_document_stack);
+ free(subctx.interpreter_state);
+ /* The parsed result becomes the child of the argument part in the parent state */
+ ctx->parser_state->parent->current_argument_end->child = subctx.parser_state;
+
+ } else {
+ /* In quote modes we want everything in a dummy command
+ * to simplify the implementation of the interpreter.
+ * The command termination used here doesn't matter,
+ * neither does the line number (for it), the interpreter
+ * will only look at the argument list. */
+ push_command_terminal(ctx, NEWLINE);
+ }
+
ctx->parser_state = ctx->parser_state->parent;
}
diff --git a/preparser.c b/preparser.c
index 840209f..9ab8432 100644
--- a/preparser.c
+++ b/preparser.c
@@ -5,7 +5,7 @@
size_t
parse(struct parser_context *ctx, char *code, size_t code_len, size_t *nremovedp)
{
- int end_of_file_reached;
+ char end_of_file_reached;
size_t bytes_parsed = 0;
end_of_file_reached = ctx->end_of_file_reached;
diff --git a/regular_builtins.c b/regular_builtins.c
new file mode 100644
index 0000000..5cdb4fe
--- /dev/null
+++ b/regular_builtins.c
@@ -0,0 +1,67 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+/* The `true` builtin: its arguments are not inspected and it always succeeds */
+int
+true_main(int argc, char **argv)
+{
+	(void) argv;
+	(void) argc;
+
+	return 0;
+}
+
+
+/* The `false` builtin: its arguments are not inspected and it always fails with status 1 */
+int
+false_main(int argc, char **argv)
+{
+	(void) argv;
+	(void) argc;
+
+	return 1;
+}
+
+
+BUILTIN_USAGE(pwd_usage, "[-L | -P]")
+/**
+ * The `pwd` builtin: print the current working directory
+ *
+ * With -P the path reported by getcwd(3) is always printed. Otherwise
+ * (-L, the default) the value of the PWD environment variable is
+ * printed instead, provided that it is an absolute path and refers to
+ * the same device and inode as the path reported by getcwd(3).
+ *
+ * @param   argc  The number of elements in `argv`
+ * @param   argv  The command line arguments
+ * @return        Always 0; a failure to write to stdout only causes a warning
+ */
+int
+pwd_main(int argc, char **argv)
+{
+	void (*usage)(void) = pwd_usage;
+	struct stat pst, cst;
+	const char *pwd;
+	char *cwd = NULL;
+	size_t size = 64 / 2;
+	int physical = 0;
+	int found, logical;
+
+	ARGBEGIN {
+	case 'P':
+		physical = 1;
+		break;
+	case 'L':
+		physical = 0;
+		break;
+	default:
+		usage();
+	} ARGEND;
+
+	if (argc)
+		weprintf("ignoring operands"); /* other implementations either warn or are silent, they don't fail */
+
+	/* Double the buffer (starting at 64 bytes) until the path fits */
+	do {
+		size *= 2;
+		cwd = erealloc(cwd, size);
+		found = getcwd(cwd, size) != NULL;
+		if (!found && errno != ERANGE)
+			eprintf("getcwd %zu:", size);
+	} while (!found);
+
+	/* $PWD is only trusted if it demonstrably names the same directory */
+	logical = !physical &&
+	          (pwd = getenv("PWD")) &&
+	          *pwd == '/' &&
+	          !stat(pwd, &pst) &&
+	          !stat(cwd, &cst) &&
+	          pst.st_dev == cst.st_dev &&
+	          pst.st_ino == cst.st_ino;
+	if (logical)
+		puts(pwd);
+	else
+		puts(cwd);
+
+	free(cwd);
+	if (fflush(stdout) || ferror(stdout))
+		weprintf("fflush <stdout>:");
+	return 0;
+}
diff --git a/special_builtins.c b/special_builtins.c
new file mode 100644
index 0000000..20e37ae
--- /dev/null
+++ b/special_builtins.c
@@ -0,0 +1,11 @@
+/* See LICENSE file for copyright and license details. */
+#include "common.h"
+
+
+/* The `:` special builtin: its arguments are not inspected and it always succeeds */
+int
+colon_main(int argc, char **argv)
+{
+	(void) argv;
+	(void) argc;
+
+	return 0;
+}
diff --git a/tokeniser.c b/tokeniser.c
index 63ff2fd..606726b 100644
--- a/tokeniser.c
+++ b/tokeniser.c
@@ -5,20 +5,169 @@
+/* Enter a new tokeniser mode by pushing it onto `ctx->mode_stack`.
+ * A warning is printed when the new mode is BQ_QUOTE_MODE. If the mode
+ * being left behind is HERE_DOCUMENT_MODE, a new, empty here-document
+ * stack is pushed as well, with the current one as its `previous`. */
void
push_mode(struct parser_context *ctx, enum tokeniser_mode mode)
{
- struct mode_stack *new = emalloc(sizeof(*new));
- new->mode = mode;
- new->she_is_comment = 1;
- new->previous = ctx->mode_stack;
- ctx->mode_stack = new;
+ struct mode_stack *new_mode_stack;
+ struct here_document_stack *new_here_document_stack;
+
+ if (mode == BQ_QUOTE_MODE)
+ weprintf("backquote expression found at line %zu, stop it!\n", ctx->tokeniser_line_number);
+
+ if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) {
+ new_here_document_stack = ecalloc(1, sizeof(*new_here_document_stack));
+ /* empty list: the insertion point is the list head itself */
+ new_here_document_stack->next = &new_here_document_stack->first;
+ new_here_document_stack->previous = ctx->here_document_stack;
+ ctx->here_document_stack = new_here_document_stack;
+ }
+
+ new_mode_stack = emalloc(sizeof(*new_mode_stack));
+ new_mode_stack->mode = mode;
+ /* presumably: a '#' at this point starts a comment -- TODO confirm against the tokeniser */
+ new_mode_stack->she_is_comment = 1;
+ new_mode_stack->previous = ctx->mode_stack;
+ ctx->mode_stack = new_mode_stack;
}
+/* Leave the current tokeniser mode by popping `ctx->mode_stack`.
+ * If the mode returned to is HERE_DOCUMENT_MODE, the here-document
+ * stack that push_mode created for the nested construct is dealt with:
+ * if it still has queued here-documents it absorbs the previous stack
+ * (which is then freed), otherwise it is discarded and the previous
+ * stack becomes current again. */
void
pop_mode(struct parser_context *ctx)
{
- struct mode_stack *old = ctx->mode_stack;
+ struct mode_stack *old_mode_stack;
+ struct here_document_stack *old_here_document_stack;
+ struct here_document_stack *prev_here_document_stack;
+
+ old_mode_stack = ctx->mode_stack;
ctx->mode_stack = ctx->mode_stack->previous;
- free(old);
+ free(old_mode_stack);
+
+ if (ctx->mode_stack->mode == HERE_DOCUMENT_MODE) {
+ if (ctx->here_document_stack->first) {
+ if (posix_mode) {
+ eprintf("subshell expression closed at line %zu before here-documents, "
+ "this is non-portable\n", ctx->tokeniser_line_number);
+ }
+ /* Append the previous stack's list after the current one's and take
+ * over the previous stack's position in the chain.
+ * NOTE(review): if the previous list were empty, its `next` would
+ * point into the struct freed below; presumably it cannot be empty
+ * while in HERE_DOCUMENT_MODE -- confirm */
+ prev_here_document_stack = ctx->here_document_stack->previous;
+ *ctx->here_document_stack->next = prev_here_document_stack->first;
+ ctx->here_document_stack->next = prev_here_document_stack->next;
+ ctx->here_document_stack->previous = prev_here_document_stack->previous;
+ ctx->here_document_stack->interpret_when_empty = prev_here_document_stack->interpret_when_empty;
+ free(prev_here_document_stack);
+ } else {
+ /* Nothing was queued in the nested construct: drop its stack */
+ old_here_document_stack = ctx->here_document_stack;
+ ctx->here_document_stack = old_here_document_stack->previous;
+ free(old_here_document_stack);
+ }
+ }
+}
+
+
+/* Append the text of every argument part in `quote` to the terminator
+ * of `here_document` (the part following `here_document->argument`),
+ * freeing each part as it is consumed. A part that is neither QUOTED
+ * nor UNQUOTED is reported as an error via eprintf. The argument array
+ * of `quote` is freed, `quote` itself is left for the caller to free. */
+static void
+append_and_destroy_quote_to_here_document_terminator(struct here_document *here_document, struct parser_state *quote)
+{
+ struct argument *terminator, *part, *next_part;
+ size_t i;
+
+ terminator = here_document->argument->next_part;
+
+ for (i = 0; i < quote->narguments; i++) {
+ /* `next_part` is saved up front because `part` is freed inside the loop */
+ for (part = quote->arguments[i]; part; part = next_part) {
+ next_part = part->next_part;
+ if (part->type != QUOTED && part->type != UNQUOTED) {
+ eprintf("use of run-time evaluated expression as right-hand side "
+ "of %s operator (at line %zu) is illegal\n",
+ here_document->redirection->type == HERE_DOCUMENT_INDENTED ? "<<-" : "<<",
+ here_document->argument->line_number);
+ }
+ /* grow by the part's length plus room for the NUL terminator */
+ terminator->text = erealloc(terminator->text, terminator->length + part->length + 1);
+ memcpy(&terminator->text[terminator->length], part->text, part->length);
+ terminator->length += part->length;
+ terminator->text[terminator->length] = '\0';
+ free(part->text);
+ free(part);
+ }
+ }
+
+ free(quote->arguments);
+}
+
+/* Collapse the right-hand side of the first queued here-document
+ * operator into a single argument part holding the terminator text.
+ * The terminator is the part following the here-document's `argument`;
+ * the directly following QUOTED, UNQUOTED and QUOTE_EXPRESSION parts
+ * are merged into it and freed. The merged part becomes QUOTED as soon
+ * as any quoted piece has been merged in. A missing terminator or a
+ * run-time evaluated expression is reported via eprintf. */
+static void
+get_here_document_terminator(struct parser_context *ctx)
+{
+ struct argument *terminator, *next_part;
+ struct parser_state *child;
+
+ terminator = ctx->here_document_stack->first->argument->next_part;
+ /* NOTE(review): the code below dereferences `terminator`, so eprintf is
+ * presumably expected not to return when it is NULL -- confirm */
+ if (!terminator || (terminator->type != QUOTED && terminator->type != UNQUOTED && terminator->type != QUOTE_EXPRESSION)) {
+ eprintf("missing right-hand side of %s operator at line %zu\n",
+ ctx->here_document_stack->first->redirection->type == HERE_DOCUMENT_INDENTED ? "<<-" : "<<",
+ ctx->here_document_stack->first->argument->line_number);
+ } else if (terminator->type == QUOTE_EXPRESSION) {
+ /* Turn the quote expression into plain quoted text: start from an
+ * empty string and append the content of the nested state to it */
+ child = terminator->child;
+ terminator->type = QUOTED;
+ terminator->text = ecalloc(1, 1);
+ terminator->length = 0;
+ append_and_destroy_quote_to_here_document_terminator(ctx->here_document_stack->first, child);
+ free(child);
+ }
+
+ /* Merge the following parts into the terminator, one at a time */
+ while ((next_part = terminator->next_part)) {
+ switch (next_part->type) {
+ case QUOTED:
+ terminator->type = QUOTED;
+ /* fall through */
+ case UNQUOTED:
+ terminator->text = erealloc(terminator->text, terminator->length + next_part->length + 1);
+ memcpy(&terminator->text[terminator->length], next_part->text, next_part->length);
+ terminator->length += next_part->length;
+ terminator->text[terminator->length] = '\0';
+ free(next_part->text);
+ break;
+
+ case QUOTE_EXPRESSION:
+ terminator->type = QUOTED;
+ append_and_destroy_quote_to_here_document_terminator(ctx->here_document_stack->first, next_part->child);
+ free(next_part->child);
+ break;
+
+ case BACKQUOTE_EXPRESSION:
+ case ARITHMETIC_EXPRESSION:
+ case VARIABLE_SUBSTITUTION:
+ case SUBSHELL_SUBSTITUTION:
+ case PROCESS_SUBSTITUTION_INPUT:
+ case PROCESS_SUBSTITUTION_OUTPUT:
+ case PROCESS_SUBSTITUTION_INPUT_OUTPUT:
+ eprintf("use of run-time evaluated expression as right-hand side of %s operator (at line %zu) is illegal\n",
+ ctx->here_document_stack->first->redirection->type == HERE_DOCUMENT_INDENTED ? "<<-" : "<<",
+ ctx->here_document_stack->first->argument->line_number);
+ return;
+
+ case REDIRECTION:
+ case FUNCTION_MARK:
+ case SUBSHELL:
+ case ARITHMETIC_SUBSHELL:
+ /* interpreter shall recognise these as new "arguments" */
+ return;
+
+ default:
+ case COMMAND: /* used by interpreter */
+ case VARIABLE: /* ditto */
+ abort();
+ }
+
+ /* Unlink and free the merged part; if it was the end of the argument
+ * currently being built, the terminator takes over that role */
+ if (ctx->parser_state->current_argument_end == next_part)
+ ctx->parser_state->current_argument_end = terminator;
+ terminator->next_part = next_part->next_part;
+ free(next_part);
+ }
+}
+
+
+/**
+ * Check whether a non-portable token may be parsed as such
+ *
+ * @param   token        The token, used in the warning message
+ * @param   line_number  The line the token was found at
+ * @return               1 if the token shall be recognised, 0 if the shell
+ *                       is in POSIX mode, in which case a warning is printed
+ */
+int
+check_extension(const char *token, size_t line_number)
+{
+	if (posix_mode) {
+		weprintf("the '%s' token (at line %zu) is not portable, not parsing as it\n", token, line_number);
+		return 0;
+	}
+
+	return 1;
+}
@@ -30,6 +179,8 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
size_t bytes_read = 0;
size_t token_len;
+ struct here_document *here_document;
+ struct here_document_stack *here_doc_stack;
for (; bytes_read < code_len; bytes_read += token_len, code = &code[token_len]) {
switch (ctx->mode_stack->mode) {
@@ -44,8 +195,8 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
push_whitespace(ctx, 0);
push_semicolon(ctx, 1);
ctx->tokeniser_line_number += 1;
- if (ctx->here_documents_first)
- push_mode(ctx, HERE_DOCUMENT_MODE);
+ if (ctx->here_document_stack->first)
+ push_mode(ctx, HERE_DOCUMENT_MODE_INITIALISATION);
} else if (isspace(*code)) {
ctx->mode_stack->she_is_comment = 1;
@@ -129,7 +280,7 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
push_enter(ctx, SUBSHELL_SUBSTITUTION);
}
- } else if (code[1] == '[') {
+ } else if (code[1] == '[' && check_extension("$[", ctx->tokeniser_line_number)) {
token_len = 2;
push_mode(ctx, SB_QUOTE_MODE);
push_enter(ctx, ARITHMETIC_EXPRESSION);
@@ -139,7 +290,7 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
push_mode(ctx, CB_QUOTE_MODE);
push_enter(ctx, VARIABLE_SUBSTITUTION);
- } else if (code[1] == '\'') {
+ } else if (code[1] == '\'' && check_extension("$'", ctx->tokeniser_line_number)) {
for (token_len = 2; token_len < code_len - bytes_read; token_len += 1) {
if (code[token_len] == '\\') {
if (token_len + 1 == code_len - bytes_read) {
@@ -186,10 +337,87 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
break;
+ case HERE_DOCUMENT_MODE_INITIALISATION:
+ here_doc_stack = ctx->here_document_stack;
+ here_doc_stack->indented = 0;
+ if (here_doc_stack->first->redirection->type == HERE_DOCUMENT_INDENTED)
+ here_doc_stack->indented = 1;
+ get_here_document_terminator(ctx);
+ here_doc_stack->verbatim = 0;
+ if (here_doc_stack->first->argument->next_part->type == QUOTED)
+ here_doc_stack->verbatim = 1;
+ here_doc_stack->first->terminator = here_doc_stack->first->argument->next_part->text;
+ here_doc_stack->first->terminator_length = here_doc_stack->first->argument->next_part->length;
+ here_doc_stack->first->argument->next_part->text = ecalloc(1, 1);
+ here_doc_stack->first->argument->next_part->length = 0;
+ here_doc_stack->first->argument->next_part->type = QUOTED;
+ here_doc_stack->first->argument_end = here_doc_stack->first->argument->next_part;
+ ctx->mode_stack->mode = HERE_DOCUMENT_MODE;
+ /* fall through */
+
case HERE_DOCUMENT_MODE:
- /* TODO read until terminator, remove all <tab> (including on the
- * line of the terminator) if <<- and then if terminator was
- * unquoted, parse in " "-mode but accept " */
+ here_doc_stack = ctx->here_document_stack;
+ if (*code == '\t' && here_doc_stack->indented) {
+ token_len = 1;
+ } else {
+ token_len = here_doc_stack->line_offset;
+ for (; token_len < code_len - bytes_read; token_len += 1) {
+ if (code[token_len] == '\n') {
+ goto here_document_line_end;
+ } else if (!here_doc_stack->verbatim) {
+ if (code[token_len] == '\\') {
+ if (token_len + 1 == code_len - bytes_read) {
+ goto need_more;
+ } else if (code[token_len + 1] == '$' || code[token_len + 1] == '`') {
+ here_doc_stack->line_offset = 0;
+ push_quoted(ctx, code, token_len);
+ push_quoted(ctx, &code[token_len + 1], 1);
+ goto next;
+ }
+ token_len += 1;
+ } else if (code[token_len] == '$') {
+ here_doc_stack->line_offset = 0;
+ push_quoted(ctx, code, token_len);
+ bytes_read += token_len;
+ code = &code[token_len];
+ goto quote_mode_dollar_mode;
+ } else if (code[token_len] == '`') {
+ here_doc_stack->line_offset = 0;
+ push_quoted(ctx, code, token_len);
+ push_mode(ctx, BQ_QUOTE_MODE);
+ push_enter(ctx, BACKQUOTE_EXPRESSION);
+ goto next;
+ }
+ }
+ }
+ goto need_more;
+
+ here_document_line_end:
+ token_len += 1;
+ ctx->tokeniser_line_number += 1;
+ here_doc_stack->line_offset = 0;
+ here_document = here_doc_stack->first;
+
+ if (token_len - 1 == here_document->terminator_length &&
+ !strncmp(code, here_document->terminator, token_len - 1)) {
+ here_document->redirection->type = HERE_STRING;
+ here_doc_stack->first = here_document->next;
+ free(here_document->terminator);
+ free(here_document);
+ if (here_doc_stack->first) {
+ ctx->mode_stack->mode = HERE_DOCUMENT_MODE_INITIALISATION;
+ } else {
+ here_doc_stack->next = &here_doc_stack->first;
+ pop_mode(ctx);
+ if (here_doc_stack->interpret_when_empty) {
+ here_doc_stack->interpret_when_empty = 0;
+ interpret_and_eliminate(ctx);
+ }
+ }
+ } else {
+ push_quoted(ctx, code, token_len);
+ }
+ }
break;
@@ -197,6 +425,14 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
if (*code == '\\') {
if (code_len - bytes_read < 2) {
goto need_more;
+ } else if (code[1] == '\\' || code[1] == '`' || code[1] == '$') {
+ token_len = 2;
+ push_unquoted(ctx, &code[1], 1);
+ if (code[1] == '$') {
+ weprintf("meaningless \\ found before $ inside backquote expression at line "
+ "%zu, perhaps you mean to use \\\\$ instead to get a literal $\n",
+ ctx->tokeniser_line_number);
+ }
} else {
token_len = 2;
push_unquoted(ctx, code, 2);
@@ -284,6 +520,7 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
}
} else if (*code == '$') {
+ quote_mode_dollar_mode:
if (code_len - bytes_read < 2) {
if (ctx->end_of_file_reached) {
token_len = 1;
@@ -307,7 +544,7 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
push_enter(ctx, SUBSHELL_SUBSTITUTION);
}
- } else if (code[1] == '[') {
+ } else if (code[1] == '[' && check_extension("$[", ctx->tokeniser_line_number)) {
token_len = 2;
push_mode(ctx, SB_QUOTE_MODE);
push_enter(ctx, ARITHMETIC_EXPRESSION);
@@ -403,6 +640,7 @@ parse_preparsed(struct parser_context *ctx, char *code, size_t code_len)
abort();
}
+ next:
if (ctx->line_continuations) {
ctx->tokeniser_line_number += ctx->line_continuations;
ctx->line_continuations = 0;