/* See LICENSE file for copyright and license details. */ #include #include #include "config.h" #if defined(__GNUC__) # define CONST_FUNC __attribute__((__const__)) # define PURE_FUNC __attribute__((__pure__)) #else # define CONST_FUNC # define PURE_FUNC #endif #define BUILTIN_USAGE(FUNCTION_NAME, SYNOPSIS)\ BUILTIN_NUSAGE(1, FUNCTION_NAME, SYNOPSIS) #define BUILTIN_NUSAGE(STATUS, FUNCTION_NAME, SYNOPSIS)\ static void\ FUNCTION_NAME(void)\ {\ const char *syn = SYNOPSIS ? SYNOPSIS : "";\ fprintf(stderr, "usage: %s%s%s\n", argv0, *syn ? " " : "", syn);\ exit(STATUS);\ } enum argument_type { /* .text and .length */ QUOTED, /* \ or '…' or $'…' */ UNQUOTED, /* normal */ VARIABLE, /* used by interpreter, not parser */ OPERATOR, /* used by interpreter for ${}, not parser */ /* .child, but changed to .command by interpreter */ QUOTE_EXPRESSION, /* "…" */ BACKQUOTE_EXPRESSION, /* `…` */ ARITHMETIC_EXPRESSION, /* $((…)) */ VARIABLE_SUBSTITUTION, /* ${…} */ SUBSHELL_SUBSTITUTION, /* $(…) */ PROCESS_SUBSTITUTION_INPUT, /* >(…) */ PROCESS_SUBSTITUTION_OUTPUT, /* <(…) */ PROCESS_SUBSTITUTION_INPUT_OUTPUT, /* <>(…) */ SUBSHELL, /* (…) ## if non-first argument: format shell code into a string (can be used for a clean subshell) */ ARITHMETIC_SUBSHELL, /* ((…)) ## if non-first argument: format shell code into a string */ /* .command */ COMMAND, /* used by interpreter, not parser */ /* (none) */ REDIRECTION, /* at beginning of argument, use next redirection and use reminder of argument as right-hand side */ FUNCTION_MARK /* () */ }; enum nesting_type { MAIN_BODY, CODE_ROOT, TEXT_ROOT, VARIABLE_SUBSTITUTION_BRACKET, CURLY_NESTING, IF_STATEMENT, IF_CONDITIONAL, IF_CLAUSE, ELSE_CLAUSE, UNTIL_STATEMENT, WHILE_STATEMENT, REPEAT_CONDITIONAL, DO_CLAUSE, FOR_STATEMENT }; enum redirection_type { REDIRECT_INPUT, REDIRECT_INPUT_TO_FD, /* but close if right-hand side is "-" */ REDIRECT_OUTPUT, REDIRECT_OUTPUT_APPEND, REDIRECT_OUTPUT_CLOBBER, REDIRECT_OUTPUT_TO_FD, /* ditto */ REDIRECT_OUTPUT_AND_STDERR, REDIRECT_OUTPUT_AND_STDERR_APPEND, REDIRECT_OUTPUT_AND_STDERR_CLOBBER, REDIRECT_OUTPUT_AND_STDERR_TO_FD, /* ditto */ REDIRECT_INPUT_OUTPUT, REDIRECT_INPUT_OUTPUT_TO_FD, /* ditto */ HERE_STRING, HERE_DOCUMENT, /* eliminated during parse */ HERE_DOCUMENT_INDENTED /* eliminated during parse */ }; enum tokeniser_mode { NORMAL_MODE, COMMENT_MODE, BQ_QUOTE_MODE, DQ_QUOTE_MODE, RRB_QUOTE_MODE, RB_QUOTE_MODE, SB_QUOTE_MODE, CB_QUOTE_MODE, HERE_DOCUMENT_MODE_INITIALISATION, HERE_DOCUMENT_MODE }; enum command_terminal { DOUBLE_SEMICOLON, SEMICOLON, NEWLINE, AMPERSAND, SOCKET_PIPE, PIPE, PIPE_AMPERSAND, AMPERSAND_PIPE, /* synonym for |& to match &> */ AND, OR }; enum interpreter_requirement { NEED_COMMAND = 0, NEED_COMMAND_END, NO_REQUIREMENT, NEED_FUNCTION_BODY, NEED_VARIABLE_NAME, NEED_IN_OR_DO, NEED_DO, NEED_VALUE, NEED_PREFIX_OR_VARIABLE_NAME, NEED_INDEX_OR_OPERATOR_OR_END, NEED_INDEX_OR_SUFFIX_OR_END, NEED_INDEX_OR_END, NEED_OPERATOR_OR_END, NEED_AT_OPERAND, NEED_TEXT_OR_SLASH, NEED_TEXT_OR_COLON, NEED_END }; struct parser_state; struct interpreter_state; struct argument { enum argument_type type; union { struct { char *text; size_t length; }; struct parser_state *child; struct interpreter_state *command; }; /* (TODO) need to be able to track locations of functions, dots, evals, and maybe aliases, * as well as filenames, so a more complex tracking method is required, basically * a reversed tree (stack with reference counted nodes) with filename and linenumber * nodes, with type annotation; however for memory efficiency, .line_number shall * still be used for the leaves */ size_t line_number; struct argument *next_part; }; struct redirection { enum redirection_type type; struct argument *left_hand_side; struct argument *right_hand_side; /* set by interpreter, not parser */ }; struct command { enum command_terminal terminal; char have_bang; /* set by interpreter */ size_t terminal_line_number; /* (TODO) same idea as in `struct argument` */ struct argument **arguments; size_t narguments; struct redirection **redirections; size_t nredirections; size_t redirections_offset; /* used by interpreter */ }; struct parser_state { struct parser_state *parent; struct command **commands; /* in text nodes, all text will be in at most one argument in a single dummy command */ size_t ncommands; struct argument **arguments; size_t narguments; struct redirection **redirections; size_t nredirections; struct argument *current_argument; struct argument *current_argument_end; char need_right_hand_side; }; struct here_document { struct redirection *redirection; struct argument *argument; struct argument *argument_end; char *terminator; size_t terminator_length; struct here_document *next; }; struct mode_stack { enum tokeniser_mode mode; int she_is_comment; struct mode_stack *previous; }; struct here_document_stack { char indented; char verbatim; char interpret_when_empty; size_t line_offset; struct here_document *first; struct here_document **next; struct here_document_stack *previous; }; struct interpreter_state { enum nesting_type dealing_with; enum interpreter_requirement requirement; char allow_newline; char disallow_bang; /* disallow rather than allow, so that default value is 0 */ char have_bang; struct command **commands; /* normally the results are stored here */ size_t ncommands; struct argument **arguments; /* for TEXT_ROOT and VARIABLE_SUBSTITUTION_BRACKET, results are stored here */ size_t narguments; struct redirection **redirections; size_t nredirections; struct interpreter_state *parent; }; struct parser_context { char tty_input; char end_of_file_reached; char premature_end_of_file; char do_not_run; size_t preparser_offset; size_t preparser_line_number; size_t line_continuations; size_t tokeniser_line_number; size_t interpreter_offset; struct mode_stack *mode_stack; struct parser_state *parser_state; struct here_document_stack *here_document_stack; struct interpreter_state *interpreter_state; }; /* apsh.c */ extern int login_shell; extern int posix_mode; void initialise_parser_context(struct parser_context *ctx, int need_tokeniser, int need_parser); /* preparser.c */ size_t parse(struct parser_context *ctx, char *code, size_t code_len, size_t *nremovedp); /* tokeniser.c */ void push_mode(struct parser_context *ctx, enum tokeniser_mode mode); void pop_mode(struct parser_context *ctx); int check_extension(const char *token, size_t line_number); size_t parse_preparsed(struct parser_context *ctx, char *code, size_t code_len); /* parser.c */ PURE_FUNC const char *get_redirection_token(enum redirection_type type); void push_end_of_file(struct parser_context *ctx); void push_whitespace(struct parser_context *ctx, int strict); void push_semicolon(struct parser_context *ctx, int actually_newline); size_t push_symbol(struct parser_context *ctx, char *token, size_t token_len); void push_quoted(struct parser_context *ctx, char *text, size_t text_len); void push_escaped(struct parser_context *ctx, char *text, size_t text_len); void push_unquoted(struct parser_context *ctx, char *text, size_t text_len); void push_enter(struct parser_context *ctx, enum argument_type type); void push_leave(struct parser_context *ctx); /* interpreter.c */ void interpret_and_eliminate(struct parser_context *ctx); /* special_builtins.c */ #define LIST_SPECIAL_BUILTINS(_)\ _(":", colon_main, CONST_FUNC) /* regular_builtins.c */ #define LIST_REGULAR_BUILTINS(_)\ _("true", true_main, CONST_FUNC)\ _("false", false_main, CONST_FUNC)\ _("pwd", pwd_main,) /* "true" and "false" are defined as regular built-in shell utilities * (that must be searched before PATH), not as stand-alone utilities, * in POSIX (but vice verse in LSB). "pwd" is defined both as regular * built-in shell utility and as a stand-alone utility. */ #define X(SH_NAME, C_FUNCTION, C_ATTRIBUTES)\ C_ATTRIBUTES int C_FUNCTION(int argc, char **argv); LIST_SPECIAL_BUILTINS(X) LIST_REGULAR_BUILTINS(X) #undef X