aboutsummaryrefslogblamecommitdiffstats
path: root/common.h
blob: 52481c012ed740a8e341d5c7a0273d5efd48e85b (plain) (tree)
1
2
3
4
5
6





                                                         





















                                                                                 



                                          


                                                               









                                                                                                                          

                                                      




                                                                                                                         
















                                      

                       
                                                                       


                                
                                          


                                           
                                                     
                              
                                                
                    

                                                            










                       
                                          





                          
                



                       
                                                        



            



















                                      
                    
                         








                                           
                                                  
          




                                                                                                   






                                        
                                                                              



                                       

                                                                                   



                                          
                                                             



                                    
                                                                                                                          












                                              


                                      








                                    
























                                                                                                                   
                       



                                   



                                     
                                  

                                          

                                                        



            


                                                                                                






                                                                                         
                                                           


                                                                                
                                                                        

                                                             
                                                                      





                                                                              






















                                                                     
/* See LICENSE file for copyright and license details. */
#include <libsimple.h>
#include <libsimple-arg.h>
#include "config.h"


#if defined(__GNUC__)
# define CONST_FUNC __attribute__((__const__))
# define PURE_FUNC  __attribute__((__pure__))
#else
# define CONST_FUNC
# define PURE_FUNC
#endif


#define BUILTIN_USAGE(FUNCTION_NAME, SYNOPSIS)\
	BUILTIN_NUSAGE(1, FUNCTION_NAME, SYNOPSIS)

#define BUILTIN_NUSAGE(STATUS, FUNCTION_NAME, SYNOPSIS)\
	static void\
	FUNCTION_NAME(void)\
	{\
		const char *syn = SYNOPSIS ? SYNOPSIS : "";\
		fprintf(stderr, "usage: %s%s%s\n", argv0, *syn ? " " : "", syn);\
		exit(STATUS);\
	}


enum argument_type {
	/* .text and .length */
	QUOTED, /* \ or '…' or $'…' */
	UNQUOTED, /* normal */
	VARIABLE, /* used by interpreter, not parser */
	OPERATOR, /* used by interpreter for ${}, not parser */
	/* .child, but changed to .command by interpreter */
	QUOTE_EXPRESSION, /* "…" */
	BACKQUOTE_EXPRESSION, /* `…` */
	ARITHMETIC_EXPRESSION, /* $((…)) */
	VARIABLE_SUBSTITUTION, /* ${…} */
	SUBSHELL_SUBSTITUTION, /* $(…) */
	PROCESS_SUBSTITUTION_INPUT, /* >(…) */
	PROCESS_SUBSTITUTION_OUTPUT, /* <(…) */
	PROCESS_SUBSTITUTION_INPUT_OUTPUT, /* <>(…) */
	SUBSHELL, /* (…) ## if non-first argument: format shell code into a string (can be used for a clean subshell) */
	ARITHMETIC_SUBSHELL, /* ((…)) ## if non-first argument: format shell code into a string */
	/* .command */
	COMMAND, /* used by interpreter, not parser */
	/* (none) */
	REDIRECTION, /* at beginning of argument, use next redirection and use reminder of argument as right-hand side */
	FUNCTION_MARK /* () */
};

enum nesting_type {
	MAIN_BODY,
	CODE_ROOT,
	TEXT_ROOT,
	VARIABLE_SUBSTITUTION_BRACKET,
	CURLY_NESTING,
	IF_STATEMENT,
	IF_CONDITIONAL,
	IF_CLAUSE,
	ELSE_CLAUSE,
	UNTIL_STATEMENT,
	WHILE_STATEMENT,
	REPEAT_CONDITIONAL,
	DO_CLAUSE,
	FOR_STATEMENT
};

enum redirection_type {
	REDIRECT_INPUT,
	REDIRECT_INPUT_TO_FD, /* but close if right-hand side is "-" */
	REDIRECT_OUTPUT,
	REDIRECT_OUTPUT_APPEND,
	REDIRECT_OUTPUT_CLOBBER,
	REDIRECT_OUTPUT_TO_FD, /* ditto */
	REDIRECT_OUTPUT_AND_STDERR,
	REDIRECT_OUTPUT_AND_STDERR_APPEND,
	REDIRECT_OUTPUT_AND_STDERR_CLOBBER,
	REDIRECT_OUTPUT_AND_STDERR_TO_FD, /* ditto */
	REDIRECT_INPUT_OUTPUT,
	REDIRECT_INPUT_OUTPUT_TO_FD, /* ditto */
	HERE_STRING,
	HERE_DOCUMENT, /* eliminated during parse */
	HERE_DOCUMENT_INDENTED /* eliminated during parse */
};

enum tokeniser_mode {
	NORMAL_MODE,
	COMMENT_MODE,
	BQ_QUOTE_MODE,
	DQ_QUOTE_MODE,
	RRB_QUOTE_MODE,
	RB_QUOTE_MODE,
	SB_QUOTE_MODE,
	CB_QUOTE_MODE,
	HERE_DOCUMENT_MODE_INITIALISATION,
	HERE_DOCUMENT_MODE
};

enum command_terminal {
	DOUBLE_SEMICOLON,
	SEMICOLON,
	NEWLINE,
	AMPERSAND,
	SOCKET_PIPE,
	PIPE,
	PIPE_AMPERSAND,
	AMPERSAND_PIPE, /* synonym for |& to match &> */
	AND,
	OR
};

enum interpreter_requirement {
	NEED_COMMAND = 0,
	NEED_COMMAND_END,
	NO_REQUIREMENT,
	NEED_FUNCTION_BODY,
	NEED_VARIABLE_NAME,
	NEED_IN_OR_DO,
	NEED_DO,
	NEED_VALUE,
	NEED_PREFIX_OR_VARIABLE_NAME,
	NEED_INDEX_OR_OPERATOR_OR_END,
	NEED_INDEX_OR_SUFFIX_OR_END,
	NEED_INDEX_OR_END,
	NEED_OPERATOR_OR_END,
	NEED_AT_OPERAND,
	NEED_TEXT_OR_SLASH,
	NEED_TEXT_OR_COLON,
	NEED_END
};

struct parser_state;
struct interpreter_state;

struct argument {
	enum argument_type type;
	union {
		struct {
			char *text;
			size_t length;
		};
		struct parser_state *child;
		struct interpreter_state *command;
	};
	/* (TODO) need to be able to track locations of functions, dots, evals, and maybe aliases,
	 *        as well as filenames, so a more complex tracking method is required, basically
	 *        a reversed tree (stack with reference counted nodes) with filename and linenumber
	 *        nodes, with type annotation; however for memory efficiency, .line_number shall
	 *        still be used for the leaves */
	size_t line_number;
	struct argument *next_part;
};

struct redirection {
	enum redirection_type type;
	struct argument *left_hand_side;
	struct argument *right_hand_side; /* set by interpreter, not parser */
};

struct command {
	enum command_terminal terminal;
	char have_bang; /* set by interpreter */
	size_t terminal_line_number; /* (TODO) same idea as in `struct argument` */
	struct argument **arguments;
	size_t narguments;
	struct redirection **redirections;
	size_t nredirections;
	size_t redirections_offset; /* used by interpreter */
};

struct parser_state {
	struct parser_state *parent;
	struct command **commands; /* in text nodes, all text will be in at most one argument in a single dummy command */
	size_t ncommands;
	struct argument **arguments;
	size_t narguments;
	struct redirection **redirections;
	size_t nredirections;
	struct argument *current_argument;
	struct argument *current_argument_end;
	char need_right_hand_side;
};

struct here_document {
	struct redirection *redirection;
	struct argument *argument;
	struct argument *argument_end;
	char *terminator;
	size_t terminator_length;
	struct here_document *next;
};

struct mode_stack {
	enum tokeniser_mode mode;
	int she_is_comment;
	struct mode_stack *previous;
};

struct here_document_stack {
	char indented;
	char verbatim;
	char interpret_when_empty;
	size_t line_offset;
	struct here_document *first;
	struct here_document **next;
	struct here_document_stack *previous;
};

struct interpreter_state {
	enum nesting_type dealing_with;
	enum interpreter_requirement requirement;
	char allow_newline;
	char disallow_bang; /* disallow rather than allow, so that default value is 0 */
	char have_bang;
	struct command **commands; /* normally the results are stored here */
	size_t ncommands;
	struct argument **arguments; /* for TEXT_ROOT and VARIABLE_SUBSTITUTION_BRACKET, results are stored here */
	size_t narguments;
	struct redirection **redirections;
	size_t nredirections;
	struct interpreter_state *parent;
};

struct parser_context {
	char tty_input;
	char end_of_file_reached;
	char premature_end_of_file;
	char do_not_run;
	size_t preparser_offset;
	size_t preparser_line_number;
	size_t line_continuations;
	size_t tokeniser_line_number;
	size_t interpreter_offset;
	struct mode_stack *mode_stack;
	struct parser_state *parser_state;
	struct here_document_stack *here_document_stack;
	struct interpreter_state *interpreter_state;
};


/* apsh.c */
extern int login_shell;
extern int posix_mode;
void initialise_parser_context(struct parser_context *ctx, int need_tokeniser, int need_parser);

/* preparser.c */
size_t parse(struct parser_context *ctx, char *code, size_t code_len, size_t *nremovedp);

/* tokeniser.c */
void push_mode(struct parser_context *ctx, enum tokeniser_mode mode);
void pop_mode(struct parser_context *ctx);
int check_extension(const char *token, size_t line_number);
size_t parse_preparsed(struct parser_context *ctx, char *code, size_t code_len);

/* parser.c */
PURE_FUNC const char *get_redirection_token(enum redirection_type type);
void push_end_of_file(struct parser_context *ctx);
void push_whitespace(struct parser_context *ctx, int strict);
void push_semicolon(struct parser_context *ctx, int actually_newline);
size_t push_symbol(struct parser_context *ctx, char *token, size_t token_len);
void push_quoted(struct parser_context *ctx, char *text, size_t text_len);
void push_escaped(struct parser_context *ctx, char *text, size_t text_len);
void push_unquoted(struct parser_context *ctx, char *text, size_t text_len);
void push_enter(struct parser_context *ctx, enum argument_type type);
void push_leave(struct parser_context *ctx);

/* interpreter.c */
void interpret_and_eliminate(struct parser_context *ctx);

/* special_builtins.c */
#define LIST_SPECIAL_BUILTINS(_)\
	_(":", colon_main, CONST_FUNC)

/* regular_builtins.c */
#define LIST_REGULAR_BUILTINS(_)\
	_("true", true_main, CONST_FUNC)\
	_("false", false_main, CONST_FUNC)\
	_("pwd", pwd_main,)
/* "true" and "false" are defined as regular built-in shell utilities
 * (that must be searched before PATH), not as stand-alone utilities,
 * in POSIX (but vice verse in LSB). "pwd" is defined both as regular
 * built-in shell utility and as a stand-alone utility. */

#define X(SH_NAME, C_FUNCTION, C_ATTRIBUTES)\
	C_ATTRIBUTES int C_FUNCTION(int argc, char **argv);
LIST_SPECIAL_BUILTINS(X)
LIST_REGULAR_BUILTINS(X)
#undef X