diff options
Diffstat (limited to 'extras/grammar')
| -rw-r--r-- | extras/grammar | 134 |
1 files changed, 134 insertions, 0 deletions
diff --git a/extras/grammar b/extras/grammar
new file mode 100644
index 0000000..8cc6022
--- /dev/null
+++ b/extras/grammar
@@ -0,0 +1,134 @@
+(* CHARACTER CLASSES *) (* NOTE(review): the added comments read !, -, +, ? and {} by the rule names given under GROUPINGS; their exact semantics are defined by the parser, not by this file - confirm. *)
+
+_space = " " | "\n" | "\t"; (* space, newline or tab *)
+_alpha = <"a", "z"> | <"A", "Z">; (* ASCII letter, either case *)
+_octal = <"0", "7">; (* octal digit *)
+_digit = <"0", "9">; (* decimal digit *)
+_xdigit = _digit | <"a", "f"> | <"A", "F">; (* hexadecimal digit, either case *)
+_nonascii = <128, 255>; (* range written with integer boundaries instead of quoted characters *)
+
+
+(* WHITESPACE/COMMENTS, THE GRAMMAR IS FREE-FORM *)
+
+_comment_str_esc = "\\", (_space | <"!", 255>); (* backslash plus one following character, which may be a double quote *)
+_comment_str_char = _space | !"\"", <"!", 255>; (* whitespace, or a character from ! to 255 that is not a double quote *)
+_comment_str = "\"", {_comment_str_esc | _comment_str_char}, ("\"" | -); (* quoted text inside a comment; the - branch presumably reports a missing closing quote (TODO confirm) *)
+_comment_char = _space | !"*)", !"\"", <"!", 255>; (* whitespace, or a character that starts neither the comment terminator nor a quoted string *)
+comment = "(*", {_comment_char | _comment_str}, ("*)" | -); (* paren-star ... star-paren; quoted text is consumed by _comment_str, so a terminator inside quotes does not end the comment *)
+
+_ = {_space | comment}; (* filler used between tokens: a run of whitespace and comments *)
+
+
+(* IDENTIFIERS *)
+
+_identifier_head = _alpha | _digit | _nonascii | "_"; (* first character; note that digits are allowed here *)
+_identifier_tail = _identifier_head | "-"; (* later characters may additionally be a hyphen *)
+
+intrusive-identifier = _identifier_head, {_identifier_tail}; (* name starting with any head character *)
+discrete-identifier = "_", {_identifier_tail}; (* name starting with an underscore *)
+
+_identifier = discrete-identifier | intrusive-identifier; (* discrete is listed first; an underscore also fits _identifier_head, so the order matters if alternatives are tried left to right (TODO confirm) *)
+
+
+(* STRINGS *)
+
+_escape_simple = "\\" | "\"" | "'" | "a" | "b" | "f" | "n" | "r" | "t" | "v"; (* single-character escape payloads *)
+_escape_hex = ("x" | "X"), _xdigit, _xdigit; (* x or X followed by exactly two hexadecimal digits *)
+_escape_octal = _octal, {_octal}; (* one or more octal digits; no upper bound on the count is stated here *)
+escape-payload = _escape_simple | _escape_hex | _escape_octal | -; (* the trailing - presumably flags an unrecognised escape (TODO confirm) *)
+ESCAPE = "\\"; (* the backslash that introduces an escape *)
+escape = ESCAPE, escape-payload;
+_character = escape | !"\"", <" ", 255>; (* an escape, or a character from space to 255 other than a double quote *)
+_string = "\"", _character, {_character}, ("\"" | -); (* requires at least one character between the quotes, so an empty string does not match *)
+
+string = _string;
+character = "\"", _character, ("\"" | -); (* exactly one character or escape between double quotes; used by boundary below *)
+
+
+(* INTEGERS *)
+
+_decimal = _digit, {_digit}; (* one or more decimal digits *)
+_hexadecimal = "0", ("x" | "X"), _xdigit, {_xdigit}; (* 0x or 0X followed by one or more hexadecimal digits *)
+
+integer = _hexadecimal | _decimal; (* hexadecimal is listed first, presumably so the 0 of a 0x prefix is not taken as a complete decimal (TODO confirm ordering semantics) *)
+
+
+(* GROUPINGS *)
+
+boundary = character | integer; (* endpoint of a char-range: a quoted character or an integer *)
+low = boundary; (* same syntax as boundary under a separate name *)
+high = boundary; (* same syntax as boundary under a separate name *)
+
+NONDETERMINISTIC = "?"; (* optional prefix accepted by the alternation, optional and repeated rules *)
+
+COMMITTED = "+";
+committed = COMMITTED, _, operand; (* prefix form: +, filler, operand *)
+
+REJECTION = "!";
+rejection = REJECTION, _, operand; (* prefix form: !, filler, operand; an operand may itself be a rejection, so !! is accepted *)
+
+EXCEPTION = "-";
+exception = EXCEPTION; (* a bare - that takes no operand *)
+
+CONCATENATION = ",";
+concatenation = operand, {_, CONCATENATION, _, operand}; (* operands separated by commas, with filler allowed around each comma *)
+
+ALTERNATION = "|";
+alternation = concatenation, {_, [NONDETERMINISTIC], ALTERNATION, _, concatenation}; (* concatenations separated by |, each | optionally prefixed by ?; concatenation therefore groups more tightly than alternation. NOTE(review): the added comments read !!, ? and - by their rule names only; exact semantics are defined by the parser - confirm. *)
+
+CHAR-RANGE-START = "<";
+CHAR-RANGE-COMMA = ",";
+CHAR-RANGE-END = ">";
+CHAR-RANGE-EDGE = CHAR-RANGE-START | CHAR-RANGE-END;
+CHAR-RANGE = CHAR-RANGE-EDGE | CHAR-RANGE-COMMA; (* any of the three char-range delimiters *)
+char-range = !!CHAR-RANGE-START, CHAR-RANGE
+           , _, low, _
+           , !!CHAR-RANGE-COMMA, CHAR-RANGE
+           , _, high, _
+           , !!CHAR-RANGE-END, CHAR-RANGE; (* written as < low , high >; each delimiter is first tested with !! against its specific rule and then matched through the shared CHAR-RANGE rule - presumably a lookahead followed by the consuming match (TODO confirm) *)
+
+OPTIONAL-START = "[";
+OPTIONAL-END = "]";
+OPTIONAL = OPTIONAL-START | OPTIONAL-END; (* either square bracket *)
+optional = [NONDETERMINISTIC]
+         , !!OPTIONAL-START, OPTIONAL
+         , _, _expression, _
+         , !!OPTIONAL-END, OPTIONAL; (* an expression in square brackets, optionally prefixed by ?; same !! then shared-rule pattern as char-range *)
+
+REPEATED-START = "{";
+REPEATED-END = "}";
+REPEATED = REPEATED-START | REPEATED-END; (* either curly brace *)
+repeated = [NONDETERMINISTIC]
+         , !!REPEATED-START, REPEATED
+         , _, _expression, _
+         , !!REPEATED-END, REPEATED; (* an expression in curly braces, optionally prefixed by ? *)
+
+GROUP-START = "(";
+GROUP-END = ")";
+GROUP = GROUP-START | GROUP-END; (* either parenthesis *)
+group = !!GROUP-START, GROUP
+      , _, _expression, _
+      , !!GROUP-END, GROUP; (* an expression in parentheses; unlike optional and repeated it takes no ? prefix *)
+
+embedded-rule = _identifier; (* an identifier used as an operand, presumably a reference to the rule of that name (TODO confirm) *)
+
+_literal = char-range | exception | string;
+_group = optional | repeated | group | embedded-rule;
+operand = _group | _literal | rejection | committed; (* the unit that concatenation joins *)
+
+_expression = alternation; (* nesting from the top: alternation, then concatenation, then operand *)
+
+
+(* RULES *)
+
+DEFINITION = "=";
+TERMINATION = ";";
+RULE = DEFINITION | TERMINATION; (* either rule delimiter *)
+rule-name = _identifier;
+rule = rule-name, _
+     , !!DEFINITION, RULE
+     , _, _expression, _
+     , !!TERMINATION, RULE; (* name, =, expression, ; with filler allowed between the parts *)
+
+(* This is the root rule of the grammar. *)
+grammar = _, {rule, _}; |
