about summary refs log tree commit diff stats
path: root/extras/grammar
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- extras/grammar 134
1 files changed, 134 insertions, 0 deletions
diff --git a/extras/grammar b/extras/grammar
new file mode 100644
index 0000000..8cc6022
--- /dev/null
+++ b/extras/grammar
@@ -0,0 +1,134 @@
+(* CHARACTER CLASSES: single-character building blocks referenced by the rules below *)
+
+_space = " " | "\n" | "\t"; (* space, newline and tab only. NOTE(review): carriage return is not listed; confirm CRLF input is out of scope *)
+_alpha = <"a", "z"> | <"A", "Z">; (* ASCII letters, written as two char-ranges *)
+_octal = <"0", "7">; (* one octal digit *)
+_digit = <"0", "9">; (* one decimal digit *)
+_xdigit = _digit | <"a", "f"> | <"A", "F">; (* one hexadecimal digit, either letter case *)
+_nonascii = <128, 255>; (* range written with integer boundaries instead of character literals *)
+
+
+(* WHITESPACE/COMMENTS, THE GRAMMAR IS FREE-FORM: the rule named _ is placed between tokens by the rules further down *)
+
+_comment_str_esc = "\\", (_space | <"!", 255>); (* backslash plus one following character; listed first in _comment_str, presumably so an escaped double quote does not close the quoted text - TODO confirm *)
+_comment_str_char = _space | !"\"", <"!", 255>; (* whitespace, or a character from ! to 255 with the double quote rejected *)
+_comment_str = "\"", {_comment_str_esc | _comment_str_char}, ("\"" | -); (* double-quoted text inside a comment; ends with the closing quote or else the exception operand *)
+_comment_char = _space | !"*)", !"\"", <"!", 255>; (* whitespace, or a character from ! to 255 that neither starts the comment terminator nor is a double quote *)
+comment = "(*", {_comment_char | _comment_str}, ("*)" | -); (* opening marker, body, then the closing marker or else the exception operand; no rule here re-enters comment, so nesting is not described *)
+
+_ = {_space | comment}; (* a repeated run of whitespace characters and comments *)
+
+
+(* IDENTIFIERS: names used for rule definitions and rule references *)
+
+_identifier_head = _alpha | _digit | _nonascii | "_"; (* allowed first character; note that a digit is accepted here *)
+_identifier_tail = _identifier_head | "-"; (* allowed following character: any head character or a hyphen *)
+
+intrusive-identifier = _identifier_head, {_identifier_tail}; (* identifier starting with any head character *)
+discrete-identifier = "_", {_identifier_tail}; (* identifier starting with an underscore *)
+
+_identifier = discrete-identifier | intrusive-identifier; (* discrete is listed first; underscore is also a head character, so the order presumably decides how underscore-led names are classified - TODO confirm *)
+
+
+(* STRINGS: double-quoted literals with backslash escapes *)
+
+_escape_simple = "\\" | "\"" | "'" | "a" | "b" | "f" | "n" | "r" | "t" | "v"; (* the single character that may follow the backslash in a one-character escape *)
+_escape_hex = ("x" | "X"), _xdigit, _xdigit; (* x or X followed by exactly two hexadecimal digits *)
+_escape_octal = _octal, {_octal}; (* one or more octal digits. NOTE(review): no upper bound on the digit count is expressed here *)
+escape-payload = _escape_simple | _escape_hex | _escape_octal | -; (* what follows the backslash; the exception operand is the last alternative *)
+ESCAPE = "\\"; (* the backslash that introduces an escape *)
+escape = ESCAPE, escape-payload;
+_character = escape | !"\"", <" ", 255>; (* an escape, or a character from space to 255 with the double quote rejected *)
+_string = "\"", _character, {_character}, ("\"" | -); (* requires at least one _character, so an empty literal is not matched by this rule; ends with the closing quote or else the exception operand *)
+
+string = _string;
+character = "\"", _character, ("\"" | -); (* a quoted literal holding exactly one _character; referenced by boundary *)
+
+
+(* INTEGERS: numeric literals, referenced by boundary *)
+
+_decimal = _digit, {_digit}; (* one or more decimal digits *)
+_hexadecimal = "0", ("x" | "X"), _xdigit, {_xdigit}; (* a zero, then x or X, then one or more hexadecimal digits *)
+
+integer = _hexadecimal | _decimal; (* hexadecimal is listed before decimal; a hexadecimal literal also begins with a decimal digit, so the order presumably matters - TODO confirm *)
+
+
+(* GROUPINGS: operators, bracketed constructs and operands that make up an expression *)
+
+boundary = character | integer; (* one end of a char-range: a one-character literal or an integer *)
+low = boundary; (* first boundary of a char-range *)
+high = boundary; (* second boundary of a char-range *)
+
+NONDETERMINISTIC = "?"; (* optional marker accepted before an alternation bar and before optional and repeated brackets *)
+
+COMMITTED = "+";
+committed = COMMITTED, _, operand; (* prefix operator: plus sign, whitespace or comments, then a single operand *)
+
+REJECTION = "!";
+rejection = REJECTION, _, operand; (* prefix operator: exclamation mark, whitespace or comments, then a single operand; it may be stacked because rejection is itself an operand *)
+
+EXCEPTION = "-";
+exception = EXCEPTION; (* a lone hyphen that takes no operand *)
+
+CONCATENATION = ",";
+concatenation = operand, {_, CONCATENATION, _, operand}; (* one operand followed by any number of comma-separated operands *)
+
+ALTERNATION = "|";
+alternation = concatenation, {_, [NONDETERMINISTIC], ALTERNATION, _, concatenation}; (* concatenations separated by bars, so concatenation groups more tightly than alternation; each bar may carry the ? marker *)
+
+CHAR-RANGE-START = "<";
+CHAR-RANGE-COMMA = ","; (* same character as CONCATENATION *)
+CHAR-RANGE-END = ">";
+CHAR-RANGE-EDGE = CHAR-RANGE-START | CHAR-RANGE-END;
+CHAR-RANGE = CHAR-RANGE-EDGE | CHAR-RANGE-COMMA; (* any of the three char-range punctuation characters *)
+char-range = !!CHAR-RANGE-START, CHAR-RANGE
+ , _, low, _
+ , !!CHAR-RANGE-COMMA, CHAR-RANGE
+ , _, high, _
+ , !!CHAR-RANGE-END, CHAR-RANGE; (* opening bracket, low boundary, comma, high boundary, closing bracket. Each punctuation is first checked with a double rejection of the specific token and then consumed through the broader CHAR-RANGE rule; presumably this is a lookahead so the match is recorded under the shared name - TODO confirm *)
+
+OPTIONAL-START = "[";
+OPTIONAL-END = "]";
+OPTIONAL = OPTIONAL-START | OPTIONAL-END; (* either square bracket *)
+optional = [NONDETERMINISTIC]
+ , !!OPTIONAL-START, OPTIONAL
+ , _, _expression, _
+ , !!OPTIONAL-END, OPTIONAL; (* an expression in square brackets, optionally prefixed by the ? marker. Each bracket is checked with a double rejection of the specific token and then consumed through OPTIONAL; presumably a lookahead - TODO confirm *)
+
+REPEATED-START = "{";
+REPEATED-END = "}";
+REPEATED = REPEATED-START | REPEATED-END; (* either curly brace *)
+repeated = [NONDETERMINISTIC]
+ , !!REPEATED-START, REPEATED
+ , _, _expression, _
+ , !!REPEATED-END, REPEATED; (* an expression in curly braces, optionally prefixed by the ? marker. Each brace is checked with a double rejection of the specific token and then consumed through REPEATED; presumably a lookahead - TODO confirm *)
+
+GROUP-START = "(";
+GROUP-END = ")";
+GROUP = GROUP-START | GROUP-END; (* either parenthesis *)
+group = !!GROUP-START, GROUP
+ , _, _expression, _
+ , !!GROUP-END, GROUP; (* an expression in parentheses; unlike optional and repeated it takes no ? prefix. Each parenthesis is checked with a double rejection of the specific token and then consumed through GROUP; presumably a lookahead - TODO confirm *)
+
+embedded-rule = _identifier; (* a reference to another rule by its name *)
+
+_literal = char-range | exception | string; (* operands that are not bracketed groups or rule references *)
+_group = optional | repeated | group | embedded-rule; (* bracketed constructs plus rule references *)
+operand = _group | _literal | rejection | committed; (* anything that may appear as one item of a concatenation *)
+
+_expression = alternation; (* a full expression is an alternation; used inside brackets and as a rule body *)
+
+
+(* RULES: a rule is a name, an equals sign, an expression and a terminating semicolon *)
+
+DEFINITION = "=";
+TERMINATION = ";";
+RULE = DEFINITION | TERMINATION; (* either rule punctuation character *)
+rule-name = _identifier; (* the name being defined *)
+rule = rule-name, _
+ , !!DEFINITION, RULE
+ , _, _expression, _
+ , !!TERMINATION, RULE; (* whitespace and comments are allowed around the equals sign, the expression and the semicolon. Each punctuation is checked with a double rejection of the specific token and then consumed through RULE; presumably a lookahead - TODO confirm *)
+
+(* This is the root rule of the grammar. *)
+grammar = _, {rule, _}; (* leading whitespace or comments, then any number of rules, each followed by whitespace or comments *)