1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
(* CHARACTER CLASSES *)
(* Single-character building blocks used by the lexical rules that follow.
   A range is written <low, high>; its bounds are either quoted characters
   or integers, per the boundary rule later in this grammar. *)
(* Whitespace: space, newline or tab. Carriage return is not listed. *)
_space = " " | "\n" | "\t";
(* ASCII letters, lower case then upper case. *)
_alpha = <"a", "z"> | <"A", "Z">;
(* Octal digits 0 through 7. *)
_octal = <"0", "7">;
(* Decimal digits 0 through 9. *)
_digit = <"0", "9">;
(* Hexadecimal digits: the decimal digits plus a-f in either case. *)
_xdigit = _digit | <"a", "f"> | <"A", "F">;
(* Range with integer bounds 128 to 255.
   NOTE(review): presumably these are byte values above the ASCII range -
   confirm how the parser treats multi-byte encodings. *)
_nonascii = <128, 255>;
(* WHITESPACE/COMMENTS, THE GRAMMAR IS FREE-FORM *)
(* Comments may contain double-quoted strings. Text inside such a string is
   consumed by the string rules below, so a comment terminator written
   inside a quoted string does not end the comment, and an unbalanced
   double quote inside a comment keeps consuming text as string content. *)
(* Inside a comment string: a backslash followed by any whitespace
   character or any character from ! through 255. *)
_comment_str_esc = "\\", (_space | <"!", 255>);
(* Inside a comment string: whitespace, or any character from ! through
   255 that is not a double quote. Concatenation binds tighter than
   alternation, so the rejection applies only to the second alternative. *)
_comment_str_char = _space | !"\"", <"!", 255>;
(* A double-quoted string inside a comment: zero or more escapes or plain
   characters, then either the closing double quote or the exception
   operand.
   NOTE(review): presumably the exception operand reports an unterminated
   string - confirm against the parser. *)
_comment_str = "\"", {_comment_str_esc | _comment_str_char}, ("\"" | -);
(* A plain comment character: whitespace, or any character from ! through
   255 that neither starts the comment terminator nor is a double quote. *)
_comment_char = _space | !"*)", !"\"", <"!", 255>;
(* A whole comment: the opening delimiter, any mix of plain characters and
   quoted strings, then either the closing delimiter or the exception
   operand. The body has no alternative for a nested comment. *)
comment = "(*", {_comment_char | _comment_str}, ("*)" | -);
(* Filler allowed between tokens: zero or more whitespace characters or
   comments. Referenced wherever free-form layout is permitted. *)
_ = {_space | comment};
(* IDENTIFIERS *)
(* NOTE(review): the names suggest that underscore-led rules are discrete
   and all others intrusive with respect to the parse result - the actual
   effect is defined by the parser, not by this grammar; confirm. *)
(* First character of an identifier: a letter, a digit, a character in the
   128 to 255 range, or an underscore. Note that a leading digit is
   accepted. *)
_identifier_head = _alpha | _digit | _nonascii | "_";
(* Any later character: everything allowed as a head, plus a hyphen. *)
_identifier_tail = _identifier_head | "-";
(* An identifier starting with any head character. *)
intrusive-identifier = _identifier_head, {_identifier_tail};
(* An identifier starting with an underscore. *)
discrete-identifier = "_", {_identifier_tail};
(* Either identifier form. discrete-identifier is listed first; since
   _identifier_head also accepts an underscore, intrusive-identifier on its
   own would match underscore-led names too.
   NOTE(review): presumably alternatives are tried in the order written -
   confirm. *)
_identifier = discrete-identifier | intrusive-identifier;
(* STRINGS *)
(* Double-quoted literals with backslash escapes. *)
(* Single-character escape payloads: backslash, double quote, single quote,
   or one of the letters a b f n r t v. *)
_escape_simple = "\\" | "\"" | "'" | "a" | "b" | "f" | "n" | "r" | "t" | "v";
(* Hex escape payload: x or X followed by exactly two hex digits. *)
_escape_hex = ("x" | "X"), _xdigit, _xdigit;
(* Octal escape payload: one or more octal digits; this rule sets no upper
   limit on the digit count. *)
_escape_octal = _octal, {_octal};
(* Whatever follows the backslash: one of the payload forms above, or the
   exception operand when none of them matches.
   NOTE(review): presumably the exception operand flags an invalid escape -
   confirm against the parser. *)
escape-payload = _escape_simple | _escape_hex | _escape_octal | -;
(* The escape introducer, a single backslash. *)
ESCAPE = "\\";
(* A complete escape sequence: introducer plus payload. *)
escape = ESCAPE, escape-payload;
(* One string element: an escape sequence, or any character from space
   through 255 that is not a double quote. The range starts at space, so a
   raw newline or tab is not matched by the second alternative. *)
_character = escape | !"\"", <" ", 255>;
(* A string literal: opening double quote, one or more elements, then the
   closing double quote or the exception operand. At least one element is
   required, so an empty pair of double quotes is not matched. *)
_string = "\"", _character, {_character}, ("\"" | -);
(* Named wrapper around _string. *)
string = _string;
(* A literal holding exactly one element; used as a character range
   bound. *)
character = "\"", _character, ("\"" | -);
(* INTEGERS *)
(* Unsigned integer literals. In the rules visible here they are used only
   as character range bounds. *)
(* One or more decimal digits. *)
_decimal = _digit, {_digit};
(* A zero, then x or X, then one or more hex digits. *)
_hexadecimal = "0", ("x" | "X"), _xdigit, {_xdigit};
(* Either form, with hexadecimal listed first.
   NOTE(review): presumably so the leading zero of a hex literal is not
   taken as a complete decimal number - confirm that alternatives are tried
   in the order written. *)
integer = _hexadecimal | _decimal;
(* GROUPINGS *)
(* Operators and bracketed constructs that make up an expression. *)
(* One end of a character range: a single-element quoted literal or an
   integer. *)
boundary = character | integer;
(* Named aliases of boundary for the first and second bound of a
   char-range. *)
low = boundary;
high = boundary;
(* The ? marker. It is accepted directly before an alternation bar and
   before the opening bracket of optional and repeated.
   NOTE(review): its runtime meaning is defined by the parser and is not
   visible in this grammar. *)
NONDETERMINISTIC = "?";
(* The + prefix operator: the marker, optional filler, then an operand.
   NOTE(review): none of the rules visible here apply this operator, so
   its effect cannot be read from this grammar - confirm. *)
COMMITTED = "+";
committed = COMMITTED, _, operand;
(* The ! prefix operator: the marker, optional filler, then an operand.
   Because operand itself includes rejection, the operator can be stacked,
   as in the doubled form used by the bracketed rules of this grammar. *)
REJECTION = "!";
rejection = REJECTION, _, operand;
(* The bare hyphen operand. In this grammar it appears as the last
   alternative after an expected closing delimiter or escape payload.
   NOTE(review): presumably it signals a parse error at that point -
   confirm. *)
EXCEPTION = "-";
exception = EXCEPTION;
(* A sequence: one or more operands separated by commas, with optional
   filler on both sides of each comma. *)
CONCATENATION = ",";
concatenation = operand, {_, CONCATENATION, _, operand};
(* A choice: one or more concatenations separated by bars, so
   concatenation binds tighter than alternation. A bar may be prefixed by
   the ? marker, which must sit directly against the bar because no filler
   is allowed between them. *)
ALTERNATION = "|";
alternation = concatenation, {_, [NONDETERMINISTIC], ALTERNATION, _, concatenation};
(* Punctuation of a character range: opening angle bracket, separating
   comma and closing angle bracket. CHAR-RANGE-EDGE matches either bracket
   and CHAR-RANGE matches any of the three marks. *)
CHAR-RANGE-START = "<";
CHAR-RANGE-COMMA = ",";
CHAR-RANGE-END = ">";
CHAR-RANGE-EDGE = CHAR-RANGE-START | CHAR-RANGE-END;
CHAR-RANGE = CHAR-RANGE-EDGE | CHAR-RANGE-COMMA;
(* A character range: opening bracket, low bound, comma, high bound,
   closing bracket, with optional filler around both bounds. Each mark is
   written as a doubled rejection of the specific token followed by the
   generic CHAR-RANGE rule.
   NOTE(review): presumably the doubled rejection checks which mark is
   present without consuming it, so the consumed text is recorded under the
   shared CHAR-RANGE name - confirm against the parser. *)
char-range = !!CHAR-RANGE-START, CHAR-RANGE
, _, low, _
, !!CHAR-RANGE-COMMA, CHAR-RANGE
, _, high, _
, !!CHAR-RANGE-END, CHAR-RANGE;
(* Square brackets delimiting an optional expression. OPTIONAL matches
   either bracket. *)
OPTIONAL-START = "[";
OPTIONAL-END = "]";
OPTIONAL = OPTIONAL-START | OPTIONAL-END;
(* An optional construct: an optional ? marker directly before the opening
   bracket, then an expression with filler allowed on both sides, then the
   closing bracket. Each bracket is written as a doubled rejection of the
   specific token followed by the generic OPTIONAL rule.
   NOTE(review): presumably the doubled rejection checks for the bracket
   without consuming it - confirm. Unlike the string rules, a missing
   closing bracket has no exception alternative here. *)
optional = [NONDETERMINISTIC]
, !!OPTIONAL-START, OPTIONAL
, _, _expression, _
, !!OPTIONAL-END, OPTIONAL;
(* Curly braces delimiting a repeated expression. REPEATED matches either
   brace. *)
REPEATED-START = "{";
REPEATED-END = "}";
REPEATED = REPEATED-START | REPEATED-END;
(* A repetition construct: an optional ? marker directly before the opening
   brace, then an expression with filler allowed on both sides, then the
   closing brace. Each brace is written as a doubled rejection of the
   specific token followed by the generic REPEATED rule.
   NOTE(review): presumably the doubled rejection checks for the brace
   without consuming it, and the braces mean zero or more repetitions as
   their use elsewhere in this grammar suggests - confirm. *)
repeated = [NONDETERMINISTIC]
, !!REPEATED-START, REPEATED
, _, _expression, _
, !!REPEATED-END, REPEATED;
(* Parentheses delimiting a grouped expression. GROUP matches either
   parenthesis. *)
GROUP-START = "(";
GROUP-END = ")";
GROUP = GROUP-START | GROUP-END;
(* A parenthesised expression with filler allowed on both sides of the
   inner expression. This rule takes no ? marker, unlike optional and
   repeated. Each parenthesis is written as a doubled rejection of the
   specific token followed by the generic GROUP rule.
   NOTE(review): presumably the doubled rejection checks for the
   parenthesis without consuming it - confirm. *)
group = !!GROUP-START, GROUP
, _, _expression, _
, !!GROUP-END, GROUP;
(* A reference to another rule, written as that rule's identifier. *)
embedded-rule = _identifier;
(* Leaf operands: a character range, the bare exception hyphen, or a
   string literal. *)
_literal = char-range | exception | string;
(* Bracketed constructs and rule references. *)
_group = optional | repeated | group | embedded-rule;
(* Anything that can appear between commas in a concatenation: a group
   form, a literal, or an operand prefixed by the rejection or committed
   operator. *)
operand = _group | _literal | rejection | committed;
(* The full expression form used inside brackets and on the right-hand side
   of a rule; it is simply an alternation. *)
_expression = alternation;
(* RULES *)
(* Punctuation of a rule: the equals sign between name and body, and the
   semicolon that ends the rule. RULE matches either mark. *)
DEFINITION = "=";
TERMINATION = ";";
RULE = DEFINITION | TERMINATION;
(* The name on the left-hand side of a rule. *)
rule-name = _identifier;
(* One rule: name, equals sign, expression, semicolon, with filler allowed
   between the parts. Each mark is written as a doubled rejection of the
   specific token followed by the generic RULE rule.
   NOTE(review): presumably the doubled rejection checks for the mark
   without consuming it - confirm against the parser. *)
rule = rule-name, _
, !!DEFINITION, RULE
, _, _expression, _
, !!TERMINATION, RULE;
(* This is the root rule of the grammar: leading filler followed by zero
   or more rules, each followed by filler. An input holding only whitespace
   and comments therefore matches. *)
grammar = _, {rule, _};
|