aboutsummaryrefslogtreecommitdiffstats
path: root/extras/grammar
blob: 8cc6022f741341d9de8d5929b5c1d037d982676d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
(* CHARACTER CLASSES *)

(* Single-character building blocks used by the rules further down.
   Angle brackets denote a character range (see the char-range rule);
   the two bounds are either quoted characters or integer codes. *)

(* Blank, newline and tab. Carriage return is not listed. *)
_space               = " " | "\n" | "\t";
(* ASCII letters, lower case then upper case. *)
_alpha               = <"a", "z"> | <"A", "Z">;
(* Digits 0 through 7; used by _escape_octal. *)
_octal               = <"0", "7">;
(* Digits 0 through 9. *)
_digit               = <"0", "9">;
(* A decimal digit or a letter a-f in either case. *)
_xdigit              = _digit | <"a", "f"> | <"A", "F">;
(* Codes 128 through 255, given as integers rather than quoted characters.
   NOTE(review): presumably these are bytes of a multi-byte encoding -
   confirm whether the parser works on bytes or on code points. *)
_nonascii            = <128, 255>;


(* WHITESPACE/COMMENTS, THE GRAMMAR IS FREE-FORM *)

(* Inside a quoted string within a comment: a backslash followed by a
   _space character or by any character from the exclamation mark up to
   code 255. This lets an escaped double quote stay inside the string. *)
_comment_str_esc     = "\\", (_space | <"!", 255>);
(* Any other character of a comment string: a _space character, or a
   character from the exclamation mark up to code 255 that is preceded by
   a rejection of the double quote. *)
_comment_str_char    = _space | !"\"", <"!", 255>;
(* A double-quoted string embedded in a comment. The escape alternative is
   listed before the plain character alternative. The string ends with a
   double quote; otherwise the exception alternative (the lone hyphen) applies.
   NOTE(review): presumably the exception reports an unterminated string -
   confirm against the parser. *)
_comment_str         = "\"", {_comment_str_esc | _comment_str_char}, ("\"" | -);
(* A plain comment character: a _space character, or a character from the
   exclamation mark up to code 255 that is neither the start of the
   two-character comment terminator nor a double quote (double quotes are
   handled by _comment_str). *)
_comment_char        = _space | !"*)", !"\"", <"!", 255>;
(* A comment: the opening marker, any mix of plain characters and quoted
   strings, then the terminator or the exception alternative. Because a
   double quote always opens a _comment_str, quotes inside comments must
   be balanced. *)
comment              = "(*", {_comment_char | _comment_str}, ("*)" | -);

(* Any run, possibly empty, of _space characters and comments. Other rules
   place this between their tokens. *)
_                    = {_space | comment};


(* IDENTIFIERS *)

(* First character of an identifier: a letter, a digit, a code in the
   128-255 range, or an underscore. Digits are accepted in first position. *)
_identifier_head     = _alpha | _digit | _nonascii | "_";
(* Any following character: everything allowed as a head, plus the hyphen. *)
_identifier_tail     = _identifier_head | "-";

(* A head character followed by any number of tail characters. *)
intrusive-identifier = _identifier_head, {_identifier_tail};
(* An underscore followed by any number of tail characters. A lone
   underscore is therefore a valid discrete-identifier. *)
discrete-identifier  = "_", {_identifier_tail};

(* NOTE(review): a name that starts with an underscore fits both
   alternatives, since _identifier_head also accepts the underscore.
   discrete-identifier is listed first, presumably so that it takes
   priority - confirm that alternation is ordered. *)
_identifier          = discrete-identifier | intrusive-identifier;


(* STRINGS *)

(* Single characters that may follow the backslash: backslash, double
   quote, apostrophe, and the letters a b f n r t v. *)
_escape_simple       = "\\" | "\"" | "'" | "a" | "b" | "f" | "n" | "r" | "t" | "v";
(* The letter x in either case followed by exactly two hex digits. *)
_escape_hex          = ("x" | "X"), _xdigit, _xdigit;
(* One or more octal digits; this rule sets no upper bound on the count. *)
_escape_octal        = _octal, {_octal};
(* What may follow the backslash. If none of the three escape forms fits,
   the exception alternative (the lone hyphen) applies.
   NOTE(review): presumably this reports an invalid escape - confirm. *)
escape-payload       = _escape_simple | _escape_hex | _escape_octal | -;
(* The backslash that introduces an escape. *)
ESCAPE               = "\\";
(* A complete escape sequence: the backslash and its payload. *)
escape               = ESCAPE, escape-payload;
(* One string character: an escape sequence, or a character from the blank
   up to code 255 that is preceded by a rejection of the double quote.
   The escape alternative is listed first. *)
_character           = escape | !"\"", <" ", 255>;
(* Opening double quote, at least one _character, then the closing double
   quote or the exception alternative. As written, an empty string (two
   adjacent double quotes) is not matched by this rule. *)
_string              = "\"", _character, {_character}, ("\"" | -);

(* Named wrapper around _string; this is the form referenced by _literal. *)
string               = _string;
(* A double-quoted single _character; referenced by boundary for the two
   ends of a char-range. *)
character            = "\"", _character, ("\"" | -);


(* INTEGERS *)

(* One or more decimal digits. *)
_decimal             = _digit, {_digit};
(* A zero, the letter x in either case, then one or more hex digits. *)
_hexadecimal         = "0", ("x" | "X"), _xdigit, {_xdigit};

(* An integer literal; referenced by boundary.
   NOTE(review): _hexadecimal is listed first, presumably so that the
   leading zero of a hex literal is not taken as a decimal number -
   confirm that alternation is ordered. *)
integer              = _hexadecimal | _decimal;


(* GROUPINGS *)

(* One end of a character range: a quoted character or an integer. *)
boundary             = character | integer;
(* Named wrappers for the first and second boundary of a char-range. *)
low                  = boundary;
high                 = boundary;

(* Question-mark marker. It may optionally precede the alternation bar,
   an optional, or a repeated. Its meaning is not defined by this grammar. *)
NONDETERMINISTIC     = "?";

(* Plus-sign prefix operator applied to a single operand, with optional
   whitespace or comments in between. *)
COMMITTED            = "+";
committed            = COMMITTED, _, operand;

(* Exclamation-mark prefix operator applied to a single operand, with
   optional whitespace or comments in between. Since the operand may itself
   be a rejection, the operator can be stacked, as the double form used by
   several rules of this grammar shows. *)
REJECTION            = "!";
rejection            = REJECTION, _, operand;

(* A lone hyphen; takes no operand. It appears in this grammar as the last
   alternative where a required token is missing.
   NOTE(review): presumably it signals a parse error - confirm. *)
EXCEPTION            = "-";
exception            = EXCEPTION;

(* One or more operands separated by commas. *)
CONCATENATION        = ",";
concatenation        = operand, {_, CONCATENATION, _, operand};

(* One or more concatenations separated by vertical bars, each bar
   optionally preceded by the question-mark marker. Because alternation is
   built from concatenations, the comma binds tighter than the bar. *)
ALTERNATION          = "|";
alternation          = concatenation, {_, [NONDETERMINISTIC], ALTERNATION, _, concatenation};

(* Tokens of a character range: opening angle bracket, separating comma
   and closing angle bracket. The comma is the same character as the one
   used by CONCATENATION. *)
CHAR-RANGE-START     = "<";
CHAR-RANGE-COMMA     = ",";
CHAR-RANGE-END       = ">";
(* Umbrella rules: either bracket, and any of the three tokens. *)
CHAR-RANGE-EDGE      = CHAR-RANGE-START | CHAR-RANGE-END;
CHAR-RANGE           = CHAR-RANGE-EDGE | CHAR-RANGE-COMMA;
(* A range written as: opening bracket, low boundary, comma, high boundary,
   closing bracket, with optional whitespace or comments around both
   boundaries. Each token is written as a double rejection of the specific
   token followed by the umbrella CHAR-RANGE rule.
   NOTE(review): presumably the double rejection checks for the specific
   token without consuming it, and the umbrella rule then consumes it -
   confirm against the parser. *)
char-range           = !!CHAR-RANGE-START, CHAR-RANGE
                     , _, low, _
                     , !!CHAR-RANGE-COMMA, CHAR-RANGE
                     , _, high, _
                     , !!CHAR-RANGE-END, CHAR-RANGE;

(* Square brackets delimiting an optional expression, and the umbrella
   rule covering either bracket. *)
OPTIONAL-START       = "[";
OPTIONAL-END         = "]";
OPTIONAL             = OPTIONAL-START | OPTIONAL-END;
(* An expression in square brackets, optionally preceded by the
   question-mark marker. No whitespace rule sits between the marker and
   the opening bracket. Each bracket is written as a double rejection of
   the specific token followed by the umbrella OPTIONAL rule.
   NOTE(review): presumably the double rejection checks for the specific
   bracket without consuming it - confirm against the parser. *)
optional             = [NONDETERMINISTIC]
                     , !!OPTIONAL-START, OPTIONAL
                     , _, _expression, _
                     , !!OPTIONAL-END, OPTIONAL;

(* Curly braces delimiting a repeated expression, and the umbrella rule
   covering either brace. *)
REPEATED-START       = "{";
REPEATED-END         = "}";
REPEATED             = REPEATED-START | REPEATED-END;
(* An expression in curly braces, optionally preceded by the question-mark
   marker. No whitespace rule sits between the marker and the opening
   brace. Each brace is written as a double rejection of the specific
   token followed by the umbrella REPEATED rule.
   NOTE(review): presumably the double rejection checks for the specific
   brace without consuming it - confirm against the parser. *)
repeated             = [NONDETERMINISTIC]
                     , !!REPEATED-START, REPEATED
                     , _, _expression, _
                     , !!REPEATED-END, REPEATED;

(* Parentheses delimiting a grouped expression, and the umbrella rule
   covering either parenthesis. *)
GROUP-START          = "(";
GROUP-END            = ")";
GROUP                = GROUP-START | GROUP-END;
(* An expression in parentheses. Unlike optional and repeated, this rule
   takes no question-mark prefix. Each parenthesis is written as a double
   rejection of the specific token followed by the umbrella GROUP rule.
   NOTE(review): presumably the double rejection checks for the specific
   parenthesis without consuming it - confirm against the parser. *)
group                = !!GROUP-START, GROUP
                     , _, _expression, _
                     , !!GROUP-END, GROUP;

(* A reference to another rule by its identifier. *)
embedded-rule        = _identifier;

(* Terminal forms: a character range, the lone-hyphen exception, or a
   quoted string. *)
_literal             = char-range | exception | string;
(* Bracketed forms and rule references. *)
_group               = optional | repeated | group | embedded-rule;
(* The unit joined by commas in a concatenation. The alternatives are
   listed as: _group forms, _literal forms, rejection, committed. *)
operand              = _group | _literal | rejection | committed;

(* A full expression is an alternation; this is what appears inside
   brackets and on the right-hand side of a rule. *)
_expression          = alternation;


(* RULES *)

(* The equals sign that separates a rule name from its body, the semicolon
   that ends a rule, and the umbrella rule covering either token. *)
DEFINITION           = "=";
TERMINATION          = ";";
RULE                 = DEFINITION | TERMINATION;
(* The identifier on the left-hand side of a rule. *)
rule-name            = _identifier;
(* One rule: name, equals sign, expression, semicolon, with optional
   whitespace or comments between the parts. Each of the two tokens is
   written as a double rejection of the specific token followed by the
   umbrella RULE rule.
   NOTE(review): presumably the double rejection checks for the specific
   token without consuming it - confirm against the parser. *)
rule                 = rule-name, _
                     , !!DEFINITION, RULE
                     , _, _expression, _
                     , !!TERMINATION, RULE;

(* This is the root rule of the grammar: optional leading whitespace or
   comments, then any number of rules, each followed by optional
   whitespace or comments. A file with no rules at all also matches. *)
grammar              = _, {rule, _};