7 files changed, 136 insertions, 50 deletions
diff --git a/README b/README
index 37b2701..23d231b 100644
--- a/README
+++ b/README
@@ -76,11 +76,14 @@ EXTENDED DESCRIPTION
 		_low             = character | integer;
 		_high            = character | integer;
 
+		nondeterministic = "?";
+
+		committed        = "+", _, _operand;
 		rejection        = "!", _, _operand;
 		concatenation    = _operand, {_, ",", _, _operand};
-		alternation      = concatenation, {_, "|", _, concatenation};
-		optional         = "[", _, _expression, _, "]";
-		repeated         = "{", _, _expression, _, "}";
+		alternation      = concatenation, {_, [nondeterministic], "|", _, concatenation};
+		optional         = [nondeterministic], "[", _, _expression, _, "]";
+		repeated         = [nondeterministic], "{", _, _expression, _, "}";
 		group            = "(", _, _expression, _, ")";
 		char-range       = "<", _, _low, _, ",", _, _high, "_", ">";
 		exception        = "-";
@@ -88,7 +91,7 @@ EXTENDED DESCRIPTION
 
 		_literal         = char-range | exception | string;
 		_group           = optional | repeated | group | embedded-rule;
-		_operand         = _group | _literal | rejection;
+		_operand         = _group | _literal | rejection | committed;
 
 		_expression      = alternation;
 
@@ -109,12 +112,24 @@ EXTENDED DESCRIPTION
 	reached, the parser will terminate there.
 
 	Repeated symbols may occur any number of times, including
-	zero. The compiler is able to backtrack if it takes too much.
+	zero. The parser will try to take as much as as possible.
+
+	Optional symbols are taken whenever possible.
+
+	Concatenation has higher precedence than alternation, groups
+	("(", ..., ")") have no semantic meaning and are useful only
+	for including alternations inside concatenations or put
+	alternations or concatenations inside a commitment or
+	rejection.
 
-	Concatenation has higher precedence than alternation,
-	groups ("(", ..., ")") have no semantic meaning and are useful
-	only to put a alternation inside a concatenation without
-	creating a new rule for that.
+	The parser has the ability to be non-deterministic, which
+	can make it really slow. The speed up parsing, you can add
+	commits. Once the committed sentence has been matched, the
+	branching-points inside it are unrecorded, so when the parser
+	backtracks, it will not try different choices inside the
+	committed sentence. Commit sentences are undone by the parser
+	when it backtracks to a branching-point outside the committed
+	sentence.
 
 	In character ranges, the _high and _low values must be at
 	least 0 and at most 255, and _high must be greater than _low.
@@ -125,7 +140,9 @@ EXTENDED DESCRIPTION
 
 	Left recursion is illegal (it will cause stack overflow at
 	runtime as the empty condition before the recursion is always
-	met).
+	met). Likewise, repeated optional sentences are illegal; a
+	repeated sentence must always consume input, otherwise it
+	gets stuck.
 
     Right-context-sensitive grammar
 	libparser originally used context-free grammar, but with
diff --git a/calc-example/calc.syntax b/calc-example/calc.syntax
index d76b68b..fb5a871 100644
--- a/calc-example/calc.syntax
+++ b/calc-example/calc.syntax
@@ -15,16 +15,16 @@ DIV         = _, ("/" | "∕" | "÷"), _;
 sign        = ADD | SUB;
 
 _digit      = DIGIT | _WHITESPACE | "_" | "'";
-unsigned    = DIGIT, +({_digit}, !_digit);
+unsigned    = DIGIT, {_digit}, !_digit;
 
 _number     = unsigned | "(", _expr, (")" | -);
 
-number      = _number, +{_, _number}; (* optionally with implicit multiplication *)
+number      = _number, {_, _number}; (* optionally with implicit multiplication *)
 
 value       = [sign], number;
 
 _expr       = hyper1;
 
 
-hyper1      = _, hyper2, {+(ADD | SUB), +(hyper2 | -)}, _;
-hyper2      = _, value, {+(MUL | DIV), +(value | -)}, _;
+hyper1      = _, hyper2, {(ADD | SUB), (hyper2 | -)}, _;
+hyper2      = _, value, {(MUL | DIV), (value | -)}, _;
diff --git a/libparser-generate.c b/libparser-generate.c
index a8761f8..0a91835 100644
--- a/libparser-generate.c
+++ b/libparser-generate.c
@@ -38,6 +38,7 @@ struct node {
 	struct node *next; /* next element in list */
 	struct node *data; /* beginning of subsentence */
 	struct node **head; /* end of subsentence */
+	int nondeterministic;
 };
 
 
@@ -65,15 +66,6 @@ emalloc(size_t n)
 }
 
 static void *
-ecalloc(size_t n, size_t m)
-{
-	void *ret = calloc(n, m);
-	if (!ret)
-		eprintf("%s: calloc %zu %zu: %s\n", argv0, n, m, strerror(errno));
-	return ret;
-}
-
-static void *
 erealloc(void *ptr, size_t n)
 {
 	void *ret = realloc(ptr, n);
@@ -104,6 +96,20 @@ estrdup(char *s)
 }
 
 
+static struct node *
+new_node(void)
+{
+	struct node *ret = emalloc(sizeof(*ret));
+	ret->token = NULL;
+	ret->parent = NULL;
+	ret->next = NULL;
+	ret->data = NULL;
+	ret->head = NULL;
+	ret->nondeterministic = 0;
+	return ret;
+}
+
+
 static int
 strpcmp(const void *av, const void *bv)
 {
@@ -120,6 +126,7 @@ isidentifier(char c)
 }
 
 
+
 static int
 check_utf8(char *buf, size_t *ip, size_t len)
 {
@@ -379,10 +386,11 @@ emit_and_free_sentence(struct node *node, size_t *indexp)
 	case '!': type = "REJECTION"; unary:
 		emit_and_free_sentence(node->data, indexp);
 		printf("static union libparser_sentence sentence_%zu_%zu = {.unary = {"
-		           ".type = LIBPARSER_SENTENCE_TYPE_%s, .sentence = &sentence_%zu_%zu"
+		           ".type = LIBPARSER_SENTENCE_TYPE_%s%s, .sentence = &sentence_%zu_%zu"
 		       "}};\n",
 		       nrule_names, index,
-		       type, nrule_names, index + 1u);
+		       node->nondeterministic ? "ND_" : "", type,
+		       nrule_names, index + 1u);
 		break;
 
 	case '<':
@@ -412,11 +420,11 @@ emit_and_free_sentence(struct node *node, size_t *indexp)
 		left = *indexp;
 		emit_and_free_sentence(node->data, indexp);
 		printf("static union libparser_sentence sentence_%zu_%zu = {.binary = {"
-		           ".type = LIBPARSER_SENTENCE_TYPE_%s, "
+		           ".type = LIBPARSER_SENTENCE_TYPE_%s%s, "
 		           ".left = &sentence_%zu_%zu, .right = &sentence_%zu_%zu"
 		       "}};\n",
 		       nrule_names, index,
-		       type,
+		       node->nondeterministic ? "ND_" : "", type,
 		       nrule_names, left, nrule_names, right);
 		break;
 
@@ -587,7 +595,7 @@ main(int argc, char *argv[])
 	struct node *stack = NULL, *parent_node, *node;
 	char *data;
 	struct token **tokens;
-	size_t i, j;
+	size_t i, j, nondeterministic;
 	int cmp, err, val;
 
 	if (argc) {
@@ -613,6 +621,7 @@ main(int argc, char *argv[])
 
 	printf("#include <libparser.h>\n");
 
+	nondeterministic = SIZE_MAX;
 	i = 0;
 again:
 	for (; tokens[i]; i++) {
@@ -635,7 +644,7 @@ again:
 		}
 
 		/* Also remove any whitespace (the tokeniser
-		 * simple and does not recognise mulltisymbol
+		 * simple and does not recognise multisymbol
 		 * tokens (that is apart form strings and
 		 * identifiers) so it cannot ignore whitespace. */
 		if (isspace(tokens[i]->s[0])) {
@@ -643,6 +652,16 @@ again:
 			continue;
 		}
 
+		/* Check if next token is non-deterministic */
+		if (tokens[i]->s[0] == '?') {
+			if (tokens[i + 1u]->s[0] == '[' ||
+			    tokens[i + 1u]->s[0] == '{' ||
+			    tokens[i + 1u]->s[0] == '|') {
+				nondeterministic = i + 1u;
+				continue;
+			}
+		}
+
 		/* For the sake of code readability, identify
 		 * the token type */
 		if (tokens[i]->s[0] == '"') {
@@ -661,7 +680,8 @@ again:
 				eprintf("%s: expected an identifier on line %zu at column %zu (character %zu)\n",
 				        argv0, tokens[i]->lineno, tokens[i]->column, tokens[i]->character);
 			}
-			stack = calloc(1, sizeof(*stack));
+			stack = new_node();
+			stack->nondeterministic = i == nondeterministic;
 			stack->token = tokens[i];
 			stack->head = &stack->data;
 			/* and then we expect an equals sign */
@@ -715,7 +735,8 @@ again:
 					state = EXPECT_RANGE_LOW;
 				push_stack:
 					parent_node = stack;
-					stack = ecalloc(1, sizeof(*stack));
+					stack = new_node();
+					stack->nondeterministic = i == nondeterministic;
 					stack->parent = parent_node;
 					stack->token = tokens[i];
 					stack->head = &stack->data;
@@ -756,7 +777,8 @@ again:
 				 * token to be an operand */
 				state = EXPECT_OPERAND;
 			add_singleton:
-				node = calloc(1u, sizeof(*node));
+				node = new_node();
+				node->nondeterministic = i == nondeterministic;
 				node->token = tokens[i];
 				*stack->head = node;
 				stack->head = &node->next;
diff --git a/libparser.7 b/libparser.7
index 2d921a7..c50b5c9 100644
--- a/libparser.7
+++ b/libparser.7
@@ -88,12 +88,14 @@ integer          = _decimal | _hexadecimal; (* May not exceed 255. *)
 _low             = character | integer;
 _high            = character | integer;
 
+nondeterministic = \(dq?\(dq;
+
 committed        = \(dq+\(dq, _, _operand;
 rejection        = \(dq!\(dq, _, _operand;
 concatenation    = _operand, {_, \(dq,\(dq, _, _operand};
-alternation      = concatenation, {_, \(dq|\(dq, _, concatenation};
-optional         = \(dq[\(dq, _, _expression, _, \(dq]\(dq;
-repeated         = \(dq{\(dq, _, _expression, _, \(dq}\(dq;
+alternation      = concatenation, {_, [nondeterministic], \(dq|\(dq, _, concatenation};
+optional         = [nondeterministic], \(dq[\(dq, _, _expression, _, \(dq]\(dq;
+repeated         = [nondeterministic], \(dq{\(dq, _, _expression, _, \(dq}\(dq;
 group            = \(dq(\(dq, _, _expression, _, \(dq)\(dq;
 char-range       = \(dq<\(dq, _, _low, _, \(dq,\(dq, _, _high, \(dq_\(dq, \(dq>\(dq;
 exception        = \(dq-\(dq;
@@ -132,18 +134,18 @@ Optional symbols are taken whenever possible.
 Concatenation has higher precedence than alternation,
 groups
 .RB (\(dq ( "\(dq, ..., \(dq" ) \(dq)
-have no semantic meaning and are useful only to put a
-alternation inside a concatenation without creating a
-new rule for that.
+have no semantic meaning and are useful only for including
+alternations inside concatenations or put alternations
+or concatenations inside a commitment or rejection.
 .PP
-The parser is non-deterministic, which can make it really
-slow. The speed up parsing, you can add commits. Once the
-committed sentence has been matched, the branching-points
-inside it are unrecorded, so when the parser backtracks,
-it will not try different choices inside the committed
-sentence. Commit sentences are undone by the parser when
-it backtracks to a branching-point outside the committed
-sentence.
+The parser has the ability to be non-deterministic, which
+can make it really slow. The speed up parsing, you can add
+commits. Once the committed sentence has been matched,
+the branching-points inside it are unrecorded, so when
+the parser backtracks, it will not try different choices
+inside the committed sentence. Commit sentences are undone
+by the parser when it backtracks to a branching-point
+outside the committed sentence.
 .PP
 In character ranges, the
 .B _high
diff --git a/libparser.c b/libparser.c
index 7ef7267..5ecb7ee 100644
--- a/libparser.c
+++ b/libparser.c
@@ -267,6 +267,9 @@ sentence_type_string(enum libparser_sentence_type type)
 	case LIBPARSER_SENTENCE_TYPE_EOF: return "EOF";
 	case LIBPARSER_SENTENCE_TYPE_EPSILON: return "EPSILON";
 	case LIBPARSER_SENTENCE_TYPE_COMMITTED: return "COMMITTED";
+	case LIBPARSER_SENTENCE_TYPE_ND_OPTIONAL: return "ND_OPTIONAL";
+	case LIBPARSER_SENTENCE_TYPE_ND_REPEATED: return "ND_REPEATED";
+	case LIBPARSER_SENTENCE_TYPE_ND_ALTERNATION: return "ND_ALTERNATION";
 	default: return "<invalid>";
 	}
 }
@@ -375,6 +378,15 @@ print_sentence(const union libparser_sentence *sentence, int indent)
 		indent += 1;
 		break;
 
+	case LIBPARSER_SENTENCE_TYPE_ND_ALTERNATION:
+		fprintf(stderr, "(");
+		print_sentence(sentence->binary.left, indent + 1);
+		fprintf(stderr, " ?| \n%*.s", indent + 1, "");
+		indent = print_sentence(sentence->binary.right, indent + 1);
+		fprintf(stderr, ")");
+		indent += 1;
+		break;
+
 	case LIBPARSER_SENTENCE_TYPE_ALTERNATION:
 		fprintf(stderr, "(");
 		print_sentence(sentence->binary.left, indent + 1);
@@ -398,6 +410,9 @@ print_sentence(const union libparser_sentence *sentence, int indent)
 		indent += 1;
 		break;
 
+	case LIBPARSER_SENTENCE_TYPE_ND_OPTIONAL:
+		fprintf(stderr, "?");
+		/* fall through */
 	case LIBPARSER_SENTENCE_TYPE_OPTIONAL:
 		fprintf(stderr, "[");
 		indent = print_sentence(sentence->unary.sentence, indent + 1);
@@ -405,6 +420,9 @@ print_sentence(const union libparser_sentence *sentence, int indent)
 		indent += 1;
 		break;
 
+	case LIBPARSER_SENTENCE_TYPE_ND_REPEATED:
+		fprintf(stderr, "?");
+		/* fall through */
 	case LIBPARSER_SENTENCE_TYPE_REPEATED:
 		fprintf(stderr, "{");
 		indent = print_sentence(sentence->unary.sentence, indent + 1);
@@ -827,6 +845,7 @@ add_unit(const char *rule, const union libparser_sentence *sentence, struct cont
 #endif
 
 	switch (sentence->type) {
+	case LIBPARSER_SENTENCE_TYPE_ND_ALTERNATION:
 	case LIBPARSER_SENTENCE_TYPE_ALTERNATION:
 		if (braching(rule, sentence, start, ctx, &choice))
 			return -1;
@@ -837,6 +856,7 @@ add_unit(const char *rule, const union libparser_sentence *sentence, struct cont
 			return -1;
 		return 1;
 
+	case LIBPARSER_SENTENCE_TYPE_ND_REPEATED:
 	case LIBPARSER_SENTENCE_TYPE_REPEATED:
 		if (braching(rule, sentence, start, ctx, &choice))
 			return -1;
@@ -860,6 +880,7 @@ add_unit(const char *rule, const union libparser_sentence *sentence, struct cont
 		}
 		return 1;
 
+	case LIBPARSER_SENTENCE_TYPE_ND_OPTIONAL:
 	case LIBPARSER_SENTENCE_TYPE_OPTIONAL:
 		if (braching(rule, sentence, start, ctx, &choice))
 			return -1;
@@ -1005,7 +1026,9 @@ make_tree(struct context *ctx, size_t *unit_i, struct libparser_unit **last)
 
 	switch (unit->sentence->type) {
 	case LIBPARSER_SENTENCE_TYPE_RULE:
+	case LIBPARSER_SENTENCE_TYPE_ND_OPTIONAL:
 	case LIBPARSER_SENTENCE_TYPE_OPTIONAL:
+	case LIBPARSER_SENTENCE_TYPE_ND_ALTERNATION:
 	case LIBPARSER_SENTENCE_TYPE_ALTERNATION:
 	case LIBPARSER_SENTENCE_TYPE_COMMITTED:
 #if PRINT_ACTIONS
@@ -1033,6 +1056,7 @@ make_tree(struct context *ctx, size_t *unit_i, struct libparser_unit **last)
 		*last = unit;
 		goto out;
 
+	case LIBPARSER_SENTENCE_TYPE_ND_REPEATED:
 	case LIBPARSER_SENTENCE_TYPE_REPEATED:
 	case LIBPARSER_SENTENCE_TYPE_CONCATENATION:
 #if PRINT_ACTIONS
@@ -1179,7 +1203,6 @@ libparser_parse_file(const struct libparser_rule *const rules[], const char *dat
 #endif
 
 	/* TODO guard against left-side recursion */
-	/* TODO make branching opt-in ("?") */
 	/* TODO break REPEATED on empty match */
 followup:
 #if PRINT_ACTIONS
@@ -1249,9 +1272,13 @@ followup:
 			}
 			if (!match) 
 				goto mismatch;
-			if (sentence->type == LIBPARSER_SENTENCE_TYPE_COMMITTED) {
+			if (sentence->type == LIBPARSER_SENTENCE_TYPE_COMMITTED ||
+			    sentence->type == LIBPARSER_SENTENCE_TYPE_ALTERNATION ||
+			    sentence->type == LIBPARSER_SENTENCE_TYPE_OPTIONAL ||
+			    sentence->type == LIBPARSER_SENTENCE_TYPE_REPEATED) {
 #if PRINT_ACTIONS
-				fprintf(stderr, "Removing interior choices for matched COMMITTED\n");
+				fprintf(stderr, "Removing interior choices for matched %s\n",
+				        sentence_type_string(sentence->type));
 #endif
 				j = ctx.followups.followups[i].unit_index;
 				while (ctx.choices.count &&
diff --git a/libparser.h b/libparser.h
index 66bedea..54e3829 100644
--- a/libparser.h
+++ b/libparser.h
@@ -21,7 +21,10 @@ enum libparser_sentence_type {
 	LIBPARSER_SENTENCE_TYPE_EXCEPTION,     /* (none) */
 	LIBPARSER_SENTENCE_TYPE_EOF,           /* (none) */
 	LIBPARSER_SENTENCE_TYPE_EPSILON,       /* (none) */
-	LIBPARSER_SENTENCE_TYPE_COMMITTED      /* .unary */
+	LIBPARSER_SENTENCE_TYPE_COMMITTED,     /* .unary */
+	LIBPARSER_SENTENCE_TYPE_ND_OPTIONAL,   /* .unary */
+	LIBPARSER_SENTENCE_TYPE_ND_REPEATED,   /* .unary */
+	LIBPARSER_SENTENCE_TYPE_ND_ALTERNATION /* .binary */
 };
 
 struct libparser_sentence_binary {
diff --git a/print-syntax.c b/print-syntax.c
index 3228401..07b3858 100644
--- a/print-syntax.c
+++ b/print-syntax.c
@@ -26,6 +26,15 @@ print_sentence(const union libparser_sentence *sentence, int indent)
 		indent += 1;
 		break;
 
+	case LIBPARSER_SENTENCE_TYPE_ND_ALTERNATION:
+		printf("(");
+		print_sentence(sentence->binary.left, indent + 1);
+		printf(" ?| \n%*.s", indent + 1, "");
+		indent = print_sentence(sentence->binary.right, indent + 1);
+		printf(")");
+		indent += 1;
+		break;
+
 	case LIBPARSER_SENTENCE_TYPE_ALTERNATION:
 		printf("(");
 		print_sentence(sentence->binary.left, indent + 1);
@@ -49,6 +58,9 @@ print_sentence(const union libparser_sentence *sentence, int indent)
 		indent += 1;
 		break;
 
+	case LIBPARSER_SENTENCE_TYPE_ND_OPTIONAL:
+		printf("?");
+		/* fall through */
 	case LIBPARSER_SENTENCE_TYPE_OPTIONAL:
 		printf("[");
 		indent = print_sentence(sentence->unary.sentence, indent + 1);
@@ -56,6 +68,9 @@ print_sentence(const union libparser_sentence *sentence, int indent)
 		indent += 1;
 		break;
 
+	case LIBPARSER_SENTENCE_TYPE_ND_REPEATED:
+		printf("?");
+		/* fall through */
 	case LIBPARSER_SENTENCE_TYPE_REPEATED:
 		printf("{");
 		indent = print_sentence(sentence->unary.sentence, indent + 1);