-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser_combinator.js
132 lines (126 loc) · 2.85 KB
/
parser_combinator.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
// A very simple parser combinator library.
//
// The idea is to build complex parsers using very simple building blocks.
// Everything is done in code, which removes the need to learn a meta-parsing
// language.
//
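// A parser is defined as a function which takes an input and either returns
// [false, input] on failure or returns [token, remaining input] on success.
// The token `true` means "succeeded, but there is nothing to collect": seq and
// rep leave it out of their results. The combinators test the token for
// truthiness, so a falsy token (such as '' or 0) is treated as a failure.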
//
// The simple building blocks take parsers and return a function
// (which is a parser).
//
// Another nice thing about parser combinators is that there is no need for a
// separate lexer. We do need to take whitespace into account explicitly.
//
// See parser_combinator_test.js for sample usage.
//
// Some useful reading
// * https://fsharpforfunandprofit.com/posts/understanding-parser-combinators/
// * https://wiki.haskell.org/Parsec
// * https://hackage.haskell.org/package/parsec-3.1.9/docs/src/Text-Parsec-Combinator.html
/**
 * Ordered choice: builds a parser that runs each candidate on the same input
 * and hands back the result of the first candidate that succeeds.
 *
 * A result counts as a success when its token (element 0) is truthy, so a
 * candidate that produces a falsy token is skipped like a failing one.
 *
 * @param {Array<function>} parsers - Candidate parsers, tried in order.
 * @returns {function} A parser returning the first successful result
 *     unchanged, or [false, input] when no candidate succeeds.
 */
function any(parsers) {
  return input => {
    let index = 0;
    while (index < parsers.length) {
      const outcome = parsers[index](input);
      if (outcome[0]) {
        return outcome;
      }
      index += 1;
    }
    // Every candidate was rejected (or there were none).
    return [false, input];
  };
}
exports.any = any;
/**
 * Sequencing: builds a parser that runs every parser in order, each one
 * starting where the previous one stopped, and succeeds only if all of them
 * succeed.
 *
 * Tokens are collected into an array in order. A token of exactly `true` is
 * left out of that array. A falsy token is treated as a failure.
 *
 * @param {Array<function>} parsers - Parsers to apply one after another.
 * @returns {function} A parser returning [tokens, remaining input] on
 *     success, or [false, input] with the original, untouched input as soon
 *     as any step fails.
 */
function seq(parsers) {
  return input => {
    const tokens = [];
    let remaining = input;
    let index = 0;
    while (index < parsers.length) {
      const outcome = parsers[index](remaining);
      const token = outcome[0];
      if (!token) {
        // Backtrack: report the input as it was before the first step.
        return [false, input];
      }
      if (token !== true) {
        tokens.push(token);
      }
      remaining = outcome[1];
      index += 1;
    }
    return [tokens, remaining];
  };
}
exports.seq = seq;
/**
 * Repetition: builds a parser that applies `parser` as many times as possible,
 * each time on what the previous application left over. Useful for parsing
 * sequences. Always succeeds (zero matches yields an empty array).
 *
 * Tokens are collected in order; a token of exactly `true` is left out. A
 * falsy token is treated as a failure and ends the repetition.
 *
 * The repetition also ends when `parser` succeeds without consuming anything
 * (its remaining input is `===` to the input it was given). Without this
 * check a non-consuming parser such as `opt(p)` would succeed forever and the
 * loop would never terminate. The result of that non-consuming application is
 * discarded.
 *
 * @param {function} parser - The parser to repeat.
 * @returns {function} A parser returning [tokens, remaining input].
 */
function rep(parser) {
  return input => {
    const tokens = [];
    let remaining = input;
    for (;;) {
      const outcome = parser(remaining);
      const failed = !outcome[0];
      // No progress means every further application would give the same
      // result, so stopping here is the only way to terminate.
      const stalled = outcome[1] === remaining;
      if (failed || stalled) {
        return [tokens, remaining];
      }
      if (outcome[0] !== true) {
        tokens.push(outcome[0]);
      }
      remaining = outcome[1];
    }
  };
}
exports.rep = rep;
/**
 * Optional: builds a parser that tries `parser` and never fails.
 *
 * When `parser` produces a truthy token its result is passed through
 * unchanged. Otherwise the outcome is [true, input]: a success that consumed
 * nothing and carries no token to collect.
 *
 * @param {function} parser - The parser to attempt.
 * @returns {function} A parser that always reports success.
 */
function opt(parser) {
  return input => {
    const outcome = parser(input);
    return outcome[0] ? outcome : [true, input];
  };
}
exports.opt = opt;
/**
 * Separated list: builds a parser for one or more `parser` items with `sep`
 * between them. Useful for parsing comma delimited sequences.
 *
 * Only the item tokens are returned; separator tokens are dropped. An item
 * token of exactly `true` is left out of the list. A trailing separator that
 * is not followed by an item is not consumed.
 *
 * The list is built directly from the item and separator results rather than
 * by unpacking nested sequence output by position, so a separator (or item)
 * that reports success with the token `true` can no longer shift positions
 * and produce `undefined` entries or a TypeError.
 *
 * The loop also stops when a separator + item pair succeeds without consuming
 * anything (remaining input `===` the input before the separator), which
 * would otherwise repeat forever.
 *
 * @param {function} parser - Parser for a single item.
 * @param {function} sep - Parser for the separator between items.
 * @returns {function} A parser returning [items, remaining input], or
 *     [false, input] when not even the first item can be parsed.
 */
function repsep(parser, sep) {
  return input => {
    const first = parser(input);
    if (!first[0]) {
      return [false, input];
    }
    const items = [];
    if (first[0] !== true) {
      items.push(first[0]);
    }
    let remaining = first[1];
    for (;;) {
      const separator = sep(remaining);
      if (!separator[0]) {
        break;
      }
      const item = parser(separator[1]);
      if (!item[0]) {
        // Separator without a following item: leave it unconsumed.
        break;
      }
      if (item[1] === remaining) {
        // Neither the separator nor the item consumed anything.
        break;
      }
      if (item[0] !== true) {
        items.push(item[0]);
      }
      remaining = item[1];
    }
    return [items, remaining];
  };
}
exports.repsep = repsep;
/**
 * Builds a parser that matches a regular expression against the beginning of
 * the input.
 *
 * The expression is compiled once, when the parser is built, as
 * `^(?:source)`:
 *  - the non-capturing group makes the anchor apply to every alternative, so
 *    /a|b/ cannot match a "b" further into the input;
 *  - the caller's flags (for example `i`, `s`, `u`) are preserved;
 *  - the `g` and `y` flags are removed, because they make `exec` depend on
 *    `lastIndex` and would make repeated parses give different answers.
 *
 * A match that does not start at index 0 (possible with the `m` flag, where
 * `^` also matches after a line break) is rejected.
 *
 * @param {RegExp} re - The pattern to match; the object itself is not changed.
 * @returns {function} A parser returning [matched text, remaining input], or
 *     [false, input] when the input does not start with a match. Note that an
 *     empty match yields the token ''.
 */
function re(re) {
  const flags = re.flags.replace(/[gy]/g, '');
  const anchored = new RegExp(`^(?:${re.source})`, flags);
  return input => {
    const match = anchored.exec(input);
    if (!match || match.index !== 0) {
      return [false, input];
    }
    const text = match[0];
    return [text, input.slice(text.length)];
  };
}
exports.re = re;