1 | # fastparse
|
2 |
|
3 | A very simple and stupid parser, based on a state machine and regular expressions.
|
4 |
|
5 | It's not intended for complex languages. It's intended to make it easy to write a simple parser for a simple language.
|
6 |
|
7 |
|
8 |
|
9 | ## Usage
|
10 |
|
11 | Pass a description of the state machine to the constructor. The description must be in this form:
|
12 |
|
13 | ``` javascript
|
14 | new Parser(description)
|
15 |
|
16 | description is {
|
17 | // The key is the name of the state
|
18 | // The value is an object containing possible transitions
|
19 | "state-name": {
|
20 | // The key is a regular expression
|
21 | // If the regular expression matches the transition is executed
|
22 | // The value can be true, another state name or a function
|
23 |
|
24 | "a": true,
|
25 | // true will make the parser stay in the current state
|
26 |
|
27 | "b": "other-state-name",
|
28 | // a string will make the parser transition to a new state
|
29 |
|
30 | "[cde]": function(match, index, matchLength) {
|
31 | // "match" will be the matched string
|
32 | // "index" will be the position in the complete string
|
33 | // "matchLength" will be "match.length"
|
34 |
|
35 | // "this" will be the "context" passed to the "parse" method
|
36 |
|
37 | // A new state name (string) can be returned
|
38 | return "other-state-name";
|
39 | },
|
40 |
|
41 | "([0-9]+)(\\.[0-9]+)?": function(match, first, second, index, matchLength) {
|
42 | // groups can be used in the regular expression
|
43 | // they will match to arguments "first", "second"
|
44 | },
|
45 |
|
46 | // the parser stops when it cannot match the string anymore
|
47 |
|
48 | // order of keys is the order in which regular expressions are matched
|
49 | // if the javascript runtime preserves the order of keys in an object
|
50 | // (this is not standardized, but it's a de-facto standard)
|
51 | }
|
52 | }
|
53 | ```
|
54 |
|
55 | The state machine is compiled down to a single regular expression per state, so the parsing work is essentially delegated to the (native) regular expression engine of the JavaScript runtime.
|
56 |
|
57 |
|
58 | ``` javascript
|
59 | Parser.prototype.parse(initialState: String, parsedString: String, context: Object)
|
60 | ```
|
61 |
|
62 | `initialState`: state where the parser starts to parse.
|
63 |
|
64 | `parsedString`: the string which should be parsed.
|
65 |
|
66 | `context`: an object which can be used to save state and results. Available as `this` in transition functions.
|
67 |
|
68 | returns `context`
|
69 |
|
70 |
|
71 |
|
72 |
|
73 | ## Example
|
74 |
|
75 | ``` javascript
|
76 | var Parser = require("fastparse");
|
77 |
|
78 | // A simple parser that extracts @licence ... from comments in a JS file
|
79 | var parser = new Parser({
|
80 | // The "source" state
|
81 | "source": {
|
82 | // matches comment start
|
83 | "/\\*": "comment",
|
84 | "//": "linecomment",
|
85 |
|
86 | // this would be necessary for a complex language like JS
|
87 | // but omitted here for simplicity
|
88 | // "\"": "string1",
|
89 | // "\'": "string2",
|
90 | // "\/": "regexp"
|
91 |
|
92 | },
|
93 | // The "comment" state
|
94 | "comment": {
|
95 | "\\*/": "source",
|
96 | "@licen[cs]e\\s((?:[^*\n]|\\*+[^*/\n])*)": function(match, licenseText) {
|
97 | this.licences.push(licenseText.trim());
|
98 | }
|
99 | },
|
100 | // The "linecomment" state
|
101 | "linecomment": {
|
102 | "\n": "source",
|
103 | "@licen[cs]e\\s(.*)": function(match, licenseText) {
|
104 | this.licences.push(licenseText.trim());
|
105 | }
|
106 | }
|
107 | });
|
108 |
|
109 | var licences = parser.parse("source", sourceCode, { licences: [] }).licences;
|
110 |
|
111 | console.log(licences);
|
112 | ```
|
113 |
|
114 |
|
115 |
|
116 | ## License
|
117 |
|
118 | MIT (http://www.opensource.org/licenses/mit-license.php)
|