# Retrieved via UNPKG raw view (9.05 kB, text/coffeescript)
1fs = require 'fs'
2{EventEmitter} = require 'events'
3{pointToErrorLocation} = require './helpers'
4StringScanner = require 'StringScanner'
5
# Render any value as a string for diagnostics. Uses util.inspect's legacy
# positional signature: showHidden = no, depth = 9e9, colors = yes.
inspect = (value) ->
  util = require 'util'
  util.inspect value, no, 9e9, yes
7
8
# TODO: better comments
# TODO: support win32-style line endings
11
# Streaming preprocessor that re-emits its input (as 'data' events) with the
# private-use marker characters INDENT, DEDENT and TERM inserted wherever the
# indentation level changes. A stack of lexical contexts (strings, comments,
# regexps, brackets, ...) is maintained so that indentation is only
# interpreted outside of string-like constructs. 'end' is emitted once
# `processEnd` has flushed all pending DEDENTs.
@Preprocessor = class Preprocessor extends EventEmitter

  # Body of a whitespace character class. The backslashes are doubled because
  # the string is interpolated into heregexes and must reach RegExp verbatim.
  ws = '\\t\\x0B\\f\\r \\xA0\\u1680\\u180E\\u2000-\\u200A\\u202F\\u205F\\u3000\\uFEFF'
  INDENT = '\uEFEF'
  DEDENT = '\uEFFE'
  TERM = '\uEFFF'

  constructor: ->
    # `base` is either `null` or a regexp that matches the base indentation
    @base = null
    # `indents` is an array of successive indentation characters.
    @indents = []
    # `context` is the stack of currently open contexts (see `observe`)
    @context = []
    @ss = new StringScanner ''

  # Throw an "Unexpected <token>" error for the token `c`, followed by the
  # location text produced by `pointToErrorLocation` for the current scanner
  # position. Never returns.
  err: (c) ->
    token =
      switch c
        when INDENT
          'INDENT'
        when DEDENT
          'DEDENT'
        when TERM
          'TERM'
        else
          inspect c
    # This isn't perfect for error location tracking, but since we normally call this after a scan, it tends to work well.
    # `split` always yields at least one element, so the last line always exists.
    lines = @ss.str.substr(0, @ss.pos).split /\n/
    columns = lines[lines.length - 1].length
    context = pointToErrorLocation @ss.str, lines.length, columns
    throw new Error "Unexpected #{token}\n#{context}"

  # The innermost open context, or `null` when the context stack is empty.
  peek: -> if @context.length then @context[@context.length - 1] else null

  # Update the context stack for the token `c` and return the stack.
  # Throws (via `err`) when a closing token does not match the innermost open
  # context, and throws a plain Error for a token this method does not know.
  observe: (c) ->
    top = @peek()
    switch c
      # opening token is closing token
      when '"""', '\'\'\'', '"', '\'', '###', '`', '///', '/'
        if top is c then do @context.pop
        else @context.push c
      # strictly opening tokens
      when INDENT, '#', '#{', '[', '(', '{', '\\', 'regexp-[', 'regexp-(', 'regexp-{', 'heregexp-#', 'heregexp-[', 'heregexp-(', 'heregexp-{'
        @context.push c
      # strictly closing tokens
      when DEDENT
        (@err c) unless top is INDENT
        do @context.pop
      when '\n'
        (@err c) unless top in ['#', 'heregexp-#']
        do @context.pop
      when ']'
        (@err c) unless top in ['[', 'regexp-[', 'heregexp-[']
        do @context.pop
      when ')'
        (@err c) unless top in ['(', 'regexp-(', 'heregexp-(']
        do @context.pop
      when '}'
        (@err c) unless top in ['#{', '{', 'regexp-{', 'heregexp-{']
        do @context.pop
      when 'end-\\'
        (@err c) unless top is '\\'
        do @context.pop
      else throw new Error "undefined token observed: " + c
    @context

  # Emit `s` as a 'data' event unless it is null/undefined; returns `s`.
  p: (s) ->
    if s? then @emit 'data', s
    s

  # Scan the regexp `r` at the current position and emit whatever was consumed.
  scan: (r) -> @p @ss.scan r

  # Build the shared body of `processData` (isEnd = no) and `processEnd`
  # (isEnd = yes). The returned function must be invoked as a method.
  processInput = (isEnd) -> (data) ->
    @ss.concat data unless isEnd

    # Scanner position at the top of the previous pass. If a whole pass
    # consumes nothing (e.g. the buffer ends in a lone backslash inside a
    # string), another pass cannot make progress either; without this guard
    # the loop would spin forever.
    lastPos = -1

    until @ss.eos()
      # stalled: wait for more input, or fall through to the EOF checks below
      break if @ss.pos is lastPos
      lastPos = @ss.pos

      switch @peek()
        when null, INDENT, '#{', '[', '(', '{'
          if @ss.bol() or @scan /// (?:[#{ws}]* \n)+ ///

            @scan /// (?: [#{ws}]* (\#\#?(?!\#)[^\n]*)? \n )+ ///

            # we might require more input to determine indentation
            return if not isEnd and (@ss.check /// [#{ws}\n]* $ ///)?

            # consume base indentation
            if @base?
              unless (@scan @base)?
                throw new Error "inconsistent base indentation"
            else
              @base = /// #{@scan /// [#{ws}]* ///} ///

            # move through each level of indentation
            indentIndex = 0
            while indentIndex < @indents.length
              indent = @indents[indentIndex]
              if @ss.check /// #{indent} ///
                # an existing indent
                @scan /// #{indent} ///
              else if @ss.check /// [^#{ws}] ///
                # we lost an indent
                @indents.splice indentIndex, 1
                --indentIndex
                @observe DEDENT
                @p "#{DEDENT}#{TERM}"
              else
                # Some ambiguous dedent
                lines = @ss.str.substr(0, @ss.pos).split /\n/
                message = "Syntax error on line #{lines.length}: indention is ambiguous"
                lineLen = @indents.reduce ((l, r) -> l + r.length), 0
                context = pointToErrorLocation @ss.str, lines.length, lineLen
                throw new Error "#{message}\n#{context}"
              ++indentIndex
            if @ss.check /// [#{ws}]+ [^#{ws}#] ///
              # an indent
              @indents.push @scan /// [#{ws}]+ ///
              @observe INDENT
              @p INDENT

          tok = switch @peek()
            when '['
              # safe things, but not closing bracket
              @scan /[^\n'"\\\/#`[({\]]+/
              @scan /\]/
            when '('
              # safe things, but not closing paren
              @scan /[^\n'"\\\/#`[({)]+/
              @scan /\)/
            when '#{', '{'
              # safe things, but not closing brace
              @scan /[^\n'"\\\/#`[({}]+/
              @scan /\}/
            else
              # scan safe characters (anything that doesn't *introduce* context)
              @scan /[^\n'"\\\/#`[({]+/
              null
          if tok
            @observe tok
            continue

          if tok = @scan /"""|'''|\/\/\/|###|["'`#[({\\]/
            @observe tok
          else if tok = @scan /\//
            # unfortunately, we must look behind us to determine if this is a regexp or division
            pos = @ss.position()
            if pos > 1
              lastChar = @ss.string()[pos - 2]
              spaceBefore = ///[#{ws}]///.test lastChar
              nonIdentifierBefore = /[\W_$]/.test lastChar # TODO: this should perform a real test
            if pos is 1 or (if spaceBefore then not @ss.check /// [#{ws}=] /// else nonIdentifierBefore)
              @observe '/'
        when '\\'
          if (@scan /[\s\S]/) then @observe 'end-\\'
          # TODO: somehow prevent indent tokens from being inserted after these newlines
        when '"""'
          @scan /(?:[^"#\\]+|""?(?!")|#(?!{)|\\.)+/
          # a backslash-newline is consumed without being emitted
          @ss.scan /\\\n/
          if tok = @scan /#{|"""/ then @observe tok
        when '"'
          @scan /(?:[^"#\\]+|#(?!{)|\\.)+/
          @ss.scan /\\\n/
          if tok = @scan /#{|"/ then @observe tok
        when '\'\'\''
          @scan /(?:[^'\\]+|''?(?!')|\\.)+/
          @ss.scan /\\\n/
          if tok = @scan /'''/ then @observe tok
        when '\''
          @scan /(?:[^'\\]+|\\.)+/
          @ss.scan /\\\n/
          if tok = @scan /'/ then @observe tok
        when '###'
          @scan /(?:[^#]+|##?(?!#))+/
          if tok = @scan /###/ then @observe tok
        when '#'
          @scan /[^\n]+/
          if tok = @scan /\n/ then @observe tok
        when '`'
          @scan /[^`]+/
          if tok = @scan /`/ then @observe tok
        when '///'
          @scan /(?:[^[/#\\]+|\/\/?(?!\/)|\\.)+/
          if tok = @scan /#{|\/\/\/|\\/ then @observe tok
          else if @ss.scan /#/ then @observe 'heregexp-#'
          else if tok = @scan /[\[]/ then @observe "heregexp-#{tok}"
        when 'heregexp-['
          @scan /(?:[^\]\/\\]+|\/\/?(?!\/))+/
          if tok = @scan /[\]\\]|#{|\/\/\// then @observe tok
        when 'heregexp-#'
          @ss.scan /(?:[^\n/]+|\/\/?(?!\/))+/
          if tok = @scan /\n|\/\/\// then @observe tok
        #when 'heregexp-('
        #  @scan /(?:[^)/[({#\\]+|\/\/?(?!\/))+/
        #  if tok = @ss.scan /#(?!{)/ then @observe 'heregexp-#'
        #  else if tok = @scan /[)\\]|#{|\/\/\// then @observe tok
        #  else if tok = @scan /[[({]/ then @observe "heregexp-#{tok}"
        #when 'heregexp-{'
        #  @scan /(?:[^}/[({#\\]+|\/\/?(?!\/))+/
        #  if tok = @ss.scan /#(?!{)/ then @observe 'heregexp-#'
        #  else if tok = @scan /[}/\\]|#{|\/\/\// then @observe tok
        #  else if tok = @scan /[[({]/ then @observe "heregexp-#{tok}"
        when '/'
          @scan /[^[/\\]+/
          if tok = @scan /[\/\\]/ then @observe tok
          else if tok = @scan /\[/ then @observe "regexp-#{tok}"
        when 'regexp-['
          @scan /[^\]\\]+/
          if tok = @scan /[\]\\]/ then @observe tok
        #when 'regexp-('
        #  @scan /[^)/[({\\]+/
        #  if tok = @scan /[)/\\]/ then @observe tok
        #  else if tok = @scan /[[({]/ then @observe "regexp-#{tok}"
        #when 'regexp-{'
        #  @scan /[^}/[({\\]+/
        #  if tok = @scan /[}/\\]/ then @observe tok
        #  else if tok = @scan /[[({]/ then @observe "regexp-#{tok}"

    # reached the end of the file
    if isEnd
      @scan /// [#{ws}\n]* $ ///
      # close every indentation level that is still open
      while @context.length and INDENT is @peek()
        @observe DEDENT
        @p "#{DEDENT}#{TERM}"
      if @context.length
        # TODO: store offsets of tokens when inserted and report position of unclosed starting token
        throw new Error 'Unclosed ' + (inspect @peek()) + ' at EOF'
      @emit 'end'
      return

    return

  # Feed another chunk of input.
  processData: processInput no
  # Signal the end of input: flush pending DEDENTs and emit 'end'.
  processEnd: processInput yes

  # Preprocess the complete string `input` synchronously and return the
  # concatenation of everything that would have been emitted as 'data'.
  @processSync = (input) ->
    pre = new Preprocessor
    output = ''
    # replace the emitter with a collector; events other than 'data' are dropped
    pre.emit = (type, data) -> output += data if type is 'data'
    pre.processData input
    do pre.processEnd
    output