UNPKG

coffee-script-redux/src/preprocessor.coffee

Version:

9.05 kBtext/coffeescriptView Raw

1fs = require 'fs'
2{EventEmitter} = require 'events'
3{pointToErrorLocation} = require './helpers'
4StringScanner = require 'StringScanner'
5
# Render any value as a string for use in error messages. Uses the positional
# form of `util.inspect`: showHidden = no, depth = 9e9 (effectively unlimited),
# colors = yes (so the result contains ANSI colour escapes).
inspect = (o) -> (require 'util').inspect o, no, 9e9, yes
7
8
# TODO: better comments
# TODO: support win32-style line endings
11
# Streaming preprocessor: consumes source text in chunks and re-emits it
# through 'data' events with explicit INDENT / DEDENT / TERM marker characters
# inserted wherever the indentation level changes.
@Preprocessor = class Preprocessor extends EventEmitter

# Contents of a regexp character class matching horizontal whitespace (newline
# is deliberately excluded). The backslashes are doubled so the escapes survive
# interpolation into heregexes such as `/// [#{ws}]* ///`.
ws = '\\t\\x0B\\f\\r \\xA0\\u1680\\u180E\\u2000-\\u200A\\u202F\\u205F\\u3000\\uFEFF'
# Sentinel characters written into the output stream to mark structure:
# INDENT opens a block; DEDENT followed by TERM is emitted when one closes.
INDENT = '\uEFEF'
DEDENT = '\uEFFE'
TERM   = '\uEFFF'
18
constructor: ->
  # Scanner over the input received so far; new chunks are appended to it.
  @ss = new StringScanner ''
  # Stack of currently open contexts (string/regexp/comment delimiters,
  # brackets, and INDENT markers); the innermost context is last.
  @context = []
  # Successive indentation strings, one per open indentation level.
  @indents = []
  # Either `null` or a regexp that matches the base indentation.
  @base = null
26
# Throw an "Unexpected <token>" error for token `c`, annotated with the
# source location of the scanner's current position. Never returns.
err: (c) ->
  # The sentinel characters are reported by name; anything else is inspected.
  token =
    if c is INDENT
      'INDENT'
    else if c is DEDENT
      'DEDENT'
    else if c is TERM
      'TERM'
    else
      inspect c
  # Location tracking is approximate: it uses the scanner position, which is
  # normally just past the offending token because this runs right after a scan.
  consumed = @ss.str.substr 0, @ss.pos
  lines = consumed.split(/\n/) or ['']
  lastLine = lines[lines.length - 1]
  columns = if lastLine? then lastLine.length else 0
  context = pointToErrorLocation @ss.str, lines.length, columns
  throw new Error "Unexpected #{token}\n#{context}"
43
# Return the innermost open context, or `null` when no context is open.
peek: ->
  depth = @context.length
  return null unless depth
  @context[depth - 1]
45
# Update the context stack for a token `c` that was just scanned.
# Returns the context stack. Throws (via `@err`) when a closing token does not
# match the innermost open context, and throws directly for an unknown token.
observe: (c) ->
  top = @peek()
  # Pop the innermost context after checking that it is one of the opening
  # contexts `c` is allowed to close.
  close = (openers...) =>
    @err c unless top in openers
    @context.pop()
  switch c
    # delimiters that both open and close the same context
    when '"""', '\'\'\'', '"', '\'', '###', '`', '///', '/'
      if top is c
        @context.pop()
      else
        @context.push c
    # tokens that can only open a context
    when INDENT, '#', '#{', '[', '(', '{', '\\', 'regexp-[', 'regexp-(', 'regexp-{', 'heregexp-#', 'heregexp-[', 'heregexp-(', 'heregexp-{'
      @context.push c
    # tokens that can only close a context
    when DEDENT then close INDENT
    when '\n' then close '#', 'heregexp-#'
    when ']' then close '[', 'regexp-[', 'heregexp-['
    when ')' then close '(', 'regexp-(', 'heregexp-('
    when '}' then close '#{', '{', 'regexp-{', 'heregexp-{'
    when 'end-\\' then close '\\'
    else
      throw new Error "undefined token observed: " + c
  @context
77
# Emit `s` as a 'data' event unless it is null/undefined; always return `s`
# so the call can wrap a scan result transparently.
p: (s) ->
  @emit 'data', s if s?
  s
81
# Scan regexp `r` at the current scanner position, emit whatever was matched
# as output, and return the scan result.
scan: (r) ->
  matched = @ss.scan r
  @p matched
83
# Build the chunk handler shared by `processData` (isEnd = no) and
# `processEnd` (isEnd = yes). The returned function must be invoked with a
# Preprocessor instance as `this`.
#
# The handler appends `data` to the scanner (skipped when isEnd), then
# repeatedly scans according to the innermost open context, copying the input
# to the output via `@scan`/`@p` and inserting INDENT / DEDENT+TERM markers at
# line starts. With isEnd it also closes every open indent, emits 'end', and
# throws if any other context is still open.
#
# Throws Error on inconsistent base indentation, ambiguous indentation,
# mismatched closing tokens (via `@observe`), and unclosed contexts at EOF.
# Always returns undefined.
processInput = (isEnd) -> (data) ->
  @ss.concat data unless isEnd

  until @ss.eos()
    switch @peek()
      # plain code: top level, inside an indent, an interpolation, or brackets
      when null, INDENT, '#{', '[', '(', '{'
        if @ss.bol() or @scan /// (?:[#{ws}]* \n)+ ///

          # skip blank lines and lines containing only a `#` / `##` comment
          @scan /// (?: [#{ws}]* (\#\#?(?!\#)[^\n]*)? \n )+ ///

          # we might require more input to determine indentation
          return if not isEnd and (@ss.check /// [#{ws}\n]* $ ///)?

          # consume base indentation
          if @base?
            unless (@scan @base)?
              throw new Error "inconsistent base indentation"
          else
            # the first indentation seen becomes the base for every later line
            @base = /// #{@scan /// [#{ws}]* ///} ///

          # move through each level of indentation
          indentIndex = 0
          while indentIndex < @indents.length
            indent = @indents[indentIndex]
            if @ss.check /// #{indent} ///
              # an existing indent
              @scan /// #{indent} ///
            else if @ss.check /// [^#{ws}] ///
              # we lost an indent
              @indents.splice indentIndex, 1
              # stay on the same index: the next level has shifted into it
              --indentIndex
              @observe DEDENT
              @p "#{DEDENT}#{TERM}"
            else
              # Some ambiguous dedent
              lines = @ss.str.substr(0, @ss.pos).split(/\n/) || ['']
              message = "Syntax error on line #{lines.length}: indention is ambiguous"
              lineLen = @indents.reduce ((l, r) -> l + r.length), 0
              context = pointToErrorLocation @ss.str, lines.length, lineLen
              throw new Error "#{message}\n#{context}"
            ++indentIndex
          if @ss.check /// [#{ws}]+ [^#{ws}#] ///
            # an indent
            @indents.push @scan /// [#{ws}]+ ///
            @observe INDENT
            @p INDENT

        # copy "safe" characters, then look for the closer of the current bracket
        tok = switch @peek()
          when '['
            # safe things, but not closing bracket
            @scan /[^\n'"\\\/#`[({\]]+/
            @scan /\]/
          when '('
            # safe things, but not closing paren
            @scan /[^\n'"\\\/#`[({)]+/
            @scan /\)/
          when '#{', '{'
            # safe things, but not closing brace
            @scan /[^\n'"\\\/#`[({}]+/
            @scan /\}/
          else
            # scan safe characters (anything that doesn't *introduce* context)
            @scan /[^\n'"\\\/#`[({]+/
            null
        if tok
          @observe tok
          continue

        if tok = @scan /"""|'''|\/\/\/|###|["'`#[({\\]/
          @observe tok
        else if tok = @scan /\//
          # unfortunately, we must look behind us to determine if this is a regexp or division
          pos = @ss.position()
          if pos > 1
            lastChar = @ss.string()[pos - 2]
            spaceBefore = ///[#{ws}]///.test lastChar
            nonIdentifierBefore = /[\W_$]/.test lastChar # TODO: this should perform a real test
          if pos is 1 or (if spaceBefore then not @ss.check /// [#{ws}=] /// else nonIdentifierBefore)
            @observe '/'
      when '\\'
        # a backslash escapes exactly one following character (newline included)
        if (@scan /[\s\S]/) then @observe 'end-\\'
        # TODO: somehow prevent indent tokens from being inserted after these newlines
      when '"""'
        @scan /(?:[^"#\\]+|""?(?!")|#(?!{)|\\.)+/
        # `@ss.scan` (not `@scan`): a backslash-newline is consumed without being emitted
        @ss.scan /\\\n/
        if tok = @scan /#{|"""/ then @observe tok
      when '"'
        @scan /(?:[^"#\\]+|#(?!{)|\\.)+/
        @ss.scan /\\\n/
        if tok = @scan /#{|"/ then @observe tok
      when '\'\'\''
        @scan /(?:[^'\\]+|''?(?!')|\\.)+/
        @ss.scan /\\\n/
        if tok = @scan /'''/ then @observe tok
      when '\''
        @scan /(?:[^'\\]+|\\.)+/
        @ss.scan /\\\n/
        if tok = @scan /'/ then @observe tok
      when '###'
        @scan /(?:[^#]+|##?(?!#))+/
        if tok = @scan /###/ then @observe tok
      when '#'
        @scan /[^\n]+/
        if tok = @scan /\n/ then @observe tok
      when '`'
        @scan /[^`]+/
        if tok = @scan /`/ then @observe tok
      when '///'
        @scan /(?:[^[/#\\]+|\/\/?(?!\/)|\\.)+/
        if tok = @scan /#{|\/\/\/|\\/ then @observe tok
        # a heregexp comment marker is consumed without being emitted
        else if @ss.scan /#/ then @observe 'heregexp-#'
        else if tok = @scan /[\[]/ then @observe "heregexp-#{tok}"
      when 'heregexp-['
        @scan /(?:[^\]\/\\]+|\/\/?(?!\/))+/
        if tok = @scan /[\]\\]|#{|\/\/\// then @observe tok
      when 'heregexp-#'
        # the comment body is consumed without being emitted
        @ss.scan /(?:[^\n/]+|\/\/?(?!\/))+/
        if tok = @scan /\n|\/\/\// then @observe tok
      #when 'heregexp-('
      #  @scan /(?:[^)/[({#\\]+|\/\/?(?!\/))+/
      #  if tok = @ss.scan /#(?!{)/ then @observe 'heregexp-#'
      #  else if tok = @scan /[)\\]|#{|\/\/\// then @observe tok
      #  else if tok = @scan /[[({]/ then @observe "heregexp-#{tok}"
      #when 'heregexp-{'
      #  @scan /(?:[^}/[({#\\]+|\/\/?(?!\/))+/
      #  if tok = @ss.scan /#(?!{)/ then @observe 'heregexp-#'
      #  else if tok = @scan /[}/\\]|#{|\/\/\// then @observe tok
      #  else if tok = @scan /[[({]/ then @observe "heregexp-#{tok}"
      when '/'
        @scan /[^[/\\]+/
        if tok = @scan /[\/\\]/ then @observe tok
        else if tok = @scan /\[/ then @observe "regexp-#{tok}"
      when 'regexp-['
        @scan /[^\]\\]+/
        if tok = @scan /[\]\\]/ then @observe tok
      #when 'regexp-('
      #  @scan /[^)/[({\\]+/
      #  if tok = @scan /[)/\\]/ then @observe tok
      #  else if tok = @scan /[[({]/ then @observe "regexp-#{tok}"
      #when 'regexp-{'
      #  @scan /[^}/[({\\]+/
      #  if tok = @scan /[}/\\]/ then @observe tok
      #  else if tok = @scan /[[({]/ then @observe "regexp-#{tok}"

  # reached the end of the file
  if isEnd
    @scan /// [#{ws}\n]* $ ///
    # close every indentation level that is still open
    while @context.length and INDENT is @peek()
      @observe DEDENT
      @p "#{DEDENT}#{TERM}"
    if @context.length
      # TODO: store offsets of tokens when inserted and report position of unclosed starting token
      throw new Error 'Unclosed ' + (inspect @peek()) + ' at EOF'
    @emit 'end'
    return

  return
242
# Entry points built from `processInput`:
# `processData` appends a chunk to the scanner and processes what it can;
# `processEnd` takes no new input, closes any open indents, and emits 'end'.
processData: processInput no
processEnd: processInput yes
# Preprocess the whole string `input` synchronously and return the
# preprocessed text. Errors thrown during preprocessing propagate to the caller.
@processSync = (input) ->
  chunks = []
  preprocessor = new Preprocessor
  # Replace event dispatch with a collector: keep the payload of every 'data'
  # event and ignore all other event types.
  preprocessor.emit = (type, data) ->
    chunks.push data if type is 'data'
  preprocessor.processData input
  preprocessor.processEnd()
  chunks.join ''

1	`fs = require 'fs'`
2	`{EventEmitter} = require 'events'`
3	`{pointToErrorLocation} = require './helpers'`
4	`StringScanner = require 'StringScanner'`
5
6	`inspect = (o) -> (require 'util').inspect o, no, 9e9, yes`
7
8
9	`# TODO: better comments`
10	`# TODO: support win32-style line endings`
11
12	`@Preprocessor = class Preprocessor extends EventEmitter`
13
14	`ws = '\\t\\x0B\\f\\r \\xA0\\u1680\\u180E\\u2000-\\u200A\\u202F\\u205F\\u3000\\uFEFF'`
15	`INDENT = '\uEFEF'`
16	`DEDENT = '\uEFFE'`
17	`TERM = '\uEFFF'`
18
19	`constructor: ->`
20	# `base` is either `null` or a regexp that matches the base indentation
21	`@base = null`
22	# `indents` is an array of successive indentation characters.
23	`@indents = []`
24	`@context = []`
25	`@ss = new StringScanner ''`
26
27	`err: (c) ->`
28	`token =`
29	`switch c`
30	`when INDENT`
31	`'INDENT'`
32	`when DEDENT`
33	`'DEDENT'`
34	`when TERM`
35	`'TERM'`
36	`else`
37	`inspect c`
38	`# This isn't perfect for error location tracking, but since we normally call this after a scan, it tends to work well.`
39	`lines = @ss.str.substr(0, @ss.pos).split(/\n/) \|\| ['']`
40	`columns = if lines[lines.length-1]? then lines[lines.length-1].length else 0`
41	`context = pointToErrorLocation @ss.str, lines.length, columns`
42	`throw new Error "Unexpected #{token}\n#{context}"`
43
44	`peek: -> if @context.length then @context[@context.length - 1] else null`
45
46	`observe: (c) ->`
47	`top = @peek()`
48	`switch c`
49	`# opening token is closing token`
50	when '"""', '\'\'\'', '"', '\'', '###', '`', '///', '/'
51	`if top is c then do @context.pop`
52	`else @context.push c`
53	`# strictly opening tokens`
54	`when INDENT, '#', '#{', '[', '(', '{', '\\', 'regexp-[', 'regexp-(', 'regexp-{', 'heregexp-#', 'heregexp-[', 'heregexp-(', 'heregexp-{'`
55	`@context.push c`
56	`# strictly closing tokens`
57	`when DEDENT`
58	`(@err c) unless top is INDENT`
59	`do @context.pop`
60	`when '\n'`
61	`(@err c) unless top in ['#', 'heregexp-#']`
62	`do @context.pop`
63	`when ']'`
64	`(@err c) unless top in ['[', 'regexp-[', 'heregexp-[']`
65	`do @context.pop`
66	`when ')'`
67	`(@err c) unless top in ['(', 'regexp-(', 'heregexp-(']`
68	`do @context.pop`
69	`when '}'`
70	`(@err c) unless top in ['#{', '{', 'regexp-{', 'heregexp-{']`
71	`do @context.pop`
72	`when 'end-\\'`
73	`(@err c) unless top is '\\'`
74	`do @context.pop`
75	`else throw new Error "undefined token observed: " + c`
76	`@context`
77
78	`p: (s) ->`
79	`if s? then @emit 'data', s`
80	`s`
81
82	`scan: (r) -> @p @ss.scan r`
83
84	`processInput = (isEnd) -> (data) ->`
85	`@ss.concat data unless isEnd`
86
87	`until @ss.eos()`
88	`switch @peek()`
89	`when null, INDENT, '#{', '[', '(', '{'`
90	`if @ss.bol() or @scan /// (?:[#{ws}]* \n)+ ///`
91
92	`@scan /// (?: [#{ws}]* (\#\#?(?!\#)[^\n]*)? \n )+ ///`
93
94	`# we might require more input to determine indentation`
95	`return if not isEnd and (@ss.check /// [#{ws}\n]* $ ///)?`
96
97	`# consume base indentation`
98	`if @base?`
99	`unless (@scan @base)?`
100	`throw new Error "inconsistent base indentation"`
101	`else`
102	`@base = /// #{@scan /// [#{ws}]* ///} ///`
103
104	`# move through each level of indentation`
105	`indentIndex = 0`
106	`while indentIndex < @indents.length`
107	`indent = @indents[indentIndex]`
108	`if @ss.check /// #{indent} ///`
109	`# an existing indent`
110	`@scan /// #{indent} ///`
111	`else if @ss.check /// [^#{ws}] ///`
112	`# we lost an indent`
113	`@indents.splice indentIndex, 1`
114	`--indentIndex`
115	`@observe DEDENT`
116	`@p "#{DEDENT}#{TERM}"`
117	`else`
118	`# Some ambiguous dedent`
119	`lines = @ss.str.substr(0, @ss.pos).split(/\n/) \|\| ['']`
120	`message = "Syntax error on line #{lines.length}: indention is ambiguous"`
121	`lineLen = @indents.reduce ((l, r) -> l + r.length), 0`
122	`context = pointToErrorLocation @ss.str, lines.length, lineLen`
123	`throw new Error "#{message}\n#{context}"`
124	`++indentIndex`
125	`if @ss.check /// [#{ws}]+ [^#{ws}#] ///`
126	`# an indent`
127	`@indents.push @scan /// [#{ws}]+ ///`
128	`@observe INDENT`
129	`@p INDENT`
130
131	`tok = switch @peek()`
132	`when '['`
133	`# safe things, but not closing bracket`
134	@scan /[^\n'"\\\/#`[({\]]+/
135	`@scan /\]/`
136	`when '('`
137	`# safe things, but not closing paren`
138	@scan /[^\n'"\\\/#`[({)]+/
139	`@scan /\)/`
140	`when '#{', '{'`
141	`# safe things, but not closing brace`
142	@scan /[^\n'"\\\/#`[({}]+/
143	`@scan /\}/`
144	`else`
145	`# scan safe characters (anything that doesn't introduce context)`
146	@scan /[^\n'"\\\/#`[({]+/
147	`null`
148	`if tok`
149	`@observe tok`
150	`continue`
151
152	if tok = @scan /"""\|'''\|\/\/\/\|###\|["'`#[({\\]/
153	`@observe tok`
154	`else if tok = @scan /\//`
155	`# unfortunately, we must look behind us to determine if this is a regexp or division`
156	`pos = @ss.position()`
157	`if pos > 1`
158	`lastChar = @ss.string()[pos - 2]`
159	`spaceBefore = ///[#{ws}]///.test lastChar`
160	`nonIdentifierBefore = /[\W_$]/.test lastChar # TODO: this should perform a real test`
161	`if pos is 1 or (if spaceBefore then not @ss.check /// [#{ws}=] /// else nonIdentifierBefore)`
162	`@observe '/'`
163	`when '\\'`
164	`if (@scan /[\s\S]/) then @observe 'end-\\'`
165	`# TODO: somehow prevent indent tokens from being inserted after these newlines`
166	`when '"""'`
167	`@scan /(?:[^"#\\]+\|""?(?!")\|#(?!{)\|\\.)+/`
168	`@ss.scan /\\\n/`
169	`if tok = @scan /#{\|"""/ then @observe tok`
170	`else if tok = @scan /#{\|"""/ then @observe tok`
171	`when '"'`
172	`@scan /(?:[^"#\\]+\|#(?!{)\|\\.)+/`
173	`@ss.scan /\\\n/`
174	`if tok = @scan /#{\|"/ then @observe tok`
175	`when '\'\'\''`
176	`@scan /(?:[^'\\]+\|''?(?!')\|\\.)+/`
177	`@ss.scan /\\\n/`
178	`if tok = @scan /'''/ then @observe tok`
179	`when '\''`
180	`@scan /(?:[^'\\]+\|\\.)+/`
181	`@ss.scan /\\\n/`
182	`if tok = @scan /'/ then @observe tok`
183	`when '###'`
184	`@scan /(?:[^#]+\|##?(?!#))+/`
185	`if tok = @scan /###/ then @observe tok`
186	`when '#'`
187	`@scan /[^\n]+/`
188	`if tok = @scan /\n/ then @observe tok`
189	when '`'
190	@scan /[^`]+/
191	if tok = @scan /`/ then @observe tok
192	`when '///'`
193	`@scan /(?:[^[/#\\]+\|\/\/?(?!\/)\|\\.)+/`
194	`if tok = @scan /#{\|\/\/\/\|\\/ then @observe tok`
195	`else if @ss.scan /#/ then @observe 'heregexp-#'`
196	`else if tok = @scan /[\[]/ then @observe "heregexp-#{tok}"`
197	`when 'heregexp-['`
198	`@scan /(?:[^\]\/\\]+\|\/\/?(?!\/))+/`
199	`if tok = @scan /[\]\\]\|#{\|\/\/\// then @observe tok`
200	`when 'heregexp-#'`
201	`@ss.scan /(?:[^\n/]+\|\/\/?(?!\/))+/`
202	`if tok = @scan /\n\|\/\/\// then @observe tok`
203	`#when 'heregexp-('`
204	`# @scan /(?:[^)/[({#\\]+\|\/\/?(?!\/))+/`
205	`# if tok = @ss.scan /#(?!{)/ then @observe 'heregexp-#'`
206	`# else if tok = @scan /[)\\]\|#{\|\/\/\// then @observe tok`
207	`# else if tok = @scan /[[({]/ then @observe "heregexp-#{tok}"`
208	`#when 'heregexp-{'`
209	`# @scan /(?:[^}/[({#\\]+\|\/\/?(?!\/))+/`
210	`# if tok = @ss.scan /#(?!{)/ then @observe 'heregexp-#'`
211	`# else if tok = @scan /[}/\\]\|#{\|\/\/\// then @observe tok`
212	`# else if tok = @scan /[[({]/ then @observe "heregexp-#{tok}"`
213	`when '/'`
214	`@scan /[^[/\\]+/`
215	`if tok = @scan /[\/\\]/ then @observe tok`
216	`else if tok = @scan /\[/ then @observe "regexp-#{tok}"`
217	`when 'regexp-['`
218	`@scan /[^\]\\]+/`
219	`if tok = @scan /[\]\\]/ then @observe tok`
220	`#when 'regexp-('`
221	`# @scan /[^)/[({\\]+/`
222	`# if tok = @scan /[)/\\]/ then @observe tok`
223	`# else if tok = @scan /[[({]/ then @observe "regexp-#{tok}"`
224	`#when 'regexp-{'`
225	`# @scan /[^}/[({\\]+/`
226	`# if tok = @scan /[}/\\]/ then @observe tok`
227	`# else if tok = @scan /[[({]/ then @observe "regexp-#{tok}"`
228
229	`# reached the end of the file`
230	`if isEnd`
231	`@scan /// [#{ws}\n]* $ ///`
232	`while @context.length and INDENT is @peek()`
233	`@observe DEDENT`
234	`@p "#{DEDENT}#{TERM}"`
235	`if @context.length`
236	`# TODO: store offsets of tokens when inserted and report position of unclosed starting token`
237	`throw new Error 'Unclosed ' + (inspect @peek()) + ' at EOF'`
238	`@emit 'end'`
239	`return`
240
241	`return`
242
243	`processData: processInput no`
244	`processEnd: processInput yes`
245	`@processSync = (input) ->`
246	`pre = new Preprocessor`
247	`output = ''`
248	`pre.emit = (type, data) -> output += data if type is 'data'`
249	`pre.processData input`
250	`do pre.processEnd`
251	`output`