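// Retrieved through the UNPKG file viewer (16.7 kB, JavaScript, "View Raw").
// The lines above the module were page chrome from the viewer, not part of the source.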
1(function (Prism) {
2
// Nested grammar applied inside every comment token: highlights attention
// markers such as TODO, FIXME, NOTE, BUG, XXX, HACK and WARNING, plus runs of
// two or more `?` or `!`.
var comment_inside = {
	// Word boundaries only make sense for the keyword alternatives. Punctuation
	// runs are matched on their own, because `\b` next to `?` or `!` would
	// require an adjacent word character and so never match a standalone `??`.
	'function': /\b(?:TODOS?|FIX(?:MES?)?|NOTES?|BUGS?|XX+|HACKS?|WARN(?:ING)?)\b|\?{2,}|!{2,}/
};
// Nested grammar applied inside string tokens.
var string_inside = {
	// Either a backslash followed by one character that is neither whitespace
	// nor a single quote, or a percent sign followed by one word character.
	'number': /\\[^\s']|%\w/
};
9
// Prism grammar for the Factor language.
//
// Factor tokens are whitespace-delimited, so nearly every pattern captures the
// preceding whitespace (or start of input) in group 1 and sets `lookbehind: true`
// so that group is excluded from the token, and ends with a `(?=\s|$)` lookahead.
// Key order is significant: earlier entries are tried before later ones.
var factor = {
	'comment': [
		{
			// ! single-line exclamation point comments with whitespace after/around the !
			pattern: /(^|\s)(?:! .*|!$)/,
			lookbehind: true,
			inside: comment_inside
		},

		/* from basis/multiline: */
		{
			// /* comment */, /* comment*/
			pattern: /(^|\s)\/\*\s[\s\S]*?\*\/(?=\s|$)/,
			lookbehind: true,
			greedy: true,
			inside: comment_inside
		},
		{
			// ![[ comment ]] , ![===[ comment]===]
			// group 2 captures the run of `=` so the closing bracket must use the same count
			pattern: /(^|\s)!\[(={0,6})\[\s[\s\S]*?\]\2\](?=\s|$)/,
			lookbehind: true,
			greedy: true,
			inside: comment_inside
		}
	],

	'number': [
		{
			// basic base 10 integers 9, -9
			pattern: /(^|\s)[+-]?\d+(?=\s|$)/,
			lookbehind: true
		},
		{
			// base prefix integers 0b010 0o70 0xad 0d10 0XAD -0xa9
			pattern: /(^|\s)[+-]?0(?:b[01]+|o[0-7]+|d\d+|x[\dA-F]+)(?=\s|$)/i,
			lookbehind: true
		},
		{
			// fractional ratios 1/5 -1/5 and the literal float approximations 1/5. -1/5.
			pattern: /(^|\s)[+-]?\d+\/\d+\.?(?=\s|$)/,
			lookbehind: true
		},
		{
			// positive mixed numbers 23+1/5 +23+1/5
			pattern: /(^|\s)\+?\d+\+\d+\/\d+(?=\s|$)/,
			lookbehind: true
		},
		{
			// negative mixed numbers -23-1/5
			pattern: /(^|\s)-\d+-\d+\/\d+(?=\s|$)/,
			lookbehind: true
		},
		{
			// basic decimal floats -0.01 0. .0 .1 -.1 -1. -12.13 +12.13
			// and scientific notation with base 10 exponents 3e4 3e-4 .3e-4
			pattern: /(^|\s)[+-]?(?:\d*\.\d+|\d+\.\d*|\d+)(?:e[+-]?\d+)?(?=\s|$)/i,
			lookbehind: true
		},
		{
			// NAN literal syntax NAN: 80000deadbeef, NAN: a
			pattern: /(^|\s)NAN:\s+[\da-fA-F]+(?=\s|$)/,
			lookbehind: true
		},
		{
			/*
				base prefix floats 0x1.0p3 (8.0) 0b1.010p2 (5.0) 0x1.p1 0b1.11111111p11111...
				"The normalized hex form ±0x1.MMMMMMMMMMMMM[pP]±EEEE allows any floating-point number to be specified precisely.
				The values of MMMMMMMMMMMMM and EEEE map directly to the mantissa and exponent fields of the binary IEEE 754 representation."
				<https://docs.factorcode.org/content/article-syntax-floats.html>

				The exponent may carry its own sign (the ±EEEE part above), e.g. -0x1.0p-3.
			*/
			pattern: /(^|\s)[+-]?0(?:b1\.[01]*|o1\.[0-7]*|d1\.\d*|x1\.[\dA-F]*)p[+-]?\d+(?=\s|$)/i,
			lookbehind: true
		}
	],

	// R/ regexp?\/\\/
	'regexp': {
		pattern: /(^|\s)R\/\s(?:\\\S|[^\\/])*\/(?:[idmsr]*|[idmsr]+-[idmsr]+)(?=\s|$)/,
		lookbehind: true,
		alias: 'number',
		inside: {
			// backslash escapes
			'variable': /\\\S/,
			// regex metacharacters
			'keyword': /[+?*\[\]^$(){}.|]/,
			// option letters after the closing slash, optionally followed by `-` and more letters
			'operator': {
				pattern: /(\/)[idmsr]+(?:-[idmsr]+)?/,
				lookbehind: true
			}
		}
	},

	'boolean': {
		pattern: /(^|\s)[tf](?=\s|$)/,
		lookbehind: true
	},

	// SBUF" asd", URL" ://...", P" /etc/"
	'custom-string': {
		pattern: /(^|\s)[A-Z0-9\-]+"\s(?:\\\S|[^"\\])*"/,
		lookbehind: true,
		greedy: true,
		alias: 'string',
		inside: {
			'number': /\\\S|%\w|\//
		}
	},

	'multiline-string': [
		{
			// STRING: name \n content \n ; -> CONSTANT: name "content" (symbol)
			// NOTE: `.*` does not cross line breaks, so only a single content line is matched
			pattern: /(^|\s)STRING:\s+\S+(?:\n|\r\n).*(?:\n|\r\n)\s*;(?=\s|$)/,
			lookbehind: true,
			greedy: true,
			alias: 'string',
			inside: {
				'number': string_inside.number,
				// trailing semicolon on its own line
				'semicolon-or-setlocal': {
					pattern: /((?:\n|\r\n)\s*);(?=\s|$)/,
					lookbehind: true,
					alias: 'function'
				}
			}
		},
		{
			// HEREDOC: marker \n content \n marker ; -> "content" (immediate)
			// NOTE: a single content line only, and the closing token is any `\S+`;
			// it is not checked against the opening marker
			pattern: /(^|\s)HEREDOC:\s+\S+(?:\n|\r\n).*(?:\n|\r\n)\s*\S+(?=\s|$)/,
			lookbehind: true,
			greedy: true,
			alias: 'string',
			inside: string_inside
		},
		{
			// [[ string ]], [==[ string]==]
			pattern: /(^|\s)\[(={0,6})\[\s[\s\S]*?\]\2\](?=\s|$)/,
			lookbehind: true,
			greedy: true,
			alias: 'string',
			inside: string_inside
		}
	],

	'special-using': {
		// USING: followed by whitespace-separated names, up to (not including) the closing ;
		pattern: /(^|\s)USING:(?:\s\S+)*(?=\s+;(?:\s|$))/,
		lookbehind: true,
		alias: 'function',
		inside: {
			// Not a real vocab-name grammar: any whitespace-preceded run without a colon.
			// Excluding the colon keeps `USING:` itself from being picked up as a vocab name.
			'string': {
				pattern: /(\s)[^:\s]+/,
				lookbehind: true
			}
		}
	},

	/* this description of stack effect literal syntax is not complete and not as specific as theoretically possible
		trying to do better is more work and regex-computation-time than it's worth though.
		- we'd like to have the "delimiter" parts of the stack effect [ (, --, and ) ] be a different (less-important or comment-like) colour to the stack effect contents
		- we'd like if nested stack effects were treated as such rather than just appearing flat (with `inside`)
		- we'd like if the following variable name conventions were recognised specifically:
			special row variables = ..a b..
			type and stack effect annotations end with a colon = ( quot: ( a: ( -- ) -- b ) -- x ), ( x: number -- )
			word throws unconditional error = *
			any other word-like variable name = a ? q' etc

		https://docs.factorcode.org/content/article-effects.html

		these are pretty complicated to highlight properly without a real parser, and therefore out of scope
		the old pattern, which may be later useful, was: (^|\s)(?:call|execute|eval)?\((?:\s+[^"\r\n\t ]\S*)*?\s+--(?:\s+[^"\n\t ]\S*)*?\s+\)(?=\s|$)
	*/

	// current solution is not great
	'stack-effect-delimiter': [
		{
			// opening parenthesis
			pattern: /(^|\s)(?:call|execute|eval)?\((?=\s)/,
			lookbehind: true,
			alias: 'operator'
		},
		{
			// middle --
			pattern: /(\s)--(?=\s)/,
			lookbehind: true,
			alias: 'operator'
		},
		{
			// closing parenthesis
			pattern: /(\s)\)(?=\s|$)/,
			lookbehind: true,
			alias: 'operator'
		}
	],

	// The `pattern: null` entries below are placeholders that fix each token's
	// position in the grammar; their regexes are generated from word lists and
	// assigned after this object literal has been created.
	'combinators': {
		pattern: null,
		lookbehind: true,
		alias: 'keyword'
	},

	'kernel-builtin': {
		pattern: null,
		lookbehind: true,
		alias: 'variable'
	},

	'sequences-builtin': {
		pattern: null,
		lookbehind: true,
		alias: 'variable'
	},

	'math-builtin': {
		pattern: null,
		lookbehind: true,
		alias: 'variable'
	},

	'constructor-word': {
		// <array> but not <=>
		pattern: /(^|\s)<(?!=+>|-+>)\S+>(?=\s|$)/,
		lookbehind: true,
		alias: 'keyword'
	},

	'other-builtin-syntax': {
		pattern: null,
		lookbehind: true,
		alias: 'operator'
	},

	/*
		full list of supported word naming conventions: (the convention appears outside of the [brackets])
			set-[x]
			change-[x]
			with-[x]
			new-[x]
			>[string]
			[base]>
			[string]>[number]
			+[symbol]+
			[boolean-word]?
			?[of]
			[slot-reader]>>
			>>[slot-setter]
			[slot-writer]<<
			([implementation-detail])
			[mutater]!
			[variant]*
			[prettyprint].
			$[help-markup]

		<constructors>, SYNTAX:, etc are supported by their own patterns.

		`with` and `new` from `kernel` are their own builtins.

		see <https://docs.factorcode.org/content/article-conventions.html>
	*/
	'conventionally-named-word': {
		pattern: /(^|\s)(?!")(?:(?:set|change|with|new)-\S+|\$\S+|>[^>\s]+|[^:>\s]+>|[^>\s]+>[^>\s]+|\+[^+\s]+\+|[^?\s]+\?|\?[^?\s]+|[^>\s]+>>|>>[^>\s]+|[^<\s]+<<|\([^()\s]+\)|[^!\s]+!|[^*\s]\S*\*|[^.\s]\S*\.)(?=\s|$)/,
		lookbehind: true,
		alias: 'keyword'
	},

	'colon-syntax': {
		// an optional upper-case/digit/dash prefix (with optional #), one or two colons,
		// then the following word
		pattern: /(^|\s)(?:[A-Z0-9\-]+#?)?:{1,2}\s+(?:;\S+|(?!;)\S+)(?=\s|$)/,
		lookbehind: true,
		greedy: true,
		alias: 'function'
	},

	'semicolon-or-setlocal': {
		// a lone ; or :>
		pattern: /(\s)(?:;|:>)(?=\s|$)/,
		lookbehind: true,
		alias: 'function'
	},

	// do not highlight leading } or trailing X{ at the begin/end of the file as it's invalid syntax
	'curly-brace-literal-delimiter': [
		{
			// opening
			pattern: /(^|\s)[a-z]*\{(?=\s)/i,
			lookbehind: true,
			alias: 'operator'
		},
		{
			// closing
			pattern: /(\s)\}(?=\s|$)/,
			lookbehind: true,
			alias: 'operator'
		}
	],

	// do not highlight leading ] or trailing [ at the begin/end of the file as it's invalid syntax
	'quotation-delimiter': [
		{
			// opening
			pattern: /(^|\s)\[(?=\s)/,
			lookbehind: true,
			alias: 'operator'
		},
		{
			// closing
			pattern: /(\s)\](?=\s|$)/,
			lookbehind: true,
			alias: 'operator'
		}
	],

	'normal-word': {
		// any remaining whitespace-delimited token that does not start with a double quote
		pattern: /(^|\s)[^"\s]\S*(?=\s|$)/,
		lookbehind: true
	},

	/*
		basic first-class string "a"
		with escaped double-quote "a\""
		escaped backslash "\\"
		and general escapes since Factor has so many "\N"

		syntax that works in the reference implementation that isn't fully
		supported because it's an implementation detail:
			"string 1""string 2" -> 2 strings (works anyway)
			"string"5 -> string, 5
			"string"[ ] -> string, quotation
			{ "a"} -> array<string>

		the rest of those examples all properly recognise the string, but not
		the other object (number, quotation, etc)
		this is fine for a regex-only implementation.
	*/
	'string': {
		pattern: /"(?:\\\S|[^"\\])*"/,
		greedy: true,
		inside: string_inside
	}
};
347
/**
 * Backslash-escapes every regular-expression metacharacter in a value so it
 * can be embedded literally in a pattern passed to `new RegExp`.
 *
 * Escaped characters: . ? * + ^ $ [ ] \ ( ) { } | -
 *
 * @param {*} str Value to escape; it is converted to a string first.
 * @returns {string} The escaped text.
 */
var escape = function (str) {
	return String(str).replace(/([.?*+\^$\[\]\\(){}|\-])/g, '\\$1');
};
351
/**
 * Builds a regex that matches any one of the given words as a complete,
 * whitespace-delimited token.
 *
 * Group 1 captures the preceding whitespace (or start of input), so the
 * result is meant to be used with `lookbehind: true`.
 *
 * @param {string[]} arr Words to match literally; each one is regex-escaped.
 * @returns {RegExp} The combined pattern. An empty list yields a pattern that
 *   never matches, instead of one that matches empty tokens.
 */
var arrToWordsRegExp = function (arr) {
	if (arr.length === 0) {
		// `(?:)` would match a zero-length token at every whitespace boundary.
		return /(?!)/;
	}
	return new RegExp(
		'(^|\\s)(?:' + arr.map(escape).join('|') + ')(?=\\s|$)'
	);
};
357
// Word lists for the grammar tokens whose patterns are generated rather than
// written by hand. Each key is the name of the grammar token that the list
// belongs to.
var builtins = {
	'kernel-builtin': [
		'or', '2nipd', '4drop', 'tuck', 'wrapper', 'nip', 'wrapper?', 'callstack>array', 'die', 'dupd', 'callstack', 'callstack?', '3dup', 'hashcode', 'pick', '4nip', 'build', '>boolean', 'nipd', 'clone', '5nip', 'eq?', '?', '=', 'swapd', '2over', 'clear', '2dup', 'get-retainstack', 'not', 'tuple?', 'dup', '3nipd', 'call', '-rotd', 'object', 'drop', 'assert=', 'assert?', '-rot', 'execute', 'boa', 'get-callstack', 'curried?', '3drop', 'pickd', 'overd', 'over', 'roll', '3nip', 'swap', 'and', '2nip', 'rotd', 'throw', '(clone)', 'hashcode*', 'spin', 'reach', '4dup', 'equal?', 'get-datastack', 'assert', '2drop', '<wrapper>', 'boolean?', 'identity-hashcode', 'identity-tuple?', 'null', 'composed?', 'new', '5drop', 'rot', '-roll', 'xor', 'identity-tuple', 'boolean'
	],
	'other-builtin-syntax': [
		// syntax
		'=======', 'recursive', 'flushable', '>>', '<<<<<<', 'M\\', 'B', 'PRIVATE>', '\\', '======', 'final', 'inline', 'delimiter', 'deprecated', '<PRIVATE', '>>>>>>', '<<<<<<<', 'parse-complex', 'malformed-complex', 'read-only', '>>>>>>>', 'call-next-method', '<<', 'foldable',
		// literals
		'$', '$[', '${'
	],
	'sequences-builtin': [
		'member-eq?', 'mismatch', 'append', 'assert-sequence=', 'longer', 'repetition', 'clone-like', '3sequence', 'assert-sequence?', 'last-index-from', 'reversed', 'index-from', 'cut*', 'pad-tail', 'join-as', 'remove-eq!', 'concat-as', 'but-last', 'snip', 'nths', 'nth', 'sequence', 'longest', 'slice?', '<slice>', 'remove-nth', 'tail-slice', 'empty?', 'tail*', 'member?', 'virtual-sequence?', 'set-length', 'drop-prefix', 'iota', 'unclip', 'bounds-error?', 'unclip-last-slice', 'non-negative-integer-expected', 'non-negative-integer-expected?', 'midpoint@', 'longer?', '?set-nth', '?first', 'rest-slice', 'prepend-as', 'prepend', 'fourth', 'sift', 'subseq-start', 'new-sequence', '?last', 'like', 'first4', '1sequence', 'reverse', 'slice', 'virtual@', 'repetition?', 'set-last', 'index', '4sequence', 'max-length', 'set-second', 'immutable-sequence', 'first2', 'first3', 'supremum', 'unclip-slice', 'suffix!', 'insert-nth', 'tail', '3append', 'short', 'suffix', 'concat', 'flip', 'immutable?', 'reverse!', '2sequence', 'sum', 'delete-all', 'indices', 'snip-slice', '<iota>', 'check-slice', 'sequence?', 'head', 'append-as', 'halves', 'sequence=', 'collapse-slice', '?second', 'slice-error?', 'product', 'bounds-check?', 'bounds-check', 'immutable', 'virtual-exemplar', 'harvest', 'remove', 'pad-head', 'last', 'set-fourth', 'cartesian-product', 'remove-eq', 'shorten', 'shorter', 'reversed?', 'shorter?', 'shortest', 'head-slice', 'pop*', 'tail-slice*', 'but-last-slice', 'iota?', 'append!', 'cut-slice', 'new-resizable', 'head-slice*', 'sequence-hashcode', 'pop', 'set-nth', '?nth', 'second', 'join', 'immutable-sequence?', '<reversed>', '3append-as', 'virtual-sequence', 'subseq?', 'remove-nth!', 'length', 'last-index', 'lengthen', 'assert-sequence', 'copy', 'move', 'third', 'first', 'tail?', 'set-first', 'prefix', 'bounds-error', '<repetition>', 'exchange', 'surround', 'cut', 'min-length', 'set-third', 'push-all', 'head?', 'subseq-start-from', 'delete-slice', 'rest', 'sum-lengths', 'head*', 'infimum', 'remove!', 'glue', 'slice-error', 'subseq', 'push', 'replace-slice', 'subseq-as', 'unclip-last'
	],
	'math-builtin': [
		'number=', 'next-power-of-2', '?1+', 'fp-special?', 'imaginary-part', 'float>bits', 'number?', 'fp-infinity?', 'bignum?', 'fp-snan?', 'denominator', 'gcd', '*', '+', 'fp-bitwise=', '-', 'u>=', '/', '>=', 'bitand', 'power-of-2?', 'log2-expects-positive', 'neg?', '<', 'log2', '>', 'integer?', 'number', 'bits>double', '2/', 'zero?', 'bits>float', 'float?', 'shift', 'ratio?', 'rect>', 'even?', 'ratio', 'fp-sign', 'bitnot', '>fixnum', 'complex?', '/i', 'integer>fixnum', '/f', 'sgn', '>bignum', 'next-float', 'u<', 'u>', 'mod', 'recip', 'rational', '>float', '2^', 'integer', 'fixnum?', 'neg', 'fixnum', 'sq', 'bignum', '>rect', 'bit?', 'fp-qnan?', 'simple-gcd', 'complex', '<fp-nan>', 'real', '>fraction', 'double>bits', 'bitor', 'rem', 'fp-nan-payload', 'real-part', 'log2-expects-positive?', 'prev-float', 'align', 'unordered?', 'float', 'fp-nan?', 'abs', 'bitxor', 'integer>fixnum-strict', 'u<=', 'odd?', '<=', '/mod', '>integer', 'real?', 'rational?', 'numerator'
	]
	// that's all for now
};
376
// Fill in the `pattern: null` placeholders of the grammar: every key of
// `builtins` names a token in `factor`, whose pattern becomes a regex matching
// any word of that list.
Object.keys(builtins).forEach(function (k) {
	factor[k].pattern = arrToWordsRegExp(builtins[k]);
});
380
// Words highlighted through the `combinators` grammar token, grouped by the
// Factor vocabulary they come from.
var combinators = [
	// kernel
	'2bi', 'while', '2tri', 'bi*', '4dip', 'both?', 'same?', 'tri@', 'curry', 'prepose', '3bi', '?if', 'tri*', '2keep', '3keep', 'curried', '2keepd', 'when', '2bi*', '2tri*', '4keep', 'bi@', 'keepdd', 'do', 'unless*', 'tri-curry', 'if*', 'loop', 'bi-curry*', 'when*', '2bi@', '2tri@', 'with', '2with', 'either?', 'bi', 'until', '3dip', '3curry', 'tri-curry*', 'tri-curry@', 'bi-curry', 'keepd', 'compose', '2dip', 'if', '3tri', 'unless', 'tuple', 'keep', '2curry', 'tri', 'most', 'while*', 'dip', 'composed', 'bi-curry@',
	// sequences
	'find-last-from', 'trim-head-slice', 'map-as', 'each-from', 'none?', 'trim-tail', 'partition', 'if-empty', 'accumulate*', 'reject!', 'find-from', 'accumulate-as', 'collector-for-as', 'reject', 'map', 'map-sum', 'accumulate!', '2each-from', 'follow', 'supremum-by', 'map!', 'unless-empty', 'collector', 'padding', 'reduce-index', 'replicate-as', 'infimum-by', 'trim-tail-slice', 'count', 'find-index', 'filter', 'accumulate*!', 'reject-as', 'map-integers', 'map-find', 'reduce', 'selector', 'interleave', '2map', 'filter-as', 'binary-reduce', 'map-index-as', 'find', 'produce', 'filter!', 'replicate', 'cartesian-map', 'cartesian-each', 'find-index-from', 'map-find-last', '3map-as', '3map', 'find-last', 'selector-as', '2map-as', '2map-reduce', 'accumulate', 'each', 'each-index', 'accumulate*-as', 'when-empty', 'all?', 'collector-as', 'push-either', 'new-like', 'collector-for', '2selector', 'push-if', '2all?', 'map-reduce', '3each', 'any?', 'trim-slice', '2reduce', 'change-nth', 'produce-as', '2each', 'trim', 'trim-head', 'cartesian-find', 'map-index',
	// math
	'if-zero', 'each-integer', 'unless-zero', '(find-integer)', 'when-zero', 'find-last-integer', '(all-integers?)', 'times', '(each-integer)', 'find-integer', 'all-integers?',
	// math.combinators
	'unless-negative', 'if-positive', 'when-positive', 'when-negative', 'unless-positive', 'if-negative',
	// combinators
	'case', '2cleave', 'cond>quot', 'case>quot', '3cleave', 'wrong-values', 'to-fixed-point', 'alist>quot', 'cond', 'cleave', 'call-effect', 'recursive-hashcode', 'spread', 'deep-spread>quot',
	// combinators.short-circuit
	'2||', '0||', 'n||', '0&&', '2&&', '3||', '1||', '1&&', 'n&&', '3&&',
	// combinators.smart
	'smart-unless*', 'keep-inputs', 'reduce-outputs', 'smart-when*', 'cleave>array', 'smart-with', 'smart-apply', 'smart-if', 'inputs/outputs', 'output>sequence-n', 'map-outputs', 'map-reduce-outputs', 'dropping', 'output>array', 'smart-map-reduce', 'smart-2map-reduce', 'output>array-n', 'nullary', 'input<sequence', 'append-outputs', 'drop-inputs', 'inputs', 'smart-2reduce', 'drop-outputs', 'smart-reduce', 'preserving', 'smart-when', 'outputs', 'append-outputs-as', 'smart-unless', 'smart-if*', 'sum-outputs', 'input<sequence-unsafe', 'output>sequence'
	// that's all for now
];
398
// The combinator list is kept separately from `builtins`, so its placeholder
// pattern is filled in here.
factor.combinators.pattern = arrToWordsRegExp(combinators);

// Register the finished grammar with Prism under the language id `factor`.
Prism.languages.factor = factor;
402
403})(Prism);