UNPKG

43.4 kBJavaScriptView Raw
1// Released under MIT license
2// Copyright (c) 2009-2010 Dominic Baggott
3// Copyright (c) 2009-2010 Ash Berlin
4// Copyright (c) 2011 Christoph Dorn <christoph@christophdorn.com> (http://www.christophdorn.com)
5
6(function( expose ) {
7
/**
 *  class Markdown
 *
 *  Markdown processing in Javascript done right. We have very particular views
 *  on what constitutes 'right' which include:
 *
 *  - produces well-formed HTML (this means that em and strong nesting is
 *    important)
 *
 *  - has an intermediate representation to allow processing of parsed data (We
 *    in fact have two, both as [JsonML]: a markdown tree and an HTML tree).
 *
 *  - is easily extensible to add new dialects without having to rewrite the
 *    entire parsing mechanics
 *
 *  - has a good test suite
 *
 *  This implementation fulfills all of these (except that the test suite could
 *  do with expanding to automatically run all the fixtures from other Markdown
 *  implementations.)
 *
 *  ##### Constructor
 *
 *  new Markdown( [dialect] )
 *  - dialect (String | Object): name of an entry in `Markdown.dialects`, or a
 *    dialect object. `undefined` and `null` select `Markdown.dialects.Gruber`.
 *
 *  Throws an `Error` when `dialect` is neither an object nor the name of a
 *  registered dialect.
 *
 *  ##### Intermediate Representation
 *
 *  *TODO* Talk about this :) It's JsonML, but document the node names we use.
 *
 *  [JsonML]: http://jsonml.org/ "JSON Markup Language"
 **/
var Markdown = expose.Markdown = function Markdown(dialect) {
  switch (typeof dialect) {
    case "undefined":
      this.dialect = Markdown.dialects.Gruber;
      break;
    case "object":
      // typeof null is "object"; treat it like "no dialect given" instead of
      // storing a null dialect that would only fail later during parsing.
      this.dialect = dialect || Markdown.dialects.Gruber;
      break;
    default:
      // Own-property lookup: a plain `in` test would also accept inherited
      // names such as "toString" and install a non-dialect value.
      if ( Object.prototype.hasOwnProperty.call( Markdown.dialects, dialect ) ) {
        this.dialect = Markdown.dialects[dialect];
      }
      else {
        throw new Error("Unknown Markdown dialect '" + String(dialect) + "'");
      }
      break;
  }
  // Stacks of currently open emphasis / strong markers (see strong_em).
  this.em_state = [];
  this.strong_state = [];
  // Prefix prepended to every message printed by Markdown#debug.
  this.debug_indent = "";
};
56
/**
 *  parse( markdown, [dialect] ) -> JsonML
 *  - markdown (String): markdown string to parse
 *  - dialect (String | Dialect): the dialect to use; when omitted the
 *    Markdown constructor picks its default
 *
 *  Build a parser for `dialect` and return the result of running its
 *  `toTree` over `markdown`.
 **/
expose.parse = function( source, dialect ) {
  // An undefined dialect is resolved inside the constructor.
  return new Markdown( dialect ).toTree( source );
};
69
/**
 *  toHTML( markdown, [dialect], [options] ) -> String
 *  toHTML( md_tree ) -> String
 *  - markdown (String): markdown string to parse
 *  - md_tree (Markdown.JsonML): parsed markdown tree
 *  - options: forwarded untouched to [[toHTMLTree]]
 *
 *  Take markdown (either as a string or as a JsonML tree), run it through
 *  [[toHTMLTree]] and hand the resulting tree to `renderJsonML`.
 **/
expose.toHTML = function toHTML( source , dialect , options ) {
  return expose.renderJsonML( expose.toHTMLTree( source , dialect , options ) );
};
84
/**
 *  toHTMLTree( markdown, [dialect], [options] ) -> JsonML
 *  toHTMLTree( md_tree ) -> JsonML
 *  - markdown (String): markdown string to parse
 *  - dialect (String | Dialect): the dialect to use, defaults to gruber
 *  - md_tree (Markdown.JsonML): parsed markdown tree
 *  - options: passed through to `convert_tree_to_html`
 *
 *  Turn markdown into HTML, represented as a JsonML tree. If a string is given
 *  to this function, it is first parsed into a markdown tree by calling
 *  [[parse]].
 **/
expose.toHTMLTree = function toHTMLTree( input, dialect , options ) {
  // Convert string input to an MD tree. Go through `expose` rather than
  // `this` so the function still works when called detached from the module
  // object (toHTML already reaches its siblings the same way).
  if ( typeof input === "string" ) input = expose.parse( input, dialect );

  // Now convert the MD tree to an HTML tree.

  // Reference definitions, if any, live in the root node's attribute hash.
  var attrs = extract_attr( input ),
      refs = {};

  if ( attrs && attrs.references ) {
    refs = attrs.references;
  }

  var html = convert_tree_to_html( input, refs , options );
  merge_text_nodes( html );
  return html;
};
114
/**
 *  Markdown.mk_block( block, [trail], [line] ) -> String object
 *  - block (String): text of the block
 *  - trail (String): whitespace that followed the block; "\n\n" when the
 *    function is called with exactly one argument
 *  - line (Number): line number of the block's first line, if known
 *
 *  Wrap `block` in a String object carrying `trailing` (and `lineNumber` when
 *  `line` is given) as extra properties.
 **/
var mk_block = Markdown.mk_block = function(block, trail, line) {
  // Be helpful for default case in tests.
  if ( arguments.length == 1 ) trail = "\n\n";

  // A String *object* is required here: primitives cannot hold properties.
  var s = new String(block);
  s.trailing = trail;
  // To make it clear it's not just a string
  s.toSource = function() {
    // `uneval` only exists in SpiderMonkey; fall back to JSON elsewhere so
    // that calling toSource never raises a ReferenceError.
    var show = typeof uneval === "function" ? uneval : JSON.stringify;
    return "Markdown.mk_block( " +
            show(block) +
            ", " +
            show(trail) +
            ", " +
            show(line) +
            " )";
  };

  // Loose comparison on purpose: skips both undefined and null.
  if (line != undefined)
    s.lineNumber = line;

  return s;
};
137
// Return how many "\n" characters occur in `str`.
function count_lines( str ) {
  var total = 0;
  for ( var at = str.indexOf('\n'); at != -1; at = str.indexOf('\n', at + 1) ) {
    total++;
  }
  return total;
}
143
// Internal - split source into rough blocks.
//
// Returns an array of mk_block() strings: each holds the text of one block,
// the blank-line run that followed it (`trailing`) and the 1-based line on
// which it started (`lineNumber`). `startLine` is accepted but not used.
Markdown.prototype.split_blocks = function splitBlocks( input, startLine ) {
  // Normalise CRLF / lone CR line endings first, otherwise a stray "\r" is
  // left at the end of block text.
  input = input.replace( /\r\n?/g, "\n" );

  // [\s\S] matches _anything_ (newline or space)
  var re = /([\s\S]+?)($|\n(?:\s*\n|$)+)/g,
      blocks = [],
      m;

  var line_no = 1;

  if ( ( m = /^(\s*\n)/.exec(input) ) != null ) {
    // skip (but count) leading blank lines
    line_no += count_lines( m[0] );
    re.lastIndex = m[0].length;
  }

  while ( ( m = re.exec(input) ) != null ) {
    blocks.push( mk_block( m[1], m[2], line_no ) );
    // m[0] covers the block text plus its trailing blank lines
    line_no += count_lines( m[0] );
  }

  return blocks;
};
166
/**
 *  Markdown#processBlock( block, next ) -> undefined | [ JsonML, ... ]
 *  - block (String): the block to process
 *  - next (Array): the following blocks
 *
 *  Process `block` and return an array of JsonML nodes representing `block`.
 *
 *  Each block-level handler of the dialect is tried in `__order__` until one
 *  returns a truthy value. Successful handling is signalled by returning an
 *  array (with zero or more JsonML nodes), failure by a false value. When no
 *  handler accepts the block an empty array is returned.
 *
 *  Block handlers are responsible for calling [[Markdown#processInline]]
 *  themselves as appropriate.
 *
 *  If the blocks were split incorrectly or adjacent blocks need collapsing you
 *  can adjust `next` in place using shift/splice etc.
 *
 *  If any of this default behaviour is not right for the dialect, you can
 *  define a `__call__` method on the dialect's block table; it is then invoked
 *  instead and its result returned as-is.
 */
Markdown.prototype.processBlock = function processBlock( block, next ) {
  var handlers = this.dialect.block,
      order = handlers.__order__;

  if ( "__call__" in handlers ) {
    return handlers.__call__.call(this, block, next);
  }

  for ( var idx = 0; idx < order.length; idx++ ) {
    var name = order[idx],
        result = handlers[ name ].call( this, block, next );

    if ( !result ) continue;

    // A proper result is an array that is empty or starts with a JsonML node.
    var wellFormed = isArray(result) && ( result.length === 0 || isArray(result[0]) );
    if ( !wellFormed )
      this.debug(name, "didn't return a proper array");

    return result;
  }

  // Uhoh! no match! Should we throw an error?
  return [];
};
211
// Run the dialect's inline `__call__` over the string form of `block` and
// return whatever it produces.
Markdown.prototype.processInline = function processInline( block ) {
  var inlineHandlers = this.dialect.inline;
  return inlineHandlers.__call__.call( this, String( block ) );
};
215
/**
 *  Markdown#toTree( source, [custom_root] ) -> JsonML
 *  - source (String | Array): markdown source to parse, or an array of
 *    already split blocks
 *  - custom_root (Array): when given, nodes are appended to this array and
 *    the previous value of `this.tree` is restored on return
 *
 *  Parse `source` into a JsonML tree representing the markdown document.
 **/
Markdown.prototype.toTree = function toTree( source, custom_root ) {
  var queue = ( source instanceof Array ) ? source : this.split_blocks( source ),
      saved = this.tree;

  // The tree is a member variable so that extensions can get at it easily.
  try {
    this.tree = custom_root || this.tree || [ "markdown" ];

    while ( queue.length ) {
      // Handlers may consume or prepend entries of `queue` themselves.
      var nodes = this.processBlock( queue.shift(), queue );

      // Reference blocks and the like produce no content.
      if ( nodes.length ) this.tree.push.apply( this.tree, nodes );
    }
    return this.tree;
  }
  finally {
    if ( custom_root ) this.tree = saved;
  }
};
250
// Print a debug message, prefixed with `this.debug_indent`.
//
// Uses `console.log` when available and falls back to a global `print`
// (as found in JS shells) otherwise. Only one sink is used so a message is
// never emitted twice, and `print` is not touched where a console exists.
Markdown.prototype.debug = function () {
  var args = Array.prototype.slice.call( arguments );
  args.unshift( this.debug_indent );

  if ( typeof console !== "undefined" && typeof console.log !== "undefined" ) {
    // console.log must be invoked with console as its receiver
    console.log.apply( console, args );
  }
  else if ( typeof print !== "undefined" ) {
    print.apply( print, args );
  }
};
260
// Repeatedly match `re` against the front of `block`, calling `cb` (with the
// Markdown instance as `this`) for every match and chopping the matched
// length off the front. Returns the text left once `re` no longer matches.
//
// `re` is expected to be anchored at the start and must not carry the /g flag.
Markdown.prototype.loop_re_over_block = function( re, block, cb ) {
  // Dont use /g regexps with this
  var m,
      b = block.valueOf();

  while ( b.length && ( m = re.exec(b) ) != null ) {
    // A zero-length match consumes nothing and would repeat forever; treat
    // it as "no further match".
    if ( m[0].length === 0 ) break;

    b = b.substr( m[0].length );
    cb.call(this, m);
  }
  return b;
};
272
/**
 *  Markdown.dialects
 *
 *  Registry of the built-in dialects, keyed by dialect name. The Markdown
 *  constructor resolves string dialect names against this object.
 **/
Markdown.dialects = {};
279
280/**
281 * Markdown.dialects.Gruber
282 *
283 * The default dialect that follows the rules set out by John Gruber's
284 * markdown.pl as closely as possible. Well actually we follow the behaviour of
285 * that script which in some places is not exactly what the syntax web page
286 * says.
287 **/
288Markdown.dialects.Gruber = {
289 block: {
290 atxHeader: function atxHeader( block, next ) {
291 var m = block.match( /^(#{1,6})\s*(.*?)\s*#*\s*(?:\n|$)/ );
292
293 if ( !m ) return undefined;
294
295 var header = [ "header", { level: m[ 1 ].length } ];
296 Array.prototype.push.apply(header, this.processInline(m[ 2 ]));
297
298 if ( m[0].length < block.length )
299 next.unshift( mk_block( block.substr( m[0].length ), block.trailing, block.lineNumber + 2 ) );
300
301 return [ header ];
302 },
303
304 setextHeader: function setextHeader( block, next ) {
305 var m = block.match( /^(.*)\n([-=])\2\2+(?:\n|$)/ );
306
307 if ( !m ) return undefined;
308
309 var level = ( m[ 2 ] === "=" ) ? 1 : 2;
310 var header = [ "header", { level : level }, m[ 1 ] ];
311
312 if ( m[0].length < block.length )
313 next.unshift( mk_block( block.substr( m[0].length ), block.trailing, block.lineNumber + 2 ) );
314
315 return [ header ];
316 },
317
318 code: function code( block, next ) {
319 // | Foo
320 // |bar
321 // should be a code block followed by a paragraph. Fun
322 //
323 // There might also be adjacent code block to merge.
324
325 var ret = [],
326 re = /^(?: {0,3}\t| {4})(.*)\n?/,
327 lines;
328
329 // 4 spaces + content
330 var m = block.match( re );
331
332 if ( !m ) return undefined;
333
334 block_search:
335 do {
336 // Now pull out the rest of the lines
337 var b = this.loop_re_over_block(
338 re, block.valueOf(), function( m ) { ret.push( m[1] ) } );
339
340 if (b.length) {
341 // Case alluded to in first comment. push it back on as a new block
342 next.unshift( mk_block(b, block.trailing) );
343 break block_search;
344 }
345 else if (next.length) {
346 // Check the next block - it might be code too
347 var m = next[0].match( re );
348
349 if ( !m ) break block_search;
350
351 // Pull how how many blanks lines follow - minus two to account for .join
352 ret.push ( block.trailing.replace(/[^\n]/g, '').substring(2) );
353
354 block = next.shift();
355 }
356 else
357 break block_search;
358 } while (true);
359
360 return [ [ "code_block", ret.join("\n") ] ];
361 },
362
363 horizRule: function horizRule( block, next ) {
364 // this needs to find any hr in the block to handle abutting blocks
365 var m = block.match( /^(?:([\s\S]*?)\n)?[ \t]*([-_*])(?:[ \t]*\2){2,}[ \t]*(?:\n([\s\S]*))?$/ );
366
367 if ( !m ) {
368 return undefined;
369 }
370
371 var jsonml = [ [ "hr" ] ];
372
373 // if there's a leading abutting block, process it
374 if ( m[ 1 ] ) {
375 jsonml.unshift.apply( jsonml, this.processBlock( m[ 1 ], [] ) );
376 }
377
378 // if there's a trailing abutting block, stick it into next
379 if ( m[ 3 ] ) {
380 next.unshift( mk_block( m[ 3 ] ) );
381 }
382
383 return jsonml;
384 },
385
386 // There are two types of lists. Tight and loose. Tight lists have no whitespace
387 // between the items (and result in text just in the <li>) and loose lists,
388 // which have an empty line between list items, resulting in (one or more)
389 // paragraphs inside the <li>.
390 //
391 // There are all sorts weird edge cases about the original markdown.pl's
392 // handling of lists:
393 //
394 // * Nested lists are supposed to be indented by four chars per level. But
395 // if they aren't, you can get a nested list by indenting by less than
396 // four so long as the indent doesn't match an indent of an existing list
397 // item in the 'nest stack'.
398 //
399 // * The type of the list (bullet or number) is controlled just by the
400 // first item at the indent. Subsequent changes are ignored unless they
401 // are for nested lists
402 //
403 lists: (function( ) {
404 // Use a closure to hide a few variables.
405 var any_list = "[*+-]|\\d\\.",
406 bullet_list = /[*+-]/,
407 number_list = /\d+\./,
408 // Capture leading indent as it matters for determining nested lists.
409 is_list_re = new RegExp( "^( {0,3})(" + any_list + ")[ \t]+" ),
410 indent_re = "(?: {0,3}\\t| {4})";
411
412 // TODO: Cache this regexp for certain depths.
413 // Create a regexp suitable for matching an li for a given stack depth
414 function regex_for_depth( depth ) {
415
416 return new RegExp(
417 // m[1] = indent, m[2] = list_type
418 "(?:^(" + indent_re + "{0," + depth + "} {0,3})(" + any_list + ")\\s+)|" +
419 // m[3] = cont
420 "(^" + indent_re + "{0," + (depth-1) + "}[ ]{0,4})"
421 );
422 }
423 function expand_tab( input ) {
424 return input.replace( / {0,3}\t/g, " " );
425 }
426
427 // Add inline content `inline` to `li`. inline comes from processInline
428 // so is an array of content
429 function add(li, loose, inline, nl) {
430 if (loose) {
431 li.push( [ "para" ].concat(inline) );
432 return;
433 }
434 // Hmmm, should this be any block level element or just paras?
435 var add_to = li[li.length -1] instanceof Array && li[li.length - 1][0] == "para"
436 ? li[li.length -1]
437 : li;
438
439 // If there is already some content in this list, add the new line in
440 if (nl && li.length > 1) inline.unshift(nl);
441
442 for (var i=0; i < inline.length; i++) {
443 var what = inline[i],
444 is_str = typeof what == "string";
445 if (is_str && add_to.length > 1 && typeof add_to[add_to.length-1] == "string" )
446 {
447 add_to[ add_to.length-1 ] += what;
448 }
449 else {
450 add_to.push( what );
451 }
452 }
453 }
454
455 // contained means have an indent greater than the current one. On
456 // *every* line in the block
457 function get_contained_blocks( depth, blocks ) {
458
459 var re = new RegExp( "^(" + indent_re + "{" + depth + "}.*?\\n?)*$" ),
460 replace = new RegExp("^" + indent_re + "{" + depth + "}", "gm"),
461 ret = [];
462
463 while ( blocks.length > 0 ) {
464 if ( re.exec( blocks[0] ) ) {
465 var b = blocks.shift(),
466 // Now remove that indent
467 x = b.replace( replace, "");
468
469 ret.push( mk_block( x, b.trailing, b.lineNumber ) );
470 }
471 break;
472 }
473 return ret;
474 }
475
476 // passed to stack.forEach to turn list items up the stack into paras
477 function paragraphify(s, i, stack) {
478 var list = s.list;
479 var last_li = list[list.length-1];
480
481 if (last_li[1] instanceof Array && last_li[1][0] == "para") {
482 return;
483 }
484 if (i+1 == stack.length) {
485 // Last stack frame
486 // Keep the same array, but replace the contents
487 last_li.push( ["para"].concat( last_li.splice(1) ) );
488 }
489 else {
490 var sublist = last_li.pop();
491 last_li.push( ["para"].concat( last_li.splice(1) ), sublist );
492 }
493 }
494
495 // The matcher function
496 return function( block, next ) {
497 var m = block.match( is_list_re );
498 if ( !m ) return undefined;
499
500 function make_list( m ) {
501 var list = bullet_list.exec( m[2] )
502 ? ["bulletlist"]
503 : ["numberlist"];
504
505 stack.push( { list: list, indent: m[1] } );
506 return list;
507 }
508
509
510 var stack = [], // Stack of lists for nesting.
511 list = make_list( m ),
512 last_li,
513 loose = false,
514 ret = [ stack[0].list ];
515
516 // Loop to search over block looking for inner block elements and loose lists
517 loose_search:
518 while( true ) {
519 // Split into lines preserving new lines at end of line
520 var lines = block.split( /(?=\n)/ );
521
522 // We have to grab all lines for a li and call processInline on them
523 // once as there are some inline things that can span lines.
524 var li_accumulate = "";
525
526 // Loop over the lines in this block looking for tight lists.
527 tight_search:
528 for (var line_no=0; line_no < lines.length; line_no++) {
529 var nl = "",
530 l = lines[line_no].replace(/^\n/, function(n) { nl = n; return "" });
531
532 // TODO: really should cache this
533 var line_re = regex_for_depth( stack.length );
534
535 m = l.match( line_re );
536 //print( "line:", uneval(l), "\nline match:", uneval(m) );
537
538 // We have a list item
539 if ( m[1] !== undefined ) {
540 // Process the previous list item, if any
541 if ( li_accumulate.length ) {
542 add( last_li, loose, this.processInline( li_accumulate ), nl );
543 // Loose mode will have been dealt with. Reset it
544 loose = false;
545 li_accumulate = "";
546 }
547
548 m[1] = expand_tab( m[1] );
549 var wanted_depth = Math.floor(m[1].length/4)+1;
550 //print( "want:", wanted_depth, "stack:", stack.length);
551 if ( wanted_depth > stack.length ) {
552 // Deep enough for a nested list outright
553 //print ( "new nested list" );
554 list = make_list( m );
555 last_li.push( list );
556 last_li = list[1] = [ "listitem" ];
557 }
558 else {
559 // We aren't deep enough to be strictly a new level. This is
560 // where Md.pl goes nuts. If the indent matches a level in the
561 // stack, put it there, else put it one deeper then the
562 // wanted_depth deserves.
563 var found = stack.some(function(s, i) {
564 if ( s.indent != m[1] ) return false;
565 list = s.list; // Found the level we want
566 stack.splice(i+1); // Remove the others
567 //print("found");
568 return true; // And stop looping
569 });
570
571 if (!found) {
572 //print("not found. l:", uneval(l));
573 wanted_depth++;
574 if (wanted_depth <= stack.length) {
575 stack.splice(wanted_depth);
576 //print("Desired depth now", wanted_depth, "stack:", stack.length);
577 list = stack[wanted_depth-1].list;
578 //print("list:", uneval(list) );
579 }
580 else {
581 //print ("made new stack for messy indent");
582 list = make_list(m);
583 last_li.push(list);
584 }
585 }
586
587 //print( uneval(list), "last", list === stack[stack.length-1].list );
588 last_li = [ "listitem" ];
589 list.push(last_li);
590 } // end depth of shenegains
591 nl = "";
592 }
593
594 // Add content
595 if (l.length > m[0].length) {
596 li_accumulate += nl + l.substr( m[0].length );
597 }
598 } // tight_search
599
600 if ( li_accumulate.length ) {
601 add( last_li, loose, this.processInline( li_accumulate ), nl );
602 // Loose mode will have been dealt with. Reset it
603 loose = false;
604 li_accumulate = "";
605 }
606
607 // Look at the next block - we might have a loose list. Or an extra
608 // paragraph for the current li
609 var contained = get_contained_blocks( stack.length, next );
610
611 // Deal with code blocks or properly nested lists
612 if (contained.length > 0) {
613 // Make sure all listitems up the stack are paragraphs
614 stack.forEach( paragraphify, this );
615
616 last_li.push.apply( last_li, this.toTree( contained, [] ) );
617 }
618
619 var next_block = next[0] && next[0].valueOf() || "";
620
621 if ( next_block.match(is_list_re) || next_block.match( /^ / ) ) {
622 block = next.shift();
623
624 // Check for an HR following a list: features/lists/hr_abutting
625 var hr = this.dialect.block.horizRule( block, next );
626
627 if (hr) {
628 ret.push.apply(ret, hr);
629 break;
630 }
631
632 // Make sure all listitems up the stack are paragraphs
633 stack.forEach( paragraphify , this );
634
635 loose = true;
636 continue loose_search;
637 }
638 break;
639 } // loose_search
640
641 return ret;
642 }
643 })(),
644
645 blockquote: function blockquote( block, next ) {
646 if ( !block.match( /^>/m ) )
647 return undefined;
648
649 var jsonml = [];
650
651 // separate out the leading abutting block, if any
652 if ( block[ 0 ] != ">" ) {
653 var lines = block.split( /\n/ ),
654 prev = [];
655
656 // keep shifting lines until you find a crotchet
657 while ( lines.length && lines[ 0 ][ 0 ] != ">" ) {
658 prev.push( lines.shift() );
659 }
660
661 // reassemble!
662 block = lines.join( "\n" );
663 jsonml.push.apply( jsonml, this.processBlock( prev.join( "\n" ), [] ) );
664 }
665
666 // if the next block is also a blockquote merge it in
667 while ( next.length && next[ 0 ][ 0 ] == ">" ) {
668 var b = next.shift();
669 block += block.trailing + b;
670 block.trailing = b.trailing;
671 }
672
673 // Strip off the leading "> " and re-process as a block.
674 var input = block.replace( /^> ?/gm, '' ),
675 old_tree = this.tree;
676 jsonml.push( this.toTree( input, [ "blockquote" ] ) );
677
678 return jsonml;
679 },
680
681 referenceDefn: function referenceDefn( block, next) {
682 var re = /^\s*\[(.*?)\]:\s*(\S+)(?:\s+(?:(['"])(.*?)\3|\((.*?)\)))?\n?/;
683 // interesting matches are [ , ref_id, url, , title, title ]
684
685 if ( !block.match(re) )
686 return undefined;
687
688 // make an attribute node if it doesn't exist
689 if ( !extract_attr( this.tree ) ) {
690 this.tree.splice( 1, 0, {} );
691 }
692
693 var attrs = extract_attr( this.tree );
694
695 // make a references hash if it doesn't exist
696 if ( attrs.references === undefined ) {
697 attrs.references = {};
698 }
699
700 var b = this.loop_re_over_block(re, block, function( m ) {
701
702 if ( m[2] && m[2][0] == '<' && m[2][m[2].length-1] == '>' )
703 m[2] = m[2].substring( 1, m[2].length - 1 );
704
705 var ref = attrs.references[ m[1].toLowerCase() ] = {
706 href: m[2]
707 };
708
709 if (m[4] !== undefined)
710 ref.title = m[4];
711 else if (m[5] !== undefined)
712 ref.title = m[5];
713
714 } );
715
716 if (b.length)
717 next.unshift( mk_block( b, block.trailing ) );
718
719 return [];
720 },
721
722 para: function para( block, next ) {
723 // everything's a para!
724 return [ ["para"].concat( this.processInline( block ) ) ];
725 }
726 }
727}
728
729Markdown.dialects.Gruber.inline = {
730 __call__: function inline( text, patterns ) {
731 // Hmmm - should this function be directly in Md#processInline, or
732 // conversely, should Md#processBlock be moved into block.__call__ too
733 var out = [ ],
734 m,
735 // Look for the next occurange of a special character/pattern
736 re = new RegExp( "([\\s\\S]*?)(" + (patterns.source || patterns) + ")", "g" ),
737 lastIndex = 0;
738
739 //D:var self = this;
740 //D:self.debug("processInline:", uneval(text) );
741 function add(x) {
742 //D:self.debug(" adding output", uneval(x));
743 if (typeof x == "string" && typeof out[out.length-1] == "string")
744 out[ out.length-1 ] += x;
745 else
746 out.push(x);
747 }
748
749 while ( ( m = re.exec(text) ) != null) {
750 if ( m[1] ) add( m[1] ); // Some un-interesting text matched
751 else m[1] = { length: 0 }; // Or there was none, but make m[1].length == 0
752
753 var res;
754 if ( m[2] in this.dialect.inline ) {
755 res = this.dialect.inline[ m[2] ].call(
756 this,
757 text.substr( m.index + m[1].length ), m, out );
758 }
759 // Default for now to make dev easier. just slurp special and output it.
760 res = res || [ m[2].length, m[2] ];
761
762 var len = res.shift();
763 // Update how much input was consumed
764 re.lastIndex += ( len - m[2].length );
765
766 // Add children
767 res.forEach(add);
768
769 lastIndex = re.lastIndex;
770 }
771
772 // Add last 'boring' chunk
773 if ( text.length > lastIndex )
774 add( text.substr( lastIndex ) );
775
776 return out;
777 },
778
779 "\\": function escaped( text ) {
780 // [ length of input processed, node/children to add... ]
781 // Only esacape: \ ` * _ { } [ ] ( ) # * + - . !
782 if ( text.match( /^\\[\\`\*_{}\[\]()#\+.!\-]/ ) )
783 return [ 2, text[1] ];
784 else
785 // Not an esacpe
786 return [ 1, "\\" ];
787 },
788
789 "![": function image( text ) {
790 // ![Alt text](/path/to/img.jpg "Optional title")
791 // 1 2 3 4 <--- captures
792 var m = text.match( /^!\[(.*?)\][ \t]*\([ \t]*(\S*)(?:[ \t]+(["'])(.*?)\3)?[ \t]*\)/ );
793
794 if ( m ) {
795 if ( m[2] && m[2][0] == '<' && m[2][m[2].length-1] == '>' )
796 m[2] = m[2].substring( 1, m[2].length - 1 );
797
798 m[2] == this.dialect.inline.__call__.call( this, m[2], /\\/ )[0];
799
800 var attrs = { alt: m[1], href: m[2] || "" };
801 if ( m[4] !== undefined)
802 attrs.title = m[4];
803
804 return [ m[0].length, [ "img", attrs ] ];
805 }
806
807 // ![Alt text][id]
808 m = text.match( /^!\[(.*?)\][ \t]*\[(.*?)\]/ );
809
810 if ( m ) {
811 // We can't check if the reference is known here as it likely wont be
812 // found till after. Check it in md tree->hmtl tree conversion
813 return [ m[0].length, [ "img_ref", { alt: m[1], ref: m[2].toLowerCase(), text: m[0] } ] ];
814 }
815
816 // Just consume the '!['
817 return [ 2, "![" ];
818 },
819
820 "[": function link( text ) {
821 // [link text](/path/to/img.jpg "Optional title")
822 // 1 2 3 4 <--- captures
823 var m = text.match( /^\[([\s\S]*?)\][ \t]*\([ \t]*(\S+)(?:[ \t]+(["'])(.*?)\3)?[ \t]*\)/ );
824
825 if ( m ) {
826 if ( m[2] && m[2][0] == '<' && m[2][m[2].length-1] == '>' )
827 m[2] = m[2].substring( 1, m[2].length - 1 );
828
829 // Process escapes only
830 m[2] = this.dialect.inline.__call__.call( this, m[2], /\\/ )[0];
831
832 var attrs = { href: m[2] || "" };
833 if ( m[4] !== undefined)
834 attrs.title = m[4];
835
836 var link = [ "link", attrs ];
837 Array.prototype.push.apply( link, this.processInline( m[1] ) );
838 return [ m[0].length, link ];
839 }
840
841 // [Alt text][id]
842 // [Alt text] [id]
843 // [id]
844 m = text.match( /^\[([\s\S]*?)\](?: ?\[(.*?)\])?/ );
845
846 if ( m ) {
847 // [id] case, text == id
848 if ( m[2] === undefined || m[2] === "" ) m[2] = m[1];
849
850 attrs = { ref: m[ 2 ].toLowerCase(), original: m[ 0 ] };
851 link = [ "link_ref", attrs ];
852 Array.prototype.push.apply( link, this.processInline( m[1] ) );
853
854 // We can't check if the reference is known here as it likely wont be
855 // found till after. Check it in md tree->hmtl tree conversion.
856 // Store the original so that conversion can revert if the ref isn't found.
857 return [
858 m[ 0 ].length,
859 link
860 ];
861 }
862
863 // Just consume the '['
864 return [ 1, "[" ];
865 },
866
867
868 "<": function autoLink( text ) {
869 var m;
870
871 if ( ( m = text.match( /^<(?:((https?|ftp|mailto):[^>]+)|(.*?@.*?\.[a-zA-Z]+))>/ ) ) != null ) {
872 if ( m[3] ) {
873 return [ m[0].length, [ "link", { href: "mailto:" + m[3] }, m[3] ] ];
874
875 }
876 else if ( m[2] == "mailto" ) {
877 return [ m[0].length, [ "link", { href: m[1] }, m[1].substr("mailto:".length ) ] ];
878 }
879 else
880 return [ m[0].length, [ "link", { href: m[1] }, m[1] ] ];
881 }
882
883 return [ 1, "<" ];
884 },
885
886 "`": function inlineCode( text ) {
887 // Inline code block. as many backticks as you like to start it
888 // Always skip over the opening ticks.
889 var m = text.match( /(`+)(([\s\S]*?)\1)/ );
890
891 if ( m && m[2] )
892 return [ m[1].length + m[2].length, [ "inlinecode", m[3] ] ];
893 else {
894 // TODO: No matching end code found - warn!
895 return [ 1, "`" ];
896 }
897 },
898
899 " \n": function lineBreak( text ) {
900 return [ 3, [ "linebreak" ] ];
901 }
902
903}
904
// Factory for the em / strong inline handlers.
//  - tag: "em" or "strong", the JsonML node name to produce
//  - md:  the markdown marker handled by the returned function
// The returned handler keeps the open markers in this.em_state /
// this.strong_state and answers with [ consumed, node-or-text ].
function strong_em( tag, md ) {

  var own_key = tag + "_state",
      other_key = ( tag == "strong" ) ? "em_state" : "strong_state";

  // Sentinel handed back by a closing marker to the invocation that opened it.
  function CloseTag(len) {
    this.len_after = len;
    this.name = "close_" + md;
  }

  return function ( text, orig_match ) {

    // The innermost open marker is ours: this occurrence closes it. Claim
    // the rest of the text so control returns to the opening invocation.
    if ( this[own_key][0] == md ) {
      this[own_key].shift();
      return [ text.length, new CloseTag( text.length - md.length ) ];
    }

    // Opening marker. Snapshot both stacks in case no close turns up.
    var saved_other = this[other_key].slice(),
        saved_own = this[own_key].slice();

    this[own_key].unshift( md );

    // Recurse over everything behind the marker
    var children = this.processInline( text.substr( md.length ) ),
        tail = children[ children.length - 1 ];

    this[own_key].shift();

    if ( !( tail instanceof CloseTag ) ) {
      // Never closed. Restore both stacks (the other kind may have been
      // closed by mistake) and emit the marker as text; the recursive result
      // is dropped as it may have been parsed in the wrong context.
      this[other_key] = saved_other;
      this[own_key] = saved_own;
      return [ md.length, md ];
    }

    // We matched! Drop the sentinel and wrap what came before it.
    children.pop();
    return [ text.length - tail.len_after, [ tag ].concat( children ) ];
  }; // End returned function
}
961
// Register the emphasis handlers. The two-character markers are added before
// the one-character ones; the insertion order is kept exactly as it was
// because the inline pattern is later assembled by enumerating these keys.
[
  [ "**", "strong" ],
  [ "__", "strong" ],
  [ "*",  "em" ],
  [ "_",  "em" ]
].forEach( function( pair ) {
  Markdown.dialects.Gruber.inline[ pair[0] ] = strong_em( pair[1], pair[0] );
} );
966
967
// Store on `d.__order__` the names of all enumerable entries of the block
// table `d`, in enumeration order, leaving out "__order__" and "__call__".
Markdown.buildBlockOrder = function(d) {
  var names = [];
  for ( var key in d ) {
    if ( key != "__order__" && key != "__call__" ) names.push( key );
  }
  d.__order__ = names;
};
977
// Build the pattern for the inline matcher from the keys of the inline table
// `d` and wrap `d.__call__` so that this pattern is used whenever the caller
// does not supply one.
Markdown.buildInlinePatterns = function(d) {
  var alternatives = [];

  for ( var key in d ) {
    if ( key == "__call__" ) continue;

    // escape regexp metacharacters and spell a newline as \n
    var escaped = key.replace( /([\\.*+?|()\[\]{}])/g, "\\$1" )
                     .replace( /\n/, "\\n" );

    // multi-character tokens are grouped so the alternation stays intact
    alternatives.push( key.length == 1 ? escaped : "(?:" + escaped + ")" );
  }

  var source = alternatives.join("|"),
      original = d.__call__;

  d.__call__ = function(text, pattern) {
    return original.call( this, text, pattern != undefined ? pattern : source );
  };
};
1000
// Helper function to make sub-classing a dialect easier: returns a new
// dialect whose `block` and `inline` tables are fresh objects that inherit
// from the corresponding tables of `d` through the prototype chain.
Markdown.subclassDialect = function( d ) {
  function inherit( proto ) {
    function Derived() {}
    Derived.prototype = proto;
    return new Derived();
  }

  return { block: inherit( d.block ), inline: inherit( d.inline ) };
};
1010
// Finish the Gruber dialect: record the block handler order and build the
// inline matcher pattern from the handlers registered so far.
Markdown.buildBlockOrder( Markdown.dialects.Gruber.block );
Markdown.buildInlinePatterns( Markdown.dialects.Gruber.inline );

// Maruku starts out as a dialect inheriting everything from Gruber.
Markdown.dialects.Maruku = Markdown.subclassDialect( Markdown.dialects.Gruber );
1015
// Maruku document meta data: a first block made up solely of `Key: Value`
// lines. Each pair is stored, with a lower-cased key, in the attribute hash
// of this.tree (created on demand). Produces no content nodes.
Markdown.dialects.Maruku.block.document_meta = function document_meta( block, next ) {
  // we're only interested in the first block
  if ( block.lineNumber > 1 ) return undefined;

  // document_meta blocks consist of one or more lines of `Key: Value\n`
  if ( ! block.match( /^(?:\w+:.*\n)*\w+:.*$/ ) ) return undefined;

  // make an attribute node if it doesn't exist
  if ( !extract_attr( this.tree ) ) {
    this.tree.splice( 1, 0, {} );
  }

  var pairs = block.split( /\n/ );
  // Indexed loop with a declared counter: for-in over an array also visits
  // inherited enumerable properties, and the undeclared loop variable leaked
  // into the global scope.
  for ( var p = 0; p < pairs.length; p++ ) {
    var m = pairs[ p ].match( /(\w+):\s*(.*)$/ ),
        key = m[ 1 ].toLowerCase(),
        value = m[ 2 ];

    this.tree[ 1 ][ key ] = value;
  }

  // document_meta produces no content!
  return [];
};
1040
// Maruku block meta data: a `{: ... }` hash on the last line of a block.
// When the hash is the whole block its attributes are merged into the last
// node of this.tree; otherwise the rest of the block is processed and the
// attributes are attached to its first resulting node.
Markdown.dialects.Maruku.block.block_meta = function block_meta( block, next ) {
  // check if the last line of the block is a meta hash
  var m = block.match( /(^|\n) {0,3}\{:\s*((?:\\\}|[^\}])*)\s*\}$/ );
  if ( !m ) return undefined;

  // process the meta hash
  var attr = process_meta_hash( m[ 2 ] ),
      hash,
      a;   // declared so the for-in loops below don't create a global

  // if we matched ^ then we need to apply meta to the previous block
  if ( m[ 1 ] === "" ) {
    var node = this.tree[ this.tree.length - 1 ];
    hash = extract_attr( node );

    // if the node is a string (rather than JsonML), bail
    if ( typeof node === "string" ) return undefined;

    // create the attribute hash if it doesn't exist
    if ( !hash ) {
      hash = {};
      node.splice( 1, 0, hash );
    }

    // add the attributes in
    for ( a in attr ) {
      hash[ a ] = attr[ a ];
    }

    // return nothing so the meta hash is removed
    return [];
  }

  // pull the meta hash off the block and process what's left
  var b = block.replace( /\n.*$/, "" ),
      result = this.processBlock( b, [] );

  // The remaining text may yield no nodes at all; then there is nothing to
  // attach the attributes to.
  if ( !result.length ) return result;

  // get or make the attributes hash
  hash = extract_attr( result[ 0 ] );
  if ( !hash ) {
    hash = {};
    result[ 0 ].splice( 1, 0, hash );
  }

  // attach the attributes to the block
  for ( a in attr ) {
    hash[ a ] = attr[ a ];
  }

  return result;
};
1090
/**
 * definition_list( block, next ) -> Array | undefined
 * - block (String): the block being examined
 * - next (Array): the blocks that follow; matching blocks are shifted off it
 *
 * Maruku "tight" definition lists: one or more term lines followed by one or
 * more `: definition` entries in a single block. Consecutive matching blocks
 * are folded into the same list.
 *
 * Returns `[ [ "dl", [ "dt", ... ], [ "dd", ... ], ... ] ]`, or `undefined`
 * when `block` is not a definition list.
 **/
Markdown.dialects.Maruku.block.definition_list = function definition_list( block, next ) {
  // one or more terms followed by one or more definitions, in a single block
  var tight = /^((?:[^\s:].*\n)+):\s+([^]+)$/;

  // not a tight definition list: let another rule have it
  if ( !tight.test( block ) ) {
    return undefined;
  }

  // pull subsequent tight DL blocks out of `next`
  var pending = [ block ];
  while ( next.length && tight.exec( next[ 0 ] ) ) {
    pending.push( next.shift() );
  }

  var list = [ "dl" ];

  for ( var n = 0; n < pending.length; ++n ) {
    var pieces = pending[ n ].match( tight ),
        terms = pieces[ 1 ].replace( /\n$/, "" ).split( /\n/ ),
        defns = pieces[ 2 ].split( /\n:\s+/ ),
        t, d;

    // terms are stored verbatim
    for ( t = 0; t < terms.length; ++t ) {
      list.push( [ "dt", terms[ t ] ] );
    }

    // definitions get inline processing
    for ( d = 0; d < defns.length; ++d ) {
      var inlined = this.processInline( defns[ d ].replace( /(\n)\s+/, "$1" ) );
      list.push( [ "dd" ].concat( inlined ) );
    }
  }

  return [ list ];
};
1127
/**
 * inline_meta( text, matches, out ) -> Array
 * - text (String): the remaining inline text, starting at `{:`
 * - matches: not used by this rule
 * - out (Array): the inline nodes produced so far
 *
 * Maruku inline meta data: `{: ... }` directly after an inline element merges
 * the hash's attributes into that element.
 *
 * Returns `[ consumed, replacement ]`. When the hash cannot be applied (nothing
 * precedes it, the preceding node is plain text, or the hash is malformed) the
 * two characters `{:` are passed through as literal text.
 **/
Markdown.dialects.Maruku.inline[ "{:" ] = function inline_meta( text, matches, out ) {
  // the element the hash would decorate (undefined when nothing precedes it)
  var previous = out[ out.length - 1 ];

  // nothing before us, or only plain text: emit the marker literally
  if ( !out.length || typeof previous === "string" ) {
    return [ 2, "{:" ];
  }

  // match a meta hash
  var found = text.match( /^\{:\s*((?:\\\}|[^\}])*)\s*\}/ );

  // no match, false alarm
  if ( !found ) {
    return [ 2, "{:" ];
  }

  // attach the attributes to the preceding element
  var parsed = process_meta_hash( found[ 1 ] ),
      target = extract_attr( previous );

  if ( !target ) {
    target = {};
    previous.splice( 1, 0, target );
  }

  for ( var name in parsed ) {
    target[ name ] = parsed[ name ];
  }

  // cut out the string and replace it with nothing
  return [ found[ 0 ].length, "" ];
};
1164
// Run the same build helpers over the Maruku rule tables, now that the
// Maruku-specific block and inline rules above have been attached.
Markdown.buildBlockOrder( Markdown.dialects.Maruku.block );
Markdown.buildInlinePatterns( Markdown.dialects.Maruku.inline );
1167
/**
 * isArray( obj ) -> Boolean
 * - obj (any): value to test
 *
 * Reports whether `obj` is an array. Also published as `expose.isArray`.
 *
 * `instanceof` is tried first; `Array.isArray` is used when the engine has it,
 * and the `Object.prototype.toString` tag is the fallback for engines that do
 * not (calling a missing `Array.isArray` would throw for every non-array).
 **/
var isArray = expose.isArray = function( obj ) {
  if ( obj instanceof Array ) {
    return true;
  }

  // looked up on every call so a late-installed polyfill is honoured
  if ( typeof Array.isArray === "function" ) {
    return Array.isArray( obj );
  }

  return Object.prototype.toString.call( obj ) === "[object Array]";
};
1171
/**
 * extract_attr( jsonml ) -> Object | undefined
 * - jsonml (any): a JsonML node
 *
 * Returns the attribute hash of a JsonML node: the second element, when it is
 * a non-null object that is not itself an array. Returns `undefined` for
 * everything else (strings, nodes without attributes, non-arrays).
 **/
function extract_attr( jsonml ) {
  if ( !isArray( jsonml ) || jsonml.length < 2 ) {
    return undefined;
  }

  var candidate = jsonml[ 1 ];

  // `typeof null` is "object" too, so null has to be ruled out explicitly
  if ( candidate === null || typeof candidate !== "object" || isArray( candidate ) ) {
    return undefined;
  }

  return candidate;
}
1180
/**
 * process_meta_hash( meta_string ) -> Object
 * - meta_string (String): the inside of a `{: ... }` meta hash
 *
 * Turns a meta hash into an attribute object. The string is tokenised by
 * split_meta_hash and each token is interpreted as:
 *
 * - `#foo`    -> `id` (a later id replaces an earlier one)
 * - `.foo`    -> `class` (several classes are joined with a single space)
 * - `foo=bar` -> attribute `foo` with value `bar`; only the first `=` separates
 *   name from value, so values may themselves contain `=`
 *
 * Tokens in any other form are ignored.
 **/
function process_meta_hash( meta_string ) {
  var meta = split_meta_hash( meta_string ),
      attr = {};

  for ( var i = 0; i < meta.length; ++i ) {
    var part = meta[ i ];

    // id: #foo
    if ( /^#/.test( part ) ) {
      attr.id = part.substring( 1 );
    }
    // class: .foo
    else if ( /^\./.test( part ) ) {
      // if class already exists, append the new one
      if ( attr['class'] ) {
        attr['class'] = attr['class'] + " " + part.substring( 1 );
      }
      else {
        attr['class'] = part.substring( 1 );
      }
    }
    // attribute: foo=bar
    else if ( /=/.test( part ) ) {
      // cut at the first "=" only; splitting on every "=" would drop the
      // tail of values such as `href=/search?q=1`
      var eq = part.indexOf( "=" );
      attr[ part.substring( 0, eq ) ] = part.substring( eq + 1 );
    }
  }

  return attr;
}
1209
/**
 * split_meta_hash( meta_string ) -> Array
 * - meta_string (String): the inside of a `{: ... }` meta hash
 *
 * Splits a meta hash into its space separated parts.
 *
 * - A single space outside quotes starts a new part (so consecutive spaces
 *   yield empty parts, and the result always has at least one element).
 * - `'...'` and `"..."` protect spaces; the quote characters themselves are
 *   dropped. A section is closed only by the same character that opened it, so
 *   `"it's"` keeps its apostrophe.
 * - A backslash makes the following character literal. A backslash at the very
 *   end of the string has nothing to escape and is kept as-is.
 **/
function split_meta_hash( meta_string ) {
  var parts = [ "" ],
      quote = null; // the quote character of the section we are inside, if any

  // index walk: repeatedly shifting a character array is quadratic
  for ( var i = 0; i < meta_string.length; ++i ) {
    var letter = meta_string.charAt( i );

    if ( letter === "\\" ) {
      // it was escaped so we'll keep the next letter whatever it is
      if ( i + 1 < meta_string.length ) {
        letter = meta_string.charAt( ++i );
      }
      parts[ parts.length - 1 ] += letter;
    }
    else if ( quote !== null ) {
      // inside quotes: only the matching quote ends the section
      if ( letter === quote ) {
        quote = null;
      }
      else {
        parts[ parts.length - 1 ] += letter;
      }
    }
    else if ( letter === "'" || letter === '"' ) {
      quote = letter;
    }
    else if ( letter === " " ) {
      // an unquoted space makes a new part
      parts.push( "" );
    }
    else {
      parts[ parts.length - 1 ] += letter;
    }
  }

  return parts;
}
1245
/**
 * renderJsonML( jsonml[, options] ) -> String
 * - jsonml (Array): JsonML array to render to XML
 * - options (Object): options
 *
 * Converts the given JsonML into well-formed XML.
 *
 * The options currently understood are:
 *
 * - root (Boolean): whether or not the root node should be included in the
 *   output, or just its children. The default `false` is to not include the
 *   root itself.
 *
 * Without the root, each child of the root is rendered on its own and the
 * pieces are joined with a blank line. This function reads `jsonml` and
 * `options` without modifying either of them itself.
 */
expose.renderJsonML = function( jsonml, options ) {
  options = options || {};

  // include the root element in the rendered output?
  var include_root = !!options.root,
      content = [];

  if ( include_root ) {
    content.push( render_tree( jsonml ) );
  }
  else {
    // skip the tag, and the attribute hash when there is one
    var first_child = 1;
    if ( jsonml.length > 1 && typeof jsonml[ 1 ] === "object" && !( jsonml[ 1 ] instanceof Array ) ) {
      first_child = 2;
    }

    // walk by index rather than shift()-ing the caller's array empty
    for ( var i = first_child; i < jsonml.length; ++i ) {
      content.push( render_tree( jsonml[ i ] ) );
    }
  }

  return content.join( "\n\n" );
};
1282
/**
 * escapeHTML( text ) -> String
 * - text (any): value to make safe for element content or a quoted attribute
 *
 * Replaces `&`, `<`, `>`, `"` and `'` with their entity forms. `&` is handled
 * first so the ampersands of the other entities are not escaped again.
 *
 * Non-string values (attribute hashes may carry numbers or booleans) are
 * converted with `String()`; `null` and `undefined` become the empty string.
 **/
function escapeHTML( text ) {
  if ( text === null || text === undefined ) {
    return "";
  }

  return String( text ).replace( /&/g, "&amp;" )
                       .replace( /</g, "&lt;" )
                       .replace( />/g, "&gt;" )
                       .replace( /"/g, "&quot;" )
                       .replace( /'/g, "&#39;" );
}
1290
/**
 * render_tree( jsonml ) -> String
 * - jsonml (String | Array): a JsonML node
 *
 * Renders one JsonML node, and recursively its children, as markup.
 *
 * - A string is returned escaped.
 * - An array `[ tag, attributes?, children... ]` becomes
 *   `<tag attr="value"...>children</tag>` with attribute values escaped.
 * - `img`, `br` and `hr` nodes without children are written self-closed
 *   (`<br/>`): `<br></br>` is read by HTML parsers as two line breaks.
 *
 * The node is only read, never modified.
 **/
function render_tree( jsonml ) {
  // basic case
  if ( typeof jsonml === "string" ) {
    return escapeHTML( jsonml );
  }

  var tag = jsonml[ 0 ],
      attributes = {},
      content = [],
      first_child = 1;

  if ( jsonml.length > 1 && typeof jsonml[ 1 ] === "object" && !( jsonml[ 1 ] instanceof Array ) ) {
    attributes = jsonml[ 1 ];
    first_child = 2;
  }

  // recurse by name: arguments.callee is unavailable in strict mode code
  for ( var i = first_child; i < jsonml.length; ++i ) {
    content.push( render_tree( jsonml[ i ] ) );
  }

  var tag_attrs = "";
  for ( var a in attributes ) {
    tag_attrs += " " + a + '="' + escapeHTML( attributes[ a ] ) + '"';
  }

  if ( content.length === 0 && ( tag === "img" || tag === "br" || tag === "hr" ) ) {
    return "<" + tag + tag_attrs + "/>";
  }

  // be careful about adding whitespace here for inline elements
  return "<" + tag + tag_attrs + ">" + content.join( "" ) + "</" + tag + ">";
}
1317
/**
 * convert_tree_to_html( tree, references, options ) -> Array | String
 * - tree (Array | String): a node of the markdown JsonML tree
 * - references (Object): link reference definitions, keyed by reference id
 * - options (Object): optional; `preprocessTreeNode( node, references )` is
 *   called on the copy of every node (text nodes included) and its return
 *   value is used in place of that copy
 *
 * Converts a markdown JsonML node, and recursively its children, into HTML
 * JsonML: node names are mapped to HTML tags (`para` -> `p`, `header` ->
 * `h<level>`, `code_block` -> `pre` wrapping `code`, ...), `img` gets its
 * `href` moved to `src`, and `link_ref` nodes are resolved against
 * `references`. An attribute hash that ends up empty is removed.
 *
 * Works on a shallow copy of the node and a copy of its attribute hash, so
 * `tree` itself is not altered. Strings are returned as they are. A `link_ref`
 * whose reference is unknown is replaced by its `original` text.
 **/
function convert_tree_to_html( tree, references, options ) {
  options = options || {};

  // shallow clone
  var jsonml = tree.slice( 0 );

  if ( typeof options.preprocessTreeNode === "function" ) {
    jsonml = options.preprocessTreeNode( jsonml, references );
  }

  // Clone attributes if they exist
  var attrs = extract_attr( jsonml );
  if ( attrs ) {
    jsonml[ 1 ] = {};
    for ( var name in attrs ) {
      jsonml[ 1 ][ name ] = attrs[ name ];
    }
    attrs = jsonml[ 1 ];
  }

  // basic case
  if ( typeof jsonml === "string" ) {
    return jsonml;
  }

  // convert this node
  switch ( jsonml[ 0 ] ) {
    case "header":
      jsonml[ 0 ] = "h" + jsonml[ 1 ].level;
      delete jsonml[ 1 ].level;
      break;
    case "bulletlist":
      jsonml[ 0 ] = "ul";
      break;
    case "numberlist":
      jsonml[ 0 ] = "ol";
      break;
    case "listitem":
      jsonml[ 0 ] = "li";
      break;
    case "para":
      jsonml[ 0 ] = "p";
      break;
    case "markdown":
      jsonml[ 0 ] = "html";
      if ( attrs ) delete attrs.references;
      break;
    case "code_block":
      jsonml[ 0 ] = "pre";
      // move everything after the tag (and attributes) into a nested <code>;
      // the delete count is spelled out because very old engines ignore a
      // splice() call that has only a start index
      var code_at = attrs ? 2 : 1;
      var code = [ "code" ];
      code.push.apply( code, jsonml.splice( code_at, jsonml.length - code_at ) );
      jsonml[ code_at ] = code;
      break;
    case "inlinecode":
      jsonml[ 0 ] = "code";
      break;
    case "img":
      jsonml[ 1 ].src = jsonml[ 1 ].href;
      delete jsonml[ 1 ].href;
      break;
    case "linebreak":
      jsonml[ 0 ] = "br";
      break;
    case "link":
      jsonml[ 0 ] = "a";
      break;
    case "link_ref":
      jsonml[ 0 ] = "a";

      // grab this ref and clean up the attribute node
      var ref = references[ attrs.ref ];

      // if the reference exists, make the link
      if ( ref ) {
        delete attrs.ref;

        // add in the href and title, if present
        attrs.href = ref.href;
        if ( ref.title ) {
          attrs.title = ref.title;
        }

        // get rid of the unneeded original text
        delete attrs.original;
      }
      // the reference doesn't exist, so revert to plain text
      else {
        return attrs.original;
      }
      break;
  }

  // convert all the children
  var i = 1;

  // deal with the attribute node, if it exists
  if ( attrs ) {
    // if there are keys, skip over it
    for ( var key in jsonml[ 1 ] ) {
      i = 2;
      break;
    }
    // if there aren't, remove it
    if ( i === 1 ) {
      jsonml.splice( i, 1 );
    }
  }

  // recurse by name: arguments.callee is unavailable in strict mode code
  for ( ; i < jsonml.length; ++i ) {
    jsonml[ i ] = convert_tree_to_html( jsonml[ i ], references, options );
  }

  return jsonml;
}
1432
1433
/**
 * merge_text_nodes( jsonml )
 * - jsonml (Array): a JsonML node, modified in place
 *
 * Merges adjacent text nodes into a single node, throughout the tree: every
 * run of neighbouring strings among a node's children is concatenated into
 * the first string of the run, and non-string children are processed the same
 * way recursively. Returns nothing.
 **/
function merge_text_nodes( jsonml ) {
  // skip the tag name and attribute hash
  var i = extract_attr( jsonml ) ? 2 : 1;

  while ( i < jsonml.length ) {
    // if it's a string check the next item too
    if ( typeof jsonml[ i ] === "string" ) {
      if ( i + 1 < jsonml.length && typeof jsonml[ i + 1 ] === "string" ) {
        // merge the second string into the first and remove it; `i` stays put
        // so a third consecutive string is folded in on the next pass
        jsonml[ i ] += jsonml.splice( i + 1, 1 )[ 0 ];
      }
      else {
        ++i;
      }
    }
    // if it's not a string recurse (by name: arguments.callee is unavailable
    // in strict mode code)
    else {
      merge_text_nodes( jsonml[ i ] );
      ++i;
    }
  }
}
1457
1458} )( (function() {
1459 if ( typeof exports === "undefined" ) {
1460 window.markdown = {};
1461 return window.markdown;
1462 }
1463 else {
1464 return exports;
1465 }
1466} )() );