// Retrieved from UNPKG (20.2 kB, JavaScript, raw view).

/*

 Style HTML
---------------

  Written by Nochum Sossonko, (nsossonko@hotmail.com)

  Based on code initially developed by: Einar Lielmanis, <elfz@laacz.lv>
    http://jsbeautifier.org/


  You are free to use this in any way you want, in case you find this useful or working for you.

  Usage:
    style_html(html_source);

    style_html(html_source, options);

  The options are:
    indent_size (default 4)          - indentation size,
    indent_char (default space)      - character to indent with,
    max_char (default 70)            - maximum number of characters per line (0 disables wrapping),
    brace_style (default "collapse") - "collapse" | "expand" | "end-expand"
            put braces on the same line as control statements (default), or put braces on own line (Allman / ANSI style), or just put end braces on own line.
    unformatted (defaults to inline tags) - list of tags that should not be reformatted
    indent_scripts (default normal)  - "keep"|"separate"|"normal"

    e.g.

    style_html(html_source, {
      'indent_size': 2,
      'indent_char': ' ',
      'max_char': 78,
      'brace_style': 'expand',
      'unformatted': ['a', 'sub', 'sup', 'b', 'i', 'u']
    });
*/
38
/**
 * Re-indents an HTML string, one tag per line, and returns the result.
 *
 * The source is split into alternating CONTENT and TAG tokens by an internal
 * Parser; the driver loop at the bottom decides where line breaks and
 * indentation go. The contents of <script> and <style> elements are handed to
 * the globals `js_beautify` / `css_beautify` when those are functions, and are
 * simply re-indented otherwise.
 *
 * @param {string} html_source  Markup to format; a falsy value is treated as ''.
 * @param {Object} [options]
 * @param {number} [options.indent_size=4]      Number of indent characters per level.
 * @param {string} [options.indent_char=' ']    Character used to build one indent step.
 * @param {number} [options.max_char=70]        Column at which text is wrapped; 0 disables wrapping.
 * @param {string} [options.brace_style='collapse']  Stored on the parser; `options` as a whole is
 *                                              forwarded to the script/style beautifier.
 * @param {string[]} [options.unformatted]      Lower-case tag names whose content is copied verbatim
 *                                              (defaults to a list of inline tags plus pre, address, dt, h1-h6).
 * @param {string} [options.indent_scripts]     "keep" | "separate" | anything else ("normal").
 * @returns {string} The formatted markup.
 */
function style_html(html_source, options) {
    var multi_parser,
        indent_size,
        indent_character,
        max_char,
        brace_style,
        unformatted;

    options = options || {};
    indent_size = options.indent_size || 4;
    indent_character = options.indent_char || ' ';
    brace_style = options.brace_style || 'collapse';
    // Loose equality is kept on purpose so that 0 and "0" both mean "never wrap".
    max_char = options.max_char == 0 ? Infinity : options.max_char || 70;
    unformatted = options.unformatted || ['a', 'span', 'bdo', 'em', 'strong', 'dfn', 'code', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'q', 'sub', 'sup', 'tt', 'i', 'b', 'big', 'small', 'u', 's', 'strike', 'font', 'ins', 'del', 'pre', 'address', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'];

    /**
     * Tokenizer plus output buffer. Must be initialised with printer() before
     * get_token() is called, because printer() installs the input, the output
     * array and the newline/indent helpers.
     */
    function Parser() {

        this.pos = 0; // read position inside this.input
        this.token = '';
        this.current_mode = 'CONTENT'; // which kind of token is read next: TAG or CONTENT
        this.tags = { // bookkeeping for open tags: "<name>count", "<name><n>" (indent level), "<name><n>parent"
            parent: 'parent1',
            parentcount: 1,
            parent1: ''
        };
        this.tag_type = '';
        this.token_text = this.last_token = this.last_text = this.token_type = '';

        this.Utils = { // helpers shared by the tokenizer functions
            whitespace: "\n\r\t ".split(''),
            single_token: 'br,input,link,meta,!doctype,basefont,base,area,hr,wbr,param,img,isindex,?xml,embed,?php,?,?='.split(','), // tags that never have a closing tag
            extra_liners: 'head,body,/html'.split(','), // tags that get a blank line before them
            in_array: function (what, arr) {
                for (var i = 0; i < arr.length; i++) {
                    if (what === arr[i]) {
                        return true;
                    }
                }
                return false;
            }
        };

        /**
         * Reads text up to the next '<', collapsing runs of whitespace to a
         * single space and wrapping at max_char.
         * Returns the text as a string, or ['', 'TK_EOF'] when the input ends
         * before any text was collected.
         */
        this.get_content = function () {
            var input_char = '',
                content = [],
                space = false, // a separator is owed before the next visible character
                i;

            while (this.input.charAt(this.pos) !== '<') {
                if (this.pos >= this.input.length) {
                    return content.length ? content.join('') : ['', 'TK_EOF'];
                }

                input_char = this.input.charAt(this.pos);
                this.pos++;
                this.line_char_count++;

                if (this.Utils.in_array(input_char, this.Utils.whitespace)) {
                    if (content.length) { // leading whitespace is dropped entirely
                        space = true;
                    }
                    this.line_char_count--;
                    continue;
                }

                if (space) {
                    if (this.line_char_count >= this.max_char) { // wrap instead of emitting the space
                        content.push('\n');
                        for (i = 0; i < this.indent_level; i++) {
                            content.push(this.indent_string);
                        }
                        this.line_char_count = 0;
                    }
                    else {
                        content.push(' ');
                        this.line_char_count++;
                    }
                    space = false;
                }
                content.push(input_char);
            }
            return content.join('');
        };

        /**
         * Returns everything from the current position up to (not including)
         * the closing tag `</name>`, matched case-insensitively, or up to the
         * end of input when no closing tag exists. Returns ['', 'TK_EOF'] when
         * already at the end of input.
         */
        this.get_contents_to = function (name) {
            if (this.pos == this.input.length) {
                return ['', 'TK_EOF'];
            }
            var content = '';
            var reg_match = new RegExp('</' + name + '\\s*>', 'igm');
            reg_match.lastIndex = this.pos; // start searching at the current position
            var reg_array = reg_match.exec(this.input);
            var end_script = reg_array ? reg_array.index : this.input.length;
            if (this.pos < end_script) {
                content = this.input.substring(this.pos, end_script);
                this.pos = end_script;
            }
            return content;
        };

        /**
         * Registers an opening tag: stores the current indent level under
         * "<tag><n>", links it to the current parent and makes it the new parent.
         */
        this.record_tag = function (tag) {
            var count_key = tag + 'count';
            if (this.tags[count_key]) {
                this.tags[count_key]++;
            }
            else {
                this.tags[count_key] = 1;
            }
            var entry = tag + this.tags[count_key]; // e.g. 'div1'
            this.tags[entry] = this.indent_level;
            this.tags[entry + 'parent'] = this.tags.parent;
            this.tags.parent = entry;
        };

        /**
         * Handles a closing tag: if the most recent opener of that name is an
         * ancestor of the current position, restores its indent level and
         * parent; in every case the most recent opener record is discarded.
         * Closing tags without a recorded opener are ignored.
         */
        this.retrieve_tag = function (tag) {
            var count_key = tag + 'count';
            if (!this.tags[count_key]) {
                return;
            }
            var entry = tag + this.tags[count_key];
            var temp_parent = this.tags.parent;
            while (temp_parent) { // climb until the initial '' parent is reached
                if (entry === temp_parent) {
                    break;
                }
                temp_parent = this.tags[temp_parent + 'parent'];
            }
            if (temp_parent) {
                this.indent_level = this.tags[entry];
                this.tags.parent = this.tags[temp_parent + 'parent'];
            }
            delete this.tags[entry + 'parent'];
            delete this.tags[entry];
            if (this.tags[count_key] == 1) {
                delete this.tags[count_key];
            }
            else {
                this.tags[count_key]--;
            }
        };

        /**
         * Reads one complete tag (through '>'), normalising whitespace between
         * attributes, and classifies it by setting this.tag_type to one of
         * SINGLE, SCRIPT, STYLE, START or END. Unformatted elements, comments
         * and CDATA sections are swallowed whole and reported as SINGLE.
         * Returns the tag text, or ['', 'TK_EOF'] when input ends before any
         * character was read.
         */
        this.get_tag = function () {
            var input_char = '',
                content = [],
                space = false,
                tag_start,
                tag_end,
                comment;

            do {
                if (this.pos >= this.input.length) {
                    return content.length ? content.join('') : ['', 'TK_EOF'];
                }

                input_char = this.input.charAt(this.pos);
                this.pos++;
                this.line_char_count++;

                if (this.Utils.in_array(input_char, this.Utils.whitespace)) { // whitespace is re-inserted on demand below
                    space = true;
                    this.line_char_count--;
                    continue;
                }

                if (input_char === "'" || input_char === '"') {
                    if (content[1] !== '!') { // inside <!...> quotes are not treated specially
                        input_char += this.get_unformatted(input_char);
                        space = true;
                    }
                }

                if (input_char === '=') { // no space before =
                    space = false;
                }

                if (content.length && content[content.length - 1] !== '=' && input_char !== '>' && space) { // no space after = or before >
                    if (this.line_char_count >= this.max_char) {
                        this.print_newline(false, content);
                        this.line_char_count = 0;
                    }
                    else {
                        content.push(' ');
                        this.line_char_count++;
                    }
                    space = false;
                }
                if (input_char === '<') {
                    tag_start = this.pos - 1;
                }
                content.push(input_char); // a single character, or a whole quoted string
            } while (input_char !== '>');

            var tag_complete = content.join('');
            var tag_index;
            if (tag_complete.indexOf(' ') != -1) { // the tag name ends at the first space...
                tag_index = tag_complete.indexOf(' ');
            }
            else { // ...or at the closing bracket
                tag_index = tag_complete.indexOf('>');
            }
            var tag_check = tag_complete.substring(1, tag_index).toLowerCase();

            if (tag_complete.charAt(tag_complete.length - 2) === '/' ||
                this.Utils.in_array(tag_check, this.Utils.single_token)) { // self-closing, or a known void tag
                this.tag_type = 'SINGLE';
            }
            else if (tag_check === 'script') {
                this.record_tag(tag_check);
                this.tag_type = 'SCRIPT';
            }
            else if (tag_check === 'style') {
                this.record_tag(tag_check);
                this.tag_type = 'STYLE';
            }
            else if (this.Utils.in_array(tag_check, unformatted)) { // copy the whole element verbatim
                comment = this.get_unformatted('</' + tag_check + '>', tag_complete);
                content.push(comment);
                // Preserve one whitespace character on either side of the element if the source had any.
                if (tag_start > 0 && this.Utils.in_array(this.input.charAt(tag_start - 1), this.Utils.whitespace)) {
                    content.splice(0, 0, this.input.charAt(tag_start - 1));
                }
                tag_end = this.pos - 1;
                if (this.Utils.in_array(this.input.charAt(tag_end + 1), this.Utils.whitespace)) {
                    content.push(this.input.charAt(tag_end + 1));
                }
                this.tag_type = 'SINGLE';
            }
            else if (tag_check.charAt(0) === '!') { // comment, conditional comment or CDATA
                if (tag_check.indexOf('[if') != -1) { // <!--[if ...
                    if (tag_complete.indexOf('!IE') != -1) { // this form needs its closing -->
                        comment = this.get_unformatted('-->', tag_complete);
                        content.push(comment);
                    }
                    this.tag_type = 'START';
                }
                else if (tag_check.indexOf('[endif') != -1) { // <![endif]
                    this.tag_type = 'END';
                    this.unindent();
                }
                else if (tag_check.indexOf('[cdata[') != -1) { // <![CDATA[ ... ]]>
                    comment = this.get_unformatted(']]>', tag_complete);
                    content.push(comment);
                    this.tag_type = 'SINGLE';
                }
                else { // plain <!-- ... -->
                    comment = this.get_unformatted('-->', tag_complete);
                    content.push(comment);
                    this.tag_type = 'SINGLE';
                }
            }
            else {
                if (tag_check.charAt(0) === '/') { // closing tag: unwind to its opener
                    this.retrieve_tag(tag_check.substring(1));
                    this.tag_type = 'END';
                }
                else { // opening tag
                    this.record_tag(tag_check);
                    this.tag_type = 'START';
                }
                if (this.Utils.in_array(tag_check, this.Utils.extra_liners)) { // blank line before head, body, /html
                    this.print_newline(true, this.output);
                }
            }
            return content.join('');
        };

        /**
         * Copies input verbatim until the accumulated text contains `delimiter`
         * (compared against the lower-cased text) or the input ends, and
         * returns it including the delimiter. Line breaks are normalised to
         * '\n'. Returns '' immediately when `orig_tag` already contains the
         * delimiter.
         */
        this.get_unformatted = function (delimiter, orig_tag) {
            if (orig_tag && orig_tag.toLowerCase().indexOf(delimiter) != -1) {
                return '';
            }
            var input_char = '';
            var content = '';
            do {
                if (this.pos >= this.input.length) {
                    return content;
                }

                input_char = this.input.charAt(this.pos);
                this.pos++;

                if (input_char === '\n' || input_char === '\r') {
                    // A CRLF pair is one line break; emitting '\n' for each half
                    // would double every line of a <pre> block.
                    if (input_char === '\r' && this.input.charAt(this.pos) === '\n') {
                        this.pos++;
                    }
                    // Indentation is deliberately not re-applied inside unformatted blocks.
                    content += '\n';
                    this.line_char_count = 0;
                    continue;
                }
                content += input_char;
                this.line_char_count++;
            } while (content.toLowerCase().indexOf(delimiter) == -1);
            return content;
        };

        /**
         * Returns the next token as a [text, type] pair. The type is one of
         * TK_CONTENT, TK_TAG_<tag_type>, TK_SCRIPT, TK_STYLE or TK_EOF.
         */
        this.get_token = function () {
            var token;

            if (this.last_token === 'TK_TAG_SCRIPT' || this.last_token === 'TK_TAG_STYLE') { // raw script/style body follows
                var type = this.last_token.substring('TK_TAG_'.length); // 'SCRIPT' or 'STYLE'
                token = this.get_contents_to(type);
                if (typeof token !== 'string') {
                    return token;
                }
                return [token, 'TK_' + type];
            }
            if (this.current_mode === 'CONTENT') {
                token = this.get_content();
                if (typeof token !== 'string') {
                    return token;
                }
                return [token, 'TK_CONTENT'];
            }

            if (this.current_mode === 'TAG') {
                token = this.get_tag();
                if (typeof token !== 'string') {
                    return token;
                }
                return [token, 'TK_TAG_' + this.tag_type];
            }
        };

        /**
         * Returns the indent string for (current indent level + level) steps,
         * or '' when that sum is below 1.
         */
        this.get_full_indent = function (level) {
            level = this.indent_level + level || 0;
            if (level < 1) {
                return '';
            }
            return Array(level + 1).join(this.indent_string);
        };

        /**
         * Initialises input, output and formatting state, and installs the
         * output helpers print_newline, print_token, indent and unindent.
         */
        this.printer = function (js_source, indent_character, indent_size, max_char, brace_style) {

            this.input = js_source || '';
            this.output = [];
            this.indent_character = indent_character;
            this.indent_string = '';
            this.indent_size = indent_size;
            this.brace_style = brace_style;
            this.indent_level = 0;
            this.max_char = max_char;
            this.line_char_count = 0; // characters emitted on the current line, compared against max_char

            for (var i = 0; i < this.indent_size; i++) {
                this.indent_string += this.indent_character;
            }

            /**
             * Appends '\n' plus the current indentation to `arr`. Does nothing
             * for an empty array. Unless `ignore` is true, trailing whitespace
             * entries are removed first, so repeated calls do not stack blank lines.
             */
            this.print_newline = function (ignore, arr) {
                this.line_char_count = 0;
                if (!arr || !arr.length) {
                    return;
                }
                if (!ignore) {
                    while (this.Utils.in_array(arr[arr.length - 1], this.Utils.whitespace)) {
                        arr.pop();
                    }
                }
                arr.push('\n');
                for (var i = 0; i < this.indent_level; i++) {
                    arr.push(this.indent_string);
                }
            };

            this.print_token = function (text) {
                this.output.push(text);
            };

            this.indent = function () {
                this.indent_level++;
            };

            this.unindent = function () {
                if (this.indent_level > 0) {
                    this.indent_level--;
                }
            };
        };
        return this;
    }

    /*_____________________--------------------_____________________*/

    multi_parser = new Parser();
    multi_parser.printer(html_source, indent_character, indent_size, max_char, brace_style);

    var t,
        end_name_match,
        previous_output,
        opener_match,
        tag_check,
        text,
        beautifier,
        script_indent_level,
        indentation,
        white,
        _level,
        reindent;

    while (true) {
        t = multi_parser.get_token();
        multi_parser.token_text = t[0];
        multi_parser.token_type = t[1];

        if (multi_parser.token_type === 'TK_EOF') {
            break;
        }

        switch (multi_parser.token_type) {
            case 'TK_TAG_START':
                multi_parser.print_newline(false, multi_parser.output);
                multi_parser.print_token(multi_parser.token_text);
                multi_parser.indent();
                multi_parser.current_mode = 'CONTENT';
                break;
            case 'TK_TAG_STYLE':
            case 'TK_TAG_SCRIPT':
                multi_parser.print_newline(false, multi_parser.output);
                multi_parser.print_token(multi_parser.token_text);
                multi_parser.current_mode = 'CONTENT';
                break;
            case 'TK_TAG_END':
                // Break the line before the end tag only when no text precedes it and
                // the previous output is not this element's own opening tag.
                if (multi_parser.last_token === 'TK_CONTENT' && multi_parser.last_text === '') {
                    end_name_match = multi_parser.token_text.match(/\w+/);
                    // Nothing has been printed yet when the input starts with an end tag;
                    // treat that like "no opening tag found" instead of crashing.
                    previous_output = multi_parser.output.length ? multi_parser.output[multi_parser.output.length - 1] : '';
                    opener_match = previous_output.match(/<\s*(\w+)/);
                    if (opener_match === null || end_name_match === null || opener_match[1] !== end_name_match[0]) {
                        multi_parser.print_newline(true, multi_parser.output);
                    }
                }
                multi_parser.print_token(multi_parser.token_text);
                multi_parser.current_mode = 'CONTENT';
                break;
            case 'TK_TAG_SINGLE':
                // Don't add a newline before elements that should remain unformatted.
                // The name is lower-cased to match how get_tag classified the element.
                tag_check = multi_parser.token_text.match(/^\s*<([a-z]+)/i);
                if (!tag_check || !multi_parser.Utils.in_array(tag_check[1].toLowerCase(), unformatted)) {
                    multi_parser.print_newline(false, multi_parser.output);
                }
                multi_parser.print_token(multi_parser.token_text);
                multi_parser.current_mode = 'CONTENT';
                break;
            case 'TK_CONTENT':
                if (multi_parser.token_text !== '') {
                    multi_parser.print_token(multi_parser.token_text);
                }
                multi_parser.current_mode = 'TAG';
                break;
            case 'TK_STYLE':
            case 'TK_SCRIPT':
                if (multi_parser.token_text !== '') {
                    multi_parser.output.push('\n');
                    text = multi_parser.token_text;
                    // Use the external beautifiers only when they exist as global functions.
                    if (multi_parser.token_type == 'TK_SCRIPT') {
                        beautifier = typeof js_beautify == 'function' && js_beautify;
                    }
                    else {
                        beautifier = typeof css_beautify == 'function' && css_beautify;
                    }

                    if (options.indent_scripts == "keep") {
                        script_indent_level = 0;
                    }
                    else if (options.indent_scripts == "separate") {
                        script_indent_level = -multi_parser.indent_level;
                    }
                    else {
                        script_indent_level = 1;
                    }

                    indentation = multi_parser.get_full_indent(script_indent_level);
                    if (beautifier) {
                        text = beautifier(text.replace(/^\s*/, indentation), options);
                    }
                    else {
                        // No beautifier available: shift the existing lines so the first
                        // one lands on the target indentation.
                        white = text.match(/^\s*/)[0];
                        _level = white.match(/[^\n\r]*$/)[0].split(multi_parser.indent_string).length - 1;
                        reindent = multi_parser.get_full_indent(script_indent_level - _level);
                        text = text.replace(/^\s*/, indentation)
                            .replace(/\r\n|\r|\n/g, '\n' + reindent)
                            .replace(/\s*$/, '');
                    }
                    if (text) {
                        multi_parser.print_token(text);
                        multi_parser.print_newline(true, multi_parser.output);
                    }
                }
                multi_parser.current_mode = 'TAG';
                break;
        }
        multi_parser.last_token = multi_parser.token_type;
        multi_parser.last_text = multi_parser.token_text;
    }
    return multi_parser.output.join('');
}
533
534module.exports = {
535 prettyPrint: style_html
536};
\No newline at end of file