1 | ;
|
// Scanner states. Symbols are used so a state can never be confused with
// any character or string taken from the input.
const STATE_PLAINTEXT = Symbol('plaintext');
const STATE_HTML = Symbol('html');
const STATE_COMMENT = Symbol('comment');

/**
 * Remove HTML tags and comments from a string, keeping only the text that
 * appears outside of them.
 *
 * The input is scanned once, character by character, with a small state
 * machine (plain text / inside a tag / inside a comment):
 *  - Text outside tags is copied to the result unchanged.
 *  - A '<' immediately followed by a space or a newline is treated as a
 *    literal less-than sign and is kept together with that whitespace
 *    character.
 *  - Inside a tag, '<' and '>' that appear between matching quotes are
 *    ignored, and nested '<' ... '>' pairs are balanced via a depth counter.
 *  - '<!--' starts a comment, which ends at the first '>' preceded by '--'.
 *  - A tag or comment that is still open when the input ends is dropped,
 *    together with everything after its opening '<'.
 *
 * @param {string|String} [html=''] - Markup to strip.
 * @returns {string} The text content, or '' when `html` is neither a string
 *   primitive nor a String object.
 */
// eslint-disable-next-line @typescript-eslint/ban-types
function striptags(html = '') {
  // Anything that is not a string yields an empty string instead of throwing.
  if (typeof html !== 'string' && !(html instanceof String)) {
    return '';
  }

  let state = STATE_PLAINTEXT;
  let tagBuffer = ''; // characters of the tag/comment currently being skipped
  let depth = 0; // number of unbalanced nested '<' inside the current tag
  let quoteChar = ''; // the quote we are inside of within a tag, or ''
  let output = '';
  const { length } = html;

  for (let idx = 0; idx < length; idx++) {
    const char = html[idx];

    if (state === STATE_PLAINTEXT) {
      if (char === '<') {
        state = STATE_HTML;
        tagBuffer += char;
      } else {
        output += char;
      }
    } else if (state === STATE_HTML) {
      switch (char) {
        case '<':
          // A '<' inside a quoted attribute value is data; otherwise it is
          // a nested '<' that needs its own closing '>'.
          if (!quoteChar) {
            depth++;
          }
          break;

        case '>':
          // A '>' inside a quoted attribute value does not end the tag.
          if (quoteChar) {
            break;
          }
          // Closes a nested '<' (input such as '<<>>'), not the tag itself.
          if (depth) {
            depth--;
            break;
          }
          // End of the tag: discard it and resume copying text.
          state = STATE_PLAINTEXT;
          tagBuffer = '';
          break;

        case '"':
        case "'":
          if (char === quoteChar) {
            // Matching quote closes the quoted section.
            quoteChar = '';
          } else {
            // Opens a quoted section unless one is already open, in which
            // case the other kind of quote is just data.
            quoteChar = quoteChar || char;
          }
          tagBuffer += char;
          break;

        case '-':
          // '<!-' followed by this '-' completes the comment opener '<!--'.
          if (tagBuffer === '<!-') {
            state = STATE_COMMENT;
          }
          tagBuffer += char;
          break;

        case ' ':
        case '\n':
          if (tagBuffer === '<') {
            // '<' directly followed by whitespace is not a tag. Emit it as
            // text along with the whitespace character actually seen, so a
            // newline is preserved rather than being turned into a space.
            state = STATE_PLAINTEXT;
            output += `<${char}`;
            tagBuffer = '';
            break;
          }
          tagBuffer += char;
          break;

        default:
          tagBuffer += char;
          break;
      }
    } else if (state === STATE_COMMENT) {
      if (char === '>') {
        // Only '-->' ends the comment; any other '>' is comment content.
        if (tagBuffer.slice(-2) === '--') {
          state = STATE_PLAINTEXT;
        }
        tagBuffer = '';
      } else {
        tagBuffer += char;
      }
    }
  }

  return output;
}
|
// CommonJS export: the module's value is the striptags function itself.
module.exports = striptags;
|
105 | //# sourceMappingURL=strip_html.js.map |
\ | No newline at end of file |