// Retrieved from UNPKG (5.82 kB, JavaScript).
1// Copyright 2004 Erik Arvidsson. All Rights Reserved.
2//
3// This code is triple licensed using Apache Software License 2.0,
4// Mozilla Public License or GNU Public License
5//
6///////////////////////////////////////////////////////////////////////////////
7//
8// Licensed under the Apache License, Version 2.0 (the "License"); you may not
9// use this file except in compliance with the License. You may obtain a copy
10// of the License at http://www.apache.org/licenses/LICENSE-2.0
11//
12///////////////////////////////////////////////////////////////////////////////
13//
14// The contents of this file are subject to the Mozilla Public License
15// Version 1.1 (the "License"); you may not use this file except in
16// compliance with the License. You may obtain a copy of the License at
17// http://www.mozilla.org/MPL/
18//
19// Software distributed under the License is distributed on an "AS IS"
20// basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
21// License for the specific language governing rights and limitations
22// under the License.
23//
24// The Original Code is Simple HTML Parser.
25//
26// The Initial Developer of the Original Code is Erik Arvidsson.
// Portions created by Erik Arvidsson are Copyright (C) 2004. All Rights
// Reserved.
29//
30///////////////////////////////////////////////////////////////////////////////
31//
32// This program is free software; you can redistribute it and/or
33// modify it under the terms of the GNU General Public License
34// as published by the Free Software Foundation; either version 2
35// of the License, or (at your option) any later version.
36//
37// This program is distributed in the hope that it will be useful,
38// but WITHOUT ANY WARRANTY; without even the implied warranty of
39// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
40// GNU General Public License for more details.
41//
42// You should have received a copy of the GNU General Public License
43// along with this program; if not, write to the Free Software
44// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
45//
46///////////////////////////////////////////////////////////////////////////////
47
48/*
49var handler ={
50 startElement: function (sTagName, oAttrs) {},
51 endElement: function (sTagName) {},
52 characters: function (s) {},
53 comment: function (s) {}
54};
55*/
56
/**
 * Minimal SAX-style HTML tokenizer built on regular expressions.
 *
 * Create an instance and call `parse(html, handler)`. The parser walks the
 * string from left to right and reports what it finds through four handler
 * callbacks:
 *
 *   startElement(sTagName, aAttrs)   aAttrs is an array of { name, value }
 *   endElement(sTagName)
 *   characters(sText)
 *   comment(sText)
 *
 * No tree is built and tag nesting is not checked; tag and attribute names
 * are reported exactly as written in the input.
 */
function SimpleHtmlParser() {}

SimpleHtmlParser.prototype = {
  // Declared by the original API but never read by the parser itself; kept so
  // existing code that touches it keeps working.
  handler: null,

  // The object that actually receives the callbacks. parse() stores its
  // second argument here; it can also be assigned before calling parse().
  contentHandler: null,

  // regexps

  // Start tag at the beginning of the input.
  //   group 1: tag name
  //   group 2: raw attribute text (may be empty), later fed to attrRe
  startTagRe: /^<([^>\s\/]+)((\s+[^=>\s]+(\s*=\s*((\"[^"]*\")|(\'[^']*\')|[^>\s]+))?)*)\s*\/?\s*>/m,

  // End tag at the beginning of the input. group 1: tag name.
  endTagRe: /^<\/([^>\s]+)[^>]*>/m,

  // One attribute inside the raw attribute text.
  //   group 1: name
  //   group 2: the whole `= value` part (undefined for a bare attribute)
  //   group 3: value exactly as written (including quotes, if any)
  //   group 4: double-quoted value with quotes,  group 5: its content
  //   group 6: single-quoted value with quotes,  group 7: its content
  attrRe: /([^=\s]+)(\s*=\s*((\"([^"]*)\")|(\'([^']*)\')|[^>\s]+))?/gm,

  /**
   * Tokenizes `s` and reports every token to the content handler.
   *
   * A `<` that does not begin a complete comment, end tag or start tag is
   * reported as ordinary text, so malformed input always terminates. Text may
   * be delivered in more than one `characters()` call.
   *
   * @param {string} s - HTML source text.
   * @param {Object} [oHandler] - Handler object; when given it replaces
   *   `this.contentHandler` and stays in place for later calls.
   */
  parse: function (s, oHandler) {
    if (oHandler) this.contentHandler = oHandler

    var match, index, consumed
    while (s.length > 0) {
      consumed = false

      // Comment
      if (s.substring(0, 4) === '<!--') {
        index = s.indexOf('-->')
        if (index !== -1) {
          this.contentHandler.comment(s.substring(4, index))
          s = s.substring(index + 3)
          consumed = true
        }
      }
      // End tag
      else if (s.substring(0, 2) === '</') {
        match = this.endTagRe.exec(s)
        // The `m` flag lets `^` match at a later line start; only a match at
        // offset 0 is the tag we are looking at. Accepting a later one would
        // silently drop everything in front of it.
        if (match && match.index === 0) {
          this.parseEndTag(match[0], match[1])
          s = s.substring(match[0].length)
          consumed = true
        }
      }
      // Start tag
      else if (s.charAt(0) === '<') {
        match = this.startTagRe.exec(s)
        if (match && match.index === 0) {
          this.parseStartTag(match[0], match[1], match[2])
          s = s.substring(match[0].length)
          consumed = true
        }
      }

      if (!consumed) {
        // Plain text up to the next '<'. The search starts at offset 1 so an
        // unparseable leading '<' is emitted as text instead of being found
        // again at offset 0, which would never shorten `s`.
        index = s.indexOf('<', 1)
        if (index === -1) {
          this.contentHandler.characters(s)
          s = ''
        } else {
          this.contentHandler.characters(s.substring(0, index))
          s = s.substring(index)
        }
      }
    }
  },

  /**
   * Reports a start tag to the handler.
   *
   * @param {string} sTag - Complete tag text as matched by startTagRe.
   * @param {string} sTagName - Tag name (startTagRe group 1).
   * @param {string} sRest - Raw attribute text (startTagRe group 2).
   */
  parseStartTag: function (sTag, sTagName, sRest) {
    var attrs = this.parseAttributes(sTagName, sRest)
    this.contentHandler.startElement(sTagName, attrs)
  },

  /**
   * Reports an end tag to the handler.
   *
   * @param {string} sTag - Complete tag text as matched by endTagRe.
   * @param {string} sTagName - Tag name (endTagRe group 1).
   */
  parseEndTag: function (sTag, sTagName) {
    this.contentHandler.endElement(sTagName)
  },

  /**
   * Splits raw attribute text into attribute objects.
   *
   * @param {string} sTagName - Name of the tag the attributes belong to.
   * @param {string} s - Raw attribute text.
   * @returns {Array<{name: string, value: (string|null)}>} Attributes in
   *   source order.
   */
  parseAttributes: function (sTagName, s) {
    var oThis = this
    var attrs = []
    // replace() is used only to visit every match of the global regexp.
    // All seven capture groups are forwarded: parseAttribute reads the
    // single-quoted content from group 7.
    s.replace(this.attrRe, function (a0, a1, a2, a3, a4, a5, a6, a7) {
      attrs.push(
        oThis.parseAttribute(sTagName, a0, a1, a2, a3, a4, a5, a6, a7)
      )
    })
    return attrs
  },

  /**
   * Builds one attribute object from an attrRe match.
   *
   * Besides the named parameters, attrRe groups 2-7 are expected at
   * `arguments[3]` .. `arguments[8]`.
   *
   * @param {string} sTagName - Name of the owning tag.
   * @param {string} sAttribute - Complete attribute text.
   * @param {string} sName - Attribute name.
   * @returns {{name: string, value: (string|null)}} `value` is null for an
   *   attribute written without `=`, otherwise the value without its quotes.
   */
  parseAttribute: function (sTagName, sAttribute, sName) {
    var value = ''
    if (arguments[7]) value = arguments[8] // 'single-quoted'
    else if (arguments[5]) value = arguments[6] // "double-quoted"
    else if (arguments[3]) value = arguments[4] // unquoted

    // No value and no `= ...` part at all: a bare attribute such as `hidden`.
    var empty = !value && !arguments[3]
    return { name: sName, value: empty ? null : value }
  },
}
166
// CommonJS export: the parser constructor is this module's only export.
module.exports = SimpleHtmlParser