| Leo Repp | 58b9f11 | 2021-11-22 11:57:47 +0100 | [diff] [blame^] | 1 | ;(function (sax) { // wrapper for non-node envs |
// Public API: constructors and factories attached to the export object.
sax.SAXParser = SAXParser
sax.SAXStream = SAXStream
sax.createStream = createStream
// Factory form of the parser constructor.
sax.parser = function (strict, opt) {
  return new SAXParser(strict, opt)
}
| 6 | |
// Upper bound for the length of any single internal buffer.
// Once the parse position passes MAX_BUFFER_LENGTH, buffers start being
// checked for overruns. Every check schedules the next one for
// MAX_BUFFER_LENGTH - (length of the longest buffer), which is the earliest
// position at which an overrun could occur, so checks happen as rarely as
// possible while still enforcing the bound.
// Buffers are tested at most once per write(), so a very large string passed
// to write() may exceed the bound before it is noticed (at worst, one complete
// copy of that string gets buffered). The caller controls the chunk size, so
// this is considered acceptable.
// Set to Infinity to have unlimited buffers.
sax.MAX_BUFFER_LENGTH = 1024 * 64
| 17 | |
// Names of the parser properties that accumulate text while parsing.
// All of them are reset to '' together and checked against the length limit.
var buffers = [
  'comment',
  'sgmlDecl',
  'textNode',
  'tagName',
  'doctype',
  'procInstName',
  'procInstBody',
  'entity',
  'attribName',
  'attribValue',
  'cdata',
  'script'
]
| 23 | |
// Names of every event the parser can emit. A handler for event `x` is
// looked up on the parser as the property `onx`.
sax.EVENTS = [
  // document content
  'text', 'processinginstruction', 'sgmldeclaration', 'doctype', 'comment',
  // elements and attributes
  'opentagstart', 'attribute', 'opentag', 'closetag',
  // CDATA sections
  'opencdata', 'cdata', 'closecdata',
  // lifecycle
  'error', 'end', 'ready',
  // <script> contents and namespace bindings
  'script', 'opennamespace', 'closenamespace'
]
| 44 | |
/**
 * Parser constructor. May be called with or without `new`; it is also
 * re-applied to an existing instance to reset it.
 *
 * @param {boolean} strict - truthy to enable strict mode
 * @param {Object} [opt] - options object; it is stored on the parser as
 *   `opt` and its `lowercase` field is normalised in place
 */
function SAXParser (strict, opt) {
  if (!(this instanceof SAXParser)) {
    return new SAXParser(strict, opt)
  }

  var self = this
  clearBuffers(self)
  self.q = self.c = ''
  self.bufferCheckPosition = sax.MAX_BUFFER_LENGTH

  var options = self.opt = opt || {}
  // `lowercasetags` is accepted as an alias for `lowercase`
  options.lowercase = options.lowercase || options.lowercasetags
  self.looseCase = options.lowercase ? 'toLowerCase' : 'toUpperCase'

  self.tags = []
  self.closed = self.closedRoot = self.sawRoot = false
  self.tag = self.error = null
  self.strict = !!strict
  // strict mode implies no special <script> handling
  self.noscript = !!(strict || options.noscript)
  self.state = S.BEGIN
  self.strictEntities = options.strictEntities
  // per-parser entity table that inherits from the shared one
  self.ENTITIES = Object.create(
    self.strictEntities ? sax.XML_ENTITIES : sax.ENTITIES
  )
  self.attribList = []

  // Namespaces form a prototype chain: each tag's `ns` object inherits
  // from its parent's, and the parser's own `ns` is the root of the chain.
  if (options.xmlns) {
    self.ns = Object.create(rootNS)
  }

  // position tracking is on unless explicitly disabled;
  // it is mostly used for error reporting
  self.trackPosition = options.position !== false
  if (self.trackPosition) {
    self.position = self.line = self.column = 0
  }

  emit(self, 'onready')
}
| 81 | |
// Minimal fallback for engines without Object.create: returns a new object
// whose prototype is `o` (the property-descriptor argument is not supported).
if (!Object.create) {
  Object.create = function (o) {
    function F () {}
    F.prototype = o
    return new F()
  }
}
| 90 | |
// Minimal fallback for engines without Object.keys: collects the own
// enumerable property names of `o`.
if (!Object.keys) {
  Object.keys = function (o) {
    var names = []
    for (var key in o) {
      if (o.hasOwnProperty(key)) {
        names.push(key)
      }
    }
    return names
  }
}
| 98 | |
/**
 * Inspects every buffer on the parser and deals with the ones that exceed
 * the limit: text, cdata and script buffers are emitted early and emptied,
 * any other oversized buffer is reported through error(). Afterwards the
 * position of the next check is stored in `parser.bufferCheckPosition`.
 *
 * @param {SAXParser} parser
 */
function checkBufferLength (parser) {
  // never enforce a limit below 10 characters
  var limit = Math.max(sax.MAX_BUFFER_LENGTH, 10)
  var longest = 0

  buffers.forEach(function (name) {
    // measured before any flush below, so it is the pre-flush length
    var size = parser[name].length
    if (size > limit) {
      // Text/cdata/script nodes can legitimately get big; emit what has
      // been collected so far so that the buffer stops growing.
      if (name === 'textNode') {
        closeText(parser)
      } else if (name === 'cdata') {
        emitNode(parser, 'oncdata', parser.cdata)
        parser.cdata = ''
      } else if (name === 'script') {
        emitNode(parser, 'onscript', parser.script)
        parser.script = ''
      } else {
        error(parser, 'Max buffer length exceeded: ' + name)
      }
    }
    longest = Math.max(longest, size)
  })

  // schedule the next check for the earliest possible buffer overrun
  var headroom = sax.MAX_BUFFER_LENGTH - longest
  parser.bufferCheckPosition = headroom + parser.position
}
| 134 | |
/**
 * Resets every buffer property listed in `buffers` to the empty string.
 *
 * @param {SAXParser} parser
 */
function clearBuffers (parser) {
  buffers.forEach(function (name) {
    parser[name] = ''
  })
}
| 140 | |
/**
 * Emits whatever is pending in the text, cdata and script buffers and
 * empties them. Empty cdata/script buffers produce no event.
 *
 * @param {SAXParser} parser
 */
function flushBuffers (parser) {
  closeText(parser)

  var pending = [
    ['cdata', 'oncdata'],
    ['script', 'onscript']
  ]
  for (var k = 0; k < pending.length; k++) {
    var buffer = pending[k][0]
    var handler = pending[k][1]
    if (parser[buffer] !== '') {
      emitNode(parser, handler, parser[buffer])
      parser[buffer] = ''
    }
  }
}
| 152 | |
// Instance methods of the parser.
SAXParser.prototype = {
  // finish parsing
  end: function () {
    end(this)
  },

  // feed a chunk to the parser
  write: write,

  // clear the stored error so that write() can be called again
  resume: function () {
    this.error = null
    return this
  },

  // writing null is how the parser is told to end
  close: function () {
    return this.write(null)
  },

  // emit any buffered text/cdata/script content now
  flush: function () {
    flushBuffers(this)
  }
}
| 160 | |
// Base class for SAXStream: the `stream` module's Stream when it can be
// required, otherwise an empty constructor as a stand-in.
var Stream = (function () {
  try {
    return require('stream').Stream
  } catch (ex) {
    return function () {}
  }
})()
| 167 | |
// Parser events that SAXStream forwards generically. 'error' and 'end' are
// left out because SAXStream wires those two up itself.
var streamWraps = sax.EVENTS.filter(function (ev) {
  return !(ev === 'error' || ev === 'end')
})
| 171 | |
/**
 * Factory for SAXStream.
 *
 * @param {boolean} strict - passed through to the underlying parser
 * @param {Object} [opt] - passed through to the underlying parser
 * @returns {SAXStream}
 */
function createStream (strict, opt) {
  var stream = new SAXStream(strict, opt)
  return stream
}
| 175 | |
/**
 * Stream wrapper around a SAXParser. May be called with or without `new`.
 * Parser 'end' and 'error' events are re-emitted on the stream, and an
 * `on<event>` accessor is defined for every event in `streamWraps`.
 *
 * @param {boolean} strict - passed through to the underlying parser
 * @param {Object} [opt] - passed through to the underlying parser
 */
function SAXStream (strict, opt) {
  if (!(this instanceof SAXStream)) {
    return new SAXStream(strict, opt)
  }

  Stream.apply(this)

  var me = this
  me._parser = new SAXParser(strict, opt)
  me.writable = true
  me.readable = true

  me._parser.onend = function () {
    me.emit('end')
  }

  me._parser.onerror = function (er) {
    me.emit('error', er)

    // If emitting did not throw, the error was handled; clear it on the
    // parser so that more data can be written.
    me._parser.error = null
  }

  // created lazily by write() the first time a Buffer is received
  me._decoder = null

  streamWraps.forEach(function (ev) {
    var handlerName = 'on' + ev
    Object.defineProperty(me, handlerName, {
      // reading stream.on<ev> returns the handler installed on the parser
      get: function () {
        return me._parser[handlerName]
      },
      // a truthy value subscribes it as a listener; a falsy value removes
      // all listeners for the event and stores the value on the parser
      set: function (h) {
        if (h) {
          me.on(ev, h)
          return
        }
        me.removeAllListeners(ev)
        me._parser[handlerName] = h
        return h
      },
      enumerable: true,
      configurable: false
    })
  })
}
| 221 | |
// SAXStream inherits from Stream; `constructor` is restored as a
// non-enumerable, read-only property.
SAXStream.prototype = Object.create(Stream.prototype)
Object.defineProperty(SAXStream.prototype, 'constructor', {
  value: SAXStream
})
| 227 | |
/**
 * Feeds a chunk to the parser and re-emits it as a 'data' event.
 * Buffer chunks are decoded as UTF-8 through a StringDecoder that is kept
 * on the stream between calls.
 *
 * @param {Buffer|string|Object} data - chunk; non-Buffers are stringified
 *   with toString() before parsing
 * @returns {boolean} always true
 */
SAXStream.prototype.write = function (data) {
  var bufferAvailable = typeof Buffer === 'function' &&
    typeof Buffer.isBuffer === 'function'

  if (bufferAvailable && Buffer.isBuffer(data)) {
    if (!this._decoder) {
      var StringDecoder = require('string_decoder').StringDecoder
      this._decoder = new StringDecoder('utf8')
    }
    data = this._decoder.write(data)
  }

  var text = data.toString()
  this._parser.write(text)
  // 'data' carries the (possibly decoded) chunk, not the stringified copy
  this.emit('data', data)
  return true
}
| 243 | |
/**
 * Optionally writes a final chunk, then ends the underlying parser.
 *
 * @param {Buffer|string} [chunk] - written first when it has a non-zero length
 * @returns {boolean} always true
 */
SAXStream.prototype.end = function (chunk) {
  var hasData = chunk && chunk.length
  if (hasData) {
    this.write(chunk)
  }
  this._parser.end()
  return true
}
| 251 | |
/**
 * Subscribes `handler` to `ev`. For a forwarded parser event that has no
 * parser-side handler yet, a bridge is installed on the parser first which
 * re-emits the event (with all of its arguments) on the stream.
 *
 * @param {string} ev - event name
 * @param {Function} handler - listener
 * @returns whatever Stream.prototype.on returns
 */
SAXStream.prototype.on = function (ev, handler) {
  var me = this
  var handlerName = 'on' + ev

  if (!me._parser[handlerName] && streamWraps.indexOf(ev) !== -1) {
    me._parser[handlerName] = function () {
      var args = Array.prototype.slice.call(arguments)
      args.unshift(ev)
      me.emit.apply(me, args)
    }
  }

  return Stream.prototype.on.call(me, ev, handler)
}
| 264 | |
// Literal markers and the two namespaces that are always bound.
var CDATA = '[CDATA['
var DOCTYPE = 'DOCTYPE'
var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'
// root of the namespace prototype chain: the bindings for `xml` and `xmlns`
var rootNS = {
  xml: XML_NAMESPACE,
  xmlns: XMLNS_NAMESPACE
}
| 272 | |
// Character classes for XML names, see
// http://www.w3.org/TR/REC-xml/#NT-NameStartChar
// The parser looks at one string character (UTF-16 code unit) at a time, so
// these classes cannot cover astral-plane characters (10000-EFFFF); doing so
// would need a significant breaking change to the parser.

// first character of a name
var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/

// any subsequent character of a name
var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/

// same two classes with '#' added, used for entity names
var entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
var entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/
| 285 | |
/**
 * @param {string} c - a single character
 * @returns {boolean} true for space, newline, carriage return or tab
 */
function isWhitespace (c) {
  switch (c) {
    case ' ':
    case '\n':
    case '\r':
    case '\t':
      return true
    default:
      return false
  }
}
| 289 | |
/**
 * @param {string} c - a single character
 * @returns {boolean} true for a single or a double quote
 */
function isQuote (c) {
  if (c === '\'') {
    return true
  }
  return c === '"'
}
| 293 | |
/**
 * @param {string} c - a single character
 * @returns {boolean} true when `c` is '>' or whitespace
 */
function isAttribEnd (c) {
  if (c === '>') {
    return true
  }
  return isWhitespace(c)
}
| 297 | |
/**
 * @param {RegExp} regex - character class to test against
 * @param {string} c - character to test
 * @returns {boolean} result of regex.test(c)
 */
function isMatch (regex, c) {
  var matched = regex.test(c)
  return matched
}
| 301 | |
/**
 * Negation of isMatch.
 *
 * @param {RegExp} regex - character class to test against
 * @param {string} c - character to test
 * @returns {boolean} true when `c` does not match `regex`
 */
function notMatch (regex, c) {
  if (isMatch(regex, c)) {
    return false
  }
  return true
}
| 305 | |
// Parser states. `S` is used as a counter while the table is built so that
// each state gets the next integer id, starting at 0.
var S = 0
sax.STATE = {
  BEGIN: S++,                 // leading byte order mark or whitespace
  BEGIN_WHITESPACE: S++,      // leading whitespace
  TEXT: S++,                  // general stuff
  TEXT_ENTITY: S++,           // & and such.
  OPEN_WAKA: S++,             // <
  SGML_DECL: S++,             // <!BLARG
  SGML_DECL_QUOTED: S++,      // <!BLARG foo "bar
  DOCTYPE: S++,               // <!DOCTYPE
  DOCTYPE_QUOTED: S++,        // <!DOCTYPE "//blah
  DOCTYPE_DTD: S++,           // <!DOCTYPE "//blah" [ ...
  DOCTYPE_DTD_QUOTED: S++,    // <!DOCTYPE "//blah" [ "foo
  COMMENT_STARTING: S++,      // <!-
  COMMENT: S++,               // <!--
  COMMENT_ENDING: S++,        // <!-- blah -
  COMMENT_ENDED: S++,         // <!-- blah --
  CDATA: S++,                 // <![CDATA[ something
  CDATA_ENDING: S++,          // ]
  CDATA_ENDING_2: S++,        // ]]
  PROC_INST: S++,             // <?hi
  PROC_INST_BODY: S++,        // <?hi there
  PROC_INST_ENDING: S++,      // <?hi "there" ?
  OPEN_TAG: S++,              // <strong
  OPEN_TAG_SLASH: S++,        // <strong /
  ATTRIB: S++,                // <a
  ATTRIB_NAME: S++,           // <a foo
  ATTRIB_NAME_SAW_WHITE: S++, // <a foo _
  ATTRIB_VALUE: S++,          // <a foo=
  ATTRIB_VALUE_QUOTED: S++,   // <a foo="bar
  ATTRIB_VALUE_CLOSED: S++,   // <a foo="bar"
  ATTRIB_VALUE_UNQUOTED: S++, // <a foo=bar
  ATTRIB_VALUE_ENTITY_Q: S++, // entity inside a quoted attribute value
  ATTRIB_VALUE_ENTITY_U: S++, // entity inside an unquoted attribute value
  CLOSE_TAG: S++,             // </a
  CLOSE_TAG_SAW_WHITE: S++,   // </a >
  SCRIPT: S++,                // <script> ...
  SCRIPT_ENDING: S++          // <script> ... <
}
| 345 | |
// The five entities available when the `strictEntities` option is set.
sax.XML_ENTITIES = {
  'amp': '&', 'gt': '>', 'lt': '<', 'quot': '"', 'apos': "'"
}
| 353 | |
// Default entity table (used unless `strictEntities` is set).
// Values are either the replacement string itself or a character code;
// the codes are converted to strings right after this table is defined.
sax.ENTITIES = {
  'amp': '&', 'gt': '>', 'lt': '<', 'quot': '"', 'apos': "'",

  'AElig': 198, 'Aacute': 193, 'Acirc': 194, 'Agrave': 192, 'Aring': 197,
  'Atilde': 195, 'Auml': 196, 'Ccedil': 199, 'ETH': 208, 'Eacute': 201,
  'Ecirc': 202, 'Egrave': 200, 'Euml': 203, 'Iacute': 205, 'Icirc': 206,
  'Igrave': 204, 'Iuml': 207, 'Ntilde': 209, 'Oacute': 211, 'Ocirc': 212,
  'Ograve': 210, 'Oslash': 216, 'Otilde': 213, 'Ouml': 214, 'THORN': 222,
  'Uacute': 218, 'Ucirc': 219, 'Ugrave': 217, 'Uuml': 220, 'Yacute': 221,

  'aacute': 225, 'acirc': 226, 'aelig': 230, 'agrave': 224, 'aring': 229,
  'atilde': 227, 'auml': 228, 'ccedil': 231, 'eacute': 233, 'ecirc': 234,
  'egrave': 232, 'eth': 240, 'euml': 235, 'iacute': 237, 'icirc': 238,
  'igrave': 236, 'iuml': 239, 'ntilde': 241, 'oacute': 243, 'ocirc': 244,
  'ograve': 242, 'oslash': 248, 'otilde': 245, 'ouml': 246, 'szlig': 223,
  'thorn': 254, 'uacute': 250, 'ucirc': 251, 'ugrave': 249, 'uuml': 252,
  'yacute': 253, 'yuml': 255,

  'copy': 169, 'reg': 174, 'nbsp': 160, 'iexcl': 161, 'cent': 162,
  'pound': 163, 'curren': 164, 'yen': 165, 'brvbar': 166, 'sect': 167,
  'uml': 168, 'ordf': 170, 'laquo': 171, 'not': 172, 'shy': 173,
  'macr': 175, 'deg': 176, 'plusmn': 177, 'sup1': 185, 'sup2': 178,
  'sup3': 179, 'acute': 180, 'micro': 181, 'para': 182, 'middot': 183,
  'cedil': 184, 'ordm': 186, 'raquo': 187, 'frac14': 188, 'frac12': 189,
  'frac34': 190, 'iquest': 191, 'times': 215, 'divide': 247,

  'OElig': 338, 'oelig': 339, 'Scaron': 352, 'scaron': 353, 'Yuml': 376,
  'fnof': 402, 'circ': 710, 'tilde': 732,

  'Alpha': 913, 'Beta': 914, 'Gamma': 915, 'Delta': 916, 'Epsilon': 917,
  'Zeta': 918, 'Eta': 919, 'Theta': 920, 'Iota': 921, 'Kappa': 922,
  'Lambda': 923, 'Mu': 924, 'Nu': 925, 'Xi': 926, 'Omicron': 927,
  'Pi': 928, 'Rho': 929, 'Sigma': 931, 'Tau': 932, 'Upsilon': 933,
  'Phi': 934, 'Chi': 935, 'Psi': 936, 'Omega': 937,

  'alpha': 945, 'beta': 946, 'gamma': 947, 'delta': 948, 'epsilon': 949,
  'zeta': 950, 'eta': 951, 'theta': 952, 'iota': 953, 'kappa': 954,
  'lambda': 955, 'mu': 956, 'nu': 957, 'xi': 958, 'omicron': 959,
  'pi': 960, 'rho': 961, 'sigmaf': 962, 'sigma': 963, 'tau': 964,
  'upsilon': 965, 'phi': 966, 'chi': 967, 'psi': 968, 'omega': 969,
  'thetasym': 977, 'upsih': 978, 'piv': 982,

  'ensp': 8194, 'emsp': 8195, 'thinsp': 8201, 'zwnj': 8204, 'zwj': 8205,
  'lrm': 8206, 'rlm': 8207, 'ndash': 8211, 'mdash': 8212, 'lsquo': 8216,
  'rsquo': 8217, 'sbquo': 8218, 'ldquo': 8220, 'rdquo': 8221, 'bdquo': 8222,
  'dagger': 8224, 'Dagger': 8225, 'bull': 8226, 'hellip': 8230,
  'permil': 8240, 'prime': 8242, 'Prime': 8243, 'lsaquo': 8249,
  'rsaquo': 8250, 'oline': 8254, 'frasl': 8260, 'euro': 8364,
  'image': 8465, 'weierp': 8472, 'real': 8476, 'trade': 8482,
  'alefsym': 8501,

  'larr': 8592, 'uarr': 8593, 'rarr': 8594, 'darr': 8595, 'harr': 8596,
  'crarr': 8629, 'lArr': 8656, 'uArr': 8657, 'rArr': 8658, 'dArr': 8659,
  'hArr': 8660,

  'forall': 8704, 'part': 8706, 'exist': 8707, 'empty': 8709, 'nabla': 8711,
  'isin': 8712, 'notin': 8713, 'ni': 8715, 'prod': 8719, 'sum': 8721,
  'minus': 8722, 'lowast': 8727, 'radic': 8730, 'prop': 8733, 'infin': 8734,
  'ang': 8736, 'and': 8743, 'or': 8744, 'cap': 8745, 'cup': 8746,
  'int': 8747, 'there4': 8756, 'sim': 8764, 'cong': 8773, 'asymp': 8776,
  'ne': 8800, 'equiv': 8801, 'le': 8804, 'ge': 8805, 'sub': 8834,
  'sup': 8835, 'nsub': 8836, 'sube': 8838, 'supe': 8839, 'oplus': 8853,
  'otimes': 8855, 'perp': 8869, 'sdot': 8901, 'lceil': 8968, 'rceil': 8969,
  'lfloor': 8970, 'rfloor': 8971, 'lang': 9001, 'rang': 9002, 'loz': 9674,
  'spades': 9824, 'clubs': 9827, 'hearts': 9829, 'diams': 9830
}
| 609 | |
// Replace every numeric character code in sax.ENTITIES with the
// one-character string it stands for; string entries are left as they are.
Object.keys(sax.ENTITIES).forEach(function (name) {
  var value = sax.ENTITIES[name]
  if (typeof value === 'number') {
    sax.ENTITIES[name] = String.fromCharCode(value)
  }
})
| 615 | |
// Add the reverse mapping (numeric id -> state name) to sax.STATE.
// The names are snapshotted with Object.keys first: the loop adds new
// properties to the very object it walks, and whether a `for...in` loop
// visits properties added during enumeration is not guaranteed.
Object.keys(sax.STATE).forEach(function (name) {
  sax.STATE[sax.STATE[name]] = name
})

// shorthand: from here on `S` is the state table, no longer a counter
S = sax.STATE
| 622 | |
/**
 * Calls the handler stored on the parser under `event`, if there is one.
 * The handler is invoked as a method of the parser.
 *
 * @param {Object} parser
 * @param {string} event - handler property name, e.g. 'ontext'
 * @param {*} [data] - single argument handed to the handler
 */
function emit (parser, event, data) {
  if (parser[event]) {
    parser[event](data)
  }
}
| 626 | |
/**
 * Emits a node event, first flushing any pending text so that the text
 * event comes before the node.
 *
 * @param {SAXParser} parser
 * @param {string} nodeType - handler property name, e.g. 'onopentag'
 * @param {*} [data] - event payload
 */
function emitNode (parser, nodeType, data) {
  var hasPendingText = !!parser.textNode
  if (hasPendingText) {
    closeText(parser)
  }
  emit(parser, nodeType, data)
}
| 631 | |
/**
 * Applies the text options to the buffered text, emits it as 'ontext' when
 * anything is left, and empties the text buffer.
 *
 * @param {SAXParser} parser
 */
function closeText (parser) {
  var text = textopts(parser.opt, parser.textNode)
  // store the processed text before emitting it
  parser.textNode = text
  if (text) {
    emit(parser, 'ontext', text)
  }
  parser.textNode = ''
}
| 637 | |
/**
 * Applies the `trim` and `normalize` options to a piece of text.
 *
 * @param {Object} opt - parser options
 * @param {string} text
 * @returns {string} trimmed when `opt.trim` is set, and with every run of
 *   whitespace collapsed to one space when `opt.normalize` is set
 */
function textopts (opt, text) {
  var result = text
  if (opt.trim) {
    result = result.trim()
  }
  if (opt.normalize) {
    result = result.replace(/\s+/g, ' ')
  }
  return result
}
| 643 | |
/**
 * Records an error on the parser and reports it through 'onerror'.
 * Pending text is flushed first. When position tracking is on, the line,
 * column and current character are appended to the message.
 *
 * @param {SAXParser} parser
 * @param {string} er - error message
 * @returns {SAXParser} the parser
 */
function error (parser, er) {
  closeText(parser)

  var message = er
  if (parser.trackPosition) {
    message += '\nLine: ' + parser.line +
      '\nColumn: ' + parser.column +
      '\nChar: ' + parser.c
  }

  var failure = new Error(message)
  parser.error = failure
  emit(parser, 'onerror', failure)
  return parser
}
| 656 | |
/**
 * Finishes parsing: reports an unclosed root (strict mode) or an input that
 * stopped in the middle of a construct, flushes pending text, emits 'onend'
 * and then re-initialises the parser with its current settings.
 *
 * @param {SAXParser} parser
 * @returns {SAXParser} the parser
 */
function end (parser) {
  if (parser.sawRoot && !parser.closedRoot) {
    strictFail(parser, 'Unclosed root tag')
  }

  // the only states in which the input may legitimately stop
  var restingStates = [S.BEGIN, S.BEGIN_WHITESPACE, S.TEXT]
  if (restingStates.indexOf(parser.state) === -1) {
    error(parser, 'Unexpected end')
  }

  closeText(parser)
  parser.c = ''
  parser.closed = true
  emit(parser, 'onend')

  // reset by running the constructor again on the same object
  SAXParser.call(parser, parser.strict, parser.opt)
  return parser
}
| 671 | |
/**
 * Reports `message` as an error, but only when the parser is in strict mode.
 *
 * @param {SAXParser} parser
 * @param {string} message
 * @throws {Error} when `parser` is not a SAXParser instance
 */
function strictFail (parser, message) {
  var isParser = typeof parser === 'object' && parser instanceof SAXParser
  if (!isParser) {
    throw new Error('bad call to strictFail')
  }
  if (!parser.strict) {
    return
  }
  error(parser, message)
}
| 680 | |
/**
 * Starts a new tag from the collected tag name: case-folds the name in
 * non-strict mode, creates `parser.tag`, clears the deferred attribute list
 * and emits 'onopentagstart'.
 *
 * @param {SAXParser} parser
 */
function newTag (parser) {
  if (!parser.strict) {
    parser.tagName = parser.tagName[parser.looseCase]()
  }

  // the innermost open tag, or the parser itself at the root
  var parent = parser.tags[parser.tags.length - 1] || parser
  var tag = { name: parser.tagName, attributes: {} }
  parser.tag = tag

  // inherited for now; replaced if the tag contains an xmlns="foo" or
  // xmlns:foo="bar" attribute
  if (parser.opt.xmlns) {
    tag.ns = parent.ns
  }

  parser.attribList.length = 0
  emitNode(parser, 'onopentagstart', tag)
}
| 693 | |
/**
 * Splits a qualified name into prefix and local part at ':'.
 * A name without a colon has an empty prefix. Only the first two
 * colon-separated pieces are used.
 *
 * @param {string} name - tag or attribute name
 * @param {boolean} [attribute] - truthy when `name` is an attribute name;
 *   the bare attribute `xmlns` then yields prefix 'xmlns' and local ''
 * @returns {{prefix: string, local: string}}
 */
function qname (name, attribute) {
  // <x "xmlns"="http://foo">
  if (attribute && name === 'xmlns') {
    return { prefix: 'xmlns', local: '' }
  }

  var pieces = name.indexOf(':') === -1 ? [ '', name ] : name.split(':')
  return { prefix: pieces[0], local: pieces[1] }
}
| 708 | |
/**
 * Finishes the attribute currently held in `parser.attribName` /
 * `parser.attribValue` and resets both buffers.
 *
 * A repeated attribute name on the same tag is ignored (first one wins).
 * In xmlns mode, namespace-binding attributes are pushed into the tag's
 * namespace scope and the attribute is queued on `parser.attribList`; the
 * 'onattribute' events are emitted later, once the whole tag has been seen.
 * Otherwise the attribute is stored on the tag and emitted right away.
 *
 * @param {SAXParser} parser
 */
function attrib (parser) {
  if (!parser.strict) {
    parser.attribName = parser.attribName[parser.looseCase]()
  }

  var name = parser.attribName

  // attribList holds [name, value] pairs, so the names have to be compared
  // explicitly (an indexOf(name) on the list can never match).
  var alreadyQueued = parser.attribList.some(function (pair) {
    return pair[0] === name
  })
  // Go through Object.prototype: `attributes` is keyed by document-supplied
  // names and may itself contain an entry called "hasOwnProperty".
  var alreadyStored = Object.prototype.hasOwnProperty.call(
    parser.tag.attributes, name)

  if (alreadyQueued || alreadyStored) {
    parser.attribName = parser.attribValue = ''
    return
  }

  if (parser.opt.xmlns) {
    var qn = qname(parser.attribName, true)
    var prefix = qn.prefix
    var local = qn.local

    if (prefix === 'xmlns') {
      // namespace binding attribute. push the binding into scope
      if (local === 'xml' && parser.attribValue !== XML_NAMESPACE) {
        strictFail(parser,
          'xml: prefix must be bound to ' + XML_NAMESPACE + '\n' +
          'Actual: ' + parser.attribValue)
      } else if (local === 'xmlns' && parser.attribValue !== XMLNS_NAMESPACE) {
        strictFail(parser,
          'xmlns: prefix must be bound to ' + XMLNS_NAMESPACE + '\n' +
          'Actual: ' + parser.attribValue)
      } else {
        var tag = parser.tag
        var parent = parser.tags[parser.tags.length - 1] || parser
        // give the tag its own scope the first time it adds a binding
        if (tag.ns === parent.ns) {
          tag.ns = Object.create(parent.ns)
        }
        tag.ns[local] = parser.attribValue
      }
    }

    // defer onattribute events until all attributes have been seen
    // so any new bindings can take effect. preserve attribute order
    // so deferred events can be emitted in document order
    parser.attribList.push([parser.attribName, parser.attribValue])
  } else {
    // in non-xmlns mode, we can emit the event right away
    parser.tag.attributes[parser.attribName] = parser.attribValue
    emitNode(parser, 'onattribute', {
      name: parser.attribName,
      value: parser.attribValue
    })
  }

  parser.attribName = parser.attribValue = ''
}
| 760 | |
/**
 * Completes the current open tag: in xmlns mode resolves the tag's
 * namespace, emits 'onopennamespace' for the tag's own bindings and emits
 * the deferred 'onattribute' events; then pushes the tag on the stack,
 * emits 'onopentag' and selects the next state.
 *
 * @param {SAXParser} parser
 * @param {boolean} [selfClosing] - truthy for a self-closing tag; the
 *   parser state, `tag` and `tagName` are then left untouched
 */
function openTag (parser, selfClosing) {
  if (parser.opt.xmlns) {
    var tag = parser.tag

    // add namespace info to tag
    var tagQName = qname(parser.tagName)
    tag.prefix = tagQName.prefix
    tag.local = tagQName.local
    tag.uri = tag.ns[tagQName.prefix] || ''

    if (tag.prefix && !tag.uri) {
      strictFail(parser, 'Unbound namespace prefix: ' +
        JSON.stringify(parser.tagName))
      // fall back to the prefix itself as uri
      tag.uri = tagQName.prefix
    }

    // emit namespace binding events when the tag has a scope of its own
    var parent = parser.tags[parser.tags.length - 1] || parser
    if (tag.ns && parent.ns !== tag.ns) {
      Object.keys(tag.ns).forEach(function (p) {
        emitNode(parser, 'onopennamespace', { prefix: p, uri: tag.ns[p] })
      })
    }

    // handle deferred onattribute events
    // Note: do not apply default ns to attributes:
    // http://www.w3.org/TR/REC-xml-names/#defaulting
    for (var i = 0, l = parser.attribList.length; i < l; i++) {
      var pair = parser.attribList[i]
      var attrName = pair[0]
      var attrQName = qname(attrName, true)
      var attrPrefix = attrQName.prefix
      var a = {
        name: attrName,
        value: pair[1],
        prefix: attrPrefix,
        local: attrQName.local,
        uri: attrPrefix === '' ? '' : (tag.ns[attrPrefix] || '')
      }

      // an attribute with an unbound prefix fails now (strict mode) and
      // gets the prefix itself as uri
      if (attrPrefix && attrPrefix !== 'xmlns' && !a.uri) {
        strictFail(parser, 'Unbound namespace prefix: ' +
          JSON.stringify(attrPrefix))
        a.uri = attrPrefix
      }
      parser.tag.attributes[attrName] = a
      emitNode(parser, 'onattribute', a)
    }
    parser.attribList.length = 0
  }

  parser.tag.isSelfClosing = !!selfClosing

  // process the tag
  parser.sawRoot = true
  parser.tags.push(parser.tag)
  emitNode(parser, 'onopentag', parser.tag)

  if (!selfClosing) {
    // special case for <script> unless script handling is disabled
    var startsScript = !parser.noscript &&
      parser.tagName.toLowerCase() === 'script'
    parser.state = startsScript ? S.SCRIPT : S.TEXT
    parser.tag = null
    parser.tagName = ''
  }

  parser.attribName = parser.attribValue = ''
  parser.attribList.length = 0
}
| 839 | |
/**
 * Handles a complete closing tag whose name is in `parser.tagName`.
 *
 * - An empty name (`</>`) is kept as text.
 * - While script content is buffered, any closing tag other than
 *   `</script>` (compared case-insensitively, like the check that enters
 *   script mode) is appended to the script text; `</script>` emits the
 *   buffered script.
 * - Otherwise the tag stack is searched from the top for a matching open
 *   tag. If none exists the closing tag is kept as text; if one exists,
 *   every tag above it and the tag itself are closed in order, emitting
 *   'onclosetag' and, in xmlns mode, 'onclosenamespace' for each binding
 *   the tag introduced.
 *
 * @param {SAXParser} parser
 */
function closeTag (parser) {
  if (!parser.tagName) {
    strictFail(parser, 'Weird empty close tag.')
    parser.textNode += '</>'
    parser.state = S.TEXT
    return
  }

  if (parser.script) {
    if (parser.tagName.toLowerCase() !== 'script') {
      // not the end of the script: the tag is part of the script text
      parser.script += '</' + parser.tagName + '>'
      parser.tagName = ''
      parser.state = S.SCRIPT
      return
    }
    emitNode(parser, 'onscript', parser.script)
    parser.script = ''
  }

  // first make sure that the closing tag actually exists.
  // <a><b></c></b></a> will close everything, otherwise.
  var t = parser.tags.length
  var tagName = parser.tagName
  if (!parser.strict) {
    tagName = tagName[parser.looseCase]()
  }
  var closeTo = tagName
  while (t--) {
    var close = parser.tags[t]
    if (close.name !== closeTo) {
      // fail the first time in strict mode
      strictFail(parser, 'Unexpected close tag')
    } else {
      break
    }
  }

  // didn't find it. we already failed for strict, so just abort.
  if (t < 0) {
    strictFail(parser, 'Unmatched closing tag: ' + parser.tagName)
    parser.textNode += '</' + parser.tagName + '>'
    parser.state = S.TEXT
    return
  }

  parser.tagName = tagName
  var s = parser.tags.length
  // pop and close everything down to (and including) the matching tag
  while (s-- > t) {
    var tag = parser.tag = parser.tags.pop()
    parser.tagName = parser.tag.name
    emitNode(parser, 'onclosetag', parser.tagName)

    var parent = parser.tags[parser.tags.length - 1] || parser
    if (parser.opt.xmlns && tag.ns !== parent.ns) {
      // remove namespace bindings introduced by tag
      Object.keys(tag.ns).forEach(function (p) {
        var n = tag.ns[p]
        emitNode(parser, 'onclosenamespace', { prefix: p, uri: n })
      })
    }
  }

  // the match was the bottom of the stack, so the root is now closed
  if (t === 0) parser.closedRoot = true
  parser.tagName = parser.attribValue = parser.attribName = ''
  parser.attribList.length = 0
  parser.state = S.TEXT
}
| 910 | |
/**
 * Resolves the entity name collected in `parser.entity`.
 *
 * Named entities are looked up in `parser.ENTITIES`, first as written and
 * then lower-cased. `#NNN` and `#xHHH` are decoded as decimal / hexadecimal
 * character references. Anything that cannot be resolved is reported via
 * strictFail and returned verbatim as `&name;`.
 *
 * @param {SAXParser} parser
 * @returns {string} the replacement text
 */
function parseEntity (parser) {
  var entity = parser.entity
  var entityLC = entity.toLowerCase()
  var num
  var numStr = ''

  // The entity table inherits from Object.prototype, so a name taken from
  // the document (e.g. "constructor" or "__proto__") can resolve to a
  // function or an object; only primitive values count as entities.
  function lookup (name) {
    var value = parser.ENTITIES[name]
    var type = typeof value
    return (value && type !== 'function' && type !== 'object')
      ? value
      : undefined
  }

  var named = lookup(entity) || lookup(entityLC)
  if (named) {
    return named
  }

  entity = entityLC
  if (entity.charAt(0) === '#') {
    if (entity.charAt(1) === 'x') {
      entity = entity.slice(2)
      num = parseInt(entity, 16)
      numStr = num.toString(16)
    } else {
      entity = entity.slice(1)
      num = parseInt(entity, 10)
      numStr = num.toString(10)
    }
  }
  // leading zeros are allowed; apart from them the digits must round-trip,
  // which rejects trailing garbage such as "#12abc"
  entity = entity.replace(/^0+/, '')

  // String.fromCodePoint throws a RangeError outside 0..0x10FFFF, so such
  // references (e.g. "#x110000", "#-5") are treated as invalid instead.
  var outOfRange = num < 0 || num > 0x10FFFF
  if (isNaN(num) || numStr.toLowerCase() !== entity || outOfRange) {
    strictFail(parser, 'Invalid character entity')
    return '&' + parser.entity + ';'
  }

  return String.fromCodePoint(num)
}
| 943 | |
/**
 * Handles one character before the first tag: '<' opens a tag, whitespace
 * is skipped, and anything else starts a text node (an error in strict mode).
 *
 * @param {SAXParser} parser
 * @param {string} c - current character
 */
function beginWhiteSpace (parser, c) {
  if (c === '<') {
    parser.state = S.OPEN_WAKA
    parser.startTagPosition = parser.position
    return
  }

  if (isWhitespace(c)) {
    return
  }

  // have to process this as a text node.
  // weird, but happens.
  strictFail(parser, 'Non-whitespace before first tag.')
  parser.textNode = c
  parser.state = S.TEXT
}
| 956 | |
/**
 * Bounds-checked character access.
 *
 * @param {string} chunk
 * @param {number} i - index
 * @returns {string} the character at `i`, or '' when `i` is not below the
 *   chunk length
 */
function charAt (chunk, i) {
  return i < chunk.length ? chunk.charAt(i) : ''
}
| 964 | |
/**
 * Feeds one chunk of input to the parser (the parser is `this`).
 *
 * Every character is dispatched through the `parser.state` switch below, which
 * appends to the parser's buffers (textNode, tagName, attribValue, ...) and
 * calls the emit/open/close helpers when a construct is complete. All state
 * lives on the parser object, so the loop simply stops at the end of the chunk
 * and resumes from the stored state on the next call.
 *
 * @param {?(string|Object)} chunk Text to parse. `null` ends the document via
 *   end(parser); any other object is converted with toString().
 * @returns {*} The parser, or whatever error()/end() return on the
 *   early-exit paths.
 * @throws Rethrows `this.error` when one is set, and throws an Error
 *   ('Unknown state: ...') when parser.state matches no case.
 */
function write (chunk) {
  var parser = this
  if (this.error) {
    throw this.error
  }
  if (parser.closed) {
    return error(parser,
      'Cannot write after close. Assign an onready handler.')
  }
  if (chunk === null) {
    return end(parser)
  }
  if (typeof chunk === 'object') {
    chunk = chunk.toString()
  }
  var i = 0
  var c = ''
  while (true) {
    c = charAt(chunk, i++)
    parser.c = c

    // charAt() yields '' past the end of the chunk.
    if (!c) {
      break
    }

    if (parser.trackPosition) {
      parser.position++
      if (c === '\n') {
        parser.line++
        parser.column = 0
      } else {
        parser.column++
      }
    }

    switch (parser.state) {
      case S.BEGIN:
        parser.state = S.BEGIN_WHITESPACE
        // a leading byte-order mark is skipped
        if (c === '\uFEFF') {
          continue
        }
        beginWhiteSpace(parser, c)
        continue

      case S.BEGIN_WHITESPACE:
        beginWhiteSpace(parser, c)
        continue

      case S.TEXT:
        if (parser.sawRoot && !parser.closedRoot) {
          // Inside the root element: scan ahead to the next '<' or '&' (or the
          // end of the chunk) and append the whole run in one substring call.
          var starti = i - 1
          while (c && c !== '<' && c !== '&') {
            c = charAt(chunk, i++)
            if (c && parser.trackPosition) {
              parser.position++
              if (c === '\n') {
                parser.line++
                parser.column = 0
              } else {
                parser.column++
              }
            }
          }
          parser.textNode += chunk.substring(starti, i - 1)
        }
        if (c === '<' && !(parser.sawRoot && parser.closedRoot && !parser.strict)) {
          parser.state = S.OPEN_WAKA
          parser.startTagPosition = parser.position
        } else {
          if (!isWhitespace(c) && (!parser.sawRoot || parser.closedRoot)) {
            strictFail(parser, 'Text data outside of root node.')
          }
          if (c === '&') {
            parser.state = S.TEXT_ENTITY
          } else {
            parser.textNode += c
          }
        }
        continue

      case S.SCRIPT:
        // only non-strict
        if (c === '<') {
          parser.state = S.SCRIPT_ENDING
        } else {
          parser.script += c
        }
        continue

      case S.SCRIPT_ENDING:
        if (c === '/') {
          parser.state = S.CLOSE_TAG
        } else {
          parser.script += '<' + c
          parser.state = S.SCRIPT
        }
        continue

      case S.OPEN_WAKA:
        // either a /, ?, !, or text is coming next.
        if (c === '!') {
          parser.state = S.SGML_DECL
          parser.sgmlDecl = ''
        } else if (isWhitespace(c)) {
          // wait for it...
        } else if (isMatch(nameStart, c)) {
          parser.state = S.OPEN_TAG
          parser.tagName = c
        } else if (c === '/') {
          parser.state = S.CLOSE_TAG
          parser.tagName = ''
        } else if (c === '?') {
          parser.state = S.PROC_INST
          parser.procInstName = parser.procInstBody = ''
        } else {
          strictFail(parser, 'Unencoded <')
          // if there was some whitespace, then add that in.
          if (parser.startTagPosition + 1 < parser.position) {
            var pad = parser.position - parser.startTagPosition
            // Array(pad).join(' ') produces pad - 1 spaces
            c = new Array(pad).join(' ') + c
          }
          parser.textNode += '<' + c
          parser.state = S.TEXT
        }
        continue

      case S.SGML_DECL:
        if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
          emitNode(parser, 'onopencdata')
          parser.state = S.CDATA
          parser.sgmlDecl = ''
          parser.cdata = ''
        } else if (parser.sgmlDecl + c === '--') {
          parser.state = S.COMMENT
          parser.comment = ''
          parser.sgmlDecl = ''
        } else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
          parser.state = S.DOCTYPE
          if (parser.doctype || parser.sawRoot) {
            strictFail(parser,
              'Inappropriately located doctype declaration')
          }
          parser.doctype = ''
          parser.sgmlDecl = ''
        } else if (c === '>') {
          emitNode(parser, 'onsgmldeclaration', parser.sgmlDecl)
          parser.sgmlDecl = ''
          parser.state = S.TEXT
        } else if (isQuote(c)) {
          parser.state = S.SGML_DECL_QUOTED
          // Remember which quote opened the literal; SGML_DECL_QUOTED compares
          // against parser.q to find the matching close quote.
          parser.q = c
          parser.sgmlDecl += c
        } else {
          parser.sgmlDecl += c
        }
        continue

      case S.SGML_DECL_QUOTED:
        if (c === parser.q) {
          parser.state = S.SGML_DECL
          parser.q = ''
        }
        parser.sgmlDecl += c
        continue

      case S.DOCTYPE:
        if (c === '>') {
          parser.state = S.TEXT
          emitNode(parser, 'ondoctype', parser.doctype)
          parser.doctype = true // just remember that we saw it.
        } else {
          parser.doctype += c
          if (c === '[') {
            parser.state = S.DOCTYPE_DTD
          } else if (isQuote(c)) {
            parser.state = S.DOCTYPE_QUOTED
            parser.q = c
          }
        }
        continue

      case S.DOCTYPE_QUOTED:
        parser.doctype += c
        if (c === parser.q) {
          parser.q = ''
          parser.state = S.DOCTYPE
        }
        continue

      case S.DOCTYPE_DTD:
        parser.doctype += c
        if (c === ']') {
          parser.state = S.DOCTYPE
        } else if (isQuote(c)) {
          parser.state = S.DOCTYPE_DTD_QUOTED
          parser.q = c
        }
        continue

      case S.DOCTYPE_DTD_QUOTED:
        parser.doctype += c
        if (c === parser.q) {
          parser.state = S.DOCTYPE_DTD
          parser.q = ''
        }
        continue

      case S.COMMENT:
        if (c === '-') {
          parser.state = S.COMMENT_ENDING
        } else {
          parser.comment += c
        }
        continue

      case S.COMMENT_ENDING:
        if (c === '-') {
          parser.state = S.COMMENT_ENDED
          parser.comment = textopts(parser.opt, parser.comment)
          if (parser.comment) {
            emitNode(parser, 'oncomment', parser.comment)
          }
          parser.comment = ''
        } else {
          parser.comment += '-' + c
          parser.state = S.COMMENT
        }
        continue

      case S.COMMENT_ENDED:
        if (c !== '>') {
          strictFail(parser, 'Malformed comment')
          // allow <!-- blah -- bloo --> in non-strict mode,
          // which is a comment of " blah -- bloo "
          parser.comment += '--' + c
          parser.state = S.COMMENT
        } else {
          parser.state = S.TEXT
        }
        continue

      case S.CDATA:
        if (c === ']') {
          parser.state = S.CDATA_ENDING
        } else {
          parser.cdata += c
        }
        continue

      case S.CDATA_ENDING:
        if (c === ']') {
          parser.state = S.CDATA_ENDING_2
        } else {
          parser.cdata += ']' + c
          parser.state = S.CDATA
        }
        continue

      case S.CDATA_ENDING_2:
        if (c === '>') {
          if (parser.cdata) {
            emitNode(parser, 'oncdata', parser.cdata)
          }
          emitNode(parser, 'onclosecdata')
          parser.cdata = ''
          parser.state = S.TEXT
        } else if (c === ']') {
          // a third ']' in a row: the oldest one is content, stay in this state
          parser.cdata += ']'
        } else {
          parser.cdata += ']]' + c
          parser.state = S.CDATA
        }
        continue

      case S.PROC_INST:
        if (c === '?') {
          parser.state = S.PROC_INST_ENDING
        } else if (isWhitespace(c)) {
          parser.state = S.PROC_INST_BODY
        } else {
          parser.procInstName += c
        }
        continue

      case S.PROC_INST_BODY:
        if (!parser.procInstBody && isWhitespace(c)) {
          continue
        } else if (c === '?') {
          parser.state = S.PROC_INST_ENDING
        } else {
          parser.procInstBody += c
        }
        continue

      case S.PROC_INST_ENDING:
        if (c === '>') {
          emitNode(parser, 'onprocessinginstruction', {
            name: parser.procInstName,
            body: parser.procInstBody
          })
          parser.procInstName = parser.procInstBody = ''
          parser.state = S.TEXT
        } else {
          parser.procInstBody += '?' + c
          parser.state = S.PROC_INST_BODY
        }
        continue

      case S.OPEN_TAG:
        if (isMatch(nameBody, c)) {
          parser.tagName += c
        } else {
          newTag(parser)
          if (c === '>') {
            openTag(parser)
          } else if (c === '/') {
            parser.state = S.OPEN_TAG_SLASH
          } else {
            if (!isWhitespace(c)) {
              strictFail(parser, 'Invalid character in tag name')
            }
            parser.state = S.ATTRIB
          }
        }
        continue

      case S.OPEN_TAG_SLASH:
        if (c === '>') {
          openTag(parser, true)
          closeTag(parser)
        } else {
          strictFail(parser, 'Forward-slash in opening tag not followed by >')
          parser.state = S.ATTRIB
        }
        continue

      case S.ATTRIB:
        // haven't read the attribute name yet.
        if (isWhitespace(c)) {
          continue
        } else if (c === '>') {
          openTag(parser)
        } else if (c === '/') {
          parser.state = S.OPEN_TAG_SLASH
        } else if (isMatch(nameStart, c)) {
          parser.attribName = c
          parser.attribValue = ''
          parser.state = S.ATTRIB_NAME
        } else {
          strictFail(parser, 'Invalid attribute name')
        }
        continue

      case S.ATTRIB_NAME:
        if (c === '=') {
          parser.state = S.ATTRIB_VALUE
        } else if (c === '>') {
          strictFail(parser, 'Attribute without value')
          parser.attribValue = parser.attribName
          attrib(parser)
          openTag(parser)
        } else if (isWhitespace(c)) {
          parser.state = S.ATTRIB_NAME_SAW_WHITE
        } else if (isMatch(nameBody, c)) {
          parser.attribName += c
        } else {
          strictFail(parser, 'Invalid attribute name')
        }
        continue

      case S.ATTRIB_NAME_SAW_WHITE:
        if (c === '=') {
          parser.state = S.ATTRIB_VALUE
        } else if (isWhitespace(c)) {
          continue
        } else {
          strictFail(parser, 'Attribute without value')
          parser.tag.attributes[parser.attribName] = ''
          parser.attribValue = ''
          emitNode(parser, 'onattribute', {
            name: parser.attribName,
            value: ''
          })
          parser.attribName = ''
          if (c === '>') {
            openTag(parser)
          } else if (isMatch(nameStart, c)) {
            parser.attribName = c
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, 'Invalid attribute name')
            parser.state = S.ATTRIB
          }
        }
        continue

      case S.ATTRIB_VALUE:
        if (isWhitespace(c)) {
          continue
        } else if (isQuote(c)) {
          parser.q = c
          parser.state = S.ATTRIB_VALUE_QUOTED
        } else {
          strictFail(parser, 'Unquoted attribute value')
          parser.state = S.ATTRIB_VALUE_UNQUOTED
          parser.attribValue = c
        }
        continue

      case S.ATTRIB_VALUE_QUOTED:
        if (c !== parser.q) {
          if (c === '&') {
            parser.state = S.ATTRIB_VALUE_ENTITY_Q
          } else {
            parser.attribValue += c
          }
          continue
        }
        attrib(parser)
        parser.q = ''
        parser.state = S.ATTRIB_VALUE_CLOSED
        continue

      case S.ATTRIB_VALUE_CLOSED:
        if (isWhitespace(c)) {
          parser.state = S.ATTRIB
        } else if (c === '>') {
          openTag(parser)
        } else if (c === '/') {
          parser.state = S.OPEN_TAG_SLASH
        } else if (isMatch(nameStart, c)) {
          strictFail(parser, 'No whitespace between attributes')
          parser.attribName = c
          parser.attribValue = ''
          parser.state = S.ATTRIB_NAME
        } else {
          strictFail(parser, 'Invalid attribute name')
        }
        continue

      case S.ATTRIB_VALUE_UNQUOTED:
        if (!isAttribEnd(c)) {
          if (c === '&') {
            parser.state = S.ATTRIB_VALUE_ENTITY_U
          } else {
            parser.attribValue += c
          }
          continue
        }
        attrib(parser)
        if (c === '>') {
          openTag(parser)
        } else {
          parser.state = S.ATTRIB
        }
        continue

      case S.CLOSE_TAG:
        if (!parser.tagName) {
          if (isWhitespace(c)) {
            continue
          } else if (notMatch(nameStart, c)) {
            if (parser.script) {
              parser.script += '</' + c
              parser.state = S.SCRIPT
            } else {
              strictFail(parser, 'Invalid tagname in closing tag.')
            }
          } else {
            parser.tagName = c
          }
        } else if (c === '>') {
          closeTag(parser)
        } else if (isMatch(nameBody, c)) {
          parser.tagName += c
        } else if (parser.script) {
          parser.script += '</' + parser.tagName
          parser.tagName = ''
          parser.state = S.SCRIPT
        } else {
          if (!isWhitespace(c)) {
            strictFail(parser, 'Invalid tagname in closing tag')
          }
          parser.state = S.CLOSE_TAG_SAW_WHITE
        }
        continue

      case S.CLOSE_TAG_SAW_WHITE:
        if (isWhitespace(c)) {
          continue
        }
        if (c === '>') {
          closeTag(parser)
        } else {
          strictFail(parser, 'Invalid characters in closing tag')
        }
        continue

      case S.TEXT_ENTITY:
      case S.ATTRIB_VALUE_ENTITY_Q:
      case S.ATTRIB_VALUE_ENTITY_U:
        // The three entity states share one body; they differ only in the
        // buffer that receives the result and the state to resume afterwards.
        var returnState
        var buffer
        switch (parser.state) {
          case S.TEXT_ENTITY:
            returnState = S.TEXT
            buffer = 'textNode'
            break

          case S.ATTRIB_VALUE_ENTITY_Q:
            returnState = S.ATTRIB_VALUE_QUOTED
            buffer = 'attribValue'
            break

          case S.ATTRIB_VALUE_ENTITY_U:
            returnState = S.ATTRIB_VALUE_UNQUOTED
            buffer = 'attribValue'
            break
        }

        if (c === ';') {
          parser[buffer] += parseEntity(parser)
          parser.entity = ''
          parser.state = returnState
        } else if (isMatch(parser.entity.length ? entityBody : entityStart, c)) {
          parser.entity += c
        } else {
          strictFail(parser, 'Invalid character in entity name')
          // not an entity after all: put the raw text back into the buffer
          parser[buffer] += '&' + parser.entity + c
          parser.entity = ''
          parser.state = returnState
        }

        continue

      default:
        // Error's first argument is the message, so the parser object must
        // not be passed ahead of it.
        throw new Error('Unknown state: ' + parser.state)
    }
  } // while

  if (parser.position >= parser.bufferCheckPosition) {
    checkBufferLength(parser)
  }
  return parser
}
| 1509 | |
/*! http://mths.be/fromcodepoint v0.1.0 by @mathias */
/* istanbul ignore next */
// Fallback for environments without String.fromCodePoint: installs a function
// that validates each argument as a Unicode code point and builds the string
// from UTF-16 code units. Skipped entirely when the native function exists.
if (!String.fromCodePoint) {
  (function () {
    var unitsToString = String.fromCharCode
    var roundDown = Math.floor
    // Code units are flushed to fromCharCode.apply in batches once the
    // pending list grows past this many entries.
    var BATCH_LIMIT = 0x4000

    var fromCodePoint = function () {
      var total = arguments.length
      if (!total) {
        return ''
      }
      var pending = []
      var text = ''
      for (var k = 0; k < total; k++) {
        var codePoint = Number(arguments[k])
        var isInvalid =
          !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
          codePoint < 0 || // below the Unicode range
          codePoint > 0x10FFFF || // above the Unicode range
          roundDown(codePoint) !== codePoint // fractional value
        if (isInvalid) {
          throw RangeError('Invalid code point: ' + codePoint)
        }
        if (codePoint > 0xFFFF) {
          // Astral code point: emit a surrogate pair.
          // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
          var offset = codePoint - 0x10000
          pending.push((offset >> 10) + 0xD800, (offset % 0x400) + 0xDC00)
        } else {
          // BMP code point: a single code unit.
          pending.push(codePoint)
        }
        if (k + 1 === total || pending.length > BATCH_LIMIT) {
          text += unitsToString.apply(null, pending)
          pending.length = 0
        }
      }
      return text
    }

    /* istanbul ignore next */
    if (Object.defineProperty) {
      Object.defineProperty(String, 'fromCodePoint', {
        value: fromCodePoint,
        configurable: true,
        writable: true
      })
    } else {
      String.fromCodePoint = fromCodePoint
    }
  }())
}
| 1565 | })(typeof exports === 'undefined' ? this.sax = {} : exports) |