summary refs log tree commit diff
path: root/web-ui
diff options
context:
space:
mode:
author Felix Hammerl <fhammerl@thoughtworks.com> 2016-02-24 10:30:06 +0100
committer Felix Hammerl <fhammerl@thoughtworks.com> 2016-02-24 10:30:06 +0100
commit 0f9c1e66c9ab6b8f037436ffcb45d71f92d9b613 (patch)
tree 6ee182a9d48f30110b68f00f57dd6d46c42fea5b /web-ui
parent 77ec41bb6f542077503106cacc1dbd28118c50b4 (diff)
issue #617: Remove old html whitelister
Diffstat (limited to 'web-ui')
-rw-r--r-- web-ui/app/js/lib/html-sanitizer.js 1064
-rw-r--r-- web-ui/app/js/lib/html_whitelister.js 86
2 files changed, 0 insertions, 1150 deletions
diff --git a/web-ui/app/js/lib/html-sanitizer.js b/web-ui/app/js/lib/html-sanitizer.js
deleted file mode 100644
index 80fb0041..00000000
--- a/web-ui/app/js/lib/html-sanitizer.js
+++ /dev/null
@@ -1,1064 +0,0 @@
-// Copyright (C) 2006 Google Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-/**
- * @fileoverview
- * An HTML sanitizer that can satisfy a variety of security policies.
- *
- * <p>
- * The HTML sanitizer is built around a SAX parser and HTML element and
- * attributes schemas.
- *
- * If the cssparser is loaded, inline styles are sanitized using the
- * css property and value schemas. Otherwise they are removed during
- * sanitization.
- *
- * If it exists, uses parseCssDeclarations, sanitizeCssProperty, cssSchema
- *
- * @author mikesamuel@gmail.com
- * @author jasvir@gmail.com
- * \@requires html4, URI
- * \@overrides window
- * \@provides html, html_sanitize
- */
-
-// The Turkish i seems to be a non-issue, but abort in case it is.
-if ('I'.toLowerCase() !== 'i') { throw 'I/i problem'; }
-
-/**
- * \@namespace
- */
-define(['lib/html4-defs'], function (html4) {
-var html = (function(html4) {
-
- // For closure compiler
- var parseCssDeclarations, sanitizeCssProperty, cssSchema;
- if ('undefined' !== typeof window) {
- parseCssDeclarations = window['parseCssDeclarations'];
- sanitizeCssProperty = window['sanitizeCssProperty'];
- cssSchema = window['cssSchema'];
- }
-
- // The keys of this object must be 'quoted' or JSCompiler will mangle them!
- // This is a partial list -- lookupEntity() uses the host browser's parser
- // (when available) to implement full entity lookup.
- // Note that entities are in general case-sensitive; the uppercase ones are
- // explicitly defined by HTML5 (presumably as compatibility).
- var ENTITIES = {
- 'lt': '<',
- 'LT': '<',
- 'gt': '>',
- 'GT': '>',
- 'amp': '&',
- 'AMP': '&',
- 'quot': '"',
- 'apos': '\'',
- 'nbsp': '\240'
- };
-
- // Patterns for types of entity/character reference names.
- var decimalEscapeRe = /^#(\d+)$/;
- var hexEscapeRe = /^#x([0-9A-Fa-f]+)$/;
- // contains every entity per http://www.w3.org/TR/2011/WD-html5-20110113/named-character-references.html
- var safeEntityNameRe = /^[A-Za-z][A-za-z0-9]+$/;
- // Used as a hook to invoke the browser's entity parsing. <textarea> is used
- // because its content is parsed for entities but not tags.
- // TODO(kpreid): This retrieval is a kludge and leads to silent loss of
- // functionality if the document isn't available.
- var entityLookupElement =
- ('undefined' !== typeof window && window['document'])
- ? window['document'].createElement('textarea') : null;
- /**
- * Decodes an HTML entity.
- *
- * {\@updoc
- * $ lookupEntity('lt')
- * # '<'
- * $ lookupEntity('GT')
- * # '>'
- * $ lookupEntity('amp')
- * # '&'
- * $ lookupEntity('nbsp')
- * # '\xA0'
- * $ lookupEntity('apos')
- * # "'"
- * $ lookupEntity('quot')
- * # '"'
- * $ lookupEntity('#xa')
- * # '\n'
- * $ lookupEntity('#10')
- * # '\n'
- * $ lookupEntity('#x0a')
- * # '\n'
- * $ lookupEntity('#010')
- * # '\n'
- * $ lookupEntity('#x00A')
- * # '\n'
- * $ lookupEntity('Pi') // Known failure
- * # '\u03A0'
- * $ lookupEntity('pi') // Known failure
- * # '\u03C0'
- * }
- *
- * @param {string} name the content between the '&' and the ';'.
- * @return {string} a single unicode code-point as a string.
- */
- function lookupEntity(name) {
- // TODO: entity lookup as specified by HTML5 actually depends on the
- // presence of the ";".
- if (ENTITIES.hasOwnProperty(name)) { return ENTITIES[name]; }
- var m = name.match(decimalEscapeRe);
- if (m) {
- return String.fromCharCode(parseInt(m[1], 10));
- } else if (!!(m = name.match(hexEscapeRe))) {
- return String.fromCharCode(parseInt(m[1], 16));
- } else if (entityLookupElement && safeEntityNameRe.test(name)) {
- entityLookupElement.innerHTML = '&' + name + ';';
- var text = entityLookupElement.textContent;
- ENTITIES[name] = text;
- return text;
- } else {
- return '&' + name + ';';
- }
- }
-
- function decodeOneEntity(_, name) {
- return lookupEntity(name);
- }
-
- var nulRe = /\0/g;
- function stripNULs(s) {
- return s.replace(nulRe, '');
- }
-
- var ENTITY_RE_1 = /&(#[0-9]+|#[xX][0-9A-Fa-f]+|\w+);/g;
- var ENTITY_RE_2 = /^(#[0-9]+|#[xX][0-9A-Fa-f]+|\w+);/;
- /**
- * The plain text of a chunk of HTML CDATA which possibly contains entities.
- *
- * {\@updoc
- * $ unescapeEntities('')
- * # ''
- * $ unescapeEntities('hello World!')
- * # 'hello World!'
- * $ unescapeEntities('1 &lt; 2 &amp;&AMP; 4 &gt; 3&#10;')
- * # '1 < 2 && 4 > 3\n'
- * $ unescapeEntities('&lt;&lt <- unfinished entity&gt;')
- * # '<&lt <- unfinished entity>'
- * $ unescapeEntities('/foo?bar=baz&copy=true') // & often unescaped in URLS
- * # '/foo?bar=baz&copy=true'
- * $ unescapeEntities('pi=&pi;&#x3c0;, Pi=&Pi;\u03A0') // FIXME: known failure
- * # 'pi=\u03C0\u03c0, Pi=\u03A0\u03A0'
- * }
- *
- * @param {string} s a chunk of HTML CDATA. It must not start or end inside
- * an HTML entity.
- */
- function unescapeEntities(s) {
- return s.replace(ENTITY_RE_1, decodeOneEntity);
- }
-
- var ampRe = /&/g;
- var looseAmpRe = /&([^a-z#]|#(?:[^0-9x]|x(?:[^0-9a-f]|$)|$)|$)/gi;
- var ltRe = /[<]/g;
- var gtRe = />/g;
- var quotRe = /\"/g;
-
- /**
- * Escapes HTML special characters in attribute values.
- *
- * {\@updoc
- * $ escapeAttrib('')
- * # ''
- * $ escapeAttrib('"<<&==&>>"') // Do not just escape the first occurrence.
- * # '&#34;&lt;&lt;&amp;&#61;&#61;&amp;&gt;&gt;&#34;'
- * $ escapeAttrib('Hello <World>!')
- * # 'Hello &lt;World&gt;!'
- * }
- */
- function escapeAttrib(s) {
- return ('' + s).replace(ampRe, '&amp;').replace(ltRe, '&lt;')
- .replace(gtRe, '&gt;').replace(quotRe, '&#34;');
- }
-
- /**
- * Escape entities in RCDATA that can be escaped without changing the meaning.
- * {\@updoc
- * $ normalizeRCData('1 < 2 &&amp; 3 > 4 &amp;& 5 &lt; 7&8')
- * # '1 &lt; 2 &amp;&amp; 3 &gt; 4 &amp;&amp; 5 &lt; 7&amp;8'
- * }
- */
- function normalizeRCData(rcdata) {
- return rcdata
- .replace(looseAmpRe, '&amp;$1')
- .replace(ltRe, '&lt;')
- .replace(gtRe, '&gt;');
- }
-
- // TODO(felix8a): validate sanitizer regexs against the HTML5 grammar at
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html
-
- // We initially split input so that potentially meaningful characters
- // like '<' and '>' are separate tokens, using a fast dumb process that
- // ignores quoting. Then we walk that token stream, and when we see a
- // '<' that's the start of a tag, we use ATTR_RE to extract tag
- // attributes from the next token. That token will never have a '>'
- // character. However, it might have an unbalanced quote character, and
- // when we see that, we combine additional tokens to balance the quote.
-
- var ATTR_RE = new RegExp(
- '^\\s*' +
- '([-.:\\w]+)' + // 1 = Attribute name
- '(?:' + (
- '\\s*(=)\\s*' + // 2 = Is there a value?
- '(' + ( // 3 = Attribute value
- // TODO(felix8a): maybe use backref to match quotes
- '(\")[^\"]*(\"|$)' + // 4, 5 = Double-quoted string
- '|' +
- '(\')[^\']*(\'|$)' + // 6, 7 = Single-quoted string
- '|' +
- // Positive lookahead to prevent interpretation of
- // <foo a= b=c> as <foo a='b=c'>
- // TODO(felix8a): might be able to drop this case
- '(?=[a-z][-\\w]*\\s*=)' +
- '|' +
- // Unquoted value that isn't an attribute name
- // (since we didn't match the positive lookahead above)
- '[^\"\'\\s]*' ) +
- ')' ) +
- ')?',
- 'i');
-
- // false on IE<=8, true on most other browsers
- var splitWillCapture = ('a,b'.split(/(,)/).length === 3);
-
- // bitmask for tags with special parsing, like <script> and <textarea>
- var EFLAGS_TEXT = html4.eflags['CDATA'] | html4.eflags['RCDATA'];
-
- /**
- * Given a SAX-like event handler, produce a function that feeds those
- * events and a parameter to the event handler.
- *
- * The event handler has the form:{@code
- * {
- * // Name is a lower-case HTML tag name. Attribs is an array of
- * // alternating lower-case attribute names, and attribute values. The
- * // attribs array is reused by the parser. Param is the value passed to
- * // the saxParser.
- * startTag: function (name, attribs, param) { ... },
- * endTag: function (name, param) { ... },
- * pcdata: function (text, param) { ... },
- * rcdata: function (text, param) { ... },
- * cdata: function (text, param) { ... },
- * startDoc: function (param) { ... },
- * endDoc: function (param) { ... }
- * }}
- *
- * @param {Object} handler a record containing event handlers.
- * @return {function(string, Object)} A function that takes a chunk of HTML
- * and a parameter. The parameter is passed on to the handler methods.
- */
- function makeSaxParser(handler) {
- // Accept quoted or unquoted keys (Closure compat)
- var hcopy = {
- cdata: handler.cdata || handler['cdata'],
- comment: handler.comment || handler['comment'],
- endDoc: handler.endDoc || handler['endDoc'],
- endTag: handler.endTag || handler['endTag'],
- pcdata: handler.pcdata || handler['pcdata'],
- rcdata: handler.rcdata || handler['rcdata'],
- startDoc: handler.startDoc || handler['startDoc'],
- startTag: handler.startTag || handler['startTag']
- };
- return function(htmlText, param) {
- return parse(htmlText, hcopy, param);
- };
- }
-
- // Parsing strategy is to split input into parts that might be lexically
- // meaningful (every ">" becomes a separate part), and then recombine
- // parts if we discover they're in a different context.
-
- // TODO(felix8a): Significant performance regressions from -legacy,
- // tested on
- // Chrome 18.0
- // Firefox 11.0
- // IE 6, 7, 8, 9
- // Opera 11.61
- // Safari 5.1.3
- // Many of these are unusual patterns that are linearly slower and still
- // pretty fast (eg 1ms to 5ms), so not necessarily worth fixing.
-
- // TODO(felix8a): "<script> && && && ... <\/script>" is slower on all
- // browsers. The hotspot is htmlSplit.
-
- // TODO(felix8a): "<p title='>>>>...'><\/p>" is slower on all browsers.
- // This is partly htmlSplit, but the hotspot is parseTagAndAttrs.
-
- // TODO(felix8a): "<a><\/a><a><\/a>..." is slower on IE9.
- // "<a>1<\/a><a>1<\/a>..." is faster, "<a><\/a>2<a><\/a>2..." is faster.
-
- // TODO(felix8a): "<p<p<p..." is slower on IE[6-8]
-
- var continuationMarker = {};
- function parse(htmlText, handler, param) {
- var m, p, tagName;
- var parts = htmlSplit(htmlText);
- var state = {
- noMoreGT: false,
- noMoreEndComments: false
- };
- parseCPS(handler, parts, 0, state, param);
- }
-
- function continuationMaker(h, parts, initial, state, param) {
- return function () {
- parseCPS(h, parts, initial, state, param);
- };
- }
-
- function parseCPS(h, parts, initial, state, param) {
- try {
- if (h.startDoc && initial == 0) { h.startDoc(param); }
- var m, p, tagName;
- for (var pos = initial, end = parts.length; pos < end;) {
- var current = parts[pos++];
- var next = parts[pos];
- switch (current) {
- case '&':
- if (ENTITY_RE_2.test(next)) {
- if (h.pcdata) {
- h.pcdata('&' + next, param, continuationMarker,
- continuationMaker(h, parts, pos, state, param));
- }
- pos++;
- } else {
- if (h.pcdata) { h.pcdata("&amp;", param, continuationMarker,
- continuationMaker(h, parts, pos, state, param));
- }
- }
- break;
- case '<\/':
- if ((m = /^([-\w:]+)[^\'\"]*/.exec(next))) {
- if (m[0].length === next.length && parts[pos + 1] === '>') {
- // fast case, no attribute parsing needed
- pos += 2;
- tagName = m[1].toLowerCase();
- if (h.endTag) {
- h.endTag(tagName, param, continuationMarker,
- continuationMaker(h, parts, pos, state, param));
- }
- } else {
- // slow case, need to parse attributes
- // TODO(felix8a): do we really care about misparsing this?
- pos = parseEndTag(
- parts, pos, h, param, continuationMarker, state);
- }
- } else {
- if (h.pcdata) {
- h.pcdata('&lt;/', param, continuationMarker,
- continuationMaker(h, parts, pos, state, param));
- }
- }
- break;
- case '<':
- if (m = /^([-\w:]+)\s*\/?/.exec(next)) {
- if (m[0].length === next.length && parts[pos + 1] === '>') {
- // fast case, no attribute parsing needed
- pos += 2;
- tagName = m[1].toLowerCase();
- if (h.startTag) {
- h.startTag(tagName, [], param, continuationMarker,
- continuationMaker(h, parts, pos, state, param));
- }
- // tags like <script> and <textarea> have special parsing
- var eflags = html4.ELEMENTS[tagName];
- if (eflags & EFLAGS_TEXT) {
- var tag = { name: tagName, next: pos, eflags: eflags };
- pos = parseText(
- parts, tag, h, param, continuationMarker, state);
- }
- } else {
- // slow case, need to parse attributes
- pos = parseStartTag(
- parts, pos, h, param, continuationMarker, state);
- }
- } else {
- if (h.pcdata) {
- h.pcdata('&lt;', param, continuationMarker,
- continuationMaker(h, parts, pos, state, param));
- }
- }
- break;
- case '<\!--':
- // The pathological case is n copies of '<\!--' without '-->', and
- // repeated failure to find '-->' is quadratic. We avoid that by
- // remembering when search for '-->' fails.
- if (!state.noMoreEndComments) {
- // A comment <\!--x--> is split into three tokens:
- // '<\!--', 'x--', '>'
- // We want to find the next '>' token that has a preceding '--'.
- // pos is at the 'x--'.
- for (p = pos + 1; p < end; p++) {
- if (parts[p] === '>' && /--$/.test(parts[p - 1])) { break; }
- }
- if (p < end) {
- if (h.comment) {
- var comment = parts.slice(pos, p).join('');
- h.comment(
- comment.substr(0, comment.length - 2), param,
- continuationMarker,
- continuationMaker(h, parts, p + 1, state, param));
- }
- pos = p + 1;
- } else {
- state.noMoreEndComments = true;
- }
- }
- if (state.noMoreEndComments) {
- if (h.pcdata) {
- h.pcdata('&lt;!--', param, continuationMarker,
- continuationMaker(h, parts, pos, state, param));
- }
- }
- break;
- case '<\!':
- if (!/^\w/.test(next)) {
- if (h.pcdata) {
- h.pcdata('&lt;!', param, continuationMarker,
- continuationMaker(h, parts, pos, state, param));
- }
- } else {
- // similar to the noMoreEndComments logic
- if (!state.noMoreGT) {
- for (p = pos + 1; p < end; p++) {
- if (parts[p] === '>') { break; }
- }
- if (p < end) {
- pos = p + 1;
- } else {
- state.noMoreGT = true;
- }
- }
- if (state.noMoreGT) {
- if (h.pcdata) {
- h.pcdata('&lt;!', param, continuationMarker,
- continuationMaker(h, parts, pos, state, param));
- }
- }
- }
- break;
- case '<?':
- // similar to the noMoreEndComments logic
- if (!state.noMoreGT) {
- for (p = pos + 1; p < end; p++) {
- if (parts[p] === '>') { break; }
- }
- if (p < end) {
- pos = p + 1;
- } else {
- state.noMoreGT = true;
- }
- }
- if (state.noMoreGT) {
- if (h.pcdata) {
- h.pcdata('&lt;?', param, continuationMarker,
- continuationMaker(h, parts, pos, state, param));
- }
- }
- break;
- case '>':
- if (h.pcdata) {
- h.pcdata("&gt;", param, continuationMarker,
- continuationMaker(h, parts, pos, state, param));
- }
- break;
- case '':
- break;
- default:
- if (h.pcdata) {
- h.pcdata(current, param, continuationMarker,
- continuationMaker(h, parts, pos, state, param));
- }
- break;
- }
- }
- if (h.endDoc) { h.endDoc(param); }
- } catch (e) {
- if (e !== continuationMarker) { throw e; }
- }
- }
-
- // Split str into parts for the html parser.
- function htmlSplit(str) {
- // can't hoist this out of the function because of the re.exec loop.
- var re = /(<\/|<\!--|<[!?]|[&<>])/g;
- str += '';
- if (splitWillCapture) {
- return str.split(re);
- } else {
- var parts = [];
- var lastPos = 0;
- var m;
- while ((m = re.exec(str)) !== null) {
- parts.push(str.substring(lastPos, m.index));
- parts.push(m[0]);
- lastPos = m.index + m[0].length;
- }
- parts.push(str.substring(lastPos));
- return parts;
- }
- }
-
- function parseEndTag(parts, pos, h, param, continuationMarker, state) {
- var tag = parseTagAndAttrs(parts, pos);
- // drop unclosed tags
- if (!tag) { return parts.length; }
- if (h.endTag) {
- h.endTag(tag.name, param, continuationMarker,
- continuationMaker(h, parts, pos, state, param));
- }
- return tag.next;
- }
-
- function parseStartTag(parts, pos, h, param, continuationMarker, state) {
- var tag = parseTagAndAttrs(parts, pos);
- // drop unclosed tags
- if (!tag) { return parts.length; }
- if (h.startTag) {
- h.startTag(tag.name, tag.attrs, param, continuationMarker,
- continuationMaker(h, parts, tag.next, state, param));
- }
- // tags like <script> and <textarea> have special parsing
- if (tag.eflags & EFLAGS_TEXT) {
- return parseText(parts, tag, h, param, continuationMarker, state);
- } else {
- return tag.next;
- }
- }
-
- var endTagRe = {};
-
- // Tags like <script> and <textarea> are flagged as CDATA or RCDATA,
- // which means everything is text until we see the correct closing tag.
- function parseText(parts, tag, h, param, continuationMarker, state) {
- var end = parts.length;
- if (!endTagRe.hasOwnProperty(tag.name)) {
- endTagRe[tag.name] = new RegExp('^' + tag.name + '(?:[\\s\\/]|$)', 'i');
- }
- var re = endTagRe[tag.name];
- var first = tag.next;
- var p = tag.next + 1;
- for (; p < end; p++) {
- if (parts[p - 1] === '<\/' && re.test(parts[p])) { break; }
- }
- if (p < end) { p -= 1; }
- var buf = parts.slice(first, p).join('');
- if (tag.eflags & html4.eflags['CDATA']) {
- if (h.cdata) {
- h.cdata(buf, param, continuationMarker,
- continuationMaker(h, parts, p, state, param));
- }
- } else if (tag.eflags & html4.eflags['RCDATA']) {
- if (h.rcdata) {
- h.rcdata(normalizeRCData(buf), param, continuationMarker,
- continuationMaker(h, parts, p, state, param));
- }
- } else {
- throw new Error('bug');
- }
- return p;
- }
-
- // at this point, parts[pos-1] is either "<" or "<\/".
- function parseTagAndAttrs(parts, pos) {
- var m = /^([-\w:]+)/.exec(parts[pos]);
- var tag = {};
- tag.name = m[1].toLowerCase();
- tag.eflags = html4.ELEMENTS[tag.name];
- var buf = parts[pos].substr(m[0].length);
- // Find the next '>'. We optimistically assume this '>' is not in a
- // quoted context, and further down we fix things up if it turns out to
- // be quoted.
- var p = pos + 1;
- var end = parts.length;
- for (; p < end; p++) {
- if (parts[p] === '>') { break; }
- buf += parts[p];
- }
- if (end <= p) { return void 0; }
- var attrs = [];
- while (buf !== '') {
- m = ATTR_RE.exec(buf);
- if (!m) {
- // No attribute found: skip garbage
- buf = buf.replace(/^[\s\S][^a-z\s]*/, '');
-
- } else if ((m[4] && !m[5]) || (m[6] && !m[7])) {
- // Unterminated quote: slurp to the next unquoted '>'
- var quote = m[4] || m[6];
- var sawQuote = false;
- var abuf = [buf, parts[p++]];
- for (; p < end; p++) {
- if (sawQuote) {
- if (parts[p] === '>') { break; }
- } else if (0 <= parts[p].indexOf(quote)) {
- sawQuote = true;
- }
- abuf.push(parts[p]);
- }
- // Slurp failed: lose the garbage
- if (end <= p) { break; }
- // Otherwise retry attribute parsing
- buf = abuf.join('');
- continue;
-
- } else {
- // We have an attribute
- var aName = m[1].toLowerCase();
- var aValue = m[2] ? decodeValue(m[3]) : '';
- attrs.push(aName, aValue);
- buf = buf.substr(m[0].length);
- }
- }
- tag.attrs = attrs;
- tag.next = p + 1;
- return tag;
- }
-
- function decodeValue(v) {
- var q = v.charCodeAt(0);
- if (q === 0x22 || q === 0x27) { // " or '
- v = v.substr(1, v.length - 2);
- }
- return unescapeEntities(stripNULs(v));
- }
-
- /**
- * Returns a function that strips unsafe tags and attributes from html.
- * @param {function(string, Array.<string>): ?Object} tagPolicy
- * A function that takes (tagName, attribs[]), where tagName is a key in
- * html4.ELEMENTS and attribs is an array of alternating attribute names
- * and values. It should return a record (as follows), or null to delete
- * the element. It's okay for tagPolicy to modify the attribs array,
- * but the same array is reused, so it should not be held between calls.
- * Record keys:
- * attribs: (required) Sanitized attributes array.
- * tagName: Replacement tag name.
- * @return {function(string, Array)} A function that sanitizes a string of
- * HTML and appends result strings to the second argument, an array.
- */
- function makeHtmlSanitizer(tagPolicy) {
- var stack;
- var ignoring;
- var emit = function (text, out) {
- if (!ignoring) { out.push(text); }
- };
- return makeSaxParser({
- 'startDoc': function(_) {
- stack = [];
- ignoring = false;
- },
- 'startTag': function(tagNameOrig, attribs, out) {
- if (ignoring) { return; }
- if (!html4.ELEMENTS.hasOwnProperty(tagNameOrig)) { return; }
- var eflagsOrig = html4.ELEMENTS[tagNameOrig];
- if (eflagsOrig & html4.eflags['FOLDABLE']) {
- return;
- }
-
- var decision = tagPolicy(tagNameOrig, attribs);
- if (!decision) {
- ignoring = !(eflagsOrig & html4.eflags['EMPTY']);
- return;
- } else if (typeof decision !== 'object') {
- throw new Error('tagPolicy did not return object (old API?)');
- }
- if ('attribs' in decision) {
- attribs = decision['attribs'];
- } else {
- throw new Error('tagPolicy gave no attribs');
- }
- var eflagsRep;
- var tagNameRep;
- if ('tagName' in decision) {
- tagNameRep = decision['tagName'];
- eflagsRep = html4.ELEMENTS[tagNameRep];
- } else {
- tagNameRep = tagNameOrig;
- eflagsRep = eflagsOrig;
- }
- // TODO(mikesamuel): relying on tagPolicy not to insert unsafe
- // attribute names.
-
- // If this is an optional-end-tag element and either this element or its
- // previous like sibling was rewritten, then insert a close tag to
- // preserve structure.
- if (eflagsOrig & html4.eflags['OPTIONAL_ENDTAG']) {
- var onStack = stack[stack.length - 1];
- if (onStack && onStack.orig === tagNameOrig &&
- (onStack.rep !== tagNameRep || tagNameOrig !== tagNameRep)) {
- out.push('<\/', onStack.rep, '>');
- }
- }
-
- if (!(eflagsOrig & html4.eflags['EMPTY'])) {
- stack.push({orig: tagNameOrig, rep: tagNameRep});
- }
-
- out.push('<', tagNameRep);
- for (var i = 0, n = attribs.length; i < n; i += 2) {
- var attribName = attribs[i],
- value = attribs[i + 1];
- if (value !== null && value !== void 0) {
- out.push(' ', attribName, '="', escapeAttrib(value), '"');
- }
- }
- out.push('>');
-
- if ((eflagsOrig & html4.eflags['EMPTY'])
- && !(eflagsRep & html4.eflags['EMPTY'])) {
- // replacement is non-empty, synthesize end tag
- out.push('<\/', tagNameRep, '>');
- }
- },
- 'endTag': function(tagName, out) {
- if (ignoring) {
- ignoring = false;
- return;
- }
- if (!html4.ELEMENTS.hasOwnProperty(tagName)) { return; }
- var eflags = html4.ELEMENTS[tagName];
- if (!(eflags & (html4.eflags['EMPTY'] | html4.eflags['FOLDABLE']))) {
- var index;
- if (eflags & html4.eflags['OPTIONAL_ENDTAG']) {
- for (index = stack.length; --index >= 0;) {
- var stackElOrigTag = stack[index].orig;
- if (stackElOrigTag === tagName) { break; }
- if (!(html4.ELEMENTS[stackElOrigTag] &
- html4.eflags['OPTIONAL_ENDTAG'])) {
- // Don't pop non optional end tags looking for a match.
- return;
- }
- }
- } else {
- for (index = stack.length; --index >= 0;) {
- if (stack[index].orig === tagName) { break; }
- }
- }
- if (index < 0) { return; } // Not opened.
- for (var i = stack.length; --i > index;) {
- var stackElRepTag = stack[i].rep;
- if (!(html4.ELEMENTS[stackElRepTag] &
- html4.eflags['OPTIONAL_ENDTAG'])) {
- out.push('<\/', stackElRepTag, '>');
- }
- }
- if (index < stack.length) {
- tagName = stack[index].rep;
- }
- stack.length = index;
- out.push('<\/', tagName, '>');
- }
- },
- 'pcdata': emit,
- 'rcdata': emit,
- 'cdata': emit,
- 'endDoc': function(out) {
- for (; stack.length; stack.length--) {
- out.push('<\/', stack[stack.length - 1].rep, '>');
- }
- }
- });
- }
-
- var ALLOWED_URI_SCHEMES = /^(?:https?|mailto)$/i;
-
- function safeUri(uri, effect, ltype, hints, naiveUriRewriter) {
- if (!naiveUriRewriter) { return null; }
- try {
- var parsed = URI.parse('' + uri);
- if (parsed) {
- if (!parsed.hasScheme() ||
- ALLOWED_URI_SCHEMES.test(parsed.getScheme())) {
- var safe = naiveUriRewriter(parsed, effect, ltype, hints);
- return safe ? safe.toString() : null;
- }
- }
- } catch (e) {
- return null;
- }
- return null;
- }
-
- function log(logger, tagName, attribName, oldValue, newValue) {
- if (!attribName) {
- logger(tagName + " removed", {
- change: "removed",
- tagName: tagName
- });
- }
- if (oldValue !== newValue) {
- var changed = "changed";
- if (oldValue && !newValue) {
- changed = "removed";
- } else if (!oldValue && newValue) {
- changed = "added";
- }
- logger(tagName + "." + attribName + " " + changed, {
- change: changed,
- tagName: tagName,
- attribName: attribName,
- oldValue: oldValue,
- newValue: newValue
- });
- }
- }
-
- function lookupAttribute(map, tagName, attribName) {
- var attribKey;
- attribKey = tagName + '::' + attribName;
- if (map.hasOwnProperty(attribKey)) {
- return map[attribKey];
- }
- attribKey = '*::' + attribName;
- if (map.hasOwnProperty(attribKey)) {
- return map[attribKey];
- }
- return void 0;
- }
- function getAttributeType(tagName, attribName) {
- return lookupAttribute(html4.ATTRIBS, tagName, attribName);
- }
- function getLoaderType(tagName, attribName) {
- return lookupAttribute(html4.LOADERTYPES, tagName, attribName);
- }
- function getUriEffect(tagName, attribName) {
- return lookupAttribute(html4.URIEFFECTS, tagName, attribName);
- }
-
- /**
- * Sanitizes attributes on an HTML tag.
- * @param {string} tagName An HTML tag name in lowercase.
- * @param {Array.<?string>} attribs An array of alternating names and values.
- * @param {?function(?string): ?string} opt_naiveUriRewriter A transform to
- * apply to URI attributes; it can return a new string value, or null to
- * delete the attribute. If unspecified, URI attributes are deleted.
- * @param {function(?string): ?string} opt_nmTokenPolicy A transform to apply
- * to attributes containing HTML names, element IDs, and space-separated
- * lists of classes; it can return a new string value, or null to delete
- * the attribute. If unspecified, these attributes are kept unchanged.
- * @return {Array.<?string>} The sanitized attributes as a list of alternating
- * names and values, where a null value means to omit the attribute.
- */
- function sanitizeAttribs(tagName, attribs,
- opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) {
- // TODO(felix8a): it's obnoxious that domado duplicates much of this
- // TODO(felix8a): maybe consistently enforce constraints like target=
- for (var i = 0; i < attribs.length; i += 2) {
- var attribName = attribs[i];
- var value = attribs[i + 1];
- var oldValue = value;
- var atype = null, attribKey;
- if ((attribKey = tagName + '::' + attribName,
- html4.ATTRIBS.hasOwnProperty(attribKey)) ||
- (attribKey = '*::' + attribName,
- html4.ATTRIBS.hasOwnProperty(attribKey))) {
- atype = html4.ATTRIBS[attribKey];
- }
- if (atype !== null) {
- switch (atype) {
- case html4.atype['NONE']: break;
- case html4.atype['SCRIPT']:
- value = null;
- if (opt_logger) {
- log(opt_logger, tagName, attribName, oldValue, value);
- }
- break;
- case html4.atype['STYLE']:
- if ('undefined' === typeof parseCssDeclarations) {
- value = null;
- if (opt_logger) {
- log(opt_logger, tagName, attribName, oldValue, value);
- }
- break;
- }
- var sanitizedDeclarations = [];
- parseCssDeclarations(
- value,
- {
- 'declaration': function (property, tokens) {
- var normProp = property.toLowerCase();
- sanitizeCssProperty(
- normProp, tokens,
- opt_naiveUriRewriter
- ? function (url) {
- return safeUri(
- url, html4.ueffects.SAME_DOCUMENT,
- html4.ltypes.SANDBOXED,
- {
- "TYPE": "CSS",
- "CSS_PROP": normProp
- }, opt_naiveUriRewriter);
- }
- : null);
- if (tokens.length) {
- sanitizedDeclarations.push(
- normProp + ': ' + tokens.join(' '));
- }
- }
- });
- value = sanitizedDeclarations.length > 0 ?
- sanitizedDeclarations.join(' ; ') : null;
- if (opt_logger) {
- log(opt_logger, tagName, attribName, oldValue, value);
- }
- break;
- case html4.atype['ID']:
- case html4.atype['IDREF']:
- case html4.atype['IDREFS']:
- case html4.atype['GLOBAL_NAME']:
- case html4.atype['LOCAL_NAME']:
- case html4.atype['CLASSES']:
- value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value;
- if (opt_logger) {
- log(opt_logger, tagName, attribName, oldValue, value);
- }
- break;
- case html4.atype['URI']:
- value = safeUri(value,
- getUriEffect(tagName, attribName),
- getLoaderType(tagName, attribName),
- {
- "TYPE": "MARKUP",
- "XML_ATTR": attribName,
- "XML_TAG": tagName
- }, opt_naiveUriRewriter);
- if (opt_logger) {
- log(opt_logger, tagName, attribName, oldValue, value);
- }
- break;
- case html4.atype['URI_FRAGMENT']:
- if (value && '#' === value.charAt(0)) {
- value = value.substring(1); // remove the leading '#'
- value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value;
- if (value !== null && value !== void 0) {
- value = '#' + value; // restore the leading '#'
- }
- } else {
- value = null;
- }
- if (opt_logger) {
- log(opt_logger, tagName, attribName, oldValue, value);
- }
- break;
- default:
- value = null;
- if (opt_logger) {
- log(opt_logger, tagName, attribName, oldValue, value);
- }
- break;
- }
- } else {
- value = null;
- if (opt_logger) {
- log(opt_logger, tagName, attribName, oldValue, value);
- }
- }
- attribs[i + 1] = value;
- }
- return attribs;
- }
-
- /**
- * Creates a tag policy that omits all tags marked UNSAFE in html4-defs.js
- * and applies the default attribute sanitizer with the supplied policy for
- * URI attributes and NMTOKEN attributes.
- * @param {?function(?string): ?string} opt_naiveUriRewriter A transform to
- * apply to URI attributes. If not given, URI attributes are deleted.
- * @param {function(?string): ?string} opt_nmTokenPolicy A transform to apply
- * to attributes containing HTML names, element IDs, and space-separated
- * lists of classes. If not given, such attributes are left unchanged.
- * @return {function(string, Array.<?string>)} A tagPolicy suitable for
- * passing to html.sanitizeWithPolicy.
- */
- function makeTagPolicy(
- opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) {
- return function(tagName, attribs) {
- if (!(html4.ELEMENTS[tagName] & html4.eflags['UNSAFE'])) {
- return {
- 'attribs': sanitizeAttribs(tagName, attribs,
- opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger)
- };
- } else {
- if (opt_logger) {
- log(opt_logger, tagName, undefined, undefined, undefined);
- }
- }
- };
- }
-
- /**
- * Sanitizes HTML tags and attributes according to a given policy.
- * @param {string} inputHtml The HTML to sanitize.
- * @param {function(string, Array.<?string>)} tagPolicy A function that
- * decides which tags to accept and sanitizes their attributes (see
- * makeHtmlSanitizer above for details).
- * @return {string} The sanitized HTML.
- */
- function sanitizeWithPolicy(inputHtml, tagPolicy) {
- var outputArray = [];
- makeHtmlSanitizer(tagPolicy)(inputHtml, outputArray);
- return outputArray.join('');
- }
-
- /**
- * Strips unsafe tags and attributes from HTML.
- * @param {string} inputHtml The HTML to sanitize.
- * @param {?function(?string): ?string} opt_naiveUriRewriter A transform to
- * apply to URI attributes. If not given, URI attributes are deleted.
- * @param {function(?string): ?string} opt_nmTokenPolicy A transform to apply
- * to attributes containing HTML names, element IDs, and space-separated
- * lists of classes. If not given, such attributes are left unchanged.
- */
- function sanitize(inputHtml,
- opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) {
- var tagPolicy = makeTagPolicy(
- opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger);
- return sanitizeWithPolicy(inputHtml, tagPolicy);
- }
-
- // Export both quoted and unquoted names for Closure linkage.
- var html = {};
- html.escapeAttrib = html['escapeAttrib'] = escapeAttrib;
- html.makeHtmlSanitizer = html['makeHtmlSanitizer'] = makeHtmlSanitizer;
- html.makeSaxParser = html['makeSaxParser'] = makeSaxParser;
- html.makeTagPolicy = html['makeTagPolicy'] = makeTagPolicy;
- html.normalizeRCData = html['normalizeRCData'] = normalizeRCData;
- html.sanitize = html['sanitize'] = sanitize;
- html.sanitizeAttribs = html['sanitizeAttribs'] = sanitizeAttribs;
- html.sanitizeWithPolicy = html['sanitizeWithPolicy'] = sanitizeWithPolicy;
- html.unescapeEntities = html['unescapeEntities'] = unescapeEntities;
- return html;
-})(html4);
-
-var html_sanitize = html['sanitize'];
-
-return {
- html: html
-};
-});
diff --git a/web-ui/app/js/lib/html_whitelister.js b/web-ui/app/js/lib/html_whitelister.js
deleted file mode 100644
index 22841cce..00000000
--- a/web-ui/app/js/lib/html_whitelister.js
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2014 ThoughtWorks, Inc.
- *
- * Pixelated is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pixelated is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with Pixelated. If not, see <http://www.gnu.org/licenses/>.
- */
-
-'use strict';
-
-define(['lib/html-sanitizer'], function (htmlSanitizer) {
- var tagAndAttributeWhitelist = {
- 'p': ['style'],
- 'div': ['style'],
- 'a': ['href', 'style'],
- 'span': ['style'],
- 'font': ['face', 'size', 'style'],
- 'img': ['title'],
- 'em': [],
- 'b': [],
- 'i': [],
- 'strong': ['style'],
- 'table': ['style'],
- 'tr': ['style'],
- 'td': ['style'],
- 'th': ['style'],
- 'tbody': ['style'],
- 'thead': ['style'],
- 'dt': ['style'],
- 'dd': ['style'],
- 'dl': ['style'],
- 'h1': ['style'],
- 'h2': ['style'],
- 'h3': ['style'],
- 'h4': ['style'],
- 'h5': ['style'],
- 'h6': ['style'],
- 'br': [],
- 'blockquote': ['style'],
- 'label': ['style'],
- 'form': ['style'],
- 'ol': ['style'],
- 'ul': ['style'],
- 'li': ['style'],
- 'input': ['style', 'type', 'name', 'value']
- };
-
- function filterAllowedAttributes (tagName, attributes) {
- var i, attributesAndValues = [];
-
- for (i = 0; i < attributes.length; i++) {
- if (tagAndAttributeWhitelist[tagName] &&
- _.contains(tagAndAttributeWhitelist[tagName], attributes[i])) {
- attributesAndValues.push(attributes[i]);
- attributesAndValues.push(attributes[i+1]);
- }
- }
-
- return attributesAndValues;
- }
-
- function tagPolicy (tagName, attributes) {
- if (!tagAndAttributeWhitelist[tagName]) {
- return null;
- }
-
- return {
- tagName: tagName,
- attribs: filterAllowedAttributes(tagName, attributes)
- };
- }
-
- return {
- tagPolicy: tagPolicy,
- sanitize: htmlSanitizer.html.sanitizeWithPolicy
- };
-});