diff options
Diffstat (limited to 'web-ui')
-rw-r--r-- | web-ui/app/js/lib/html-sanitizer.js | 1064 | ||||
-rw-r--r-- | web-ui/app/js/lib/html_whitelister.js | 86 |
2 files changed, 0 insertions, 1150 deletions
diff --git a/web-ui/app/js/lib/html-sanitizer.js b/web-ui/app/js/lib/html-sanitizer.js deleted file mode 100644 index 80fb0041..00000000 --- a/web-ui/app/js/lib/html-sanitizer.js +++ /dev/null @@ -1,1064 +0,0 @@ -// Copyright (C) 2006 Google Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/** - * @fileoverview - * An HTML sanitizer that can satisfy a variety of security policies. - * - * <p> - * The HTML sanitizer is built around a SAX parser and HTML element and - * attributes schemas. - * - * If the cssparser is loaded, inline styles are sanitized using the - * css property and value schemas. Else they are remove during - * sanitization. - * - * If it exists, uses parseCssDeclarations, sanitizeCssProperty, cssSchema - * - * @author mikesamuel@gmail.com - * @author jasvir@gmail.com - * \@requires html4, URI - * \@overrides window - * \@provides html, html_sanitize - */ - -// The Turkish i seems to be a non-issue, but abort in case it is. -if ('I'.toLowerCase() !== 'i') { throw 'I/i problem'; } - -/** - * \@namespace - */ -define(['lib/html4-defs'], function (html4) { -var html = (function(html4) { - - // For closure compiler - var parseCssDeclarations, sanitizeCssProperty, cssSchema; - if ('undefined' !== typeof window) { - parseCssDeclarations = window['parseCssDeclarations']; - sanitizeCssProperty = window['sanitizeCssProperty']; - cssSchema = window['cssSchema']; - } - - // The keys of this object must be 'quoted' or JSCompiler will mangle them! - // This is a partial list -- lookupEntity() uses the host browser's parser - // (when available) to implement full entity lookup. - // Note that entities are in general case-sensitive; the uppercase ones are - // explicitly defined by HTML5 (presumably as compatibility). - var ENTITIES = { - 'lt': '<', - 'LT': '<', - 'gt': '>', - 'GT': '>', - 'amp': '&', - 'AMP': '&', - 'quot': '"', - 'apos': '\'', - 'nbsp': '\240' - }; - - // Patterns for types of entity/character reference names. - var decimalEscapeRe = /^#(\d+)$/; - var hexEscapeRe = /^#x([0-9A-Fa-f]+)$/; - // contains every entity per http://www.w3.org/TR/2011/WD-html5-20110113/named-character-references.html - var safeEntityNameRe = /^[A-Za-z][A-za-z0-9]+$/; - // Used as a hook to invoke the browser's entity parsing. <textarea> is used - // because its content is parsed for entities but not tags. - // TODO(kpreid): This retrieval is a kludge and leads to silent loss of - // functionality if the document isn't available. - var entityLookupElement = - ('undefined' !== typeof window && window['document']) - ? window['document'].createElement('textarea') : null; - /** - * Decodes an HTML entity. - * - * {\@updoc - * $ lookupEntity('lt') - * # '<' - * $ lookupEntity('GT') - * # '>' - * $ lookupEntity('amp') - * # '&' - * $ lookupEntity('nbsp') - * # '\xA0' - * $ lookupEntity('apos') - * # "'" - * $ lookupEntity('quot') - * # '"' - * $ lookupEntity('#xa') - * # '\n' - * $ lookupEntity('#10') - * # '\n' - * $ lookupEntity('#x0a') - * # '\n' - * $ lookupEntity('#010') - * # '\n' - * $ lookupEntity('#x00A') - * # '\n' - * $ lookupEntity('Pi') // Known failure - * # '\u03A0' - * $ lookupEntity('pi') // Known failure - * # '\u03C0' - * } - * - * @param {string} name the content between the '&' and the ';'. - * @return {string} a single unicode code-point as a string. - */ - function lookupEntity(name) { - // TODO: entity lookup as specified by HTML5 actually depends on the - // presence of the ";". - if (ENTITIES.hasOwnProperty(name)) { return ENTITIES[name]; } - var m = name.match(decimalEscapeRe); - if (m) { - return String.fromCharCode(parseInt(m[1], 10)); - } else if (!!(m = name.match(hexEscapeRe))) { - return String.fromCharCode(parseInt(m[1], 16)); - } else if (entityLookupElement && safeEntityNameRe.test(name)) { - entityLookupElement.innerHTML = '&' + name + ';'; - var text = entityLookupElement.textContent; - ENTITIES[name] = text; - return text; - } else { - return '&' + name + ';'; - } - } - - function decodeOneEntity(_, name) { - return lookupEntity(name); - } - - var nulRe = /\0/g; - function stripNULs(s) { - return s.replace(nulRe, ''); - } - - var ENTITY_RE_1 = /&(#[0-9]+|#[xX][0-9A-Fa-f]+|\w+);/g; - var ENTITY_RE_2 = /^(#[0-9]+|#[xX][0-9A-Fa-f]+|\w+);/; - /** - * The plain text of a chunk of HTML CDATA which possibly containing. - * - * {\@updoc - * $ unescapeEntities('') - * # '' - * $ unescapeEntities('hello World!') - * # 'hello World!' - * $ unescapeEntities('1 < 2 && 4 > 3 ') - * # '1 < 2 && 4 > 3\n' - * $ unescapeEntities('<< <- unfinished entity>') - * # '<< <- unfinished entity>' - * $ unescapeEntities('/foo?bar=baz©=true') // & often unescaped in URLS - * # '/foo?bar=baz©=true' - * $ unescapeEntities('pi=ππ, Pi=Π\u03A0') // FIXME: known failure - * # 'pi=\u03C0\u03c0, Pi=\u03A0\u03A0' - * } - * - * @param {string} s a chunk of HTML CDATA. It must not start or end inside - * an HTML entity. - */ - function unescapeEntities(s) { - return s.replace(ENTITY_RE_1, decodeOneEntity); - } - - var ampRe = /&/g; - var looseAmpRe = /&([^a-z#]|#(?:[^0-9x]|x(?:[^0-9a-f]|$)|$)|$)/gi; - var ltRe = /[<]/g; - var gtRe = />/g; - var quotRe = /\"/g; - - /** - * Escapes HTML special characters in attribute values. - * - * {\@updoc - * $ escapeAttrib('') - * # '' - * $ escapeAttrib('"<<&==&>>"') // Do not just escape the first occurrence. - * # '"<<&==&>>"' - * $ escapeAttrib('Hello <World>!') - * # 'Hello <World>!' - * } - */ - function escapeAttrib(s) { - return ('' + s).replace(ampRe, '&').replace(ltRe, '<') - .replace(gtRe, '>').replace(quotRe, '"'); - } - - /** - * Escape entities in RCDATA that can be escaped without changing the meaning. - * {\@updoc - * $ normalizeRCData('1 < 2 && 3 > 4 && 5 < 7&8') - * # '1 < 2 && 3 > 4 && 5 < 7&8' - * } - */ - function normalizeRCData(rcdata) { - return rcdata - .replace(looseAmpRe, '&$1') - .replace(ltRe, '<') - .replace(gtRe, '>'); - } - - // TODO(felix8a): validate sanitizer regexs against the HTML5 grammar at - // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html - // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html - // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html - // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html - - // We initially split input so that potentially meaningful characters - // like '<' and '>' are separate tokens, using a fast dumb process that - // ignores quoting. Then we walk that token stream, and when we see a - // '<' that's the start of a tag, we use ATTR_RE to extract tag - // attributes from the next token. That token will never have a '>' - // character. However, it might have an unbalanced quote character, and - // when we see that, we combine additional tokens to balance the quote. - - var ATTR_RE = new RegExp( - '^\\s*' + - '([-.:\\w]+)' + // 1 = Attribute name - '(?:' + ( - '\\s*(=)\\s*' + // 2 = Is there a value? - '(' + ( // 3 = Attribute value - // TODO(felix8a): maybe use backref to match quotes - '(\")[^\"]*(\"|$)' + // 4, 5 = Double-quoted string - '|' + - '(\')[^\']*(\'|$)' + // 6, 7 = Single-quoted string - '|' + - // Positive lookahead to prevent interpretation of - // <foo a= b=c> as <foo a='b=c'> - // TODO(felix8a): might be able to drop this case - '(?=[a-z][-\\w]*\\s*=)' + - '|' + - // Unquoted value that isn't an attribute name - // (since we didn't match the positive lookahead above) - '[^\"\'\\s]*' ) + - ')' ) + - ')?', - 'i'); - - // false on IE<=8, true on most other browsers - var splitWillCapture = ('a,b'.split(/(,)/).length === 3); - - // bitmask for tags with special parsing, like <script> and <textarea> - var EFLAGS_TEXT = html4.eflags['CDATA'] | html4.eflags['RCDATA']; - - /** - * Given a SAX-like event handler, produce a function that feeds those - * events and a parameter to the event handler. - * - * The event handler has the form:{@code - * { - * // Name is an upper-case HTML tag name. Attribs is an array of - * // alternating upper-case attribute names, and attribute values. The - * // attribs array is reused by the parser. Param is the value passed to - * // the saxParser. - * startTag: function (name, attribs, param) { ... }, - * endTag: function (name, param) { ... }, - * pcdata: function (text, param) { ... }, - * rcdata: function (text, param) { ... }, - * cdata: function (text, param) { ... }, - * startDoc: function (param) { ... }, - * endDoc: function (param) { ... } - * }} - * - * @param {Object} handler a record containing event handlers. - * @return {function(string, Object)} A function that takes a chunk of HTML - * and a parameter. The parameter is passed on to the handler methods. - */ - function makeSaxParser(handler) { - // Accept quoted or unquoted keys (Closure compat) - var hcopy = { - cdata: handler.cdata || handler['cdata'], - comment: handler.comment || handler['comment'], - endDoc: handler.endDoc || handler['endDoc'], - endTag: handler.endTag || handler['endTag'], - pcdata: handler.pcdata || handler['pcdata'], - rcdata: handler.rcdata || handler['rcdata'], - startDoc: handler.startDoc || handler['startDoc'], - startTag: handler.startTag || handler['startTag'] - }; - return function(htmlText, param) { - return parse(htmlText, hcopy, param); - }; - } - - // Parsing strategy is to split input into parts that might be lexically - // meaningful (every ">" becomes a separate part), and then recombine - // parts if we discover they're in a different context. - - // TODO(felix8a): Significant performance regressions from -legacy, - // tested on - // Chrome 18.0 - // Firefox 11.0 - // IE 6, 7, 8, 9 - // Opera 11.61 - // Safari 5.1.3 - // Many of these are unusual patterns that are linearly slower and still - // pretty fast (eg 1ms to 5ms), so not necessarily worth fixing. - - // TODO(felix8a): "<script> && && && ... <\/script>" is slower on all - // browsers. The hotspot is htmlSplit. - - // TODO(felix8a): "<p title='>>>>...'><\/p>" is slower on all browsers. - // This is partly htmlSplit, but the hotspot is parseTagAndAttrs. - - // TODO(felix8a): "<a><\/a><a><\/a>..." is slower on IE9. - // "<a>1<\/a><a>1<\/a>..." is faster, "<a><\/a>2<a><\/a>2..." is faster. - - // TODO(felix8a): "<p<p<p..." is slower on IE[6-8] - - var continuationMarker = {}; - function parse(htmlText, handler, param) { - var m, p, tagName; - var parts = htmlSplit(htmlText); - var state = { - noMoreGT: false, - noMoreEndComments: false - }; - parseCPS(handler, parts, 0, state, param); - } - - function continuationMaker(h, parts, initial, state, param) { - return function () { - parseCPS(h, parts, initial, state, param); - }; - } - - function parseCPS(h, parts, initial, state, param) { - try { - if (h.startDoc && initial == 0) { h.startDoc(param); } - var m, p, tagName; - for (var pos = initial, end = parts.length; pos < end;) { - var current = parts[pos++]; - var next = parts[pos]; - switch (current) { - case '&': - if (ENTITY_RE_2.test(next)) { - if (h.pcdata) { - h.pcdata('&' + next, param, continuationMarker, - continuationMaker(h, parts, pos, state, param)); - } - pos++; - } else { - if (h.pcdata) { h.pcdata("&", param, continuationMarker, - continuationMaker(h, parts, pos, state, param)); - } - } - break; - case '<\/': - if ((m = /^([-\w:]+)[^\'\"]*/.exec(next))) { - if (m[0].length === next.length && parts[pos + 1] === '>') { - // fast case, no attribute parsing needed - pos += 2; - tagName = m[1].toLowerCase(); - if (h.endTag) { - h.endTag(tagName, param, continuationMarker, - continuationMaker(h, parts, pos, state, param)); - } - } else { - // slow case, need to parse attributes - // TODO(felix8a): do we really care about misparsing this? - pos = parseEndTag( - parts, pos, h, param, continuationMarker, state); - } - } else { - if (h.pcdata) { - h.pcdata('</', param, continuationMarker, - continuationMaker(h, parts, pos, state, param)); - } - } - break; - case '<': - if (m = /^([-\w:]+)\s*\/?/.exec(next)) { - if (m[0].length === next.length && parts[pos + 1] === '>') { - // fast case, no attribute parsing needed - pos += 2; - tagName = m[1].toLowerCase(); - if (h.startTag) { - h.startTag(tagName, [], param, continuationMarker, - continuationMaker(h, parts, pos, state, param)); - } - // tags like <script> and <textarea> have special parsing - var eflags = html4.ELEMENTS[tagName]; - if (eflags & EFLAGS_TEXT) { - var tag = { name: tagName, next: pos, eflags: eflags }; - pos = parseText( - parts, tag, h, param, continuationMarker, state); - } - } else { - // slow case, need to parse attributes - pos = parseStartTag( - parts, pos, h, param, continuationMarker, state); - } - } else { - if (h.pcdata) { - h.pcdata('<', param, continuationMarker, - continuationMaker(h, parts, pos, state, param)); - } - } - break; - case '<\!--': - // The pathological case is n copies of '<\!--' without '-->', and - // repeated failure to find '-->' is quadratic. We avoid that by - // remembering when search for '-->' fails. - if (!state.noMoreEndComments) { - // A comment <\!--x--> is split into three tokens: - // '<\!--', 'x--', '>' - // We want to find the next '>' token that has a preceding '--'. - // pos is at the 'x--'. - for (p = pos + 1; p < end; p++) { - if (parts[p] === '>' && /--$/.test(parts[p - 1])) { break; } - } - if (p < end) { - if (h.comment) { - var comment = parts.slice(pos, p).join(''); - h.comment( - comment.substr(0, comment.length - 2), param, - continuationMarker, - continuationMaker(h, parts, p + 1, state, param)); - } - pos = p + 1; - } else { - state.noMoreEndComments = true; - } - } - if (state.noMoreEndComments) { - if (h.pcdata) { - h.pcdata('<!--', param, continuationMarker, - continuationMaker(h, parts, pos, state, param)); - } - } - break; - case '<\!': - if (!/^\w/.test(next)) { - if (h.pcdata) { - h.pcdata('<!', param, continuationMarker, - continuationMaker(h, parts, pos, state, param)); - } - } else { - // similar to noMoreEndComment logic - if (!state.noMoreGT) { - for (p = pos + 1; p < end; p++) { - if (parts[p] === '>') { break; } - } - if (p < end) { - pos = p + 1; - } else { - state.noMoreGT = true; - } - } - if (state.noMoreGT) { - if (h.pcdata) { - h.pcdata('<!', param, continuationMarker, - continuationMaker(h, parts, pos, state, param)); - } - } - } - break; - case '<?': - // similar to noMoreEndComment logic - if (!state.noMoreGT) { - for (p = pos + 1; p < end; p++) { - if (parts[p] === '>') { break; } - } - if (p < end) { - pos = p + 1; - } else { - state.noMoreGT = true; - } - } - if (state.noMoreGT) { - if (h.pcdata) { - h.pcdata('<?', param, continuationMarker, - continuationMaker(h, parts, pos, state, param)); - } - } - break; - case '>': - if (h.pcdata) { - h.pcdata(">", param, continuationMarker, - continuationMaker(h, parts, pos, state, param)); - } - break; - case '': - break; - default: - if (h.pcdata) { - h.pcdata(current, param, continuationMarker, - continuationMaker(h, parts, pos, state, param)); - } - break; - } - } - if (h.endDoc) { h.endDoc(param); } - } catch (e) { - if (e !== continuationMarker) { throw e; } - } - } - - // Split str into parts for the html parser. - function htmlSplit(str) { - // can't hoist this out of the function because of the re.exec loop. - var re = /(<\/|<\!--|<[!?]|[&<>])/g; - str += ''; - if (splitWillCapture) { - return str.split(re); - } else { - var parts = []; - var lastPos = 0; - var m; - while ((m = re.exec(str)) !== null) { - parts.push(str.substring(lastPos, m.index)); - parts.push(m[0]); - lastPos = m.index + m[0].length; - } - parts.push(str.substring(lastPos)); - return parts; - } - } - - function parseEndTag(parts, pos, h, param, continuationMarker, state) { - var tag = parseTagAndAttrs(parts, pos); - // drop unclosed tags - if (!tag) { return parts.length; } - if (h.endTag) { - h.endTag(tag.name, param, continuationMarker, - continuationMaker(h, parts, pos, state, param)); - } - return tag.next; - } - - function parseStartTag(parts, pos, h, param, continuationMarker, state) { - var tag = parseTagAndAttrs(parts, pos); - // drop unclosed tags - if (!tag) { return parts.length; } - if (h.startTag) { - h.startTag(tag.name, tag.attrs, param, continuationMarker, - continuationMaker(h, parts, tag.next, state, param)); - } - // tags like <script> and <textarea> have special parsing - if (tag.eflags & EFLAGS_TEXT) { - return parseText(parts, tag, h, param, continuationMarker, state); - } else { - return tag.next; - } - } - - var endTagRe = {}; - - // Tags like <script> and <textarea> are flagged as CDATA or RCDATA, - // which means everything is text until we see the correct closing tag. - function parseText(parts, tag, h, param, continuationMarker, state) { - var end = parts.length; - if (!endTagRe.hasOwnProperty(tag.name)) { - endTagRe[tag.name] = new RegExp('^' + tag.name + '(?:[\\s\\/]|$)', 'i'); - } - var re = endTagRe[tag.name]; - var first = tag.next; - var p = tag.next + 1; - for (; p < end; p++) { - if (parts[p - 1] === '<\/' && re.test(parts[p])) { break; } - } - if (p < end) { p -= 1; } - var buf = parts.slice(first, p).join(''); - if (tag.eflags & html4.eflags['CDATA']) { - if (h.cdata) { - h.cdata(buf, param, continuationMarker, - continuationMaker(h, parts, p, state, param)); - } - } else if (tag.eflags & html4.eflags['RCDATA']) { - if (h.rcdata) { - h.rcdata(normalizeRCData(buf), param, continuationMarker, - continuationMaker(h, parts, p, state, param)); - } - } else { - throw new Error('bug'); - } - return p; - } - - // at this point, parts[pos-1] is either "<" or "<\/". - function parseTagAndAttrs(parts, pos) { - var m = /^([-\w:]+)/.exec(parts[pos]); - var tag = {}; - tag.name = m[1].toLowerCase(); - tag.eflags = html4.ELEMENTS[tag.name]; - var buf = parts[pos].substr(m[0].length); - // Find the next '>'. We optimistically assume this '>' is not in a - // quoted context, and further down we fix things up if it turns out to - // be quoted. - var p = pos + 1; - var end = parts.length; - for (; p < end; p++) { - if (parts[p] === '>') { break; } - buf += parts[p]; - } - if (end <= p) { return void 0; } - var attrs = []; - while (buf !== '') { - m = ATTR_RE.exec(buf); - if (!m) { - // No attribute found: skip garbage - buf = buf.replace(/^[\s\S][^a-z\s]*/, ''); - - } else if ((m[4] && !m[5]) || (m[6] && !m[7])) { - // Unterminated quote: slurp to the next unquoted '>' - var quote = m[4] || m[6]; - var sawQuote = false; - var abuf = [buf, parts[p++]]; - for (; p < end; p++) { - if (sawQuote) { - if (parts[p] === '>') { break; } - } else if (0 <= parts[p].indexOf(quote)) { - sawQuote = true; - } - abuf.push(parts[p]); - } - // Slurp failed: lose the garbage - if (end <= p) { break; } - // Otherwise retry attribute parsing - buf = abuf.join(''); - continue; - - } else { - // We have an attribute - var aName = m[1].toLowerCase(); - var aValue = m[2] ? decodeValue(m[3]) : ''; - attrs.push(aName, aValue); - buf = buf.substr(m[0].length); - } - } - tag.attrs = attrs; - tag.next = p + 1; - return tag; - } - - function decodeValue(v) { - var q = v.charCodeAt(0); - if (q === 0x22 || q === 0x27) { // " or ' - v = v.substr(1, v.length - 2); - } - return unescapeEntities(stripNULs(v)); - } - - /** - * Returns a function that strips unsafe tags and attributes from html. - * @param {function(string, Array.<string>): ?Array.<string>} tagPolicy - * A function that takes (tagName, attribs[]), where tagName is a key in - * html4.ELEMENTS and attribs is an array of alternating attribute names - * and values. It should return a record (as follows), or null to delete - * the element. It's okay for tagPolicy to modify the attribs array, - * but the same array is reused, so it should not be held between calls. - * Record keys: - * attribs: (required) Sanitized attributes array. - * tagName: Replacement tag name. - * @return {function(string, Array)} A function that sanitizes a string of - * HTML and appends result strings to the second argument, an array. - */ - function makeHtmlSanitizer(tagPolicy) { - var stack; - var ignoring; - var emit = function (text, out) { - if (!ignoring) { out.push(text); } - }; - return makeSaxParser({ - 'startDoc': function(_) { - stack = []; - ignoring = false; - }, - 'startTag': function(tagNameOrig, attribs, out) { - if (ignoring) { return; } - if (!html4.ELEMENTS.hasOwnProperty(tagNameOrig)) { return; } - var eflagsOrig = html4.ELEMENTS[tagNameOrig]; - if (eflagsOrig & html4.eflags['FOLDABLE']) { - return; - } - - var decision = tagPolicy(tagNameOrig, attribs); - if (!decision) { - ignoring = !(eflagsOrig & html4.eflags['EMPTY']); - return; - } else if (typeof decision !== 'object') { - throw new Error('tagPolicy did not return object (old API?)'); - } - if ('attribs' in decision) { - attribs = decision['attribs']; - } else { - throw new Error('tagPolicy gave no attribs'); - } - var eflagsRep; - var tagNameRep; - if ('tagName' in decision) { - tagNameRep = decision['tagName']; - eflagsRep = html4.ELEMENTS[tagNameRep]; - } else { - tagNameRep = tagNameOrig; - eflagsRep = eflagsOrig; - } - // TODO(mikesamuel): relying on tagPolicy not to insert unsafe - // attribute names. - - // If this is an optional-end-tag element and either this element or its - // previous like sibling was rewritten, then insert a close tag to - // preserve structure. - if (eflagsOrig & html4.eflags['OPTIONAL_ENDTAG']) { - var onStack = stack[stack.length - 1]; - if (onStack && onStack.orig === tagNameOrig && - (onStack.rep !== tagNameRep || tagNameOrig !== tagNameRep)) { - out.push('<\/', onStack.rep, '>'); - } - } - - if (!(eflagsOrig & html4.eflags['EMPTY'])) { - stack.push({orig: tagNameOrig, rep: tagNameRep}); - } - - out.push('<', tagNameRep); - for (var i = 0, n = attribs.length; i < n; i += 2) { - var attribName = attribs[i], - value = attribs[i + 1]; - if (value !== null && value !== void 0) { - out.push(' ', attribName, '="', escapeAttrib(value), '"'); - } - } - out.push('>'); - - if ((eflagsOrig & html4.eflags['EMPTY']) - && !(eflagsRep & html4.eflags['EMPTY'])) { - // replacement is non-empty, synthesize end tag - out.push('<\/', tagNameRep, '>'); - } - }, - 'endTag': function(tagName, out) { - if (ignoring) { - ignoring = false; - return; - } - if (!html4.ELEMENTS.hasOwnProperty(tagName)) { return; } - var eflags = html4.ELEMENTS[tagName]; - if (!(eflags & (html4.eflags['EMPTY'] | html4.eflags['FOLDABLE']))) { - var index; - if (eflags & html4.eflags['OPTIONAL_ENDTAG']) { - for (index = stack.length; --index >= 0;) { - var stackElOrigTag = stack[index].orig; - if (stackElOrigTag === tagName) { break; } - if (!(html4.ELEMENTS[stackElOrigTag] & - html4.eflags['OPTIONAL_ENDTAG'])) { - // Don't pop non optional end tags looking for a match. - return; - } - } - } else { - for (index = stack.length; --index >= 0;) { - if (stack[index].orig === tagName) { break; } - } - } - if (index < 0) { return; } // Not opened. - for (var i = stack.length; --i > index;) { - var stackElRepTag = stack[i].rep; - if (!(html4.ELEMENTS[stackElRepTag] & - html4.eflags['OPTIONAL_ENDTAG'])) { - out.push('<\/', stackElRepTag, '>'); - } - } - if (index < stack.length) { - tagName = stack[index].rep; - } - stack.length = index; - out.push('<\/', tagName, '>'); - } - }, - 'pcdata': emit, - 'rcdata': emit, - 'cdata': emit, - 'endDoc': function(out) { - for (; stack.length; stack.length--) { - out.push('<\/', stack[stack.length - 1].rep, '>'); - } - } - }); - } - - var ALLOWED_URI_SCHEMES = /^(?:https?|mailto)$/i; - - function safeUri(uri, effect, ltype, hints, naiveUriRewriter) { - if (!naiveUriRewriter) { return null; } - try { - var parsed = URI.parse('' + uri); - if (parsed) { - if (!parsed.hasScheme() || - ALLOWED_URI_SCHEMES.test(parsed.getScheme())) { - var safe = naiveUriRewriter(parsed, effect, ltype, hints); - return safe ? safe.toString() : null; - } - } - } catch (e) { - return null; - } - return null; - } - - function log(logger, tagName, attribName, oldValue, newValue) { - if (!attribName) { - logger(tagName + " removed", { - change: "removed", - tagName: tagName - }); - } - if (oldValue !== newValue) { - var changed = "changed"; - if (oldValue && !newValue) { - changed = "removed"; - } else if (!oldValue && newValue) { - changed = "added"; - } - logger(tagName + "." + attribName + " " + changed, { - change: changed, - tagName: tagName, - attribName: attribName, - oldValue: oldValue, - newValue: newValue - }); - } - } - - function lookupAttribute(map, tagName, attribName) { - var attribKey; - attribKey = tagName + '::' + attribName; - if (map.hasOwnProperty(attribKey)) { - return map[attribKey]; - } - attribKey = '*::' + attribName; - if (map.hasOwnProperty(attribKey)) { - return map[attribKey]; - } - return void 0; - } - function getAttributeType(tagName, attribName) { - return lookupAttribute(html4.ATTRIBS, tagName, attribName); - } - function getLoaderType(tagName, attribName) { - return lookupAttribute(html4.LOADERTYPES, tagName, attribName); - } - function getUriEffect(tagName, attribName) { - return lookupAttribute(html4.URIEFFECTS, tagName, attribName); - } - - /** - * Sanitizes attributes on an HTML tag. - * @param {string} tagName An HTML tag name in lowercase. - * @param {Array.<?string>} attribs An array of alternating names and values. - * @param {?function(?string): ?string} opt_naiveUriRewriter A transform to - * apply to URI attributes; it can return a new string value, or null to - * delete the attribute. If unspecified, URI attributes are deleted. - * @param {function(?string): ?string} opt_nmTokenPolicy A transform to apply - * to attributes containing HTML names, element IDs, and space-separated - * lists of classes; it can return a new string value, or null to delete - * the attribute. If unspecified, these attributes are kept unchanged. - * @return {Array.<?string>} The sanitized attributes as a list of alternating - * names and values, where a null value means to omit the attribute. - */ - function sanitizeAttribs(tagName, attribs, - opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) { - // TODO(felix8a): it's obnoxious that domado duplicates much of this - // TODO(felix8a): maybe consistently enforce constraints like target= - for (var i = 0; i < attribs.length; i += 2) { - var attribName = attribs[i]; - var value = attribs[i + 1]; - var oldValue = value; - var atype = null, attribKey; - if ((attribKey = tagName + '::' + attribName, - html4.ATTRIBS.hasOwnProperty(attribKey)) || - (attribKey = '*::' + attribName, - html4.ATTRIBS.hasOwnProperty(attribKey))) { - atype = html4.ATTRIBS[attribKey]; - } - if (atype !== null) { - switch (atype) { - case html4.atype['NONE']: break; - case html4.atype['SCRIPT']: - value = null; - if (opt_logger) { - log(opt_logger, tagName, attribName, oldValue, value); - } - break; - case html4.atype['STYLE']: - if ('undefined' === typeof parseCssDeclarations) { - value = null; - if (opt_logger) { - log(opt_logger, tagName, attribName, oldValue, value); - } - break; - } - var sanitizedDeclarations = []; - parseCssDeclarations( - value, - { - 'declaration': function (property, tokens) { - var normProp = property.toLowerCase(); - sanitizeCssProperty( - normProp, tokens, - opt_naiveUriRewriter - ? function (url) { - return safeUri( - url, html4.ueffects.SAME_DOCUMENT, - html4.ltypes.SANDBOXED, - { - "TYPE": "CSS", - "CSS_PROP": normProp - }, opt_naiveUriRewriter); - } - : null); - if (tokens.length) { - sanitizedDeclarations.push( - normProp + ': ' + tokens.join(' ')); - } - } - }); - value = sanitizedDeclarations.length > 0 ? - sanitizedDeclarations.join(' ; ') : null; - if (opt_logger) { - log(opt_logger, tagName, attribName, oldValue, value); - } - break; - case html4.atype['ID']: - case html4.atype['IDREF']: - case html4.atype['IDREFS']: - case html4.atype['GLOBAL_NAME']: - case html4.atype['LOCAL_NAME']: - case html4.atype['CLASSES']: - value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value; - if (opt_logger) { - log(opt_logger, tagName, attribName, oldValue, value); - } - break; - case html4.atype['URI']: - value = safeUri(value, - getUriEffect(tagName, attribName), - getLoaderType(tagName, attribName), - { - "TYPE": "MARKUP", - "XML_ATTR": attribName, - "XML_TAG": tagName - }, opt_naiveUriRewriter); - if (opt_logger) { - log(opt_logger, tagName, attribName, oldValue, value); - } - break; - case html4.atype['URI_FRAGMENT']: - if (value && '#' === value.charAt(0)) { - value = value.substring(1); // remove the leading '#' - value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value; - if (value !== null && value !== void 0) { - value = '#' + value; // restore the leading '#' - } - } else { - value = null; - } - if (opt_logger) { - log(opt_logger, tagName, attribName, oldValue, value); - } - break; - default: - value = null; - if (opt_logger) { - log(opt_logger, tagName, attribName, oldValue, value); - } - break; - } - } else { - value = null; - if (opt_logger) { - log(opt_logger, tagName, attribName, oldValue, value); - } - } - attribs[i + 1] = value; - } - return attribs; - } - - /** - * Creates a tag policy that omits all tags marked UNSAFE in html4-defs.js - * and applies the default attribute sanitizer with the supplied policy for - * URI attributes and NMTOKEN attributes. - * @param {?function(?string): ?string} opt_naiveUriRewriter A transform to - * apply to URI attributes. If not given, URI attributes are deleted. - * @param {function(?string): ?string} opt_nmTokenPolicy A transform to apply - * to attributes containing HTML names, element IDs, and space-separated - * lists of classes. If not given, such attributes are left unchanged. - * @return {function(string, Array.<?string>)} A tagPolicy suitable for - * passing to html.sanitize. - */ - function makeTagPolicy( - opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) { - return function(tagName, attribs) { - if (!(html4.ELEMENTS[tagName] & html4.eflags['UNSAFE'])) { - return { - 'attribs': sanitizeAttribs(tagName, attribs, - opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) - }; - } else { - if (opt_logger) { - log(opt_logger, tagName, undefined, undefined, undefined); - } - } - }; - } - - /** - * Sanitizes HTML tags and attributes according to a given policy. - * @param {string} inputHtml The HTML to sanitize. - * @param {function(string, Array.<?string>)} tagPolicy A function that - * decides which tags to accept and sanitizes their attributes (see - * makeHtmlSanitizer above for details). - * @return {string} The sanitized HTML. - */ - function sanitizeWithPolicy(inputHtml, tagPolicy) { - var outputArray = []; - makeHtmlSanitizer(tagPolicy)(inputHtml, outputArray); - return outputArray.join(''); - } - - /** - * Strips unsafe tags and attributes from HTML. - * @param {string} inputHtml The HTML to sanitize. - * @param {?function(?string): ?string} opt_naiveUriRewriter A transform to - * apply to URI attributes. If not given, URI attributes are deleted. - * @param {function(?string): ?string} opt_nmTokenPolicy A transform to apply - * to attributes containing HTML names, element IDs, and space-separated - * lists of classes. If not given, such attributes are left unchanged. - */ - function sanitize(inputHtml, - opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) { - var tagPolicy = makeTagPolicy( - opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger); - return sanitizeWithPolicy(inputHtml, tagPolicy); - } - - // Export both quoted and unquoted names for Closure linkage. - var html = {}; - html.escapeAttrib = html['escapeAttrib'] = escapeAttrib; - html.makeHtmlSanitizer = html['makeHtmlSanitizer'] = makeHtmlSanitizer; - html.makeSaxParser = html['makeSaxParser'] = makeSaxParser; - html.makeTagPolicy = html['makeTagPolicy'] = makeTagPolicy; - html.normalizeRCData = html['normalizeRCData'] = normalizeRCData; - html.sanitize = html['sanitize'] = sanitize; - html.sanitizeAttribs = html['sanitizeAttribs'] = sanitizeAttribs; - html.sanitizeWithPolicy = html['sanitizeWithPolicy'] = sanitizeWithPolicy; - html.unescapeEntities = html['unescapeEntities'] = unescapeEntities; - return html; -})(html4); - -var html_sanitize = html['sanitize']; - -return { - html: html -}; -}); diff --git a/web-ui/app/js/lib/html_whitelister.js b/web-ui/app/js/lib/html_whitelister.js deleted file mode 100644 index 22841cce..00000000 --- a/web-ui/app/js/lib/html_whitelister.js +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2014 ThoughtWorks, Inc. - * - * Pixelated is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Pixelated is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with Pixelated. If not, see <http://www.gnu.org/licenses/>. - */ - -'use strict'; - -define(['lib/html-sanitizer'], function (htmlSanitizer) { - var tagAndAttributeWhitelist = { - 'p': ['style'], - 'div': ['style'], - 'a': ['href', 'style'], - 'span': ['style'], - 'font': ['face', 'size', 'style'], - 'img': ['title'], - 'em': [], - 'b': [], - 'i': [], - 'strong': ['style'], - 'table': ['style'], - 'tr': ['style'], - 'td': ['style'], - 'th': ['style'], - 'tbody': ['style'], - 'thead': ['style'], - 'dt': ['style'], - 'dd': ['style'], - 'dl': ['style'], - 'h1': ['style'], - 'h2': ['style'], - 'h3': ['style'], - 'h4': ['style'], - 'h5': ['style'], - 'h6': ['style'], - 'br': [], - 'blockquote': ['style'], - 'label': ['style'], - 'form': ['style'], - 'ol': ['style'], - 'ul': ['style'], - 'li': ['style'], - 'input': ['style', 'type', 'name', 'value'] - }; - - function filterAllowedAttributes (tagName, attributes) { - var i, attributesAndValues = []; - - for (i = 0; i < attributes.length; i++) { - if (tagAndAttributeWhitelist[tagName] && - _.contains(tagAndAttributeWhitelist[tagName], attributes[i])) { - attributesAndValues.push(attributes[i]); - attributesAndValues.push(attributes[i+1]); - } - } - - return attributesAndValues; - } - - function tagPolicy (tagName, attributes) { - if (!tagAndAttributeWhitelist[tagName]) { - return null; - } - - return { - tagName: tagName, - attribs: filterAllowedAttributes(tagName, attributes) - }; - } - - return { - tagPolicy: tagPolicy, - sanitize: htmlSanitizer.html.sanitizeWithPolicy - }; -}); |