diff options
| -rw-r--r-- | web-ui/app/js/lib/html-sanitizer.js | 1064 | ||||
| -rw-r--r-- | web-ui/app/js/lib/html_whitelister.js | 86 | 
2 files changed, 0 insertions, 1150 deletions
diff --git a/web-ui/app/js/lib/html-sanitizer.js b/web-ui/app/js/lib/html-sanitizer.js deleted file mode 100644 index 80fb0041..00000000 --- a/web-ui/app/js/lib/html-sanitizer.js +++ /dev/null @@ -1,1064 +0,0 @@ -// Copyright (C) 2006 Google Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -//      http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/** - * @fileoverview - * An HTML sanitizer that can satisfy a variety of security policies. - * - * <p> - * The HTML sanitizer is built around a SAX parser and HTML element and - * attributes schemas. - * - * If the cssparser is loaded, inline styles are sanitized using the - * css property and value schemas.  Else they are remove during - * sanitization. - * - * If it exists, uses parseCssDeclarations, sanitizeCssProperty,  cssSchema - * - * @author mikesamuel@gmail.com - * @author jasvir@gmail.com - * \@requires html4, URI - * \@overrides window - * \@provides html, html_sanitize - */ - -// The Turkish i seems to be a non-issue, but abort in case it is. 
-if ('I'.toLowerCase() !== 'i') { throw 'I/i problem'; } - -/** - * \@namespace - */ -define(['lib/html4-defs'], function (html4) { -var html = (function(html4) { - -  // For closure compiler -  var parseCssDeclarations, sanitizeCssProperty, cssSchema; -  if ('undefined' !== typeof window) { -    parseCssDeclarations = window['parseCssDeclarations']; -    sanitizeCssProperty = window['sanitizeCssProperty']; -    cssSchema = window['cssSchema']; -  } - -  // The keys of this object must be 'quoted' or JSCompiler will mangle them! -  // This is a partial list -- lookupEntity() uses the host browser's parser -  // (when available) to implement full entity lookup. -  // Note that entities are in general case-sensitive; the uppercase ones are -  // explicitly defined by HTML5 (presumably as compatibility). -  var ENTITIES = { -    'lt': '<', -    'LT': '<', -    'gt': '>', -    'GT': '>', -    'amp': '&', -    'AMP': '&', -    'quot': '"', -    'apos': '\'', -    'nbsp': '\240' -  }; - -  // Patterns for types of entity/character reference names. -  var decimalEscapeRe = /^#(\d+)$/; -  var hexEscapeRe = /^#x([0-9A-Fa-f]+)$/; -  // contains every entity per http://www.w3.org/TR/2011/WD-html5-20110113/named-character-references.html -  var safeEntityNameRe = /^[A-Za-z][A-za-z0-9]+$/; -  // Used as a hook to invoke the browser's entity parsing. <textarea> is used -  // because its content is parsed for entities but not tags. -  // TODO(kpreid): This retrieval is a kludge and leads to silent loss of -  // functionality if the document isn't available. -  var entityLookupElement = -      ('undefined' !== typeof window && window['document']) -          ? window['document'].createElement('textarea') : null; -  /** -   * Decodes an HTML entity. 
-   * -   * {\@updoc -   * $ lookupEntity('lt') -   * # '<' -   * $ lookupEntity('GT') -   * # '>' -   * $ lookupEntity('amp') -   * # '&' -   * $ lookupEntity('nbsp') -   * # '\xA0' -   * $ lookupEntity('apos') -   * # "'" -   * $ lookupEntity('quot') -   * # '"' -   * $ lookupEntity('#xa') -   * # '\n' -   * $ lookupEntity('#10') -   * # '\n' -   * $ lookupEntity('#x0a') -   * # '\n' -   * $ lookupEntity('#010') -   * # '\n' -   * $ lookupEntity('#x00A') -   * # '\n' -   * $ lookupEntity('Pi')      // Known failure -   * # '\u03A0' -   * $ lookupEntity('pi')      // Known failure -   * # '\u03C0' -   * } -   * -   * @param {string} name the content between the '&' and the ';'. -   * @return {string} a single unicode code-point as a string. -   */ -  function lookupEntity(name) { -    // TODO: entity lookup as specified by HTML5 actually depends on the -    // presence of the ";". -    if (ENTITIES.hasOwnProperty(name)) { return ENTITIES[name]; } -    var m = name.match(decimalEscapeRe); -    if (m) { -      return String.fromCharCode(parseInt(m[1], 10)); -    } else if (!!(m = name.match(hexEscapeRe))) { -      return String.fromCharCode(parseInt(m[1], 16)); -    } else if (entityLookupElement && safeEntityNameRe.test(name)) { -      entityLookupElement.innerHTML = '&' + name + ';'; -      var text = entityLookupElement.textContent; -      ENTITIES[name] = text; -      return text; -    } else { -      return '&' + name + ';'; -    } -  } - -  function decodeOneEntity(_, name) { -    return lookupEntity(name); -  } - -  var nulRe = /\0/g; -  function stripNULs(s) { -    return s.replace(nulRe, ''); -  } - -  var ENTITY_RE_1 = /&(#[0-9]+|#[xX][0-9A-Fa-f]+|\w+);/g; -  var ENTITY_RE_2 = /^(#[0-9]+|#[xX][0-9A-Fa-f]+|\w+);/; -  /** -   * The plain text of a chunk of HTML CDATA which possibly containing. -   * -   * {\@updoc -   * $ unescapeEntities('') -   * # '' -   * $ unescapeEntities('hello World!') -   * # 'hello World!' -   * $ unescapeEntities('1 < 2 && 4 > 3
') -   * # '1 < 2 && 4 > 3\n' -   * $ unescapeEntities('<< <- unfinished entity>') -   * # '<< <- unfinished entity>' -   * $ unescapeEntities('/foo?bar=baz©=true')  // & often unescaped in URLS -   * # '/foo?bar=baz©=true' -   * $ unescapeEntities('pi=ππ, Pi=Π\u03A0') // FIXME: known failure -   * # 'pi=\u03C0\u03c0, Pi=\u03A0\u03A0' -   * } -   * -   * @param {string} s a chunk of HTML CDATA.  It must not start or end inside -   *     an HTML entity. -   */ -  function unescapeEntities(s) { -    return s.replace(ENTITY_RE_1, decodeOneEntity); -  } - -  var ampRe = /&/g; -  var looseAmpRe = /&([^a-z#]|#(?:[^0-9x]|x(?:[^0-9a-f]|$)|$)|$)/gi; -  var ltRe = /[<]/g; -  var gtRe = />/g; -  var quotRe = /\"/g; - -  /** -   * Escapes HTML special characters in attribute values. -   * -   * {\@updoc -   * $ escapeAttrib('') -   * # '' -   * $ escapeAttrib('"<<&==&>>"')  // Do not just escape the first occurrence. -   * # '"<<&==&>>"' -   * $ escapeAttrib('Hello <World>!') -   * # 'Hello <World>!' -   * } -   */ -  function escapeAttrib(s) { -    return ('' + s).replace(ampRe, '&').replace(ltRe, '<') -        .replace(gtRe, '>').replace(quotRe, '"'); -  } - -  /** -   * Escape entities in RCDATA that can be escaped without changing the meaning. 
-   * {\@updoc -   * $ normalizeRCData('1 < 2 && 3 > 4 && 5 < 7&8') -   * # '1 < 2 && 3 > 4 && 5 < 7&8' -   * } -   */ -  function normalizeRCData(rcdata) { -    return rcdata -        .replace(looseAmpRe, '&$1') -        .replace(ltRe, '<') -        .replace(gtRe, '>'); -  } - -  // TODO(felix8a): validate sanitizer regexs against the HTML5 grammar at -  // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html -  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html -  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html -  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html - -  // We initially split input so that potentially meaningful characters -  // like '<' and '>' are separate tokens, using a fast dumb process that -  // ignores quoting.  Then we walk that token stream, and when we see a -  // '<' that's the start of a tag, we use ATTR_RE to extract tag -  // attributes from the next token.  That token will never have a '>' -  // character.  However, it might have an unbalanced quote character, and -  // when we see that, we combine additional tokens to balance the quote. - -  var ATTR_RE = new RegExp( -    '^\\s*' + -    '([-.:\\w]+)' +             // 1 = Attribute name -    '(?:' + ( -      '\\s*(=)\\s*' +           // 2 = Is there a value? 
-      '(' + (                   // 3 = Attribute value -        // TODO(felix8a): maybe use backref to match quotes -        '(\")[^\"]*(\"|$)' +    // 4, 5 = Double-quoted string -        '|' + -        '(\')[^\']*(\'|$)' +    // 6, 7 = Single-quoted string -        '|' + -        // Positive lookahead to prevent interpretation of -        // <foo a= b=c> as <foo a='b=c'> -        // TODO(felix8a): might be able to drop this case -        '(?=[a-z][-\\w]*\\s*=)' + -        '|' + -        // Unquoted value that isn't an attribute name -        // (since we didn't match the positive lookahead above) -        '[^\"\'\\s]*' ) + -      ')' ) + -    ')?', -    'i'); - -  // false on IE<=8, true on most other browsers -  var splitWillCapture = ('a,b'.split(/(,)/).length === 3); - -  // bitmask for tags with special parsing, like <script> and <textarea> -  var EFLAGS_TEXT = html4.eflags['CDATA'] | html4.eflags['RCDATA']; - -  /** -   * Given a SAX-like event handler, produce a function that feeds those -   * events and a parameter to the event handler. -   * -   * The event handler has the form:{@code -   * { -   *   // Name is an upper-case HTML tag name.  Attribs is an array of -   *   // alternating upper-case attribute names, and attribute values.  The -   *   // attribs array is reused by the parser.  Param is the value passed to -   *   // the saxParser. -   *   startTag: function (name, attribs, param) { ... }, -   *   endTag:   function (name, param) { ... }, -   *   pcdata:   function (text, param) { ... }, -   *   rcdata:   function (text, param) { ... }, -   *   cdata:    function (text, param) { ... }, -   *   startDoc: function (param) { ... }, -   *   endDoc:   function (param) { ... } -   * }} -   * -   * @param {Object} handler a record containing event handlers. -   * @return {function(string, Object)} A function that takes a chunk of HTML -   *     and a parameter.  The parameter is passed on to the handler methods. 
-   */ -  function makeSaxParser(handler) { -    // Accept quoted or unquoted keys (Closure compat) -    var hcopy = { -      cdata: handler.cdata || handler['cdata'], -      comment: handler.comment || handler['comment'], -      endDoc: handler.endDoc || handler['endDoc'], -      endTag: handler.endTag || handler['endTag'], -      pcdata: handler.pcdata || handler['pcdata'], -      rcdata: handler.rcdata || handler['rcdata'], -      startDoc: handler.startDoc || handler['startDoc'], -      startTag: handler.startTag || handler['startTag'] -    }; -    return function(htmlText, param) { -      return parse(htmlText, hcopy, param); -    }; -  } - -  // Parsing strategy is to split input into parts that might be lexically -  // meaningful (every ">" becomes a separate part), and then recombine -  // parts if we discover they're in a different context. - -  // TODO(felix8a): Significant performance regressions from -legacy, -  // tested on -  //    Chrome 18.0 -  //    Firefox 11.0 -  //    IE 6, 7, 8, 9 -  //    Opera 11.61 -  //    Safari 5.1.3 -  // Many of these are unusual patterns that are linearly slower and still -  // pretty fast (eg 1ms to 5ms), so not necessarily worth fixing. - -  // TODO(felix8a): "<script> && && && ... <\/script>" is slower on all -  // browsers.  The hotspot is htmlSplit. - -  // TODO(felix8a): "<p title='>>>>...'><\/p>" is slower on all browsers. -  // This is partly htmlSplit, but the hotspot is parseTagAndAttrs. - -  // TODO(felix8a): "<a><\/a><a><\/a>..." is slower on IE9. -  // "<a>1<\/a><a>1<\/a>..." is faster, "<a><\/a>2<a><\/a>2..." is faster. - -  // TODO(felix8a): "<p<p<p..." 
is slower on IE[6-8] - -  var continuationMarker = {}; -  function parse(htmlText, handler, param) { -    var m, p, tagName; -    var parts = htmlSplit(htmlText); -    var state = { -      noMoreGT: false, -      noMoreEndComments: false -    }; -    parseCPS(handler, parts, 0, state, param); -  } - -  function continuationMaker(h, parts, initial, state, param) { -    return function () { -      parseCPS(h, parts, initial, state, param); -    }; -  } - -  function parseCPS(h, parts, initial, state, param) { -    try { -      if (h.startDoc && initial == 0) { h.startDoc(param); } -      var m, p, tagName; -      for (var pos = initial, end = parts.length; pos < end;) { -        var current = parts[pos++]; -        var next = parts[pos]; -        switch (current) { -        case '&': -          if (ENTITY_RE_2.test(next)) { -            if (h.pcdata) { -              h.pcdata('&' + next, param, continuationMarker, -                continuationMaker(h, parts, pos, state, param)); -            } -            pos++; -          } else { -            if (h.pcdata) { h.pcdata("&", param, continuationMarker, -                continuationMaker(h, parts, pos, state, param)); -            } -          } -          break; -        case '<\/': -          if ((m = /^([-\w:]+)[^\'\"]*/.exec(next))) { -            if (m[0].length === next.length && parts[pos + 1] === '>') { -              // fast case, no attribute parsing needed -              pos += 2; -              tagName = m[1].toLowerCase(); -              if (h.endTag) { -                h.endTag(tagName, param, continuationMarker, -                  continuationMaker(h, parts, pos, state, param)); -              } -            } else { -              // slow case, need to parse attributes -              // TODO(felix8a): do we really care about misparsing this? 
-              pos = parseEndTag( -                parts, pos, h, param, continuationMarker, state); -            } -          } else { -            if (h.pcdata) { -              h.pcdata('</', param, continuationMarker, -                continuationMaker(h, parts, pos, state, param)); -            } -          } -          break; -        case '<': -          if (m = /^([-\w:]+)\s*\/?/.exec(next)) { -            if (m[0].length === next.length && parts[pos + 1] === '>') { -              // fast case, no attribute parsing needed -              pos += 2; -              tagName = m[1].toLowerCase(); -              if (h.startTag) { -                h.startTag(tagName, [], param, continuationMarker, -                  continuationMaker(h, parts, pos, state, param)); -              } -              // tags like <script> and <textarea> have special parsing -              var eflags = html4.ELEMENTS[tagName]; -              if (eflags & EFLAGS_TEXT) { -                var tag = { name: tagName, next: pos, eflags: eflags }; -                pos = parseText( -                  parts, tag, h, param, continuationMarker, state); -              } -            } else { -              // slow case, need to parse attributes -              pos = parseStartTag( -                parts, pos, h, param, continuationMarker, state); -            } -          } else { -            if (h.pcdata) { -              h.pcdata('<', param, continuationMarker, -                continuationMaker(h, parts, pos, state, param)); -            } -          } -          break; -        case '<\!--': -          // The pathological case is n copies of '<\!--' without '-->', and -          // repeated failure to find '-->' is quadratic.  We avoid that by -          // remembering when search for '-->' fails. 
-          if (!state.noMoreEndComments) { -            // A comment <\!--x--> is split into three tokens: -            //   '<\!--', 'x--', '>' -            // We want to find the next '>' token that has a preceding '--'. -            // pos is at the 'x--'. -            for (p = pos + 1; p < end; p++) { -              if (parts[p] === '>' && /--$/.test(parts[p - 1])) { break; } -            } -            if (p < end) { -              if (h.comment) { -                var comment = parts.slice(pos, p).join(''); -                h.comment( -                  comment.substr(0, comment.length - 2), param, -                  continuationMarker, -                  continuationMaker(h, parts, p + 1, state, param)); -              } -              pos = p + 1; -            } else { -              state.noMoreEndComments = true; -            } -          } -          if (state.noMoreEndComments) { -            if (h.pcdata) { -              h.pcdata('<!--', param, continuationMarker, -                continuationMaker(h, parts, pos, state, param)); -            } -          } -          break; -        case '<\!': -          if (!/^\w/.test(next)) { -            if (h.pcdata) { -              h.pcdata('<!', param, continuationMarker, -                continuationMaker(h, parts, pos, state, param)); -            } -          } else { -            // similar to noMoreEndComment logic -            if (!state.noMoreGT) { -              for (p = pos + 1; p < end; p++) { -                if (parts[p] === '>') { break; } -              } -              if (p < end) { -                pos = p + 1; -              } else { -                state.noMoreGT = true; -              } -            } -            if (state.noMoreGT) { -              if (h.pcdata) { -                h.pcdata('<!', param, continuationMarker, -                  continuationMaker(h, parts, pos, state, param)); -              } -            } -          } -          break; -        case '<?': -          // 
similar to noMoreEndComment logic -          if (!state.noMoreGT) { -            for (p = pos + 1; p < end; p++) { -              if (parts[p] === '>') { break; } -            } -            if (p < end) { -              pos = p + 1; -            } else { -              state.noMoreGT = true; -            } -          } -          if (state.noMoreGT) { -            if (h.pcdata) { -              h.pcdata('<?', param, continuationMarker, -                continuationMaker(h, parts, pos, state, param)); -            } -          } -          break; -        case '>': -          if (h.pcdata) { -            h.pcdata(">", param, continuationMarker, -              continuationMaker(h, parts, pos, state, param)); -          } -          break; -        case '': -          break; -        default: -          if (h.pcdata) { -            h.pcdata(current, param, continuationMarker, -              continuationMaker(h, parts, pos, state, param)); -          } -          break; -        } -      } -      if (h.endDoc) { h.endDoc(param); } -    } catch (e) { -      if (e !== continuationMarker) { throw e; } -    } -  } - -  // Split str into parts for the html parser. -  function htmlSplit(str) { -    // can't hoist this out of the function because of the re.exec loop. 
-    var re = /(<\/|<\!--|<[!?]|[&<>])/g; -    str += ''; -    if (splitWillCapture) { -      return str.split(re); -    } else { -      var parts = []; -      var lastPos = 0; -      var m; -      while ((m = re.exec(str)) !== null) { -        parts.push(str.substring(lastPos, m.index)); -        parts.push(m[0]); -        lastPos = m.index + m[0].length; -      } -      parts.push(str.substring(lastPos)); -      return parts; -    } -  } - -  function parseEndTag(parts, pos, h, param, continuationMarker, state) { -    var tag = parseTagAndAttrs(parts, pos); -    // drop unclosed tags -    if (!tag) { return parts.length; } -    if (h.endTag) { -      h.endTag(tag.name, param, continuationMarker, -        continuationMaker(h, parts, pos, state, param)); -    } -    return tag.next; -  } - -  function parseStartTag(parts, pos, h, param, continuationMarker, state) { -    var tag = parseTagAndAttrs(parts, pos); -    // drop unclosed tags -    if (!tag) { return parts.length; } -    if (h.startTag) { -      h.startTag(tag.name, tag.attrs, param, continuationMarker, -        continuationMaker(h, parts, tag.next, state, param)); -    } -    // tags like <script> and <textarea> have special parsing -    if (tag.eflags & EFLAGS_TEXT) { -      return parseText(parts, tag, h, param, continuationMarker, state); -    } else { -      return tag.next; -    } -  } - -  var endTagRe = {}; - -  // Tags like <script> and <textarea> are flagged as CDATA or RCDATA, -  // which means everything is text until we see the correct closing tag. 
-  function parseText(parts, tag, h, param, continuationMarker, state) { -    var end = parts.length; -    if (!endTagRe.hasOwnProperty(tag.name)) { -      endTagRe[tag.name] = new RegExp('^' + tag.name + '(?:[\\s\\/]|$)', 'i'); -    } -    var re = endTagRe[tag.name]; -    var first = tag.next; -    var p = tag.next + 1; -    for (; p < end; p++) { -      if (parts[p - 1] === '<\/' && re.test(parts[p])) { break; } -    } -    if (p < end) { p -= 1; } -    var buf = parts.slice(first, p).join(''); -    if (tag.eflags & html4.eflags['CDATA']) { -      if (h.cdata) { -        h.cdata(buf, param, continuationMarker, -          continuationMaker(h, parts, p, state, param)); -      } -    } else if (tag.eflags & html4.eflags['RCDATA']) { -      if (h.rcdata) { -        h.rcdata(normalizeRCData(buf), param, continuationMarker, -          continuationMaker(h, parts, p, state, param)); -      } -    } else { -      throw new Error('bug'); -    } -    return p; -  } - -  // at this point, parts[pos-1] is either "<" or "<\/". -  function parseTagAndAttrs(parts, pos) { -    var m = /^([-\w:]+)/.exec(parts[pos]); -    var tag = {}; -    tag.name = m[1].toLowerCase(); -    tag.eflags = html4.ELEMENTS[tag.name]; -    var buf = parts[pos].substr(m[0].length); -    // Find the next '>'.  We optimistically assume this '>' is not in a -    // quoted context, and further down we fix things up if it turns out to -    // be quoted. 
-    var p = pos + 1; -    var end = parts.length; -    for (; p < end; p++) { -      if (parts[p] === '>') { break; } -      buf += parts[p]; -    } -    if (end <= p) { return void 0; } -    var attrs = []; -    while (buf !== '') { -      m = ATTR_RE.exec(buf); -      if (!m) { -        // No attribute found: skip garbage -        buf = buf.replace(/^[\s\S][^a-z\s]*/, ''); - -      } else if ((m[4] && !m[5]) || (m[6] && !m[7])) { -        // Unterminated quote: slurp to the next unquoted '>' -        var quote = m[4] || m[6]; -        var sawQuote = false; -        var abuf = [buf, parts[p++]]; -        for (; p < end; p++) { -          if (sawQuote) { -            if (parts[p] === '>') { break; } -          } else if (0 <= parts[p].indexOf(quote)) { -            sawQuote = true; -          } -          abuf.push(parts[p]); -        } -        // Slurp failed: lose the garbage -        if (end <= p) { break; } -        // Otherwise retry attribute parsing -        buf = abuf.join(''); -        continue; - -      } else { -        // We have an attribute -        var aName = m[1].toLowerCase(); -        var aValue = m[2] ? decodeValue(m[3]) : ''; -        attrs.push(aName, aValue); -        buf = buf.substr(m[0].length); -      } -    } -    tag.attrs = attrs; -    tag.next = p + 1; -    return tag; -  } - -  function decodeValue(v) { -    var q = v.charCodeAt(0); -    if (q === 0x22 || q === 0x27) { // " or ' -      v = v.substr(1, v.length - 2); -    } -    return unescapeEntities(stripNULs(v)); -  } - -  /** -   * Returns a function that strips unsafe tags and attributes from html. -   * @param {function(string, Array.<string>): ?Array.<string>} tagPolicy -   *     A function that takes (tagName, attribs[]), where tagName is a key in -   *     html4.ELEMENTS and attribs is an array of alternating attribute names -   *     and values.  It should return a record (as follows), or null to delete -   *     the element.  
It's okay for tagPolicy to modify the attribs array, -   *     but the same array is reused, so it should not be held between calls. -   *     Record keys: -   *        attribs: (required) Sanitized attributes array. -   *        tagName: Replacement tag name. -   * @return {function(string, Array)} A function that sanitizes a string of -   *     HTML and appends result strings to the second argument, an array. -   */ -  function makeHtmlSanitizer(tagPolicy) { -    var stack; -    var ignoring; -    var emit = function (text, out) { -      if (!ignoring) { out.push(text); } -    }; -    return makeSaxParser({ -      'startDoc': function(_) { -        stack = []; -        ignoring = false; -      }, -      'startTag': function(tagNameOrig, attribs, out) { -        if (ignoring) { return; } -        if (!html4.ELEMENTS.hasOwnProperty(tagNameOrig)) { return; } -        var eflagsOrig = html4.ELEMENTS[tagNameOrig]; -        if (eflagsOrig & html4.eflags['FOLDABLE']) { -          return; -        } - -        var decision = tagPolicy(tagNameOrig, attribs); -        if (!decision) { -          ignoring = !(eflagsOrig & html4.eflags['EMPTY']); -          return; -        } else if (typeof decision !== 'object') { -          throw new Error('tagPolicy did not return object (old API?)'); -        } -        if ('attribs' in decision) { -          attribs = decision['attribs']; -        } else { -          throw new Error('tagPolicy gave no attribs'); -        } -        var eflagsRep; -        var tagNameRep; -        if ('tagName' in decision) { -          tagNameRep = decision['tagName']; -          eflagsRep = html4.ELEMENTS[tagNameRep]; -        } else { -          tagNameRep = tagNameOrig; -          eflagsRep = eflagsOrig; -        } -        // TODO(mikesamuel): relying on tagPolicy not to insert unsafe -        // attribute names. 
- -        // If this is an optional-end-tag element and either this element or its -        // previous like sibling was rewritten, then insert a close tag to -        // preserve structure. -        if (eflagsOrig & html4.eflags['OPTIONAL_ENDTAG']) { -          var onStack = stack[stack.length - 1]; -          if (onStack && onStack.orig === tagNameOrig && -              (onStack.rep !== tagNameRep || tagNameOrig !== tagNameRep)) { -                out.push('<\/', onStack.rep, '>'); -          } -        } - -        if (!(eflagsOrig & html4.eflags['EMPTY'])) { -          stack.push({orig: tagNameOrig, rep: tagNameRep}); -        } - -        out.push('<', tagNameRep); -        for (var i = 0, n = attribs.length; i < n; i += 2) { -          var attribName = attribs[i], -              value = attribs[i + 1]; -          if (value !== null && value !== void 0) { -            out.push(' ', attribName, '="', escapeAttrib(value), '"'); -          } -        } -        out.push('>'); - -        if ((eflagsOrig & html4.eflags['EMPTY']) -            && !(eflagsRep & html4.eflags['EMPTY'])) { -          // replacement is non-empty, synthesize end tag -          out.push('<\/', tagNameRep, '>'); -        } -      }, -      'endTag': function(tagName, out) { -        if (ignoring) { -          ignoring = false; -          return; -        } -        if (!html4.ELEMENTS.hasOwnProperty(tagName)) { return; } -        var eflags = html4.ELEMENTS[tagName]; -        if (!(eflags & (html4.eflags['EMPTY'] | html4.eflags['FOLDABLE']))) { -          var index; -          if (eflags & html4.eflags['OPTIONAL_ENDTAG']) { -            for (index = stack.length; --index >= 0;) { -              var stackElOrigTag = stack[index].orig; -              if (stackElOrigTag === tagName) { break; } -              if (!(html4.ELEMENTS[stackElOrigTag] & -                    html4.eflags['OPTIONAL_ENDTAG'])) { -                // Don't pop non optional end tags looking for a match. 
-                return; -              } -            } -          } else { -            for (index = stack.length; --index >= 0;) { -              if (stack[index].orig === tagName) { break; } -            } -          } -          if (index < 0) { return; }  // Not opened. -          for (var i = stack.length; --i > index;) { -            var stackElRepTag = stack[i].rep; -            if (!(html4.ELEMENTS[stackElRepTag] & -                  html4.eflags['OPTIONAL_ENDTAG'])) { -              out.push('<\/', stackElRepTag, '>'); -            } -          } -          if (index < stack.length) { -            tagName = stack[index].rep; -          } -          stack.length = index; -          out.push('<\/', tagName, '>'); -        } -      }, -      'pcdata': emit, -      'rcdata': emit, -      'cdata': emit, -      'endDoc': function(out) { -        for (; stack.length; stack.length--) { -          out.push('<\/', stack[stack.length - 1].rep, '>'); -        } -      } -    }); -  } - -  var ALLOWED_URI_SCHEMES = /^(?:https?|mailto)$/i; - -  function safeUri(uri, effect, ltype, hints, naiveUriRewriter) { -    if (!naiveUriRewriter) { return null; } -    try { -      var parsed = URI.parse('' + uri); -      if (parsed) { -        if (!parsed.hasScheme() || -            ALLOWED_URI_SCHEMES.test(parsed.getScheme())) { -          var safe = naiveUriRewriter(parsed, effect, ltype, hints); -          return safe ? safe.toString() : null; -        } -      } -    } catch (e) { -      return null; -    } -    return null; -  } - -  function log(logger, tagName, attribName, oldValue, newValue) { -    if (!attribName) { -      logger(tagName + " removed", { -        change: "removed", -        tagName: tagName -      }); -    } -    if (oldValue !== newValue) { -      var changed = "changed"; -      if (oldValue && !newValue) { -        changed = "removed"; -      } else if (!oldValue && newValue)  { -        changed = "added"; -      } -      logger(tagName + "." 
+ attribName + " " + changed, { -        change: changed, -        tagName: tagName, -        attribName: attribName, -        oldValue: oldValue, -        newValue: newValue -      }); -    } -  } - -  function lookupAttribute(map, tagName, attribName) { -    var attribKey; -    attribKey = tagName + '::' + attribName; -    if (map.hasOwnProperty(attribKey)) { -      return map[attribKey]; -    } -    attribKey = '*::' + attribName; -    if (map.hasOwnProperty(attribKey)) { -      return map[attribKey]; -    } -    return void 0; -  } -  function getAttributeType(tagName, attribName) { -    return lookupAttribute(html4.ATTRIBS, tagName, attribName); -  } -  function getLoaderType(tagName, attribName) { -    return lookupAttribute(html4.LOADERTYPES, tagName, attribName); -  } -  function getUriEffect(tagName, attribName) { -    return lookupAttribute(html4.URIEFFECTS, tagName, attribName); -  } - -  /** -   * Sanitizes attributes on an HTML tag. -   * @param {string} tagName An HTML tag name in lowercase. -   * @param {Array.<?string>} attribs An array of alternating names and values. -   * @param {?function(?string): ?string} opt_naiveUriRewriter A transform to -   *     apply to URI attributes; it can return a new string value, or null to -   *     delete the attribute.  If unspecified, URI attributes are deleted. -   * @param {function(?string): ?string} opt_nmTokenPolicy A transform to apply -   *     to attributes containing HTML names, element IDs, and space-separated -   *     lists of classes; it can return a new string value, or null to delete -   *     the attribute.  If unspecified, these attributes are kept unchanged. -   * @return {Array.<?string>} The sanitized attributes as a list of alternating -   *     names and values, where a null value means to omit the attribute. 
-   */ -  function sanitizeAttribs(tagName, attribs, -    opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) { -    // TODO(felix8a): it's obnoxious that domado duplicates much of this -    // TODO(felix8a): maybe consistently enforce constraints like target= -    for (var i = 0; i < attribs.length; i += 2) { -      var attribName = attribs[i]; -      var value = attribs[i + 1]; -      var oldValue = value; -      var atype = null, attribKey; -      if ((attribKey = tagName + '::' + attribName, -           html4.ATTRIBS.hasOwnProperty(attribKey)) || -          (attribKey = '*::' + attribName, -           html4.ATTRIBS.hasOwnProperty(attribKey))) { -        atype = html4.ATTRIBS[attribKey]; -      } -      if (atype !== null) { -        switch (atype) { -          case html4.atype['NONE']: break; -          case html4.atype['SCRIPT']: -            value = null; -            if (opt_logger) { -              log(opt_logger, tagName, attribName, oldValue, value); -            } -            break; -          case html4.atype['STYLE']: -            if ('undefined' === typeof parseCssDeclarations) { -              value = null; -              if (opt_logger) { -                log(opt_logger, tagName, attribName, oldValue, value); -	      } -              break; -            } -            var sanitizedDeclarations = []; -            parseCssDeclarations( -                value, -                { -                  'declaration': function (property, tokens) { -                    var normProp = property.toLowerCase(); -                    sanitizeCssProperty( -                        normProp, tokens, -                        opt_naiveUriRewriter -                        ? 
function (url) { -                            return safeUri( -                                url, html4.ueffects.SAME_DOCUMENT, -                                html4.ltypes.SANDBOXED, -                                { -                                  "TYPE": "CSS", -                                  "CSS_PROP": normProp -                                }, opt_naiveUriRewriter); -                          } -                        : null); -                    if (tokens.length) { -                      sanitizedDeclarations.push( -                          normProp + ': ' + tokens.join(' ')); -                    } -                  } -                }); -            value = sanitizedDeclarations.length > 0 ? -              sanitizedDeclarations.join(' ; ') : null; -            if (opt_logger) { -              log(opt_logger, tagName, attribName, oldValue, value); -            } -            break; -          case html4.atype['ID']: -          case html4.atype['IDREF']: -          case html4.atype['IDREFS']: -          case html4.atype['GLOBAL_NAME']: -          case html4.atype['LOCAL_NAME']: -          case html4.atype['CLASSES']: -            value = opt_nmTokenPolicy ? 
opt_nmTokenPolicy(value) : value; -            if (opt_logger) { -              log(opt_logger, tagName, attribName, oldValue, value); -            } -            break; -          case html4.atype['URI']: -            value = safeUri(value, -              getUriEffect(tagName, attribName), -              getLoaderType(tagName, attribName), -              { -                "TYPE": "MARKUP", -                "XML_ATTR": attribName, -                "XML_TAG": tagName -              }, opt_naiveUriRewriter); -              if (opt_logger) { -              log(opt_logger, tagName, attribName, oldValue, value); -            } -            break; -          case html4.atype['URI_FRAGMENT']: -            if (value && '#' === value.charAt(0)) { -              value = value.substring(1);  // remove the leading '#' -              value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value; -              if (value !== null && value !== void 0) { -                value = '#' + value;  // restore the leading '#' -              } -            } else { -              value = null; -            } -            if (opt_logger) { -              log(opt_logger, tagName, attribName, oldValue, value); -            } -            break; -          default: -            value = null; -            if (opt_logger) { -              log(opt_logger, tagName, attribName, oldValue, value); -            } -            break; -        } -      } else { -        value = null; -        if (opt_logger) { -          log(opt_logger, tagName, attribName, oldValue, value); -        } -      } -      attribs[i + 1] = value; -    } -    return attribs; -  } - -  /** -   * Creates a tag policy that omits all tags marked UNSAFE in html4-defs.js -   * and applies the default attribute sanitizer with the supplied policy for -   * URI attributes and NMTOKEN attributes. -   * @param {?function(?string): ?string} opt_naiveUriRewriter A transform to -   *     apply to URI attributes.  
If not given, URI attributes are deleted. -   * @param {function(?string): ?string} opt_nmTokenPolicy A transform to apply -   *     to attributes containing HTML names, element IDs, and space-separated -   *     lists of classes.  If not given, such attributes are left unchanged. -   * @return {function(string, Array.<?string>)} A tagPolicy suitable for -   *     passing to html.sanitize. -   */ -  function makeTagPolicy( -    opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) { -    return function(tagName, attribs) { -      if (!(html4.ELEMENTS[tagName] & html4.eflags['UNSAFE'])) { -        return { -          'attribs': sanitizeAttribs(tagName, attribs, -            opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) -        }; -      } else { -        if (opt_logger) { -          log(opt_logger, tagName, undefined, undefined, undefined); -        } -      } -    }; -  } - -  /** -   * Sanitizes HTML tags and attributes according to a given policy. -   * @param {string} inputHtml The HTML to sanitize. -   * @param {function(string, Array.<?string>)} tagPolicy A function that -   *     decides which tags to accept and sanitizes their attributes (see -   *     makeHtmlSanitizer above for details). -   * @return {string} The sanitized HTML. -   */ -  function sanitizeWithPolicy(inputHtml, tagPolicy) { -    var outputArray = []; -    makeHtmlSanitizer(tagPolicy)(inputHtml, outputArray); -    return outputArray.join(''); -  } - -  /** -   * Strips unsafe tags and attributes from HTML. -   * @param {string} inputHtml The HTML to sanitize. -   * @param {?function(?string): ?string} opt_naiveUriRewriter A transform to -   *     apply to URI attributes.  If not given, URI attributes are deleted. -   * @param {function(?string): ?string} opt_nmTokenPolicy A transform to apply -   *     to attributes containing HTML names, element IDs, and space-separated -   *     lists of classes.  If not given, such attributes are left unchanged. 
-   */ -  function sanitize(inputHtml, -    opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) { -    var tagPolicy = makeTagPolicy( -      opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger); -    return sanitizeWithPolicy(inputHtml, tagPolicy); -  } - -  // Export both quoted and unquoted names for Closure linkage. -  var html = {}; -  html.escapeAttrib = html['escapeAttrib'] = escapeAttrib; -  html.makeHtmlSanitizer = html['makeHtmlSanitizer'] = makeHtmlSanitizer; -  html.makeSaxParser = html['makeSaxParser'] = makeSaxParser; -  html.makeTagPolicy = html['makeTagPolicy'] = makeTagPolicy; -  html.normalizeRCData = html['normalizeRCData'] = normalizeRCData; -  html.sanitize = html['sanitize'] = sanitize; -  html.sanitizeAttribs = html['sanitizeAttribs'] = sanitizeAttribs; -  html.sanitizeWithPolicy = html['sanitizeWithPolicy'] = sanitizeWithPolicy; -  html.unescapeEntities = html['unescapeEntities'] = unescapeEntities; -  return html; -})(html4); - -var html_sanitize = html['sanitize']; - -return { -  html: html -}; -}); diff --git a/web-ui/app/js/lib/html_whitelister.js b/web-ui/app/js/lib/html_whitelister.js deleted file mode 100644 index 22841cce..00000000 --- a/web-ui/app/js/lib/html_whitelister.js +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2014 ThoughtWorks, Inc. - * - * Pixelated is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Pixelated is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with Pixelated. If not, see <http://www.gnu.org/licenses/>. 
/*
 * html_whitelister: a tag policy for the HTML sanitizer that accepts only the
 * tags listed in tagAndAttributeWhitelist and, for each accepted tag, only
 * the attributes listed for it.
 */

'use strict';

define(['lib/html-sanitizer'], function (htmlSanitizer) {
  // Maps each allowed tag name to the attribute names allowed on it.
  var tagAndAttributeWhitelist = {
    'p': ['style'],
    'div': ['style'],
    'a': ['href', 'style'],
    'span': ['style'],
    'font': ['face', 'size', 'style'],
    'img': ['title'],
    'em': [],
    'b': [],
    'i': [],
    'strong': ['style'],
    'table': ['style'],
    'tr': ['style'],
    'td': ['style'],
    'th': ['style'],
    'tbody': ['style'],
    'thead': ['style'],
    'dt': ['style'],
    'dd': ['style'],
    'dl': ['style'],
    'h1': ['style'],
    'h2': ['style'],
    'h3': ['style'],
    'h4': ['style'],
    'h5': ['style'],
    'h6': ['style'],
    'br': [],
    'blockquote': ['style'],
    'label': ['style'],
    'form': ['style'],
    'ol': ['style'],
    'ul': ['style'],
    'li': ['style'],
    'input': ['style', 'type', 'name', 'value']
  };

  var hasOwn = Object.prototype.hasOwnProperty;

  /**
   * Returns the allowed attribute names for a tag, or null when the tag is
   * not whitelisted. Only own properties count, so names inherited from
   * Object.prototype (e.g. "constructor", "__proto__") are never treated as
   * whitelisted tags.
   * @param {string} tagName The tag name to look up.
   * @return {?Array.<string>} The allowed attribute names, or null.
   */
  function allowedAttributesFor (tagName) {
    return hasOwn.call(tagAndAttributeWhitelist, tagName) ?
      tagAndAttributeWhitelist[tagName] : null;
  }

  /**
   * Keeps only the whitelisted attributes of a tag.
   * @param {string} tagName The tag the attributes belong to.
   * @param {Array.<?string>} attributes Alternating attribute names and
   *     values: [name0, value0, name1, value1, ...].
   * @return {Array.<?string>} A new alternating name/value array holding only
   *     the allowed attributes, with their values unchanged.
   */
  function filterAllowedAttributes (tagName, attributes) {
    var i, attributesAndValues = [];
    var allowed = allowedAttributesFor(tagName);

    if (!allowed) {
      return attributesAndValues;
    }

    // Step by 2: names sit at even indices and values at odd ones. Stepping
    // by 1 would test values (and disallowed names) as attribute names and
    // could emit them as attributes.
    for (i = 0; i < attributes.length; i += 2) {
      if (allowed.indexOf(attributes[i]) !== -1) {
        attributesAndValues.push(attributes[i]);
        attributesAndValues.push(attributes[i + 1]);
      }
    }

    return attributesAndValues;
  }

  /**
   * Tag policy in the shape taken by the sanitizer's sanitizeWithPolicy.
   * NOTE(review): attribute values are passed through unchanged, so `href`
   * and `style` contents are not validated here -- confirm that callers do
   * not rely on this policy alone to neutralise e.g. javascript: URLs.
   * @param {string} tagName The tag name to check.
   * @param {Array.<?string>} attributes Alternating attribute names/values.
   * @return {?{tagName: string, attribs: Array.<?string>}} null to reject the
   *     tag, otherwise the tag name with its filtered attributes.
   */
  function tagPolicy (tagName, attributes) {
    if (!allowedAttributesFor(tagName)) {
      return null;
    }

    return {
      tagName: tagName,
      attribs: filterAllowedAttributes(tagName, attributes)
    };
  }

  return {
    tagPolicy: tagPolicy,
    sanitize: htmlSanitizer.html.sanitizeWithPolicy
  };
});
