issue #617: Remove old html whitelister

author: Felix Hammerl <fhammerl@thoughtworks.com> 2016-02-24 10:30:06 +0100
committer: Felix Hammerl <fhammerl@thoughtworks.com> 2016-02-24 10:30:06 +0100
commit: 0f9c1e66c9ab6b8f037436ffcb45d71f92d9b613 (patch)
tree: 6ee182a9d48f30110b68f00f57dd6d46c42fea5b /web-ui
parent: 77ec41bb6f542077503106cacc1dbd28118c50b4 (diff)
2 files changed, 0 insertions, 1150 deletions
diff --git a/web-ui/app/js/lib/html-sanitizer.js b/web-ui/app/js/lib/html-sanitizer.js
deleted file mode 100644
index 80fb0041..00000000
--- a/web-ui/app/js/lib/html-sanitizer.js
+++ /dev/null
@@ -1,1064 +0,0 @@
-// Copyright (C) 2006 Google Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-/**
- * @fileoverview
- * An HTML sanitizer that can satisfy a variety of security policies.
- *
- * <p>
- * The HTML sanitizer is built around a SAX parser and HTML element and
- * attributes schemas.
- *
- * If the cssparser is loaded, inline styles are sanitized using the
- * css property and value schemas.  Otherwise they are removed during
- * sanitization.
- *
- * If it exists, uses parseCssDeclarations, sanitizeCssProperty,  cssSchema
- *
- * @author mikesamuel@gmail.com
- * @author jasvir@gmail.com
- * \@requires html4, URI
- * \@overrides window
- * \@provides html, html_sanitize
- */
-
-// The Turkish i seems to be a non-issue, but abort in case it is: the
-// parser below lower-cases tag and attribute names with toLowerCase()
-// before looking them up in the html4 schema.
-// An Error (rather than a bare string) is thrown so a stack trace is kept.
-if ('I'.toLowerCase() !== 'i') { throw new Error('I/i problem'); }
-
-/**
- * \@namespace
- */
-define(['lib/html4-defs'], function (html4) {
-var html = (function(html4) {
-
-  // For closure compiler
-  // Optional CSS helpers.  They are read from the global window object, using
-  // quoted keys, when a window exists; otherwise all three stay undefined.
-  var parseCssDeclarations, sanitizeCssProperty, cssSchema;
-  if ('undefined' !== typeof window) {
-    parseCssDeclarations = window['parseCssDeclarations'];
-    sanitizeCssProperty = window['sanitizeCssProperty'];
-    cssSchema = window['cssSchema'];
-  }
-
-  // The keys of this object must be 'quoted' or JSCompiler will mangle them!
-  // This is a partial list -- lookupEntity() uses the host browser's parser
-  // (when available) to implement full entity lookup, and caches the names it
-  // resolves that way in this same object.
-  // Note that entities are in general case-sensitive; the uppercase ones are
-  // explicitly defined by HTML5 (presumably as compatibility).
-  var ENTITIES = {
-    'lt': '<',
-    'LT': '<',
-    'gt': '>',
-    'GT': '>',
-    'amp': '&',
-    'AMP': '&',
-    'quot': '"',
-    'apos': '\'',
-    // U+00A0 NO-BREAK SPACE.  Written as a \u escape because the legacy octal
-    // form ('\240') is a syntax error in strict-mode and module code.
-    'nbsp': '\u00A0'
-  };
-
-  // Patterns for types of entity/character reference names.
-  var decimalEscapeRe = /^#(\d+)$/;
-  // The 'x' may be in either case, matching what ENTITY_RE_1 lets through
-  // (e.g. both "#x3c" and "#X3c").
-  var hexEscapeRe = /^#[xX]([0-9A-Fa-f]+)$/;
-  // contains every entity per http://www.w3.org/TR/2011/WD-html5-20110113/named-character-references.html
-  // Only ASCII letters and digits are allowed, and the first character must
-  // be a letter.  (The range is A-Z then a-z; "A-z" would also admit the
-  // punctuation characters that sit between 'Z' and 'a'.)
-  var safeEntityNameRe = /^[A-Za-z][A-Za-z0-9]+$/;
-  // Used as a hook to invoke the browser's entity parsing. <textarea> is used
-  // because its content is parsed for entities but not tags.
-  // TODO(kpreid): This retrieval is a kludge and leads to silent loss of
-  // functionality if the document isn't available.
-  var entityLookupElement =
-      ('undefined' !== typeof window && window['document'])
-          ? window['document'].createElement('textarea') : null;
-  /**
-   * Decodes an HTML entity.
-   *
-   * Numeric references above U+FFFF are returned as a UTF-16 surrogate pair,
-   * and numeric references above U+10FFFF (including values too large to
-   * parse) decode to U+FFFD instead of being truncated to 16 bits.
-   *
-   * {\@updoc
-   * $ lookupEntity('lt')
-   * # '<'
-   * $ lookupEntity('GT')
-   * # '>'
-   * $ lookupEntity('amp')
-   * # '&'
-   * $ lookupEntity('nbsp')
-   * # '\xA0'
-   * $ lookupEntity('apos')
-   * # "'"
-   * $ lookupEntity('quot')
-   * # '"'
-   * $ lookupEntity('#xa')
-   * # '\n'
-   * $ lookupEntity('#10')
-   * # '\n'
-   * $ lookupEntity('#x0a')
-   * # '\n'
-   * $ lookupEntity('#010')
-   * # '\n'
-   * $ lookupEntity('#x00A')
-   * # '\n'
-   * $ lookupEntity('#x1003C')
-   * # '\uD800\uDC3C'
-   * $ lookupEntity('#x110000')
-   * # '\uFFFD'
-   * $ lookupEntity('Pi')      // Known failure
-   * # '\u03A0'
-   * $ lookupEntity('pi')      // Known failure
-   * # '\u03C0'
-   * }
-   *
-   * @param {string} name the content between the '&' and the ';'.
-   * @return {string} the decoded text, or '&' + name + ';' unchanged when the
-   *     name is neither numeric nor resolvable.
-   */
-  function lookupEntity(name) {
-    // Converts a numeric code point to a string without the silent 16-bit
-    // truncation of String.fromCharCode (which would turn e.g. 0x1003C
-    // into '<').
-    function fromCodePoint(codePoint) {
-      if (codePoint > 0x10FFFF) { return '\uFFFD'; }
-      if (codePoint > 0xFFFF) {
-        var offset = codePoint - 0x10000;
-        return String.fromCharCode(
-            0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
-      }
-      return String.fromCharCode(codePoint);
-    }
-
-    // TODO: entity lookup as specified by HTML5 actually depends on the
-    // presence of the ";".
-    if (ENTITIES.hasOwnProperty(name)) { return ENTITIES[name]; }
-    var m = name.match(decimalEscapeRe);
-    if (m) {
-      return fromCodePoint(parseInt(m[1], 10));
-    } else if (!!(m = name.match(hexEscapeRe))) {
-      return fromCodePoint(parseInt(m[1], 16));
-    } else if (entityLookupElement && safeEntityNameRe.test(name)) {
-      // Let the browser decode the named entity, then cache the answer.
-      entityLookupElement.innerHTML = '&' + name + ';';
-      var text = entityLookupElement.textContent;
-      ENTITIES[name] = text;
-      return text;
-    } else {
-      return '&' + name + ';';
-    }
-  }
-
-  /**
-   * String.replace() callback adapter for lookupEntity().
-   * @param {string} _ the whole matched text (ignored).
-   * @param {string} name the first capture group: the entity name.
-   * @return {string} whatever lookupEntity(name) returns.
-   */
-  function decodeOneEntity(_, name) {
-    return lookupEntity(name);
-  }
-
-  // Matches every NUL (U+0000) character.
-  var nulRe = /\0/g;
-  /**
-   * Removes all NUL characters from a string.
-   * @param {string} s the text to clean.
-   * @return {string} s with every NUL character deleted.
-   */
-  function stripNULs(s) {
-    var withoutNuls = s.replace(nulRe, '');
-    return withoutNuls;
-  }
-
-  // A complete entity reference anywhere in a string: '&', a name, ';'.
-  var ENTITY_RE_1 = /&(#[0-9]+|#[xX][0-9A-Fa-f]+|\w+);/g;
-  // The same name-plus-';' shape, anchored at the start of a token that
-  // follows a separate '&' token.
-  var ENTITY_RE_2 = /^(#[0-9]+|#[xX][0-9A-Fa-f]+|\w+);/;
-  /**
-   * Returns the plain text of a chunk of HTML CDATA which possibly contains
-   * entities.  Only references terminated by ';' are decoded.
-   *
-   * {\@updoc
-   * $ unescapeEntities('')
-   * # ''
-   * $ unescapeEntities('hello World!')
-   * # 'hello World!'
-   * $ unescapeEntities('1 &lt; 2 &amp;&AMP; 4 &gt; 3&#10;')
-   * # '1 < 2 && 4 > 3\n'
-   * $ unescapeEntities('&lt;&lt <- unfinished entity&gt;')
-   * # '<&lt <- unfinished entity>'
-   * $ unescapeEntities('/foo?bar=baz&copy=true')  // & often unescaped in URLS
-   * # '/foo?bar=baz&copy=true'
-   * $ unescapeEntities('pi=&pi;&#x3c0;, Pi=&Pi;\u03A0') // FIXME: known failure
-   * # 'pi=\u03C0\u03c0, Pi=\u03A0\u03A0'
-   * }
-   *
-   * @param {string} s a chunk of HTML CDATA.  It must not start or end inside
-   *     an HTML entity.
-   * @return {string} s with each complete entity reference decoded.
-   */
-  function unescapeEntities(s) {
-    var decoded = s.replace(ENTITY_RE_1, function (wholeMatch, entityName) {
-      return lookupEntity(entityName);
-    });
-    return decoded;
-  }
-
-  // Patterns used by escapeAttrib() and normalizeRCData().
-  var ampRe = /&/g;
-  // An '&' that does not start something shaped like an entity reference:
-  // it is followed by none of a letter, '#' plus a digit, or '#x' plus a hex
-  // digit (case-insensitive).  The character after the '&', if any, is
-  // captured so the replacement can put it back.
-  var looseAmpRe = /&([^a-z#]|#(?:[^0-9x]|x(?:[^0-9a-f]|$)|$)|$)/gi;
-  var ltRe = /[<]/g;
-  var gtRe = />/g;
-  var quotRe = /\"/g;
-
-  /**
-   * Escapes HTML special characters in attribute values so the result can be
-   * placed inside a double-quoted attribute.  The characters rewritten are
-   * '&', '<', '>' and '"'; everything else (including '=') is left as is.
-   *
-   * {\@updoc
-   * $ escapeAttrib('')
-   * # ''
-   * $ escapeAttrib('"<<&==&>>"')  // Do not just escape the first occurrence.
-   * # '&#34;&lt;&lt;&amp;==&amp;&gt;&gt;&#34;'
-   * $ escapeAttrib('Hello <World>!')
-   * # 'Hello &lt;World&gt;!'
-   * }
-   *
-   * @param {*} s the value to escape; it is converted to a string first.
-   * @return {string} the escaped text.
-   */
-  function escapeAttrib(s) {
-    var escaped = '' + s;
-    // '&' must go first so the entities added below are not re-escaped.
-    escaped = escaped.replace(ampRe, '&amp;');
-    escaped = escaped.replace(ltRe, '&lt;');
-    escaped = escaped.replace(gtRe, '&gt;');
-    escaped = escaped.replace(quotRe, '&#34;');
-    return escaped;
-  }
-
-  /**
-   * Escape entities in RCDATA that can be escaped without changing the meaning:
-   * stray ampersands, '<' and '>'.  Ampersands that already start something
-   * shaped like an entity reference are left alone.
-   * {\@updoc
-   * $ normalizeRCData('1 < 2 &&amp; 3 > 4 &amp;& 5 &lt; 7&8')
-   * # '1 &lt; 2 &amp;&amp; 3 &gt; 4 &amp;&amp; 5 &lt; 7&amp;8'
-   * }
-   *
-   * @param {string} rcdata the raw RCDATA text.
-   * @return {string} the normalized text.
-   */
-  function normalizeRCData(rcdata) {
-    var ampsFixed = rcdata.replace(looseAmpRe, '&amp;$1');
-    var ltFixed = ampsFixed.replace(ltRe, '&lt;');
-    return ltFixed.replace(gtRe, '&gt;');
-  }
-
-  // TODO(felix8a): validate sanitizer regexs against the HTML5 grammar at
-  // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html
-  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html
-  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html
-  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html
-
-  // We initially split input so that potentially meaningful characters
-  // like '<' and '>' are separate tokens, using a fast dumb process that
-  // ignores quoting.  Then we walk that token stream, and when we see a
-  // '<' that's the start of a tag, we use ATTR_RE to extract tag
-  // attributes from the next token.  That token will never have a '>'
-  // character.  However, it might have an unbalanced quote character, and
-  // when we see that, we combine additional tokens to balance the quote.
-
-  // Matches one attribute at the start of the remaining tag text: optional
-  // whitespace, a name, and optionally '=' plus a value.  Case-insensitive.
-  // A quoted value may be unterminated (the closing quote group then matches
-  // the end of input and captures an empty string); parseTagAndAttrs() uses
-  // groups 4-7 to detect that.
-  var ATTR_RE = new RegExp(
-    '^\\s*' +
-    '([-.:\\w]+)' +             // 1 = Attribute name
-    '(?:' + (
-      '\\s*(=)\\s*' +           // 2 = Is there a value?
-      '(' + (                   // 3 = Attribute value
-        // TODO(felix8a): maybe use backref to match quotes
-        '(\")[^\"]*(\"|$)' +    // 4, 5 = Double-quoted string
-        '|' +
-        '(\')[^\']*(\'|$)' +    // 6, 7 = Single-quoted string
-        '|' +
-        // Positive lookahead to prevent interpretation of
-        // <foo a= b=c> as <foo a='b=c'>
-        // TODO(felix8a): might be able to drop this case
-        '(?=[a-z][-\\w]*\\s*=)' +
-        '|' +
-        // Unquoted value that isn't an attribute name
-        // (since we didn't match the positive lookahead above)
-        '[^\"\'\\s]*' ) +
-      ')' ) +
-    ')?',
-    'i');
-
-  // false on IE<=8, true on most other browsers
-  // (true means String.split() keeps text matched by capturing groups in its
-  // result; htmlSplit() falls back to a manual loop when it does not).
-  var splitWillCapture = ('a,b'.split(/(,)/).length === 3);
-
-  // bitmask for tags with special parsing, like <script> and <textarea>
-  // (the CDATA and RCDATA element flags from the html4 schema combined).
-  var EFLAGS_TEXT = html4.eflags['CDATA'] | html4.eflags['RCDATA'];
-
-  /**
-   * Given a SAX-like event handler, produce a function that feeds those
-   * events and a parameter to the event handler.
-   *
-   * The event handler has the form:{@code
-   * {
-   *   // Name is a lower-case HTML tag name.  Attribs is an array of
-   *   // alternating lower-case attribute names, and decoded attribute
-   *   // values.  Handlers should not hold on to the attribs array between
-   *   // calls.  Param is the value passed to the saxParser.
-   *   startTag: function (name, attribs, param) { ... },
-   *   endTag:   function (name, param) { ... },
-   *   pcdata:   function (text, param) { ... },
-   *   rcdata:   function (text, param) { ... },
-   *   cdata:    function (text, param) { ... },
-   *   comment:  function (text, param) { ... },
-   *   startDoc: function (param) { ... },
-   *   endDoc:   function (param) { ... }
-   * }}
-   *
-   * Every handler is optional.  All handlers except startDoc and endDoc are
-   * also passed two trailing arguments: the continuationMarker object and a
-   * function that resumes parsing after the current event.
-   *
-   * @param {Object} handler a record containing event handlers.
-   * @return {function(string, Object)} A function that takes a chunk of HTML
-   *     and a parameter.  The parameter is passed on to the handler methods.
-   */
-  function makeSaxParser(handler) {
-    // Accept quoted or unquoted keys (Closure compat)
-    // Each handler is looked up both ways so that it is found whether or not
-    // the caller's property names were renamed by the compiler.
-    var hcopy = {
-      cdata: handler.cdata || handler['cdata'],
-      comment: handler.comment || handler['comment'],
-      endDoc: handler.endDoc || handler['endDoc'],
-      endTag: handler.endTag || handler['endTag'],
-      pcdata: handler.pcdata || handler['pcdata'],
-      rcdata: handler.rcdata || handler['rcdata'],
-      startDoc: handler.startDoc || handler['startDoc'],
-      startTag: handler.startTag || handler['startTag']
-    };
-    return function(htmlText, param) {
-      return parse(htmlText, hcopy, param);
-    };
-  }
-
-  // Parsing strategy is to split input into parts that might be lexically
-  // meaningful (every ">" becomes a separate part), and then recombine
-  // parts if we discover they're in a different context.
-
-  // TODO(felix8a): Significant performance regressions from -legacy,
-  // tested on
-  //    Chrome 18.0
-  //    Firefox 11.0
-  //    IE 6, 7, 8, 9
-  //    Opera 11.61
-  //    Safari 5.1.3
-  // Many of these are unusual patterns that are linearly slower and still
-  // pretty fast (eg 1ms to 5ms), so not necessarily worth fixing.
-
-  // TODO(felix8a): "<script> && && && ... <\/script>" is slower on all
-  // browsers.  The hotspot is htmlSplit.
-
-  // TODO(felix8a): "<p title='>>>>...'><\/p>" is slower on all browsers.
-  // This is partly htmlSplit, but the hotspot is parseTagAndAttrs.
-
-  // TODO(felix8a): "<a><\/a><a><\/a>..." is slower on IE9.
-  // "<a>1<\/a><a>1<\/a>..." is faster, "<a><\/a>2<a><\/a>2..." is faster.
-
-  // TODO(felix8a): "<p<p<p..." is slower on IE[6-8]
-
-  // Sentinel object handed to every handler callback.  parseCPS() swallows it
-  // if it is thrown, and rethrows any other exception.
-  var continuationMarker = {};
-  /**
-   * Splits htmlText into tokens and runs the parser over them from the start.
-   *
-   * @param {string} htmlText the HTML to parse (coerced to a string by
-   *     htmlSplit()).
-   * @param {Object} handler record of event handlers (see makeSaxParser).
-   * @param {*} param passed through to every handler callback.
-   */
-  function parse(htmlText, handler, param) {
-    var parts = htmlSplit(htmlText);
-    // Flags that remember a failed forward search so it is not repeated.
-    var state = {
-      noMoreGT: false,
-      noMoreEndComments: false
-    };
-    parseCPS(handler, parts, 0, state, param);
-  }
-
-  /**
-   * Builds a zero-argument function that, when called, runs parseCPS() over
-   * the same tokens starting at index `initial`.
-   *
-   * @param {Object} h handler record.
-   * @param {Array.<string>} parts the token array.
-   * @param {number} initial token index at which to resume.
-   * @param {Object} state shared parser state flags.
-   * @param {*} param value forwarded to the handlers.
-   * @return {function()} the resume function.
-   */
-  function continuationMaker(h, parts, initial, state, param) {
-    var resume = function () {
-      parseCPS(h, parts, initial, state, param);
-    };
-    return resume;
-  }
-
-  /**
-   * Walks the token array produced by htmlSplit(), starting at index
-   * `initial`, and reports what it finds to the handler callbacks in `h`.
-   *
-   * Each callback other than startDoc/endDoc is also given continuationMarker
-   * and a function (built by continuationMaker) that calls parseCPS again
-   * from a later token index.  If continuationMarker is thrown while this
-   * function runs, it is swallowed here and the function returns without
-   * calling endDoc; any other exception is rethrown.
-   *
-   * @param {Object} h handler record (see makeSaxParser).
-   * @param {Array.<string>} parts tokens from htmlSplit().
-   * @param {number} initial index of the first token to process; startDoc is
-   *     only called when this is 0.
-   * @param {Object} state flags (noMoreGT, noMoreEndComments) that remember a
-   *     failed forward search so it is not repeated.
-   * @param {*} param passed through to every handler callback.
-   */
-  function parseCPS(h, parts, initial, state, param) {
-    try {
-      if (h.startDoc && initial == 0) { h.startDoc(param); }
-      var m, p, tagName;
-      for (var pos = initial, end = parts.length; pos < end;) {
-        // `current` is the token being dispatched on; `pos` now points at the
-        // token after it, which is `next`.
-        var current = parts[pos++];
-        var next = parts[pos];
-        switch (current) {
-        case '&':
-          if (ENTITY_RE_2.test(next)) {
-            if (h.pcdata) {
-              h.pcdata('&' + next, param, continuationMarker,
-                continuationMaker(h, parts, pos, state, param));
-            }
-            pos++;
-          } else {
-            if (h.pcdata) { h.pcdata("&amp;", param, continuationMarker,
-                continuationMaker(h, parts, pos, state, param));
-            }
-          }
-          break;
-        case '<\/':
-          if ((m = /^([-\w:]+)[^\'\"]*/.exec(next))) {
-            if (m[0].length === next.length && parts[pos + 1] === '>') {
-              // fast case, no attribute parsing needed
-              pos += 2;
-              tagName = m[1].toLowerCase();
-              if (h.endTag) {
-                h.endTag(tagName, param, continuationMarker,
-                  continuationMaker(h, parts, pos, state, param));
-              }
-            } else {
-              // slow case, need to parse attributes
-              // TODO(felix8a): do we really care about misparsing this?
-              pos = parseEndTag(
-                parts, pos, h, param, continuationMarker, state);
-            }
-          } else {
-            if (h.pcdata) {
-              h.pcdata('&lt;/', param, continuationMarker,
-                continuationMaker(h, parts, pos, state, param));
-            }
-          }
-          break;
-        case '<':
-          if (m = /^([-\w:]+)\s*\/?/.exec(next)) {
-            if (m[0].length === next.length && parts[pos + 1] === '>') {
-              // fast case, no attribute parsing needed
-              pos += 2;
-              tagName = m[1].toLowerCase();
-              if (h.startTag) {
-                h.startTag(tagName, [], param, continuationMarker,
-                  continuationMaker(h, parts, pos, state, param));
-              }
-              // tags like <script> and <textarea> have special parsing
-              var eflags = html4.ELEMENTS[tagName];
-              if (eflags & EFLAGS_TEXT) {
-                var tag = { name: tagName, next: pos, eflags: eflags };
-                pos = parseText(
-                  parts, tag, h, param, continuationMarker, state);
-              }
-            } else {
-              // slow case, need to parse attributes
-              pos = parseStartTag(
-                parts, pos, h, param, continuationMarker, state);
-            }
-          } else {
-            if (h.pcdata) {
-              h.pcdata('&lt;', param, continuationMarker,
-                continuationMaker(h, parts, pos, state, param));
-            }
-          }
-          break;
-        case '<\!--':
-          // The pathological case is n copies of '<\!--' without '-->', and
-          // repeated failure to find '-->' is quadratic.  We avoid that by
-          // remembering when search for '-->' fails.
-          if (!state.noMoreEndComments) {
-            // A comment <\!--x--> is split into three tokens:
-            //   '<\!--', 'x--', '>'
-            // We want to find the next '>' token that has a preceding '--'.
-            // pos is at the 'x--'.
-            for (p = pos + 1; p < end; p++) {
-              if (parts[p] === '>' && /--$/.test(parts[p - 1])) { break; }
-            }
-            if (p < end) {
-              if (h.comment) {
-                var comment = parts.slice(pos, p).join('');
-                // Drop the trailing '--' before reporting the comment text.
-                h.comment(
-                  comment.substr(0, comment.length - 2), param,
-                  continuationMarker,
-                  continuationMaker(h, parts, p + 1, state, param));
-              }
-              pos = p + 1;
-            } else {
-              state.noMoreEndComments = true;
-            }
-          }
-          if (state.noMoreEndComments) {
-            if (h.pcdata) {
-              h.pcdata('&lt;!--', param, continuationMarker,
-                continuationMaker(h, parts, pos, state, param));
-            }
-          }
-          break;
-        case '<\!':
-          if (!/^\w/.test(next)) {
-            if (h.pcdata) {
-              h.pcdata('&lt;!', param, continuationMarker,
-                continuationMaker(h, parts, pos, state, param));
-            }
-          } else {
-            // similar to noMoreEndComment logic
-            // When a closing '>' is found, everything up to and including it
-            // is skipped without any handler call.
-            if (!state.noMoreGT) {
-              for (p = pos + 1; p < end; p++) {
-                if (parts[p] === '>') { break; }
-              }
-              if (p < end) {
-                pos = p + 1;
-              } else {
-                state.noMoreGT = true;
-              }
-            }
-            if (state.noMoreGT) {
-              if (h.pcdata) {
-                h.pcdata('&lt;!', param, continuationMarker,
-                  continuationMaker(h, parts, pos, state, param));
-              }
-            }
-          }
-          break;
-        case '<?':
-          // similar to noMoreEndComment logic
-          if (!state.noMoreGT) {
-            for (p = pos + 1; p < end; p++) {
-              if (parts[p] === '>') { break; }
-            }
-            if (p < end) {
-              pos = p + 1;
-            } else {
-              state.noMoreGT = true;
-            }
-          }
-          if (state.noMoreGT) {
-            if (h.pcdata) {
-              h.pcdata('&lt;?', param, continuationMarker,
-                continuationMaker(h, parts, pos, state, param));
-            }
-          }
-          break;
-        case '>':
-          if (h.pcdata) {
-            h.pcdata("&gt;", param, continuationMarker,
-              continuationMaker(h, parts, pos, state, param));
-          }
-          break;
-        case '':
-          break;
-        default:
-          if (h.pcdata) {
-            h.pcdata(current, param, continuationMarker,
-              continuationMaker(h, parts, pos, state, param));
-          }
-          break;
-        }
-      }
-      if (h.endDoc) { h.endDoc(param); }
-    } catch (e) {
-      if (e !== continuationMarker) { throw e; }
-    }
-  }
-
-  /**
-   * Split str into parts for the html parser.  The separators '<\/', '<\!--',
-   * '<!', '<?', '&', '<' and '>' each become their own element, and the text
-   * between them (possibly empty) fills the remaining elements.
-   *
-   * @param {*} str the input; it is coerced to a string.
-   * @return {Array.<string>} alternating text and separator tokens.
-   */
-  function htmlSplit(str) {
-    // A fresh regex per call: the exec() loop below relies on its lastIndex.
-    var separatorRe = /(<\/|<\!--|<[!?]|[&<>])/g;
-    var text = str + '';
-    if (splitWillCapture) { return text.split(separatorRe); }
-
-    // Manual equivalent of a capturing split for engines that lack it.
-    var pieces = [];
-    var cursor = 0;
-    for (var hit = separatorRe.exec(text); hit !== null;
-         hit = separatorRe.exec(text)) {
-      pieces.push(text.substring(cursor, hit.index), hit[0]);
-      cursor = hit.index + hit[0].length;
-    }
-    pieces.push(text.substring(cursor));
-    return pieces;
-  }
-
-  /**
-   * Slow-path handling of an end tag whose text needs attribute-style parsing.
-   *
-   * @param {Array.<string>} parts the token array.
-   * @param {number} pos index of the token that starts with the tag name
-   *     (parts[pos - 1] is '<\/').
-   * @param {Object} h handler record.
-   * @param {*} param value forwarded to the handler.
-   * @param {Object} continuationMarker sentinel forwarded to the handler.
-   * @param {Object} state shared parser state flags.
-   * @return {number} index of the first token after the tag, or parts.length
-   *     when the tag is never closed.
-   */
-  function parseEndTag(parts, pos, h, param, continuationMarker, state) {
-    var tag = parseTagAndAttrs(parts, pos);
-    // drop unclosed tags
-    if (!tag) { return parts.length; }
-    if (h.endTag) {
-      // The continuation must resume after the tag (tag.next), as in
-      // parseStartTag(); resuming at pos would re-read the tag's own text.
-      h.endTag(tag.name, param, continuationMarker,
-        continuationMaker(h, parts, tag.next, state, param));
-    }
-    return tag.next;
-  }
-
-  /**
-   * Slow-path handling of a start tag that carries attributes.
-   *
-   * @param {Array.<string>} parts the token array.
-   * @param {number} pos index of the token that starts with the tag name
-   *     (parts[pos - 1] is '<').
-   * @param {Object} h handler record.
-   * @param {*} param value forwarded to the handler.
-   * @param {Object} continuationMarker sentinel forwarded to the handler.
-   * @param {Object} state shared parser state flags.
-   * @return {number} index of the next token to process, or parts.length
-   *     when the tag is never closed.
-   */
-  function parseStartTag(parts, pos, h, param, continuationMarker, state) {
-    var parsed = parseTagAndAttrs(parts, pos);
-    // An unclosed tag is dropped together with the rest of the input.
-    if (!parsed) { return parts.length; }
-
-    if (h.startTag) {
-      var resume = continuationMaker(h, parts, parsed.next, state, param);
-      h.startTag(parsed.name, parsed.attrs, param, continuationMarker, resume);
-    }
-
-    // Elements such as <script> and <textarea> swallow their content as text.
-    return (parsed.eflags & EFLAGS_TEXT)
-        ? parseText(parts, parsed, h, param, continuationMarker, state)
-        : parsed.next;
-  }
-
-  // Cache of end-tag matching regexes, keyed by tag name.
-  var endTagRe = {};
-
-  /**
-   * Tags like <script> and <textarea> are flagged as CDATA or RCDATA,
-   * which means everything is text until we see the correct closing tag.
-   * Collects that text and reports it through h.cdata or h.rcdata.
-   *
-   * @param {Array.<string>} parts the token array.
-   * @param {Object} tag record with name, eflags, and next (index of the
-   *     first token of the element's content).
-   * @param {Object} h handler record.
-   * @param {*} param value forwarded to the handler.
-   * @param {Object} continuationMarker sentinel forwarded to the handler.
-   * @param {Object} state shared parser state flags.
-   * @return {number} index of the '<\/' token of the matching end tag, or an
-   *     index at or past the end of parts when there is none.
-   */
-  function parseText(parts, tag, h, param, continuationMarker, state) {
-    var total = parts.length;
-    var closer;
-    if (endTagRe.hasOwnProperty(tag.name)) {
-      closer = endTagRe[tag.name];
-    } else {
-      closer = new RegExp('^' + tag.name + '(?:[\\s\\/]|$)', 'i');
-      endTagRe[tag.name] = closer;
-    }
-
-    var start = tag.next;
-    var stop = tag.next + 1;
-    // Advance until the token at `stop` follows a '<\/' and names this tag.
-    while (stop < total &&
-           !(parts[stop - 1] === '<\/' && closer.test(parts[stop]))) {
-      stop++;
-    }
-    // Found: step back so that `stop` is the '<\/' token itself.
-    if (stop < total) { stop -= 1; }
-
-    var text = parts.slice(start, stop).join('');
-    if (tag.eflags & html4.eflags['CDATA']) {
-      if (h.cdata) {
-        h.cdata(text, param, continuationMarker,
-          continuationMaker(h, parts, stop, state, param));
-      }
-      return stop;
-    }
-    if (tag.eflags & html4.eflags['RCDATA']) {
-      if (h.rcdata) {
-        h.rcdata(normalizeRCData(text), param, continuationMarker,
-          continuationMaker(h, parts, stop, state, param));
-      }
-      return stop;
-    }
-    throw new Error('bug');
-  }
-
-  /**
-   * Parses the tag name and attributes of the tag whose text begins at
-   * parts[pos].
-   *
-   * at this point, parts[pos-1] is either "<" or "<\/".
-   * NOTE(review): parts[pos] is assumed to start with a tag name; both
-   * callers are only reached after parseCPS has matched one.
-   *
-   * @param {Array.<string>} parts the token array.
-   * @param {number} pos index of the token that starts with the tag name.
-   * @return {Object|undefined} a record with name (lower-case), eflags (the
-   *     html4.ELEMENTS entry for that name), attrs (alternating lower-case
-   *     names and decoded values) and next (index just past the closing
-   *     '>'), or undefined when no '>' token follows.
-   */
-  function parseTagAndAttrs(parts, pos) {
-    var m = /^([-\w:]+)/.exec(parts[pos]);
-    var tag = {};
-    tag.name = m[1].toLowerCase();
-    tag.eflags = html4.ELEMENTS[tag.name];
-    // buf holds the not-yet-parsed attribute text after the tag name.
-    var buf = parts[pos].substr(m[0].length);
-    // Find the next '>'.  We optimistically assume this '>' is not in a
-    // quoted context, and further down we fix things up if it turns out to
-    // be quoted.
-    var p = pos + 1;
-    var end = parts.length;
-    for (; p < end; p++) {
-      if (parts[p] === '>') { break; }
-      buf += parts[p];
-    }
-    if (end <= p) { return void 0; }
-    var attrs = [];
-    while (buf !== '') {
-      m = ATTR_RE.exec(buf);
-      if (!m) {
-        // No attribute found: skip garbage
-        buf = buf.replace(/^[\s\S][^a-z\s]*/, '');
-
-      } else if ((m[4] && !m[5]) || (m[6] && !m[7])) {
-        // Unterminated quote: slurp to the next unquoted '>'
-        var quote = m[4] || m[6];
-        var sawQuote = false;
-        // Start from the current text plus the '>' we stopped at, which
-        // turned out to be inside the quoted value.
-        var abuf = [buf, parts[p++]];
-        for (; p < end; p++) {
-          if (sawQuote) {
-            if (parts[p] === '>') { break; }
-          } else if (0 <= parts[p].indexOf(quote)) {
-            sawQuote = true;
-          }
-          abuf.push(parts[p]);
-        }
-        // Slurp failed: lose the garbage
-        if (end <= p) { break; }
-        // Otherwise retry attribute parsing
-        buf = abuf.join('');
-        continue;
-
-      } else {
-        // We have an attribute
-        var aName = m[1].toLowerCase();
-        var aValue = m[2] ? decodeValue(m[3]) : '';
-        attrs.push(aName, aValue);
-        buf = buf.substr(m[0].length);
-      }
-    }
-    tag.attrs = attrs;
-    tag.next = p + 1;
-    return tag;
-  }
-
-  /**
-   * Turns the raw text of an attribute value into its plain-text value:
-   * removes one pair of surrounding quotes when the text starts with a
-   * quote character, strips NULs, then decodes entities.
-   *
-   * @param {string} v the raw attribute value as matched by ATTR_RE.
-   * @return {string} the decoded value.
-   */
-  function decodeValue(v) {
-    var firstCode = v.charCodeAt(0);
-    var startsWithQuote = (firstCode === 0x22 || firstCode === 0x27); // " or '
-    var inner = startsWithQuote ? v.substr(1, v.length - 2) : v;
-    return unescapeEntities(stripNULs(inner));
-  }
-
-  /**
-   * Returns a function that strips unsafe tags and attributes from html.
-   * @param {function(string, Array.<string>): ?Object} tagPolicy
-   *     A function that takes (tagName, attribs[]), where tagName is a key in
-   *     html4.ELEMENTS and attribs is an array of alternating attribute names
-   *     and values.  It should return a record (as follows), or null to delete
-   *     the element.  It's okay for tagPolicy to modify the attribs array,
-   *     but it should not be held between calls.
-   *     Record keys:
-   *        attribs: (required) Sanitized attributes array.
-   *        tagName: Replacement tag name.
-   * @return {function(string, Array)} A function that sanitizes a string of
-   *     HTML and appends result strings to the second argument, an array.
-   */
-  function makeHtmlSanitizer(tagPolicy) {
-    // stack: open elements that were emitted, as {orig, rep} tag-name pairs.
-    // ignoring: true while output is suppressed after a rejected element.
-    // Both are reset by startDoc, so the state is shared by all calls of the
-    // returned sanitizer function.
-    var stack;
-    var ignoring;
-    var emit = function (text, out) {
-      if (!ignoring) { out.push(text); }
-    };
-    return makeSaxParser({
-      'startDoc': function(_) {
-        stack = [];
-        ignoring = false;
-      },
-      'startTag': function(tagNameOrig, attribs, out) {
-        if (ignoring) { return; }
-        // Unknown elements and FOLDABLE elements are dropped (the tag only;
-        // their content is still processed).
-        if (!html4.ELEMENTS.hasOwnProperty(tagNameOrig)) { return; }
-        var eflagsOrig = html4.ELEMENTS[tagNameOrig];
-        if (eflagsOrig & html4.eflags['FOLDABLE']) {
-          return;
-        }
-
-        var decision = tagPolicy(tagNameOrig, attribs);
-        if (!decision) {
-          // Rejected: start ignoring content, unless the element is EMPTY
-          // and therefore has no content or end tag.
-          ignoring = !(eflagsOrig & html4.eflags['EMPTY']);
-          return;
-        } else if (typeof decision !== 'object') {
-          throw new Error('tagPolicy did not return object (old API?)');
-        }
-        if ('attribs' in decision) {
-          attribs = decision['attribs'];
-        } else {
-          throw new Error('tagPolicy gave no attribs');
-        }
-        var eflagsRep;
-        var tagNameRep;
-        if ('tagName' in decision) {
-          tagNameRep = decision['tagName'];
-          eflagsRep = html4.ELEMENTS[tagNameRep];
-        } else {
-          tagNameRep = tagNameOrig;
-          eflagsRep = eflagsOrig;
-        }
-        // TODO(mikesamuel): relying on tagPolicy not to insert unsafe
-        // attribute names.
-
-        // If this is an optional-end-tag element and either this element or its
-        // previous like sibling was rewritten, then insert a close tag to
-        // preserve structure.
-        if (eflagsOrig & html4.eflags['OPTIONAL_ENDTAG']) {
-          var onStack = stack[stack.length - 1];
-          if (onStack && onStack.orig === tagNameOrig &&
-              (onStack.rep !== tagNameRep || tagNameOrig !== tagNameRep)) {
-                out.push('<\/', onStack.rep, '>');
-          }
-        }
-
-        if (!(eflagsOrig & html4.eflags['EMPTY'])) {
-          stack.push({orig: tagNameOrig, rep: tagNameRep});
-        }
-
-        out.push('<', tagNameRep);
-        for (var i = 0, n = attribs.length; i < n; i += 2) {
-          var attribName = attribs[i],
-              value = attribs[i + 1];
-          // A null or undefined value means the attribute is omitted.
-          if (value !== null && value !== void 0) {
-            out.push(' ', attribName, '="', escapeAttrib(value), '"');
-          }
-        }
-        out.push('>');
-
-        if ((eflagsOrig & html4.eflags['EMPTY'])
-            && !(eflagsRep & html4.eflags['EMPTY'])) {
-          // replacement is non-empty, synthesize end tag
-          out.push('<\/', tagNameRep, '>');
-        }
-      },
-      'endTag': function(tagName, out) {
-        // NOTE(review): any end tag clears `ignoring`, not only the end tag
-        // of the element that was rejected -- confirm this is intended.
-        if (ignoring) {
-          ignoring = false;
-          return;
-        }
-        if (!html4.ELEMENTS.hasOwnProperty(tagName)) { return; }
-        var eflags = html4.ELEMENTS[tagName];
-        if (!(eflags & (html4.eflags['EMPTY'] | html4.eflags['FOLDABLE']))) {
-          // Find the matching open element, searching from the top of the
-          // stack downwards.
-          var index;
-          if (eflags & html4.eflags['OPTIONAL_ENDTAG']) {
-            for (index = stack.length; --index >= 0;) {
-              var stackElOrigTag = stack[index].orig;
-              if (stackElOrigTag === tagName) { break; }
-              if (!(html4.ELEMENTS[stackElOrigTag] &
-                    html4.eflags['OPTIONAL_ENDTAG'])) {
-                // Don't pop non optional end tags looking for a match.
-                return;
-              }
-            }
-          } else {
-            for (index = stack.length; --index >= 0;) {
-              if (stack[index].orig === tagName) { break; }
-            }
-          }
-          if (index < 0) { return; }  // Not opened.
-          // Close everything opened above the match, except elements whose
-          // end tag is optional.
-          for (var i = stack.length; --i > index;) {
-            var stackElRepTag = stack[i].rep;
-            if (!(html4.ELEMENTS[stackElRepTag] &
-                  html4.eflags['OPTIONAL_ENDTAG'])) {
-              out.push('<\/', stackElRepTag, '>');
-            }
-          }
-          // Emit the end tag under the replacement name used when opening.
-          if (index < stack.length) {
-            tagName = stack[index].rep;
-          }
-          stack.length = index;
-          out.push('<\/', tagName, '>');
-        }
-      },
-      'pcdata': emit,
-      'rcdata': emit,
-      'cdata': emit,
-      'endDoc': function(out) {
-        // Close every element that is still open, innermost first.
-        for (; stack.length; stack.length--) {
-          out.push('<\/', stack[stack.length - 1].rep, '>');
-        }
-      }
-    });
-  }
-
-  // URI schemes that may be handed to the rewriter; URIs without any scheme
-  // are also passed on.
-  var ALLOWED_URI_SCHEMES = /^(?:https?|mailto)$/i;
-
-  /**
-   * Runs a URI through naiveUriRewriter when its scheme is acceptable.
-   *
-   * @param {*} uri the URI text (coerced to a string before parsing).
-   * @param {*} effect forwarded unchanged to naiveUriRewriter.
-   * @param {*} ltype forwarded unchanged to naiveUriRewriter.
-   * @param {*} hints forwarded unchanged to naiveUriRewriter.
-   * @param {?function} naiveUriRewriter called as
-   *     (parsedUri, effect, ltype, hints).
-   * @return {?string} the rewriter's result as a string, or null when there
-   *     is no rewriter, the URI does not parse, the scheme is not allowed,
-   *     the rewriter returns a falsy value, or anything throws.
-   */
-  function safeUri(uri, effect, ltype, hints, naiveUriRewriter) {
-    if (!naiveUriRewriter) { return null; }
-    var result = null;
-    try {
-      var parsed = URI.parse('' + uri);
-      var acceptable = parsed &&
-          (!parsed.hasScheme() ||
-           ALLOWED_URI_SCHEMES.test(parsed.getScheme()));
-      if (acceptable) {
-        var rewritten = naiveUriRewriter(parsed, effect, ltype, hints);
-        result = rewritten ? rewritten.toString() : null;
-      }
-    } catch (e) {
-      result = null;
-    }
-    return result;
-  }
-
-  /**
-   * Reports a sanitizer change to a logger callback.
-   *
-   * Without an attribute name a single "<tag> removed" event is reported.
-   * With one, an event is reported only when the value changed, classified
-   * as "removed", "added" or "changed".
-   *
-   * @param {function(string, Object)} logger receives a message and a record
-   *     describing the change.
-   * @param {string} tagName the element concerned.
-   * @param {?string} attribName the attribute concerned; falsy for an event
-   *     about the whole element.
-   * @param {?string} oldValue the attribute value before sanitization.
-   * @param {?string} newValue the attribute value after sanitization.
-   */
-  function log(logger, tagName, attribName, oldValue, newValue) {
-    if (!attribName) {
-      logger(tagName + " removed", {
-        change: "removed",
-        tagName: tagName
-      });
-      // There is no attribute to report on; stop here so no second event is
-      // emitted with an undefined attribute name.
-      return;
-    }
-    if (oldValue !== newValue) {
-      var changed = "changed";
-      if (oldValue && !newValue) {
-        changed = "removed";
-      } else if (!oldValue && newValue)  {
-        changed = "added";
-      }
-      logger(tagName + "." + attribName + " " + changed, {
-        change: changed,
-        tagName: tagName,
-        attribName: attribName,
-        oldValue: oldValue,
-        newValue: newValue
-      });
-    }
-  }
-
-  /**
-   * Looks an attribute up in a schema map keyed by 'tag::attr', trying the
-   * element-specific key first and the wildcard key '*::attr' second.
-   *
-   * @param {Object} map the schema map to search.
-   * @param {string} tagName the element name.
-   * @param {string} attribName the attribute name.
-   * @return {*} the mapped value, or undefined when neither key is present.
-   */
-  function lookupAttribute(map, tagName, attribName) {
-    var candidateKeys = [tagName + '::' + attribName, '*::' + attribName];
-    for (var k = 0; k < candidateKeys.length; k++) {
-      if (map.hasOwnProperty(candidateKeys[k])) {
-        return map[candidateKeys[k]];
-      }
-    }
-    return void 0;
-  }
-  /** Returns the html4.ATTRIBS entry for the attribute, or undefined. */
-  function getAttributeType(tagName, attribName) {
-    return lookupAttribute(html4.ATTRIBS, tagName, attribName);
-  }
-  /** Returns the html4.LOADERTYPES entry for the attribute, or undefined. */
-  function getLoaderType(tagName, attribName) {
-    return lookupAttribute(html4.LOADERTYPES, tagName, attribName);
-  }
-  /** Returns the html4.URIEFFECTS entry for the attribute, or undefined. */
-  function getUriEffect(tagName, attribName) {
-    return lookupAttribute(html4.URIEFFECTS, tagName, attribName);
-  }
-
-  /**
-   * Sanitizes attributes on an HTML tag.
-   * @param {string} tagName An HTML tag name in lowercase.
-   * @param {Array.<?string>} attribs An array of alternating names and values.
-   * @param {?function(?string): ?string} opt_naiveUriRewriter A transform to
-   *     apply to URI attributes; it can return a new string value, or null to
-   *     delete the attribute.  If unspecified, URI attributes are deleted.
-   * @param {function(?string): ?string} opt_nmTokenPolicy A transform to apply
-   *     to attributes containing HTML names, element IDs, and space-separated
-   *     lists of classes; it can return a new string value, or null to delete
-   *     the attribute.  If unspecified, these attributes are kept unchanged.
-   * @return {Array.<?string>} The sanitized attributes as a list of alternating
-   *     names and values, where a null value means to omit the attribute.
-   */
-  function sanitizeAttribs(tagName, attribs,
-    opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) {
-    // TODO(felix8a): it's obnoxious that domado duplicates much of this
-    // TODO(felix8a): maybe consistently enforce constraints like target=
-    for (var i = 0; i < attribs.length; i += 2) {
-      var attribName = attribs[i];
-      var value = attribs[i + 1];
-      var oldValue = value;
-      var atype = null, attribKey;
-      if ((attribKey = tagName + '::' + attribName,
-           html4.ATTRIBS.hasOwnProperty(attribKey)) ||
-          (attribKey = '*::' + attribName,
-           html4.ATTRIBS.hasOwnProperty(attribKey))) {
-        atype = html4.ATTRIBS[attribKey];
-      }
-      if (atype !== null) {
-        switch (atype) {
-          case html4.atype['NONE']: break;
-          case html4.atype['SCRIPT']:
-            value = null;
-            if (opt_logger) {
-              log(opt_logger, tagName, attribName, oldValue, value);
-            }
-            break;
-          case html4.atype['STYLE']:
-            if ('undefined' === typeof parseCssDeclarations) {
-              value = null;
-              if (opt_logger) {
-                log(opt_logger, tagName, attribName, oldValue, value);
-	      }
-              break;
-            }
-            var sanitizedDeclarations = [];
-            parseCssDeclarations(
-                value,
-                {
-                  'declaration': function (property, tokens) {
-                    var normProp = property.toLowerCase();
-                    sanitizeCssProperty(
-                        normProp, tokens,
-                        opt_naiveUriRewriter
-                        ? function (url) {
-                            return safeUri(
-                                url, html4.ueffects.SAME_DOCUMENT,
-                                html4.ltypes.SANDBOXED,
-                                {
-                                  "TYPE": "CSS",
-                                  "CSS_PROP": normProp
-                                }, opt_naiveUriRewriter);
-                          }
-                        : null);
-                    if (tokens.length) {
-                      sanitizedDeclarations.push(
-                          normProp + ': ' + tokens.join(' '));
-                    }
-                  }
-                });
-            value = sanitizedDeclarations.length > 0 ?
-              sanitizedDeclarations.join(' ; ') : null;
-            if (opt_logger) {
-              log(opt_logger, tagName, attribName, oldValue, value);
-            }
-            break;
-          case html4.atype['ID']:
-          case html4.atype['IDREF']:
-          case html4.atype['IDREFS']:
-          case html4.atype['GLOBAL_NAME']:
-          case html4.atype['LOCAL_NAME']:
-          case html4.atype['CLASSES']:
-            value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value;
-            if (opt_logger) {
-              log(opt_logger, tagName, attribName, oldValue, value);
-            }
-            break;
-          case html4.atype['URI']:
-            value = safeUri(value,
-              getUriEffect(tagName, attribName),
-              getLoaderType(tagName, attribName),
-              {
-                "TYPE": "MARKUP",
-                "XML_ATTR": attribName,
-                "XML_TAG": tagName
-              }, opt_naiveUriRewriter);
-              if (opt_logger) {
-              log(opt_logger, tagName, attribName, oldValue, value);
-            }
-            break;
-          case html4.atype['URI_FRAGMENT']:
-            if (value && '#' === value.charAt(0)) {
-              value = value.substring(1);  // remove the leading '#'
-              value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value;
-              if (value !== null && value !== void 0) {
-                value = '#' + value;  // restore the leading '#'
-              }
-            } else {
-              value = null;
-            }
-            if (opt_logger) {
-              log(opt_logger, tagName, attribName, oldValue, value);
-            }
-            break;
-          default:
-            value = null;
-            if (opt_logger) {
-              log(opt_logger, tagName, attribName, oldValue, value);
-            }
-            break;
-        }
-      } else {
-        value = null;
-        if (opt_logger) {
-          log(opt_logger, tagName, attribName, oldValue, value);
-        }
-      }
-      attribs[i + 1] = value;
-    }
-    return attribs;
-  }
-
-  /**
-   * Creates a tag policy that omits all tags marked UNSAFE in html4-defs.js
-   * and applies the default attribute sanitizer with the supplied policy for
-   * URI attributes and NMTOKEN attributes.
-   * @param {?function(?string): ?string} opt_naiveUriRewriter A transform to
-   *     apply to URI attributes.  If not given, URI attributes are deleted.
-   * @param {function(?string): ?string} opt_nmTokenPolicy A transform to apply
-   *     to attributes containing HTML names, element IDs, and space-separated
-   *     lists of classes.  If not given, such attributes are left unchanged.
-   * @return {function(string, Array.<?string>)} A tagPolicy suitable for
-   *     passing to html.sanitize.
-   */
-  function makeTagPolicy(
-    opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) {
-    return function(tagName, attribs) {
-      if (!(html4.ELEMENTS[tagName] & html4.eflags['UNSAFE'])) {
-        return {
-          'attribs': sanitizeAttribs(tagName, attribs,
-            opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger)
-        };
-      } else {
-        if (opt_logger) {
-          log(opt_logger, tagName, undefined, undefined, undefined);
-        }
-      }
-    };
-  }
-
-  /**
-   * Sanitizes HTML tags and attributes according to a given policy.
-   * @param {string} inputHtml The HTML to sanitize.
-   * @param {function(string, Array.<?string>)} tagPolicy A function that
-   *     decides which tags to accept and sanitizes their attributes (see
-   *     makeHtmlSanitizer above for details).
-   * @return {string} The sanitized HTML.
-   */
-  function sanitizeWithPolicy(inputHtml, tagPolicy) {
-    var outputArray = [];
-    makeHtmlSanitizer(tagPolicy)(inputHtml, outputArray);
-    return outputArray.join('');
-  }
-
-  /**
-   * Strips unsafe tags and attributes from HTML.
-   * @param {string} inputHtml The HTML to sanitize.
-   * @param {?function(?string): ?string} opt_naiveUriRewriter A transform to
-   *     apply to URI attributes.  If not given, URI attributes are deleted.
-   * @param {function(?string): ?string} opt_nmTokenPolicy A transform to apply
-   *     to attributes containing HTML names, element IDs, and space-separated
-   *     lists of classes.  If not given, such attributes are left unchanged.
-   */
-  function sanitize(inputHtml,
-    opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) {
-    var tagPolicy = makeTagPolicy(
-      opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger);
-    return sanitizeWithPolicy(inputHtml, tagPolicy);
-  }
-
-  // Export both quoted and unquoted names for Closure linkage.
-  var html = {};
-  html.escapeAttrib = html['escapeAttrib'] = escapeAttrib;
-  html.makeHtmlSanitizer = html['makeHtmlSanitizer'] = makeHtmlSanitizer;
-  html.makeSaxParser = html['makeSaxParser'] = makeSaxParser;
-  html.makeTagPolicy = html['makeTagPolicy'] = makeTagPolicy;
-  html.normalizeRCData = html['normalizeRCData'] = normalizeRCData;
-  html.sanitize = html['sanitize'] = sanitize;
-  html.sanitizeAttribs = html['sanitizeAttribs'] = sanitizeAttribs;
-  html.sanitizeWithPolicy = html['sanitizeWithPolicy'] = sanitizeWithPolicy;
-  html.unescapeEntities = html['unescapeEntities'] = unescapeEntities;
-  return html;
-})(html4);
-
-var html_sanitize = html['sanitize'];
-
-return {
-  html: html
-};
-});
diff --git a/web-ui/app/js/lib/html_whitelister.js b/web-ui/app/js/lib/html_whitelister.js
deleted file mode 100644
index 22841cce..00000000
--- a/web-ui/app/js/lib/html_whitelister.js
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2014 ThoughtWorks, Inc.
- *
- * Pixelated is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pixelated is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with Pixelated. If not, see <http://www.gnu.org/licenses/>.
- */
-
-'use strict';
-
-define(['lib/html-sanitizer'], function (htmlSanitizer) {
-  var tagAndAttributeWhitelist = {
-    'p': ['style'],
-    'div': ['style'],
-    'a': ['href', 'style'],
-    'span': ['style'],
-    'font': ['face', 'size', 'style'],
-    'img': ['title'],
-    'em': [],
-    'b': [],
-    'i': [],
-    'strong': ['style'],
-    'table': ['style'],
-    'tr': ['style'],
-    'td': ['style'],
-    'th': ['style'],
-    'tbody': ['style'],
-    'thead': ['style'],
-    'dt': ['style'],
-    'dd': ['style'],
-    'dl': ['style'],
-    'h1': ['style'],
-    'h2': ['style'],
-    'h3': ['style'],
-    'h4': ['style'],
-    'h5': ['style'],
-    'h6': ['style'],
-    'br': [],
-    'blockquote': ['style'],
-    'label': ['style'],
-    'form': ['style'],
-    'ol': ['style'],
-    'ul': ['style'],
-    'li': ['style'],
-    'input': ['style', 'type', 'name', 'value']
-  };
-
-  function filterAllowedAttributes (tagName, attributes) {
-    var i, attributesAndValues = [];
-
-    for (i = 0; i < attributes.length; i++) {
-      if (tagAndAttributeWhitelist[tagName] &&
-        _.contains(tagAndAttributeWhitelist[tagName], attributes[i])) {
-        attributesAndValues.push(attributes[i]);
-        attributesAndValues.push(attributes[i+1]);
-      }
-    }
-
-    return attributesAndValues;
-  }
-
-  function tagPolicy (tagName, attributes) {
-    if (!tagAndAttributeWhitelist[tagName]) {
-      return null;
-    }
-
-    return {
-      tagName: tagName,
-      attribs: filterAllowedAttributes(tagName, attributes)
-    };
-  }
-
-  return {
-    tagPolicy: tagPolicy,
-    sanitize: htmlSanitizer.html.sanitizeWithPolicy
-  };
-});
author	Felix Hammerl <fhammerl@thoughtworks.com>	2016-02-24 10:30:06 +0100
committer	Felix Hammerl <fhammerl@thoughtworks.com>	2016-02-24 10:30:06 +0100
commit	0f9c1e66c9ab6b8f037436ffcb45d71f92d9b613 (patch)
tree	6ee182a9d48f30110b68f00f57dd6d46c42fea5b /web-ui
parent	77ec41bb6f542077503106cacc1dbd28118c50b4 (diff)