1 files changed, 1064 insertions, 0 deletions
diff --git a/web-ui/app/js/lib/html-sanitizer.js b/web-ui/app/js/lib/html-sanitizer.js
new file mode 100644
index 00000000..80fb0041
--- /dev/null
+++ b/web-ui/app/js/lib/html-sanitizer.js
@@ -0,0 +1,1064 @@
+// Copyright (C) 2006 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/**
+ * @fileoverview
+ * An HTML sanitizer that can satisfy a variety of security policies.
+ *
+ * <p>
+ * The HTML sanitizer is built around a SAX parser and HTML element and
+ * attributes schemas.
+ *
+ * If the cssparser is loaded, inline styles are sanitized using the
+ * css property and value schemas.  Otherwise they are removed during
+ * sanitization.
+ *
+ * If it exists, uses parseCssDeclarations, sanitizeCssProperty,  cssSchema
+ *
+ * @author mikesamuel@gmail.com
+ * @author jasvir@gmail.com
+ * \@requires html4, URI
+ * \@overrides window
+ * \@provides html, html_sanitize
+ */
+
+// The Turkish i seems to be a non-issue, but abort in case it is.
+// Tag and attribute names are normalized with String.prototype.toLowerCase
+// further down, so refuse to load if 'I' does not lower-case to ASCII 'i'.
+if ('I'.toLowerCase() !== 'i') { throw 'I/i problem'; }
+
+/**
+ * \@namespace
+ */
+define(['lib/html4-defs'], function (html4) {
+var html = (function(html4) {
+
+  // For closure compiler
+  // Optional CSS sanitizer hooks.  They are looked up on window with quoted
+  // keys (presumably so the compiler does not rename them) and stay undefined
+  // when there is no window or when the properties are not present on it.
+  var parseCssDeclarations, sanitizeCssProperty, cssSchema;
+  if ('undefined' !== typeof window) {
+    parseCssDeclarations = window['parseCssDeclarations'];
+    sanitizeCssProperty = window['sanitizeCssProperty'];
+    cssSchema = window['cssSchema'];
+  }
+
+  // The keys of this object must be 'quoted' or JSCompiler will mangle them!
+  // This is a partial list -- lookupEntity() uses the host browser's parser
+  // (when available) to implement full entity lookup.
+  // Note that entities are in general case-sensitive; the uppercase ones are
+  // explicitly defined by HTML5 (presumably as compatibility).
+  // lookupEntity() also stores browser-resolved names in this object, so it
+  // serves as both the seed table and a cache.
+  var ENTITIES = {
+    'lt': '<',
+    'LT': '<',
+    'gt': '>',
+    'GT': '>',
+    'amp': '&',
+    'AMP': '&',
+    'quot': '"',
+    'apos': '\'',
+    // U+00A0 NO-BREAK SPACE.  Written as a \u escape rather than the legacy
+    // octal escape '\240', which is a syntax error in strict-mode code.
+    'nbsp': '\u00A0'
+  };
+
+  // Patterns for types of entity/character reference names.
+  var decimalEscapeRe = /^#(\d+)$/;
+  // Accepts both '#x' and '#X', matching what ENTITY_RE_1 lets through and
+  // what HTML defines for hexadecimal character references.
+  var hexEscapeRe = /^#[xX]([0-9A-Fa-f]+)$/;
+  // contains every entity per http://www.w3.org/TR/2011/WD-html5-20110113/named-character-references.html
+  // ASCII letters and digits only.  (The range must be 'A-Za-z', not 'A-z':
+  // the latter also admits '[', '\', ']', '^', '_' and '`'.)
+  var safeEntityNameRe = /^[A-Za-z][A-Za-z0-9]+$/;
+  // Scratch element used to borrow the browser's own entity decoder.  A
+  // <textarea> is chosen because its content is parsed for entities but not
+  // for tags.
+  // TODO(kpreid): This retrieval is a kludge and leads to silent loss of
+  // functionality if the document isn't available.
+  var entityLookupElement = null;
+  if ('undefined' !== typeof window && window['document']) {
+    entityLookupElement = window['document'].createElement('textarea');
+  }
+  /**
+   * Converts the numeric value of a character reference to a string.
+   * Values above U+FFFF are encoded as a surrogate pair instead of being
+   * truncated to 16 bits, and values that are not valid code points (above
+   * U+10FFFF, or not finite) become U+FFFD REPLACEMENT CHARACTER.
+   *
+   * @param {number} codePoint the parsed numeric value.
+   * @return {string} the corresponding character.
+   */
+  function entityCodePointToString(codePoint) {
+    if (!(codePoint <= 0x10FFFF)) { return '\uFFFD'; }
+    if (codePoint > 0xFFFF) {
+      var offset = codePoint - 0x10000;
+      return String.fromCharCode(
+          0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
+    }
+    return String.fromCharCode(codePoint);
+  }
+
+  /**
+   * Decodes an HTML entity.
+   *
+   * {\@updoc
+   * $ lookupEntity('lt')
+   * # '<'
+   * $ lookupEntity('GT')
+   * # '>'
+   * $ lookupEntity('amp')
+   * # '&'
+   * $ lookupEntity('nbsp')
+   * # '\xA0'
+   * $ lookupEntity('apos')
+   * # "'"
+   * $ lookupEntity('quot')
+   * # '"'
+   * $ lookupEntity('#xa')
+   * # '\n'
+   * $ lookupEntity('#10')
+   * # '\n'
+   * $ lookupEntity('#x0a')
+   * # '\n'
+   * $ lookupEntity('#010')
+   * # '\n'
+   * $ lookupEntity('#x00A')
+   * # '\n'
+   * $ lookupEntity('#x1F600')
+   * # '\uD83D\uDE00'
+   * $ lookupEntity('#x110000')
+   * # '\uFFFD'
+   * $ lookupEntity('Pi')      // Known failure
+   * # '\u03A0'
+   * $ lookupEntity('pi')      // Known failure
+   * # '\u03C0'
+   * }
+   *
+   * Lookup order: the ENTITIES table (which also caches earlier results),
+   * decimal and hexadecimal character references, then the browser's parser
+   * via entityLookupElement when it is available and the name is safe.  A
+   * name that cannot be resolved is returned unchanged as '&name;'.
+   *
+   * @param {string} name the content between the '&' and the ';'.
+   * @return {string} a single unicode code-point as a string.
+   */
+  function lookupEntity(name) {
+    // TODO: entity lookup as specified by HTML5 actually depends on the
+    // presence of the ";".
+    // ENTITIES is written to below using names taken from the input, so the
+    // own-property test must not go through the object itself: after
+    // '&hasOwnProperty;' is cached, ENTITIES.hasOwnProperty is a string and
+    // calling it would throw on every later lookup.
+    if (Object.prototype.hasOwnProperty.call(ENTITIES, name)) {
+      return ENTITIES[name];
+    }
+    var m = name.match(decimalEscapeRe);
+    if (m) {
+      return entityCodePointToString(parseInt(m[1], 10));
+    } else if (!!(m = name.match(hexEscapeRe))) {
+      return entityCodePointToString(parseInt(m[1], 16));
+    } else if (entityLookupElement && safeEntityNameRe.test(name)) {
+      // Let the browser decode the reference, then remember the answer.
+      entityLookupElement.innerHTML = '&' + name + ';';
+      var text = entityLookupElement.textContent;
+      ENTITIES[name] = text;
+      return text;
+    } else {
+      return '&' + name + ';';
+    }
+  }
+
+  // String.prototype.replace callback: the first argument is the whole match
+  // and is ignored; the second is the captured entity name to decode.
+  function decodeOneEntity(_, name) {
+    var decoded = lookupEntity(name);
+    return decoded;
+  }
+
+  // Matches every NUL character in a string.
+  var nulRe = /\0/g;
+  /**
+   * Removes all NUL characters from a string.
+   * @param {string} s the text to clean.
+   * @return {string} s without any NUL characters.
+   */
+  function stripNULs(s) {
+    var withoutNuls = s.replace(nulRe, '');
+    return withoutNuls;
+  }
+
+  // ENTITY_RE_1 finds complete '&name;' references anywhere in a string;
+  // ENTITY_RE_2 tests whether a string starts with 'name;', i.e. the text
+  // that follows an '&' which has already been split off.
+  var ENTITY_RE_1 = /&(#[0-9]+|#[xX][0-9A-Fa-f]+|\w+);/g;
+  var ENTITY_RE_2 = /^(#[0-9]+|#[xX][0-9A-Fa-f]+|\w+);/;
+  /**
+   * The plain text of a chunk of HTML CDATA which possibly contains
+   * entities.  Only references terminated by ';' are decoded.
+   *
+   * {\@updoc
+   * $ unescapeEntities('')
+   * # ''
+   * $ unescapeEntities('hello World!')
+   * # 'hello World!'
+   * $ unescapeEntities('1 &lt; 2 &amp;&AMP; 4 &gt; 3&#10;')
+   * # '1 < 2 && 4 > 3\n'
+   * $ unescapeEntities('&lt;&lt <- unfinished entity&gt;')
+   * # '<&lt <- unfinished entity>'
+   * $ unescapeEntities('/foo?bar=baz&copy=true')  // & often unescaped in URLS
+   * # '/foo?bar=baz&copy=true'
+   * $ unescapeEntities('pi=&pi;&#x3c0;, Pi=&Pi;\u03A0') // FIXME: known failure
+   * # 'pi=\u03C0\u03c0, Pi=\u03A0\u03A0'
+   * }
+   *
+   * @param {string} s a chunk of HTML CDATA.  It must not start or end inside
+   *     an HTML entity.
+   * @return {string} s with each complete entity reference decoded.
+   */
+  function unescapeEntities(s) {
+    return s.replace(ENTITY_RE_1, function (wholeMatch, entityName) {
+      return lookupEntity(entityName);
+    });
+  }
+
+  // Global patterns used by escapeAttrib() and normalizeRCData() below.
+  var ampRe = /&/g;
+  // An '&' that does not begin something shaped like a character reference:
+  // it is followed by end of input, by a character other than a letter or
+  // '#', or by '#' without a digit or 'x' + hex digit after it.
+  var looseAmpRe = /&([^a-z#]|#(?:[^0-9x]|x(?:[^0-9a-f]|$)|$)|$)/gi;
+  var ltRe = /[<]/g;
+  var gtRe = />/g;
+  var quotRe = /\"/g;
+
+  /**
+   * Escapes HTML special characters in attribute values: '&', '<', '>' and
+   * the double quote.  The argument is converted to a string first.
+   *
+   * {\@updoc
+   * $ escapeAttrib('')
+   * # ''
+   * $ escapeAttrib('"<<&==&>>"')  // Do not just escape the first occurrence.
+   * # '&#34;&lt;&lt;&amp;==&amp;&gt;&gt;&#34;'
+   * $ escapeAttrib('Hello <World>!')
+   * # 'Hello &lt;World&gt;!'
+   * }
+   */
+  function escapeAttrib(s) {
+    var escaped = '' + s;
+    // '&' goes first so the entities introduced below are not re-escaped.
+    escaped = escaped.replace(ampRe, '&amp;');
+    escaped = escaped.replace(ltRe, '&lt;');
+    escaped = escaped.replace(gtRe, '&gt;');
+    escaped = escaped.replace(quotRe, '&#34;');
+    return escaped;
+  }
+
+  /**
+   * Escape entities in RCDATA that can be escaped without changing the meaning.
+   * Ampersands that do not start a character reference, and every '<' and
+   * '>', are replaced by entities; existing references are left alone.
+   * {\@updoc
+   * $ normalizeRCData('1 < 2 &&amp; 3 > 4 &amp;& 5 &lt; 7&8')
+   * # '1 &lt; 2 &amp;&amp; 3 &gt; 4 &amp;&amp; 5 &lt; 7&amp;8'
+   * }
+   */
+  function normalizeRCData(rcdata) {
+    var ampsFixed = rcdata.replace(looseAmpRe, '&amp;$1');
+    var ltFixed = ampsFixed.replace(ltRe, '&lt;');
+    return ltFixed.replace(gtRe, '&gt;');
+  }
+
+  // TODO(felix8a): validate sanitizer regexs against the HTML5 grammar at
+  // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html
+  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html
+  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html
+  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html
+
+  // We initially split input so that potentially meaningful characters
+  // like '<' and '>' are separate tokens, using a fast dumb process that
+  // ignores quoting.  Then we walk that token stream, and when we see a
+  // '<' that's the start of a tag, we use ATTR_RE to extract tag
+  // attributes from the next token.  That token will never have a '>'
+  // character.  However, it might have an unbalanced quote character, and
+  // when we see that, we combine additional tokens to balance the quote.
+
+  // Matches one attribute at the start of the remaining tag text, after
+  // optional whitespace; the match is case-insensitive.  Groups:
+  //   1    attribute name
+  //   2    '=' when a value follows
+  //   3    the raw value, including any quotes
+  //   4,5  opening and closing double quote
+  //   6,7  opening and closing single quote
+  // A closing-quote group is empty when the value ran to the end of the
+  // text without a terminator; parseTagAndAttrs uses that to detect an
+  // unterminated quote.
+  var ATTR_RE = new RegExp(
+    '^\\s*' +
+    '([-.:\\w]+)' +             // 1 = Attribute name
+    '(?:' + (
+      '\\s*(=)\\s*' +           // 2 = Is there a value?
+      '(' + (                   // 3 = Attribute value
+        // TODO(felix8a): maybe use backref to match quotes
+        '(\")[^\"]*(\"|$)' +    // 4, 5 = Double-quoted string
+        '|' +
+        '(\')[^\']*(\'|$)' +    // 6, 7 = Single-quoted string
+        '|' +
+        // Positive lookahead to prevent interpretation of
+        // <foo a= b=c> as <foo a='b=c'>
+        // TODO(felix8a): might be able to drop this case
+        '(?=[a-z][-\\w]*\\s*=)' +
+        '|' +
+        // Unquoted value that isn't an attribute name
+        // (since we didn't match the positive lookahead above)
+        '[^\"\'\\s]*' ) +
+      ')' ) +
+    ')?',
+    'i');
+
+  // Feature test: does String.prototype.split include captured separators in
+  // its result?  htmlSplit() falls back to a manual exec loop when it doesn't.
+  // false on IE<=8, true on most other browsers
+  var splitWillCapture = ('a,b'.split(/(,)/).length === 3);
+
+  // bitmask for tags with special parsing, like <script> and <textarea>
+  // (an element carrying either flag has its content handled by parseText).
+  var EFLAGS_TEXT = html4.eflags['CDATA'] | html4.eflags['RCDATA'];
+
+  /**
+   * Given a SAX-like event handler, produce a function that feeds those
+   * events and a parameter to the event handler.
+   *
+   * The event handler has the form:{@code
+   * {
+   *   // Name is a lower-case HTML tag name.  Attribs is an array of
+   *   // alternating lower-case attribute names, and attribute values.
+   *   // Param is the value passed to the saxParser.
+   *   startTag: function (name, attribs, param) { ... },
+   *   endTag:   function (name, param) { ... },
+   *   pcdata:   function (text, param) { ... },
+   *   rcdata:   function (text, param) { ... },
+   *   cdata:    function (text, param) { ... },
+   *   comment:  function (text, param) { ... },
+   *   startDoc: function (param) { ... },
+   *   endDoc:   function (param) { ... }
+   * }}
+   *
+   * Every handler is optional.  All of them except startDoc and endDoc are
+   * additionally passed two trailing arguments: the continuation marker
+   * object and a function that resumes parsing after the current event.
+   * A handler may throw the marker to stop parsing without an error.
+   *
+   * @param {Object} handler a record containing event handlers.
+   * @return {function(string, Object)} A function that takes a chunk of HTML
+   *     and a parameter.  The parameter is passed on to the handler methods.
+   */
+  function makeSaxParser(handler) {
+    // Accept quoted or unquoted keys (Closure compat)
+    // Both spellings are read so the handlers are found whichever way the
+    // caller's keys were written; the copy is what the parser actually uses.
+    var hcopy = {
+      cdata: handler.cdata || handler['cdata'],
+      comment: handler.comment || handler['comment'],
+      endDoc: handler.endDoc || handler['endDoc'],
+      endTag: handler.endTag || handler['endTag'],
+      pcdata: handler.pcdata || handler['pcdata'],
+      rcdata: handler.rcdata || handler['rcdata'],
+      startDoc: handler.startDoc || handler['startDoc'],
+      startTag: handler.startTag || handler['startTag']
+    };
+    return function(htmlText, param) {
+      return parse(htmlText, hcopy, param);
+    };
+  }
+
+  // Parsing strategy is to split input into parts that might be lexically
+  // meaningful (every ">" becomes a separate part), and then recombine
+  // parts if we discover they're in a different context.
+
+  // TODO(felix8a): Significant performance regressions from -legacy,
+  // tested on
+  //    Chrome 18.0
+  //    Firefox 11.0
+  //    IE 6, 7, 8, 9
+  //    Opera 11.61
+  //    Safari 5.1.3
+  // Many of these are unusual patterns that are linearly slower and still
+  // pretty fast (eg 1ms to 5ms), so not necessarily worth fixing.
+
+  // TODO(felix8a): "<script> && && && ... <\/script>" is slower on all
+  // browsers.  The hotspot is htmlSplit.
+
+  // TODO(felix8a): "<p title='>>>>...'><\/p>" is slower on all browsers.
+  // This is partly htmlSplit, but the hotspot is parseTagAndAttrs.
+
+  // TODO(felix8a): "<a><\/a><a><\/a>..." is slower on IE9.
+  // "<a>1<\/a><a>1<\/a>..." is faster, "<a><\/a>2<a><\/a>2..." is faster.
+
+  // TODO(felix8a): "<p<p<p..." is slower on IE[6-8]
+
+  // Sentinel passed to every handler.  parseCPS() swallows this object when
+  // it is thrown, so a handler can throw it to stop the current parse.
+  var continuationMarker = {};
+  /**
+   * Splits htmlText into lexical parts and runs the parser over them from
+   * the beginning, with fresh scan state.
+   *
+   * @param {string} htmlText the HTML to parse.
+   * @param {Object} handler the handler record built by makeSaxParser.
+   * @param {*} param passed through to every handler call.
+   */
+  function parse(htmlText, handler, param) {
+    var parts = htmlSplit(htmlText);
+    // Flags that remember a failed forward search so pathological input is
+    // not rescanned for every '<!--', '<!' or '<?'.
+    var state = {
+      noMoreGT: false,
+      noMoreEndComments: false
+    };
+    parseCPS(handler, parts, 0, state, param);
+  }
+
+  /**
+   * Builds a zero-argument function that, when called, runs parseCPS over
+   * the same parts starting at index `initial`, with the same handler,
+   * state and param.
+   */
+  function continuationMaker(h, parts, initial, state, param) {
+    var resume = function () {
+      parseCPS(h, parts, initial, state, param);
+    };
+    return resume;
+  }
+
+  /**
+   * Main parser loop.  Walks parts from index `initial`, classifies each
+   * part by its leading token and fires the matching handler event.
+   *
+   * Every event other than startDoc/endDoc also receives continuationMarker
+   * and a continuation that resumes parsing just after the event.  If a
+   * handler throws continuationMarker the loop stops quietly; any other
+   * exception propagates.
+   *
+   * @param {Object} h handler record (see makeSaxParser).
+   * @param {Array.<string>} parts output of htmlSplit.
+   * @param {number} initial index of the first part to process; startDoc is
+   *     only fired when this is 0.
+   * @param {Object} state flags noMoreGT / noMoreEndComments, shared with
+   *     continuations.
+   * @param {*} param passed through to every handler call.
+   */
+  function parseCPS(h, parts, initial, state, param) {
+    try {
+      if (h.startDoc && initial === 0) { h.startDoc(param); }
+      var m, p, tagName;
+      for (var pos = initial, end = parts.length; pos < end;) {
+        var current = parts[pos++];
+        var next = parts[pos];
+        switch (current) {
+        case '&':
+          if (ENTITY_RE_2.test(next)) {
+            if (h.pcdata) {
+              // `next` is emitted together with the '&', so a resumed parse
+              // must start after it; resuming at pos would emit it twice.
+              h.pcdata('&' + next, param, continuationMarker,
+                continuationMaker(h, parts, pos + 1, state, param));
+            }
+            pos++;
+          } else {
+            if (h.pcdata) { h.pcdata("&amp;", param, continuationMarker,
+                continuationMaker(h, parts, pos, state, param));
+            }
+          }
+          break;
+        case '<\/':
+          if ((m = /^([-\w:]+)[^\'\"]*/.exec(next))) {
+            if (m[0].length === next.length && parts[pos + 1] === '>') {
+              // fast case, no attribute parsing needed
+              pos += 2;
+              tagName = m[1].toLowerCase();
+              if (h.endTag) {
+                h.endTag(tagName, param, continuationMarker,
+                  continuationMaker(h, parts, pos, state, param));
+              }
+            } else {
+              // slow case, need to parse attributes
+              // TODO(felix8a): do we really care about misparsing this?
+              pos = parseEndTag(
+                parts, pos, h, param, continuationMarker, state);
+            }
+          } else {
+            if (h.pcdata) {
+              h.pcdata('&lt;/', param, continuationMarker,
+                continuationMaker(h, parts, pos, state, param));
+            }
+          }
+          break;
+        case '<':
+          if (m = /^([-\w:]+)\s*\/?/.exec(next)) {
+            if (m[0].length === next.length && parts[pos + 1] === '>') {
+              // fast case, no attribute parsing needed
+              pos += 2;
+              tagName = m[1].toLowerCase();
+              if (h.startTag) {
+                h.startTag(tagName, [], param, continuationMarker,
+                  continuationMaker(h, parts, pos, state, param));
+              }
+              // tags like <script> and <textarea> have special parsing
+              var eflags = html4.ELEMENTS[tagName];
+              if (eflags & EFLAGS_TEXT) {
+                var tag = { name: tagName, next: pos, eflags: eflags };
+                pos = parseText(
+                  parts, tag, h, param, continuationMarker, state);
+              }
+            } else {
+              // slow case, need to parse attributes
+              pos = parseStartTag(
+                parts, pos, h, param, continuationMarker, state);
+            }
+          } else {
+            if (h.pcdata) {
+              h.pcdata('&lt;', param, continuationMarker,
+                continuationMaker(h, parts, pos, state, param));
+            }
+          }
+          break;
+        case '<\!--':
+          // The pathological case is n copies of '<\!--' without '-->', and
+          // repeated failure to find '-->' is quadratic.  We avoid that by
+          // remembering when search for '-->' fails.
+          if (!state.noMoreEndComments) {
+            // A comment <\!--x--> is split into three tokens:
+            //   '<\!--', 'x--', '>'
+            // We want to find the next '>' token that has a preceding '--'.
+            // pos is at the 'x--'.
+            for (p = pos + 1; p < end; p++) {
+              if (parts[p] === '>' && /--$/.test(parts[p - 1])) { break; }
+            }
+            if (p < end) {
+              if (h.comment) {
+                var comment = parts.slice(pos, p).join('');
+                // Drop the trailing '--' before handing the text over.
+                h.comment(
+                  comment.substr(0, comment.length - 2), param,
+                  continuationMarker,
+                  continuationMaker(h, parts, p + 1, state, param));
+              }
+              pos = p + 1;
+            } else {
+              state.noMoreEndComments = true;
+            }
+          }
+          if (state.noMoreEndComments) {
+            // No terminator anywhere ahead: treat the opener as text.
+            if (h.pcdata) {
+              h.pcdata('&lt;!--', param, continuationMarker,
+                continuationMaker(h, parts, pos, state, param));
+            }
+          }
+          break;
+        case '<\!':
+          if (!/^\w/.test(next)) {
+            if (h.pcdata) {
+              h.pcdata('&lt;!', param, continuationMarker,
+                continuationMaker(h, parts, pos, state, param));
+            }
+          } else {
+            // similar to noMoreEndComment logic
+            // A declaration such as <!doctype ...> is skipped up to and
+            // including the next '>' without firing any event.
+            if (!state.noMoreGT) {
+              for (p = pos + 1; p < end; p++) {
+                if (parts[p] === '>') { break; }
+              }
+              if (p < end) {
+                pos = p + 1;
+              } else {
+                state.noMoreGT = true;
+              }
+            }
+            if (state.noMoreGT) {
+              if (h.pcdata) {
+                h.pcdata('&lt;!', param, continuationMarker,
+                  continuationMaker(h, parts, pos, state, param));
+              }
+            }
+          }
+          break;
+        case '<?':
+          // similar to noMoreEndComment logic
+          // A processing instruction is skipped through the next '>'.
+          if (!state.noMoreGT) {
+            for (p = pos + 1; p < end; p++) {
+              if (parts[p] === '>') { break; }
+            }
+            if (p < end) {
+              pos = p + 1;
+            } else {
+              state.noMoreGT = true;
+            }
+          }
+          if (state.noMoreGT) {
+            if (h.pcdata) {
+              h.pcdata('&lt;?', param, continuationMarker,
+                continuationMaker(h, parts, pos, state, param));
+            }
+          }
+          break;
+        case '>':
+          if (h.pcdata) {
+            h.pcdata("&gt;", param, continuationMarker,
+              continuationMaker(h, parts, pos, state, param));
+          }
+          break;
+        case '':
+          break;
+        default:
+          if (h.pcdata) {
+            h.pcdata(current, param, continuationMarker,
+              continuationMaker(h, parts, pos, state, param));
+          }
+          break;
+        }
+      }
+      if (h.endDoc) { h.endDoc(param); }
+    } catch (e) {
+      // The marker is a deliberate "stop here" signal, not an error.
+      if (e !== continuationMarker) { throw e; }
+    }
+  }
+
+  /**
+   * Split str into parts for the html parser.  The result alternates text
+   * and separator tokens ('</', '<!--', '<!', '<?', '&', '<', '>'), starting
+   * and ending with a (possibly empty) text part.
+   *
+   * @param {*} str the input; it is converted to a string first.
+   * @return {Array.<string>} the parts.
+   */
+  function htmlSplit(str) {
+    // can't hoist this out of the function because of the re.exec loop.
+    var separatorRe = /(<\/|<\!--|<[!?]|[&<>])/g;
+    var text = str + '';
+    if (splitWillCapture) {
+      return text.split(separatorRe);
+    }
+    // Fallback for engines whose split() drops captured separators.
+    var pieces = [];
+    var cursor = 0;
+    for (var hit = separatorRe.exec(text); hit !== null;
+         hit = separatorRe.exec(text)) {
+      pieces.push(text.substring(cursor, hit.index));
+      pieces.push(hit[0]);
+      cursor = hit.index + hit[0].length;
+    }
+    pieces.push(text.substring(cursor));
+    return pieces;
+  }
+
+  /**
+   * Slow path for an end tag that carries extra text: parses the tag, fires
+   * endTag, and returns the index of the first part after the tag.
+   *
+   * @return {number} the next position to parse, or parts.length when the
+   *     tag is never closed by a '>'.
+   */
+  function parseEndTag(parts, pos, h, param, continuationMarker, state) {
+    var tag = parseTagAndAttrs(parts, pos);
+    // drop unclosed tags
+    if (!tag) { return parts.length; }
+    if (h.endTag) {
+      // Resume after the whole tag (tag.next), as parseStartTag does;
+      // resuming at pos would re-read the tag's own text as PCDATA.
+      h.endTag(tag.name, param, continuationMarker,
+        continuationMaker(h, parts, tag.next, state, param));
+    }
+    return tag.next;
+  }
+
+  /**
+   * Slow path for a start tag with attributes: parses the tag, fires
+   * startTag, and for CDATA/RCDATA elements also consumes the element's
+   * text content via parseText.
+   *
+   * @return {number} the next position to parse, or parts.length when the
+   *     tag is never closed by a '>'.
+   */
+  function parseStartTag(parts, pos, h, param, continuationMarker, state) {
+    var parsed = parseTagAndAttrs(parts, pos);
+    if (!parsed) {
+      // drop unclosed tags
+      return parts.length;
+    }
+    if (h.startTag) {
+      var resume = continuationMaker(h, parts, parsed.next, state, param);
+      h.startTag(parsed.name, parsed.attrs, param, continuationMarker, resume);
+    }
+    // tags like <script> and <textarea> have special parsing
+    if (!(parsed.eflags & EFLAGS_TEXT)) {
+      return parsed.next;
+    }
+    return parseText(parts, parsed, h, param, continuationMarker, state);
+  }
+
+  // Cache of compiled end-tag matchers, keyed by tag name.
+  var endTagRe = {};
+
+  // Tags like <script> and <textarea> are flagged as CDATA or RCDATA,
+  // which means everything is text until we see the correct closing tag.
+  // Joins the parts from tag.next up to (not including) the '</' of the
+  // matching end tag, fires cdata or rcdata with that text, and returns the
+  // index of that '</' so the caller parses the end tag normally.  Without
+  // a matching end tag, everything that remains is taken as text.
+  function parseText(parts, tag, h, param, continuationMarker, state) {
+    var total = parts.length;
+    var tagName = tag.name;
+    if (!endTagRe.hasOwnProperty(tagName)) {
+      endTagRe[tagName] = new RegExp('^' + tagName + '(?:[\\s\\/]|$)', 'i');
+    }
+    var closer = endTagRe[tagName];
+    var textStart = tag.next;
+    var stop = textStart + 1;
+    while (stop < total) {
+      if (parts[stop - 1] === '<\/' && closer.test(parts[stop])) { break; }
+      stop++;
+    }
+    // Found: step back onto the '</' token itself.
+    if (stop < total) { stop -= 1; }
+    var content = parts.slice(textStart, stop).join('');
+    var resume;
+    if (tag.eflags & html4.eflags['CDATA']) {
+      if (h.cdata) {
+        resume = continuationMaker(h, parts, stop, state, param);
+        h.cdata(content, param, continuationMarker, resume);
+      }
+    } else if (tag.eflags & html4.eflags['RCDATA']) {
+      if (h.rcdata) {
+        resume = continuationMaker(h, parts, stop, state, param);
+        h.rcdata(normalizeRCData(content), param, continuationMarker, resume);
+      }
+    } else {
+      throw new Error('bug');
+    }
+    return stop;
+  }
+
+  // at this point, parts[pos-1] is either "<" or "<\/".
+  // Parses the tag whose name starts parts[pos].  Returns a record
+  //   { name, eflags, attrs, next }
+  // where name is lower-cased, eflags comes from html4.ELEMENTS, attrs is an
+  // array of alternating lower-cased names and decoded values, and next is
+  // the index just past the part where the tag was taken to end.  Returns
+  // undefined when no '>' follows at all.  Callers have already checked that
+  // parts[pos] begins with a tag name, so the first exec is assumed to match.
+  function parseTagAndAttrs(parts, pos) {
+    var m = /^([-\w:]+)/.exec(parts[pos]);
+    var tag = {};
+    tag.name = m[1].toLowerCase();
+    tag.eflags = html4.ELEMENTS[tag.name];
+    // buf holds the tag text after the name, i.e. the attribute source.
+    var buf = parts[pos].substr(m[0].length);
+    // Find the next '>'.  We optimistically assume this '>' is not in a
+    // quoted context, and further down we fix things up if it turns out to
+    // be quoted.
+    var p = pos + 1;
+    var end = parts.length;
+    for (; p < end; p++) {
+      if (parts[p] === '>') { break; }
+      buf += parts[p];
+    }
+    if (end <= p) { return void 0; }
+    var attrs = [];
+    while (buf !== '') {
+      m = ATTR_RE.exec(buf);
+      if (!m) {
+        // No attribute found: skip garbage
+        buf = buf.replace(/^[\s\S][^a-z\s]*/, '');
+
+      } else if ((m[4] && !m[5]) || (m[6] && !m[7])) {
+        // Unterminated quote: slurp to the next unquoted '>'
+        // (an opening quote matched but its closing group is empty, so the
+        // '>' found above was inside the quoted value).
+        var quote = m[4] || m[6];
+        var sawQuote = false;
+        // Re-attach the '>' that ended the first scan, then keep appending
+        // parts until one containing the closing quote has been seen and a
+        // later '>' part is reached.
+        var abuf = [buf, parts[p++]];
+        for (; p < end; p++) {
+          if (sawQuote) {
+            if (parts[p] === '>') { break; }
+          } else if (0 <= parts[p].indexOf(quote)) {
+            sawQuote = true;
+          }
+          abuf.push(parts[p]);
+        }
+        // Slurp failed: lose the garbage
+        if (end <= p) { break; }
+        // Otherwise retry attribute parsing
+        buf = abuf.join('');
+        continue;
+
+      } else {
+        // We have an attribute
+        var aName = m[1].toLowerCase();
+        // An attribute without '=' gets the empty string as its value.
+        var aValue = m[2] ? decodeValue(m[3]) : '';
+        attrs.push(aName, aValue);
+        buf = buf.substr(m[0].length);
+      }
+    }
+    tag.attrs = attrs;
+    tag.next = p + 1;
+    return tag;
+  }
+
+  /**
+   * Turns a raw attribute value as matched by ATTR_RE into its text: when
+   * the value starts with a quote, its first and last characters are
+   * dropped; NULs are then removed and entities decoded.
+   *
+   * @param {string} v the raw value, possibly still quoted.
+   * @return {string} the decoded value.
+   */
+  function decodeValue(v) {
+    var firstCode = v.charCodeAt(0);
+    var isQuoted = (firstCode === 0x22 || firstCode === 0x27);  // " or '
+    var inner = isQuoted ? v.substr(1, v.length - 2) : v;
+    return unescapeEntities(stripNULs(inner));
+  }
+
+  /**
+   * Returns a function that strips unsafe tags and attributes from html.
+   * @param {function(string, Array.<string>): ?Object} tagPolicy
+   *     A function that takes (tagName, attribs[]), where tagName is a key in
+   *     html4.ELEMENTS and attribs is an array of alternating attribute names
+   *     and values.  It should return a record (as follows), or null to delete
+   *     the element.  It's okay for tagPolicy to modify the attribs array,
+   *     but the same array is reused, so it should not be held between calls.
+   *     Record keys:
+   *        attribs: (required) Sanitized attributes array.
+   *        tagName: Replacement tag name.
+   * @return {function(string, Array)} A function that sanitizes a string of
+   *     HTML and appends result strings to the second argument, an array.
+   */
+  function makeHtmlSanitizer(tagPolicy) {
+    // Open elements that were emitted, as {orig, rep} pairs: the tag name
+    // seen in the input and the tag name actually written to the output.
+    var stack;
+    // True while inside an element the policy rejected; text is dropped.
+    var ignoring;
+    var emit = function (text, out) {
+      if (!ignoring) { out.push(text); }
+    };
+    return makeSaxParser({
+      'startDoc': function(_) {
+        stack = [];
+        ignoring = false;
+      },
+      'startTag': function(tagNameOrig, attribs, out) {
+        if (ignoring) { return; }
+        // Unknown and foldable elements are dropped; their content is kept.
+        if (!html4.ELEMENTS.hasOwnProperty(tagNameOrig)) { return; }
+        var eflagsOrig = html4.ELEMENTS[tagNameOrig];
+        if (eflagsOrig & html4.eflags['FOLDABLE']) {
+          return;
+        }
+
+        var decision = tagPolicy(tagNameOrig, attribs);
+        if (!decision) {
+          // Rejected: start ignoring, unless the element is EMPTY and so has
+          // no content or end tag to skip.
+          ignoring = !(eflagsOrig & html4.eflags['EMPTY']);
+          return;
+        } else if (typeof decision !== 'object') {
+          throw new Error('tagPolicy did not return object (old API?)');
+        }
+        if ('attribs' in decision) {
+          attribs = decision['attribs'];
+        } else {
+          throw new Error('tagPolicy gave no attribs');
+        }
+        var eflagsRep;
+        var tagNameRep;
+        if ('tagName' in decision) {
+          tagNameRep = decision['tagName'];
+          eflagsRep = html4.ELEMENTS[tagNameRep];
+        } else {
+          tagNameRep = tagNameOrig;
+          eflagsRep = eflagsOrig;
+        }
+        // TODO(mikesamuel): relying on tagPolicy not to insert unsafe
+        // attribute names.
+
+        // If this is an optional-end-tag element and either this element or its
+        // previous like sibling was rewritten, then insert a close tag to
+        // preserve structure.
+        if (eflagsOrig & html4.eflags['OPTIONAL_ENDTAG']) {
+          var onStack = stack[stack.length - 1];
+          if (onStack && onStack.orig === tagNameOrig &&
+              (onStack.rep !== tagNameRep || tagNameOrig !== tagNameRep)) {
+                out.push('<\/', onStack.rep, '>');
+          }
+        }
+
+        if (!(eflagsOrig & html4.eflags['EMPTY'])) {
+          stack.push({orig: tagNameOrig, rep: tagNameRep});
+        }
+
+        out.push('<', tagNameRep);
+        for (var i = 0, n = attribs.length; i < n; i += 2) {
+          var attribName = attribs[i],
+              value = attribs[i + 1];
+          // A null or undefined value means the attribute is omitted.
+          if (value !== null && value !== void 0) {
+            out.push(' ', attribName, '="', escapeAttrib(value), '"');
+          }
+        }
+        out.push('>');
+
+        if ((eflagsOrig & html4.eflags['EMPTY'])
+            && !(eflagsRep & html4.eflags['EMPTY'])) {
+          // replacement is non-empty, synthesize end tag
+          out.push('<\/', tagNameRep, '>');
+        }
+      },
+      'endTag': function(tagName, out) {
+        // NOTE(review): any end tag ends ignore mode, not only the one that
+        // matches the rejected element -- confirm this is intended.
+        if (ignoring) {
+          ignoring = false;
+          return;
+        }
+        if (!html4.ELEMENTS.hasOwnProperty(tagName)) { return; }
+        var eflags = html4.ELEMENTS[tagName];
+        if (!(eflags & (html4.eflags['EMPTY'] | html4.eflags['FOLDABLE']))) {
+          // Find the matching open element, searching from the innermost.
+          var index;
+          if (eflags & html4.eflags['OPTIONAL_ENDTAG']) {
+            for (index = stack.length; --index >= 0;) {
+              var stackElOrigTag = stack[index].orig;
+              if (stackElOrigTag === tagName) { break; }
+              if (!(html4.ELEMENTS[stackElOrigTag] &
+                    html4.eflags['OPTIONAL_ENDTAG'])) {
+                // Don't pop non optional end tags looking for a match.
+                return;
+              }
+            }
+          } else {
+            for (index = stack.length; --index >= 0;) {
+              if (stack[index].orig === tagName) { break; }
+            }
+          }
+          if (index < 0) { return; }  // Not opened.
+          // Close everything opened inside the match, innermost first,
+          // skipping elements whose end tag is optional.
+          for (var i = stack.length; --i > index;) {
+            var stackElRepTag = stack[i].rep;
+            if (!(html4.ELEMENTS[stackElRepTag] &
+                  html4.eflags['OPTIONAL_ENDTAG'])) {
+              out.push('<\/', stackElRepTag, '>');
+            }
+          }
+          // Emit the end tag under the name the start tag was written with.
+          if (index < stack.length) {
+            tagName = stack[index].rep;
+          }
+          stack.length = index;
+          out.push('<\/', tagName, '>');
+        }
+      },
+      'pcdata': emit,
+      'rcdata': emit,
+      'cdata': emit,
+      'endDoc': function(out) {
+        // Close whatever is still open, innermost first.
+        for (; stack.length; stack.length--) {
+          out.push('<\/', stack[stack.length - 1].rep, '>');
+        }
+      }
+    });
+  }
+
+  // Schemes a URI may carry and still be offered to the rewriter.
+  var ALLOWED_URI_SCHEMES = /^(?:https?|mailto)$/i;
+
+  /**
+   * Passes a URI to naiveUriRewriter if it has no scheme or an allowed one.
+   *
+   * @param {*} uri the URI; converted to a string before parsing.
+   * @param effect passed through to the rewriter.
+   * @param ltype passed through to the rewriter.
+   * @param hints passed through to the rewriter.
+   * @param {?function} naiveUriRewriter the rewriter; when falsy the result
+   *     is always null.
+   * @return {?string} the rewriter's result as a string, or null when the
+   *     URI is rejected, the rewriter returns a falsy value, or anything
+   *     in the process throws.
+   */
+  function safeUri(uri, effect, ltype, hints, naiveUriRewriter) {
+    if (!naiveUriRewriter) { return null; }
+    try {
+      var parsed = URI.parse('' + uri);
+      if (!parsed) { return null; }
+      var schemeAllowed = !parsed.hasScheme() ||
+          ALLOWED_URI_SCHEMES.test(parsed.getScheme());
+      if (!schemeAllowed) { return null; }
+      var rewritten = naiveUriRewriter(parsed, effect, ltype, hints);
+      if (rewritten) { return rewritten.toString(); }
+      return null;
+    } catch (e) {
+      return null;
+    }
+  }
+
+  /**
+   * Reports a sanitizer decision through logger(message, details).
+   * With a falsy attribName a "<tag> removed" entry is logged.  Separately,
+   * when oldValue and newValue differ, an attribute entry is logged whose
+   * change is "removed", "added" or "changed".
+   */
+  function log(logger, tagName, attribName, oldValue, newValue) {
+    if (!attribName) {
+      logger(tagName + " removed", {
+        change: "removed",
+        tagName: tagName
+      });
+    }
+    if (oldValue === newValue) { return; }
+    var kind;
+    if (oldValue && !newValue) {
+      kind = "removed";
+    } else if (!oldValue && newValue) {
+      kind = "added";
+    } else {
+      kind = "changed";
+    }
+    var details = {
+      change: kind,
+      tagName: tagName,
+      attribName: attribName,
+      oldValue: oldValue,
+      newValue: newValue
+    };
+    logger(tagName + "." + attribName + " " + kind, details);
+  }
+
+  /**
+   * Looks an attribute up in a schema map keyed by 'tag::attr', falling
+   * back to the wildcard key '*::attr'.
+   * @return the own value stored under the first key found, or undefined.
+   */
+  function lookupAttribute(map, tagName, attribName) {
+    var candidates = [tagName + '::' + attribName, '*::' + attribName];
+    for (var i = 0; i < candidates.length; i++) {
+      var key = candidates[i];
+      if (map.hasOwnProperty(key)) {
+        return map[key];
+      }
+    }
+    return void 0;
+  }
+  // Schema lookups: each resolves (tagName, attribName) against one of the
+  // html4 tables, preferring a tag-specific entry over the '*' wildcard.
+
+  // Looks the attribute up in html4.ATTRIBS.
+  function getAttributeType(tagName, attribName) {
+    var table = html4.ATTRIBS;
+    return lookupAttribute(table, tagName, attribName);
+  }
+  // Looks the attribute up in html4.LOADERTYPES.
+  function getLoaderType(tagName, attribName) {
+    var table = html4.LOADERTYPES;
+    return lookupAttribute(table, tagName, attribName);
+  }
+  // Looks the attribute up in html4.URIEFFECTS.
+  function getUriEffect(tagName, attribName) {
+    var table = html4.URIEFFECTS;
+    return lookupAttribute(table, tagName, attribName);
+  }
+
+  /**
+   * Sanitizes the value of an inline style attribute.
+   * @param {?string} styleValue The raw attribute value.
+   * @param {?function(?string): ?string} opt_naiveUriRewriter If given, URLs
+   *     inside the CSS are routed through safeUri with this rewriter;
+   *     otherwise null is passed to sanitizeCssProperty as its URL rewriter.
+   * @return {?string} The declarations that still have tokens after
+   *     sanitizeCssProperty ran, joined with ' ; ', or null if there are none
+   *     or if the CSS parser or property sanitizer is not loaded.
+   */
+  function sanitizeStyleAttrib(styleValue, opt_naiveUriRewriter) {
+    // Fail closed unless BOTH css helpers are callable: a parser without
+    // sanitizeCssProperty would otherwise throw a TypeError mid-attribute.
+    if ('function' !== typeof parseCssDeclarations ||
+        'function' !== typeof sanitizeCssProperty) {
+      return null;
+    }
+    var kept = [];
+    parseCssDeclarations(styleValue, {
+      'declaration': function (property, tokens) {
+        var normProp = property.toLowerCase();
+        var urlRewriter = null;
+        if (opt_naiveUriRewriter) {
+          urlRewriter = function (url) {
+            // Keys quoted like html4.atype[...] so JSCompiler keeps them.
+            return safeUri(
+                url, html4.ueffects['SAME_DOCUMENT'],
+                html4.ltypes['SANDBOXED'],
+                { "TYPE": "CSS", "CSS_PROP": normProp },
+                opt_naiveUriRewriter);
+          };
+        }
+        // sanitizeCssProperty is expected to edit tokens in place; a
+        // declaration left without tokens is dropped.
+        sanitizeCssProperty(normProp, tokens, urlRewriter);
+        if (tokens.length) {
+          kept.push(normProp + ': ' + tokens.join(' '));
+        }
+      }
+    });
+    return kept.length > 0 ? kept.join(' ; ') : null;
+  }
+
+  /**
+   * Sanitizes attributes on an HTML tag.  The values in attribs are
+   * overwritten in place and the same array is returned.
+   * @param {string} tagName An HTML tag name in lowercase.
+   * @param {Array.<?string>} attribs An array of alternating names and values.
+   * @param {?function(?string): ?string} opt_naiveUriRewriter A transform to
+   *     apply to URI attributes; it can return a new string value, or null to
+   *     delete the attribute.  If unspecified, URI attributes are deleted.
+   * @param {function(?string): ?string} opt_nmTokenPolicy A transform to apply
+   *     to attributes containing HTML names, element IDs, and space-separated
+   *     lists of classes; it can return a new string value, or null to delete
+   *     the attribute.  If unspecified, these attributes are kept unchanged.
+   * @param {*} opt_logger If truthy, passed to log() with the tag name,
+   *     attribute name, old value and new value of every attribute except
+   *     those whose schema type is NONE.
+   * @return {Array.<?string>} The sanitized attributes as a list of alternating
+   *     names and values, where a null value means to omit the attribute.
+   */
+  function sanitizeAttribs(tagName, attribs,
+    opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) {
+    // TODO(felix8a): it's obnoxious that domado duplicates much of this
+    // TODO(felix8a): maybe consistently enforce constraints like target=
+    for (var i = 0; i < attribs.length; i += 2) {
+      var attribName = attribs[i];
+      var oldValue = attribs[i + 1];
+      var value = oldValue;
+      // A tag-specific schema entry takes precedence over the '*' wildcard.
+      var attribKey = tagName + '::' + attribName;
+      if (!html4.ATTRIBS.hasOwnProperty(attribKey)) {
+        attribKey = '*::' + attribName;
+      }
+      var atype = html4.ATTRIBS.hasOwnProperty(attribKey) ?
+        html4.ATTRIBS[attribKey] : null;
+      // Every outcome is reported to the logger except an untouched NONE.
+      var shouldLog = true;
+      if (atype === null) {
+        value = null;  // attribute is not in the schema: drop it
+      } else {
+        switch (atype) {
+          case html4.atype['NONE']:
+            shouldLog = false;
+            break;
+          case html4.atype['SCRIPT']:
+            value = null;
+            break;
+          case html4.atype['STYLE']:
+            value = sanitizeStyleAttrib(value, opt_naiveUriRewriter);
+            break;
+          case html4.atype['ID']:
+          case html4.atype['IDREF']:
+          case html4.atype['IDREFS']:
+          case html4.atype['GLOBAL_NAME']:
+          case html4.atype['LOCAL_NAME']:
+          case html4.atype['CLASSES']:
+            value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value;
+            break;
+          case html4.atype['URI']:
+            value = safeUri(value,
+              getUriEffect(tagName, attribName),
+              getLoaderType(tagName, attribName),
+              {
+                "TYPE": "MARKUP",
+                "XML_ATTR": attribName,
+                "XML_TAG": tagName
+              }, opt_naiveUriRewriter);
+            break;
+          case html4.atype['URI_FRAGMENT']:
+            if (value && '#' === value.charAt(0)) {
+              value = value.substring(1);  // remove the leading '#'
+              value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value;
+              if (value !== null && value !== void 0) {
+                value = '#' + value;  // restore the leading '#'
+              }
+            } else {
+              value = null;
+            }
+            break;
+          default:
+            value = null;
+            break;
+        }
+      }
+      if (shouldLog && opt_logger) {
+        log(opt_logger, tagName, attribName, oldValue, value);
+      }
+      attribs[i + 1] = value;
+    }
+    return attribs;
+  }
+
+  /**
+   * Creates a tag policy that rejects tags marked UNSAFE in html4-defs.js and
+   * runs sanitizeAttribs, with the supplied policies, on every other tag.
+   * @param {?function(?string): ?string} opt_naiveUriRewriter A transform to
+   *     apply to URI attributes.  If not given, URI attributes are deleted.
+   * @param {function(?string): ?string} opt_nmTokenPolicy A transform to apply
+   *     to attributes containing HTML names, element IDs, and space-separated
+   *     lists of classes.  If not given, such attributes are left unchanged.
+   * @param {*} opt_logger Forwarded to sanitizeAttribs; if truthy it is also
+   *     passed to log() with the tag name whenever an UNSAFE tag is rejected.
+   * @return {function(string, Array.<?string>)} A tagPolicy suitable for
+   *     html.sanitizeWithPolicy; it returns undefined for a rejected tag.
+   */
+  function makeTagPolicy(opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) {
+    return function(tagName, attribs) {
+      var unsafeBit = html4.ELEMENTS[tagName] & html4.eflags['UNSAFE'];
+      if (unsafeBit) {
+        if (opt_logger) {
+          log(opt_logger, tagName, undefined, undefined, undefined);
+        }
+        return;
+      }
+      var cleaned = sanitizeAttribs(tagName, attribs,
+        opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger);
+      return { 'attribs': cleaned };
+    };
+  }
+
+  /**
+   * Runs the HTML sanitizer over a string using the given tag policy.
+   * @param {string} inputHtml The HTML to sanitize.
+   * @param {function(string, Array.<?string>)} tagPolicy Decides which tags
+   *     to accept and sanitizes their attributes (see makeHtmlSanitizer).
+   * @return {string} The sanitized HTML.
+   */
+  function sanitizeWithPolicy(inputHtml, tagPolicy) {
+    var sanitizer = makeHtmlSanitizer(tagPolicy);
+    var chunks = [];  // filled by the sanitizer, joined into the result
+    sanitizer(inputHtml, chunks);
+    return chunks.join('');
+  }
+
+  /**
+   * Sanitizes HTML with the default tag policy built by makeTagPolicy.
+   * @param {string} inputHtml The HTML to sanitize.
+   * @param {?function(?string): ?string} opt_naiveUriRewriter A transform to
+   *     apply to URI attributes.  If not given, URI attributes are deleted.
+   * @param {function(?string): ?string} opt_nmTokenPolicy A transform to apply
+   *     to names, element IDs and class lists.  If not given, they are kept.
+   * @param {*} opt_logger Forwarded unchanged to makeTagPolicy.
+   * @return {string} The sanitized HTML.
+   */
+  function sanitize(inputHtml,
+    opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) {
+    return sanitizeWithPolicy(inputHtml, makeTagPolicy(
+      opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger));
+  }
+
+  // Public API, exported under quoted and unquoted names for Closure linkage.
+  var html = {};  // local to this IIFE; returned below as its result
+  html.escapeAttrib = html['escapeAttrib'] = escapeAttrib;
+  html.makeHtmlSanitizer = html['makeHtmlSanitizer'] = makeHtmlSanitizer;
+  html.makeSaxParser = html['makeSaxParser'] = makeSaxParser;
+  html.makeTagPolicy = html['makeTagPolicy'] = makeTagPolicy;
+  html.normalizeRCData = html['normalizeRCData'] = normalizeRCData;
+  html.sanitize = html['sanitize'] = sanitize;
+  html.sanitizeAttribs = html['sanitizeAttribs'] = sanitizeAttribs;
+  html.sanitizeWithPolicy = html['sanitizeWithPolicy'] = sanitizeWithPolicy;
+  html.unescapeEntities = html['unescapeEntities'] = unescapeEntities;
+  return html;
+})(html4);
+
+var html_sanitize = html['sanitize'];  // NOTE(review): looks unused in this module; only `html` is returned - confirm
+
+return {
+  html: html
+};
+});