// Copyright (C) 2006 Google Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. /** * @fileoverview * An HTML sanitizer that can satisfy a variety of security policies. * *
* The HTML sanitizer is built around a SAX parser and HTML element and * attributes schemas. * * If the cssparser is loaded, inline styles are sanitized using the * css property and value schemas. Else they are remove during * sanitization. * * If it exists, uses parseCssDeclarations, sanitizeCssProperty, cssSchema * * @author mikesamuel@gmail.com * @author jasvir@gmail.com * \@requires html4, URI * \@overrides window * \@provides html, html_sanitize */ // The Turkish i seems to be a non-issue, but abort in case it is. if ('I'.toLowerCase() !== 'i') { throw 'I/i problem'; } /** * \@namespace */ define(['lib/html4-defs'], function (html4) { var html = (function(html4) { // For closure compiler var parseCssDeclarations, sanitizeCssProperty, cssSchema; if ('undefined' !== typeof window) { parseCssDeclarations = window['parseCssDeclarations']; sanitizeCssProperty = window['sanitizeCssProperty']; cssSchema = window['cssSchema']; } // The keys of this object must be 'quoted' or JSCompiler will mangle them! // This is a partial list -- lookupEntity() uses the host browser's parser // (when available) to implement full entity lookup. // Note that entities are in general case-sensitive; the uppercase ones are // explicitly defined by HTML5 (presumably as compatibility). var ENTITIES = { 'lt': '<', 'LT': '<', 'gt': '>', 'GT': '>', 'amp': '&', 'AMP': '&', 'quot': '"', 'apos': '\'', 'nbsp': '\240' }; // Patterns for types of entity/character reference names. var decimalEscapeRe = /^#(\d+)$/; var hexEscapeRe = /^#x([0-9A-Fa-f]+)$/; // contains every entity per http://www.w3.org/TR/2011/WD-html5-20110113/named-character-references.html var safeEntityNameRe = /^[A-Za-z][A-za-z0-9]+$/; // Used as a hook to invoke the browser's entity parsing.