Modified: incubator/shindig/trunk/features/caja/html-sanitizer.js
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/features/caja/html-sanitizer.js?rev=635936&r1=635935&r2=635936&view=diff
==============================================================================
--- incubator/shindig/trunk/features/caja/html-sanitizer.js (original)
+++ incubator/shindig/trunk/features/caja/html-sanitizer.js Tue Mar 11 07:21:16 2008
@@ -1,348 +1,26 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-// [EMAIL PROTECTED]
+// Copyright (C) 2006 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 /**
- * Strips unsafe tags and attributes from html.
- * @param {String} html to sanitize
- * @param {Function} opt_urlXform : String -> String -- a transform to apply to
- *   url attribute values.
- * @param {Function} opt_nmTokenXform : String -> String -- a transform to apply
- *   to name attribute values.
- * @return {String} html
+ * @fileoverview
+ * Provide a factory that allows transformations on HTML.
+ * @author [EMAIL PROTECTED]
  */
-var html_sanitize = (function () {
-
-  // hide all the whitelists and other state so they can't be interfered with
-
-  /** token definitions. */
-  var TOK_ENTITY = /^&(?:\#[0-9]+|\#x[0-9a-f]+|\w+);/i;
-  var TOK_COMMENT = /^<!--[\s\S]*?-->/;
-  var TOK_TAG_BEGIN = /^<\/?[a-z][a-z0-9]*/i;
-  var TOK_TAG_END = /^\/?>/;
-  var TOK_ATTRIB = /^\w+(\s*=\s*(?:\"[^\"]*\"|\'[^\']*\'|[^>\"\'\s]*))?/;
-  var TOK_SPACE = /^\s+/;
-  var TOK_OTHER = /^[^&<]+/;
-  var TOK_CRUFT = /^[<&]/;
-  var TOK_IGNORABLE_CRUFT = /^[^\w\s>]+/;
-
-  /** the token definitions used when we are outside a tag. */
-  var TOKS_NOTTAG = [
-      TOK_ENTITY,
-      TOK_COMMENT,
-      TOK_TAG_BEGIN,
-      TOK_OTHER,
-      TOK_CRUFT
-  ];
-  /** token definitions used inside a tag. */
-  var TOKS_INTAG = [
-      TOK_SPACE,
-      TOK_ATTRIB,
-      TOK_TAG_END,
-      TOK_IGNORABLE_CRUFT
-  ];
-
-  /**
-   * split html into tokens using the regexps above.
-   * This also does some normalization of tokens, escaping specials that don't
-   * appear to be part of a larger token.
-   */
-  var lex = function (html) {
-    var tokens = [];
-    var inTag = false; // 0 outside tag, 1 inside tag
-    while (html) {
-      var m = null;
-      var tok = null;
-      if (!inTag) {
-        for (var i = 0; i < TOKS_NOTTAG.length; ++i) {
-          m = html.match(TOKS_NOTTAG[i]);
-          if (m) {
-            tok = TOKS_NOTTAG[i];
-            break;
-          }
-        }
-        if (tok === TOK_TAG_BEGIN) { inTag = true; }
-      } else {
-        for (var i = 0; i < TOKS_INTAG.length; ++i) {
-          m = html.match(TOKS_INTAG[i]);
-          if (m) {
-            tok = TOKS_INTAG[i];
-            break;
-          }
-        }
-        if (tok === TOK_TAG_END) { inTag = false; }
-      }
-      var tokstr = m[0];
-      html = html.substring(tokstr.length);
-      if (tok === TOK_CRUFT) {
-        tokstr = tokstr == '<' ? '&lt;' : '&amp;';
-      } else if (tok === TOK_OTHER) {
-        tokstr = tokstr.replace(/>/g, '&gt;');
-      } else if (tok == TOK_SPACE) {
-        tokstr = ' ';
-      } else if (tok === TOK_ATTRIB) {
-        tokstr = tokstr.replace(/^(\w+)\s*=\s*/, '$1=');
-      }
-      if (tok !== TOK_IGNORABLE_CRUFT && tok != TOK_COMMENT) {
-        tokens.push(tokstr, tok);
-      }
-    }
-    return tokens;
-  };
-
-  // whitelists of elements and attributes
 
-  /** element flags. */
-  var OPTIONAL_ENDTAG = 1,
-          BREAKS_FLOW = 2,
-                EMPTY = 4,
-               UNSAFE = 8;
-
-  /** attribute flags */
-  var SCRIPT_TYPE = 1,
-       STYLE_TYPE = 2,
-     NMTOKEN_TYPE = 4,
-         URI_TYPE = 8;
 
-  /**
-   * All the HTML4 elements.
-   * U - unsafe, E - empty, B - breaks flow, O - optional endtag
-   */
-  var ELEMENTS = {
-    A          : 0,
-    ABBR       : 0,
-    ACRONYM    : 0,
-    ADDRESS    : 0,
-    APPLET     : UNSAFE,
-    AREA       : EMPTY,
-    B          : 0,
-    BASE       : UNSAFE|EMPTY,
-    BASEFONT   : UNSAFE|EMPTY,
-    BDO        : 0,
-    BIG        : 0,
-    BLOCKQUOTE : BREAKS_FLOW,
-    BODY       : UNSAFE|OPTIONAL_ENDTAG,
-    BR         : EMPTY|BREAKS_FLOW,
-    BUTTON     : 0,
-    CAPTION    : 0,
-    CENTER     : BREAKS_FLOW,
-    CITE       : 0,
-    CODE       : 0,
-    COL        : EMPTY,
-    COLGROUP   : OPTIONAL_ENDTAG,
-    DD         : OPTIONAL_ENDTAG|BREAKS_FLOW,
-    DEL        : 0,
-    DFN        : 0,
-    DIR        : BREAKS_FLOW,
-    DIV        : BREAKS_FLOW,
-    DL         : BREAKS_FLOW,
-    DT         : OPTIONAL_ENDTAG|BREAKS_FLOW,
-    EM         : 0,
-    FIELDSET   : 0,
-    FONT       : 0,
-    FORM       : BREAKS_FLOW,
-    FRAME      : UNSAFE|EMPTY,
-    FRAMESET   : UNSAFE,
-    H1         : BREAKS_FLOW,
-    H2         : BREAKS_FLOW,
-    H3         : BREAKS_FLOW,
-    H4         : BREAKS_FLOW,
-    H5         : BREAKS_FLOW,
-    H6         : BREAKS_FLOW,
-    HEAD       : UNSAFE|OPTIONAL_ENDTAG|BREAKS_FLOW,
-    HR         : EMPTY|BREAKS_FLOW,
-    HTML       : UNSAFE|OPTIONAL_ENDTAG|BREAKS_FLOW,
-    I          : 0,
-    IFRAME     : UNSAFE,
-    IMG        : EMPTY,
-    INPUT      : EMPTY,
-    INS        : 0,
-    ISINDEX    : UNSAFE|EMPTY|BREAKS_FLOW,
-    KBD        : 0,
-    LABEL      : 0,
-    LEGEND     : 0,
-    LI         : OPTIONAL_ENDTAG|BREAKS_FLOW,
-    LINK       : UNSAFE|EMPTY,
-    MAP        : 0,
-    MENU       : BREAKS_FLOW,
-    META       : UNSAFE|EMPTY,
-    NOFRAMES   : UNSAFE|BREAKS_FLOW,
-    NOSCRIPT   : UNSAFE,
-    OBJECT     : UNSAFE,
-    OL         : BREAKS_FLOW,
-    OPTGROUP   : 0,
-    OPTION     : OPTIONAL_ENDTAG,
-    P          : OPTIONAL_ENDTAG|BREAKS_FLOW,
-    PARAM      : UNSAFE|EMPTY,
-    PRE        : BREAKS_FLOW,
-    Q          : 0,
-    S          : 0,
-    SAMP       : 0,
-    SCRIPT     : UNSAFE,
-    SELECT     : 0,
-    SMALL      : 0,
-    SPAN       : 0,
-    STRIKE     : 0,
-    STRONG     : 0,
-    STYLE      : UNSAFE,
-    SUB        : 0,
-    SUP        : 0,
-    TABLE      : BREAKS_FLOW,
-    TBODY      : OPTIONAL_ENDTAG,
-    TD         : OPTIONAL_ENDTAG|BREAKS_FLOW,
-    TEXTAREA   : 0,
-    TFOOT      : OPTIONAL_ENDTAG,
-    TH         : OPTIONAL_ENDTAG|BREAKS_FLOW,
-    THEAD      : OPTIONAL_ENDTAG,
-    TITLE      : UNSAFE|BREAKS_FLOW,
-    TR         : OPTIONAL_ENDTAG|BREAKS_FLOW,
-    TT         : 0,
-    U          : 0,
-    UL         : BREAKS_FLOW,
-    VAR        : 0
-  };
-
-  /**
-   * All the HTML4 attributes
-   */
-  var ATTRIBS = {
-    ABBR            : 0,
-    ACCEPT          : 0,
-    'ACCEPT-CHARSET': 0,
-    ACCESSKEY       : 0,
-    ACTION          : URI_TYPE,
-    ALIGN           : 0,
-    ALINK           : 0,
-    ALT             : 0,
-    ARCHIVE         : URI_TYPE,
-    AXIS            : 0,
-    BACKGROUND      : URI_TYPE,
-    BGCOLOR         : 0,
-    BORDER          : 0,
-    CELLPADDING     : 0,
-    CELLSPACING     : 0,
-    CHAR            : 0,
-    CHAROFF         : 0,
-    CHARSET         : 0,
-    CHECKED         : 0,
-    CITE            : URI_TYPE,
-    CLASS           : NMTOKEN_TYPE,
-    CLASSID         : URI_TYPE,
-    CLEAR           : 0,
-    CODE            : 0,
-    CODEBASE        : URI_TYPE,
-    CODETYPE        : 0,
-    COLOR           : 0,
-    COLS            : 0,
-    COLSPAN         : 0,
-    COMPACT         : 0,
-    CONTENT         : 0,
-    COORDS          : 0,
-    DATA            : URI_TYPE,
-    DATETIME        : 0,
-    DECLARE         : 0,
-    DEFER           : 0,
-    DIR             : 0,
-    DISABLED        : 0,
-    ENCTYPE         : 0,
-    FACE            : 0,
-    FOR             : NMTOKEN_TYPE,
-    FRAME           : 0,
-    FRAMEBORDER     : 0,
-    HEADERS         : 0,
-    HEIGHT          : 0,
-    HREF            : URI_TYPE,
-    HREFLANG        : 0,
-    HSPACE          : 0,
-    'HTTP-EQUIV'    : 0,
-    ID              : NMTOKEN_TYPE,
-    ISMAP           : 0,
-    LABEL           : 0,
-    LANG            : 0,
-    LANGUAGE        : 0,
-    LINK            : 0,
-    LONGDESC        : URI_TYPE,
-    MARGINHEIGHT    : 0,
-    MARGINWIDTH     : 0,
-    MAXLENGTH       : 0,
-    MEDIA           : 0,
-    METHOD          : 0,
-    MULTIPLE        : 0,
-    NAME            : NMTOKEN_TYPE,  // but not really for inputs
-    NOHREF          : 0,
-    NORESIZE        : 0,
-    NOSHADE         : 0,
-    NOWRAP          : 0,
-    OBJECT          : 0,
-    ONBLUR          : SCRIPT_TYPE,
-    ONCHANGE        : SCRIPT_TYPE,
-    ONCLICK         : SCRIPT_TYPE,
-    ONDBLCLICK      : SCRIPT_TYPE,
-    ONFOCUS         : SCRIPT_TYPE,
-    ONKEYDOWN       : SCRIPT_TYPE,
-    ONKEYPRESS      : SCRIPT_TYPE,
-    ONKEYUP         : SCRIPT_TYPE,
-    ONLOAD          : SCRIPT_TYPE,
-    ONMOUSEDOWN     : SCRIPT_TYPE,
-    ONMOUSEMOVE     : SCRIPT_TYPE,
-    ONMOUSEOUT      : SCRIPT_TYPE,
-    ONMOUSEOVER     : SCRIPT_TYPE,
-    ONMOUSEUP       : SCRIPT_TYPE,
-    ONRESET         : SCRIPT_TYPE,
-    ONSELECT        : SCRIPT_TYPE,
-    ONSUBMIT        : SCRIPT_TYPE,
-    ONUNLOAD        : SCRIPT_TYPE,
-    PROFILE         : URI_TYPE,
-    PROMPT          : 0,
-    READONLY        : 0,
-    REL             : 0,
-    REV             : 0,
-    ROWS            : 0,
-    ROWSPAN         : 0,
-    RULES           : 0,
-    SCHEME          : 0,
-    SCOPE           : 0,
-    SCROLLING       : 0,
-    SELECTED        : 0,
-    SHAPE           : 0,
-    SIZE            : 0,
-    SPAN            : 0,
-    SRC             : URI_TYPE,
-    STANDBY         : 0,
-    START           : 0,
-    STYLE           : STYLE_TYPE,
-    SUMMARY         : 0,
-    TABINDEX        : 0,
-    TARGET          : 0,
-    TEXT            : 0,
-    TITLE           : 0,
-    TYPE            : 0,
-    USEMAP          : URI_TYPE,
-    VALIGN          : 0,
-    VALUE           : 0,
-    VALUETYPE       : 0,
-    VERSION         : 0,
-    VLINK           : 0,
-    VSPACE          : 0,
-    WIDTH           : 0
-  };
+/** @namespace */
+var html = (function () {
 
   var ENTITIES = {
     LT   : '<',
@@ -353,73 +31,325 @@
     APOS : '\''
   };
 
-  function escapeOneEntity(m) {
-    var name = m[1].toUpperCase();
-    if (ENTITIES.hasOwnProperty(s)) { return ENTITIES[name]; }
-    m = name.match(/^#(\d+)$/);
+  // Both patterns are applied to a name that lookupEntity has already
+  // upper-cased, so the hex pattern must accept an upper-case 'X'; the 'i'
+  // flag does that while keeping the familiar '#x' spelling.
+  var decimalEscapeRe = /^#(\d+)$/;
+  var hexEscapeRe = /^#x([0-9A-F]+)$/i;
+  /**
+   * Resolves the body of an entity reference (the text between '&' and ';').
+   * @param {string} name an entity name such as 'lt', or a numeric escape
+   *     such as '#60' or '#x3c'.  Matching is case-insensitive.
+   * @return {string} the decoded text, or the empty string when the name is
+   *     neither a key of ENTITIES nor a numeric escape.
+   */
+  function lookupEntity(name) {
+    name = name.toUpperCase();
+    if (ENTITIES.hasOwnProperty(name)) { return ENTITIES[name]; }
+    var m = name.match(decimalEscapeRe);
     if (m) {
       return String.fromCharCode(parseInt(m[1], 10));
-    } else if (!!(m = name.match(/^#x([0-9A-F]+)$/))) {
+    } else if (!!(m = name.match(hexEscapeRe))) {
       return String.fromCharCode(parseInt(m[1], 16));
     }
     return '';
   }
 
-  function unescapeEntities(s) {
-    return s.replace(/&(#\d+|#x[\da-f]+|\w+);/g, escapeOneEntity);
+  /**
+   * Replacement callback for String.replace: the first argument is the whole
+   * match and is ignored, the second is capture group 1 (the entity body).
+   */
+  function decodeOneEntity(_, name) {
+    return lookupEntity(name);
   }
 
-  function unescapedValueForAttrib(s) {
-    var m = s.match(/=\s*([\"\']?)?(.*)\1/);
-    if (m) {
-      return unescapeEntities(m[2]);
-    } else {
-      return null;
-    }
+  // Case-insensitive so that hex escapes written with upper-case letters
+  // ('&#X4A;', '&#x4A;') are recognized as entity references as well.
+  var entityRe = /&(#\d+|#x[\da-f]+|\w+);/gi;
+  /**
+   * Replaces every well-formed entity reference in s with the text it
+   * denotes.  References that lookupEntity cannot resolve are replaced by the
+   * empty string.
+   * @param {string} s text that may contain HTML entity references.
+   * @return {string} s with its entity references decoded.
+   */
+  function unescapeEntities(s) {
+    return s.replace(entityRe, decodeOneEntity);
   }
 
+  var ampRe = /&/g;
+  var looseAmpRe = /&([^a-z#]|#(?:[^0-9x]|x(?:[^0-9a-f]|$)|$)|$)/gi;
+  var ltRe = /</g;
+  var gtRe = />/g;
+  var quotRe = /\"/g;
+
   function escapeAttrib(s) {
-    return s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;')
-      .replace(/\"/g, '&quot;');
+    return s.replace(ampRe, '&amp;').replace(ltRe, '&lt;').replace(gtRe, 
'&gt;')
+        .replace(quotRe, '&quot;');
+  }
+
+  /**
+   * Escape entities in RCDATA that can be escaped without changing the meaning.
+   */
+  function normalizeRCData(rcdata) {
+    return rcdata
+        .replace(looseAmpRe, '&amp;$1')
+        .replace(ltRe, '&lt;')
+        .replace(gtRe, '&gt;');
   }
 
-  /** actually does the sanitizing. */
-  return function html_sanitize(html, opt_urlXform, opt_nmTokenXform) {
-    var toks = lex(html);
-    var out = [];
-
-    var ignoring = false;
-    for (var i = 0; i < toks.length; ++i) {
-      var tok = toks[i], type = toks[++i];
-      //alert('tok=' + tok + ', type=' + type + ', ignoring=' + ignoring);
-      if (TOK_TAG_BEGIN === type) {
-        var name = tok.replace(/^[<\/]+/, '').toUpperCase();
-        ignoring = !ELEMENTS.hasOwnProperty(name) || (ELEMENTS[name] & UNSAFE);
-      } else if (TOK_ATTRIB === type && !ignoring) {
-        var name = tok.match(/\w+/)[0].toUpperCase();
-        if (!ATTRIBS.hasOwnProperty(name)) { continue; }
-        var flags = ATTRIBS[name];
-        if (flags & (SCRIPT_TYPE | STYLE_TYPE)) { continue; }
-        if (flags) {
-          // apply transforms
-          // unescape value, transform it.  skip if null, otherwise reescape.
-          var value = unescapedValueForAttrib(tok);
-          if (null == value) { continue; }
-          if ((flags & URI_TYPE) && opt_urlXform) {
-            value = opt_urlXform(value);
+
+  /** token definitions. */
+  var INSIDE_TAG_TOKEN = new RegExp(
+      // Don't capture space.
+      '^\\s*(?:'
+      // Capture an attribute name in group 1, and value in groups 2-4.
+      + ('(?:'
+         + '([a-z][a-z-]*)'
+         + ('(?:'
+            + '\\s*=\\s*'
+            + ('(?:'
+               + '\"([^\"]*)\"'
+               + '|\'([^\']*)\''
+               + '|([^>\"\'\\s]*)'
+               + ')'
+               )
+            + ')'
+            ) + '?'
+         + ')'
+         )
+      // End of tag captured in group 5.
+      + '|(/?>)'
+      // Don't capture cruft
+      + '|[^\\w\\s>]+)',
+      'i');
+
+  /**
+   * Matches one token at the start of the remaining input while the parser is
+   * outside a tag.  The pattern is assembled from string literals, so every
+   * regular-expression backslash has to be written doubled.
+   */
+  var OUTSIDE_TAG_TOKEN = new RegExp(
+      '^(?:'
+      // Entity captured in group 1.
+      + '&(\\#[0-9]+|\\#[x][0-9a-f]+|\\w+);'
+      // Comment, doctypes, and processing instructions not captured.
+      // Note '\\w' rather than '\w': a lone backslash before 'w' is dropped
+      // by the string literal, which made the doctype branch look for a
+      // literal 'w' and miss every real '<!DOCTYPE ...>'.
+      + '|<!--[\\s\\S]*?-->|<!\\w[^>]*>|<\\?[^>*]*>'
+      // '/' captured in group 2 for close tags, and name captured in group 3.
+      + '|<(/)?([a-z][a-z0-9]*)'
+      // Text captured in group 4.
+      + '|([^<&]+)'
+      // Cruft captured in group 5.
+      + '|([<&]))',
+      'i');
+
+  /**
+   * Given a SAX-like event handler, produce a function that feeds those
+   * events and a parameter to the event handler.
+   *
+   * The event handler has the form:<pre>
+   * {
+   *   // Name is an upper-case HTML tag name.  Attribs is an array of
+   *   // alternating upper-case attribute names, and attribute values.  The
+   *   // attribs array is reused by the parser.  Param is the value passed to
+   *   // the saxParser.
+   *   startTag: function (name, attribs, param) { ... },
+   *   endTag:   function (name, param) { ... },
+   *   pcdata:   function (text, param) { ... },
+   *   rcdata:   function (text, param) { ... },
+   *   cdata:    function (text, param) { ... },
+   *   startDoc: function (param) { ... },
+   *   endDoc:   function (param) { ... },
+   * }</pre>
+   * Every handler method is optional.  startTag and endTag are only invoked
+   * for tag names that are keys of html4.ELEMENTS.
+   *
+   * @param {Object} handler the event handler.
+   * @return {Function} that takes a chunk of html and a parameter.
+   *   The parameter is passed on to the handler methods.
+   */
+  function makeSaxParser(handler) {
+    return function parse(htmlText, param) {
+      htmlText = String(htmlText);
+      var htmlUpper = null;
+
+      var inTag = false;  // True iff we're currently processing a tag.
+      var attribs = [];  // Accumulates attribute names and values.
+      var tagName;  // The name of the tag currently being processed.
+      var eflags;  // The element flags for the current tag.
+      var openTag;  // True if the current tag is an open tag.
+
+      handler.startDoc && handler.startDoc(param);
+
+      while (htmlText) {
+        var m = htmlText.match(inTag ? INSIDE_TAG_TOKEN : OUTSIDE_TAG_TOKEN);
+        if (!m) {
+          // INSIDE_TAG_TOKEN matches nothing when the rest of a tag starts
+          // with a digit or '_', or consists only of whitespace (for example
+          // '<a 1>' or '<a ').  Drop one character and retry instead of
+          // failing on m[0] below.
+          htmlText = htmlText.substring(1);
+          continue;
+        }
+        htmlText = htmlText.substring(m[0].length);
+
+        if (inTag) {
+          if (m[1]) { // attribute
+            var attribName = m[1].toUpperCase();
+            // Take the first value group that took part in the match.  Using
+            // '||' here would treat an explicit empty value (alt="") as if
+            // the attribute had no value at all.
+            var encodedValue = m[2] != null ? m[2]
+                : m[3] != null ? m[3] : m[4];
+            var decodedValue;
+            if (encodedValue != null) {  // Matches null & undefined
+              decodedValue = unescapeEntities(encodedValue);
+            } else {
+              // Use name as value for valueless attribs, so
+              //   <input type=checkbox checked>
+              // gets attributes ['TYPE', 'checkbox', 'CHECKED', 'CHECKED']
+              decodedValue = attribName;
+            }
+            attribs.push(attribName, decodedValue);
+          } else if (m[5]) {
+            if (eflags !== undefined) {  // False if not in whitelist.
+              if (openTag) {
+                handler.startTag && handler.startTag(tagName, attribs, param);
+              } else {
+                handler.endTag && handler.endTag(tagName, param);
+              }
+            }
+
+            if (openTag
+                && (eflags & (html4.eflags.CDATA | html4.eflags.RCDATA))) {
+              // The body of a CDATA/RCDATA element runs up to the next
+              // case-insensitive '</TAGNAME', so search an upper-cased copy
+              // of the input, created lazily and trimmed to the same tail.
+              if (htmlUpper === null) {
+                htmlUpper = htmlText.toUpperCase();
+              } else {
+                htmlUpper = htmlUpper.substring(
+                    htmlUpper.length - htmlText.length);
+              }
+              var dataEnd = htmlUpper.indexOf('</' + tagName);
+              if (dataEnd < 0) { dataEnd = htmlText.length; }
+              if (eflags & html4.eflags.CDATA) {
+                handler.cdata
+                  && handler.cdata(htmlText.substring(0, dataEnd), param);
+              } else if (handler.rcdata) {
+                var rcdata = htmlText.substring(0, dataEnd);
+                handler.rcdata(normalizeRCData(rcdata), param);
+              }
+              htmlText = htmlText.substring(dataEnd);
+            }
+
+            tagName = eflags = openTag = undefined;
+            attribs.length = 0;
+            inTag = false;
           }
-          if ((flags & NMTOKEN_TYPE) && opt_nmTokenXform) {
-            value = opt_nmTokenXform(value);
+        } else {
+          if (m[1]) {  // Entity
+            handler.pcdata && handler.pcdata(m[0], param);
+          } else if (m[3]) {  // Tag
+            openTag = !m[2];
+            inTag = true;
+            tagName = m[3].toUpperCase();
+            eflags = html4.ELEMENTS.hasOwnProperty(tagName)
+                ? html4.ELEMENTS[tagName] : undefined;
+          } else if (m[4]) {  // Text
+            handler.pcdata && handler.pcdata(m[4], param);
+          } else if (m[5]) {  // Cruft
+            // A stray '&' or '<' that starts no token is passed on escaped.
+            var text = m[5] === '&' ? '&amp;' : '&lt;';
+            handler.pcdata && handler.pcdata(text, param);
           }
-          if (null == value) { continue; }
-          tok = name + '="' + escapeAttrib(value) + '"';
         }
       }
-      if (!ignoring) { out.push(tok); }
-      // TODO: some way of enforcing attribute constraints
-      if (TOK_TAG_END === type) { ignoring = false; }
+
+      handler.endDoc && handler.endDoc(param);
     }
-    return out.join('');
-  };
+  }
 
+  return {
+    normalizeRCData: normalizeRCData,
+    escapeAttrib: escapeAttrib,
+    unescapeEntities: unescapeEntities,
+    makeSaxParser: makeSaxParser
+  };
 })();
+
+/**
+ * Returns a function that strips unsafe tags and attributes from html.
+ * @param {Function} sanitizeAttributes
+ *     maps (tagName, attribs[]) to null or a sanitized attribute array.
+ *     The attribs array can be arbitrarily modified, but the same array
+ *     instance is reused, so should not be held.
+ * @return {Function} that takes html and an output array; the sanitized html
+ *     is appended to the output array as string fragments.
+ */
+html.makeHtmlSanitizer = function (sanitizeAttributes) {
+  // Names of the open elements that need an explicit close tag, innermost
+  // last.
+  var stack = [];
+  // True while the content of an unsafe, non-empty element is being skipped.
+  var ignoring = false;
+  return html.makeSaxParser({
+        startDoc: function (_) {
+          stack = [];
+          ignoring = false;
+        },
+        startTag: function (tagName, attribs, out) {
+          if (ignoring) { return; }
+          if (!html4.ELEMENTS.hasOwnProperty(tagName)) { return; }
+          var eflags = html4.ELEMENTS[tagName];
+          if (eflags & html4.eflags.UNSAFE) {
+            // An unsafe element that can have content hides that content too.
+            ignoring = !(eflags & html4.eflags.EMPTY);
+            return;
+          }
+          attribs = sanitizeAttributes(tagName, attribs);
+          if (attribs) {
+            if (!(eflags & (html4.eflags.OPTIONAL_ENDTAG|html4.eflags.EMPTY))) {
+              stack.push(tagName);
+            }
+
+            out.push('<', tagName);
+            for (var i = 0, n = attribs.length; i < n; i += 2) {
+              var attribName = attribs[i],
+                  value = attribs[i + 1];
+              if (value != null) {  // Skip null or undefined
+                out.push(' ', attribName, '="', html.escapeAttrib(value), '"');
+              }
+            }
+            out.push('>');
+          }
+        },
+        endTag: function (tagName, out) {
+          if (ignoring) {
+            // Any end tag ends the ignored region, not only the matching one.
+            ignoring = false;
+            return;
+          }
+          if (!html4.ELEMENTS.hasOwnProperty(tagName)) { return; }
+          var eflags = html4.ELEMENTS[tagName];
+          if (!(eflags & (html4.eflags.UNSAFE|html4.eflags.EMPTY))) {
+            var index;
+            for (index = stack.length; --index >= 0;) {
+              if (stack[index] === tagName) { break; }
+            }
+            if (index < 0) { return; }  // Not opened.
+            // Close the elements opened inside tagName that are still open,
+            // innermost first.  The scan has to start at the top of the
+            // stack; starting at index would never run and would drop those
+            // elements from the stack without emitting their close tags.
+            for (var i = stack.length; --i > index;) {
+              out.push('</', stack[i], '>');
+            }
+            stack.length = index;
+            out.push('</', tagName, '>');
+          }
+        },
+        pcdata: function (text, out) {
+          if (!ignoring) { out.push(text); }
+        },
+        rcdata: function (text, out) {
+          if (!ignoring) { out.push(text); }
+        },
+        cdata: function (text, out) {
+          if (!ignoring) { out.push(text); }
+        },
+        endDoc: function (out) {
+          // Close whatever is still open at the end of the input.
+          for (var i = stack.length; --i >= 0;) {
+            out.push('</', stack[i], '>');
+          }
+          stack.length = 0;
+        }
+      });
+};
+
+
+/**
+ * Strips unsafe tags and attributes from html.
+ * @param {string} htmlText the html to sanitize.
+ * @param {Function} opt_urlPolicy : string -> string? -- a transform to apply
+ *     to url attribute values.  When omitted, url attributes are removed.
+ * @param {Function} opt_nmTokenPolicy : string -> string? -- a transform to
+ *     apply to names, ids, and classes.  When omitted, they are kept as is.
+ * @return {string} html
+ */
+function html_sanitize(htmlText, opt_urlPolicy, opt_nmTokenPolicy) {
+  var out = [];
+  html.makeHtmlSanitizer(
+      function sanitizeAttribs(tagName, attribs) {
+        // attribs alternates names and values; a value set to null or
+        // undefined makes the sanitizer leave that attribute out.
+        for (var i = 0; i < attribs.length; i += 2) {
+          var attribName = attribs[i];
+          var value = attribs[i + 1];
+          if (html4.ATTRIBS.hasOwnProperty(attribName)) {
+            switch (html4.ATTRIBS[attribName]) {
+              case html4.atype.SCRIPT:
+              case html4.atype.STYLE:
+                value = null;
+                // Must not fall through: the name policy below could
+                // otherwise turn the null back into a value and resurrect
+                // the attribute.
+                break;
+              case html4.atype.IDREF:
+              case html4.atype.NAME:
+              case html4.atype.NMTOKENS:
+                value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value;
+                break;
+              case html4.atype.URI:
+                value = opt_urlPolicy && opt_urlPolicy(value);
+                break;
+            }
+          } else {
+            // Attributes that are not whitelisted are dropped.
+            value = null;
+          }
+          attribs[i + 1] = value;
+        }
+        return attribs;
+      })(htmlText, out);
+  return out.join('');
+}

Added: incubator/shindig/trunk/features/caja/html4-defs.js
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/features/caja/html4-defs.js?rev=635936&view=auto
==============================================================================
--- incubator/shindig/trunk/features/caja/html4-defs.js (added)
+++ incubator/shindig/trunk/features/caja/html4-defs.js Tue Mar 11 07:21:16 2008
@@ -0,0 +1,290 @@
+// Copyright (C) 2008 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+/**
+ * @fileoverview
+ * Whitelists of HTML elements and attributes.
+ * 
+ * @author [EMAIL PROTECTED]
+ */
+
+
+/** @namespace */
+var html4 = {};
+
+/**
+ * HTML element flags.
+ * @enum {number}
+ */
+html4.eflags = {
+  OPTIONAL_ENDTAG: 1,
+  BREAKS_FLOW: 2,
+  EMPTY: 4,
+  NAVIGATES: 8,
+  CDATA: 0x10,
+  RCDATA: 0x20,
+  UNSAFE: 0x40
+};
+
+/**
+ * HTML attribute types (mutually exclusive values, not bit flags).
+ * @enum {number}
+ */
+html4.atype = {
+  SCRIPT: 1,
+  STYLE: 2,
+  IDREF: 3,
+  NAME: 4,
+  NMTOKENS: 5,
+  URI: 6,
+  FRAME: 7
+};
+
+/**
+ * Maps HTML4 element names to flag bitsets.
+ */
+html4.ELEMENTS = {
+  A          : html4.eflags.NAVIGATES,
+  ABBR       : 0,
+  ACRONYM    : 0,
+  ADDRESS    : 0,
+  APPLET     : html4.eflags.UNSAFE,
+  AREA       : html4.eflags.EMPTY | html4.eflags.NAVIGATES,
+  B          : 0,
+  // Changes the meaning of URIs
+  BASE       : html4.eflags.UNSAFE | html4.eflags.EMPTY,
+  // Affects global styles.
+  BASEFONT   : html4.eflags.UNSAFE | html4.eflags.EMPTY,
+  BDO        : 0,
+  BIG        : 0,
+  BLOCKQUOTE : html4.eflags.BREAKS_FLOW,
+  // Attributes merged into global body.
+  BODY       : html4.eflags.UNSAFE | html4.eflags.OPTIONAL_ENDTAG,
+  BR         : html4.eflags.EMPTY | html4.eflags.BREAKS_FLOW,
+  BUTTON     : 0,
+  CAPTION    : 0,
+  CENTER     : html4.eflags.BREAKS_FLOW,
+  CITE       : 0,
+  CODE       : 0,
+  COL        : html4.eflags.EMPTY,
+  COLGROUP   : html4.eflags.OPTIONAL_ENDTAG,
+  DD         : html4.eflags.OPTIONAL_ENDTAG | html4.eflags.BREAKS_FLOW,
+  DEL        : 0,
+  DFN        : 0,
+  DIR        : html4.eflags.BREAKS_FLOW,
+  DIV        : html4.eflags.BREAKS_FLOW,
+  DL         : html4.eflags.BREAKS_FLOW,
+  DT         : html4.eflags.OPTIONAL_ENDTAG | html4.eflags.BREAKS_FLOW,
+  EM         : 0,
+  FIELDSET   : 0,
+  FONT       : 0,
+  FORM       : html4.eflags.BREAKS_FLOW | html4.eflags.NAVIGATES,
+  FRAME      : html4.eflags.UNSAFE | html4.eflags.EMPTY,
+  // Attributes merged into global frameset.
+  FRAMESET   : html4.eflags.UNSAFE,
+  H1         : html4.eflags.BREAKS_FLOW,
+  H2         : html4.eflags.BREAKS_FLOW,
+  H3         : html4.eflags.BREAKS_FLOW,
+  H4         : html4.eflags.BREAKS_FLOW,
+  H5         : html4.eflags.BREAKS_FLOW,
+  H6         : html4.eflags.BREAKS_FLOW,
+  HEAD       : (html4.eflags.UNSAFE | html4.eflags.OPTIONAL_ENDTAG
+                | html4.eflags.BREAKS_FLOW),
+  HR         : html4.eflags.EMPTY | html4.eflags.BREAKS_FLOW,
+  HTML       : (html4.eflags.UNSAFE | html4.eflags.OPTIONAL_ENDTAG
+                | html4.eflags.BREAKS_FLOW),
+  I          : 0,
+  IFRAME     : html4.eflags.UNSAFE,
+  IMG        : html4.eflags.EMPTY,
+  INPUT      : html4.eflags.EMPTY,
+  INS        : 0,
+  ISINDEX    : (html4.eflags.UNSAFE | html4.eflags.EMPTY
+                | html4.eflags.BREAKS_FLOW | html4.eflags.NAVIGATES),
+  KBD        : 0,
+  LABEL      : 0,
+  LEGEND     : 0,
+  LI         : html4.eflags.OPTIONAL_ENDTAG | html4.eflags.BREAKS_FLOW,
+  // Can load global styles.
+  LINK       : html4.eflags.UNSAFE | html4.eflags.EMPTY,
+  MAP        : 0,
+  MENU       : html4.eflags.BREAKS_FLOW,
+  // Can override document headers and encoding, or cause navigation.
+  META       : html4.eflags.UNSAFE | html4.eflags.EMPTY,
+  // Ambiguous tokenization.  Content is CDATA/PCDATA depending on browser.
+  NOFRAMES   : html4.eflags.UNSAFE | html4.eflags.BREAKS_FLOW,
+  // Ambiguous tokenization.  Content is CDATA/PCDATA depending on browser.
+  NOSCRIPT   : html4.eflags.UNSAFE,
+  OBJECT     : html4.eflags.UNSAFE,
+  OL         : html4.eflags.BREAKS_FLOW,
+  OPTGROUP   : 0,
+  OPTION     : html4.eflags.OPTIONAL_ENDTAG,
+  P          : html4.eflags.OPTIONAL_ENDTAG | html4.eflags.BREAKS_FLOW,
+  PARAM      : html4.eflags.UNSAFE | html4.eflags.EMPTY,
+  PLAINTEXT  : (html4.eflags.OPTIONAL_ENDTAG | html4.eflags.UNSAFE
+                | html4.eflags.CDATA),
+  PRE        : html4.eflags.BREAKS_FLOW,
+  Q          : 0,
+  S          : 0,
+  SAMP       : 0,
+  SCRIPT     : html4.eflags.UNSAFE | html4.eflags.CDATA,
+  SELECT     : 0,
+  SMALL      : 0,
+  SPAN       : 0,
+  STRIKE     : 0,
+  STRONG     : 0,
+  STYLE      : html4.eflags.UNSAFE | html4.eflags.CDATA,
+  SUB        : 0,
+  SUP        : 0,
+  TABLE      : html4.eflags.BREAKS_FLOW,
+  TBODY      : html4.eflags.OPTIONAL_ENDTAG,
+  TD         : html4.eflags.OPTIONAL_ENDTAG | html4.eflags.BREAKS_FLOW,
+  TEXTAREA   : html4.eflags.RCDATA,
+  TFOOT      : html4.eflags.OPTIONAL_ENDTAG,
+  TH         : html4.eflags.OPTIONAL_ENDTAG | html4.eflags.BREAKS_FLOW,
+  THEAD      : html4.eflags.OPTIONAL_ENDTAG,
+  TITLE      : (html4.eflags.UNSAFE | html4.eflags.BREAKS_FLOW
+                | html4.eflags.RCDATA),
+  TR         : html4.eflags.OPTIONAL_ENDTAG | html4.eflags.BREAKS_FLOW,
+  TT         : 0,
+  U          : 0,
+  UL         : html4.eflags.BREAKS_FLOW,
+  VAR        : 0,
+  XMP        : html4.eflags.CDATA
+};
+
+/**
+ * Maps HTML4 attribute names to html4.atype values (0 for untyped attributes).
+ */
+html4.ATTRIBS = {
+  ABBR            : 0,
+  ACCEPT          : 0,
+  'ACCEPT-CHARSET': 0,
+  ACCESSKEY       : 0,
+  ACTION          : html4.atype.URI,
+  ALIGN           : 0,
+  ALINK           : 0,
+  ALT             : 0,
+  ARCHIVE         : html4.atype.URI,
+  AXIS            : 0,
+  BACKGROUND      : html4.atype.URI,
+  BGCOLOR         : 0,
+  BORDER          : 0,
+  CELLPADDING     : 0,
+  CELLSPACING     : 0,
+  CHAR            : 0,
+  CHAROFF         : 0,
+  CHARSET         : 0,
+  CHECKED         : 0,
+  CITE            : html4.atype.URI,
+  CLASS           : html4.atype.NMTOKENS,
+  CLASSID         : html4.atype.URI,
+  CLEAR           : 0,
+  CODE            : 0,
+  CODEBASE        : html4.atype.URI,
+  CODETYPE        : 0,
+  COLOR           : 0,
+  COLS            : 0,
+  COLSPAN         : 0,
+  COMPACT         : 0,
+  CONTENT         : 0,
+  COORDS          : 0,
+  DATA            : html4.atype.URI,
+  DATETIME        : 0,
+  DECLARE         : 0,
+  DEFER           : 0,
+  DIR             : 0,
+  DISABLED        : 0,
+  ENCTYPE         : 0,
+  FACE            : 0,
+  FOR             : html4.atype.IDREF,
+  FRAME           : 0,
+  FRAMEBORDER     : 0,
+  HEADERS         : 0,
+  HEIGHT          : 0,
+  HREF            : html4.atype.URI,
+  HREFLANG        : 0,
+  HSPACE          : 0,
+  //'HTTP-EQUIV'    : 0,   // unsafe
+  ID              : html4.atype.IDREF,
+  ISMAP           : 0,
+  LABEL           : 0,
+  LANG            : 0,
+  LANGUAGE        : 0,
+  LINK            : 0,
+  LONGDESC        : html4.atype.URI,
+  MARGINHEIGHT    : 0,
+  MARGINWIDTH     : 0,
+  MAXLENGTH       : 0,
+  MEDIA           : 0,
+  METHOD          : 0,
+  MULTIPLE        : 0,
+  NAME            : html4.atype.NAME,
+  NOHREF          : 0,
+  NORESIZE        : 0,
+  NOSHADE         : 0,
+  NOWRAP          : 0,
+  OBJECT          : 0,
+  ONBLUR          : html4.atype.SCRIPT,
+  ONCHANGE        : html4.atype.SCRIPT,
+  ONCLICK         : html4.atype.SCRIPT,
+  ONDBLCLICK      : html4.atype.SCRIPT,
+  ONFOCUS         : html4.atype.SCRIPT,
+  ONKEYDOWN       : html4.atype.SCRIPT,
+  ONKEYPRESS      : html4.atype.SCRIPT,
+  ONKEYUP         : html4.atype.SCRIPT,
+  ONLOAD          : html4.atype.SCRIPT,
+  ONMOUSEDOWN     : html4.atype.SCRIPT,
+  ONMOUSEMOVE     : html4.atype.SCRIPT,
+  ONMOUSEOUT      : html4.atype.SCRIPT,
+  ONMOUSEOVER     : html4.atype.SCRIPT,
+  ONMOUSEUP       : html4.atype.SCRIPT,
+  ONRESET         : html4.atype.SCRIPT,
+  ONSELECT        : html4.atype.SCRIPT,
+  ONSUBMIT        : html4.atype.SCRIPT,
+  ONUNLOAD        : html4.atype.SCRIPT,
+  PROFILE         : html4.atype.URI,
+  PROMPT          : 0,
+  READONLY        : 0,
+  REL             : 0,
+  REV             : 0,
+  ROWS            : 0,
+  ROWSPAN         : 0,
+  RULES           : 0,
+  SCHEME          : 0,
+  SCOPE           : 0,
+  SCROLLING       : 0,
+  SELECTED        : 0,
+  SHAPE           : 0,
+  SIZE            : 0,
+  SPAN            : 0,
+  SRC             : html4.atype.URI,
+  STANDBY         : 0,
+  START           : 0,
+  STYLE           : html4.atype.STYLE,
+  SUMMARY         : 0,
+  TABINDEX        : 0,
+  TARGET          : html4.atype.FRAME,
+  TEXT            : 0,
+  TITLE           : 0,
+  TYPE            : 0,
+  USEMAP          : html4.atype.URI,
+  VALIGN          : 0,
+  VALUE           : 0,
+  VALUETYPE       : 0,
+  VERSION         : 0,
+  VLINK           : 0,
+  VSPACE          : 0,
+  WIDTH           : 0
+};

Added: incubator/shindig/trunk/features/caja/unicode.js
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/features/caja/unicode.js?rev=635936&view=auto
==============================================================================
--- incubator/shindig/trunk/features/caja/unicode.js (added)
+++ incubator/shindig/trunk/features/caja/unicode.js Tue Mar 11 07:21:16 2008
@@ -0,0 +1,92 @@
+// Copyright (C) 2008 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+/**
+ * @fileoverview
+ * Unicode character classes.
+ *
+ * @see http://www.w3.org/TR/2000/REC-xml-20001006#CharClasses
+ * @author [EMAIL PROTECTED]
+ */
+
+
+/** @namespace Holder for the character-class strings below; each is a run of "lo-hi" ranges and single characters. */
+var unicode = {};
+
+unicode.BASE_CHAR = (  // BaseChar class from the XML 1.0 CharClasses appendix (see @see link)
+    '\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF'
+    + '\u0100-\u0131\u0134-\u013E\u0141-\u0148\u014A-\u017E\u0180-\u01C3'
+    + '\u01CD-\u01F0\u01F4-\u01F5\u01FA-\u0217\u0250-\u02A8\u02BB-\u02C1'
+    + '\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D6'
+    + '\u03DA\u03DC\u03DE\u03E0\u03E2-\u03F3\u0401-\u040C\u040E-\u044F'
+    + '\u0451-\u045C\u045E-\u0481\u0490-\u04C4\u04C7-\u04C8\u04CB-\u04CC'
+    + '\u04D0-\u04EB\u04EE-\u04F5\u04F8-\u04F9\u0531-\u0556\u0559'
+    + '\u0561-\u0586\u05D0-\u05EA\u05F0-\u05F2\u0621-\u063A\u0641-\u064A'
+    + '\u0671-\u06B7\u06BA-\u06BE\u06C0-\u06CE\u06D0-\u06D3\u06D5'
+    + '\u06E5-\u06E6\u0905-\u0939\u093D\u0958-\u0961\u0985-\u098C'
+    + '\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9'
+    + '\u09DC-\u09DD\u09DF-\u09E1\u09F0-\u09F1\u0A05-\u0A0A\u0A0F-\u0A10'
+    + '\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39'
+    + '\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8B\u0A8D\u0A8F-\u0A91'
+    + '\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABD\u0AE0'
+    + '\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33'
+    + '\u0B36-\u0B39\u0B3D\u0B5C-\u0B5D\u0B5F-\u0B61\u0B85-\u0B8A'
+    + '\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F'
+    + '\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0C05-\u0C0C'
+    + '\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C60-\u0C61'
+    + '\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9'
+    + '\u0CDE\u0CE0-\u0CE1\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28'
+    + '\u0D2A-\u0D39\u0D60-\u0D61\u0E01-\u0E2E\u0E30\u0E32-\u0E33'
+    + '\u0E40-\u0E45\u0E81-\u0E82\u0E84\u0E87-\u0E88\u0E8A\u0E8D'
+    + '\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA-\u0EAB'
+    + '\u0EAD-\u0EAE\u0EB0\u0EB2-\u0EB3\u0EBD\u0EC0-\u0EC4\u0F40-\u0F47'
+    + '\u0F49-\u0F69\u10A0-\u10C5\u10D0-\u10F6\u1100\u1102-\u1103'
+    + '\u1105-\u1107\u1109\u110B-\u110C\u110E-\u1112\u113C\u113E\u1140'
+    + '\u114C\u114E\u1150\u1154-\u1155\u1159\u115F-\u1161\u1163\u1165'
+    + '\u1167\u1169\u116D-\u116E\u1172-\u1173\u1175\u119E\u11A8\u11AB'
+    + '\u11AE-\u11AF\u11B7-\u11B8\u11BA\u11BC-\u11C2\u11EB\u11F0\u11F9'
+    + '\u1E00-\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45'
+    + '\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D'
+    + '\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC'
+    + '\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC'
+    + '\u2126\u212A-\u212B\u212E\u2180-\u2182\u3041-\u3094\u30A1-\u30FA'
+    + '\u3105-\u312C\uAC00-\uD7A3');
+unicode.IDEOGRAPHIC = '\u4E00-\u9FA5\u3007\u3021-\u3029';  // Ideographic class, same range notation
+unicode.LETTER = unicode.BASE_CHAR + unicode.IDEOGRAPHIC;  // BaseChar ranges followed by Ideographic ranges
+unicode.COMBINING_CHAR = (  // CombiningChar class, as "lo-hi" ranges and single characters
+    '\u0300-\u0345\u0360-\u0361\u0483-\u0486\u0591-\u05A1\u05A3-\u05B9'
+    + '\u05BB-\u05BD\u05BF\u05C1-\u05C2\u05C4\u064B-\u0652\u0670'
+    + '\u06D6-\u06DC\u06DD-\u06DF\u06E0-\u06E4\u06E7-\u06E8\u06EA-\u06ED'
+    + '\u0901-\u0903\u093C\u093E-\u094C\u094D\u0951-\u0954\u0962-\u0963'
+    + '\u0981-\u0983\u09BC\u09BE\u09BF\u09C0-\u09C4\u09C7-\u09C8'
+    + '\u09CB-\u09CD\u09D7\u09E2-\u09E3\u0A02\u0A3C\u0A3E\u0A3F'
+    + '\u0A40-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A70-\u0A71\u0A81-\u0A83'
+    + '\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0B01-\u0B03\u0B3C'
+    + '\u0B3E-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B82-\u0B83'
+    + '\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0C01-\u0C03'
+    + '\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C82-\u0C83'
+    + '\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0D02-\u0D03'
+    + '\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0E31\u0E34-\u0E3A'
+    + '\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB-\u0EBC\u0EC8-\u0ECD'
+    + '\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84'
+    + '\u0F86-\u0F8B\u0F90-\u0F95\u0F97\u0F99-\u0FAD\u0FB1-\u0FB7\u0FB9'
+    + '\u20D0-\u20DC\u20E1\u302A-\u302F\u3099\u309A');
+unicode.DIGIT = (  // Digit class, same notation
+    '\u0030-\u0039\u0660-\u0669\u06F0-\u06F9\u0966-\u096F\u09E6-\u09EF'
+    + '\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0BE7-\u0BEF\u0C66-\u0C6F'
+    + '\u0CE6-\u0CEF\u0D66-\u0D6F\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29');
+unicode.EXTENDER = (  // Extender class, same "lo-hi" range notation as the other classes
+    '\u00B7\u02D0\u02D1\u0387\u0640\u0E46\u0EC6\u3005\u3031-\u3035'
+    + '\u309D-\u309E\u30FC-\u30FE');

Modified: incubator/shindig/trunk/features/opensocial-reference/container.js
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/features/opensocial-reference/container.js?rev=635936&r1=635935&r2=635936&view=diff
==============================================================================
--- incubator/shindig/trunk/features/opensocial-reference/container.js (original)
+++ incubator/shindig/trunk/features/opensocial-reference/container.js Tue Mar 11 07:21:16 2008
@@ -431,11 +431,26 @@
 
 
 /**
- * Caja Support
+ * Caja Support.  See features/caja/*.js
  */
 var caja;
 var ___;
 var html_sanitize;
+var attachDocumentStub;
+var plugin_dispatchEvent___;
+// See features/caja/domita.js for uriCallback's contract.
+var uriCallback = {
+  rewrite: function rewrite(uri, mimeTypes) {  // returns the rewritten URI string, or null to reject
+    uri = String(uri);
+    // By default, only allow references to anchors ("#fragment").
+    if (/^#/.test(uri)) {
+      return '#' + encodeURIComponent(decodeURIComponent(uri.substring(1)));  // decode then re-encode the fragment
+    }
+    // This callback can be replaced with one that passes the URL through
+    // a proxy that checks the mimetype.
+    return null;
+  }
+};
 
 /**
  * Enable Caja support
@@ -450,6 +465,7 @@
   ___ = window["___"];
   caja = window["caja"];
   html_sanitize = window["html_sanitize"];
+  attachDocumentStub = window["attachDocumentStub"];
 
   var outers = caja.copy(___.sharedOuters);
 
@@ -460,31 +476,7 @@
     outers._IG_RegisterOnloadHandler = ___.simpleFunc(igOnload);
   }
 
-  outers.emitHtml___ = function emitHtml(var_args) {
-    var html = Array.prototype.slice.call(arguments, 0).join('');
-    document.write(html);
-  };
-
-  outers.document = function() {};
-  outers.document.getElementById = function(id) {
-    var element = document.getElementById("DOM-PREFIX-" + id);
-    if (element !== null) {
-      ___.useSetHandler(element, 'innerHTML', function(html) {
-        var temp = html_sanitize(html, null,
-            function (nmtokens) {
-              var tokens = nmtokens.split(/\s+/g);
-              for (var i = 0; i < tokens.length; ++i) {
-                if (tokens[i]) { tokens[i] = 'DOM-PREFIX-' + tokens[i]; }
-              }
-              return tokens.join(' ');
-            });
-        return this.innerHTML = temp;
-      });
-    }
-    return element;
-  };
-
-  ___.allowCall(outers.document, 'getElementById');
+  attachDocumentStub('pre-', uriCallback, outers);
 
   // Temporarily adding some gadgets calls to the opensocial code.
   // This should move into the gadgets js code very soon.
@@ -492,33 +484,19 @@
 
   // Adding all of the available opensocial calls as defined in the spec
   outers.opensocial = opensocial;
-  ___.allowCall(outers.opensocial, 'requestSendMessage');
-  ___.allowCall(outers.opensocial, 'requestShareApp');
-  ___.allowCall(outers.opensocial, 'requestCreateActivity');
-  ___.allowCall(outers.opensocial, 'hasPermission');
-  ___.allowCall(outers.opensocial, 'requestPermission');
-  ___.allowCall(outers.opensocial, 'getEnvironment');
-  ___.allowCall(outers.opensocial, 'newDataRequest');
-  ___.allowCall(outers.opensocial, 'newActivity');
-  ___.allowCall(outers.opensocial, 'newActivityMediaItem');
-  ___.allowCall(outers.opensocial, 'newMessage');
-
-  ___.allowCall(opensocial.Collection.prototype, 'getById');
-  ___.allowCall(opensocial.Collection.prototype, 'size');
-  ___.allowCall(opensocial.Collection.prototype, 'each');
-  ___.allowCall(opensocial.Collection.prototype, 'asArray');
-  ___.allowCall(opensocial.Collection.prototype, 'getTotalSize');
-  ___.allowCall(opensocial.Collection.prototype, 'getOffset');
-
-  // TODO(doll): Call caja method to support all array calls once it exists
-  ___.allowCall(Array.prototype, 'push');
-  ___.allowCall(Array.prototype, 'sort');
-
-  ___.allowCall(opensocial.Person.prototype, 'getId');
-  ___.allowCall(opensocial.Person.prototype, 'getDisplayName');
-  ___.allowCall(opensocial.Person.prototype, 'getField');
-  ___.allowCall(opensocial.Person.prototype, 'isViewer');
-  ___.allowCall(opensocial.Person.prototype, 'isOwner');
+  ___.all2(
+      ___.allowCall, outers.opensocial,
+      ['requestSendMessage', 'requestShareApp', 'requestCreateActivity',
+       'hasPermission', 'requestPermission', 'getEnvironment', 'newDataRequest',
+       'newActivity', 'newActivityMediaItem', 'newMessage']);
+
+  ___.all2(
+      ___.allowCall, opensocial.Collection.prototype,
+      ['getById', 'size', 'each', 'asArray', 'getTotalSize', 'getOffset']);
+
+  ___.all2(
+      ___.allowCall, opensocial.Person.prototype,
+      ['getId', 'getDisplayName', 'getField', 'isViewer', 'isOwner']);
 
   ___.allowCall(opensocial.Address.prototype, 'getField');
   ___.allowCall(opensocial.BodyType.prototype, 'getField');
@@ -541,14 +519,11 @@
   ___.allowCall(opensocial.DataResponse.prototype, 'hadError');
   ___.allowCall(opensocial.DataResponse.prototype, 'get');
 
-  ___.allowCall(opensocial.DataRequest.prototype, 'getRequestObjects');
-  ___.allowCall(opensocial.DataRequest.prototype, 'add');
-  ___.allowCall(opensocial.DataRequest.prototype, 'send');
-  ___.allowCall(opensocial.DataRequest.prototype, 'newFetchPersonRequest');
-  ___.allowCall(opensocial.DataRequest.prototype, 'newFetchPeopleRequest');
-  ___.allowCall(opensocial.DataRequest.prototype, 'newFetchPersonAppDataRequest');
-  ___.allowCall(opensocial.DataRequest.prototype, 'newUpdatePersonAppDataRequest');
-  ___.allowCall(opensocial.DataRequest.prototype, 'newFetchActivitiesRequest');
+  ___.all2(
+      ___.allowCall, opensocial.DataRequest.prototype,
+      ['getRequestObjects', 'add', 'send', 'newFetchPersonRequest',
+       'newFetchPeopleRequest', 'newFetchPersonAppDataRequest',
+       'newUpdatePersonAppDataRequest', 'newFetchActivitiesRequest']);
 
   ___.allowCall(opensocial.Environment.prototype, 'getDomain');
   ___.allowCall(opensocial.Environment.prototype, 'supportsField');
@@ -566,16 +541,3 @@
 
   ___.setNewModuleHandler(moduleHandler);
 };
-
-/**
- * Default taming is to return obj itself. Depending on
- * other taming decisions, it may be more appropriate to
- * return an interposed wrapper.
- * @private
- */
-function plugin_tamed(obj) { return obj; }
-
-function plugin_dispatchEvent___(thisNode, event, pluginId, handlerName) {
-  return ___.getOuters(pluginId)[handlerName](plugin_tamed(thisNode),
-      plugin_tamed(event));
-}


Reply via email to