Author: awiner
Date: Tue Mar 17 18:54:19 2009
New Revision: 755349

URL: http://svn.apache.org/viewvc?rev=755349&view=rev
Log:
Add SanitizedRenderingContentRewriter.bypassSanitization() to allow trusted 
rewriters (esp. template tag handlers) to generate content that would otherwise 
be rejected (target attribute on links, OBJECT tags for Flash, etc.).

Fix SocialMarkupHtmlParser to generate comment nodes so sanitization can strip 
them

Update sanitizer test to use the default ParseModule (which now provides 
SocialMarkupHtmlParser as the parser)

Modified:
    
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
    
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParser.java
    
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriter.java
    
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriterTest.java

Modified: 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java?rev=755349&r1=755348&r2=755349&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
 Tue Mar 17 18:54:19 2009
@@ -272,7 +272,7 @@
     /** Create an Element in the DOM for an important element */
     private Element startImportantElement(QName qName, XMLAttributes 
xmlAttributes) {
       if (builder.length() > 0) {
-        
elementStack.peek().appendChild(document.createTextNode(builder.toString()));
+        appendChild(document.createTextNode(builder.toString()));
         builder.setLength(0);
       }
 
@@ -292,7 +292,7 @@
           element.setAttribute(xmlAttributes.getLocalName(i) , 
xmlAttributes.getValue(i));
         }
       }
-      elementStack.peek().appendChild(element);
+      appendChild(element);
       return element;
     }
 
@@ -377,7 +377,7 @@
 
     public void endDocument(Augmentations augs) throws XNIException {
       if (builder.length() > 0) {
-        
elementStack.peek().appendChild(document.createTextNode(builder.toString()));
+        appendChild(document.createTextNode(builder.toString()));
         builder.setLength(0);
       }
       elementStack.pop();
@@ -389,5 +389,9 @@
     public XMLDocumentSource getDocumentSource() {
       return null;
     }
+    
+    protected final void appendChild(Node node) {
+      elementStack.peek().appendChild(node);
+    }
   }
 }

Modified: 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParser.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParser.java?rev=755349&r1=755348&r2=755349&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParser.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParser.java
 Tue Mar 17 18:54:19 2009
@@ -29,6 +29,7 @@
 import org.cyberneko.html.HTMLConfiguration;
 import org.cyberneko.html.HTMLScanner;
 import org.w3c.dom.DOMImplementation;
+import org.w3c.dom.Node;
 
 import com.google.inject.Inject;
 import com.google.inject.Singleton;
@@ -99,6 +100,15 @@
       super.endElement(name, augs);
     }
 
+    
+    @Override
+    public void comment(XMLString text, Augmentations augs) throws 
XNIException {
+      // Add comments as comment nodes - needed to support sanitization
+      // of SocialMarkup-parsed content
+      Node comment = getDocument().createComment(new String(text.ch, 
text.offset, text.length)); 
+      appendChild(comment);
+    }
+
     @Override
     public void startElement(QName name, XMLAttributes xmlAttributes, 
Augmentations augs)
         throws XNIException {

Modified: 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriter.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriter.java?rev=755349&r1=755348&r2=755349&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriter.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriter.java
 Tue Mar 17 18:54:19 2009
@@ -76,19 +76,29 @@
 
   private static final Set<String> URI_ATTRIBUTES = ImmutableSet.of("href", 
"src");
 
-  // Attributes to forcibly rewrite and require an image mime type
+  /** Key stored as element user-data to bypass sanitization */
+  private static final String BYPASS_SANITIZATION_KEY = 
"shindig.bypassSanitization";
+  
+  /** Attributes to forcibly rewrite and require an image mime type */
   private static final Map<String, ImmutableSet<String>> 
PROXY_IMAGE_ATTRIBUTES =
       ImmutableMap.of("img", ImmutableSet.of("src"));
 
   /**
-   * Is the Gadget to be rendered sanitized
-   * @param gadget
-   * @return
+   * Is the Gadget to be rendered sanitized?
+   * @return true if sanitization will be enabled
    */
   public static boolean isSanitizedRenderingRequest(Gadget gadget) {
     return ("1".equals(gadget.getContext().getParameter("sanitize")));
   }
-
+  
+  /**
+   * Marks that an element - and all its attributes and children - are
+   * trusted content. 
+   */
+  public static void bypassSanitization(Element element) {
+    element.setUserData(BYPASS_SANITIZATION_KEY, true, null);
+  }
+  
   private final Set<String> allowedTags;
   private final Set<String> allowedAttributes;
   private final CajaCssSanitizer cssSanitizer;
@@ -238,7 +248,9 @@
         case Node.ELEMENT_NODE:
         case Node.DOCUMENT_NODE:
           Element element = (Element) node;
-          if (allowedTags.contains(element.getTagName().toLowerCase())) {
+          if (canBypassSanitization(element)) {
+            return;
+          } else if (allowedTags.contains(element.getTagName().toLowerCase())) 
{
             // TODO - Add special case for stylesheet LINK nodes.
             // Special case handling for style nodes
             if (element.getTagName().equalsIgnoreCase("style")) {
@@ -301,6 +313,10 @@
     return list;
   }
 
+  private static boolean canBypassSanitization(Element element) {
+    return (element.getUserData(BYPASS_SANITIZATION_KEY) != null);
+  }
+
   /** Convert a NamedNodeMap to a list for easy and safe operations */
   private static List<Node> toList(NodeList nodes) {
     List<Node> list = new ArrayList<Node>(nodes.getLength());

Modified: 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriterTest.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriterTest.java?rev=755349&r1=755348&r2=755349&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriterTest.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriterTest.java
 Tue Mar 17 18:54:19 2009
@@ -18,6 +18,9 @@
  */
 package org.apache.shindig.gadgets.render;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
 import org.apache.commons.io.IOUtils;
 import org.apache.shindig.common.PropertiesModule;
 import org.apache.shindig.common.uri.Uri;
@@ -27,28 +30,18 @@
 import org.apache.shindig.gadgets.http.HttpResponse;
 import org.apache.shindig.gadgets.http.HttpResponseBuilder;
 import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
+import org.apache.shindig.gadgets.parse.ParseModule;
 import org.apache.shindig.gadgets.parse.caja.CajaCssParser;
 import org.apache.shindig.gadgets.parse.caja.CajaCssSanitizer;
-import org.apache.shindig.gadgets.parse.nekohtml.NekoHtmlParser;
 import org.apache.shindig.gadgets.rewrite.ContentRewriter;
 import org.apache.shindig.gadgets.rewrite.ContentRewriterFeatureFactory;
 import org.apache.shindig.gadgets.rewrite.MutableContent;
 import org.apache.shindig.gadgets.servlet.ProxyBase;
 import org.apache.shindig.gadgets.spec.GadgetSpec;
-
-import com.google.common.collect.ImmutableSet;
-import com.google.common.collect.Sets;
-import com.google.inject.AbstractModule;
-import com.google.inject.Guice;
-import com.google.inject.Injector;
-import com.google.inject.Provider;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
 import org.junit.Before;
 import org.junit.Test;
-import org.w3c.dom.DOMImplementation;
-import org.w3c.dom.bootstrap.DOMImplementationRegistry;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
 
 import java.util.Collections;
 import java.util.HashSet;
@@ -56,6 +49,11 @@
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Sets;
+import com.google.inject.Guice;
+import com.google.inject.Injector;
+
 public class SanitizedRenderingContentRewriterTest {
   private static final Set<String> DEFAULT_TAGS = ImmutableSet.of("html", 
"head", "body");
   private static final Pattern BODY_REGEX = 
Pattern.compile(".*<body>(.*)</body>.*");
@@ -77,7 +75,7 @@
 
   @Before
   public void setUp() throws Exception {
-    Injector injector = Guice.createInjector(new TestParseModule(), new 
PropertiesModule());
+    Injector injector = Guice.createInjector(new ParseModule(), new 
PropertiesModule());
     parser = injector.getInstance(GadgetHtmlParser.class);
     gadget = new Gadget().setContext(unsanitaryGadgetContext);
     gadget.setSpec(new GadgetSpec(Uri.parse("www.example.org/gadget.xml"),
@@ -225,7 +223,28 @@
     assertNull(rewrite(req, response));
   }
 
-  @Test
+   @Test
+   public void sanitizationBypassAllowed() {
+     String markup = "<p foo=\"bar\"><b>Parag</b><!--raph--></p>";
+     // Create a rewriter that would strip everything
+     ContentRewriter rewriter = createRewriter(set(), set());
+
+     MutableContent mc = new MutableContent(parser, markup);
+     Document document = mc.getDocument();
+     // Force the content to get re-serialized
+     MutableContent.notifyEdit(document);
+     String fullMarkup = mc.getContent();
+     
+     Element paragraphTag = (Element) 
document.getElementsByTagName("p").item(0);
+     // Mark the paragraph tag element as trusted
+     SanitizedRenderingContentRewriter.bypassSanitization(paragraphTag);
+     rewriter.rewrite(gadget, mc);
+     
+     // The document should be unchanged
+     assertEquals(fullMarkup, mc.getContent());
+   }
+
+   @Test
   public void restrictHrefAndSrcAttributes() {
     String markup =
         "<element " +
@@ -294,54 +313,4 @@
     
gadget.setCurrentView(gadget.getSpec().getViews().values().iterator().next());
     assertEquals(sanitized, rewrite(gadget, markup, set("p", "b", "style"), 
set()));
   }
-
-  private static class TestParseModule extends AbstractModule {
-
-    @Override
-    protected void configure() {
-      bind(GadgetHtmlParser.class).to(NekoHtmlParser.class);
-      
bind(DOMImplementation.class).toProvider(DOMImplementationProvider.class);
-    }
-
-    /**
-     * Provider of new HTMLDocument implementations. Used to hide XML parser 
weirdness
-     */
-    public static class DOMImplementationProvider implements 
Provider<DOMImplementation> {
-
-      DOMImplementation domImpl;
-
-      public DOMImplementationProvider() {
-        try {
-          DOMImplementationRegistry registry = 
DOMImplementationRegistry.newInstance();
-          // Require the traversal API
-          domImpl = registry.getDOMImplementation("XML 1.0 Traversal 2.0");
-        } catch (Exception e) {
-          // Try another
-        }
-        // This is ugly but effective
-        try {
-          if (domImpl == null) {
-            domImpl = (DOMImplementation)
-                
Class.forName("org.apache.xerces.internal.dom.DOMImplementationImpl").
-                    getMethod("getDOMImplementation").invoke(null);
-          }
-        } catch (Exception ex) {
-          //try another
-        }
-        try {
-          if (domImpl == null) {
-          domImpl = (DOMImplementation)
-            
Class.forName("com.sun.org.apache.xerces.internal.dom.DOMImplementationImpl").
-                getMethod("getDOMImplementation").invoke(null);
-          }
-        } catch (Exception ex) {
-          throw new RuntimeException("Could not find HTML DOM implementation", 
ex);
-        }
-      }
-
-      public DOMImplementation get() {
-        return domImpl;
-      }
-    }
-  }
 }


Reply via email to