Author: tallison
Date: Tue Oct 27 12:46:59 2015
New Revision: 1710799

URL: http://svn.apache.org/viewvc?rev=1710799&view=rev
Log:
TIKA-1782 allow XHTMLContentHandler to pass attributes of html element via 
Markus Jelsma

Modified:
    tika/trunk/CHANGES.txt
    
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java
    
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/XHTMLContentHandlerTest.java

Modified: tika/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1710799&r1=1710798&r2=1710799&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Tue Oct 27 12:46:59 2015
@@ -1,5 +1,8 @@
 Release 1.12 - Current Development
 
+  * Allow XHTMLContentHandler to pass attributes of html element 
+    via Markus Jelsma (TIKA-1782).
+
   * Fix regression with spacing in PPT via Andreas Beeker (TIKA-1777).
 
 

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java?rev=1710799&r1=1710798&r2=1710799&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java 
(original)
+++ 
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java 
Tue Oct 27 12:46:59 2015
@@ -60,7 +60,7 @@ public class XHTMLContentHandler extends
      * skip them if they get sent to startElement/endElement by mistake.
      */
     private static final Set<String> AUTO =
-        unmodifiableSet("html", "head", "frameset");
+        unmodifiableSet("head", "frameset");
 
     /**
      * The elements that get prepended with the {@link #TAB} character.

Modified: 
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/XHTMLContentHandlerTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/XHTMLContentHandlerTest.java?rev=1710799&r1=1710798&r2=1710799&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/XHTMLContentHandlerTest.java
 (original)
+++ 
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/XHTMLContentHandlerTest.java
 Tue Oct 27 12:46:59 2015
@@ -140,6 +140,23 @@ public class XHTMLContentHandlerTest {
 
         assertTrue(toHTMLContentHandler.toString().contains("itemscope"));
     }
+    
+    @Test
+    public void testAttributesOnHtml() throws Exception {
+        ToHTMLContentHandler toHTMLContentHandler = new ToHTMLContentHandler();
+        XHTMLContentHandler xhtmlContentHandler = new XHTMLContentHandler(toHTMLContentHandler, new Metadata());
+        AttributesImpl attributes = new AttributesImpl();
+
+        attributes.addAttribute(XHTMLContentHandler.XHTML, "itemscope", "itemscope", "", "");
+        attributes.addAttribute(XHTMLContentHandler.XHTML, "itemtype", "itemtype", "", "http://schema.org/Event");
+
+        xhtmlContentHandler.startDocument();
+        xhtmlContentHandler.startElement(XHTMLContentHandler.XHTML, "html", "html", attributes);
+        xhtmlContentHandler.endElement("html");
+        xhtmlContentHandler.endDocument();
+
+        assertTrue(toHTMLContentHandler.toString().contains("itemscope"));
+    }
 
     /**
      * Return array of non-zerolength words. Splitting on whitespace will get us


Reply via email to