Author: tallison
Date: Tue Oct 27 12:46:59 2015
New Revision: 1710799
URL: http://svn.apache.org/viewvc?rev=1710799&view=rev
Log:
TIKA-1782 allow XHTMLContentHandler to pass attributes of html element via
Markus Jelsma
Modified:
tika/trunk/CHANGES.txt
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/XHTMLContentHandlerTest.java
Modified: tika/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1710799&r1=1710798&r2=1710799&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Tue Oct 27 12:46:59 2015
@@ -1,5 +1,8 @@
Release 1.12 - Current Development
+ * Allow XHTMLContentHandler to pass attributes of html element
+ via Markus Jelsma (TIKA-1782).
+
* Fix regression with spacing in PPT via Andreas Beeker (TIKA-1777).
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java?rev=1710799&r1=1710798&r2=1710799&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java Tue Oct 27 12:46:59 2015
@@ -60,7 +60,7 @@ public class XHTMLContentHandler extends
* skip them if they get sent to startElement/endElement by mistake.
*/
private static final Set<String> AUTO =
- unmodifiableSet("html", "head", "frameset");
+ unmodifiableSet("head", "frameset");
/**
* The elements that get prepended with the {@link #TAB} character.
Modified:
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/XHTMLContentHandlerTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/XHTMLContentHandlerTest.java?rev=1710799&r1=1710798&r2=1710799&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/sax/XHTMLContentHandlerTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/sax/XHTMLContentHandlerTest.java Tue Oct 27 12:46:59 2015
@@ -140,6 +140,23 @@ public class XHTMLContentHandlerTest {
assertTrue(toHTMLContentHandler.toString().contains("itemscope"));
}
+
+ @Test
+ public void testAttributesOnHtml() throws Exception {
+ ToHTMLContentHandler toHTMLContentHandler = new ToHTMLContentHandler();
+ XHTMLContentHandler xhtmlContentHandler = new XHTMLContentHandler(toHTMLContentHandler, new Metadata());
+ AttributesImpl attributes = new AttributesImpl();
+
+ attributes.addAttribute(XHTMLContentHandler.XHTML, "itemscope", "itemscope", "", "");
+ attributes.addAttribute(XHTMLContentHandler.XHTML, "itemtype", "itemtype", "", "http://schema.org/Event");
+
+ xhtmlContentHandler.startDocument();
+ xhtmlContentHandler.startElement(XHTMLContentHandler.XHTML, "html", "html", attributes);
+ xhtmlContentHandler.endElement("html");
+ xhtmlContentHandler.endDocument();
+
+ assertTrue(toHTMLContentHandler.toString().contains("itemscope"));
+ }
/**
* Return array of non-zerolength words. Splitting on whitespace will get
us