Author: kkrugler
Date: Thu Aug 9 21:57:16 2012
New Revision: 1371506
URL: http://svn.apache.org/viewvc?rev=1371506&view=rev
Log:
TIKA-889: XHTMLContentHandler won't emit newline when html element matches
ENDLINE set.
Added test case to validate proper behavior. Trunk passes without changes, so
marking issue as "can't reproduce".
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java?rev=1371506&r1=1371505&r2=1371506&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java Thu Aug 9 21:57:16 2012
@@ -762,4 +762,26 @@ public class HtmlParserTest extends Test
assertTrue(Pattern.matches("(?s).*<body/>.*$", result));
}
+ /**
+ * Test case for TIKA-889:
+ * XHTMLContentHandler won't emit newline when html element matches ENDLINE set.
+ *
+ * @see <a href="https://issues.apache.org/jira/browse/TIKA-889">TIKA-889</a>
+ */
+ public void testNewlineAndIndent() throws Exception {
+ final String html = "<html><head><title>Title</title></head>" +
+ "<body><ul><li>one</li></ul></body></html>";
+
+ BodyContentHandler handler = new BodyContentHandler();
+ new HtmlParser().parse(
+ new ByteArrayInputStream(html.getBytes("UTF-8")),
+ handler, new Metadata(), new ParseContext());
+
+ // Make sure the entire output is exactly <tab>, "one", newline, newline
+ String result = handler.toString();
+
+ assertTrue(Pattern.matches("\tone\n\n", result));
+ }
+
+
}