Author: tallison
Date: Fri Jan 23 14:36:36 2015
New Revision: 1654225
URL: http://svn.apache.org/r1654225
Log:
TIKA-1529: step 1...get rid of toLowerCase in BasicContentHandlerFactoryTest
Modified:
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java
Modified:
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java?rev=1654225&r1=1654224&r2=1654225&view=diff
==============================================================================
---
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java
(original)
+++
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java
Fri Jan 23 14:36:36 2015
@@ -53,7 +53,8 @@ public class BasicContentHandlerFactoryT
new
BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.IGNORE,
-1).getNewContentHandler();
assertTrue(handler instanceof DefaultHandler);
p.parse(null, handler, null, null);
- assertTrue(handler.toString().contains(""));
+ //unfortunately, the DefaultHandler does not return "", so check for its class name instead
+ assertContains("org.xml.sax.helpers.DefaultHandler",
handler.toString());
//tests that no write limit exception is thrown
p = new MockParser(100);
@@ -61,7 +62,7 @@ public class BasicContentHandlerFactoryT
new
BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.IGNORE,
5).getNewContentHandler();
assertTrue(handler instanceof DefaultHandler);
p.parse(null, handler, null, null);
- assertTrue(handler.toString().contains(""));
+ assertContains("org.xml.sax.helpers.DefaultHandler",
handler.toString());
}
@Test
@@ -74,18 +75,20 @@ public class BasicContentHandlerFactoryT
assertTrue(handler instanceof ToTextContentHandler);
p.parse(null, handler, null, null);
- assertTrue(handler.toString().contains("This is the title"));
- assertTrue(handler.toString().contains("aaaaaaaaaa"));
- assertFalse(handler.toString().toLowerCase().contains("<body"));
- assertFalse(handler.toString().toLowerCase().contains("<html"));
- assertTrue(handler.toString().length() > 110000);
+ String extracted = handler.toString();
+ assertContains("This is the title", extracted);
+ assertContains("aaaaaaaaaa", extracted);
+ assertNotContains("<body", extracted);
+ assertNotContains("<html", extracted);
+ assertTrue(extracted.length() > 110000);
//now test write limit
p = new MockParser(10);
handler = new BasicContentHandlerFactory(type,
5).getNewContentHandler();
assertTrue(handler instanceof WriteOutContentHandler);
assertWriteLimitReached(p, (WriteOutContentHandler) handler);
- assertTrue(handler.toString().contains("This "));
- assertFalse(handler.toString().toLowerCase().contains("aaaa"));
+ extracted = handler.toString();
+ assertContains("This ", extracted);
+ assertNotContains("aaaa", extracted);
//now test outputstream call
p = new MockParser(OVER_DEFAULT);
@@ -120,17 +123,19 @@ public class BasicContentHandlerFactoryT
assertTrue(handler instanceof ToHTMLContentHandler);
p.parse(null, handler, null, null);
- assertTrue(handler.toString().contains("<head><title>This is the
title"));
- assertTrue(handler.toString().contains("aaaaaaaaaa"));
- assertTrue(handler.toString().length() > 110000);
+ String extracted = handler.toString();
+ assertContains("<head><title>This is the title", extracted);
+ assertContains("aaaaaaaaaa", extracted);
+ assertTrue(extracted.length() > 110000);
//now test write limit
p = new MockParser(10);
handler = new BasicContentHandlerFactory(type,
5).getNewContentHandler();
assertTrue(handler instanceof WriteOutContentHandler);
assertWriteLimitReached(p, (WriteOutContentHandler) handler);
- assertTrue(handler.toString().contains("This "));
- assertFalse(handler.toString().toLowerCase().contains("aaaa"));
+ extracted = handler.toString();
+ assertContains("This ", extracted);
+ assertNotContains("aaaa", extracted);
//now test outputstream call
p = new MockParser(OVER_DEFAULT);
@@ -163,8 +168,9 @@ public class BasicContentHandlerFactoryT
assertTrue(handler instanceof ToXMLContentHandler);
p.parse(null, handler, new Metadata(), null);
- assertTrue(handler.toString().contains("<head><title>This is the
title"));
- assertTrue(handler.toString().contains("aaaaaaaaaa"));
+ String extracted = handler.toString();
+ assertContains("<head><title>This is the title", extracted);
+ assertContains("aaaaaaaaaa", extracted);
assertTrue(handler.toString().length() > 110000);
//now test write limit
@@ -172,8 +178,9 @@ public class BasicContentHandlerFactoryT
handler = new BasicContentHandlerFactory(type,
5).getNewContentHandler();
assertTrue(handler instanceof WriteOutContentHandler);
assertWriteLimitReached(p, (WriteOutContentHandler) handler);
- assertTrue(handler.toString().contains("This "));
- assertFalse(handler.toString().toLowerCase().contains("aaaa"));
+ extracted = handler.toString();
+ assertContains("This ", extracted);
+ assertNotContains("aaaa", extracted);
//now test outputstream call
p = new MockParser(OVER_DEFAULT);
@@ -181,6 +188,7 @@ public class BasicContentHandlerFactoryT
handler = new BasicContentHandlerFactory(type,
-1).getNewContentHandler(os, ENCODING);
assertTrue(handler instanceof ToXMLContentHandler);
p.parse(null, handler, null, null);
+
assertContains("This is the title", os.toByteArray());
assertContains("aaaaaaaaaa", os.toByteArray());
assertContains("<body", os.toByteArray());
@@ -208,17 +216,19 @@ public class BasicContentHandlerFactoryT
assertTrue(handler instanceof BodyContentHandler);
p.parse(null, handler, null, null);
- assertFalse(handler.toString().contains("title"));
- assertTrue(handler.toString().contains("aaaaaaaaaa"));
- assertTrue(handler.toString().length() > 110000);
+ String extracted = handler.toString();
+ assertNotContains("title", extracted);
+ assertContains("aaaaaaaaaa", extracted);
+ assertTrue(extracted.length() > 110000);
//now test write limit
p = new MockParser(10);
handler = new BasicContentHandlerFactory(type,
5).getNewContentHandler();
assertTrue(handler instanceof BodyContentHandler);
assertWriteLimitReached(p, (BodyContentHandler)handler);
- assertFalse(handler.toString().contains("This "));
- assertTrue(handler.toString().toLowerCase().contains("aaaa"));
+ extracted = handler.toString();
+ assertNotContains("This ", extracted);
+ assertContains("aaaa", extracted);
//now test outputstream call
p = new MockParser(OVER_DEFAULT);
@@ -267,16 +277,24 @@ public class BasicContentHandlerFactoryT
assertTrue("WriteLimitReached", wlr);
}
- private void assertNotContains(String needle, byte[] hayStack)
+
+ //copied from TikaTest in tika-parsers package
+ public static void assertNotContains(String needle, String haystack) {
+ assertFalse(needle + " found in:\n" + haystack,
haystack.contains(needle));
+ }
+
+ public static void assertNotContains(String needle, byte[] hayStack)
throws UnsupportedEncodingException {
- String s = new String(hayStack, ENCODING);
- assertFalse(s.toLowerCase().contains(needle));
+ assertNotContains(needle, new String(hayStack, ENCODING));
+ }
+
+ public static void assertContains(String needle, String haystack) {
+ assertTrue(needle + " not found in:\n" + haystack,
haystack.contains(needle));
}
- private void assertContains(String needle, byte[] hayStack)
+ public static void assertContains(String needle, byte[] hayStack)
throws UnsupportedEncodingException {
- String s = new String(hayStack, ENCODING);
- assertTrue(s.contains(needle));
+ assertContains(needle, new String(hayStack, ENCODING));
}
//Simple mockparser that writes a title