Author: tallison Date: Fri Jan 23 14:36:36 2015 New Revision: 1654225 URL: http://svn.apache.org/r1654225 Log: TIKA-1529: step 1...get rid of toLowerCase in BasicContentHandlerFactoryTest
Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java?rev=1654225&r1=1654224&r2=1654225&view=diff ============================================================================== --- tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java (original) +++ tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BasicContentHandlerFactoryTest.java Fri Jan 23 14:36:36 2015 @@ -53,7 +53,8 @@ public class BasicContentHandlerFactoryT new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.IGNORE, -1).getNewContentHandler(); assertTrue(handler instanceof DefaultHandler); p.parse(null, handler, null, null); - assertTrue(handler.toString().contains("")); + //unfortunatley, the DefaultHandler does not return "", + assertContains("org.xml.sax.helpers.DefaultHandler", handler.toString()); //tests that no write limit exception is thrown p = new MockParser(100); @@ -61,7 +62,7 @@ public class BasicContentHandlerFactoryT new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.IGNORE, 5).getNewContentHandler(); assertTrue(handler instanceof DefaultHandler); p.parse(null, handler, null, null); - assertTrue(handler.toString().contains("")); + assertContains("org.xml.sax.helpers.DefaultHandler", handler.toString()); } @Test @@ -74,18 +75,20 @@ public class BasicContentHandlerFactoryT assertTrue(handler instanceof ToTextContentHandler); p.parse(null, handler, null, null); - assertTrue(handler.toString().contains("This is the title")); - assertTrue(handler.toString().contains("aaaaaaaaaa")); - assertFalse(handler.toString().toLowerCase().contains("<body")); - assertFalse(handler.toString().toLowerCase().contains("<html")); - assertTrue(handler.toString().length() > 110000); + String extracted = handler.toString(); + assertContains("This is the title", extracted); + assertContains("aaaaaaaaaa", extracted); + assertNotContains("<body", extracted); + assertNotContains("<html", extracted); + assertTrue(extracted.length() > 110000); //now test write limit p = new MockParser(10); handler = new BasicContentHandlerFactory(type, 5).getNewContentHandler(); assertTrue(handler instanceof WriteOutContentHandler); assertWriteLimitReached(p, (WriteOutContentHandler) handler); - assertTrue(handler.toString().contains("This ")); - assertFalse(handler.toString().toLowerCase().contains("aaaa")); + extracted = handler.toString(); + assertContains("This ", extracted); + assertNotContains("aaaa", extracted); //now test outputstream call p = new MockParser(OVER_DEFAULT); @@ -120,17 +123,19 @@ public class BasicContentHandlerFactoryT assertTrue(handler instanceof ToHTMLContentHandler); p.parse(null, handler, null, null); - assertTrue(handler.toString().contains("<head><title>This is the title")); - assertTrue(handler.toString().contains("aaaaaaaaaa")); - assertTrue(handler.toString().length() > 110000); + String extracted = handler.toString(); + assertContains("<head><title>This is the title", extracted); + assertContains("aaaaaaaaaa", extracted); + assertTrue(extracted.length() > 110000); //now test write limit p = new MockParser(10); handler = new BasicContentHandlerFactory(type, 5).getNewContentHandler(); assertTrue(handler instanceof WriteOutContentHandler); assertWriteLimitReached(p, (WriteOutContentHandler) handler); - assertTrue(handler.toString().contains("This ")); - assertFalse(handler.toString().toLowerCase().contains("aaaa")); + extracted = handler.toString(); + assertContains("This ", extracted); + assertNotContains("aaaa", extracted); //now test outputstream call p = new MockParser(OVER_DEFAULT); @@ -163,8 +168,9 @@ public class BasicContentHandlerFactoryT assertTrue(handler instanceof ToXMLContentHandler); p.parse(null, handler, new Metadata(), null); - assertTrue(handler.toString().contains("<head><title>This is the title")); - assertTrue(handler.toString().contains("aaaaaaaaaa")); + String extracted = handler.toString(); + assertContains("<head><title>This is the title", extracted); + assertContains("aaaaaaaaaa", extracted); assertTrue(handler.toString().length() > 110000); //now test write limit @@ -172,8 +178,9 @@ public class BasicContentHandlerFactoryT handler = new BasicContentHandlerFactory(type, 5).getNewContentHandler(); assertTrue(handler instanceof WriteOutContentHandler); assertWriteLimitReached(p, (WriteOutContentHandler) handler); - assertTrue(handler.toString().contains("This ")); - assertFalse(handler.toString().toLowerCase().contains("aaaa")); + extracted = handler.toString(); + assertContains("This ", extracted); + assertNotContains("aaaa", extracted); //now test outputstream call p = new MockParser(OVER_DEFAULT); @@ -181,6 +188,7 @@ public class BasicContentHandlerFactoryT handler = new BasicContentHandlerFactory(type, -1).getNewContentHandler(os, ENCODING); assertTrue(handler instanceof ToXMLContentHandler); p.parse(null, handler, null, null); + assertContains("This is the title", os.toByteArray()); assertContains("aaaaaaaaaa", os.toByteArray()); assertContains("<body", os.toByteArray()); @@ -208,17 +216,19 @@ public class BasicContentHandlerFactoryT assertTrue(handler instanceof BodyContentHandler); p.parse(null, handler, null, null); - assertFalse(handler.toString().contains("title")); - assertTrue(handler.toString().contains("aaaaaaaaaa")); - assertTrue(handler.toString().length() > 110000); + String extracted = handler.toString(); + assertNotContains("title", extracted); + assertContains("aaaaaaaaaa", extracted); + assertTrue(extracted.length() > 110000); //now test write limit p = new MockParser(10); handler = new BasicContentHandlerFactory(type, 5).getNewContentHandler(); assertTrue(handler instanceof BodyContentHandler); assertWriteLimitReached(p, (BodyContentHandler)handler); - assertFalse(handler.toString().contains("This ")); - assertTrue(handler.toString().toLowerCase().contains("aaaa")); + extracted = handler.toString(); + assertNotContains("This ", extracted); + assertContains("aaaa", extracted); //now test outputstream call p = new MockParser(OVER_DEFAULT); @@ -267,16 +277,24 @@ public class BasicContentHandlerFactoryT assertTrue("WriteLimitReached", wlr); } - private void assertNotContains(String needle, byte[] hayStack) + + //copied from TikaTest in tika-parsers package + public static void assertNotContains(String needle, String haystack) { + assertFalse(needle + " found in:\n" + haystack, haystack.contains(needle)); + } + + public static void assertNotContains(String needle, byte[] hayStack) throws UnsupportedEncodingException { - String s = new String(hayStack, ENCODING); - assertFalse(s.toLowerCase().contains(needle)); + assertNotContains(needle, new String(hayStack, ENCODING)); + } + + public static void assertContains(String needle, String haystack) { + assertTrue(needle + " not found in:\n" + haystack, haystack.contains(needle)); } - private void assertContains(String needle, byte[] hayStack) + public static void assertContains(String needle, byte[] hayStack) throws UnsupportedEncodingException { - String s = new String(hayStack, ENCODING); - assertTrue(s.contains(needle)); + assertContains(needle, new String(hayStack, ENCODING)); } //Simple mockparser that writes a title