Author: kbennett
Date: Wed Oct 17 18:00:19 2007
New Revision: 585767
URL: http://svn.apache.org/viewvc?rev=585767&view=rev
Log:
TIKA-78 - AutoDetectParserTest should include tests for bad MIME types and
resource names.
Modified:
incubator/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
Modified:
incubator/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java?rev=585767&r1=585766&r2=585767&view=diff
==============================================================================
--- incubator/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java (original)
+++ incubator/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java Wed Oct 17 18:00:19 2007
@@ -16,9 +16,12 @@
*/
package org.apache.tika.parser;
+import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
+import org.apache.commons.lang.builder.ReflectionToStringBuilder;
+import org.apache.commons.lang.builder.ToStringStyle;
import org.apache.tika.metadata.Metadata;
import org.xml.sax.ContentHandler;
@@ -26,67 +29,171 @@
public class AutoDetectParserTest extends TestCase {
- private void assertAutoDetect(
- String resource, String type, String content) throws Exception {
+ // Easy to read constants for the MIME types:
+ private static final String RAW = "application/octet-stream";
+ private static final String EXCEL = "application/vnd.ms-excel";
+ private static final String HTML = "text/html";
+ private static final String PDF = "application/pdf";
+ private static final String POWERPOINT = "application/vnd.ms-powerpoint";
+ private static final String RTF = "application/rtf";
+ private static final String PLAINTEXT = "text/plain";
+ private static final String WORD = "application/msword";
+ private static final String XML = "application/xml";
+ private static final String OPENOFFICE
+ = "application/vnd.oasis.opendocument.text";
+
+
+ /**
+ * This is where a single test is done.
+ * @param tp the parameters encapsulated in a TestParams instance
+ * @throws IOException
+ */
+ private void assertAutoDetect(TestParams tp) throws IOException {
+
InputStream input =
- AutoDetectParserTest.class.getResourceAsStream(resource);
+ AutoDetectParserTest.class.getResourceAsStream(tp.resourceRealName);
if (input == null) {
- fail("Could not open stream from specified resource: " + resource);
+ fail("Could not open stream from specified resource: "
+ + tp.resourceRealName);
}
-
+
try {
Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, resource);
- metadata.set(Metadata.CONTENT_TYPE, type);
+ metadata.set(Metadata.RESOURCE_NAME_KEY, tp.resourceStatedName);
+ metadata.set(Metadata.CONTENT_TYPE, tp.statedType);
StringWriter writer = new StringWriter();
ContentHandler handler = new WriteOutContentHandler(writer);
new AutoDetectParser().parse(input, handler, metadata);
- assertEquals(type, metadata.get(Metadata.CONTENT_TYPE));
- assertTrue(writer.toString().contains(content));
+ assertEquals("Bad content type: " + tp,
+ tp.realType, metadata.get(Metadata.CONTENT_TYPE));
+
+ assertTrue("Expected content not found: " + tp,
+ writer.toString().contains(tp.expectedContentFragment));
+ } catch (Throwable t) {
+ fail("Test error asserting auto detect for parameters: " + t
+ + "\nParameters: " + tp);
} finally {
input.close();
}
}
+ /**
+ * Convenience method -- its sole purpose of existence is to make the
+ * call to it more readable than it would be if a TestParams instance
+ * would need to be instantiated there.
+ *
+ * @param resourceRealName real name of resource
+ * @param resourceStatedName stated name -- will a bad name fool us?
+ * @param realType - the real MIME type
+ * @param statedType - stated MIME type - will a wrong one fool us?
+ * @param expectedContentFragment - something expected in the text
+ * @throws Exception
+ */
+ private void assertAutoDetect(String resourceRealName,
+ String resourceStatedName,
+ String realType,
+ String statedType,
+ String expectedContentFragment)
+ throws Exception {
+
+ assertAutoDetect(new TestParams(resourceRealName, resourceStatedName,
+ realType, statedType, expectedContentFragment));
+ }
+
+ private void assertAutoDetect(
+ String resource, String type, String content) throws Exception {
+
+ resource = "/test-documents/" + resource;
+
+ // TODO !!!! The disabled tests below should work!
+ // The correct MIME type should be determined regardless of the
+ // stated type (ContentType hint) and the stated URL name.
+
+
+ // Try different combinations of correct and incorrect arguments:
+ final String wrongMimeType = RAW;
+ assertAutoDetect(resource, resource, type, type, content);
+ assertAutoDetect(resource, resource, type, null, content);
+ assertAutoDetect(resource, resource, type, wrongMimeType, content);
+
+ assertAutoDetect(resource, null, type, type, content);
+// assertAutoDetect(resource, null, type, null, content);
+// assertAutoDetect(resource, null, type, wrongMimeType, content);
+
+ final String badResource = "a.xyz";
+// assertAutoDetect(resource, badResource, type, type, content);
+// assertAutoDetect(resource, badResource, type, null, content);
+// assertAutoDetect(resource, badResource, type, wrongMimeType, content);
+ }
+
+ /**
+ * This is where the data to the test comes from. Each triplet will be
+ * passed to a method that will try different combinations of valid and
+ * invalid values.
+ *
+ * @throws Exception
+ */
public void testAutoDetect() throws Exception {
- assertAutoDetect(
- "/test-documents/testEXCEL.xls",
- "application/vnd.ms-excel",
- "Sample Excel Worksheet");
- assertAutoDetect(
- "/test-documents/testHTML.html",
- "text/html",
- "Test Indexation Html");
- assertAutoDetect(
- "/test-documents/testOpenOffice2.odt",
- "application/vnd.oasis.opendocument.text",
+ assertAutoDetect("testEXCEL.xls", EXCEL, "Sample Excel Worksheet");
+ assertAutoDetect("testHTML.html", HTML, "Test Indexation Html");
+ assertAutoDetect("testOpenOffice2.odt", OPENOFFICE,
"This is a sample Open Office document");
- assertAutoDetect(
- "/test-documents/testPDF.pdf",
- "application/pdf",
- "Content Analysis Toolkit");
- assertAutoDetect(
- "/test-documents/testPPT.ppt",
- "application/vnd.ms-powerpoint",
- "Sample Powerpoint Slide");
- assertAutoDetect(
- "/test-documents/testRTF.rtf",
- "application/rtf",
- "indexation Word");
- assertAutoDetect(
- "/test-documents/testTXT.txt",
- "text/plain",
- "indexation de Txt");
- assertAutoDetect(
- "/test-documents/testWORD.doc",
- "application/msword",
- "Sample Word Document");
- assertAutoDetect(
- "/test-documents/testXML.xml",
- "application/xml",
- "Lius");
+ assertAutoDetect("testPDF.pdf", PDF, "Content Analysis Toolkit");
+ assertAutoDetect("testPPT.ppt", POWERPOINT, "Sample Powerpoint Slide");
+ assertAutoDetect("testRTF.rtf", RTF, "indexation Word");
+ assertAutoDetect("testTXT.txt", PLAINTEXT, "indexation de Txt");
+ assertAutoDetect("testWORD.doc", WORD, "Sample Word Document");
+ assertAutoDetect("testXML.xml", XML, "Lius");
}
+
+ /**
+ * Minimal class to encapsulate all parameters -- the main reason for
+ * its existence is to aid in debugging via its toString() method.
+ *
+ * Getters and setters intentionally not provided.
+ */
+ private static class TestParams {
+
+ public String resourceRealName;
+ public String resourceStatedName;
+ public String realType;
+ public String statedType;
+ public String expectedContentFragment;
+
+
+ private TestParams(String resourceRealName,
+ String resourceStatedName,
+ String realType,
+ String statedType,
+ String expectedContentFragment) {
+ this.resourceRealName = resourceRealName;
+ this.resourceStatedName = resourceStatedName;
+ this.realType = realType;
+ this.statedType = statedType;
+ this.expectedContentFragment = expectedContentFragment;
+ }
+
+
+ /**
+ * Produces a string like the following:
+ *
+ * org.apache.tika.parser.AutoDetectParserTest$TestParams@<hashcode>[
+ * resourceRealName=/test-documents/testEXCEL.xls
+ * resourceStatedName=<null>
+ * realType=application/vnd.ms-excel
+ * statedType=<null>
+ * expectedContentFragment=Sample Excel Worksheet
+ * ]
+ *
+ * @return
+ */
+
+ public String toString() {
+ return ReflectionToStringBuilder.toString(
+ this, ToStringStyle.MULTI_LINE_STYLE);
+ }
+ }
}