Author: jukka
Date: Sat Sep 12 21:02:54 2009
New Revision: 814236
URL: http://svn.apache.org/viewvc?rev=814236&view=rev
Log:
TIKA-269: Ease-of-use facade for Tika
Use the new facade to simplify some parser tests.
Modified:
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
Modified:
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java?rev=814236&r1=814235&r2=814236&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java
Sat Sep 12 21:02:54 2009
@@ -16,15 +16,10 @@
*/
package org.apache.tika.parser.asm;
-import java.io.InputStream;
+import junit.framework.TestCase;
+import org.apache.tika.Tika;
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.xml.sax.ContentHandler;
-
-import junit.framework.TestCase;
/**
* Test case for parsing Java class files.
@@ -32,24 +27,16 @@
public class ClassParserTest extends TestCase {
public void testClassParsing() throws Exception {
- Parser parser = new AutoDetectParser(); // Should auto-detect!
- ContentHandler handler = new BodyContentHandler();
+ String path = "/test-documents/AutoDetectParser.class";
Metadata metadata = new Metadata();
-
- InputStream stream = ClassParserTest.class.getResourceAsStream(
- "/test-documents/AutoDetectParser.class");
- try {
- parser.parse(stream, handler, metadata);
- } finally {
- stream.close();
- }
+ String content = Tika.parseToString(
+ ClassParserTest.class.getResourceAsStream(path), metadata);
assertEquals("AutoDetectParser", metadata.get(Metadata.TITLE));
assertEquals(
"AutoDetectParser.class",
metadata.get(Metadata.RESOURCE_NAME_KEY));
- String content = handler.toString();
assertTrue(content.contains("package org.apache.tika.parser;"));
assertTrue(content.contains(
"class AutoDetectParser extends CompositeParser"));
Modified:
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java?rev=814236&r1=814235&r2=814236&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java
Sat Sep 12 21:02:54 2009
@@ -16,61 +16,56 @@
*/
package org.apache.tika.parser.audio;
-import java.io.InputStream;
-
import junit.framework.TestCase;
+import org.apache.tika.Tika;
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.Parser;
-import org.xml.sax.helpers.DefaultHandler;
public class AudioParserTest extends TestCase {
- private final Parser parser = new AudioParser();
-
public void testWAV() throws Exception {
+ String path = "/test-documents/testWAV.wav";
Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "audio/x-wav");
- InputStream stream = getClass().getResourceAsStream(
- "/test-documents/testWAV.wav");
-
- parser.parse(stream, new DefaultHandler(), metadata);
+ String content = Tika.parseToString(
+ AudioParserTest.class.getResourceAsStream(path), metadata);
+ assertEquals("audio/x-wav", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("44100.0", metadata.get("samplerate"));
assertEquals("2", metadata.get("channels"));
assertEquals("16", metadata.get("bits"));
assertEquals("PCM_SIGNED", metadata.get("encoding"));
+ assertEquals("", content);
}
public void testAIFF() throws Exception {
+ String path = "/test-documents/testAIFF.aif";
Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "audio/x-aiff");
- InputStream stream = getClass().getResourceAsStream(
- "/test-documents/testAIFF.aif");
-
- parser.parse(stream, new DefaultHandler(), metadata);
+ String content = Tika.parseToString(
+ AudioParserTest.class.getResourceAsStream(path), metadata);
+ assertEquals("audio/x-aiff", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("44100.0", metadata.get("samplerate"));
assertEquals("2", metadata.get("channels"));
assertEquals("16", metadata.get("bits"));
assertEquals("PCM_SIGNED", metadata.get("encoding"));
+ assertEquals("", content);
}
public void testAU() throws Exception {
+ String path = "/test-documents/testAU.au";
Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "audio/basic");
- InputStream stream = getClass().getResourceAsStream(
- "/test-documents/testAU.au");
-
- parser.parse(stream, new DefaultHandler(), metadata);
+ String content = Tika.parseToString(
+ AudioParserTest.class.getResourceAsStream(path), metadata);
+ assertEquals("audio/basic", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("44100.0", metadata.get("samplerate"));
assertEquals("2", metadata.get("channels"));
assertEquals("16", metadata.get("bits"));
assertEquals("PCM_SIGNED", metadata.get("encoding"));
+ assertEquals("", content);
}
}
Modified:
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java?rev=814236&r1=814235&r2=814236&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java
Sat Sep 12 21:02:54 2009
@@ -16,32 +16,24 @@
*/
package org.apache.tika.parser.audio;
-import java.io.InputStream;
-
import junit.framework.TestCase;
+import org.apache.tika.Tika;
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.xml.sax.ContentHandler;
public class MidiParserTest extends TestCase {
- private final Parser parser = new MidiParser();
-
public void testMID() throws Exception {
+ String path = "/test-documents/testMID.mid";
Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "audio/midi");
- InputStream stream = getClass().getResourceAsStream(
- "/test-documents/testMID.mid");
-
- ContentHandler handler = new BodyContentHandler();
- parser.parse(stream, handler, metadata);
+ String content = Tika.parseToString(
+ MidiParserTest.class.getResourceAsStream(path), metadata);
+ assertEquals("audio/midi", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("2", metadata.get("tracks"));
assertEquals("0", metadata.get("patches"));
assertEquals("PPQ", metadata.get("divisionType"));
- assertTrue(handler.toString().contains("Untitled"));
+ assertTrue(content.contains("Untitled"));
}
}
Modified:
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java?rev=814236&r1=814235&r2=814236&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
Sat Sep 12 21:02:54 2009
@@ -20,13 +20,13 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
+import java.util.HashMap;
import junit.framework.TestCase;
+import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.TeeContentHandler;
import org.xml.sax.Attributes;
@@ -36,31 +36,26 @@
public class HtmlParserTest extends TestCase {
- private Parser parser = new HtmlParser();
-
- private static InputStream getStream(String name) {
- return Thread.currentThread().getContextClassLoader()
- .getResourceAsStream(name);
- }
-
public void testParseAscii() throws Exception {
+ String path = "/test-documents/testHTML.html";
final StringWriter href = new StringWriter();
-
ContentHandler body = new BodyContentHandler();
- ContentHandler link = new DefaultHandler() {
- @Override
- public void startElement(
- String u, String l, String n, Attributes a)
- throws SAXException {
- if ("a".equals(l)) {
- href.append(a.getValue("href"));
- }
- }
- };
Metadata metadata = new Metadata();
- InputStream stream = getStream("test-documents/testHTML.html");
+ InputStream stream = HtmlParserTest.class.getResourceAsStream(path);
try {
- parser.parse(stream, new TeeContentHandler(body, link), metadata);
+ ContentHandler link = new DefaultHandler() {
+ @Override
+ public void startElement(
+ String u, String l, String n, Attributes a)
+ throws SAXException {
+ if ("a".equals(l)) {
+ href.append(a.getValue("href"));
+ }
+ }
+ };
+ new HtmlParser().parse(
+ stream, new TeeContentHandler(body, link),
+ metadata, new HashMap<String, Object>());
} finally {
stream.close();
}
@@ -69,6 +64,7 @@
"Title : Test Indexation Html", metadata.get(Metadata.TITLE));
assertEquals("Tika Developers", metadata.get("Author"));
assertEquals("5", metadata.get("refresh"));
+
assertEquals("http://www.apache.org/", href.toString());
String content = body.toString();
@@ -81,13 +77,10 @@
}
public void XtestParseUTF8() throws IOException, SAXException,
TikaException {
- ContentHandler handler = new BodyContentHandler();
+ String path = "/test-documents/testXHTML_utf8.html";
Metadata metadata = new Metadata();
-
- parser.parse(
- getStream("test-documents/testHTML_utf8.html"),
- handler, metadata);
- String content = handler.toString();
+ String content = Tika.parseToString(
+ HtmlParserTest.class.getResourceAsStream(path), metadata);
assertTrue("Did not contain expected text:"
+ "Title : Tilte with UTF-8 chars öäå", content
@@ -102,21 +95,14 @@
}
public void testXhtmlParsing() throws Exception {
- Parser parser = new AutoDetectParser(); // Should auto-detect!
- ContentHandler handler = new BodyContentHandler();
+ String path = "/test-documents/testXHTML.html";
Metadata metadata = new Metadata();
-
- InputStream stream = HtmlParserTest.class.getResourceAsStream(
- "/test-documents/testXHTML.html");
- try {
- parser.parse(stream, handler, metadata);
- } finally {
- stream.close();
- }
+ String content = Tika.parseToString(
+ HtmlParserTest.class.getResourceAsStream(path), metadata);
assertEquals("application/xhtml+xml",
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("XHTML test document", metadata.get(Metadata.TITLE));
- String content = handler.toString();
+
assertEquals("Tika Developers", metadata.get("Author"));
assertEquals("5", metadata.get("refresh"));
assertTrue(content.contains("ability of Apache Tika"));
@@ -125,13 +111,11 @@
}
public void testParseEmpty() throws Exception {
- Metadata metadata = new Metadata();
- StringWriter writer = new StringWriter();
- parser.parse(
+ ContentHandler handler = new BodyContentHandler();
+ new HtmlParser().parse(
new ByteArrayInputStream(new byte[0]),
- new BodyContentHandler(writer), metadata);
- String content = writer.toString();
- assertEquals("", content);
+ handler, new Metadata(), new HashMap<String, Object>());
+ assertEquals("", handler.toString());
}
/**
@@ -140,11 +124,8 @@
*/
public void testCharactersDirectlyUnderBodyElement() throws Exception {
String test = "<html><body>test</body></html>";
- ContentHandler handler = new BodyContentHandler();
- parser.parse(
- new ByteArrayInputStream(test.getBytes("UTF-8")),
- handler, new Metadata());
- String content = handler.toString();
+ String content = Tika.parseToString(
+ new ByteArrayInputStream(test.getBytes("UTF-8")));
assertEquals("test", content);
}
@@ -155,11 +136,8 @@
public void testWhitespaceBetweenTableCells() throws Exception {
String test =
"<html><body><table><tr><td>a</td><td>b</td></table></body></html>";
- ContentHandler handler = new BodyContentHandler();
- parser.parse(
- new ByteArrayInputStream(test.getBytes("UTF-8")),
- handler, new Metadata());
- String content = handler.toString();
+ String content = Tika.parseToString(
+ new ByteArrayInputStream(test.getBytes("UTF-8")));
assertTrue(content.contains("a"));
assertTrue(content.contains("b"));
assertFalse(content.contains("ab"));