Author: jukka
Date: Sun Sep 27 17:36:42 2009
New Revision: 819358
URL: http://svn.apache.org/viewvc?rev=819358&view=rev
Log:
TIKA-269: Ease-of-use facade for Tika
Make the facade non-static to allow different configurations to be used.
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/Tika.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/Tika.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/Tika.java?rev=819358&r1=819357&r2=819358&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/Tika.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/Tika.java Sun Sep 27 17:36:42 2009
@@ -26,6 +26,7 @@
import java.util.HashMap;
import java.util.Map;
+import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
@@ -44,12 +45,24 @@
*/
public class Tika {
- private static final Parser parser = new AutoDetectParser();
+ /**
+ * The parser instance used by this facade.
+ */
+ private final Parser parser;
/**
- * Private constructor to prevent this class from being instantiated.
+ * Creates a Tika facade using the given configuration.
+ * @param config
*/
- private Tika() {
+ public Tika(TikaConfig config) {
+ this.parser = new AutoDetectParser(config);
+ }
+
+ /**
+ * Creates a Tika facade using the default configuration.
+ */
+ public Tika() {
+ this(TikaConfig.getDefaultConfig());
}
/**
@@ -62,7 +75,7 @@
* @return extracted text content
* @throws IOException if the document can not be read or parsed
*/
- public static Reader parse(InputStream stream, Metadata metadata)
+ public Reader parse(InputStream stream, Metadata metadata)
throws IOException {
Map<String, Object> context = new HashMap<String, Object>();
context.put(Parser.class.getName(), parser);
@@ -76,7 +89,7 @@
* @return extracted text content
* @throws IOException if the document can not be read or parsed
*/
- public static Reader parse(InputStream stream) throws IOException {
+ public Reader parse(InputStream stream) throws IOException {
return parse(stream, new Metadata());
}
@@ -88,8 +101,7 @@
* @throws FileNotFoundException if the given file does not exist
* @throws IOException if the file can not be read or parsed
*/
- public static Reader parse(File file)
- throws FileNotFoundException, IOException {
+ public Reader parse(File file) throws FileNotFoundException, IOException {
return parse(new FileInputStream(file), getFileMetadata(file));
}
@@ -101,7 +113,7 @@
* @return extracted text content
* @throws IOException if the resource can not be read or parsed
*/
- public static Reader parse(URL url) throws IOException {
+ public Reader parse(URL url) throws IOException {
return parse(url.openStream(), getUrlMetadata(url));
}
@@ -115,7 +127,7 @@
* @throws IOException if the document can not be read
* @throws TikaException if the document can not be parsed
*/
- public static String parseToString(InputStream stream, Metadata metadata)
+ public String parseToString(InputStream stream, Metadata metadata)
throws IOException, TikaException {
try {
ContentHandler handler = new BodyContentHandler();
@@ -140,7 +152,7 @@
* @throws IOException if the document can not be read
* @throws TikaException if the document can not be parsed
*/
- public static String parseToString(InputStream stream)
+ public String parseToString(InputStream stream)
throws IOException, TikaException {
return parseToString(stream, new Metadata());
}
@@ -154,7 +166,7 @@
* @throws IOException if the file can not be read
* @throws TikaException if the file can not be parsed
*/
- public static String parseToString(File file)
+ public String parseToString(File file)
throws FileNotFoundException, IOException, TikaException {
return parseToString(new FileInputStream(file), getFileMetadata(file));
}
@@ -168,8 +180,7 @@
* @throws IOException if the resource can not be read
* @throws TikaException if the resource can not be parsed
*/
- public static String parseToString(URL url)
- throws IOException, TikaException {
+ public String parseToString(URL url) throws IOException, TikaException {
return parseToString(url.openStream(), getUrlMetadata(url));
}
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java?rev=819358&r1=819357&r2=819358&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java Sun Sep 27 17:36:42 2009
@@ -165,17 +165,21 @@
* return a shared instance once it is completely immutable.
*
* @return default configuration
- * @throws TikaException if the default configuration is not available
*/
- public static TikaConfig getDefaultConfig() throws TikaException {
+ public static TikaConfig getDefaultConfig() {
try {
InputStream stream =
TikaConfig.class.getResourceAsStream(DEFAULT_CONFIG_LOCATION);
return new TikaConfig(stream);
} catch (IOException e) {
- throw new TikaException("Unable to read default configuration", e);
+ throw new RuntimeException(
+ "Unable to read default configuration", e);
} catch (SAXException e) {
- throw new TikaException("Unable to parse default configuration", e);
+ throw new RuntimeException(
+ "Unable to parse default configuration", e);
+ } catch (TikaException e) {
+ throw new RuntimeException(
+ "Unable to access default configuration", e);
}
}
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java?rev=819358&r1=819357&r2=819358&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java Sun Sep 27 17:36:42 2009
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -45,12 +45,7 @@
* configuration.
*/
public AutoDetectParser() {
- try {
- setConfig(TikaConfig.getDefaultConfig());
- } catch (TikaException e) {
- // FIXME: This should never happen
- throw new RuntimeException(e);
- }
+ this(TikaConfig.getDefaultConfig());
}
public AutoDetectParser(TikaConfig config) {
Modified:
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java?rev=819358&r1=819357&r2=819358&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java Sun Sep 27 17:36:42 2009
@@ -29,7 +29,7 @@
public void testClassParsing() throws Exception {
String path = "/test-documents/AutoDetectParser.class";
Metadata metadata = new Metadata();
- String content = Tika.parseToString(
+ String content = new Tika().parseToString(
ClassParserTest.class.getResourceAsStream(path), metadata);
assertEquals("AutoDetectParser", metadata.get(Metadata.TITLE));
Modified:
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java?rev=819358&r1=819357&r2=819358&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java Sun Sep 27 17:36:42 2009
@@ -26,7 +26,7 @@
public void testWAV() throws Exception {
String path = "/test-documents/testWAV.wav";
Metadata metadata = new Metadata();
- String content = Tika.parseToString(
+ String content = new Tika().parseToString(
AudioParserTest.class.getResourceAsStream(path), metadata);
assertEquals("audio/x-wav", metadata.get(Metadata.CONTENT_TYPE));
@@ -41,7 +41,7 @@
public void testAIFF() throws Exception {
String path = "/test-documents/testAIFF.aif";
Metadata metadata = new Metadata();
- String content = Tika.parseToString(
+ String content = new Tika().parseToString(
AudioParserTest.class.getResourceAsStream(path), metadata);
assertEquals("audio/x-aiff", metadata.get(Metadata.CONTENT_TYPE));
@@ -56,7 +56,7 @@
public void testAU() throws Exception {
String path = "/test-documents/testAU.au";
Metadata metadata = new Metadata();
- String content = Tika.parseToString(
+ String content = new Tika().parseToString(
AudioParserTest.class.getResourceAsStream(path), metadata);
assertEquals("audio/basic", metadata.get(Metadata.CONTENT_TYPE));
Modified:
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java?rev=819358&r1=819357&r2=819358&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java Sun Sep 27 17:36:42 2009
@@ -26,7 +26,7 @@
public void testMID() throws Exception {
String path = "/test-documents/testMID.mid";
Metadata metadata = new Metadata();
- String content = Tika.parseToString(
+ String content = new Tika().parseToString(
MidiParserTest.class.getResourceAsStream(path), metadata);
assertEquals("audio/midi", metadata.get(Metadata.CONTENT_TYPE));
Modified:
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java?rev=819358&r1=819357&r2=819358&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java Sun Sep 27 17:36:42 2009
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -79,7 +79,7 @@
public void XtestParseUTF8() throws IOException, SAXException,
TikaException {
String path = "/test-documents/testXHTML_utf8.html";
Metadata metadata = new Metadata();
- String content = Tika.parseToString(
+ String content = new Tika().parseToString(
HtmlParserTest.class.getResourceAsStream(path), metadata);
assertTrue("Did not contain expected text:"
@@ -97,7 +97,7 @@
public void testXhtmlParsing() throws Exception {
String path = "/test-documents/testXHTML.html";
Metadata metadata = new Metadata();
- String content = Tika.parseToString(
+ String content = new Tika().parseToString(
HtmlParserTest.class.getResourceAsStream(path), metadata);
assertEquals("application/xhtml+xml",
metadata.get(Metadata.CONTENT_TYPE));
@@ -124,7 +124,7 @@
*/
public void testCharactersDirectlyUnderBodyElement() throws Exception {
String test = "<html><body>test</body></html>";
- String content = Tika.parseToString(
+ String content = new Tika().parseToString(
new ByteArrayInputStream(test.getBytes("UTF-8")));
assertEquals("test", content);
}
@@ -136,7 +136,7 @@
public void testWhitespaceBetweenTableCells() throws Exception {
String test =
"<html><body><table><tr><td>a</td><td>b</td></table></body></html>";
- String content = Tika.parseToString(
+ String content = new Tika().parseToString(
new ByteArrayInputStream(test.getBytes("UTF-8")));
assertTrue(content.contains("a"));
assertTrue(content.contains("b"));