Author: jukka
Date: Fri Sep 11 19:19:48 2009
New Revision: 813986
URL: http://svn.apache.org/viewvc?rev=813986&view=rev
Log:
TIKA-275: Parse context
Added the new context argument to Parser.parse(). The old parse() signature
will remain for backwards compatibility until the Tika 1.0 release.
TODO: Migrate all parse() calls to use the new signature, and make the
parser implementations actually use the context for something.
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/DelegatingParser.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/EmptyParser.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ErrorParser.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ExternalParser.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/Parser.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserPostProcessor.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/asm/ClassParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/AudioParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeContentParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/XMLParser.java
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
(original)
+++
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
Fri Sep 11 19:19:48 2009
@@ -18,6 +18,7 @@
import java.io.IOException;
import java.io.InputStream;
+import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
@@ -110,13 +111,14 @@
* honor the {@link Parser} contract.
*/
public void parse(
- InputStream stream, ContentHandler handler, Metadata metadata)
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
throws IOException, SAXException, TikaException {
Parser parser = getParser(metadata);
TaggedInputStream taggedStream = new TaggedInputStream(stream);
TaggedContentHandler taggedHandler = new TaggedContentHandler(handler);
try {
- parser.parse(taggedStream, taggedHandler, metadata);
+ parser.parse(taggedStream, taggedHandler, metadata, context);
} catch (RuntimeException e) {
throw new TikaException(
"Unexpected RuntimeException from " + parser, e);
@@ -131,4 +133,14 @@
}
}
+ /**
+ * @deprecated This method will be removed in Apache Tika 1.0.
+ */
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ Map<String, Object> context = Collections.emptyMap();
+ parse(stream, handler, metadata, context);
+ }
+
}
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/DelegatingParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/DelegatingParser.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/DelegatingParser.java
(original)
+++
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/DelegatingParser.java
Fri Sep 11 19:19:48 2009
@@ -18,6 +18,8 @@
import java.io.IOException;
import java.io.InputStream;
+import java.util.Collections;
+import java.util.Map;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
@@ -78,9 +80,20 @@
* specified delegate parser.
*/
public void parse(
- InputStream stream, ContentHandler handler, Metadata metadata)
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
throws SAXException, IOException, TikaException {
- delegate.parse(stream, handler, metadata);
+ delegate.parse(stream, handler, metadata, context);
+ }
+
+ /**
+ * @deprecated This method will be removed in Apache Tika 1.0.
+ */
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ Map<String, Object> context = Collections.emptyMap();
+ parse(stream, handler, metadata, context);
}
}
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/EmptyParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/EmptyParser.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/EmptyParser.java
(original)
+++
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/EmptyParser.java
Fri Sep 11 19:19:48 2009
@@ -16,8 +16,12 @@
*/
package org.apache.tika.parser;
+import java.io.IOException;
import java.io.InputStream;
+import java.util.Collections;
+import java.util.Map;
+import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
@@ -31,11 +35,22 @@
public class EmptyParser implements Parser {
public void parse(
- InputStream stream, ContentHandler handler, Metadata metadata)
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
throws SAXException {
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
xhtml.endDocument();
}
+ /**
+ * @deprecated This method will be removed in Apache Tika 1.0.
+ */
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ Map<String, Object> context = Collections.emptyMap();
+ parse(stream, handler, metadata, context);
+ }
+
}
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ErrorParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ErrorParser.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ErrorParser.java
(original)
+++
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ErrorParser.java
Fri Sep 11 19:19:48 2009
@@ -16,11 +16,15 @@
*/
package org.apache.tika.parser;
+import java.io.IOException;
import java.io.InputStream;
+import java.util.Collections;
+import java.util.Map;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
/**
* Dummy parser that always throws a {@link TikaException} without even
@@ -30,9 +34,20 @@
public class ErrorParser implements Parser {
public void parse(
- InputStream stream, ContentHandler handler, Metadata metadata)
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
throws TikaException {
throw new TikaException("Parse error");
}
+ /**
+ * @deprecated This method will be removed in Apache Tika 1.0.
+ */
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ Map<String, Object> context = Collections.emptyMap();
+ parse(stream, handler, metadata, context);
+ }
+
}
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ExternalParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ExternalParser.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ExternalParser.java
(original)
+++
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ExternalParser.java
Fri Sep 11 19:19:48 2009
@@ -21,6 +21,8 @@
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
+import java.util.Collections;
+import java.util.Map;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.IOUtils;
@@ -48,7 +50,8 @@
* No metadata is extracted.
*/
public void parse(
- final InputStream stream, ContentHandler handler, Metadata metadata)
+ final InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
throws IOException, SAXException, TikaException {
XHTMLContentHandler xhtml =
new XHTMLContentHandler(handler, metadata);
@@ -67,6 +70,16 @@
}
/**
+ * @deprecated This method will be removed in Apache Tika 1.0.
+ */
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ Map<String, Object> context = Collections.emptyMap();
+ parse(stream, handler, metadata, context);
+ }
+
+ /**
* Starts a thread that extracts the contents of the standard output
* stream of the given process to the given XHTML content handler.
* The standard output stream is closed once fully processed.
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/Parser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/Parser.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/Parser.java
(original)
+++
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/Parser.java
Fri Sep 11 19:19:48 2009
@@ -18,6 +18,7 @@
import java.io.IOException;
import java.io.InputStream;
+import java.util.Map;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
@@ -25,7 +26,7 @@
import org.xml.sax.SAXException;
/**
- * Tika parser interface
+ * Tika parser interface.
*/
public interface Parser {
@@ -35,15 +36,34 @@
* <p>
* The given document stream is consumed but not closed by this method.
* The responsibility to close the stream remains on the caller.
+ * <p>
+ * Information about the parsing context can be passed in the context
+ * parameter. See the parser implementations for the kinds of context
+ * information they expect.
*
+ * @since Apache Tika 0.5
* @param stream the document stream (input)
* @param handler handler for the XHTML SAX events (output)
* @param metadata document metadata (input and output)
+ * @param context parse context
* @throws IOException if the document stream could not be read
* @throws SAXException if the SAX events could not be processed
* @throws TikaException if the document could not be parsed
*/
- void parse(InputStream stream, ContentHandler handler, Metadata metadata)
+ void parse(
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
throws IOException, SAXException, TikaException;
+ /**
+ * The parse() method from Tika 0.4 and earlier. Please use the
+ * {@link #parse(InputStream, ContentHandler, Metadata, Map)} method
+ * instead in new code. Calls to this backwards compatibility method
+ * are forwarded to the new parse() method with an empty parse context.
+ *
+ * @deprecated This method will be removed in Apache Tika 1.0.
+ */
+ void parse(InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException;
+
}
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java
(original)
+++
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java
Fri Sep 11 19:19:48 2009
@@ -18,6 +18,8 @@
import java.io.IOException;
import java.io.InputStream;
+import java.util.Collections;
+import java.util.Map;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
@@ -52,9 +54,20 @@
* the decorated parser) to implement extra decoration.
*/
public void parse(
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
+ throws IOException, SAXException, TikaException {
+ parser.parse(stream, handler, metadata, context);
+ }
+
+ /**
+ * @deprecated This method will be removed in Apache Tika 1.0.
+ */
+ public void parse(
InputStream stream, ContentHandler handler, Metadata metadata)
throws IOException, SAXException, TikaException {
- parser.parse(stream, handler, metadata);
+ Map<String, Object> context = Collections.emptyMap();
+ parse(stream, handler, metadata, context);
}
}
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserPostProcessor.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserPostProcessor.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserPostProcessor.java
(original)
+++
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserPostProcessor.java
Fri Sep 11 19:19:48 2009
@@ -18,6 +18,7 @@
import java.io.IOException;
import java.io.InputStream;
+import java.util.Map;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
@@ -49,10 +50,12 @@
* results as described above.
*/
public void parse(
- InputStream stream, ContentHandler handler, Metadata metadata)
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
throws IOException, SAXException, TikaException {
ContentHandler body = new BodyContentHandler();
- super.parse(stream, new TeeContentHandler(handler, body), metadata);
+ ContentHandler tee = new TeeContentHandler(handler, body);
+ super.parse(stream, tee, metadata, context);
String content = body.toString();
metadata.set("fulltext", content);
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/asm/ClassParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/asm/ClassParser.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/asm/ClassParser.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/asm/ClassParser.java
Fri Sep 11 19:19:48 2009
@@ -18,6 +18,8 @@
import java.io.IOException;
import java.io.InputStream;
+import java.util.Collections;
+import java.util.Map;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
@@ -33,7 +35,8 @@
public class ClassParser implements Parser {
public void parse(
- InputStream stream, ContentHandler handler, Metadata metadata)
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
throws IOException, SAXException, TikaException {
try {
ClassVisitor visitor = new XHTMLClassVisitor(handler, metadata);
@@ -48,4 +51,14 @@
}
}
+ /**
+ * @deprecated This method will be removed in Apache Tika 1.0.
+ */
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ Map<String, Object> context = Collections.emptyMap();
+ parse(stream, handler, metadata, context);
+ }
+
}
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/AudioParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/AudioParser.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/AudioParser.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/AudioParser.java
Fri Sep 11 19:19:48 2009
@@ -19,6 +19,8 @@
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.util.Collections;
+import java.util.Map;
import java.util.Map.Entry;
import javax.sound.sampled.AudioFormat;
@@ -35,7 +37,8 @@
public class AudioParser implements Parser {
public void parse(
- InputStream stream, ContentHandler handler, Metadata metadata)
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
throws IOException, SAXException, TikaException {
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
@@ -85,4 +88,14 @@
xhtml.endDocument();
}
+ /**
+ * @deprecated This method will be removed in Apache Tika 1.0.
+ */
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ Map<String, Object> context = Collections.emptyMap();
+ parse(stream, handler, metadata, context);
+ }
+
}
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java
Fri Sep 11 19:19:48 2009
@@ -19,6 +19,8 @@
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.util.Collections;
+import java.util.Map;
import javax.sound.midi.InvalidMidiDataException;
import javax.sound.midi.MetaMessage;
@@ -38,7 +40,8 @@
public class MidiParser implements Parser {
public void parse(
- InputStream stream, ContentHandler handler, Metadata metadata)
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
throws IOException, SAXException, TikaException {
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
@@ -94,4 +97,14 @@
xhtml.endDocument();
}
+ /**
+ * @deprecated This method will be removed in Apache Tika 1.0.
+ */
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ Map<String, Object> context = Collections.emptyMap();
+ parse(stream, handler, metadata, context);
+ }
+
}
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java
Fri Sep 11 19:19:48 2009
@@ -18,6 +18,7 @@
import java.io.IOException;
import java.io.InputStream;
+import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
@@ -89,7 +90,8 @@
}
public void parse(
- InputStream stream, ContentHandler handler, Metadata metadata)
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
throws IOException, SAXException, TikaException {
// Protect the stream from being closed by CyberNeko
stream = new CloseShieldInputStream(stream);
@@ -119,6 +121,16 @@
parser.parse(source);
}
+ /**
+ * @deprecated This method will be removed in Apache Tika 1.0.
+ */
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ Map<String, Object> context = Collections.emptyMap();
+ parse(stream, handler, metadata, context);
+ }
+
private ContentHandler getTitleHandler(final Metadata metadata) {
return new WriteOutContentHandler() {
@Override
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
Fri Sep 11 19:19:48 2009
@@ -18,7 +18,9 @@
import java.io.IOException;
import java.io.InputStream;
+import java.util.Collections;
import java.util.Iterator;
+import java.util.Map;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
@@ -34,7 +36,8 @@
public class ImageParser implements Parser {
public void parse(
- InputStream stream, ContentHandler handler, Metadata metadata)
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
throws IOException, SAXException, TikaException {
String type = metadata.get(Metadata.CONTENT_TYPE);
if (type != null) {
@@ -55,4 +58,14 @@
xhtml.endDocument();
}
+ /**
+ * @deprecated This method will be removed in Apache Tika 1.0.
+ */
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ Map<String, Object> context = Collections.emptyMap();
+ parse(stream, handler, metadata, context);
+ }
+
}
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
Fri Sep 11 19:19:48 2009
@@ -19,8 +19,10 @@
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
+import java.util.Collections;
import java.util.Date;
import java.util.Iterator;
+import java.util.Map;
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hpsf.CustomProperties;
@@ -58,7 +60,8 @@
* Extracts properties and text from an MS Document input stream
*/
public void parse(
- InputStream stream, ContentHandler handler, Metadata metadata)
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
throws IOException, SAXException, TikaException {
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
@@ -113,6 +116,16 @@
xhtml.endDocument();
}
+ /**
+ * @deprecated This method will be removed in Apache Tika 1.0.
+ */
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ Map<String, Object> context = Collections.emptyMap();
+ parse(stream, handler, metadata, context);
+ }
+
private void parseSummaryEntryIfExists(
POIFSFileSystem filesystem, String entryName, Metadata metadata)
throws IOException, TikaException {
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.java
Fri Sep 11 19:19:48 2009
@@ -18,6 +18,8 @@
import java.io.IOException;
import java.io.InputStream;
+import java.util.Collections;
+import java.util.Map;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.extractor.ExtractorFactory;
@@ -36,13 +38,10 @@
*/
public class OOXMLParser implements Parser {
- /**
- * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
- * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata)
- */
- public void parse(InputStream stream, ContentHandler handler,
- Metadata metadata) throws IOException, SAXException, TikaException {
-
+ public void parse(
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
+ throws IOException, SAXException, TikaException {
try {
OOXMLExtractor extractor = OOXMLExtractorFactory
.createExtractor((POIXMLTextExtractor) ExtractorFactory
@@ -58,4 +57,14 @@
}
}
+ /**
+ * @deprecated This method will be removed in Apache Tika 1.0.
+ */
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ Map<String, Object> context = Collections.emptyMap();
+ parse(stream, handler, metadata, context);
+ }
+
}
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
Fri Sep 11 19:19:48 2009
@@ -19,6 +19,8 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
+import java.util.Collections;
+import java.util.Map;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
@@ -171,7 +173,8 @@
};
public void parse(
- InputStream stream, ContentHandler handler, Metadata metadata)
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
throws IOException, SAXException, TikaException {
metadata.set(Metadata.CONTENT_TYPE, "audio/mpeg");
@@ -211,6 +214,16 @@
}
/**
+ * @deprecated This method will be removed in Apache Tika 1.0.
+ */
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ Map<String, Object> context = Collections.emptyMap();
+ parse(stream, handler, metadata, context);
+ }
+
+ /**
* Returns the identified ISO-8859-1 substring from the given byte buffer.
* The return value is the zero-terminated substring retrieved from
* between the given start and end positions in the given byte buffer.
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeContentParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeContentParser.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeContentParser.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeContentParser.java
Fri Sep 11 19:19:48 2009
@@ -18,6 +18,7 @@
import java.io.IOException;
import java.io.InputStream;
+import java.util.Map;
import java.util.Stack;
import java.util.HashMap;
import java.util.Collections;
@@ -123,7 +124,8 @@
}
public void parse(
- InputStream stream, ContentHandler handler, Metadata metadata)
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
throws IOException, SAXException, TikaException {
final XHTMLContentHandler xhtml =
new XHTMLContentHandler(handler,metadata);
@@ -272,4 +274,14 @@
}
}
+ /**
+ * @deprecated This method will be removed in Apache Tika 1.0.
+ */
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ Map<String, Object> context = Collections.emptyMap();
+ parse(stream, handler, metadata, context);
+ }
+
}
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java
Fri Sep 11 19:19:48 2009
@@ -18,6 +18,8 @@
import java.io.IOException;
import java.io.InputStream;
+import java.util.Collections;
+import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
@@ -55,7 +57,8 @@
}
public void parse(
- InputStream stream, ContentHandler handler, Metadata metadata)
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
throws IOException, SAXException, TikaException {
ZipInputStream zip = new ZipInputStream(stream);
ZipEntry entry = zip.getNextEntry();
@@ -64,12 +67,22 @@
String type = IOUtils.toString(zip, "UTF-8");
metadata.set(Metadata.CONTENT_TYPE, type);
} else if (entry.getName().equals("meta.xml")) {
- meta.parse(zip, new DefaultHandler(), metadata);
+ meta.parse(zip, new DefaultHandler(), metadata, context);
} else if (entry.getName().equals("content.xml")) {
- content.parse(zip, handler, metadata);
+ content.parse(zip, handler, metadata, context);
}
entry = zip.getNextEntry();
}
}
+ /**
+ * @deprecated This method will be removed in Apache Tika 1.0.
+ */
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ Map<String, Object> context = Collections.emptyMap();
+ parse(stream, handler, metadata, context);
+ }
+
}
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
Fri Sep 11 19:19:48 2009
@@ -19,6 +19,8 @@
import java.io.IOException;
import java.io.InputStream;
import java.util.Calendar;
+import java.util.Collections;
+import java.util.Map;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
@@ -47,7 +49,8 @@
public static final String PASSWORD =
"org.apache.tika.parser.pdf.password";
public void parse(
- InputStream stream, ContentHandler handler, Metadata metadata)
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
throws IOException, SAXException, TikaException {
PDDocument pdfDocument = PDDocument.load(stream);
try {
@@ -71,6 +74,16 @@
}
}
+ /** Backwards-compatible overload that parses the stream with an empty context map.
+ * @deprecated This method will be removed in Apache Tika 1.0. Use {@link #parse(InputStream, ContentHandler, Metadata, Map)} instead.
+ */
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ Map<String, Object> context = Collections.emptyMap(); // legacy callers supply no context
+ parse(stream, handler, metadata, context); // delegate to the context-aware overload
+ }
+
private void extractMetadata(PDDocument document, Metadata metadata)
throws TikaException {
PDDocumentInformation info = document.getDocumentInformation();
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java
Fri Sep 11 19:19:48 2009
@@ -18,6 +18,8 @@
import java.io.IOException;
import java.io.InputStream;
+import java.util.Collections;
+import java.util.Map;
import javax.swing.text.BadLocationException;
import javax.swing.text.DefaultStyledDocument;
@@ -36,7 +38,8 @@
public class RTFParser implements Parser {
public void parse(
- InputStream stream, ContentHandler handler, Metadata metadata)
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
throws IOException, SAXException, TikaException {
try {
DefaultStyledDocument sd = new DefaultStyledDocument();
@@ -52,4 +55,14 @@
}
}
+ /** Backwards-compatible overload that parses the stream with an empty context map.
+ * @deprecated This method will be removed in Apache Tika 1.0. Use {@link #parse(InputStream, ContentHandler, Metadata, Map)} instead.
+ */
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ Map<String, Object> context = Collections.emptyMap(); // legacy callers supply no context
+ parse(stream, handler, metadata, context); // delegate to the context-aware overload
+ }
+
}
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java
Fri Sep 11 19:19:48 2009
@@ -23,6 +23,8 @@
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
+import java.util.Collections;
+import java.util.Map;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.DublinCore;
@@ -59,7 +61,8 @@
public class TXTParser implements Parser {
public void parse(
- InputStream stream, ContentHandler handler, Metadata metadata)
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
throws IOException, SAXException, TikaException {
metadata.set(Metadata.CONTENT_TYPE, "text/plain");
@@ -120,4 +123,14 @@
}
}
+ /** Backwards-compatible overload that parses the stream with an empty context map.
+ * @deprecated This method will be removed in Apache Tika 1.0. Use {@link #parse(InputStream, ContentHandler, Metadata, Map)} instead.
+ */
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ Map<String, Object> context = Collections.emptyMap(); // legacy callers supply no context
+ parse(stream, handler, metadata, context); // delegate to the context-aware overload
+ }
+
}
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/XMLParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/XMLParser.java?rev=813986&r1=813985&r2=813986&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/XMLParser.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/XMLParser.java
Fri Sep 11 19:19:48 2009
@@ -18,6 +18,8 @@
import java.io.IOException;
import java.io.InputStream;
+import java.util.Collections;
+import java.util.Map;
import javax.xml.XMLConstants;
import javax.xml.parsers.ParserConfigurationException;
@@ -41,7 +43,8 @@
public class XMLParser implements Parser {
public void parse(
- InputStream stream, ContentHandler handler, Metadata metadata)
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
throws IOException, SAXException, TikaException {
if (metadata.get(Metadata.CONTENT_TYPE) == null) {
metadata.set(Metadata.CONTENT_TYPE, "application/xml");
@@ -77,6 +80,16 @@
xhtml.endDocument();
}
+ /** Backwards-compatible overload that parses the stream with an empty context map.
+ * @deprecated This method will be removed in Apache Tika 1.0. Use {@link #parse(InputStream, ContentHandler, Metadata, Map)} instead.
+ */
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ Map<String, Object> context = Collections.emptyMap(); // legacy callers supply no context
+ parse(stream, handler, metadata, context); // delegate to the context-aware overload
+ }
+
protected ContentHandler getContentHandler(
ContentHandler handler, Metadata metadata) {
return new TextContentHandler(handler);