Author: jukka
Date: Tue Jan 27 14:45:06 2009
New Revision: 738100
URL: http://svn.apache.org/viewvc?rev=738100&view=rev
Log:
TIKA-95: Pluggable magic header detectors
The Detector interface allows the input stream to be null.
The MagicDetector and TextDetector classes now handle that case by returning MediaType.OCTET_STREAM.
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java
lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java
lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java
lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java?rev=738100&r1=738099&r2=738100&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java
(original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java
Tue Jan 27 14:45:06 2009
@@ -151,8 +151,17 @@
}
}
+ /**
+ *
+ * @param input document input stream, or <code>null</code>
+ * @param metadata ignored
+ */
public MediaType detect(InputStream input, Metadata metadata)
throws IOException {
+ if (input == null) {
+ return MediaType.OCTET_STREAM;
+ }
+
long offset = 0;
// Skip bytes at the beginning, using skip() or read()
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java?rev=738100&r1=738099&r2=738100&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java
(original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java
Tue Jan 27 14:45:06 2009
@@ -80,13 +80,16 @@
* Looks at the beginning of the document input stream to determine
* whether the document is text or not.
*
- * @param input document input stream
+ * @param input document input stream, or <code>null</code>
* @param metadata ignored
* @return "text/plain" if the input stream suggests a text document,
* "application/octet-stream" otherwise
*/
public MediaType detect(InputStream input, Metadata metadata)
throws IOException {
+ if (input == null) {
+ return MediaType.OCTET_STREAM;
+ }
for (int i = 0; i < NUMBER_OF_BYTES_TO_TEST; i++) {
int ch = input.read();
if (ch == -1) {
Modified:
lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java?rev=738100&r1=738099&r2=738100&view=diff
==============================================================================
---
lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java
(original)
+++
lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java
Tue Jan 27 14:45:06 2009
@@ -29,6 +29,14 @@
*/
public class MagicDetectorTest extends TestCase {
+ public void testDetectNull() throws Exception {
+ MediaType html = new MediaType("text", "html");
+ Detector detector = new MagicDetector(html, "<html".getBytes("ASCII"));
+ assertEquals(
+ MediaType.OCTET_STREAM,
+ detector.detect(null, new Metadata()));
+ }
+
public void testDetectSimple() throws Exception {
MediaType html = new MediaType("text", "html");
Detector detector = new MagicDetector(html, "<html".getBytes("ASCII"));
Modified:
lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java?rev=738100&r1=738099&r2=738100&view=diff
==============================================================================
---
lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java
(original)
+++
lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java
Tue Jan 27 14:45:06 2009
@@ -32,6 +32,12 @@
private final Detector detector = new TextDetector();
+ public void testDetectNull() throws Exception {
+ assertEquals(
+ MediaType.OCTET_STREAM,
+ detector.detect(null, new Metadata()));
+ }
+
public void testDetectText() throws Exception {
assertText(new byte[0]);