Author: jukka
Date: Tue Jan 27 21:53:49 2009
New Revision: 738261
URL: http://svn.apache.org/viewvc?rev=738261&view=rev
Log:
TIKA-95: Pluggable magic header detectors
Updated the TextDetector to comply with the modified Detector contract.
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java
lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java?rev=738261&r1=738260&r2=738261&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java (original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java Tue Jan 27 21:53:49 2009
@@ -90,15 +90,21 @@
if (input == null) {
return MediaType.OCTET_STREAM;
}
- for (int i = 0; i < NUMBER_OF_BYTES_TO_TEST; i++) {
- int ch = input.read();
- if (ch == -1) {
- return MediaType.TEXT_PLAIN;
- } else if (ch < IS_CONTROL_BYTE.length && IS_CONTROL_BYTE[ch]) {
- return MediaType.OCTET_STREAM;
+
+ input.mark(NUMBER_OF_BYTES_TO_TEST);
+ try {
+ for (int i = 0; i < NUMBER_OF_BYTES_TO_TEST; i++) {
+ int ch = input.read();
+ if (ch == -1) {
+ return MediaType.TEXT_PLAIN;
+ } else if (ch < IS_CONTROL_BYTE.length && IS_CONTROL_BYTE[ch]) {
+ return MediaType.OCTET_STREAM;
+ }
}
+ return MediaType.TEXT_PLAIN;
+ } finally {
+ input.reset();
}
- return MediaType.TEXT_PLAIN;
}
}
Modified:
lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java?rev=738261&r1=738260&r2=738261&view=diff
==============================================================================
--- lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java (original)
+++ lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java Tue Jan 27 21:53:49 2009
@@ -18,6 +18,7 @@
import java.io.ByteArrayInputStream;
import java.io.IOException;
+import java.io.InputStream;
import java.util.Arrays;
import junit.framework.TestCase;
@@ -62,10 +63,16 @@
private void assertText(byte[] data) {
try {
+ InputStream stream = new ByteArrayInputStream(data);
assertEquals(
MediaType.TEXT_PLAIN,
- detector.detect(
- new ByteArrayInputStream(data), new Metadata()));
+ detector.detect(stream, new Metadata()));
+
+ // Test that the stream has been reset
+ for (int i = 0; i < data.length; i++) {
+ assertEquals(data[i], (byte) stream.read());
+ }
+ assertEquals(-1, stream.read());
} catch (IOException e) {
fail("Unexpected exception from TextDetector");
}