Author: jukka
Date: Tue Jan 27 21:53:49 2009
New Revision: 738261

URL: http://svn.apache.org/viewvc?rev=738261&view=rev
Log:
TIKA-95: Pluggable magic header detectors

Updated the TextDetector to comply with the modified Detector contract.

Modified:
    lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java
    lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java

Modified: lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java?rev=738261&r1=738260&r2=738261&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java (original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java Tue Jan 27 21:53:49 2009
@@ -90,15 +90,21 @@
         if (input == null) {
             return MediaType.OCTET_STREAM;
         }
-        for (int i = 0; i < NUMBER_OF_BYTES_TO_TEST; i++) {
-            int ch = input.read();
-            if (ch == -1) {
-                return MediaType.TEXT_PLAIN;
-            } else if (ch < IS_CONTROL_BYTE.length && IS_CONTROL_BYTE[ch]) {
-                return MediaType.OCTET_STREAM;
+
+        input.mark(NUMBER_OF_BYTES_TO_TEST);
+        try {
+            for (int i = 0; i < NUMBER_OF_BYTES_TO_TEST; i++) {
+                int ch = input.read();
+                if (ch == -1) {
+                    return MediaType.TEXT_PLAIN;
+                } else if (ch < IS_CONTROL_BYTE.length && IS_CONTROL_BYTE[ch]) {
+                    return MediaType.OCTET_STREAM;
+                }
             }
+            return MediaType.TEXT_PLAIN;
+        } finally {
+            input.reset();
         }
-        return MediaType.TEXT_PLAIN;
     }
 
 }

Modified: lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java?rev=738261&r1=738260&r2=738261&view=diff
==============================================================================
--- lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java (original)
+++ lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java Tue Jan 27 21:53:49 2009
@@ -18,6 +18,7 @@
 
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.util.Arrays;
 
 import junit.framework.TestCase;
@@ -62,10 +63,16 @@
 
     private void assertText(byte[] data) {
         try {
+            InputStream stream = new ByteArrayInputStream(data);
             assertEquals(
                     MediaType.TEXT_PLAIN,
-                    detector.detect(
-                            new ByteArrayInputStream(data), new Metadata()));
+                    detector.detect(stream, new Metadata()));
+
+            // Test that the stream has been reset
+            for (int i = 0; i < data.length; i++) {
+                assertEquals(data[i], (byte) stream.read());
+            }
+            assertEquals(-1, stream.read());
         } catch (IOException e) {
             fail("Unexpected exception from TextDetector");
         }


Reply via email to