Author: jukka
Date: Tue Jan 27 22:09:28 2009
New Revision: 738269

URL: http://svn.apache.org/viewvc?rev=738269&view=rev
Log:
TIKA-95: Pluggable magic header detectors

Updated MagicDetector to comply with the modified Detector contract.

Modified:
    lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java
    lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java

Modified: lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java?rev=738269&r1=738268&r2=738269&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java (original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java Tue Jan 27 22:09:28 2009
@@ -162,55 +162,61 @@
             return MediaType.OCTET_STREAM;
         }
 
-        long offset = 0;
-
-        // Skip bytes at the beginning, using skip() or read()
-        while (offset < offsetRangeBegin) {
-            long n = input.skip(offsetRangeBegin - offset);
-            if (n > 0) {
-                offset += n;
-            } else if (input.read() != -1) {
-                offset += 1;
-            } else {
-                return MediaType.OCTET_STREAM;
+        input.mark(length);
+        try {
+            long offset = 0;
+
+            // Skip bytes at the beginning, using skip() or read()
+            while (offset < offsetRangeBegin) {
+                long n = input.skip(offsetRangeBegin - offset);
+                if (n > 0) {
+                    offset += n;
+                } else if (input.read() != -1) {
+                    offset += 1;
+                } else {
+                    return MediaType.OCTET_STREAM;
+                }
             }
-        }
 
-        // Fill in the comparison window
-        while (offset < offsetRangeBegin + sourceBuffer.length) {
-            int i = (int) (offset - offsetRangeBegin);
-            int n = input.read(sourceBuffer, i, sourceBuffer.length - i);
-            if (n == -1) {
-                return MediaType.OCTET_STREAM;
+            // Fill in the comparison window
+            while (offset < offsetRangeBegin + sourceBuffer.length) {
+                int i = (int) (offset - offsetRangeBegin);
+                int n = input.read(sourceBuffer, i, sourceBuffer.length - i);
+                if (n == -1) {
+                    return MediaType.OCTET_STREAM;
+                }
+                offset += n;
             }
-            offset += n;
-        }
 
-        // Loop until we've covered the entire offset range
-        while (true) {
-            // Apply the mask, if any
-            if (mask != null) {
-                for (int i = 0; i < length; i++) {
-                    compareBuffer[i] = (byte) (sourceBuffer[i] & mask[i]);
+            // Loop until we've covered the entire offset range
+            while (true) {
+                // Apply the mask, if any
+                if (mask != null) {
+                    for (int i = 0; i < length; i++) {
+                        compareBuffer[i] = (byte) (sourceBuffer[i] & mask[i]);
+                    }
                 }
-            }
 
-            if (Arrays.equals(pattern, compareBuffer)) {
-                // We have a match, so return the matching media type
-                return type;
-            } else if (offset < offsetRangeEnd + sourceBuffer.length) {
-                // No match, move the comparison window forward and try again
-                int c = input.read();
-                if (c == -1) {
+                if (Arrays.equals(pattern, compareBuffer)) {
+                    // We have a match, so return the matching media type
+                    return type;
+                } else if (offset < offsetRangeEnd + sourceBuffer.length) {
+                    // No match, move the comparison window forward
+                    int c = input.read();
+                    if (c == -1) {
+                        return MediaType.OCTET_STREAM;
+                    }
+                    System.arraycopy(
+                            sourceBuffer, 1, sourceBuffer, 0, length - 1);
+                    sourceBuffer[length - 1] = (byte) c;
+                    offset += 1;
+                } else {
+                    // We have reached the end of the offset range, no match
                     return MediaType.OCTET_STREAM;
                 }
-                System.arraycopy(sourceBuffer, 1, sourceBuffer, 0, length - 1);
-                sourceBuffer[length - 1] = (byte) c;
-                offset += 1;
-            } else {
-                // We have reached the end of the offset range, no match.
-                return MediaType.OCTET_STREAM;
             }
+        } finally {
+            input.reset();
         }
     }
 

Modified: lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java?rev=738269&r1=738268&r2=738269&view=diff
==============================================================================
--- lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java (original)
+++ lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java Tue Jan 27 22:09:28 2009
@@ -18,6 +18,7 @@
 
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
+import java.io.InputStream;
 
 import junit.framework.TestCase;
 
@@ -102,9 +103,15 @@
 
     private void assertDetect(Detector detector, MediaType type, String data) {
         try {
-            assertEquals(type, detector.detect(
-                    new ByteArrayInputStream(data.getBytes("ASCII")),
-                    new Metadata()));
+            byte[] bytes = data.getBytes("ASCII");
+            InputStream stream = new ByteArrayInputStream(bytes);
+            assertEquals(type, detector.detect(stream, new Metadata()));
+
+            // Test that the stream has been reset
+            for (int i = 0; i < bytes.length; i++) {
+                assertEquals(bytes[i], (byte) stream.read());
+            }
+            assertEquals(-1, stream.read());
         } catch (IOException e) {
             fail("Unexpected exception from MagicDetector");
         }


Reply via email to