Author: jukka
Date: Tue Jan 27 22:09:28 2009
New Revision: 738269
URL: http://svn.apache.org/viewvc?rev=738269&view=rev
Log:
TIKA-95: Pluggable magic header detectors
Updated MagicDetector to comply with the modified Detector contract.
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java
lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java?rev=738269&r1=738268&r2=738269&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java
(original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java
Tue Jan 27 22:09:28 2009
@@ -162,55 +162,61 @@
return MediaType.OCTET_STREAM;
}
- long offset = 0;
-
- // Skip bytes at the beginning, using skip() or read()
- while (offset < offsetRangeBegin) {
- long n = input.skip(offsetRangeBegin - offset);
- if (n > 0) {
- offset += n;
- } else if (input.read() != -1) {
- offset += 1;
- } else {
- return MediaType.OCTET_STREAM;
+ input.mark(length);
+ try {
+ long offset = 0;
+
+ // Skip bytes at the beginning, using skip() or read()
+ while (offset < offsetRangeBegin) {
+ long n = input.skip(offsetRangeBegin - offset);
+ if (n > 0) {
+ offset += n;
+ } else if (input.read() != -1) {
+ offset += 1;
+ } else {
+ return MediaType.OCTET_STREAM;
+ }
}
- }
- // Fill in the comparison window
- while (offset < offsetRangeBegin + sourceBuffer.length) {
- int i = (int) (offset - offsetRangeBegin);
- int n = input.read(sourceBuffer, i, sourceBuffer.length - i);
- if (n == -1) {
- return MediaType.OCTET_STREAM;
+ // Fill in the comparison window
+ while (offset < offsetRangeBegin + sourceBuffer.length) {
+ int i = (int) (offset - offsetRangeBegin);
+ int n = input.read(sourceBuffer, i, sourceBuffer.length - i);
+ if (n == -1) {
+ return MediaType.OCTET_STREAM;
+ }
+ offset += n;
}
- offset += n;
- }
- // Loop until we've covered the entire offset range
- while (true) {
- // Apply the mask, if any
- if (mask != null) {
- for (int i = 0; i < length; i++) {
- compareBuffer[i] = (byte) (sourceBuffer[i] & mask[i]);
+ // Loop until we've covered the entire offset range
+ while (true) {
+ // Apply the mask, if any
+ if (mask != null) {
+ for (int i = 0; i < length; i++) {
+ compareBuffer[i] = (byte) (sourceBuffer[i] & mask[i]);
+ }
}
- }
- if (Arrays.equals(pattern, compareBuffer)) {
- // We have a match, so return the matching media type
- return type;
- } else if (offset < offsetRangeEnd + sourceBuffer.length) {
- // No match, move the comparison window forward and try again
- int c = input.read();
- if (c == -1) {
+ if (Arrays.equals(pattern, compareBuffer)) {
+ // We have a match, so return the matching media type
+ return type;
+ } else if (offset < offsetRangeEnd + sourceBuffer.length) {
+ // No match, move the comparison window forward
+ int c = input.read();
+ if (c == -1) {
+ return MediaType.OCTET_STREAM;
+ }
+ System.arraycopy(
+ sourceBuffer, 1, sourceBuffer, 0, length - 1);
+ sourceBuffer[length - 1] = (byte) c;
+ offset += 1;
+ } else {
+ // We have reached the end of the offset range, no match
return MediaType.OCTET_STREAM;
}
- System.arraycopy(sourceBuffer, 1, sourceBuffer, 0, length - 1);
- sourceBuffer[length - 1] = (byte) c;
- offset += 1;
- } else {
- // We have reached the end of the offset range, no match.
- return MediaType.OCTET_STREAM;
}
+ } finally {
+ input.reset();
}
}
Modified:
lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java?rev=738269&r1=738268&r2=738269&view=diff
==============================================================================
---
lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java
(original)
+++
lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java
Tue Jan 27 22:09:28 2009
@@ -18,6 +18,7 @@
import java.io.ByteArrayInputStream;
import java.io.IOException;
+import java.io.InputStream;
import junit.framework.TestCase;
@@ -102,9 +103,15 @@
private void assertDetect(Detector detector, MediaType type, String data) {
try {
- assertEquals(type, detector.detect(
- new ByteArrayInputStream(data.getBytes("ASCII")),
- new Metadata()));
+ byte[] bytes = data.getBytes("ASCII");
+ InputStream stream = new ByteArrayInputStream(bytes);
+ assertEquals(type, detector.detect(stream, new Metadata()));
+
+ // Test that the stream has been reset
+ for (int i = 0; i < bytes.length; i++) {
+ assertEquals(bytes[i], (byte) stream.read());
+ }
+ assertEquals(-1, stream.read());
} catch (IOException e) {
fail("Unexpected exception from MagicDetector");
}