Author: nick
Date: Mon Aug 9 14:48:25 2010
New Revision: 983661
URL: http://svn.apache.org/viewvc?rev=983661&view=rev
Log:
TIKA-474 - Do what we can with MP3 files where the ID3 header is truncated
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/testMP3truncated.mp3
(with props)
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java?rev=983661&r1=983660&r2=983661&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
(original)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
Mon Aug 9 14:48:25 2010
@@ -106,8 +106,9 @@ public class ID3v2Frame implements MP3Fr
extendedHeader = readFully(inp, size);
}
- // Get the frame's data
- data = readFully(inp, length);
+ // Get the frame's data, or at least as much
+ // of it as we could do
+ data = readFully(inp, length, false);
}
protected static int getInt(byte[] data) {
@@ -150,6 +151,10 @@ public class ID3v2Frame implements MP3Fr
protected static byte[] readFully(InputStream inp, int length)
throws IOException {
+ return readFully(inp, length, true);
+ }
+ protected static byte[] readFully(InputStream inp, int length, boolean
shortDataIsFatal)
+ throws IOException {
byte[] b = new byte[length];
int pos = 0;
@@ -157,7 +162,13 @@ public class ID3v2Frame implements MP3Fr
while (pos < length) {
read = inp.read(b, pos, length-pos);
if (read == -1) {
- throw new IOException("Tried to read " + length + " bytes, but
only " + pos + " bytes present");
+ if(shortDataIsFatal) {
+ throw new IOException("Tried to read " + length + " bytes,
but only " + pos + " bytes present");
+ } else {
+ // Give them what we found
+ // TODO Log the short read
+ return b;
+ }
}
pos += read;
}
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java?rev=983661&r1=983660&r2=983661&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
Mon Aug 9 14:48:25 2010
@@ -265,4 +265,43 @@ public class Mp3ParserTest extends TestC
assertEquals("44100", metadata.get("samplerate"));
assertEquals("2", metadata.get("channels"));
}
+
+ /**
+ * This tests that we can handle without errors (but perhaps not
+ * all content) a file with a very very large ID3 frame that
+ * has been truncated before the end of the ID3 tags.
+ * In this case, it is a file with JPEG data in the ID3, which
+ * is truncated before the end of the JPEG bit of the ID3 frame.
+ */
+ public void testTIKA474() throws Exception {
+ Parser parser = new AutoDetectParser(); // Should auto-detect!
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+ "/test-documents/testMP3truncated.mp3");
+
+
+ try {
+ parser.parse(stream, handler, metadata, new ParseContext());
+ } finally {
+ stream.close();
+ }
+
+ // Check we could get the headers from the start
+ assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+ assertEquals("Girl you have no faith in medicine",
metadata.get(Metadata.TITLE));
+ assertEquals("The White Stripes", metadata.get(Metadata.AUTHOR));
+
+ String content = handler.toString();
+ assertTrue(content.contains("Girl you have no faith in medicine"));
+ assertTrue(content.contains("The White Stripes"));
+ assertTrue(content.contains("Elephant"));
+ assertTrue(content.contains("2003"));
+
+ // File lacks any audio frames, so we can't know these
+ assertEquals(null, metadata.get("version"));
+ assertEquals(null, metadata.get("samplerate"));
+ assertEquals(null, metadata.get("channels"));
+ }
}
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/testMP3truncated.mp3
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testMP3truncated.mp3?rev=983661&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
tika/trunk/tika-parsers/src/test/resources/test-documents/testMP3truncated.mp3
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream