Author: nick
Date: Mon Aug 2 15:55:13 2010
New Revision: 981573
URL: http://svn.apache.org/viewvc?rev=981573&view=rev
Log:
Don't break on MP3 files whose ID3v2.4 tags are broken and lie about their
size (TIKA-424).
(The unit test for this will not normally be run unless you explicitly
download the sample file, as we can't redistribute it as part of Tika.)
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java?rev=981573&r1=981572&r2=981573&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java Mon Aug 2 15:55:13 2010
@@ -182,7 +182,8 @@ public class ID3v2Frame implements MP3Fr
// Detect by the first byte being sub 0x20
String encoding = "ISO-8859-1";
byte maybeEncodingFlag = data[offset];
- if (maybeEncodingFlag == 0 || maybeEncodingFlag == 1) {
+ if (maybeEncodingFlag == 0 || maybeEncodingFlag == 1 ||
+ maybeEncodingFlag == 2 || maybeEncodingFlag == 3) {
offset++;
actualLength--;
if (maybeEncodingFlag == 1) {
@@ -290,9 +291,10 @@ public class ID3v2Frame implements MP3Fr
}
// Now data
+ int copyFrom = offset+nameLength+sizeLength+flagLength;
+ size = Math.min(size, frameData.length-copyFrom);
data = new byte[size];
- System.arraycopy(frameData,
- offset+nameLength+sizeLength+flagLength, data, 0, size);
+ System.arraycopy(frameData, copyFrom, data, 0, size);
}
protected int getSize() {
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java?rev=981573&r1=981572&r2=981573&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java Mon Aug 2 15:55:13 2010
@@ -23,6 +23,7 @@ import junit.framework.TestCase;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
@@ -43,7 +44,7 @@ public class Mp3ParserTest extends TestC
InputStream stream = Mp3ParserTest.class.getResourceAsStream(
"/test-documents/testMP3id3v1.mp3");
try {
- parser.parse(stream, handler, metadata);
+ parser.parse(stream, handler, metadata, new ParseContext());
} finally {
stream.close();
}
@@ -77,7 +78,7 @@ public class Mp3ParserTest extends TestC
InputStream stream = Mp3ParserTest.class.getResourceAsStream(
"/test-documents/testMP3id3v2.mp3");
try {
- parser.parse(stream, handler, metadata);
+ parser.parse(stream, handler, metadata, new ParseContext());
} finally {
stream.close();
}
@@ -111,7 +112,7 @@ public class Mp3ParserTest extends TestC
InputStream stream = Mp3ParserTest.class.getResourceAsStream(
"/test-documents/testMP3id3v1_v2.mp3");
try {
- parser.parse(stream, handler, metadata);
+ parser.parse(stream, handler, metadata, new ParseContext());
} finally {
stream.close();
}
@@ -149,7 +150,7 @@ public class Mp3ParserTest extends TestC
InputStream stream = Mp3ParserTest.class.getResourceAsStream(
"/test-documents/testMP3lyrics.mp3");
try {
- parser.parse(stream, handler, metadata);
+ parser.parse(stream, handler, metadata, new ParseContext());
} finally {
stream.close();
}
@@ -192,4 +193,44 @@ public class Mp3ParserTest extends TestC
assertEquals("", ID3v2Frame.getTagString(new byte[] {0,0,0,0}, 0, 3));
assertEquals("A", ID3v2Frame.getTagString(new byte[] {(byte)'A',0,0,0},
0, 3));
}
+
+ /**
+ * This test will do nothing, unless you've downloaded the
+ * mp3 file from TIKA-424 - the file cannot be
+ * distributed with Tika.
+ * This file has corrupt ID3v2.4 tags in it - the length
+ * parameters are written in bytes, not bytes/4
+ * Check that we can at least read the file without breaking,
+ * even if the tags are going to be junk...
+ */
+ public void testTIKA424() throws Exception {
+ Parser parser = new AutoDetectParser(); // Should auto-detect!
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+ "/test-documents/test2.mp3");
+ if(stream == null) {
+ // You haven't downloaded the file
+ // Skip the test
+ return;
+ }
+
+ try {
+ parser.parse(stream, handler, metadata, new ParseContext());
+ } finally {
+ stream.close();
+ }
+
+ assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+ assertEquals("Plus loin vers l'ouestTPE1\u0000\u0000", metadata.get(Metadata.TITLE).substring(0,28));
+ assertEquals(null, metadata.get(Metadata.AUTHOR));
+
+ String content = handler.toString();
+ assertTrue(content.contains("Plus loin vers l'ouest"));
+
+ assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
+ assertEquals("44100", metadata.get("samplerate"));
+ assertEquals("2", metadata.get("channels"));
+ }
}