Author: nick
Date: Mon Aug  2 15:55:13 2010
New Revision: 981573

URL: http://svn.apache.org/viewvc?rev=981573&view=rev
Log:
Don't break on MP3 files whose ID3v2.4 tags are broken and lie about their
size (TIKA-424).
(The unit test for this will not normally be run unless you explicitly
download the sample file, as we can't redistribute it as part of Tika.)

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java?rev=981573&r1=981572&r2=981573&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java Mon Aug  2 15:55:13 2010
@@ -182,7 +182,8 @@ public class ID3v2Frame implements MP3Fr
         // Detect by the first byte being sub 0x20
         String encoding = "ISO-8859-1";
         byte maybeEncodingFlag = data[offset];
-        if (maybeEncodingFlag == 0 || maybeEncodingFlag == 1) {
+        if (maybeEncodingFlag == 0 || maybeEncodingFlag == 1 ||
+              maybeEncodingFlag == 2 || maybeEncodingFlag == 3) {
             offset++;
             actualLength--;
             if (maybeEncodingFlag == 1) {
@@ -290,9 +291,10 @@ public class ID3v2Frame implements MP3Fr
             }
 
             // Now data
+            int copyFrom = offset+nameLength+sizeLength+flagLength;
+            size = Math.min(size, frameData.length-copyFrom);
             data = new byte[size];
-            System.arraycopy(frameData, 
-                    offset+nameLength+sizeLength+flagLength, data, 0, size);
+            System.arraycopy(frameData, copyFrom, data, 0, size);
         }
 
         protected int getSize() {

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java?rev=981573&r1=981572&r2=981573&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java Mon Aug  2 15:55:13 2010
@@ -23,6 +23,7 @@ import junit.framework.TestCase;
 
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.sax.BodyContentHandler;
 import org.xml.sax.ContentHandler;
@@ -43,7 +44,7 @@ public class Mp3ParserTest extends TestC
         InputStream stream = Mp3ParserTest.class.getResourceAsStream(
                 "/test-documents/testMP3id3v1.mp3");
         try {
-            parser.parse(stream, handler, metadata);
+            parser.parse(stream, handler, metadata, new ParseContext());
         } finally {
             stream.close();
         }
@@ -77,7 +78,7 @@ public class Mp3ParserTest extends TestC
         InputStream stream = Mp3ParserTest.class.getResourceAsStream(
                 "/test-documents/testMP3id3v2.mp3");
         try {
-            parser.parse(stream, handler, metadata);
+            parser.parse(stream, handler, metadata, new ParseContext());
         } finally {
             stream.close();
         }
@@ -111,7 +112,7 @@ public class Mp3ParserTest extends TestC
         InputStream stream = Mp3ParserTest.class.getResourceAsStream(
                 "/test-documents/testMP3id3v1_v2.mp3");
         try {
-            parser.parse(stream, handler, metadata);
+            parser.parse(stream, handler, metadata, new ParseContext());
         } finally {
             stream.close();
         }
@@ -149,7 +150,7 @@ public class Mp3ParserTest extends TestC
         InputStream stream = Mp3ParserTest.class.getResourceAsStream(
                 "/test-documents/testMP3lyrics.mp3");
         try {
-            parser.parse(stream, handler, metadata);
+            parser.parse(stream, handler, metadata, new ParseContext());
         } finally {
             stream.close();
         }
@@ -192,4 +193,44 @@ public class Mp3ParserTest extends TestC
        assertEquals("", ID3v2Frame.getTagString(new byte[] {0,0,0,0}, 0, 3));
        assertEquals("A", ID3v2Frame.getTagString(new byte[] {(byte)'A',0,0,0}, 0, 3));
     }
+    
+    /**
+     * This test will do nothing, unless you've downloaded the
+     *  mp3 file from TIKA-424 - the file cannot be
+     *  distributed with Tika.
+     * This file has corrupt ID3v2.4 tags in it - the length
+     *  parameters are written in bytes, not bytes/4
+     * Check that we can at least read the file without breaking,
+     *  even if the tags are going to be junk...
+     */
+    public void testTIKA424() throws Exception {
+       Parser parser = new AutoDetectParser(); // Should auto-detect!
+       ContentHandler handler = new BodyContentHandler();
+       Metadata metadata = new Metadata();
+
+       InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+               "/test-documents/test2.mp3");
+       if(stream == null) {
+          // You haven't downloaded the file
+          // Skip the test
+          return;
+       }
+       
+       try {
+           parser.parse(stream, handler, metadata, new ParseContext());
+       } finally {
+           stream.close();
+       }
+
+       assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+       assertEquals("Plus loin vers l'ouestTPE1\u0000\u0000", metadata.get(Metadata.TITLE).substring(0,28));
+       assertEquals(null, metadata.get(Metadata.AUTHOR));
+
+       String content = handler.toString();
+       assertTrue(content.contains("Plus loin vers l'ouest"));
+       
+       assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
+       assertEquals("44100", metadata.get("samplerate"));
+       assertEquals("2", metadata.get("channels"));
+    }
 }


Reply via email to