Author: nick
Date: Mon Aug  9 14:48:25 2010
New Revision: 983661

URL: http://svn.apache.org/viewvc?rev=983661&view=rev
Log:
TIKA-474 - Do what we can with MP3 files where the ID3 header is truncated

Added:
    
tika/trunk/tika-parsers/src/test/resources/test-documents/testMP3truncated.mp3  
 (with props)
Modified:
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java?rev=983661&r1=983660&r2=983661&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
 Mon Aug  9 14:48:25 2010
@@ -106,8 +106,9 @@ public class ID3v2Frame implements MP3Fr
             extendedHeader = readFully(inp, size);
         }
 
-        // Get the frame's data
-        data = readFully(inp, length);
+        // Get the frame's data, or at least as much
+        //  of it as we could do
+        data = readFully(inp, length, false);
     }
 
     protected static int getInt(byte[] data) {
@@ -150,6 +151,10 @@ public class ID3v2Frame implements MP3Fr
 
     protected static byte[] readFully(InputStream inp, int length)
             throws IOException {
+       return readFully(inp, length, true);
+    }
+    protected static byte[] readFully(InputStream inp, int length, boolean 
shortDataIsFatal)
+            throws IOException {
         byte[] b = new byte[length];
 
         int pos = 0;
@@ -157,7 +162,13 @@ public class ID3v2Frame implements MP3Fr
         while (pos < length) {
             read = inp.read(b, pos, length-pos);
             if (read == -1) {
-                throw new IOException("Tried to read " + length + " bytes, but 
only " + pos + " bytes present"); 
+                if(shortDataIsFatal) {
+                   throw new IOException("Tried to read " + length + " bytes, 
but only " + pos + " bytes present");
+                } else {
+                   // Give them what we found
+                   // TODO Log the short read
+                   return b;
+                }
             }
             pos += read;
         }

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java?rev=983661&r1=983660&r2=983661&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
 (original)
+++ 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
 Mon Aug  9 14:48:25 2010
@@ -265,4 +265,43 @@ public class Mp3ParserTest extends TestC
        assertEquals("44100", metadata.get("samplerate"));
        assertEquals("2", metadata.get("channels"));
     }
+    
+    /**
+     * This tests that we can handle without errors (but perhaps not
+     *  all content) a file with a very very large ID3 frame that
+     *  has been truncated before the end of the ID3 tags.
+     * In this case, it is a file with JPEG data in the ID3, which
+     *  is truncated before the end of the JPEG bit of the ID3 frame.
+     */
+    public void testTIKA474() throws Exception {
+       Parser parser = new AutoDetectParser(); // Should auto-detect!
+       ContentHandler handler = new BodyContentHandler();
+       Metadata metadata = new Metadata();
+
+       InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+               "/test-documents/testMP3truncated.mp3");
+       
+       
+       try {
+           parser.parse(stream, handler, metadata, new ParseContext());
+       } finally {
+           stream.close();
+       }
+
+       // Check we could get the headers from the start
+       assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+       assertEquals("Girl you have no faith in medicine", 
metadata.get(Metadata.TITLE));
+       assertEquals("The White Stripes", metadata.get(Metadata.AUTHOR));
+
+       String content = handler.toString();
+       assertTrue(content.contains("Girl you have no faith in medicine"));
+       assertTrue(content.contains("The White Stripes"));
+       assertTrue(content.contains("Elephant"));
+       assertTrue(content.contains("2003"));
+       
+       // File lacks any audio frames, so we can't know these
+       assertEquals(null, metadata.get("version"));
+       assertEquals(null, metadata.get("samplerate"));
+       assertEquals(null, metadata.get("channels"));
+    }
 }

Added: 
tika/trunk/tika-parsers/src/test/resources/test-documents/testMP3truncated.mp3
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testMP3truncated.mp3?rev=983661&view=auto
==============================================================================
Binary file - no diff available.

Propchange: 
tika/trunk/tika-parsers/src/test/resources/test-documents/testMP3truncated.mp3
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream


Reply via email to