This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_1x by this push:
     new 80cfd6d  TIKA-2730 -- allow last frame to be truncated w/o throwing an EOF
80cfd6d is described below

commit 80cfd6d4a4270f8f3697c6dc083b3dedfc36c86a
Author: TALLISON <[email protected]>
AuthorDate: Wed Sep 19 12:51:23 2018 -0400

    TIKA-2730 -- allow last frame to be truncated w/o throwing an EOF
---
 .../java/org/apache/tika/parser/mp3/Mp3Parser.java |   9 ++++--
 .../org/apache/tika/parser/mp3/MpegStream.java     |  11 ++++---
 .../org/apache/tika/parser/mp3/Mp3ParserTest.java  |  33 ++++++++++++++++++++-
 .../test-documents/testMP3i18n_truncated.mp3       | Bin 0 -> 40672 bytes
 4 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java b/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
index 3b79f31..345f486 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
@@ -194,15 +194,18 @@ public class Mp3Parser extends AbstractParser {
         // Now iterate over all audio frames in the file
         AudioFrame frame = mpegStream.nextFrame();
         float duration = 0;
-        while (frame != null)
+        boolean skipped = true;
+        while (frame != null && skipped)
         {
             duration += frame.getDuration();
             if (firstAudio == null)
             {
                 firstAudio = frame;
             }
-            mpegStream.skipFrame();
-            frame = mpegStream.nextFrame();
+            skipped = mpegStream.skipFrame();
+            if (skipped) {
+                frame = mpegStream.nextFrame();
+            }
         }
 
        // ID3v1 tags live at the end of the file
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java b/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java
index 1814c12..4984fea 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java
@@ -150,10 +150,10 @@ class MpegStream extends PushbackInputStream
      * Skips the current MPEG frame. This method can be called after a valid
      * MPEG header has been retrieved using {@code nextFrame()}. In this case
      * the underlying stream is advanced to the end of the associated MPEG
-     * frame. Otherwise, this method has no effect. The return value indicates
-     * whether a frame could be skipped.
+     * frame or until the EOF is reached. The return value indicates
+     * whether the full frame could be skipped.
      * 
-     * @return <b>true</b> if a frame could be skipped, <b>false</b> otherwise
+     * @return <b>true</b> if a frame could be skipped, <b>false</b> otherwise, perhaps EOF?
      * @throws IOException if an IO error occurs
      */
     public boolean skipFrame() throws IOException
@@ -162,11 +162,10 @@ class MpegStream extends PushbackInputStream
         {
             long toSkip = currentHeader.getLength() - HEADER_SIZE;
             long skipped = IOUtils.skipFully(in, toSkip);
+            currentHeader = null;
             if (skipped < toSkip) {
-                throw new EOFException("EOF: tried to skip "+toSkip +
-                        " but could only skip "+skipped);
+                return false;
             }
-            currentHeader = null;
             return true;
         }
         return false;
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
index 1c615f6..ef15cf1 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
@@ -248,7 +248,38 @@ public class Mp3ParserTest {
        assertEquals("1", metadata.get("channels"));
        checkDuration(metadata, 2);
    }
-    
+    /**
+     * Tests that a file with the last frame slightly
+     * truncated does not cause an EOF and does
+     * not lead to an infinite loop.
+     */
+    @Test
+    public void testMp3ParsingID3i18nTruncated() throws Exception {
+        Parser parser = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        try (InputStream stream = Mp3ParserTest.class.getResourceAsStream(
+                "/test-documents/testMP3i18n_truncated.mp3")) {
+            parser.parse(stream, handler, metadata, new ParseContext());
+        }
+
+        assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("Une chason en Fran\u00e7ais", metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("Test Artist \u2468\u2460", metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("Test Artist \u2468\u2460", metadata.get(XMPDM.ARTIST));
+        assertEquals("Test Album \u2460\u2468", metadata.get(XMPDM.ALBUM));
+
+        assertEquals(
+                "Eng - Comment Desc\nThis is a \u1357\u2468\u2460 Comment",
+                metadata.get(XMPDM.LOG_COMMENT)
+        );
+
+        assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
+        assertEquals("44100", metadata.get("samplerate"));
+        assertEquals("1", metadata.get("channels"));
+        checkDuration(metadata, 2);
+    }
     
     /**
      * Tests that a file with both lyrics and
diff --git a/tika-parsers/src/test/resources/test-documents/testMP3i18n_truncated.mp3 b/tika-parsers/src/test/resources/test-documents/testMP3i18n_truncated.mp3
new file mode 100644
index 0000000..c2cd30d
Binary files /dev/null and b/tika-parsers/src/test/resources/test-documents/testMP3i18n_truncated.mp3 differ

Reply via email to