Author: bodewig
Date: Mon Nov  7 16:36:51 2011
New Revision: 1198806

URL: http://svn.apache.org/viewvc?rev=1198806&view=rev
Log:
support uncompressing concatenated .bz2 files.  COMPRESS-146.  Submitted by 
Lasse Collin

Modified:
    commons/proper/compress/trunk/src/changes/changes.xml
    
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStream.java

Modified: commons/proper/compress/trunk/src/changes/changes.xml
URL: 
http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/changes/changes.xml?rev=1198806&r1=1198805&r2=1198806&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/changes/changes.xml (original)
+++ commons/proper/compress/trunk/src/changes/changes.xml Mon Nov  7 16:36:51 
2011
@@ -49,6 +49,10 @@ The <action> type attribute can be add,u
       <action issue="COMPRESS-156" type="add" date="2011-11-02">
         Support for the XZ format has been added.
       </action> 
+      <action issue="COMPRESS-146" type="update" date="2011-11-07">
+        BZip2CompressorInputStream now optionally supports reading of
+        concatenated .bz2 files.
+      </action>
     </release>
     <release version="1.3" date="2011-11-01"
              description="Release 1.3 - API compatible to 1.2 but requires 
Java5 at runtime">

Modified: 
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStream.java
URL: 
http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStream.java?rev=1198806&r1=1198805&r2=1198806&view=diff
==============================================================================
--- 
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStream.java
 (original)
+++ 
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStream.java
 Mon Nov  7 16:36:51 2011
@@ -62,6 +62,7 @@ public class BZip2CompressorInputStream 
     private int nInUse;
 
     private InputStream in;
+    private final boolean decompressConcatenated;
 
     private int currentChar = -1;
 
@@ -97,8 +98,9 @@ public class BZip2CompressorInputStream 
     private BZip2CompressorInputStream.Data data;
 
     /**
-     * Constructs a new BZip2CompressorInputStream which decompresses bytes 
read from the
-     * specified stream.
+     * Constructs a new BZip2CompressorInputStream which decompresses bytes
+     * read from the specified stream. This doesn't support decompressing
+     * concatenated .bz2 files.
      * 
      * @throws IOException
      *             if the stream content is malformed or an I/O error occurs.
@@ -106,10 +108,37 @@ public class BZip2CompressorInputStream 
      *             if <tt>in == null</tt>
      */
     public BZip2CompressorInputStream(final InputStream in) throws IOException 
{
+        this(in, false);
+    }
+
+    /**
+     * Constructs a new BZip2CompressorInputStream which decompresses bytes
+     * read from the specified stream.
+     *
+     * @param in           the InputStream from which this object should
+     *                     be created
+     * @param decompressConcatenated
+     *                     if true, decompress until the end of the input;
+     *                     if false, stop after the first .bz2 stream and
+     *                     leave the input position to point to the next
+     *                     byte after the .bz2 stream
+     *
+     * @throws IOException
+     *             if the stream content is malformed or an I/O error occurs.
+     * @throws NullPointerException
+     *             if <tt>in == null</tt>
+     */
+    public BZip2CompressorInputStream(final InputStream in,
+                                      final boolean decompressConcatenated)
+            throws IOException {
         super();
 
         this.in = in;
-        init();
+        this.decompressConcatenated = decompressConcatenated;
+
+        init(true);
+        initBlock();
+        setupBlock();
     }
 
     /** {@inheritDoc} */
@@ -210,54 +239,71 @@ public class BZip2CompressorInputStream 
         return retChar;
     }
 
-    private void init() throws IOException {
+    private boolean init(boolean isFirstStream) throws IOException {
         if (null == in) {
             throw new IOException("No InputStream");
         }
-        checkMagicChar('B', "first");
-        checkMagicChar('Z', "second");
-        checkMagicChar('h', "third");
+
+        int magic0 = this.in.read();
+        int magic1 = this.in.read();
+        int magic2 = this.in.read();
+        if (magic0 == -1 && !isFirstStream)
+            return false;
+
+        if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h')
+            throw new IOException(isFirstStream
+                    ? "Stream is not in the BZip2 format"
+                    : "Garbage after a valid BZip2 stream");
 
         int blockSize = this.in.read();
         if ((blockSize < '1') || (blockSize > '9')) {
-            throw new IOException("Stream is not BZip2 formatted: illegal "
-                                  + "blocksize " + (char) blockSize);
+            throw new IOException("BZip2 block size is invalid");
         }
 
         this.blockSize100k = blockSize - '0';
 
-        initBlock();
-        setupBlock();
-    }
+        this.bsLive = 0;
+        this.computedCombinedCRC = 0;
 
-    private void checkMagicChar(char expected, String position)
-        throws IOException {
-        int magic = this.in.read();
-        if (magic != expected) {
-            throw new IOException("Stream is not BZip2 formatted: expected '"
-                                  + expected + "' as " + position + " byte but 
got '"
-                                  + (char) magic + "'");
-        }
+        return true;
     }
 
     private void initBlock() throws IOException {
-        char magic0 = bsGetUByte();
-        char magic1 = bsGetUByte();
-        char magic2 = bsGetUByte();
-        char magic3 = bsGetUByte();
-        char magic4 = bsGetUByte();
-        char magic5 = bsGetUByte();
-
-        if (magic0 == 0x17 && magic1 == 0x72 && magic2 == 0x45
-            && magic3 == 0x38 && magic4 == 0x50 && magic5 == 0x90) {
-            complete(); // end of file
-        } else if (magic0 != 0x31 || // '1'
-                   magic1 != 0x41 || // ')'
-                   magic2 != 0x59 || // 'Y'
-                   magic3 != 0x26 || // '&'
-                   magic4 != 0x53 || // 'S'
-                   magic5 != 0x59 // 'Y'
-                   ) {
+        char magic0;
+        char magic1;
+        char magic2;
+        char magic3;
+        char magic4;
+        char magic5;
+
+        while (true) {
+            // Get the block magic bytes.
+            magic0 = bsGetUByte();
+            magic1 = bsGetUByte();
+            magic2 = bsGetUByte();
+            magic3 = bsGetUByte();
+            magic4 = bsGetUByte();
+            magic5 = bsGetUByte();
+
+            // If it isn't the end-of-stream magic, break out of the loop.
+            if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45
+                    || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90)
+                break;
+
+            // End of stream was reached. Check the combined CRC and
+            // advance to the next .bz2 stream if decoding concatenated
+            // streams.
+            if (complete())
+                return;
+        }
+
+        if (magic0 != 0x31 || // '1'
+            magic1 != 0x41 || // 'A'
+            magic2 != 0x59 || // 'Y'
+            magic3 != 0x26 || // '&'
+            magic4 != 0x53 || // 'S'
+            magic5 != 0x59 // 'Y'
+            ) {
             this.currentState = EOF;
             throw new IOException("bad block header");
         } else {
@@ -299,7 +345,7 @@ public class BZip2CompressorInputStream 
         this.computedCombinedCRC ^= this.computedBlockCRC;
     }
 
-    private void complete() throws IOException {
+    private boolean complete() throws IOException {
         this.storedCombinedCRC = bsGetInt();
         this.currentState = EOF;
         this.data = null;
@@ -307,6 +353,10 @@ public class BZip2CompressorInputStream 
         if (this.storedCombinedCRC != this.computedCombinedCRC) {
             throw new IOException("BZip2 CRC error");
         }
+
+        // Look for the next .bz2 stream if decompressing
+        // concatenated files.
+        return !decompressConcatenated || !init(false);
     }
 
     @Override


Reply via email to