Hi,

The attached patch enables a program to register a listener that is
notified whenever a new bzip2 block is detected.

Each registered listener is invoked as:
 - xxx.newBlock(this, currBlockNo, currBlockPosition)

- this = the current BZip2CompressorInputStream object
- currBlockNo = The current block number, increased for each block
- currBlockPosition = The offset (i.e. start position) in the compressed
input stream of the current block

The "this" reference is passed so that the listener can obtain the number
of bytes read from the uncompressed input stream via this.getBytesRead().

With the attached patch, a program can parse a bzip2 file and build an
index of all bzip2 block offsets on the fly. This index can later be
used to skip directly to the correct block for faster positioning.

Comments are welcome.

with kind regards
thomas
Index: src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStream.java
===================================================================
--- src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStream.java	(Revision 1402440)
+++ src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStream.java	(Arbeitskopie)
@@ -26,6 +26,8 @@
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.List;
+import java.util.concurrent.CopyOnWriteArrayList;
 
 import org.apache.commons.compress.compressors.CompressorInputStream;
 
@@ -97,6 +99,12 @@
      */
     private BZip2CompressorInputStream.Data data;
 
+    private long currBlockNo;
+    private long currInPosition;
+    private long currBlockPosition;
+
+    private List<BZip2BlockListener> listener = new CopyOnWriteArrayList<BZip2BlockListener>();
+
     /**
      * Constructs a new BZip2CompressorInputStream which decompresses bytes
      * read from the specified stream. This doesn't suppprt decompressing
@@ -111,6 +119,18 @@
         this(in, false);
     }
 
+    public BZip2CompressorInputStream(InputStream in, boolean decompressConcatenated) throws IOException {
+        this(in,decompressConcatenated,null);
+    }
+
+    public void addListener(BZip2BlockListener listener) {
+        this.listener.add(listener);
+    }
+
+    public void removeListener(BZip2BlockListener listener) {
+        this.listener.remove(listener);
+    }
+
     /**
      * Constructs a new BZip2CompressorInputStream which decompresses bytes
      * read from the specified stream.
@@ -127,13 +147,15 @@
      * @throws NullPointerException
      *             if <tt>in == null</tt>
      */
-    public BZip2CompressorInputStream(final InputStream in,
-                                      final boolean decompressConcatenated)
+    public BZip2CompressorInputStream(InputStream in,
+                                      boolean decompressConcatenated,
+                                      BZip2BlockListener listener)
             throws IOException {
         super();
 
         this.in = in;
         this.decompressConcatenated = decompressConcatenated;
+        this.listener.add(listener);
 
         init(true);
         initBlock();
@@ -176,12 +198,17 @@
 
         final int hi = offs + len;
         int destOffs = offs;
-        for (int b; (destOffs < hi) && ((b = read0()) >= 0);) {
-            dest[destOffs++] = (byte) b;
+        int c = 0;
+        for (int b; destOffs < hi;) {
+            b = read0();
+            if(b >= 0) {
+                count(1);
+                c++;
+                dest[destOffs++] = (byte) b;
+            } else
+                return b;
         }
 
-        int c = (destOffs == offs) ? -1 : (destOffs - offs);
-        count(c);
         return c;
     }
 
@@ -239,17 +266,24 @@
         return retChar;
     }
 
+    private int readNextByte(InputStream in) throws IOException {
+        int b = in.read();
+        if(b >= 0)
+            currInPosition++;
+        return b;
+    }
+
     private boolean init(boolean isFirstStream) throws IOException {
         if (null == in) {
             throw new IOException("No InputStream");
         }
 
-        int magic0 = this.in.read();
+        int magic0 = readNextByte(this.in);
         if (magic0 == -1 && !isFirstStream) {
             return false;
         }
-        int magic1 = this.in.read();
-        int magic2 = this.in.read();
+        int magic1 = readNextByte(this.in);
+        int magic2 = readNextByte(this.in);
 
         if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') {
             throw new IOException(isFirstStream
@@ -257,7 +291,7 @@
                     : "Garbage after a valid BZip2 stream");
         }
 
-        int blockSize = this.in.read();
+        int blockSize = readNextByte(this.in);
         if ((blockSize < '1') || (blockSize > '9')) {
             throw new IOException("BZip2 block size is invalid");
         }
@@ -322,7 +356,7 @@
                 this.data = new Data(this.blockSize100k);
             }
 
-            // currBlockNo++;
+            notifyNewBlock();
             getAndMoveToFrontDecode();
 
             this.crc.initialiseCRC();
@@ -330,6 +364,14 @@
         }
     }
 
+    private void notifyNewBlock() {
+        currBlockNo++;
+        currBlockPosition = currInPosition - 11;
+        for(BZip2BlockListener b: listener) {
+            b.newBlock(this, currBlockNo, currBlockPosition);
+        }
+    }
+
     private void endBlock() throws IOException {
         this.computedBlockCRC = this.crc.getFinalCRC();
 
@@ -385,7 +427,7 @@
         if (bsLiveShadow < n) {
             final InputStream inShadow = this.in;
             do {
-                int thech = inShadow.read();
+                int thech = readNextByte(inShadow);
 
                 if (thech < 0) {
                     throw new IOException("unexpected end of stream");
@@ -407,7 +449,7 @@
         int bsBuffShadow = this.bsBuff;
 
         if (bsLiveShadow < 1) {
-            int thech = this.in.read();
+            int thech = readNextByte(this.in);
 
             if (thech < 0) {
                 throw new IOException("unexpected end of stream");
@@ -652,7 +694,7 @@
                     // Inlined:
                     // int zvec = bsR(zn);
                     while (bsLiveShadow < zn) {
-                        final int thech = inShadow.read();
+                        final int thech = readNextByte(inShadow);
                         if (thech >= 0) {
                             bsBuffShadow = (bsBuffShadow << 8) | thech;
                             bsLiveShadow += 8;
@@ -668,7 +710,7 @@
                     while (zvec > limit_zt[zn]) {
                         zn++;
                         while (bsLiveShadow < 1) {
-                            final int thech = inShadow.read();
+                            final int thech = readNextByte(inShadow);
                             if (thech >= 0) {
                                 bsBuffShadow = (bsBuffShadow << 8) | thech;
                                 bsLiveShadow += 8;
@@ -735,7 +777,7 @@
                 // Inlined:
                 // int zvec = bsR(zn);
                 while (bsLiveShadow < zn) {
-                    final int thech = inShadow.read();
+                    final int thech = readNextByte(inShadow);
                     if (thech >= 0) {
                         bsBuffShadow = (bsBuffShadow << 8) | thech;
                         bsLiveShadow += 8;
@@ -751,7 +793,7 @@
                 while (zvec > limit_zt[zn]) {
                     zn++;
                     while (bsLiveShadow < 1) {
-                        final int thech = inShadow.read();
+                        final int thech = readNextByte(inShadow);
                         if (thech >= 0) {
                             bsBuffShadow = (bsBuffShadow << 8) | thech;
                             bsLiveShadow += 8;
@@ -785,7 +827,7 @@
         while (zvec > limit_zt[zn]) {
             zn++;
             while (bsLiveShadow < 1) {
-                final int thech = inShadow.read();
+                final int thech = readNextByte(inShadow);
 
                 if (thech >= 0) {
                     bsBuffShadow = (bsBuffShadow << 8) | thech;

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@commons.apache.org
For additional commands, e-mail: dev-h...@commons.apache.org

Reply via email to