Repository: commons-compress
Updated Branches:
  refs/heads/master 56e82da90 -> 51d03dda0


COMPRESS-271 extract LZ4-independent code into reusable stream


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/51d03dda
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/51d03dda
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/51d03dda

Branch: refs/heads/master
Commit: 51d03dda0e0dacbc2f13d0de196a7acff9ce0134
Parents: 56e82da
Author: Stefan Bodewig <[email protected]>
Authored: Tue Jan 17 20:10:42 2017 +0100
Committer: Stefan Bodewig <[email protected]>
Committed: Tue Jan 17 20:10:42 2017 +0100

----------------------------------------------------------------------
 .../lz4/BlockLZ4CompressorInputStream.java      | 166 +-----------
 .../AbstractLZ77CompressorInputStream.java      | 261 +++++++++++++++++++
 2 files changed, 270 insertions(+), 157 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/51d03dda/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStream.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStream.java
 
b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStream.java
index 635dec9..89e74f7 100644
--- 
a/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStream.java
+++ 
b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStream.java
@@ -21,9 +21,8 @@ package org.apache.commons.compress.compressors.lz4;
 import java.io.IOException;
 import java.io.InputStream;
 
-import org.apache.commons.compress.compressors.CompressorInputStream;
+import 
org.apache.commons.compress.compressors.lz77support.AbstractLZ77CompressorInputStream;
 import org.apache.commons.compress.utils.ByteUtils;
-import org.apache.commons.compress.utils.IOUtils;
 
 /**
  * CompressorInputStream for the LZ4 block format.
@@ -31,50 +30,19 @@ import org.apache.commons.compress.utils.IOUtils;
  * @see <a href="http://lz4.github.io/lz4/lz4_Block_format.html">LZ4 Block 
Format Description</a>
  * @since 1.14
  */
-public class BlockLZ4CompressorInputStream extends CompressorInputStream {
+public class BlockLZ4CompressorInputStream extends 
AbstractLZ77CompressorInputStream {
 
     private static final int WINDOW_SIZE = 1 << 16;
     private static final int SIZE_BITS = 4;
     private static final int COPY_SIZE_MASK = (1 << SIZE_BITS) - 1;
     private static final int LITERAL_SIZE_MASK = COPY_SIZE_MASK << SIZE_BITS;
 
-    /** Buffer to write decompressed bytes to for back-references */
-    private final byte[] buf = new byte[3 * WINDOW_SIZE];
-
-    /** One behind the index of the last byte in the buffer that was written */
-    private int writeIndex;
-
-    /** Index of the next byte to be read. */
-    private int readIndex;
-
-    /** The underlying stream to read compressed data from */
-    private final InputStream in;
-
-    /** Number of bytes still to be read from the current literal or copy. */
-    private long bytesRemaining;
-
     /** Copy-size part of the block starting byte. */
     private int nextCopySize;
 
-    /** Offset of the current copy. */
-    private int copyOffset;
-
     /** Current state of the stream */
     private State state = State.NO_BLOCK;
 
-    /** uncompressed size */
-    private int size = 0;
-
-    // used in no-arg read method
-    private final byte[] oneByte = new byte[1];
-
-    private final ByteUtils.ByteSupplier supplier = new 
ByteUtils.ByteSupplier() {
-        @Override
-        public int getAsByte() throws IOException {
-            return readOneByte();
-        }
-    };
-
     /**
      * Creates a new LZ4 input stream.
      *
@@ -84,27 +52,7 @@ public class BlockLZ4CompressorInputStream extends 
CompressorInputStream {
      * @throws IOException if reading fails
      */
     public BlockLZ4CompressorInputStream(final InputStream is) throws 
IOException {
-        this.in = is;
-        writeIndex = readIndex = 0;
-        bytesRemaining = 0;
-    }
-
-    /** {@inheritDoc} */
-    @Override
-    public int read() throws IOException {
-        return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF;
-    }
-
-    /** {@inheritDoc} */
-    @Override
-    public void close() throws IOException {
-        in.close();
-    }
-
-    /** {@inheritDoc} */
-    @Override
-    public int available() {
-        return writeIndex - readIndex;
+        super(is, WINDOW_SIZE);
     }
 
     /**
@@ -121,7 +69,7 @@ public class BlockLZ4CompressorInputStream extends 
CompressorInputStream {
             /*FALLTHROUGH*/
         case IN_LITERAL:
             int litLen = readLiteral(b, off, len);
-            if (bytesRemaining == 0) {
+            if (!hasMoreDataInBlock()) {
                 state = State.LOOKING_FOR_COPY;
             }
             return litLen;
@@ -133,7 +81,7 @@ public class BlockLZ4CompressorInputStream extends 
CompressorInputStream {
             /*FALLTHROUGH*/
         case IN_COPY:
             int copyLen = readCopy(b, off, len);
-            if (bytesRemaining == 0) {
+            if (!hasMoreDataInBlock()) {
                 state = State.NO_BLOCK;
             }
             return copyLen;
@@ -142,15 +90,6 @@ public class BlockLZ4CompressorInputStream extends 
CompressorInputStream {
         }
     }
 
-    /**
-     * Get the uncompressed size of the stream
-     *
-     * @return the uncompressed size
-     */
-    public int getSize() {
-        return size;
-    }
-
     private void readSizes() throws IOException {
         int nextBlock = readOneByte();
         if (nextBlock == -1) {
@@ -161,7 +100,7 @@ public class BlockLZ4CompressorInputStream extends 
CompressorInputStream {
         if (literalSizePart == COPY_SIZE_MASK) {
             literalSizePart += readSizeBytes();
         }
-        bytesRemaining = literalSizePart;
+        startLiteral(literalSizePart);
         state = State.IN_LITERAL;
     }
 
@@ -178,52 +117,12 @@ public class BlockLZ4CompressorInputStream extends 
CompressorInputStream {
         return accum;
     }
 
-    private int readLiteral(final byte[] b, final int off, final int len) 
throws IOException {
-        final int avail = available();
-        if (len > avail) {
-            tryToReadLiteral(len - avail);
-        }
-        return readFromBuffer(b, off, len);
-    }
-
-    private void tryToReadLiteral(int bytesToRead) throws IOException {
-        final int reallyTryToRead = (int) Math.min(Math.min(bytesToRead, 
bytesRemaining),
-                                                   buf.length - writeIndex);
-        final int bytesRead = reallyTryToRead > 0
-            ? IOUtils.readFully(in, buf, writeIndex, reallyTryToRead)
-            : 0 /* happens for bytesRemaining == 0 */;
-        count(bytesRead);
-        if (reallyTryToRead != bytesRead) {
-            throw new IOException("Premature end of stream reading literal");
-        }
-        writeIndex += reallyTryToRead;
-        bytesRemaining -= reallyTryToRead;
-    }
-
-    private int readFromBuffer(final byte[] b, final int off, final int len) 
throws IOException {
-        final int readable = Math.min(len, available());
-        if (readable > 0) {
-            System.arraycopy(buf, readIndex, b, off, readable);
-            readIndex += readable;
-            if (readIndex > 2 * WINDOW_SIZE) {
-                slideBuffer();
-            }
-        }
-        size += readable;
-        return readable;
-    }
-
-    private void slideBuffer() {
-        System.arraycopy(buf, WINDOW_SIZE, buf, 0, WINDOW_SIZE);
-        writeIndex -= WINDOW_SIZE;
-        readIndex -= WINDOW_SIZE;
-    }
-
     /**
      * @return false if there is no more copy - this means this is the
      * last block of the stream.
      */
     private boolean initializeCopy() throws IOException {
+        int copyOffset = 0;
         try {
             copyOffset = (int) ByteUtils.fromLittleEndian(supplier, 2);
         } catch (IOException ex) {
@@ -236,59 +135,12 @@ public class BlockLZ4CompressorInputStream extends 
CompressorInputStream {
         if (nextCopySize == COPY_SIZE_MASK) {
             copySize += readSizeBytes();
         }
-        bytesRemaining = copySize + 4; // minimal match length 4 is encoded as 0
+        // minimal match length 4 is encoded as 0
+        startCopy(copyOffset, copySize + 4);
         state = State.IN_COPY;
         return true;
     }
 
-    private int readCopy(final byte[] b, final int off, final int len) throws 
IOException {
-        final int avail = available();
-        if (len > avail) {
-            tryToCopy(len - avail);
-        }
-        return readFromBuffer(b, off, len);
-    }
-
-    private void tryToCopy(int bytesToCopy) throws IOException {
-        // this will fit into the buffer without sliding and not
-        // require more than is available inside the copy
-        int copy = (int) Math.min(Math.min(bytesToCopy, bytesRemaining),
-                                  buf.length - writeIndex);
-        if (copy == 0) {
-            // NOP
-        } else if (copyOffset == 1) { // pretty common special case
-            final byte last = buf[writeIndex - 1];
-            for (int i = 0; i < copy; i++) {
-                buf[writeIndex++] = last;
-            }
-        } else if (copy < copyOffset) {
-            System.arraycopy(buf, writeIndex - copyOffset, buf, writeIndex, 
copy);
-            writeIndex += copy;
-        } else {
-            final int fullRots = copy / copyOffset;
-            for (int i = 0; i < fullRots; i++) {
-                System.arraycopy(buf, writeIndex - copyOffset, buf, 
writeIndex, copyOffset);
-                writeIndex += copyOffset;
-            }
-
-            final int pad = copy - (copyOffset * fullRots);
-            if (pad > 0) {
-                System.arraycopy(buf, writeIndex - copyOffset, buf, 
writeIndex, pad);
-                writeIndex += pad;
-            }
-        }
-        bytesRemaining -= copy;
-    }
-
-    private int readOneByte() throws IOException {
-        final int b = in.read();
-        if (b != -1) {
-            count(1);
-            return b & 0xFF;
-        }
-        return -1;
-    }
-
     private enum State {
         NO_BLOCK, IN_LITERAL, LOOKING_FOR_COPY, IN_COPY, EOF
     }

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/51d03dda/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java
 
b/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java
new file mode 100644
index 0000000..ea9b8b2
--- /dev/null
+++ 
b/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java
@@ -0,0 +1,261 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.commons.compress.compressors.lz77support;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.commons.compress.compressors.CompressorInputStream;
+import org.apache.commons.compress.utils.ByteUtils;
+import org.apache.commons.compress.utils.IOUtils;
+
+/**
+ * Encapsulates code common to LZ77 decompressors.
+ *
+ * <p>Assumes the stream consists of blocks of literal data and
+ * back-references (called copies) in any order. Of course the first
+ * block must be a literal block for the scheme to work.</p>
+ *
+ * @since 1.14
+ */
+public abstract class AbstractLZ77CompressorInputStream extends 
CompressorInputStream {
+
+    /** Size of the window - must be bigger than the biggest offset expected. 
*/
+    private final int windowSize;
+
+    /** Buffer to write decompressed bytes to for back-references */
+    private final byte[] buf;
+
+    /** One behind the index of the last byte in the buffer that was written */
+    private int writeIndex;
+
+    /** Index of the next byte to be read. */
+    private int readIndex;
+
+    /** The underlying stream to read compressed data from */
+    private final InputStream in;
+
+    /** Number of bytes still to be read from the current literal or copy. */
+    private long bytesRemaining;
+
+    /** Offset of the current copy. */
+    private int copyOffset;
+
+    /** uncompressed size */
+    private int size = 0;
+
+    // used in no-arg read method
+    private final byte[] oneByte = new byte[1];
+
+    /**
+     * Supplier that delegates to {@link #readOneByte}.
+     */
+    protected final ByteUtils.ByteSupplier supplier = new 
ByteUtils.ByteSupplier() {
+        @Override
+        public int getAsByte() throws IOException {
+            return readOneByte();
+        }
+    };
+
+    /**
+     * Creates a new LZ77 input stream.
+     *
+     * @param is
+     *            An InputStream to read compressed data from
+     * @param windowSize
+     *            Size of the window kept for back-references, must be bigger 
than the biggest offset expected.
+     *
+     * @throws IOException if reading fails
+     */
+    public AbstractLZ77CompressorInputStream(final InputStream is, int 
windowSize) throws IOException {
+        this.in = is;
+        this.windowSize = windowSize;
+        buf = new byte[3 * windowSize];
+        writeIndex = readIndex = 0;
+        bytesRemaining = 0;
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public int read() throws IOException {
+        return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF;
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public void close() throws IOException {
+        in.close();
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public int available() {
+        return writeIndex - readIndex;
+    }
+
+    /**
+     * Get the uncompressed size of the stream
+     *
+     * @return the uncompressed size
+     */
+    public int getSize() {
+        return size;
+    }
+
+    /**
+     * Used by subclasses to signal the next block contains the given
+     * amount of literal data.
+     * @param length the length of the block
+     */
+    protected final void startLiteral(long length) {
+        bytesRemaining = length;
+    }
+
+    /**
+     * Is there still data remaining inside the current block?
+     * @return true if there is still data remaining inside the current block.
+     */
+    protected final boolean hasMoreDataInBlock() {
+        return bytesRemaining > 0;
+    }
+
+    /**
+     * Reads data from the current literal block.
+     * @param b buffer to write data to
+     * @param off offset to start writing to
+     * @param len maximum amount of data to read
+     * @return number of bytes read, may be 0. Will never return -1 as
+     * EOF-detection is the responsibility of the subclass
+     * @throws IOException if the underlying stream throws or signals
+     * an EOF before the amount of data promised for the block has
+     * been read
+     */
+    protected final int readLiteral(final byte[] b, final int off, final int 
len) throws IOException {
+        final int avail = available();
+        if (len > avail) {
+            tryToReadLiteral(len - avail);
+        }
+        return readFromBuffer(b, off, len);
+    }
+
+    private void tryToReadLiteral(int bytesToRead) throws IOException {
+        final int reallyTryToRead = (int) Math.min(Math.min(bytesToRead, 
bytesRemaining),
+                                                   buf.length - writeIndex);
+        final int bytesRead = reallyTryToRead > 0
+            ? IOUtils.readFully(in, buf, writeIndex, reallyTryToRead)
+            : 0 /* happens for bytesRemaining == 0 */;
+        count(bytesRead);
+        if (reallyTryToRead != bytesRead) {
+            throw new IOException("Premature end of stream reading literal");
+        }
+        writeIndex += reallyTryToRead;
+        bytesRemaining -= reallyTryToRead;
+    }
+
+    private int readFromBuffer(final byte[] b, final int off, final int len) {
+        final int readable = Math.min(len, available());
+        if (readable > 0) {
+            System.arraycopy(buf, readIndex, b, off, readable);
+            readIndex += readable;
+            if (readIndex > 2 * windowSize) {
+                slideBuffer();
+            }
+        }
+        size += readable;
+        return readable;
+    }
+
+    private void slideBuffer() {
+        System.arraycopy(buf, windowSize, buf, 0, windowSize);
+        writeIndex -= windowSize;
+        readIndex -= windowSize;
+    }
+
+    /**
+     * Used by subclasses to signal the next block contains a back-reference 
with the given coordinates.
+     * @param offset the offset of the back-reference
+     * @param length the length of the back-reference
+     */
+    protected final void startCopy(int offset, long length) {
+        copyOffset = offset;
+        bytesRemaining = length;
+    }
+
+    /**
+     * Reads data from the current back-reference.
+     * @param b buffer to write data to
+     * @param off offset to start writing to
+     * @param len maximum amount of data to read
+     * @return number of bytes read, may be 0. Will never return -1 as
+     * EOF-detection is the responsibility of the subclass
+     */
+    protected final int readCopy(final byte[] b, final int off, final int len) 
{
+        final int avail = available();
+        if (len > avail) {
+            tryToCopy(len - avail);
+        }
+        return readFromBuffer(b, off, len);
+    }
+
+    private void tryToCopy(int bytesToCopy) {
+        // this will fit into the buffer without sliding and not
+        // require more than is available inside the copy
+        int copy = (int) Math.min(Math.min(bytesToCopy, bytesRemaining),
+                                  buf.length - writeIndex);
+        if (copy == 0) {
+            // NOP
+        } else if (copyOffset == 1) { // pretty common special case
+            final byte last = buf[writeIndex - 1];
+            for (int i = 0; i < copy; i++) {
+                buf[writeIndex++] = last;
+            }
+        } else if (copy < copyOffset) {
+            System.arraycopy(buf, writeIndex - copyOffset, buf, writeIndex, 
copy);
+            writeIndex += copy;
+        } else {
+            final int fullRots = copy / copyOffset;
+            for (int i = 0; i < fullRots; i++) {
+                System.arraycopy(buf, writeIndex - copyOffset, buf, 
writeIndex, copyOffset);
+                writeIndex += copyOffset;
+            }
+
+            final int pad = copy - (copyOffset * fullRots);
+            if (pad > 0) {
+                System.arraycopy(buf, writeIndex - copyOffset, buf, 
writeIndex, pad);
+                writeIndex += pad;
+            }
+        }
+        bytesRemaining -= copy;
+    }
+
+    /**
+     * Reads a single byte from the real input stream and ensures the data is 
accounted for.
+     *
+     * @return the byte read as value between 0 and 255 or -1 if EOF has been 
reached.
+     * @throws IOException if the underlying stream throws
+     */
+    protected final int readOneByte() throws IOException {
+        final int b = in.read();
+        if (b != -1) {
+            count(1);
+            return b & 0xFF;
+        }
+        return -1;
+    }
+}

Reply via email to