Repository: commons-compress Updated Branches: refs/heads/master 56e82da90 -> 51d03dda0
COMPRESS-271 extract LZ4-independent code into reusable stream Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/51d03dda Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/51d03dda Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/51d03dda Branch: refs/heads/master Commit: 51d03dda0e0dacbc2f13d0de196a7acff9ce0134 Parents: 56e82da Author: Stefan Bodewig <[email protected]> Authored: Tue Jan 17 20:10:42 2017 +0100 Committer: Stefan Bodewig <[email protected]> Committed: Tue Jan 17 20:10:42 2017 +0100 ---------------------------------------------------------------------- .../lz4/BlockLZ4CompressorInputStream.java | 166 +----------- .../AbstractLZ77CompressorInputStream.java | 261 +++++++++++++++++++ 2 files changed, 270 insertions(+), 157 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-compress/blob/51d03dda/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStream.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStream.java index 635dec9..89e74f7 100644 --- a/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStream.java +++ b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStream.java @@ -21,9 +21,8 @@ package org.apache.commons.compress.compressors.lz4; import java.io.IOException; import java.io.InputStream; -import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.compressors.lz77support.AbstractLZ77CompressorInputStream; import org.apache.commons.compress.utils.ByteUtils; -import 
org.apache.commons.compress.utils.IOUtils; /** * CompressorInputStream for the LZ4 block format. @@ -31,50 +30,19 @@ import org.apache.commons.compress.utils.IOUtils; * @see <a href="http://lz4.github.io/lz4/lz4_Block_format.html">LZ4 Block Format Description</a> * @since 1.14 */ -public class BlockLZ4CompressorInputStream extends CompressorInputStream { +public class BlockLZ4CompressorInputStream extends AbstractLZ77CompressorInputStream { private static final int WINDOW_SIZE = 1 << 16; private static final int SIZE_BITS = 4; private static final int COPY_SIZE_MASK = (1 << SIZE_BITS) - 1; private static final int LITERAL_SIZE_MASK = COPY_SIZE_MASK << SIZE_BITS; - /** Buffer to write decompressed bytes to for back-references */ - private final byte[] buf = new byte[3 * WINDOW_SIZE]; - - /** One behind the index of the last byte in the buffer that was written */ - private int writeIndex; - - /** Index of the next byte to be read. */ - private int readIndex; - - /** The underlying stream to read compressed data from */ - private final InputStream in; - - /** Number of bytes still to be read from the current literal or copy. */ - private long bytesRemaining; - /** Copy-size part of the block starting byte. */ private int nextCopySize; - /** Offset of the current copy. */ - private int copyOffset; - /** Current state of the stream */ private State state = State.NO_BLOCK; - /** uncompressed size */ - private int size = 0; - - // used in no-arg read method - private final byte[] oneByte = new byte[1]; - - private final ByteUtils.ByteSupplier supplier = new ByteUtils.ByteSupplier() { - @Override - public int getAsByte() throws IOException { - return readOneByte(); - } - }; - /** * Creates a new LZ4 input stream. 
* @@ -84,27 +52,7 @@ public class BlockLZ4CompressorInputStream extends CompressorInputStream { * @throws IOException if reading fails */ public BlockLZ4CompressorInputStream(final InputStream is) throws IOException { - this.in = is; - writeIndex = readIndex = 0; - bytesRemaining = 0; - } - - /** {@inheritDoc} */ - @Override - public int read() throws IOException { - return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF; - } - - /** {@inheritDoc} */ - @Override - public void close() throws IOException { - in.close(); - } - - /** {@inheritDoc} */ - @Override - public int available() { - return writeIndex - readIndex; + super(is, WINDOW_SIZE); } /** @@ -121,7 +69,7 @@ public class BlockLZ4CompressorInputStream extends CompressorInputStream { /*FALLTHROUGH*/ case IN_LITERAL: int litLen = readLiteral(b, off, len); - if (bytesRemaining == 0) { + if (!hasMoreDataInBlock()) { state = State.LOOKING_FOR_COPY; } return litLen; @@ -133,7 +81,7 @@ public class BlockLZ4CompressorInputStream extends CompressorInputStream { /*FALLTHROUGH*/ case IN_COPY: int copyLen = readCopy(b, off, len); - if (bytesRemaining == 0) { + if (!hasMoreDataInBlock()) { state = State.NO_BLOCK; } return copyLen; @@ -142,15 +90,6 @@ public class BlockLZ4CompressorInputStream extends CompressorInputStream { } } - /** - * Get the uncompressed size of the stream - * - * @return the uncompressed size - */ - public int getSize() { - return size; - } - private void readSizes() throws IOException { int nextBlock = readOneByte(); if (nextBlock == -1) { @@ -161,7 +100,7 @@ public class BlockLZ4CompressorInputStream extends CompressorInputStream { if (literalSizePart == COPY_SIZE_MASK) { literalSizePart += readSizeBytes(); } - bytesRemaining = literalSizePart; + startLiteral(literalSizePart); state = State.IN_LITERAL; } @@ -178,52 +117,12 @@ public class BlockLZ4CompressorInputStream extends CompressorInputStream { return accum; } - private int readLiteral(final byte[] b, final int off, final int len) throws 
IOException { - final int avail = available(); - if (len > avail) { - tryToReadLiteral(len - avail); - } - return readFromBuffer(b, off, len); - } - - private void tryToReadLiteral(int bytesToRead) throws IOException { - final int reallyTryToRead = (int) Math.min(Math.min(bytesToRead, bytesRemaining), - buf.length - writeIndex); - final int bytesRead = reallyTryToRead > 0 - ? IOUtils.readFully(in, buf, writeIndex, reallyTryToRead) - : 0 /* happens for bytesRemaining == 0 */; - count(bytesRead); - if (reallyTryToRead != bytesRead) { - throw new IOException("Premature end of stream reading literal"); - } - writeIndex += reallyTryToRead; - bytesRemaining -= reallyTryToRead; - } - - private int readFromBuffer(final byte[] b, final int off, final int len) throws IOException { - final int readable = Math.min(len, available()); - if (readable > 0) { - System.arraycopy(buf, readIndex, b, off, readable); - readIndex += readable; - if (readIndex > 2 * WINDOW_SIZE) { - slideBuffer(); - } - } - size += readable; - return readable; - } - - private void slideBuffer() { - System.arraycopy(buf, WINDOW_SIZE, buf, 0, WINDOW_SIZE); - writeIndex -= WINDOW_SIZE; - readIndex -= WINDOW_SIZE; - } - /** * @return false if there is no more copy - this means this is the * last block of the stream. 
*/ private boolean initializeCopy() throws IOException { + int copyOffset = 0; try { copyOffset = (int) ByteUtils.fromLittleEndian(supplier, 2); } catch (IOException ex) { @@ -236,59 +135,12 @@ public class BlockLZ4CompressorInputStream extends CompressorInputStream { if (nextCopySize == COPY_SIZE_MASK) { copySize += readSizeBytes(); } - bytesRemaining = copySize + 4; // minimal match length 4 is encoded as 0 + // minimal match length 4 is encoded as 0 + startCopy(copyOffset, copySize + 4); state = State.IN_COPY; return true; } - private int readCopy(final byte[] b, final int off, final int len) throws IOException { - final int avail = available(); - if (len > avail) { - tryToCopy(len - avail); - } - return readFromBuffer(b, off, len); - } - - private void tryToCopy(int bytesToCopy) throws IOException { - // this will fit into the buffer without sliding and not - // require more than is available inside the copy - int copy = (int) Math.min(Math.min(bytesToCopy, bytesRemaining), - buf.length - writeIndex); - if (copy == 0) { - // NOP - } else if (copyOffset == 1) { // pretty common special case - final byte last = buf[writeIndex - 1]; - for (int i = 0; i < copy; i++) { - buf[writeIndex++] = last; - } - } else if (copy < copyOffset) { - System.arraycopy(buf, writeIndex - copyOffset, buf, writeIndex, copy); - writeIndex += copy; - } else { - final int fullRots = copy / copyOffset; - for (int i = 0; i < fullRots; i++) { - System.arraycopy(buf, writeIndex - copyOffset, buf, writeIndex, copyOffset); - writeIndex += copyOffset; - } - - final int pad = copy - (copyOffset * fullRots); - if (pad > 0) { - System.arraycopy(buf, writeIndex - copyOffset, buf, writeIndex, pad); - writeIndex += pad; - } - } - bytesRemaining -= copy; - } - - private int readOneByte() throws IOException { - final int b = in.read(); - if (b != -1) { - count(1); - return b & 0xFF; - } - return -1; - } - private enum State { NO_BLOCK, IN_LITERAL, LOOKING_FOR_COPY, IN_COPY, EOF } 
http://git-wip-us.apache.org/repos/asf/commons-compress/blob/51d03dda/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java new file mode 100644 index 0000000..ea9b8b2 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java @@ -0,0 +1,261 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lz77support; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.utils.ByteUtils; +import org.apache.commons.compress.utils.IOUtils; + +/** + * Encapsulates code common to LZ77 decompressors. + * + * <p>Assumes the stream consists of blocks of literal data and + * back-references (called copies) in any order. 
Of course the first + * block must be a literal block for the scheme to work.</p> + * + * @since 1.14 + */ +public abstract class AbstractLZ77CompressorInputStream extends CompressorInputStream { + + /** Size of the window - must be bigger than the biggest offset expected. */ + private final int windowSize; + + /** Buffer to write decompressed bytes to for back-references */ + private final byte[] buf; + + /** One behind the index of the last byte in the buffer that was written */ + private int writeIndex; + + /** Index of the next byte to be read. */ + private int readIndex; + + /** The underlying stream to read compressed data from */ + private final InputStream in; + + /** Number of bytes still to be read from the current literal or copy. */ + private long bytesRemaining; + + /** Offset of the current copy. */ + private int copyOffset; + + /** uncompressed size */ + private int size = 0; + + // used in no-arg read method + private final byte[] oneByte = new byte[1]; + + /** + * Supplier that delegates to {@link #readOneByte}. + */ + protected final ByteUtils.ByteSupplier supplier = new ByteUtils.ByteSupplier() { + @Override + public int getAsByte() throws IOException { + return readOneByte(); + } + }; + + /** + * Creates a new LZ77 input stream. + * + * @param is + * An InputStream to read compressed data from + * @param windowSize + * Size of the window kept for back-references, must be bigger than the biggest offset expected. + * + * @throws IOException if reading fails + */ + public AbstractLZ77CompressorInputStream(final InputStream is, int windowSize) throws IOException { + this.in = is; + this.windowSize = windowSize; + buf = new byte[3 * windowSize]; + writeIndex = readIndex = 0; + bytesRemaining = 0; + } + + /** {@inheritDoc} */ + @Override + public int read() throws IOException { + return read(oneByte, 0, 1) == -1 ? 
-1 : oneByte[0] & 0xFF; + } + + /** {@inheritDoc} */ + @Override + public void close() throws IOException { + in.close(); + } + + /** {@inheritDoc} */ + @Override + public int available() { + return writeIndex - readIndex; + } + + /** + * Get the uncompressed size of the stream + * + * @return the uncompressed size + */ + public int getSize() { + return size; + } + + /** + * Used by subclasses to signal the next block contains the given + * amount of literal data. + * @param length the length of the block + */ + protected final void startLiteral(long length) { + bytesRemaining = length; + } + + /** + * Is there still data remaining inside the current block? + * @return true if there is still data remaining inside the current block. + */ + protected final boolean hasMoreDataInBlock() { + return bytesRemaining > 0; + } + + /** + * Reads data from the current literal block. + * @param b buffer to write data to + * @param off offset to start writing to + * @param len maximum amount of data to read + * @return number of bytes read, may be 0. Will never return -1 as + * EOF-detection is the responsibility of the subclass + * @throws IOException if the underlying stream throws or signals + * an EOF before the amount of data promised for the block have + * been read + */ + protected final int readLiteral(final byte[] b, final int off, final int len) throws IOException { + final int avail = available(); + if (len > avail) { + tryToReadLiteral(len - avail); + } + return readFromBuffer(b, off, len); + } + + private void tryToReadLiteral(int bytesToRead) throws IOException { + final int reallyTryToRead = (int) Math.min(Math.min(bytesToRead, bytesRemaining), + buf.length - writeIndex); + final int bytesRead = reallyTryToRead > 0 + ? 
IOUtils.readFully(in, buf, writeIndex, reallyTryToRead) + : 0 /* happens for bytesRemaining == 0 */; + count(bytesRead); + if (reallyTryToRead != bytesRead) { + throw new IOException("Premature end of stream reading literal"); + } + writeIndex += reallyTryToRead; + bytesRemaining -= reallyTryToRead; + } + + private int readFromBuffer(final byte[] b, final int off, final int len) { + final int readable = Math.min(len, available()); + if (readable > 0) { + System.arraycopy(buf, readIndex, b, off, readable); + readIndex += readable; + if (readIndex > 2 * windowSize) { + slideBuffer(); + } + } + size += readable; + return readable; + } + + private void slideBuffer() { + System.arraycopy(buf, windowSize, buf, 0, windowSize); + writeIndex -= windowSize; + readIndex -= windowSize; + } + + /** + * Used by subclasses to signal the next block contains a back-reference with the given coordinates. + * @param offset the offset of the back-reference + * @param length the length of the back-reference + */ + protected final void startCopy(int offset, long length) { + copyOffset = offset; + bytesRemaining = length; + } + + /** + * Reads data from the current back-reference. + * @param b buffer to write data to + * @param off offset to start writing to + * @param len maximum amount of data to read + * @return number of bytes read, may be 0. 
Will never return -1 as + * EOF-detection is the responsibility of the subclass + */ + protected final int readCopy(final byte[] b, final int off, final int len) { + final int avail = available(); + if (len > avail) { + tryToCopy(len - avail); + } + return readFromBuffer(b, off, len); + } + + private void tryToCopy(int bytesToCopy) { + // this will fit into the buffer without sliding and not + // require more than is available inside the copy + int copy = (int) Math.min(Math.min(bytesToCopy, bytesRemaining), + buf.length - writeIndex); + if (copy == 0) { + // NOP + } else if (copyOffset == 1) { // pretty common special case + final byte last = buf[writeIndex - 1]; + for (int i = 0; i < copy; i++) { + buf[writeIndex++] = last; + } + } else if (copy < copyOffset) { + System.arraycopy(buf, writeIndex - copyOffset, buf, writeIndex, copy); + writeIndex += copy; + } else { + final int fullRots = copy / copyOffset; + for (int i = 0; i < fullRots; i++) { + System.arraycopy(buf, writeIndex - copyOffset, buf, writeIndex, copyOffset); + writeIndex += copyOffset; + } + + final int pad = copy - (copyOffset * fullRots); + if (pad > 0) { + System.arraycopy(buf, writeIndex - copyOffset, buf, writeIndex, pad); + writeIndex += pad; + } + } + bytesRemaining -= copy; + } + + /** + * Reads a single byte from the real input stream and ensures the data is accounted for. + * + * @return the byte read as value between 0 and 255 or -1 if EOF has been reached. + * @throws IOException if the underlying stream throws + */ + protected final int readOneByte() throws IOException { + final int b = in.read(); + if (b != -1) { + count(1); + return b & 0xFF; + } + return -1; + } +}
