improve docs
Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/b1e52489 Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/b1e52489 Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/b1e52489 Branch: refs/heads/master Commit: b1e524891f908cd667b8c54b4a04b3af2e65969a Parents: cfc68ac Author: Stefan Bodewig <[email protected]> Authored: Wed Feb 8 17:55:08 2017 +0100 Committer: Stefan Bodewig <[email protected]> Committed: Wed Feb 8 17:55:08 2017 +0100 ---------------------------------------------------------------------- .../AbstractLZ77CompressorInputStream.java | 19 ++++++++++++++++--- .../compressors/lz77support/LZ77Compressor.java | 19 ++++++++++++------- 2 files changed, 28 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-compress/blob/b1e52489/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java index cd9fb7e..284b79f 100644 --- a/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java +++ b/src/main/java/org/apache/commons/compress/compressors/lz77support/AbstractLZ77CompressorInputStream.java @@ -41,7 +41,7 @@ import org.apache.commons.compress.utils.IOUtils; * implementation delegates to the no-arg version, leading to infinite * mutual recursion and a {@code StackOverflowError} otherwise.</p> * - * <p>The contract for subclasses {@code read} implementation is:</p> + * <p>The contract for subclasses' {@code read} implementation is:</p> * <ul> * * <li>keep track of the current state of the stream. Is it inside a @@ -77,10 +77,17 @@ public abstract class AbstractLZ77CompressorInputStream extends CompressorInputS /** Size of the window - must be bigger than the biggest offset expected. */ private final int windowSize; - /** Buffer to write decompressed bytes to for back-references */ + /** + * Buffer to write decompressed bytes to for back-references, will + * be three times windowSize big. + * + * <p>Three times so we can slide the whole buffer a windowSize to + * the left once we've read twice windowSize and still have enough + * data inside of it to satisfy back-references.</p> + */ private final byte[] buf; - /** One behind the index of the last byte in the buffer that was written */ + /** One behind the index of the last byte in the buffer that was written, i.e. the next position to write to */ private int writeIndex; /** Index of the next byte to be read. */ @@ -171,7 +178,9 @@ public abstract class AbstractLZ77CompressorInputStream extends CompressorInputS if (writeIndex != 0) { throw new IllegalStateException("the stream has already been read from, can't prefill anymore"); } + // we don't need more data than the big offset could refer to, so cap it int len = Math.min(windowSize, data.length); + // we need the last data as we are dealing with *back*-references System.arraycopy(data, data.length - len, buf, 0, len); writeIndex += len; readIndex += len; @@ -214,6 +223,7 @@ public abstract class AbstractLZ77CompressorInputStream extends CompressorInputS } private void tryToReadLiteral(int bytesToRead) throws IOException { + // min of "what is still inside the literal", "what does the user want" and "how muc can fit into the buffer" final int reallyTryToRead = (int) Math.min(Math.min(bytesToRead, bytesRemaining), buf.length - writeIndex); final int bytesRead = reallyTryToRead > 0 @@ -287,6 +297,9 @@ public abstract class AbstractLZ77CompressorInputStream extends CompressorInputS System.arraycopy(buf, writeIndex - backReferenceOffset, buf, writeIndex, copy); writeIndex += copy; } else { + // back-reference overlaps with the bytes created from it + // like go back two bytes and then copy six (by copying + // the last two bytes three time). final int fullRots = copy / backReferenceOffset; for (int i = 0; i < fullRots; i++) { System.arraycopy(buf, writeIndex - backReferenceOffset, buf, writeIndex, backReferenceOffset); http://git-wip-us.apache.org/repos/asf/commons-compress/blob/b1e52489/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java b/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java index 0dcdb2a..63b880a 100644 --- a/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java +++ b/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java @@ -38,9 +38,9 @@ import java.util.Arrays; * back-references - so it can be re-used. It follows the algorithm * explained in section 4 of RFC 1951 (DEFLATE) and currently doesn't * implement the "lazy match" optimization. The three-byte hash - * function used in this class is the same used by zlib and InfoZIP's - * ZIP implementation of DEFLATE. Strongly inspired by InfoZIP's - * implementation.</p> + * function used in this class is the same as the one used by zlib and + * InfoZIP's ZIP implementation of DEFLATE. The whole class is + * strongly inspired by InfoZIP's implementation.</p> * * <p>LZ77 is used vaguely here (as well as many other places that * talk about it :-), LZSS would likely be closer to the truth but @@ -49,7 +49,7 @@ import java.util.Arrays; * <p>The API consists of a compressor that is fed <code>byte</code>s * and emits {@link Block}s to a registered callback where the blocks * represent either {@link LiteralBlock literal blocks}, {@link - * BackReference back references} or {@link EOD end of data + * BackReference back-references} or {@link EOD end of data * markers}. In order to ensure the callback receives all information, * the {@code #finish} method must be used once all data has been fed * into the compressor.</p> @@ -220,7 +220,9 @@ public class LZ77Compressor { private int blockStart = 0; // position of the current match private int matchStart = NO_MATCH; - // number of insertString calls for the up to three last bytes of the last match + // number of missed insertString calls for the up to three last + // bytes of the last match that can only be performed once more + // data has been read private int missedInserts = 0; /** @@ -269,7 +271,7 @@ public class LZ77Compressor { */ public void compress(byte[] data, int off, int len) throws IOException { final int wSize = params.getWindowSize(); - while (len > wSize) { + while (len > wSize) { // chop into windowSize sized chunks doCompress(data, off, wSize); off += wSize; len -= wSize; @@ -311,8 +313,11 @@ public class LZ77Compressor { if (currentPosition != 0 || lookahead != 0) { throw new IllegalStateException("the compressor has already started to accept data, can't prefill anymore"); } + + // don't need more than windowSize for back-references final int len = Math.min(params.getWindowSize(), data.length); System.arraycopy(data, data.length - len, window, 0, len); + if (len >= NUMBER_OF_BYTES_IN_HASH) { initialize(); final int stop = len - NUMBER_OF_BYTES_IN_HASH + 1; @@ -320,7 +325,7 @@ public class LZ77Compressor { insertString(i); } missedInserts = NUMBER_OF_BYTES_IN_HASH - 1; - } else { + } else { // not enough data to hash anything missedInserts = len; } blockStart = currentPosition = len;
