COMPRESS-271 add structure to keep track of blocks to write
Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/0d0dfb14 Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/0d0dfb14 Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/0d0dfb14 Branch: refs/heads/master Commit: 0d0dfb142f721abb473a0324ceede467e195d368 Parents: ea4d697 Author: Stefan Bodewig <[email protected]> Authored: Wed Jan 18 21:29:17 2017 +0100 Committer: Stefan Bodewig <[email protected]> Committed: Wed Jan 18 21:29:17 2017 +0100 ---------------------------------------------------------------------- .../lz4/BlockLZ4CompressorOutputStream.java | 77 +++++++- .../compressors/lz77support/LZ77Compressor.java | 4 +- .../lz4/BlockLZ4CompressorOutputStreamTest.java | 181 +++++++++++++++++++ 3 files changed, 256 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-compress/blob/0d0dfb14/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java index 415e1f5..d9c47d4 100644 --- a/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java +++ b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java @@ -20,6 +20,9 @@ package org.apache.commons.compress.compressors.lz4; import java.io.IOException; import java.io.OutputStream; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; import org.apache.commons.compress.compressors.CompressorOutputStream; import org.apache.commons.compress.compressors.lz77support.LZ77Compressor; @@ -34,6 +37,10 @@ import org.apache.commons.compress.utils.ByteUtils; */ public class BlockLZ4CompressorOutputStream extends CompressorOutputStream { + private static final int MIN_BACK_REFERENCE_LENGTH = 4; + private static final int MIN_LENGTH_OF_LAST_LITERAL = 5; + private static final int MIN_OFFSET_OF_LAST_BACK_REFERENCE = 12; + /* The LZ4 block format has a few properties that make it less @@ -69,7 +76,6 @@ public class BlockLZ4CompressorOutputStream extends CompressorOutputStream { private final LZ77Compressor compressor; private final OutputStream os; - private final ByteUtils.ByteConsumer consumer; // used in one-arg write method private final byte[] oneByte = new byte[1]; @@ -86,10 +92,9 @@ public class BlockLZ4CompressorOutputStream extends CompressorOutputStream { */ public BlockLZ4CompressorOutputStream(final OutputStream os) throws IOException { this.os = os; - consumer = new ByteUtils.OutputStreamByteConsumer(os); int maxLen = BlockLZ4CompressorInputStream.WINDOW_SIZE - 1; - compressor = new LZ77Compressor(new Parameters(BlockLZ4CompressorInputStream.WINDOW_SIZE, 4, maxLen, maxLen, - maxLen), + compressor = new LZ77Compressor(new Parameters(BlockLZ4CompressorInputStream.WINDOW_SIZE, + MIN_BACK_REFERENCE_LENGTH, maxLen, maxLen, maxLen), new LZ77Compressor.Callback() { public void accept(LZ77Compressor.Block block) throws IOException { //System.err.println(block); @@ -141,4 +146,68 @@ public class BlockLZ4CompressorOutputStream extends CompressorOutputStream { private void writeFinalLiteralBlock() throws IOException { } + + final static class Pair { + private final List<byte[]> literals = new LinkedList<>(); + private int brOffset, brLength; + + void addLiteral(LZ77Compressor.LiteralBlock block) { + literals.add(Arrays.copyOfRange(block.getData(), block.getOffset(), + block.getOffset() + block.getLength())); + } + void setBackReference(LZ77Compressor.BackReference block) { + if (hasBackReference()) { + throw new IllegalStateException(); + } + brOffset = block.getOffset(); + brLength = block.getLength(); + } + boolean hasBackReference() { + return brOffset > 0; + } + boolean canBeWritten(int lengthOfBlocksAfterThisPair) { + return hasBackReference() + && lengthOfBlocksAfterThisPair >= MIN_LENGTH_OF_LAST_LITERAL + && lengthOfBlocksAfterThisPair + brOffset + brLength >= MIN_OFFSET_OF_LAST_BACK_REFERENCE; + } + int length() { + return literalLength() + brLength; + } + void writeTo(OutputStream out) throws IOException { + int litLength = literalLength(); + out.write(lengths(litLength, brLength)); + if (litLength >= BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK) { + writeLength(litLength - BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK, out); + } + for (byte[] b : literals) { + out.write(b); + } + if (hasBackReference()) { + ByteUtils.toLittleEndian(out, brOffset, 2); + if (brLength - MIN_BACK_REFERENCE_LENGTH >= BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK) { + writeLength(brLength - MIN_BACK_REFERENCE_LENGTH + - BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK, out); + } + } + } + private int literalLength() { + int length = 0; + for (byte[] b : literals) { + length += b.length; + } + return length; + } + private static int lengths(int litLength, int brLength) { + int l = litLength < 15 ? litLength : 15; + int br = brLength < 4 ? 0 : (brLength < 19 ? brLength - 4 : 15); + return (l << BlockLZ4CompressorInputStream.SIZE_BITS) | br; + } + private static void writeLength(int length, OutputStream out) throws IOException { + while (length >= 255) { + out.write(255); + length -= 255; + } + out.write(length); + } + } } http://git-wip-us.apache.org/repos/asf/commons-compress/blob/0d0dfb14/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java b/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java index 25bcde0..6755dd1 100644 --- a/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java +++ b/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java @@ -98,7 +98,7 @@ public class LZ77Compressor { public static final class LiteralBlock extends Block { private final byte[] data; private final int offset, length; - /* package private for tests */ LiteralBlock(byte[] data, int offset, int length) { + public LiteralBlock(byte[] data, int offset, int length) { this.data = data; this.offset = offset; this.length = length; @@ -138,7 +138,7 @@ public class LZ77Compressor { */ public static final class BackReference extends Block { private final int offset, length; - private BackReference(int offset, int length) { + public BackReference(int offset, int length) { this.offset = offset; this.length = length; } http://git-wip-us.apache.org/repos/asf/commons-compress/blob/0d0dfb14/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStreamTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStreamTest.java new file mode 100644 index 0000000..3a39a8e --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStreamTest.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lz4; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Arrays; + +import org.apache.commons.compress.compressors.lz77support.LZ77Compressor; +import org.junit.Assert; +import org.junit.Test; + +public class BlockLZ4CompressorOutputStreamTest { + + @Test + public void pairSeesbackReferenceWhenSet() { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + Assert.assertFalse(p.hasBackReference()); + p.setBackReference(new LZ77Compressor.BackReference(1, 4)); + Assert.assertTrue(p.hasBackReference()); + } + + @Test + public void canWriteBackReferenceFollowedByLongLiteral() { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + p.setBackReference(new LZ77Compressor.BackReference(1, 4)); + Assert.assertTrue(p.canBeWritten(11)); + } + + @Test + public void canWriteBackReferenceFollowedByShortLiteralIfOffsetIsBigEnough() { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + p.setBackReference(new LZ77Compressor.BackReference(10, 4)); + Assert.assertTrue(p.canBeWritten(5)); + } + + @Test + public void canWriteBackReferenceFollowedByShortLiteralIfLengthIsBigEnough() { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + p.setBackReference(new LZ77Compressor.BackReference(1, 10)); + Assert.assertTrue(p.canBeWritten(5)); + } + + @Test + public void cantWriteBackReferenceFollowedByLiteralThatIsTooShort() { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + p.setBackReference(new LZ77Compressor.BackReference(10, 14)); + Assert.assertFalse(p.canBeWritten(4)); + } + + @Test + public void cantWriteBackReferenceIfAccumulatedOffsetIsTooShort() { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + p.setBackReference(new LZ77Compressor.BackReference(1, 4)); + Assert.assertFalse(p.canBeWritten(5)); + } + + @Test + public void pairAccumulatesLengths() { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + p.setBackReference(new LZ77Compressor.BackReference(1, 4)); + byte[] b = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + p.addLiteral(new LZ77Compressor.LiteralBlock(b, 1, 4)); + p.addLiteral(new LZ77Compressor.LiteralBlock(b, 2, 5)); + Assert.assertEquals(13, p.length()); + } + + @Test + public void canWritePairWithoutLiterals() throws IOException { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + p.setBackReference(new LZ77Compressor.BackReference(1, 4)); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + p.writeTo(bos); + Assert.assertArrayEquals(new byte[] { 0, 1, 0 }, bos.toByteArray()); + } + + @Test + public void writesCorrectSizeFor19ByteLengthBackReference() throws IOException { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + p.setBackReference(new LZ77Compressor.BackReference(1, 19)); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + p.writeTo(bos); + Assert.assertArrayEquals(new byte[] { 15, 1, 0, 0 }, bos.toByteArray()); + } + + @Test + public void writesCorrectSizeFor273ByteLengthBackReference() throws IOException { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + p.setBackReference(new LZ77Compressor.BackReference(1, 273)); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + p.writeTo(bos); + Assert.assertArrayEquals(new byte[] { 15, 1, 0, (byte) 254 }, bos.toByteArray()); + } + + @Test + public void writesCorrectSizeFor274ByteLengthBackReference() throws IOException { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + p.setBackReference(new LZ77Compressor.BackReference(1, 274)); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + p.writeTo(bos); + Assert.assertArrayEquals(new byte[] { 15, 1, 0, (byte) 255, 0 }, bos.toByteArray()); + } + + @Test + public void canWritePairWithoutBackReference() throws IOException { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + byte[] b = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + p.addLiteral(new LZ77Compressor.LiteralBlock(b, 1, 4)); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + p.writeTo(bos); + Assert.assertArrayEquals(new byte[] { 4<<4, 2, 3, 4, 5 }, bos.toByteArray()); + } + + @Test + public void writesCorrectSizeFor15ByteLengthLiteral() throws IOException { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + byte[] b = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 9)); + p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 6)); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + p.writeTo(bos); + Assert.assertArrayEquals(new byte[] { (byte) (15<<4), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6 }, + bos.toByteArray()); + } + + @Test + public void writesCorrectSizeFor269ByteLengthLiteral() throws IOException { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + byte[] b = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + for (int i = 0; i < 26; i++) { + p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 10)); + } + p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 9)); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + p.writeTo(bos); + Assert.assertArrayEquals(new byte[] { (byte) (15<<4), (byte) 254, 1 }, + Arrays.copyOfRange(bos.toByteArray(), 0, 3)); + } + + @Test + public void writesCorrectSizeFor270ByteLengthLiteral() throws IOException { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + byte[] b = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + for (int i = 0; i < 27; i++) { + p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 10)); + } + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + p.writeTo(bos); + Assert.assertArrayEquals(new byte[] { (byte) (15<<4), (byte) 255, 0, 1 }, + Arrays.copyOfRange(bos.toByteArray(), 0, 4)); + } + + @Test + public void writesCompletePair() throws IOException { + BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair(); + byte[] b = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + p.addLiteral(new LZ77Compressor.LiteralBlock(b, 1, 4)); + b[2] = 19; + p.setBackReference(new LZ77Compressor.BackReference(1, 5)); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + p.writeTo(bos); + Assert.assertArrayEquals(new byte[] { (4<<4) + 1, 2, 3, 4, 5, 1, 0 }, + bos.toByteArray()); + } +}
