COMPRESS-271 add structure to keep track of blocks to write

Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/0d0dfb14
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/0d0dfb14
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/0d0dfb14

Branch: refs/heads/master
Commit: 0d0dfb142f721abb473a0324ceede467e195d368
Parents: ea4d697
Author: Stefan Bodewig <[email protected]>
Authored: Wed Jan 18 21:29:17 2017 +0100
Committer: Stefan Bodewig <[email protected]>
Committed: Wed Jan 18 21:29:17 2017 +0100

----------------------------------------------------------------------
 .../lz4/BlockLZ4CompressorOutputStream.java     |  77 +++++++-
 .../compressors/lz77support/LZ77Compressor.java |   4 +-
 .../lz4/BlockLZ4CompressorOutputStreamTest.java | 181 +++++++++++++++++++
 3 files changed, 256 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/0d0dfb14/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java
 
b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java
index 415e1f5..d9c47d4 100644
--- 
a/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java
+++ 
b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java
@@ -20,6 +20,9 @@ package org.apache.commons.compress.compressors.lz4;
 
 import java.io.IOException;
 import java.io.OutputStream;
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.List;
 
 import org.apache.commons.compress.compressors.CompressorOutputStream;
 import org.apache.commons.compress.compressors.lz77support.LZ77Compressor;
@@ -34,6 +37,10 @@ import org.apache.commons.compress.utils.ByteUtils;
  */
 public class BlockLZ4CompressorOutputStream extends CompressorOutputStream {
 
+    private static final int MIN_BACK_REFERENCE_LENGTH = 4;
+    private static final int MIN_LENGTH_OF_LAST_LITERAL = 5;
+    private static final int MIN_OFFSET_OF_LAST_BACK_REFERENCE = 12;
+
     /*
 
       The LZ4 block format has a few properties that make it less
@@ -69,7 +76,6 @@ public class BlockLZ4CompressorOutputStream extends 
CompressorOutputStream {
 
     private final LZ77Compressor compressor;
     private final OutputStream os;
-    private final ByteUtils.ByteConsumer consumer;
 
     // used in one-arg write method
     private final byte[] oneByte = new byte[1];
@@ -86,10 +92,9 @@ public class BlockLZ4CompressorOutputStream extends 
CompressorOutputStream {
      */
     public BlockLZ4CompressorOutputStream(final OutputStream os) throws 
IOException {
         this.os = os;
-        consumer = new ByteUtils.OutputStreamByteConsumer(os);
         int maxLen = BlockLZ4CompressorInputStream.WINDOW_SIZE - 1;
-        compressor = new LZ77Compressor(new 
Parameters(BlockLZ4CompressorInputStream.WINDOW_SIZE, 4, maxLen, maxLen,
-            maxLen),
+        compressor = new LZ77Compressor(new 
Parameters(BlockLZ4CompressorInputStream.WINDOW_SIZE,
+            MIN_BACK_REFERENCE_LENGTH, maxLen, maxLen, maxLen),
             new LZ77Compressor.Callback() {
                 public void accept(LZ77Compressor.Block block) throws 
IOException {
                     //System.err.println(block);
@@ -141,4 +146,68 @@ public class BlockLZ4CompressorOutputStream extends 
CompressorOutputStream {
 
     private void writeFinalLiteralBlock() throws IOException {
     }
+
+    final static class Pair {
+        private final List<byte[]> literals = new LinkedList<>();
+        private int brOffset, brLength;
+
+        void addLiteral(LZ77Compressor.LiteralBlock block) {
+            literals.add(Arrays.copyOfRange(block.getData(), block.getOffset(),
+                block.getOffset() + block.getLength()));
+        }
+        void setBackReference(LZ77Compressor.BackReference block) {
+            if (hasBackReference()) {
+                throw new IllegalStateException();
+            }
+            brOffset = block.getOffset();
+            brLength = block.getLength();
+        }
+        boolean hasBackReference() {
+            return brOffset > 0;
+        }
+        boolean canBeWritten(int lengthOfBlocksAfterThisPair) {
+            return hasBackReference()
+                && lengthOfBlocksAfterThisPair >= MIN_LENGTH_OF_LAST_LITERAL
+                && lengthOfBlocksAfterThisPair + brOffset + brLength >= 
MIN_OFFSET_OF_LAST_BACK_REFERENCE;
+        }
+        int length() {
+            return literalLength() + brLength;
+        }
+        void writeTo(OutputStream out) throws IOException {
+            int litLength = literalLength();
+            out.write(lengths(litLength, brLength));
+            if (litLength >= 
BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK) {
+                writeLength(litLength - 
BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK, out);
+            }
+            for (byte[] b : literals) {
+                out.write(b);
+            }
+            if (hasBackReference()) {
+                ByteUtils.toLittleEndian(out, brOffset, 2);
+                if (brLength - MIN_BACK_REFERENCE_LENGTH >= 
BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK) {
+                    writeLength(brLength - MIN_BACK_REFERENCE_LENGTH
+                        - 
BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK, out);
+                }
+            }
+        }
+        private int literalLength() {
+            int length = 0;
+            for (byte[] b : literals) {
+                length += b.length;
+            }
+            return length;
+        }
+        private static int lengths(int litLength, int brLength) {
+            int l = litLength < 15 ? litLength : 15;
+            int br = brLength < 4 ? 0 : (brLength < 19 ? brLength - 4 : 15);
+            return (l << BlockLZ4CompressorInputStream.SIZE_BITS) | br;
+        }
+        private static void writeLength(int length, OutputStream out) throws 
IOException {
+            while (length >= 255) {
+                out.write(255);
+                length -= 255;
+            }
+            out.write(length);
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/0d0dfb14/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
 
b/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
index 25bcde0..6755dd1 100644
--- 
a/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
+++ 
b/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
@@ -98,7 +98,7 @@ public class LZ77Compressor {
     public static final class LiteralBlock extends Block {
         private final byte[] data;
         private final int offset, length;
-        /* package private for tests */ LiteralBlock(byte[] data, int offset, 
int length) {
+        public LiteralBlock(byte[] data, int offset, int length) {
             this.data = data;
             this.offset = offset;
             this.length = length;
@@ -138,7 +138,7 @@ public class LZ77Compressor {
      */
     public static final class BackReference extends Block {
         private final int offset, length;
-        private BackReference(int offset, int length) {
+        public BackReference(int offset, int length) {
             this.offset = offset;
             this.length = length;
         }

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/0d0dfb14/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStreamTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStreamTest.java
 
b/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStreamTest.java
new file mode 100644
index 0000000..3a39a8e
--- /dev/null
+++ 
b/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStreamTest.java
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.commons.compress.compressors.lz4;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.commons.compress.compressors.lz77support.LZ77Compressor;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class BlockLZ4CompressorOutputStreamTest {
+
+    @Test
+    public void pairSeesbackReferenceWhenSet() {
+        BlockLZ4CompressorOutputStream.Pair p = new 
BlockLZ4CompressorOutputStream.Pair();
+        Assert.assertFalse(p.hasBackReference());
+        p.setBackReference(new LZ77Compressor.BackReference(1, 4));
+        Assert.assertTrue(p.hasBackReference());
+    }
+
+    @Test
+    public void canWriteBackReferenceFollowedByLongLiteral() {
+        BlockLZ4CompressorOutputStream.Pair p = new 
BlockLZ4CompressorOutputStream.Pair();
+        p.setBackReference(new LZ77Compressor.BackReference(1, 4));
+        Assert.assertTrue(p.canBeWritten(11));
+    }
+
+    @Test
+    public void 
canWriteBackReferenceFollowedByShortLiteralIfOffsetIsBigEnough() {
+        BlockLZ4CompressorOutputStream.Pair p = new 
BlockLZ4CompressorOutputStream.Pair();
+        p.setBackReference(new LZ77Compressor.BackReference(10, 4));
+        Assert.assertTrue(p.canBeWritten(5));
+    }
+
+    @Test
+    public void 
canWriteBackReferenceFollowedByShortLiteralIfLengthIsBigEnough() {
+        BlockLZ4CompressorOutputStream.Pair p = new 
BlockLZ4CompressorOutputStream.Pair();
+        p.setBackReference(new LZ77Compressor.BackReference(1, 10));
+        Assert.assertTrue(p.canBeWritten(5));
+    }
+
+    @Test
+    public void cantWriteBackReferenceFollowedByLiteralThatIsTooShort() {
+        BlockLZ4CompressorOutputStream.Pair p = new 
BlockLZ4CompressorOutputStream.Pair();
+        p.setBackReference(new LZ77Compressor.BackReference(10, 14));
+        Assert.assertFalse(p.canBeWritten(4));
+    }
+
+    @Test
+    public void cantWriteBackReferenceIfAccumulatedOffsetIsTooShort() {
+        BlockLZ4CompressorOutputStream.Pair p = new 
BlockLZ4CompressorOutputStream.Pair();
+        p.setBackReference(new LZ77Compressor.BackReference(1, 4));
+        Assert.assertFalse(p.canBeWritten(5));
+    }
+
+    @Test
+    public void pairAccumulatesLengths() {
+        BlockLZ4CompressorOutputStream.Pair p = new 
BlockLZ4CompressorOutputStream.Pair();
+        p.setBackReference(new LZ77Compressor.BackReference(1, 4));
+        byte[] b = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+        p.addLiteral(new LZ77Compressor.LiteralBlock(b, 1, 4));
+        p.addLiteral(new LZ77Compressor.LiteralBlock(b, 2, 5));
+        Assert.assertEquals(13, p.length());
+    }
+
+    @Test
+    public void canWritePairWithoutLiterals() throws IOException {
+        BlockLZ4CompressorOutputStream.Pair p = new 
BlockLZ4CompressorOutputStream.Pair();
+        p.setBackReference(new LZ77Compressor.BackReference(1, 4));
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        p.writeTo(bos);
+        Assert.assertArrayEquals(new byte[] { 0, 1, 0 }, bos.toByteArray());
+    }
+
+    @Test
+    public void writesCorrectSizeFor19ByteLengthBackReference() throws 
IOException {
+        BlockLZ4CompressorOutputStream.Pair p = new 
BlockLZ4CompressorOutputStream.Pair();
+        p.setBackReference(new LZ77Compressor.BackReference(1, 19));
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        p.writeTo(bos);
+        Assert.assertArrayEquals(new byte[] { 15, 1, 0, 0 }, 
bos.toByteArray());
+    }
+
+    @Test
+    public void writesCorrectSizeFor273ByteLengthBackReference() throws 
IOException {
+        BlockLZ4CompressorOutputStream.Pair p = new 
BlockLZ4CompressorOutputStream.Pair();
+        p.setBackReference(new LZ77Compressor.BackReference(1, 273));
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        p.writeTo(bos);
+        Assert.assertArrayEquals(new byte[] { 15, 1, 0, (byte) 254 }, 
bos.toByteArray());
+    }
+
+    @Test
+    public void writesCorrectSizeFor274ByteLengthBackReference() throws 
IOException {
+        BlockLZ4CompressorOutputStream.Pair p = new 
BlockLZ4CompressorOutputStream.Pair();
+        p.setBackReference(new LZ77Compressor.BackReference(1, 274));
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        p.writeTo(bos);
+        Assert.assertArrayEquals(new byte[] { 15, 1, 0, (byte) 255, 0 }, 
bos.toByteArray());
+    }
+
+    @Test
+    public void canWritePairWithoutBackReference() throws IOException {
+        BlockLZ4CompressorOutputStream.Pair p = new 
BlockLZ4CompressorOutputStream.Pair();
+        byte[] b = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+        p.addLiteral(new LZ77Compressor.LiteralBlock(b, 1, 4));
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        p.writeTo(bos);
+        Assert.assertArrayEquals(new byte[] { 4<<4, 2, 3, 4, 5 }, 
bos.toByteArray());
+    }
+
+    @Test
+    public void writesCorrectSizeFor15ByteLengthLiteral() throws IOException {
+        BlockLZ4CompressorOutputStream.Pair p = new 
BlockLZ4CompressorOutputStream.Pair();
+        byte[] b = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+        p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 9));
+        p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 6));
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        p.writeTo(bos);
+        Assert.assertArrayEquals(new byte[] { (byte) (15<<4), 0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6 },
+            bos.toByteArray());
+    }
+
+    @Test
+    public void writesCorrectSizeFor269ByteLengthLiteral() throws IOException {
+        BlockLZ4CompressorOutputStream.Pair p = new 
BlockLZ4CompressorOutputStream.Pair();
+        byte[] b = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+        for (int i = 0; i < 26; i++) {
+            p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 10));
+        }
+        p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 9));
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        p.writeTo(bos);
+        Assert.assertArrayEquals(new byte[] { (byte) (15<<4), (byte) 254, 1 },
+            Arrays.copyOfRange(bos.toByteArray(), 0, 3));
+    }
+
+    @Test
+    public void writesCorrectSizeFor270ByteLengthLiteral() throws IOException {
+        BlockLZ4CompressorOutputStream.Pair p = new 
BlockLZ4CompressorOutputStream.Pair();
+        byte[] b = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+        for (int i = 0; i < 27; i++) {
+            p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 10));
+        }
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        p.writeTo(bos);
+        Assert.assertArrayEquals(new byte[] { (byte) (15<<4), (byte) 255, 0, 1 
},
+            Arrays.copyOfRange(bos.toByteArray(), 0, 4));
+    }
+
+    @Test
+    public void writesCompletePair() throws IOException {
+        BlockLZ4CompressorOutputStream.Pair p = new 
BlockLZ4CompressorOutputStream.Pair();
+        byte[] b = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+        p.addLiteral(new LZ77Compressor.LiteralBlock(b, 1, 4));
+        b[2] = 19;
+        p.setBackReference(new LZ77Compressor.BackReference(1, 5));
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        p.writeTo(bos);
+        Assert.assertArrayEquals(new byte[] { (4<<4) + 1, 2, 3, 4, 5, 1, 0 },
+            bos.toByteArray());
+    }
+}

Reply via email to