Repository: commons-compress
Updated Branches:
  refs/heads/master 89bc17055 -> 1c382914c


COMPRESS-423 - Add ZStandard decompression support using Zstd-JNI


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/7984387a
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/7984387a
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/7984387a

Branch: refs/heads/master
Commit: 7984387af004fcfe1d1ee12e2c8e6b68f23be001
Parents: 89bc170
Author: Andre F de Miranda <trix...@users.noreply.github.com>
Authored: Sat Oct 14 17:57:19 2017 +1100
Committer: Stefan Bodewig <bode...@apache.org>
Committed: Tue Oct 17 20:17:01 2017 +0200

----------------------------------------------------------------------
 pom.xml                                         |   6 +
 .../compressors/CompressorStreamFactory.java    |  25 ++-
 .../zstandard/ZstdCompressorInputStream.java    |  95 +++++++++++
 .../compressors/zstandard/ZstdUtils.java        |  88 +++++++++++
 .../ZstdCompressorInputStreamTest.java          | 157 +++++++++++++++++++
 src/test/resources/bla.tar.zst                  | Bin 0 -> 473 bytes
 src/test/resources/zstandard.testdata           |   3 +
 src/test/resources/zstandard.testdata.zst       | Bin 0 -> 94 bytes
 8 files changed, 372 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/7984387a/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 31fc4cd..6a33b38 100644
--- a/pom.xml
+++ b/pom.xml
@@ -74,6 +74,12 @@ jar, tar, zip, dump, 7z, arj.
       <scope>test</scope>
     </dependency>
     <dependency>
+      <groupId>com.github.luben</groupId>
+      <artifactId>zstd-jni</artifactId>
+      <version>1.3.1-1</version>
+      <optional>true</optional>
+    </dependency>
+    <dependency>
       <groupId>org.brotli</groupId>
       <artifactId>dec</artifactId>
       <version>0.1.2</version>

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/7984387a/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java
 
b/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java
index 4bd22aa..b446963 100644
--- 
a/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java
+++ 
b/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java
@@ -55,6 +55,8 @@ import 
org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
 import org.apache.commons.compress.compressors.xz.XZCompressorOutputStream;
 import org.apache.commons.compress.compressors.xz.XZUtils;
 import org.apache.commons.compress.compressors.z.ZCompressorInputStream;
+import 
org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream;
+import org.apache.commons.compress.compressors.zstandard.ZstdUtils;
 import org.apache.commons.compress.utils.IOUtils;
 import org.apache.commons.compress.utils.Lists;
 import org.apache.commons.compress.utils.ServiceLoaderIterator;
@@ -191,6 +193,14 @@ public class CompressorStreamFactory implements 
CompressorStreamProvider {
     public static final String LZ4_FRAMED = "lz4-framed";
 
     /**
+     * Constant (value {@value}) used to identify the ZStandard compression
+     * algorithm. Not supported as an output stream type.
+     *
+     * @since 1.15
+     */
+    public static final String ZSTANDARD = "zst";
+
+    /**
      * Constructs a new sorted map from input stream provider names to provider
      * objects.
      *
@@ -279,7 +289,7 @@ public class CompressorStreamFactory implements 
CompressorStreamProvider {
     public static String getBrotli() {
         return BROTLI;
     }
-    
+
     public static String getBzip2() {
         return BZIP2;
     }
@@ -328,6 +338,10 @@ public class CompressorStreamFactory implements 
CompressorStreamProvider {
         return LZ4_BLOCK;
     }
 
+    public static String getZstandard() {
+        return ZSTANDARD;
+    }
+
     static void putAll(final Set<String> names, final CompressorStreamProvider 
provider,
             final TreeMap<String, CompressorStreamProvider> map) {
         for (final String name : names) {
@@ -555,6 +569,13 @@ public class CompressorStreamFactory implements 
CompressorStreamProvider {
                 return new XZCompressorInputStream(in, 
actualDecompressConcatenated, memoryLimitInKb);
             }
 
+            // Zstandard decoding relies on the optional zstd-jni dependency, so
+            // verify it can be loaded before creating the stream.
+            if (ZSTANDARD.equalsIgnoreCase(name)) {
+                if (!ZstdUtils.isZstdCompressionAvailable()) {
+                    throw new CompressorException("Zstandard compression is not available.");
+                }
+                return new ZstdCompressorInputStream(in);
+            }
+
             if (LZMA.equalsIgnoreCase(name)) {
                 if (!LZMAUtils.isLZMACompressionAvailable()) {
                     throw new CompressorException("LZMA compression is not 
available");
@@ -701,7 +722,7 @@ public class CompressorStreamFactory implements 
CompressorStreamProvider {
     @Override
     public Set<String> getInputStreamCompressorNames() {
         return Sets.newHashSet(GZIP, BROTLI, BZIP2, XZ, LZMA, PACK200, 
DEFLATE, SNAPPY_RAW, SNAPPY_FRAMED, Z, LZ4_BLOCK,
-            LZ4_FRAMED);
+            LZ4_FRAMED, ZSTANDARD);
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/7984387a/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStream.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStream.java
 
b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStream.java
new file mode 100644
index 0000000..1e5dd8d
--- /dev/null
+++ 
b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStream.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.compress.compressors.zstandard;
+
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import com.github.luben.zstd.ZstdInputStream;
+import org.apache.commons.compress.compressors.CompressorInputStream;
+
+/**
+ * {@link CompressorInputStream} implementation to decode a Zstandard encoded stream.
+ *
+ * <p>All stream operations are delegated to the {@code ZstdInputStream} of the
+ * <a href="https://github.com/luben/zstd-jni/">Zstandard JNI</a> library.</p>
+ *
+ * @since 1.15
+ */
+public class ZstdCompressorInputStream extends CompressorInputStream {
+
+    /** The Zstandard JNI stream that performs the actual decompression. */
+    private final ZstdInputStream decIS;
+
+    /**
+     * Wraps the given stream in a Zstandard decoder.
+     *
+     * @param in the stream to read the compressed data from
+     * @throws IOException if the underlying decoder signals an error
+     */
+    public ZstdCompressorInputStream(final InputStream in) throws IOException {
+        this.decIS = new ZstdInputStream(in);
+    }
+
+    @Override
+    public int available() throws IOException {
+        return decIS.available();
+    }
+
+    @Override
+    public void close() throws IOException {
+        decIS.close();
+    }
+
+    /**
+     * Reads decompressed bytes into the whole of {@code b}.
+     *
+     * <p>Routed through {@link #read(byte[], int, int)} instead of the wrapped
+     * stream so that the bytes read are passed to {@code count}, just like in
+     * the other read methods.</p>
+     *
+     * @param b the buffer to fill
+     * @return the number of bytes read, or -1 at the end of the stream
+     * @throws IOException if the underlying decoder signals an error
+     */
+    @Override
+    public int read(final byte[] b) throws IOException {
+        return read(b, 0, b.length);
+    }
+
+    @Override
+    public long skip(final long n) throws IOException {
+        return decIS.skip(n);
+    }
+
+    @Override
+    public void mark(final int readlimit) {
+        decIS.mark(readlimit);
+    }
+
+    @Override
+    public boolean markSupported() {
+        return decIS.markSupported();
+    }
+
+    /**
+     * Reads a single decompressed byte and counts it unless the end of the
+     * stream has been reached.
+     *
+     * @return the byte read, or -1 at the end of the stream
+     * @throws IOException if the underlying decoder signals an error
+     */
+    @Override
+    public int read() throws IOException {
+        final int ret = decIS.read();
+        count(ret == -1 ? 0 : 1);
+        return ret;
+    }
+
+    /**
+     * Reads up to {@code len} decompressed bytes into {@code buf} starting at
+     * {@code off} and passes the result to {@code count}.
+     *
+     * @param buf the buffer to read into
+     * @param off the offset in {@code buf} of the first byte to store
+     * @param len the maximum number of bytes to read
+     * @return the value returned by the wrapped stream's {@code read}
+     * @throws IOException if the underlying decoder signals an error
+     */
+    @Override
+    public int read(final byte[] buf, final int off, final int len) throws IOException {
+        final int ret = decIS.read(buf, off, len);
+        count(ret);
+        return ret;
+    }
+
+    @Override
+    public String toString() {
+        return decIS.toString();
+    }
+
+    @Override
+    public void reset() throws IOException {
+        decIS.reset();
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/7984387a/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java
 
b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java
new file mode 100644
index 0000000..0eb8fa1
--- /dev/null
+++ 
b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.commons.compress.compressors.zstandard;
+
+/**
+ * Utility code for the Zstandard compression format.
+ *
+ * <p>Reports whether the class {@code com.github.luben.zstd.ZstdInputStream}
+ * can be loaded and optionally caches the outcome of that check.</p>
+ *
+ * @ThreadSafe
+ * @since 1.15
+ */
+public class ZstdUtils {
+
+    static enum CachedAvailability { // DONT_CACHE: nothing cached, probe on every query
+        DONT_CACHE, CACHED_AVAILABLE, CACHED_UNAVAILABLE
+    }
+
+    private static volatile CachedAvailability cachedZstdAvailability; // current cache state
+
+    static {
+        cachedZstdAvailability = CachedAvailability.DONT_CACHE;
+        try {
+            Class.forName("org.osgi.framework.BundleEvent"); // loadable: caching stays off
+        } catch (final Exception ex) { // NOSONAR
+            setCacheZstdAvailablity(true); // OSGi class not loadable: cache the probe result
+        }
+    }
+
+    /** Private constructor to prevent instantiation of this utility class. */
+    private ZstdUtils() {
+    }
+
+    /**
+     * Are the classes required to support Zstandard compression available?
+     *
+     * <p>Returns the cached answer when one exists; otherwise the class lookup
+     * is performed on every call.</p>
+     *
+     * @return true if the classes required to support Zstandard compression are available
+     */
+    public static boolean isZstdCompressionAvailable() {
+        final CachedAvailability cachedResult = cachedZstdAvailability;
+        if (cachedResult != CachedAvailability.DONT_CACHE) {
+            return cachedResult == CachedAvailability.CACHED_AVAILABLE;
+        }
+        return internalIsZstdCompressionAvailable();
+    }
+
+    private static boolean internalIsZstdCompressionAvailable() { // uncached probe
+        try {
+            Class.forName("com.github.luben.zstd.ZstdInputStream");
+            return true;
+        } catch (NoClassDefFoundError | Exception error) { // any load failure counts as unavailable
+            return false;
+        }
+    }
+
+    /**
+     * Whether to cache the result of the Zstandard JNI availability check.
+     *
+     * <p>This defaults to {@code false} in an OSGi environment and {@code true} otherwise.</p>
+     *
+     * <p>Passing {@code false} discards any cached result. Passing {@code true}
+     * runs the check and stores its result only if nothing is cached yet.</p>
+     *
+     * @param doCache whether to cache the result
+     */
+    public static void setCacheZstdAvailablity(final boolean doCache) {
+        if (!doCache) {
+            cachedZstdAvailability = CachedAvailability.DONT_CACHE;
+        } else if (cachedZstdAvailability == CachedAvailability.DONT_CACHE) {
+            final boolean hasZstd = internalIsZstdCompressionAvailable();
+            cachedZstdAvailability = hasZstd ? 
CachedAvailability.CACHED_AVAILABLE
+                : CachedAvailability.CACHED_UNAVAILABLE;
+        }
+    }
+
+    // only exists to support unit tests: exposes the raw cache state
+    static CachedAvailability getCachedZstdAvailability() {
+        return cachedZstdAvailability;
+    }
+}

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/7984387a/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStreamTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStreamTest.java
 
b/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStreamTest.java
new file mode 100644
index 0000000..5ed276c
--- /dev/null
+++ 
b/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStreamTest.java
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.compress.compressors.zstandard;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.commons.compress.AbstractTestCase;
+import org.apache.commons.compress.compressors.CompressorInputStream;
+import org.apache.commons.compress.compressors.CompressorStreamFactory;
+import 
org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream;
+import org.apache.commons.compress.utils.IOUtils;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class ZstdCompressorInputStreamTest extends AbstractTestCase {
+
+    /**
+     * Test bridge works fine: decoding {@code zstandard.testdata.zst} one byte
+     * at a time must yield the bytes read from {@code zstandard.testdata}.
+     *
+     * @throws IOException if reading one of the test resources fails
+     */
+    @Test
+    public void testZstdDecode() throws IOException {
+        final File input = getFile("zstandard.testdata.zst");
+        final File expected = getFile("zstandard.testdata");
+        try (InputStream inputStream = new FileInputStream(input);
+            InputStream expectedStream = new FileInputStream(expected);
+            ZstdCompressorInputStream zstdInputStream = new 
ZstdCompressorInputStream(inputStream)) {
+            final byte[] b = new byte[97]; // NOTE(review): presumably the byte length of zstandard.testdata - confirm
+            IOUtils.readFully(expectedStream, b);
+            final ByteArrayOutputStream bos = new ByteArrayOutputStream();
+            int readByte = -1;
+            while((readByte = zstdInputStream.read()) != -1) { // exercises the single-byte read path
+                bos.write(readByte);
+            }
+            Assert.assertArrayEquals(b, bos.toByteArray());
+        }
+    }
+
+    @Test
+    public void testCachingIsEnabledByDefaultAndZstdUtilsPresent() { // expects the Zstandard classes to be loadable in the test run
+        assertEquals(ZstdUtils.CachedAvailability.CACHED_AVAILABLE, 
ZstdUtils.getCachedZstdAvailability());
+        assertTrue(ZstdUtils.isZstdCompressionAvailable());
+    }
+
+    @Test
+    public void testCanTurnOffCaching() {
+        try {
+            ZstdUtils.setCacheZstdAvailablity(false);
+            assertEquals(ZstdUtils.CachedAvailability.DONT_CACHE, 
ZstdUtils.getCachedZstdAvailability());
+            assertTrue(ZstdUtils.isZstdCompressionAvailable());
+        } finally {
+            ZstdUtils.setCacheZstdAvailablity(true); // switch caching back on for the other tests
+        }
+    }
+
+    @Test
+    public void testTurningOnCachingReEvaluatesAvailability() {
+        try {
+            ZstdUtils.setCacheZstdAvailablity(false);
+            assertEquals(ZstdUtils.CachedAvailability.DONT_CACHE, 
ZstdUtils.getCachedZstdAvailability());
+            ZstdUtils.setCacheZstdAvailablity(true);
+            assertEquals(ZstdUtils.CachedAvailability.CACHED_AVAILABLE, 
ZstdUtils.getCachedZstdAvailability());
+        } finally {
+            ZstdUtils.setCacheZstdAvailablity(true); // switch caching back on for the other tests
+        }
+    }
+
+    @Test
+    public void shouldBeAbleToSkipAByte() throws IOException {
+        final File input = getFile("zstandard.testdata.zst");
+        try (InputStream is = new FileInputStream(input)) {
+            final ZstdCompressorInputStream in =
+                    new ZstdCompressorInputStream(is);
+            Assert.assertEquals(1, in.skip(1));
+            in.close(); // NOTE(review): not reached if the assertion above fails
+        }
+    }
+
+    @Test
+    public void singleByteReadWorksAsExpected() throws IOException {
+
+        final File input = getFile("zstandard.testdata.zst");
+
+        final File original = getFile("zstandard.testdata");
+        final long originalFileLength = original.length();
+
+        byte[] originalFileContent = new byte[((int) originalFileLength)];
+
+        try (InputStream ois = new FileInputStream(original)) {
+            ois.read(originalFileContent); // NOTE(review): return value ignored, a short read would go unnoticed
+        }
+
+        try (InputStream is = new FileInputStream(input)) {
+            final ZstdCompressorInputStream in =
+                    new ZstdCompressorInputStream(is);
+
+            Assert.assertEquals(originalFileContent[0], in.read()); // first decoded byte must match the original
+            in.close();
+        }
+    }
+
+    @Test
+    public void singleByteReadReturnsMinusOneAtEof() throws IOException {
+        final File input = getFile("zstandard.testdata.zst");
+        try (InputStream is = new FileInputStream(input)) {
+            final ZstdCompressorInputStream in =
+                    new ZstdCompressorInputStream(is);
+            IOUtils.toByteArray(in); // drain the stream so the next read hits EOF
+            Assert.assertEquals(-1, in.read());
+            in.close();
+        }
+    }
+
+    @Test
+    public void testZstandardUnarchive() throws Exception { // only checks that the factory-created stream can be copied without an exception
+        final File input = getFile("bla.tar.zst");
+        final File output = new File(dir, "bla.tar");
+        try (InputStream is = new FileInputStream(input)) {
+            final CompressorInputStream in = new CompressorStreamFactory()
+                    .createCompressorInputStream("zst", is);
+            FileOutputStream out = null;
+            try {
+                out = new FileOutputStream(output);
+                IOUtils.copy(in, out);
+            } finally {
+                if (out != null) {
+                    out.close();
+                }
+                in.close();
+            }
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/7984387a/src/test/resources/bla.tar.zst
----------------------------------------------------------------------
diff --git a/src/test/resources/bla.tar.zst b/src/test/resources/bla.tar.zst
new file mode 100644
index 0000000..d5fd6e0
Binary files /dev/null and b/src/test/resources/bla.tar.zst differ

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/7984387a/src/test/resources/zstandard.testdata
----------------------------------------------------------------------
diff --git a/src/test/resources/zstandard.testdata 
b/src/test/resources/zstandard.testdata
new file mode 100644
index 0000000..e51bfd4
--- /dev/null
+++ b/src/test/resources/zstandard.testdata
@@ -0,0 +1,3 @@
+And as usual, instead of ipsum lorem we shall just state very clearly:
+
+Test test test chocolate

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/7984387a/src/test/resources/zstandard.testdata.zst
----------------------------------------------------------------------
diff --git a/src/test/resources/zstandard.testdata.zst 
b/src/test/resources/zstandard.testdata.zst
new file mode 100644
index 0000000..86c03fe
Binary files /dev/null and b/src/test/resources/zstandard.testdata.zst differ

Reply via email to