Repository: commons-compress Updated Branches: refs/heads/master 89bc17055 -> 1c382914c
COMPRESS-423 - Add ZStandard decompression support using Zstd-JNI Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/7984387a Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/7984387a Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/7984387a Branch: refs/heads/master Commit: 7984387af004fcfe1d1ee12e2c8e6b68f23be001 Parents: 89bc170 Author: Andre F de Miranda <trix...@users.noreply.github.com> Authored: Sat Oct 14 17:57:19 2017 +1100 Committer: Stefan Bodewig <bode...@apache.org> Committed: Tue Oct 17 20:17:01 2017 +0200 ---------------------------------------------------------------------- pom.xml | 6 + .../compressors/CompressorStreamFactory.java | 25 ++- .../zstandard/ZstdCompressorInputStream.java | 95 +++++++++++ .../compressors/zstandard/ZstdUtils.java | 88 +++++++++++ .../ZstdCompressorInputStreamTest.java | 157 +++++++++++++++++++ src/test/resources/bla.tar.zst | Bin 0 -> 473 bytes src/test/resources/zstandard.testdata | 3 + src/test/resources/zstandard.testdata.zst | Bin 0 -> 94 bytes 8 files changed, 372 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-compress/blob/7984387a/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 31fc4cd..6a33b38 100644 --- a/pom.xml +++ b/pom.xml @@ -74,6 +74,12 @@ jar, tar, zip, dump, 7z, arj. 
<scope>test</scope> </dependency> <dependency> + <groupId>com.github.luben</groupId> + <artifactId>zstd-jni</artifactId> + <version>1.3.1-1</version> + <optional>true</optional> + </dependency> + <dependency> <groupId>org.brotli</groupId> <artifactId>dec</artifactId> <version>0.1.2</version> http://git-wip-us.apache.org/repos/asf/commons-compress/blob/7984387a/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java b/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java index 4bd22aa..b446963 100644 --- a/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java +++ b/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java @@ -55,6 +55,8 @@ import org.apache.commons.compress.compressors.xz.XZCompressorInputStream; import org.apache.commons.compress.compressors.xz.XZCompressorOutputStream; import org.apache.commons.compress.compressors.xz.XZUtils; import org.apache.commons.compress.compressors.z.ZCompressorInputStream; +import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream; +import org.apache.commons.compress.compressors.zstandard.ZstdUtils; import org.apache.commons.compress.utils.IOUtils; import org.apache.commons.compress.utils.Lists; import org.apache.commons.compress.utils.ServiceLoaderIterator; @@ -191,6 +193,14 @@ public class CompressorStreamFactory implements CompressorStreamProvider { public static final String LZ4_FRAMED = "lz4-framed"; /** + * Constant (value {@value}) used to identify the ZStandard compression + * algorithm. Not supported as an output stream type. + * + * @since 1.15 + */ + public static final String ZSTANDARD = "zst"; + + /** * Constructs a new sorted map from input stream provider names to provider * objects. 
* @@ -279,7 +289,7 @@ public class CompressorStreamFactory implements CompressorStreamProvider { public static String getBrotli() { return BROTLI; } - + public static String getBzip2() { return BZIP2; } @@ -328,6 +338,10 @@ public class CompressorStreamFactory implements CompressorStreamProvider { return LZ4_BLOCK; } + public static String getZstandard() { + return ZSTANDARD; + } + static void putAll(final Set<String> names, final CompressorStreamProvider provider, final TreeMap<String, CompressorStreamProvider> map) { for (final String name : names) { @@ -555,6 +569,13 @@ public class CompressorStreamFactory implements CompressorStreamProvider { return new XZCompressorInputStream(in, actualDecompressConcatenated, memoryLimitInKb); } + if (ZSTANDARD.equalsIgnoreCase(name)) { + if (!ZstdUtils.isZstdCompressionAvailable()) { + throw new CompressorException("Zstandard compression is not available."); + } + return new ZstdCompressorInputStream(in); + } + if (LZMA.equalsIgnoreCase(name)) { if (!LZMAUtils.isLZMACompressionAvailable()) { throw new CompressorException("LZMA compression is not available"); @@ -701,7 +722,7 @@ public class CompressorStreamFactory implements CompressorStreamProvider { @Override public Set<String> getInputStreamCompressorNames() { return Sets.newHashSet(GZIP, BROTLI, BZIP2, XZ, LZMA, PACK200, DEFLATE, SNAPPY_RAW, SNAPPY_FRAMED, Z, LZ4_BLOCK, - LZ4_FRAMED); + LZ4_FRAMED, ZSTANDARD); } @Override http://git-wip-us.apache.org/repos/asf/commons-compress/blob/7984387a/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStream.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStream.java new file mode 100644 index 0000000..1e5dd8d --- /dev/null +++ 
b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStream.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.compress.compressors.zstandard; + + +import java.io.IOException; +import java.io.InputStream; + +import com.github.luben.zstd.ZstdInputStream; +import org.apache.commons.compress.compressors.CompressorInputStream; + +/** + * {@link CompressorInputStream} implementation to decode Zstandard encoded stream. 
+ * Library relies on <a href="https://github.com/luben/zstd-jni/">Zstandard JNI</a> + * + * @since 1.15 + */ +public class ZstdCompressorInputStream extends CompressorInputStream { + + private final com.github.luben.zstd.ZstdInputStream decIS; + + public ZstdCompressorInputStream(final InputStream in) throws IOException { + this.decIS = new ZstdInputStream(in); + } + + @Override + public int available() throws IOException { + return decIS.available(); + } + + @Override + public void close() throws IOException { + decIS.close(); + } + + @Override + public int read(final byte[] b) throws IOException { + return decIS.read(b); + } + + @Override + public long skip(final long n) throws IOException { + return decIS.skip(n); + } + + @Override + public void mark(final int readlimit) { + decIS.mark(readlimit); + } + + @Override + public boolean markSupported() { + return decIS.markSupported(); + } + + @Override + public int read() throws IOException { + final int ret = decIS.read(); + count(ret == -1 ? 
0 : 1); + return ret; + } + + @Override + public int read(final byte[] buf, final int off, final int len) throws IOException { + final int ret = decIS.read(buf, off, len); + count(ret); + return ret; + } + + @Override + public String toString() { + return decIS.toString(); + } + + @Override + public void reset() throws IOException { + decIS.reset(); + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/commons-compress/blob/7984387a/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java new file mode 100644 index 0000000..0eb8fa1 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.zstandard; + +/** + * Utility code for the Zstandard compression format. 
+ * @ThreadSafe + * @since 1.15 + */ +public class ZstdUtils { + + static enum CachedAvailability { + DONT_CACHE, CACHED_AVAILABLE, CACHED_UNAVAILABLE + } + + private static volatile CachedAvailability cachedZstdAvailability; + + static { + cachedZstdAvailability = CachedAvailability.DONT_CACHE; + try { + Class.forName("org.osgi.framework.BundleEvent"); + } catch (final Exception ex) { // NOSONAR + setCacheZstdAvailablity(true); + } + } + + /** Private constructor to prevent instantiation of this utility class. */ + private ZstdUtils() { + } + + /** + * Are the classes required to support Zstandard compression available? + * @return true if the classes required to support Zstandard compression are available + */ + public static boolean isZstdCompressionAvailable() { + final CachedAvailability cachedResult = cachedZstdAvailability; + if (cachedResult != CachedAvailability.DONT_CACHE) { + return cachedResult == CachedAvailability.CACHED_AVAILABLE; + } + return internalIsZstdCompressionAvailable(); + } + + private static boolean internalIsZstdCompressionAvailable() { + try { + Class.forName("com.github.luben.zstd.ZstdInputStream"); + return true; + } catch (NoClassDefFoundError | Exception error) { + return false; + } + } + + /** + * Whether to cache the result of the Zstd-JNI availability check. + * + * <p>This defaults to {@code false} in an OSGi environment and {@code true} otherwise.</p> + * @param doCache whether to cache the result + */ + public static void setCacheZstdAvailablity(final boolean doCache) { + if (!doCache) { + cachedZstdAvailability = CachedAvailability.DONT_CACHE; + } else if (cachedZstdAvailability == CachedAvailability.DONT_CACHE) { + final boolean hasZstd = internalIsZstdCompressionAvailable(); + cachedZstdAvailability = hasZstd ? 
CachedAvailability.CACHED_AVAILABLE + : CachedAvailability.CACHED_UNAVAILABLE; + } + } + + // only exists to support unit tests + static CachedAvailability getCachedZstdAvailability() { + return cachedZstdAvailability; + } +} http://git-wip-us.apache.org/repos/asf/commons-compress/blob/7984387a/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStreamTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStreamTest.java new file mode 100644 index 0000000..5ed276c --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorInputStreamTest.java @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.commons.compress.compressors.zstandard; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Test; + +public class ZstdCompressorInputStreamTest extends AbstractTestCase { + + /** + * Tests that the bridge to Zstd-JNI decodes a stream correctly. + * @throws IOException if the test data cannot be read + */ + @Test + public void testZstdDecode() throws IOException { + final File input = getFile("zstandard.testdata.zst"); + final File expected = getFile("zstandard.testdata"); + try (InputStream inputStream = new FileInputStream(input); + InputStream expectedStream = new FileInputStream(expected); + ZstdCompressorInputStream zstdInputStream = new ZstdCompressorInputStream(inputStream)) { + final byte[] b = new byte[97]; + IOUtils.readFully(expectedStream, b); + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + int readByte = -1; + while((readByte = zstdInputStream.read()) != -1) { + bos.write(readByte); + } + Assert.assertArrayEquals(b, bos.toByteArray()); + } + } + + @Test + public void testCachingIsEnabledByDefaultAndZstdUtilsPresent() { + assertEquals(ZstdUtils.CachedAvailability.CACHED_AVAILABLE, ZstdUtils.getCachedZstdAvailability()); + assertTrue(ZstdUtils.isZstdCompressionAvailable()); + } + + @Test + public void testCanTurnOffCaching() { + try { + ZstdUtils.setCacheZstdAvailablity(false); + assertEquals(ZstdUtils.CachedAvailability.DONT_CACHE, ZstdUtils.getCachedZstdAvailability()); + 
assertTrue(ZstdUtils.isZstdCompressionAvailable()); + } finally { + ZstdUtils.setCacheZstdAvailablity(true); + } + } + + @Test + public void testTurningOnCachingReEvaluatesAvailability() { + try { + ZstdUtils.setCacheZstdAvailablity(false); + assertEquals(ZstdUtils.CachedAvailability.DONT_CACHE, ZstdUtils.getCachedZstdAvailability()); + ZstdUtils.setCacheZstdAvailablity(true); + assertEquals(ZstdUtils.CachedAvailability.CACHED_AVAILABLE, ZstdUtils.getCachedZstdAvailability()); + } finally { + ZstdUtils.setCacheZstdAvailablity(true); + } + } + + @Test + public void shouldBeAbleToSkipAByte() throws IOException { + final File input = getFile("zstandard.testdata.zst"); + try (InputStream is = new FileInputStream(input)) { + final ZstdCompressorInputStream in = + new ZstdCompressorInputStream(is); + Assert.assertEquals(1, in.skip(1)); + in.close(); + } + } + + @Test + public void singleByteReadWorksAsExpected() throws IOException { + + final File input = getFile("zstandard.testdata.zst"); + + final File original = getFile("zstandard.testdata"); + final long originalFileLength = original.length(); + + byte[] originalFileContent = new byte[((int) originalFileLength)]; + + try (InputStream ois = new FileInputStream(original)) { + ois.read(originalFileContent); + } + + try (InputStream is = new FileInputStream(input)) { + final ZstdCompressorInputStream in = + new ZstdCompressorInputStream(is); + + Assert.assertEquals(originalFileContent[0], in.read()); + in.close(); + } + } + + @Test + public void singleByteReadReturnsMinusOneAtEof() throws IOException { + final File input = getFile("zstandard.testdata.zst"); + try (InputStream is = new FileInputStream(input)) { + final ZstdCompressorInputStream in = + new ZstdCompressorInputStream(is); + IOUtils.toByteArray(in); + Assert.assertEquals(-1, in.read()); + in.close(); + } + } + + @Test + public void testZstandardUnarchive() throws Exception { + final File input = getFile("bla.tar.zst"); + final File output = new File(dir, 
"bla.tar"); + try (InputStream is = new FileInputStream(input)) { + final CompressorInputStream in = new CompressorStreamFactory() + .createCompressorInputStream("zst", is); + FileOutputStream out = null; + try { + out = new FileOutputStream(output); + IOUtils.copy(in, out); + } finally { + if (out != null) { + out.close(); + } + in.close(); + } + } + } + +} http://git-wip-us.apache.org/repos/asf/commons-compress/blob/7984387a/src/test/resources/bla.tar.zst ---------------------------------------------------------------------- diff --git a/src/test/resources/bla.tar.zst b/src/test/resources/bla.tar.zst new file mode 100644 index 0000000..d5fd6e0 Binary files /dev/null and b/src/test/resources/bla.tar.zst differ http://git-wip-us.apache.org/repos/asf/commons-compress/blob/7984387a/src/test/resources/zstandard.testdata ---------------------------------------------------------------------- diff --git a/src/test/resources/zstandard.testdata b/src/test/resources/zstandard.testdata new file mode 100644 index 0000000..e51bfd4 --- /dev/null +++ b/src/test/resources/zstandard.testdata @@ -0,0 +1,3 @@ +And as usual, instead of ipsum lorem we shall just state very clearly: + +Test test test chocolate http://git-wip-us.apache.org/repos/asf/commons-compress/blob/7984387a/src/test/resources/zstandard.testdata.zst ---------------------------------------------------------------------- diff --git a/src/test/resources/zstandard.testdata.zst b/src/test/resources/zstandard.testdata.zst new file mode 100644 index 0000000..86c03fe Binary files /dev/null and b/src/test/resources/zstandard.testdata.zst differ