COMPRESS-352 add support for IWA files. Apple has created a Snappy dialect used in iWork archives.
The test case is copied from Tika. Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/18daf66b Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/18daf66b Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/18daf66b Branch: refs/heads/master Commit: 18daf66b2ccf4c7df9618c9a7067ab56bfa96593 Parents: cfd5387 Author: Stefan Bodewig <bode...@apache.org> Authored: Sun May 22 15:00:14 2016 +0200 Committer: Stefan Bodewig <bode...@apache.org> Committed: Sun May 22 15:01:20 2016 +0200 ---------------------------------------------------------------------- .../FramedSnappyCompressorInputStream.java | 32 ++++++++++-- .../compressors/snappy/FramedSnappyDialect.java | 52 +++++++++++++++++++ .../FramedSnappyCompressorInputStreamTest.java | 27 ++++++++++ src/test/resources/testNumbersNew.numbers | Bin 0 -> 179147 bytes 4 files changed, 107 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-compress/blob/18daf66b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStream.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStream.java index 2ef2dcc..96ac7f9 100644 --- a/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStream.java +++ b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStream.java @@ -58,6 +58,8 @@ public class FramedSnappyCompressorInputStream extends CompressorInputStream { /** The underlying stream to read compressed data from */ private final PushbackInputStream in; + /** The dialect to expect */ + private final 
FramedSnappyDialect dialect; private SnappyCompressorInputStream currentCompressedChunk; @@ -71,14 +73,31 @@ public class FramedSnappyCompressorInputStream extends CompressorInputStream { private final PureJavaCrc32C checksum = new PureJavaCrc32C(); /** + * Constructs a new input stream that decompresses + * snappy-framed-compressed data from the specified input stream + * using the {@link FramedSnappyDialect#STANDARD} dialect. + * @param in the InputStream from which to read the compressed data + * @throws IOException if reading fails + */ + public FramedSnappyCompressorInputStream(final InputStream in) throws IOException { + this(in, FramedSnappyDialect.STANDARD); + } + + /** * Constructs a new input stream that decompresses snappy-framed-compressed data * from the specified input stream. * @param in the InputStream from which to read the compressed data + * @param dialect the dialect used by the compressed stream * @throws IOException if reading fails */ - public FramedSnappyCompressorInputStream(final InputStream in) throws IOException { + public FramedSnappyCompressorInputStream(final InputStream in, + final FramedSnappyDialect dialect) + throws IOException { this.in = new PushbackInputStream(in, 1); - readStreamIdentifier(); + this.dialect = dialect; + if (dialect.hasStreamIdentifier()) { + readStreamIdentifier(); + } } /** {@inheritDoc} */ @@ -182,8 +201,13 @@ public class FramedSnappyCompressorInputStream extends CompressorInputStream { uncompressedBytesRemaining = readSize() - 4 /* CRC */; expectedChecksum = unmask(readCrc()); } else if (type == COMPRESSED_CHUNK_TYPE) { - final long size = readSize() - 4 /* CRC */; - expectedChecksum = unmask(readCrc()); + boolean expectChecksum = dialect.usesChecksumWithCompressedChunks(); + final long size = readSize() - (expectChecksum ? 
4 : 0); + if (expectChecksum) { + expectedChecksum = unmask(readCrc()); + } else { + expectedChecksum = -1; + } currentCompressedChunk = new SnappyCompressorInputStream(new BoundedInputStream(in, size)); // constructor reads uncompressed size http://git-wip-us.apache.org/repos/asf/commons-compress/blob/18daf66b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyDialect.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyDialect.java b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyDialect.java new file mode 100644 index 0000000..1f0d2b8 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyDialect.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.snappy; + +/** + * Dialects of the framing format that {@link FramedSnappyCompressorInputStream} can deal with. 
+ * @since 1.12 + */ +public enum FramedSnappyDialect { + /** + * The standard as defined by the <a + * href="https://github.com/google/snappy/blob/master/framing_format.txt">Snappy + * framing format description</a> + */ + STANDARD(true, true), + /** + * The format used by Apple's iWork Archives (.iwa files). + */ + IWORK_ARCHIVE(false, false); + + private final boolean streamIdentifier, checksumWithCompressedChunks; + + private FramedSnappyDialect(boolean hasStreamIdentifier, + boolean usesChecksumWithCompressedChunks) { + this.streamIdentifier = hasStreamIdentifier; + this.checksumWithCompressedChunks = usesChecksumWithCompressedChunks; + } + + boolean hasStreamIdentifier() { + return streamIdentifier; + } + + boolean usesChecksumWithCompressedChunks() { + return checksumWithCompressedChunks; + } +} http://git-wip-us.apache.org/repos/asf/commons-compress/blob/18daf66b/src/test/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStreamTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStreamTest.java index 3126c4d..b12ddeb 100644 --- a/src/test/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStreamTest.java +++ b/src/test/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStreamTest.java @@ -28,6 +28,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.archivers.zip.ZipFile; import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.compress.utils.IOUtils; import org.junit.Test; @@ -170,6 +171,32 @@ public final class FramedSnappyCompressorInputStreamTest 
testChecksumUnmasking(0xffffc757l); } + @Test + public void readIWAFile() throws Exception { + final ZipFile zip = new ZipFile(getFile("testNumbersNew.numbers")); + try { + InputStream is = zip.getInputStream(zip.getEntry("Index/Document.iwa")); + try { + final FramedSnappyCompressorInputStream in = + new FramedSnappyCompressorInputStream(is, FramedSnappyDialect.IWORK_ARCHIVE); + FileOutputStream out = null; + try { + out = new FileOutputStream(new File(dir, "snappyIWATest.raw")); + IOUtils.copy(in, out); + } finally { + if (out != null) { + out.close(); + } + in.close(); + } + } finally { + is.close(); + } + } finally { + zip.close(); + } + } + private void testChecksumUnmasking(final long x) { assertEquals(Long.toHexString(x), Long.toHexString(FramedSnappyCompressorInputStream http://git-wip-us.apache.org/repos/asf/commons-compress/blob/18daf66b/src/test/resources/testNumbersNew.numbers ---------------------------------------------------------------------- diff --git a/src/test/resources/testNumbersNew.numbers b/src/test/resources/testNumbersNew.numbers new file mode 100644 index 0000000..3f9a013 Binary files /dev/null and b/src/test/resources/testNumbersNew.numbers differ