Richard Blank created COMPRESS-686:
--------------------------------------
Summary: Fix compression into BZip2 format
Key: COMPRESS-686
URL: https://issues.apache.org/jira/browse/COMPRESS-686
Project: Commons Compress
Issue Type: Bug
Components: Compressors
Affects Versions: 1.27.0
Reporter: Richard Blank
h3. Problem
When a file is compressed into BZip2 format using the class
*BZip2CompressorOutputStream*, the resulting file is not valid.
This can be demonstrated by trying to decompress the compressed file, which
fails with an error. For example, the _bzip2_ tool reports "bzip2: Compressed
file ends unexpectedly;" while this _library_ throws an IOException with the
message "Stream is not in the BZip2 format".
For larger files, one can see that the last part (block?) of the data is not
written to the compressed output. For small files, the compressed output
remains empty.
h3. Reproduction
The error can be reproduced by running the Java test below against version
1.27.0; it fails with the stack trace shown first. The same test does not fail
against older versions (e.g. 1.26.2).
{noformat}
java.io.IOException: Stream is not in the BZip2 format
at
org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream.init(BZip2CompressorInputStream.java:545)
at
org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream.<init>(BZip2CompressorInputStream.java:282)
at
org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream.<init>(BZip2CompressorInputStream.java:266)
at
net.ripe.rirstats.service.ArchiveServiceTest.decompressBzip2File(ArchiveServiceTest.java:56)
at
net.ripe.rirstats.service.ArchiveServiceTest.shouldCompressAndDecompressCorrectly(ArchiveServiceTest.java:30)
{noformat}
{noformat}
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import
org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
import org.junit.jupiter.api.Test;
import java.io.BufferedOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Round-trip reproduction test: writes a small text file, compresses it with
 * BZip2CompressorOutputStream, decompresses the result with
 * BZip2CompressorInputStream and expects the original contents back.
 */
class ArchiveServiceTest {
/**
 * Creates "test.txt" containing "random contents" in a fresh temporary
 * directory, compresses it, decompresses the compressed file and asserts
 * that the decompressed text equals the original contents.
 *
 * @throws Exception if any file or stream operation fails
 */
@Test
void shouldCompressAndDecompressCorrectly() throws Exception {
// NOTE(review): the temporary directory and the files created in it are
// never deleted by this test.
Path tmpDir = Files.createTempDirectory("compressionTest");
Path file = Path.of(tmpDir.toString(), "test.txt");
var contents = "random contents";
Files.writeString(file, contents, StandardOpenOption.CREATE);
Path compressedFile = compressFile(file);
Path decompressedFile = decompressBzip2File(compressedFile);
String actual = Files.readString(decompressedFile);
assertEquals(contents, actual);
}
/**
 * Compresses the given file into a sibling file named like the input with a
 * ".bz2" suffix appended.
 *
 * @param file the file to compress
 * @return the path of the newly written ".bz2" file
 * @throws IOException if reading the input or writing the output fails
 */
private static Path compressFile(Path file) throws IOException {
Path newFile = Paths.get(file.getParent().toString(),
file.getFileName().toString() + ".bz2");
// The compressor wraps a BufferedOutputStream around the file stream. Only
// the input stream and the compressor are declared as resources, so the
// wrapped streams are not closed separately by this try-with-resources.
// NOTE(review): this exact wrapping is part of the reproduction - keep it
// unchanged when re-running the test.
try (InputStream in = Files.newInputStream(file);
BZip2CompressorOutputStream bzOut = new
BZip2CompressorOutputStream(
new BufferedOutputStream(
Files.newOutputStream(newFile)))) {
// Copy the input to the compressor in chunks of up to 4096 bytes.
final byte[] buffer = new byte[4096];
int n;
while ((n = in.read(buffer)) != -1) {
bzOut.write(buffer, 0, n);
}
}
return newFile;
}
/**
 * Decompresses the given BZip2 file into a sibling file named like the input
 * with a ".decompressed" suffix appended.
 *
 * @param file the BZip2-compressed file to read
 * @return the path of the newly written decompressed file
 * @throws IOException if reading, decompressing or writing fails; in the
 *         reported failure it is raised while constructing the
 *         BZip2CompressorInputStream ("Stream is not in the BZip2 format")
 */
private static Path decompressBzip2File(Path file) throws IOException {
Path decompressedFile = Paths.get(file.getParent().toString(),
file.getFileName().toString() + ".decompressed");
// Resources are opened in this order: raw file input, BZip2 decompressor,
// then the output file.
try (InputStream fileInputStream = new FileInputStream(file.toFile());
BZip2CompressorInputStream bzIn = new
BZip2CompressorInputStream(fileInputStream);
OutputStream fileOutputStream = new
FileOutputStream(decompressedFile.toFile())) {
// Copy the decompressed bytes to the output in chunks of up to 4096 bytes.
final byte[] buffer = new byte[4096];
int n;
while ((n = bzIn.read(buffer)) != -1) {
fileOutputStream.write(buffer, 0, n);
}
}
return decompressedFile;
}
}
{noformat}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)