Richard Blank created COMPRESS-686:
--------------------------------------

             Summary: Fix compression into BZip2 format
                 Key: COMPRESS-686
                 URL: https://issues.apache.org/jira/browse/COMPRESS-686
             Project: Commons Compress
          Issue Type: Bug
          Components: Compressors
    Affects Versions: 1.27.0
            Reporter: Richard Blank


h3. Problem

When a file is compressed into BZip2 format using the 
*BZip2CompressorOutputStream* class, the resulting file is not valid.

This can be demonstrated by trying to decompress the compressed file, which 
fails with an error. For example, the _bzip2_ tool reports "bzip2: Compressed 
file ends unexpectedly;" while this _library_ throws an IOException with the 
message "Stream is not in the BZip2 format".

For larger files, one can see that the last part (block?) of the file doesn't 
get written to the compressed output. For small files, the compressed output 
remains empty.

h3. Reproduction

The error can be reproduced by running the Java test below on version 
1.27.0; it fails with the stack trace shown first. This test does not fail 
against older versions (e.g. 1.26.2).

{noformat}
java.io.IOException: Stream is not in the BZip2 format

        at 
org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream.init(BZip2CompressorInputStream.java:545)
        at 
org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream.<init>(BZip2CompressorInputStream.java:282)
        at 
org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream.<init>(BZip2CompressorInputStream.java:266)
        at 
net.ripe.rirstats.service.ArchiveServiceTest.decompressBzip2File(ArchiveServiceTest.java:56)
        at 
net.ripe.rirstats.service.ArchiveServiceTest.shouldCompressAndDecompressCorrectly(ArchiveServiceTest.java:30)
{noformat}

{noformat}
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import 
org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
import org.junit.jupiter.api.Test;

import java.io.BufferedOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Round-trip reproduction for COMPRESS-686: writes a small text file, compresses it with
 * {@link BZip2CompressorOutputStream}, decompresses the result with
 * {@link BZip2CompressorInputStream}, and checks that the original contents come back.
 */
class ArchiveServiceTest {

    /**
     * Compresses a freshly written text file to BZip2, decompresses it again and asserts
     * that the decompressed text equals what was originally written.
     *
     * @throws Exception if creating, compressing, decompressing or reading any file fails
     */
    @Test
    void shouldCompressAndDecompressCorrectly() throws Exception {
        final String contents = "random contents";
        final Path tmpDir = Files.createTempDirectory("compressionTest");
        final Path file = Path.of(tmpDir.toString(), "test.txt");
        Files.writeString(file, contents, StandardOpenOption.CREATE);

        final Path compressedFile = compressFile(file);
        final Path decompressedFile = decompressBzip2File(compressedFile);

        assertEquals(contents, Files.readString(decompressedFile));
    }

    /**
     * Compresses {@code file} into a sibling file named like the input plus {@code ".bz2"}.
     *
     * @param file the file to compress
     * @return the path of the compressed file
     * @throws IOException if reading the input or writing the compressed output fails
     */
    private static Path compressFile(Path file) throws IOException {
        final String directory = file.getParent().toString();
        final String archiveName = file.getFileName().toString() + ".bz2";
        final Path newFile = Paths.get(directory, archiveName);

        // The compressor stream is closed only by leaving the try-with-resources block;
        // there is no explicit finish() or flush() call.
        try (InputStream source = Files.newInputStream(file);
             BZip2CompressorOutputStream bzOut = new BZip2CompressorOutputStream(
                     new BufferedOutputStream(Files.newOutputStream(newFile)))) {
            copy(source, bzOut);
        }
        return newFile;
    }

    /**
     * Decompresses the BZip2 {@code file} into a sibling file named like the input plus
     * {@code ".decompressed"}.
     *
     * @param file the BZip2-compressed file to read
     * @return the path of the decompressed file
     * @throws IOException if the input cannot be read as BZip2 or the output cannot be written
     */
    private static Path decompressBzip2File(Path file) throws IOException {
        final String directory = file.getParent().toString();
        final String plainName = file.getFileName().toString() + ".decompressed";
        final Path decompressedFile = Paths.get(directory, plainName);

        try (InputStream fileInputStream = new FileInputStream(file.toFile());
             BZip2CompressorInputStream bzIn = new BZip2CompressorInputStream(fileInputStream);
             OutputStream fileOutputStream = new FileOutputStream(decompressedFile.toFile())) {
            copy(bzIn, fileOutputStream);
        }
        return decompressedFile;
    }

    /**
     * Copies everything readable from {@code from} into {@code to} using a 4096-byte buffer.
     * Neither stream is closed or flushed here; that is left to the caller.
     *
     * @param from the stream to read until end of stream
     * @param to the stream receiving the bytes
     * @throws IOException if a read or write fails
     */
    private static void copy(InputStream from, OutputStream to) throws IOException {
        final byte[] chunk = new byte[4096];
        for (int count = from.read(chunk); count != -1; count = from.read(chunk)) {
            to.write(chunk, 0, count);
        }
    }
}
{noformat}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to