[
https://issues.apache.org/jira/browse/COMPRESS-146?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13146180#comment-13146180
]
Stefan Bodewig commented on COMPRESS-146:
-----------------------------------------
yes, we probably want all three formats to be consistent here.
I'm not sure what the danger of changing the default really would be; I
vaguely recall people complaining about GzipInputStream after JDK7 added
support for concatenated streams (I may be totally wrong on this, though).
> BZip2CompressorInputStream always treats 0x177245385090 as EOF, but should
> treat this as EOS
> --------------------------------------------------------------------------------------------
>
> Key: COMPRESS-146
> URL: https://issues.apache.org/jira/browse/COMPRESS-146
> Project: Commons Compress
> Issue Type: Bug
> Components: Compressors
> Environment: all
> Reporter: Dmitriy Smirnov
> Priority: Critical
> Labels: 0x177245385090
> Fix For: 1.4
>
> Attachments: bzip2-concatenated.patch
>
> Original Estimate: 4h
> Remaining Estimate: 4h
>
> BZip2CompressorInputStream always treats 0x177245385090 as EOF, but should
> treat this as EOS
> This error occurs mostly on large files, as a sudden EOF somewhere in the
> middle of the file.
> An example of data from archived file:
> $ cat fastq.ax.bz2 | od -t x1 | grep -A 1 '17 72 45'
> 22711660 d0 ff b6 01 20 10 ff ff 17 72 45 38 50 90 2e ff
> 22711700 b2 d3 42 5a 68 39 31 41 59 26 53 59 84 3c 41 75
> --
> 24637020 c5 49 ff 19 80 49 20 7f ff 17 72 45 38 50 90 a4
> 24637040 a8 ac bd 42 5a 68 39 31 41 59 26 53 59 0d 9a b4
> --
> 40302720 ff b1 24 80 10 ff ff 17 72 45 38 50 90 24 cb c5
> 40302740 90 42 5a 68 39 31 41 59 26 53 59 42 05 ae 5e 05
> .....
> Suggested solution:
> private void initBlock() throws IOException {
> char magic0 = bsGetUByte();
> char magic1 = bsGetUByte();
> char magic2 = bsGetUByte();
> char magic3 = bsGetUByte();
> char magic4 = bsGetUByte();
> char magic5 = bsGetUByte();
> if( magic0 == 0x17 && magic1 == 0x72 && magic2 == 0x45
> && magic3 == 0x38 && magic4 == 0x50 && magic5 == 0x90 )
>
> {
> if( complete() ) // end of file);
> {
> return;
> } else
> {
> magic0 = bsGetUByte();
> magic1 = bsGetUByte();
> magic2 = bsGetUByte();
> magic3 = bsGetUByte();
> magic4 = bsGetUByte();
> magic5 = bsGetUByte();
> }
> }
> if (magic0 != 0x31 || // '1'
> magic1 != 0x41 || // 'A'
> magic2 != 0x59 || // 'Y'
> magic3 != 0x26 || // '&'
> magic4 != 0x53 || // 'S'
> magic5 != 0x59 // 'Y'
> ) {
> this.currentState = EOF;
> throw new IOException("bad block header");
> } else {
> this.storedBlockCRC = bsGetInt();
> this.blockRandomised = bsR(1) == 1;
> /**
> * Allocate data here instead in constructor, so we do not
> allocate
> * it if the input file is empty.
> */
> if (this.data == null) {
> this.data = new Data(this.blockSize100k);
> }
> // currBlockNo++;
> getAndMoveToFrontDecode();
> this.crc.initialiseCRC();
> this.currentState = START_BLOCK_STATE;
> }
> }
> private boolean
> complete() throws IOException
> {
> boolean result = false;
> this.storedCombinedCRC = bsGetInt();
> try
> {
> if (in.available() == 0 )
> {
> throw new IOException( "EOF" );
> }
> checkMagicChar('B', "first");
> checkMagicChar('Z', "second");
> checkMagicChar('h', "third");
> int blockSize = this.in.read();
> if ((blockSize < '1') || (blockSize > '9')) {
> throw new IOException("Stream is not BZip2 formatted: illegal
> "
> + "blocksize " + (char) blockSize);
> }
> this.blockSize100k = blockSize - '0';
> this.bsLive = 0;
> this.bsBuff = 0;
> } catch( IOException e )
> {
> this.currentState = EOF;
>
> result = true;
> }
>
> this.data = null;
> if (this.storedCombinedCRC != this.computedCombinedCRC) {
> throw new IOException("BZip2 CRC error");
> }
> this.computedCombinedCRC = 0;
> return result;
> }
--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators:
https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa
For more information on JIRA, see: http://www.atlassian.com/software/jira