Hello - Me and a co-worker actually got burned by this code twice in 1 week - once via Lucene 2.2 ( http://svn.apache.org/repos/asf/lucene/java/tags/lucene_2_2_0/src/java/org/apache/lucene/index/FieldsReader.java) and again via a copy of the same source (moved to https://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/CompressionTools.java) we were using elsewhere. Due to an edge case misuse of the java.util.zip.Inflater API, an empty byte array sends {...@link org.apache.lucene.document.CompressionTools#decompress} into a tight, infinite loop. I realize this is degenerate case, but it happened to us 2 totally different ways -- a more robust behavior like returning the input empty array, or throwing an exception would be preferable. The following source demonstrates the issue and a couple trivial solution alternatives.
I was scanning bugs -- this should solve https://issues.apache.org/jira/browse/LUCENE-772 Kind regards, - Luke import java.util.zip.Inflater; import java.util.zip.Deflater; import java.util.zip.DataFormatException; import java.io.ByteArrayOutputStream; /** Demonstrates bad empty input edge case behavior of {...@link org.apache.lucene.document.CompressionTools#decompress} */ class Infiniflater { /** Compresses the specified byte range using the * specified compressionLevel (constants are defined in * java.util.zip.Deflater). */ public static byte[] compress(byte[] value, int offset, int length, int compressionLevel) { /* Create an expandable byte array to hold the compressed data. * You cannot use an array that's the same size as the orginal because * there is no guarantee that the compressed data will be smaller than * the uncompressed data. */ ByteArrayOutputStream bos = new ByteArrayOutputStream(length); Deflater compressor = new Deflater(); try { compressor.setLevel(compressionLevel); compressor.setInput(value, offset, length); compressor.finish(); // Compress the data final byte[] buf = new byte[1024]; while (!compressor.finished()) { int count = compressor.deflate(buf); bos.write(buf, 0, count); } } finally { compressor.end(); } return bos.toByteArray(); } /** Decompress the byte array previously returned by * compress */ public static byte[] decompress(byte[] value) throws DataFormatException { // Create an expandable byte array to hold the decompressed data ByteArrayOutputStream bos = new ByteArrayOutputStream(value.length); Inflater decompressor = new Inflater(); try { decompressor.setInput(value); // Decompress the data final byte[] buf = new byte[1024]; //*** original form with potential for inifinite loop below //while (!decompressor.finished()) { // bos.write(buf, 0, count); //} //*** simple refined guard condition //while (!decompressor.finished() && ! decompressor.needsInput()) { // bos.write(buf, 0, count); //} //*** slightly more efficient (less synchronization (within decompressor.needsInput()) for instance) while (!decompressor.finished()) { int count = decompressor.inflate(buf); if (count == 0 && decompressor.needsInput()) { break; } bos.write(buf, 0, count); } } finally { decompressor.end(); } return bos.toByteArray(); } public static void main(String[] args) throws Exception { // simplest demonstration of case which will cause infinite loop on empty byte array input final Inflater decompressor = new Inflater(); final byte[] noBytes = new byte[0]; decompressor.setInput(noBytes); final boolean notFinished = false == decompressor.finished(); System.err.println("notFinished?: "+notFinished); assert notFinished; // exercise methods to roundtrip empty array // since this "compresses" to 8 bytes, maybe we should just toss DataFormatException or IOException // if arg to decompress has length < 8 - would be better than current infinite loop final byte[] compressed = compress(noBytes, 0, noBytes.length, Deflater.BEST_COMPRESSION); assert compressed.length == 8 : "compressed.length: "+compressed.length; final byte[] decompressed = decompress(compressed); assert decompressed.length == 0 : "decompressed.length: "+decompressed.length; // spin condition final byte[] output = decompress(noBytes); assert output.length == 0; } }
