This is an automated email from the ASF dual-hosted git repository.
rskraba pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new 36ef0b7 AVRO-2944: Handle unexpected EOF reading magic bytes in DataFileReader (#1080)
36ef0b7 is described below
commit 36ef0b71c6c624bef8525704229a7791b51788d5
Author: Andrew Olson <[email protected]>
AuthorDate: Wed Feb 3 10:43:12 2021 -0600
AVRO-2944: Handle unexpected EOF reading magic bytes in DataFileReader (#1080)
* AVRO-2944: Handle unexpected EOF reading magic bytes in DataFileReader
* AVRO-2944: Fix unit test code formatting
Co-authored-by: Andrew Olson <[email protected]>
---
.../java/org/apache/avro/file/DataFileReader.java | 11 +++-
.../java/org/apache/avro/TestDataFileReader.java | 70 ++++++++++++++++++++++
2 files changed, 80 insertions(+), 1 deletion(-)
diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java
index f027852..7a23535 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java
@@ -17,6 +17,7 @@
*/
package org.apache.avro.file;
+import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.File;
@@ -58,7 +59,15 @@ public class DataFileReader<D> extends DataFileStream<D> implements FileReader<D
// read magic header
byte[] magic = new byte[MAGIC.length];
in.seek(0);
- for (int c = 0; c < magic.length; c += in.read(magic, c, magic.length - c)) {
+ int offset = 0;
+ int length = magic.length;
+ while (length > 0) {
+ int bytesRead = in.read(magic, offset, length);
+ if (bytesRead < 0)
+ throw new EOFException("Unexpected EOF with " + length + " bytes remaining to read");
+
+ length -= bytesRead;
+ offset += bytesRead;
}
in.seek(0);
diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
index 155ce6b..8393179 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java
@@ -20,8 +20,10 @@ package org.apache.avro;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
+import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
+import java.io.FileWriter;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.lang.management.OperatingSystemMXBean;
@@ -139,6 +141,55 @@ public class TestDataFileReader {
};
}
+ @Test(expected = EOFException.class)
+ // another regression test for bug AVRO-2944, testing EOF case
+ public void testInputStreamEOF() throws IOException {
+ // AVRO-2944 describes hanging/failure in reading Avro file with performing
+ // magic header check. This potentially happens with a defective input stream
+ // where a -1 value is unexpectedly returned from a read.
+ Schema legacySchema = new Schema.Parser().setValidate(false).setValidateDefaults(false)
+ .parse("{\"type\": \"record\", \"name\": \"TestSchema\", \"fields\": "
+ + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}");
+ File f = Files.createTempFile("testInputStreamEOF", ".avro").toFile();
+ try (DataFileWriter<?> w = new DataFileWriter<>(new GenericDatumWriter<>())) {
+ w.create(legacySchema, f);
+ w.flush();
+ }
+
+ // Should throw an EOFException
+ DataFileReader.openReader(eofInputStream(f), new GenericDatumReader<>());
+ }
+
+ private SeekableInput eofInputStream(File f) throws IOException {
+ SeekableFileInput input = new SeekableFileInput(f);
+ return new SeekableInput() {
+ @Override
+ public void close() throws IOException {
+ input.close();
+ }
+
+ @Override
+ public void seek(long p) throws IOException {
+ input.seek(p);
+ }
+
+ @Override
+ public long tell() throws IOException {
+ return input.tell();
+ }
+
+ @Override
+ public long length() throws IOException {
+ return input.length();
+ }
+
+ @Override
+ public int read(byte[] b, int off, int len) throws IOException {
+ return -1;
+ }
+ };
+ }
+
@Test
public void testIgnoreSchemaValidationOnRead() throws IOException {
// This schema has an accent in the name and the default for the field doesn't
@@ -161,4 +212,23 @@ public class TestDataFileReader {
}
}
+ @Test(expected = InvalidAvroMagicException.class)
+ public void testInvalidMagicLength() throws IOException {
+ File f = Files.createTempFile("testInvalidMagicLength", ".avro").toFile();
+ try (FileWriter w = new FileWriter(f)) {
+ w.write("-");
+ }
+
+ DataFileReader.openReader(new SeekableFileInput(f), new GenericDatumReader<>());
+ }
+
+ @Test(expected = InvalidAvroMagicException.class)
+ public void testInvalidMagicBytes() throws IOException {
+ File f = Files.createTempFile("testInvalidMagicBytes", ".avro").toFile();
+ try (FileWriter w = new FileWriter(f)) {
+ w.write("invalid");
+ }
+
+ DataFileReader.openReader(new SeekableFileInput(f), new GenericDatumReader<>());
+ }
}