This is an automated email from the ASF dual-hosted git repository.
gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git
The following commit(s) were added to refs/heads/master by this push:
new bb4f867c4 GH-3115: Fix int96 read issue in complex type (#3118)
bb4f867c4 is described below
commit bb4f867c4a0893e11a6a9d410c379cdad3058f19
Author: pratyush-sharma-2025 <[email protected]>
AuthorDate: Thu Jan 30 13:26:04 2025 +0100
GH-3115: Fix int96 read issue in complex type (#3118)
---
.../org/apache/parquet/avro/AvroRecordConverter.java | 2 +-
.../org/apache/parquet/avro/AvroSchemaConverter.java | 9 +++++----
.../apache/parquet/avro/TestArrayCompatibility.java | 20 ++++++++++++++++++++
3 files changed, 26 insertions(+), 5 deletions(-)
diff --git a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java
index a82d0148c..a98deabf6 100644
--- a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java
+++ b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java
@@ -939,7 +939,7 @@ class AvroRecordConverter<T> extends AvroConverters.AvroGroupConverter {
// 2-level lists and the result is checked to see if it matches the requested
// element type. This should always convert assuming 2-level lists because
// 2-level and 3-level can't be mixed.
- private static final AvroSchemaConverter CONVERTER = new AvroSchemaConverter(true);
+ private static final AvroSchemaConverter CONVERTER = new AvroSchemaConverter(true, true);
/**
* Returns whether the given type is the element type of a list or is a
diff --git a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
index 033a80d8f..9632fc175 100644
--- a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
+++ b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
@@ -90,20 +90,21 @@ public class AvroSchemaConverter {
private final Set<String> pathsToInt96;
public AvroSchemaConverter() {
- this(ADD_LIST_ELEMENT_RECORDS_DEFAULT);
+ this(ADD_LIST_ELEMENT_RECORDS_DEFAULT, READ_INT96_AS_FIXED_DEFAULT);
}
/**
* Constructor used by {@link AvroRecordConverter#isElementType}, which always
- * uses the 2-level list conversion.
+ * uses the 2-level list conversion and reads INT96 as 12 byte array.
*
* @param assumeRepeatedIsListElement whether to assume 2-level lists
+ * @param readInt96AsFixed whether to read Parquet INT96 as 12 byte array.
*/
- AvroSchemaConverter(boolean assumeRepeatedIsListElement) {
+ AvroSchemaConverter(boolean assumeRepeatedIsListElement, boolean readInt96AsFixed) {
this.assumeRepeatedIsListElement = assumeRepeatedIsListElement;
this.writeOldListStructure = WRITE_OLD_LIST_STRUCTURE_DEFAULT;
this.writeParquetUUID = WRITE_PARQUET_UUID_DEFAULT;
- this.readInt96AsFixed = READ_INT96_AS_FIXED_DEFAULT;
+ this.readInt96AsFixed = readInt96AsFixed;
this.pathsToInt96 = Collections.emptySet();
}
diff --git a/parquet-avro/src/test/java/org/apache/parquet/avro/TestArrayCompatibility.java b/parquet-avro/src/test/java/org/apache/parquet/avro/TestArrayCompatibility.java
index fd4cf2011..085f4925e 100644
--- a/parquet-avro/src/test/java/org/apache/parquet/avro/TestArrayCompatibility.java
+++ b/parquet-avro/src/test/java/org/apache/parquet/avro/TestArrayCompatibility.java
@@ -18,6 +18,7 @@
*/
package org.apache.parquet.avro;
+import static org.apache.parquet.avro.AvroReadSupport.READ_INT96_AS_FIXED;
import static org.apache.parquet.avro.AvroTestUtil.array;
import static org.apache.parquet.avro.AvroTestUtil.field;
import static org.apache.parquet.avro.AvroTestUtil.instance;
@@ -1136,6 +1137,25 @@ public class TestArrayCompatibility extends DirectWriterTest {
avroSchema.getFields().get(0).schema()));
}
+ @Test
+ public void testIsElementTypeInt96Element() {
+ Configuration configuration = new Configuration();
+ configuration.setBoolean(READ_INT96_AS_FIXED, true);
+
+ MessageType parquetSchema = MessageTypeParser.parseMessageType("message SchemaWithInt96 {\n"
+ + " optional group list (LIST) {\n"
+ + " repeated group list {\n"
+ + " optional int96 a_timestamp;\n"
+ + " }\n"
+ + " }\n"
+ + "}");
+ Schema avroSchema = new AvroSchemaConverter(configuration).convert(parquetSchema);
+ Assert.assertTrue(AvroRecordConverter.isElementType(
+ parquetSchema.getType("list").asGroupType().getType("list"),
+ AvroSchemaConverter.getNonNull(avroSchema.getFields().get(0).schema())
+ .getElementType()));
+ }
+
@Test
public void testIsElementTypeOptionalRepeatedRecord() {
// Test `_tuple` style naming