This is an automated email from the ASF dual-hosted git repository.

gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git


The following commit(s) were added to refs/heads/master by this push:
     new bb4f867c4 GH-3115: Fix int96 read issue in complex type (#3118)
bb4f867c4 is described below

commit bb4f867c4a0893e11a6a9d410c379cdad3058f19
Author: pratyush-sharma-2025 <[email protected]>
AuthorDate: Thu Jan 30 13:26:04 2025 +0100

    GH-3115: Fix int96 read issue in complex type (#3118)
---
 .../org/apache/parquet/avro/AvroRecordConverter.java |  2 +-
 .../org/apache/parquet/avro/AvroSchemaConverter.java |  9 +++++----
 .../apache/parquet/avro/TestArrayCompatibility.java  | 20 ++++++++++++++++++++
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java
index a82d0148c..a98deabf6 100644
--- a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java
+++ b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java
@@ -939,7 +939,7 @@ class AvroRecordConverter<T> extends AvroConverters.AvroGroupConverter {
   // 2-level lists and the result is checked to see if it matches the requested
   // element type. This should always convert assuming 2-level lists because
   // 2-level and 3-level can't be mixed.
-  private static final AvroSchemaConverter CONVERTER = new AvroSchemaConverter(true);
+  private static final AvroSchemaConverter CONVERTER = new AvroSchemaConverter(true, true);
 
   /**
    * Returns whether the given type is the element type of a list or is a
diff --git a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
index 033a80d8f..9632fc175 100644
--- a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
+++ b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
@@ -90,20 +90,21 @@ public class AvroSchemaConverter {
   private final Set<String> pathsToInt96;
 
   public AvroSchemaConverter() {
-    this(ADD_LIST_ELEMENT_RECORDS_DEFAULT);
+    this(ADD_LIST_ELEMENT_RECORDS_DEFAULT, READ_INT96_AS_FIXED_DEFAULT);
   }
 
   /**
    * Constructor used by {@link AvroRecordConverter#isElementType}, which always
-   * uses the 2-level list conversion.
+   * uses the 2-level list conversion and reads INT96 as 12 byte array.
    *
    * @param assumeRepeatedIsListElement whether to assume 2-level lists
+   * @param readInt96AsFixed whether to read Parquet INT96 as 12 byte array.
    */
-  AvroSchemaConverter(boolean assumeRepeatedIsListElement) {
+  AvroSchemaConverter(boolean assumeRepeatedIsListElement, boolean readInt96AsFixed) {
     this.assumeRepeatedIsListElement = assumeRepeatedIsListElement;
     this.writeOldListStructure = WRITE_OLD_LIST_STRUCTURE_DEFAULT;
     this.writeParquetUUID = WRITE_PARQUET_UUID_DEFAULT;
-    this.readInt96AsFixed = READ_INT96_AS_FIXED_DEFAULT;
+    this.readInt96AsFixed = readInt96AsFixed;
     this.pathsToInt96 = Collections.emptySet();
   }
 
diff --git a/parquet-avro/src/test/java/org/apache/parquet/avro/TestArrayCompatibility.java b/parquet-avro/src/test/java/org/apache/parquet/avro/TestArrayCompatibility.java
index fd4cf2011..085f4925e 100644
--- a/parquet-avro/src/test/java/org/apache/parquet/avro/TestArrayCompatibility.java
+++ b/parquet-avro/src/test/java/org/apache/parquet/avro/TestArrayCompatibility.java
@@ -18,6 +18,7 @@
  */
 package org.apache.parquet.avro;
 
+import static org.apache.parquet.avro.AvroReadSupport.READ_INT96_AS_FIXED;
 import static org.apache.parquet.avro.AvroTestUtil.array;
 import static org.apache.parquet.avro.AvroTestUtil.field;
 import static org.apache.parquet.avro.AvroTestUtil.instance;
@@ -1136,6 +1137,25 @@ public class TestArrayCompatibility extends DirectWriterTest {
         avroSchema.getFields().get(0).schema()));
   }
 
+  @Test
+  public void testIsElementTypeInt96Element() {
+    Configuration configuration = new Configuration();
+    configuration.setBoolean(READ_INT96_AS_FIXED, true);
+
+    MessageType parquetSchema = MessageTypeParser.parseMessageType("message SchemaWithInt96 {\n"
+        + "  optional group list (LIST) {\n"
+        + "    repeated group list {\n"
+        + "      optional int96 a_timestamp;\n"
+        + "    }\n"
+        + "  }\n"
+        + "}");
+    Schema avroSchema = new AvroSchemaConverter(configuration).convert(parquetSchema);
+    Assert.assertTrue(AvroRecordConverter.isElementType(
+        parquetSchema.getType("list").asGroupType().getType("list"),
+        AvroSchemaConverter.getNonNull(avroSchema.getFields().get(0).schema())
+            .getElementType()));
+  }
+
   @Test
   public void testIsElementTypeOptionalRepeatedRecord() {
     // Test `_tuple` style naming

Reply via email to