This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 700d645e9539 [SPARK-52666][SQL] Map User Defined Type to correct 
MutableValue in SpecificInternalRow
700d645e9539 is described below

commit 700d645e953983e0b38c96779803cb6d13dad561
Author: Kent Yao <[email protected]>
AuthorDate: Thu Jul 3 16:24:38 2025 -0700

    [SPARK-52666][SQL] Map User Defined Type to correct MutableValue in 
SpecificInternalRow
    
    ### What changes were proposed in this pull request?
    Map a User Defined Type (UDT) to the MutableValue of its underlying `sqlType` in SpecificInternalRow, which fixes:
    
    ```java
    
    Caused by: java.lang.IllegalArgumentException: Spark type: ... doesn't 
match the type: ... in column vector
            at 
org.apache.spark.sql.execution.datasources.parquet.ParquetColumnVector.<init>(ParquetColumnVector.java:80)
            at 
org.apache.spark.sql.execution.datasources.parquet.ParquetColumnVector.<init>(ParquetColumnVector.java:139)
    
    ```
    
    ### Why are the changes needed?
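    Add missing UDT support: without this mapping a UDT column is backed by MutableAny, so typed setters such as setLong fail with a ClassCastException.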
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    
    - New tests in UT
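    - Manual test: the same query is run twice below; the first run fails with a ClassCastException and the second returns the expected value (presumably before and after applying this patch)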
    
    ```
    
    spark-sql (default)> select submissionTime from bbb;
    25/07/03 09:55:57 WARN TaskSetManager: Lost task 0.0 in stage 2.0 (TID 4) 
(10.242.151.176 executor 0): java.lang.ClassCastException: class 
org.apache.spark.sql.catalyst.expressions.MutableAny cannot be cast to class 
org.apache.spark.sql.catalyst.expressions.MutableLong 
(org.apache.spark.sql.catalyst.expressions.MutableAny and 
org.apache.spark.sql.catalyst.expressions.MutableLong are in unnamed module of 
loader 'app')
            at 
org.apache.spark.sql.catalyst.expressions.SpecificInternalRow.setLong(SpecificInternalRow.scala:304)
            at 
org.apache.spark.sql.execution.datasources.orc.OrcDeserializer$RowUpdater.setLong(OrcDeserializer.scala:282)
    spark-sql (default)> select submissionTime from bbb;
    Thu Jul 03 11:18:39 CST 2025
    ```
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #51356 from yaooqinn/SPARK-52666.
    
    Authored-by: Kent Yao <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../spark/sql/catalyst/expressions/SpecificInternalRow.scala     | 4 ++++
 .../test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala   | 9 ++++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SpecificInternalRow.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SpecificInternalRow.scala
index 971cfcae8e47..1f755df0516f 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SpecificInternalRow.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SpecificInternalRow.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.catalyst.expressions
 
+import scala.annotation.tailrec
+
 import org.apache.spark.sql.types._
 
 /**
@@ -192,6 +194,7 @@ final class MutableAny extends MutableValue {
  */
 final class SpecificInternalRow(val values: Array[MutableValue]) extends 
BaseGenericInternalRow {
 
+  @tailrec
   private[this] def dataTypeToMutableValue(dataType: DataType): MutableValue = 
dataType match {
     // We use INT for DATE and YearMonthIntervalType internally
     case IntegerType | DateType | _: YearMonthIntervalType => new MutableInt
@@ -203,6 +206,7 @@ final class SpecificInternalRow(val values: 
Array[MutableValue]) extends BaseGen
     case BooleanType => new MutableBoolean
     case ByteType => new MutableByte
     case ShortType => new MutableShort
+    case udt: UserDefinedType[_] => dataTypeToMutableValue(udt.sqlType)
     case _ => new MutableAny
   }
 
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala
index 6d8264d3ad56..546696e32544 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala
@@ -22,7 +22,7 @@ import java.util.Arrays
 
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.CatalystTypeConverters
-import org.apache.spark.sql.catalyst.expressions.{Cast, 
CodegenObjectFactoryMode, ExpressionEvalHelper, Literal}
+import org.apache.spark.sql.catalyst.expressions.{Cast, 
CodegenObjectFactoryMode, ExpressionEvalHelper, Literal, SpecificInternalRow}
 import org.apache.spark.sql.execution.datasources.parquet.ParquetTest
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.internal.SQLConf
@@ -312,4 +312,11 @@ class UserDefinedTypeSuite extends QueryTest with 
SharedSparkSession with Parque
       }
     }
   }
+
+  test("SPARK-52666: Map UDT to correct MutableValue in SpecificInternalRow") {
+    val udt = new YearUDT()
+    val row = new SpecificInternalRow(Seq(udt))
+    row.setInt(0, udt.serialize(Year.of(2018)))
+    assert(row.getInt(0) == 2018)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to