This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 700d645e9539 [SPARK-52666][SQL] Map User Defined Type to correct
MutableValue in SpecificInternalRow
700d645e9539 is described below
commit 700d645e953983e0b38c96779803cb6d13dad561
Author: Kent Yao <[email protected]>
AuthorDate: Thu Jul 3 16:24:38 2025 -0700
[SPARK-52666][SQL] Map User Defined Type to correct MutableValue in
SpecificInternalRow
### What changes were proposed in this pull request?
Map a User Defined Type (UDT) to the MutableValue of its underlying `sqlType` in
SpecificInternalRow, to fix:
```java
Caused by: java.lang.IllegalArgumentException: Spark type: ... doesn't
match the type: ... in column vector
at
org.apache.spark.sql.execution.datasources.parquet.ParquetColumnVector.<init>(ParquetColumnVector.java:80)
at
org.apache.spark.sql.execution.datasources.parquet.ParquetColumnVector.<init>(ParquetColumnVector.java:139)
```
### Why are the changes needed?
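Previously a UDT column fell through to the default `MutableAny` case, so typed
setters such as `setLong` failed with a `ClassCastException` (see the manual
test below). This change adds the missing UDT handling.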
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
- New unit test in `UserDefinedTypeSuite`
- Manual test
```
spark-sql (default)> select submissionTime from bbb;
25/07/03 09:55:57 WARN TaskSetManager: Lost task 0.0 in stage 2.0 (TID 4)
(10.242.151.176 executor 0): java.lang.ClassCastException: class
org.apache.spark.sql.catalyst.expressions.MutableAny cannot be cast to class
org.apache.spark.sql.catalyst.expressions.MutableLong
(org.apache.spark.sql.catalyst.expressions.MutableAny and
org.apache.spark.sql.catalyst.expressions.MutableLong are in unnamed module of
loader 'app')
at
org.apache.spark.sql.catalyst.expressions.SpecificInternalRow.setLong(SpecificInternalRow.scala:304)
at
org.apache.spark.sql.execution.datasources.orc.OrcDeserializer$RowUpdater.setLong(OrcDeserializer.scala:282)
spark-sql (default)> select submissionTime from bbb;
Thu Jul 03 11:18:39 CST 2025
```
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #51356 from yaooqinn/SPARK-52666.
Authored-by: Kent Yao <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../spark/sql/catalyst/expressions/SpecificInternalRow.scala | 4 ++++
.../test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala | 9 ++++++++-
2 files changed, 12 insertions(+), 1 deletion(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SpecificInternalRow.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SpecificInternalRow.scala
index 971cfcae8e47..1f755df0516f 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SpecificInternalRow.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SpecificInternalRow.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.catalyst.expressions
+import scala.annotation.tailrec
+
import org.apache.spark.sql.types._
/**
@@ -192,6 +194,7 @@ final class MutableAny extends MutableValue {
*/
final class SpecificInternalRow(val values: Array[MutableValue]) extends
BaseGenericInternalRow {
+ @tailrec
private[this] def dataTypeToMutableValue(dataType: DataType): MutableValue =
dataType match {
// We use INT for DATE and YearMonthIntervalType internally
case IntegerType | DateType | _: YearMonthIntervalType => new MutableInt
@@ -203,6 +206,7 @@ final class SpecificInternalRow(val values:
Array[MutableValue]) extends BaseGen
case BooleanType => new MutableBoolean
case ByteType => new MutableByte
case ShortType => new MutableShort
+ case udt: UserDefinedType[_] => dataTypeToMutableValue(udt.sqlType)
case _ => new MutableAny
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala
index 6d8264d3ad56..546696e32544 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala
@@ -22,7 +22,7 @@ import java.util.Arrays
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.CatalystTypeConverters
-import org.apache.spark.sql.catalyst.expressions.{Cast,
CodegenObjectFactoryMode, ExpressionEvalHelper, Literal}
+import org.apache.spark.sql.catalyst.expressions.{Cast,
CodegenObjectFactoryMode, ExpressionEvalHelper, Literal, SpecificInternalRow}
import org.apache.spark.sql.execution.datasources.parquet.ParquetTest
import org.apache.spark.sql.functions._
import org.apache.spark.sql.internal.SQLConf
@@ -312,4 +312,11 @@ class UserDefinedTypeSuite extends QueryTest with
SharedSparkSession with Parque
}
}
}
+
+ test("SPARK-52666: Map UDT to correct MutableValue in SpecificInternalRow") {
+ val udt = new YearUDT()
+ val row = new SpecificInternalRow(Seq(udt))
+ row.setInt(0, udt.serialize(Year.of(2018)))
+ assert(row.getInt(0) == 2018)
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]