This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 2235fa9590f [SPARK-45827][SQL] Use StructNullableTypeConverter for
Variant
2235fa9590f is described below
commit 2235fa9590f2bb955c2824a5559746636a8b904d
Author: cashmand <[email protected]>
AuthorDate: Tue Nov 21 09:21:31 2023 +0800
[SPARK-45827][SQL] Use StructNullableTypeConverter for Variant
### What changes were proposed in this pull request?
Add a small fix for #43707. Since Variant is represented in columnar form
as a struct, it must use `StructNullableTypeConverter` so that nulls are set
properly in child column vectors.
### Why are the changes needed?
Fixes a potential bug when setting nulls in Variant columns.
### Does this PR introduce _any_ user-facing change?
No, Variant is not released yet.
### How was this patch tested?
Updated existing unit test to test Variant. It fails without the fix.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #43911 from cashmand/SPARK-45827-fixnulls.
Authored-by: cashmand <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../org/apache/spark/sql/execution/Columnar.scala | 2 +-
.../execution/vectorized/ColumnarBatchSuite.scala | 20 +++++++++++++++++---
2 files changed, 18 insertions(+), 4 deletions(-)
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala
index 7c117e0cace..111851094a6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala
@@ -279,7 +279,7 @@ private object RowToColumnConverter {
if (nullable) {
dataType match {
- case CalendarIntervalType => new StructNullableTypeConverter(core)
+ case CalendarIntervalType | VariantType => new
StructNullableTypeConverter(core)
case st: StructType => new StructNullableTypeConverter(core)
case _ => new BasicNullableTypeConverter(core)
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala
index 933447354fd..97ad2c1f5bf 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala
@@ -45,7 +45,7 @@ import org.apache.spark.sql.util.ArrowUtils
import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch,
ColumnarBatchRow, ColumnVector}
import org.apache.spark.tags.ExtendedSQLTest
import org.apache.spark.unsafe.Platform
-import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
+import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String, VariantVal}
import org.apache.spark.util.ArrayImplicits._
@ExtendedSQLTest
@@ -1650,6 +1650,7 @@ class ColumnarBatchSuite extends SparkFunSuite {
StructField("int_to_int", MapType(IntegerType, IntegerType)) ::
StructField("binary", BinaryType) ::
StructField("ts_ntz", TimestampNTZType) ::
+ StructField("variant", VariantType) ::
Nil)
var mapBuilder = new ArrayBasedMapBuilder(IntegerType, IntegerType)
mapBuilder.put(1, 10)
@@ -1664,6 +1665,9 @@ class ColumnarBatchSuite extends SparkFunSuite {
val tsNTZ2 =
DateTimeUtils.localDateTimeToMicros(LocalDateTime.parse(tsString2.replace(" ",
"T")))
+ val variantVal1 = new VariantVal(Array[Byte](1, 2, 3), Array[Byte](4, 5))
+ val variantVal2 = new VariantVal(Array[Byte](6), Array[Byte](7, 8))
+
val row1 = new GenericInternalRow(Array[Any](
UTF8String.fromString("a string"),
true,
@@ -1681,7 +1685,8 @@ class ColumnarBatchSuite extends SparkFunSuite {
new GenericInternalRow(Array[Any](5.asInstanceOf[Any], 10)),
mapBuilder.build(),
"Spark SQL".getBytes(),
- tsNTZ1
+ tsNTZ1,
+ variantVal1
))
mapBuilder = new ArrayBasedMapBuilder(IntegerType, IntegerType)
@@ -1704,7 +1709,8 @@ class ColumnarBatchSuite extends SparkFunSuite {
new GenericInternalRow(Array[Any](20.asInstanceOf[Any], null)),
mapBuilder.build(),
"Parquet".getBytes(),
- tsNTZ2
+ tsNTZ2,
+ variantVal2
))
val row3 = new GenericInternalRow(Array[Any](
@@ -1724,6 +1730,7 @@ class ColumnarBatchSuite extends SparkFunSuite {
null,
null,
null,
+ null,
null
))
@@ -1852,6 +1859,13 @@ class ColumnarBatchSuite extends SparkFunSuite {
assert(columns(16).getLong(0) == tsNTZ1)
assert(columns(16).getLong(1) == tsNTZ2)
assert(columns(16).isNullAt(2))
+
+ assert(columns(17).dataType() == VariantType)
+ assert(columns(17).getVariant(0).debugString() ==
variantVal1.debugString())
+ assert(columns(17).getVariant(1).debugString() ==
variantVal2.debugString())
+ assert(columns(17).isNullAt(2))
+ assert(columns(17).getChild(0).isNullAt(2))
+ assert(columns(17).getChild(1).isNullAt(2))
} finally {
batch.close()
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]