This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 2235fa9590f [SPARK-45827][SQL] Use StructNullableTypeConverter for
Variant
2235fa9590f is described below
commit 2235fa9590f2bb955c2824a5559746636a8b904d
Author: cashmand <[email protected]>
AuthorDate: Tue Nov 21 09:21:31 2023 +0800
[SPARK-45827][SQL] Use StructNullableTypeConverter for Variant
### What changes were proposed in this pull request?
Add a small fix for #43707. Since Variant is represented in columnar form
as a struct, it must use `StructNullableTypeConverter` so that nulls are set
properly in child column vectors.
### Why are the changes needed?
Fixes a potential bug when setting nulls in Variant columns.
### Does this PR introduce _any_ user-facing change?
No, Variant is not released yet.
### How was this patch tested?
Updated existing unit test to test Variant. It fails without the fix.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #43911 from cashmand/SPARK-45827-fixnulls.
Authored-by: cashmand <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../org/apache/spark/sql/execution/Columnar.scala | 2 +-
.../execution/vectorized/ColumnarBatchSuite.scala | 20 +++++++++++++++++---
2 files changed, 18 insertions(+), 4 deletions(-)
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala
index 7c117e0cace..111851094a6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala
@@ -279,7 +279,7 @@ private object RowToColumnConverter {
if (nullable) {
dataType match {
- case CalendarIntervalType => new StructNullableTypeConverter(core)
+ case CalendarIntervalType | VariantType => new
StructNullableTypeConverter(core)
case st: StructType => new StructNullableTypeConverter(core)
case _ => new BasicNullableTypeConverter(core)
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala
index 933447354fd..97ad2c1f5bf 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala
@@ -45,7 +45,7 @@ import org.apache.spark.sql.util.ArrowUtils
import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch,
ColumnarBatchRow, ColumnVector}
import org.apache.spark.tags.ExtendedSQLTest
import org.apache.spark.unsafe.Platform
-import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
+import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String, VariantVal}
import org.apache.spark.util.ArrayImplicits._
@ExtendedSQLTest
@@ -1650,6 +1650,7 @@ class ColumnarBatchSuite extends SparkFunSuite {
StructField("int_to_int", MapType(IntegerType, IntegerType)) ::
StructField("binary", BinaryType) ::
StructField("ts_ntz", TimestampNTZType) ::
+ StructField("variant", VariantType) ::
Nil)
var mapBuilder = new ArrayBasedMapBuilder(IntegerType, IntegerType)
mapBuilder.put(1, 10)
@@ -1664,6 +1665,9 @@ class ColumnarBatchSuite extends SparkFunSuite {
val tsNTZ2 =
DateTimeUtils.localDateTimeToMicros(LocalDateTime.parse(tsString2.replace(" ",
"T")))
+ val variantVal1 = new VariantVal(Array[Byte](1, 2, 3), Array[Byte](4, 5))
+ val variantVal2 = new VariantVal(Array[Byte](6), Array[Byte](7, 8))
+
val row1 = new GenericInternalRow(Array[Any](
UTF8String.fromString("a string"),
true,
@@ -1681,7 +1685,8 @@ class ColumnarBatchSuite extends SparkFunSuite {
new GenericInternalRow(Array[Any](5.asInstanceOf[Any], 10)),
mapBuilder.build(),
"Spark SQL".getBytes(),
- tsNTZ1
+ tsNTZ1,
+ variantVal1
))
mapBuilder = new ArrayBasedMapBuilder(IntegerType, IntegerType)
@@ -1704,7 +1709,8 @@ class ColumnarBatchSuite extends SparkFunSuite {
new GenericInternalRow(Array[Any](20.asInstanceOf[Any], null)),
mapBuilder.build(),
"Parquet".getBytes(),
- tsNTZ2
+ tsNTZ2,
+ variantVal2
))
val row3 = new GenericInternalRow(Array[Any](
@@ -1724,6 +1730,7 @@ class ColumnarBatchSuite extends SparkFunSuite {
null,
null,
null,
+ null,
null
))
@@ -1852,6 +1859,13 @@ class ColumnarBatchSuite extends SparkFunSuite {
assert(columns(16).getLong(0) == tsNTZ1)
assert(columns(16).getLong(1) == tsNTZ2)
assert(columns(16).isNullAt(2))
+
+ assert(columns(17).dataType() == VariantType)
+ assert(columns(17).getVariant(0).debugString() ==
variantVal1.debugString())
+ assert(columns(17).getVariant(1).debugString() ==
variantVal2.debugString())
+ assert(columns(17).isNullAt(2))
+ assert(columns(17).getChild(0).isNullAt(2))
+ assert(columns(17).getChild(1).isNullAt(2))
} finally {
batch.close()
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]