This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new e251bfaea3f [SPARK-42796][SQL] Support accessing TimestampNTZ columns in CachedBatch
e251bfaea3f is described below
commit e251bfaea3fa8bc8da00ac226ffa23e0b677ab71
Author: Gengliang Wang <[email protected]>
AuthorDate: Wed Mar 15 10:47:52 2023 +0900
[SPARK-42796][SQL] Support accessing TimestampNTZ columns in CachedBatch
### What changes were proposed in this pull request?
Support accessing TimestampNTZ columns in CachedBatch: ColumnAccessor now maps TimestampNTZType to LongColumnAccessor, the same accessor already used for TimestampType.
### Why are the changes needed?
Implement a missing feature for the TimestampNTZ type.
### Does this PR introduce _any_ user-facing change?
No, the TimestampNTZ type has not been released yet.
### How was this patch tested?
A new unit test.
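
For orientation, a minimal user-level sketch of the path this patch
completes (the TIMESTAMP_NTZ literal syntax comes from the existing type
support on master, not from this patch, and `spark` is assumed to be an
active SparkSession, e.g. in spark-shell):

    // Cache a DataFrame with a TimestampNTZ column, then read it back
    // through the in-memory columnar cache (CachedBatch) path.
    val df = spark.sql("SELECT TIMESTAMP_NTZ'2023-03-15 10:47:52' AS ts")
    df.cache()    // rows are stored columnar as CachedBatch
    df.count()    // materializes the cache
    df.show()     // reads ts back through ColumnAccessor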
Closes #40426 from gengliangwang/ColumnAccessor.
Authored-by: Gengliang Wang <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
.../sql/execution/columnar/ColumnAccessor.scala | 3 +-
.../execution/vectorized/ColumnVectorSuite.scala | 45 ++++++++++++++--------
2 files changed, 32 insertions(+), 16 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala
index 770b2442e40..d36dd89f66e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala
@@ -140,7 +140,8 @@ private[sql] object ColumnAccessor {
       case ByteType => new ByteColumnAccessor(buf)
       case ShortType => new ShortColumnAccessor(buf)
       case IntegerType | DateType | _: YearMonthIntervalType => new IntColumnAccessor(buf)
-      case LongType | TimestampType | _: DayTimeIntervalType => new LongColumnAccessor(buf)
+      case LongType | TimestampType | TimestampNTZType | _: DayTimeIntervalType =>
+        new LongColumnAccessor(buf)
       case FloatType => new FloatColumnAccessor(buf)
       case DoubleType => new DoubleColumnAccessor(buf)
       case StringType => new StringColumnAccessor(buf)
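
The one-line mapping above is safe because TimestampNTZ shares
TimestampType's physical representation: an 8-byte long holding
microseconds. A quick sanity check against the public types API (a
sketch for illustration, not part of the patch):

    import org.apache.spark.sql.types.{LongType, TimestampNTZType, TimestampType}

    // All three types occupy an 8-byte physical slot, which is why
    // LongColumnAccessor can decode TimestampNTZ values unchanged.
    assert(LongType.defaultSize == 8)
    assert(TimestampType.defaultSize == 8)
    assert(TimestampNTZType.defaultSize == 8)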
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala
index 910e3e682de..5e06eb729ea 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala
@@ -148,6 +148,20 @@ class ColumnVectorSuite extends SparkFunSuite {
     }
   }
 
+  testVectors("timestamp_ntz", 10, TimestampNTZType) { testVector =>
+    (0 until 10).foreach { i =>
+      testVector.appendLong(i)
+    }
+
+    val array = new ColumnarArray(testVector, 0, 10)
+    val arrayCopy = array.copy()
+
+    (0 until 10).foreach { i =>
+      assert(array.get(i, TimestampNTZType) === i)
+      assert(arrayCopy.get(i, TimestampNTZType) === i)
+    }
+  }
+
   testVectors("float", 10, FloatType) { testVector =>
     (0 until 10).foreach { i =>
       testVector.appendFloat(i.toFloat)
@@ -502,25 +516,26 @@ class ColumnVectorSuite extends SparkFunSuite {
   }
 
   test("CachedBatch long Apis") {
-    val dataType = LongType
-    val columnBuilder = ColumnBuilderHelper(dataType, 1024, "col", true)
-    val row = new SpecificInternalRow(Array(dataType))
+    Seq(LongType, TimestampType, TimestampNTZType).foreach { dataType =>
+      val columnBuilder = ColumnBuilderHelper(dataType, 1024, "col", true)
+      val row = new SpecificInternalRow(Array(dataType))
 
-    row.setNullAt(0)
-    columnBuilder.appendFrom(row, 0)
-    for (i <- 1 until 16) {
-      row.setLong(0, i.toLong)
+      row.setNullAt(0)
       columnBuilder.appendFrom(row, 0)
-    }
+      for (i <- 1 until 16) {
+        row.setLong(0, i.toLong)
+        columnBuilder.appendFrom(row, 0)
+      }
 
-    withVectors(16, dataType) { testVector =>
-      val columnAccessor = ColumnAccessor(dataType, columnBuilder.build)
-      ColumnAccessor.decompress(columnAccessor, testVector, 16)
+      withVectors(16, dataType) { testVector =>
+        val columnAccessor = ColumnAccessor(dataType, columnBuilder.build)
+        ColumnAccessor.decompress(columnAccessor, testVector, 16)
 
-      assert(testVector.isNullAt(0))
-      for (i <- 1 until 16) {
-        assert(testVector.isNullAt(i) == false)
-        assert(testVector.getLong(i) == i.toLong)
+        assert(testVector.isNullAt(0))
+        for (i <- 1 until 16) {
+          assert(testVector.isNullAt(i) == false)
+          assert(testVector.getLong(i) == i.toLong)
+        }
       }
     }
   }
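
Condensed from the refactored test above, the round trip for a single
TimestampNTZ value looks like this (a sketch only: ColumnBuilderHelper
is a test-only wrapper around the package-private ColumnBuilder, so this
belongs in Spark's own sql test sources, with imports as in
ColumnVectorSuite):

    // Append one TimestampNTZ value (physically a long of microseconds),
    // then decompress it back through the accessor added by this patch.
    val columnBuilder = ColumnBuilderHelper(TimestampNTZType, 1024, "ts", true)
    val row = new SpecificInternalRow(Array(TimestampNTZType))
    row.setLong(0, 42L)
    columnBuilder.appendFrom(row, 0)

    val vector = new OnHeapColumnVector(1, TimestampNTZType)
    ColumnAccessor.decompress(ColumnAccessor(TimestampNTZType, columnBuilder.build), vector, 1)
    assert(vector.getLong(0) == 42L)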
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]