This is an automated email from the ASF dual-hosted git repository.

hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new e7caab1da [GLUTEN-6961][VL][feat] Add decimal write support for 
ArrowWritableColumnVector (#6962)
e7caab1da is described below

commit e7caab1da4b9d144b1c9550e44f0ac2d1177f039
Author: Jin Chengcheng <[email protected]>
AuthorDate: Thu Aug 29 17:04:51 2024 +0800

    [GLUTEN-6961][VL][feat] Add decimal write support for 
ArrowWritableColumnVector (#6962)
    
    Closes #6961
---
 .../gluten/vectorized/ArrowColumnVectorTest.java   | 50 ++++++++++++++++++++++
 cpp/velox/tests/VeloxRowToColumnarTest.cc          | 50 +++++++++++++++++++++-
 .../vectorized/ArrowWritableColumnVector.java      | 14 +++++-
 3 files changed, 112 insertions(+), 2 deletions(-)

diff --git 
a/backends-velox/src/test/java/org/apache/gluten/vectorized/ArrowColumnVectorTest.java
 
b/backends-velox/src/test/java/org/apache/gluten/vectorized/ArrowColumnVectorTest.java
new file mode 100644
index 000000000..11330544d
--- /dev/null
+++ 
b/backends-velox/src/test/java/org/apache/gluten/vectorized/ArrowColumnVectorTest.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.vectorized;
+
+import org.apache.spark.sql.execution.vectorized.MutableColumnarRow;
+import org.apache.spark.sql.types.Decimal;
+import org.apache.spark.sql.types.StructType;
+import org.apache.spark.util.TaskResources$;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class ArrowColumnVectorTest {
+
+  @Test
+  public void testWriteByMutableColumnarRow() {
+    TaskResources$.MODULE$.runUnsafe(
+        () -> {
+          final ArrowWritableColumnVector[] columns = newArrowColumns("a 
decimal(20, 1)", 20);
+          MutableColumnarRow row = new MutableColumnarRow(columns);
+          Decimal decimal = new Decimal();
+          decimal.set(234, 20, 1);
+          row.setDecimal(0, decimal, 20);
+          Assert.assertEquals(row.getDecimal(0, 20, 1), decimal);
+          return null;
+        });
+  }
+
+  private static ArrowWritableColumnVector[] newArrowColumns(String schema, 
int numRows) {
+    ArrowWritableColumnVector[] columns =
+        ArrowWritableColumnVector.allocateColumns(numRows, 
StructType.fromDDL(schema));
+    for (ArrowWritableColumnVector col : columns) {
+      col.setValueCount(numRows);
+    }
+    return columns;
+  }
+}
diff --git a/cpp/velox/tests/VeloxRowToColumnarTest.cc 
b/cpp/velox/tests/VeloxRowToColumnarTest.cc
index c784dbd59..0d11dd4ac 100644
--- a/cpp/velox/tests/VeloxRowToColumnarTest.cc
+++ b/cpp/velox/tests/VeloxRowToColumnarTest.cc
@@ -87,10 +87,58 @@ TEST_F(VeloxRowToColumnarTest, allTypes) {
       makeNullableFlatVector<bool>(
           {std::nullopt, true, false, std::nullopt, true, true, false, true, 
std::nullopt, std::nullopt}),
       makeFlatVector<velox::StringView>(
-          {"alice0", "bob1", "alice2", "bob3", "Alice4", "Bob5", "AlicE6", 
"boB7", "ALICE8", "BOB9"}),
+          {"alice0",
+           "bob1",
+           "alice2",
+           "bob3",
+           "Alice4",
+           "Bob5123456789098766notinline",
+           "AlicE6",
+           "boB7",
+           "ALICE8",
+           "BOB9"}),
       makeNullableFlatVector<velox::StringView>(
           {"alice", "bob", std::nullopt, std::nullopt, "Alice", "Bob", 
std::nullopt, "alicE", std::nullopt, "boB"}),
   });
   testRowVectorEqual(vector);
 }
+
+TEST_F(VeloxRowToColumnarTest, bigint) {
+  auto vector = makeRowVector({
+      makeNullableFlatVector<int64_t>({1, 2, 3, std::nullopt, 4, std::nullopt, 
5, 6, std::nullopt, 7}),
+  });
+  testRowVectorEqual(vector);
+}
+
+TEST_F(VeloxRowToColumnarTest, decimal) {
+  auto vector = makeRowVector({
+      makeNullableFlatVector<int128_t>(
+          {123456, HugeInt::build(1045, 1789), 3678, std::nullopt, 4, 
std::nullopt, 5, 687987, std::nullopt, 7},
+          DECIMAL(38, 2)),
+      makeNullableFlatVector<int64_t>(
+          {178987, 2, 3, std::nullopt, 4, std::nullopt, 5, 6, std::nullopt, 
7}, DECIMAL(12, 3)),
+  });
+  testRowVectorEqual(vector);
+}
+
+TEST_F(VeloxRowToColumnarTest, timestamp) {
+  auto vector = makeRowVector({
+      makeNullableFlatVector<Timestamp>(
+          {Timestamp(-946684800, 0),
+           Timestamp(-7266, 0),
+           Timestamp(0, 0),
+           Timestamp(946684800, 0),
+           Timestamp(9466848000, 0),
+           Timestamp(94668480000, 0),
+           Timestamp(946729316, 0),
+           Timestamp(946729316, 0),
+           Timestamp(946729316, 0),
+           Timestamp(7266, 0),
+           Timestamp(-50049331200, 0),
+           Timestamp(253405036800, 0),
+           Timestamp(-62480037600, 0),
+           std::nullopt}),
+  });
+  testRowVectorEqual(vector);
+}
 } // namespace gluten
diff --git 
a/gluten-data/src/main/java/org/apache/gluten/vectorized/ArrowWritableColumnVector.java
 
b/gluten-data/src/main/java/org/apache/gluten/vectorized/ArrowWritableColumnVector.java
index dfd570deb..336d33771 100644
--- 
a/gluten-data/src/main/java/org/apache/gluten/vectorized/ArrowWritableColumnVector.java
+++ 
b/gluten-data/src/main/java/org/apache/gluten/vectorized/ArrowWritableColumnVector.java
@@ -1256,7 +1256,11 @@ public final class ArrowWritableColumnVector extends 
WritableColumnVectorShim {
     }
 
     void setNotNull(int rowId) {
-      throw new UnsupportedOperationException();
+      // Arrow Java library doesn't usually expose this API from its vectors, 
so we have to
+      // allow a no-op here rather than throwing, which could fail the caller. 
Basically it's
+      // acceptable because Spark will eventually set the value after this method 
returns,
+      // at which point Arrow Java will set the validity buffer anyway, as if 
the call to
+      // `setNotNull` were just deferred.
     }
 
     void setNulls(int rowId, int count) {
@@ -1745,6 +1749,14 @@ public final class ArrowWritableColumnVector extends 
WritableColumnVectorShim {
     final void setBytes(int rowId, BigDecimal value) {
       writer.setSafe(rowId, value);
     }
+
+    final void setBytes(int rowId, int count, byte[] src, int srcIndex) {
+      if (count == src.length && srcIndex == 0) {
+        writer.setBigEndianSafe(rowId, src);
+        return;
+      }
+      throw new UnsupportedOperationException();
+    }
   }
 
   private static class StringWriter extends ArrowVectorWriter {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to