This is an automated email from the ASF dual-hosted git repository.

hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 3f95dbffd [GLUTEN-5414] [VL] Fix and enable arrow native memory pool track in CSV scan (#5683)
3f95dbffd is described below

commit 3f95dbffd79d225af596427494ff7a4690935275
Author: Jin Chengcheng <[email protected]>
AuthorDate: Sat May 11 14:50:39 2024 +0800

    [GLUTEN-5414] [VL] Fix and enable arrow native memory pool track in CSV scan (#5683)
---
 ep/build-velox/src/modify_arrow.patch              | 27 ++++++++++++++++++++++
 .../scala/org/apache/gluten/utils/ArrowUtil.scala  |  4 ++--
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/ep/build-velox/src/modify_arrow.patch 
b/ep/build-velox/src/modify_arrow.patch
index 64d92725d..5814958a9 100644
--- a/ep/build-velox/src/modify_arrow.patch
+++ b/ep/build-velox/src/modify_arrow.patch
@@ -30,6 +30,33 @@ index a24f272fe..e25f78c85 100644
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
+diff --git a/java/dataset/src/main/cpp/jni_wrapper.cc 
b/java/dataset/src/main/cpp/jni_wrapper.cc
+index d2d976677..d7dd01ecd 100644
+--- a/java/dataset/src/main/cpp/jni_wrapper.cc
++++ b/java/dataset/src/main/cpp/jni_wrapper.cc
+@@ -126,20 +126,14 @@ class ReserveFromJava : public 
arrow::dataset::jni::ReservationListener {
+       : vm_(vm), java_reservation_listener_(java_reservation_listener) {}
+ 
+   arrow::Status OnReservation(int64_t size) override {
+-    JNIEnv* env;
+-    if (vm_->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION) != JNI_OK) {
+-      return arrow::Status::Invalid("JNIEnv was not attached to current 
thread");
+-    }
++    JNIEnv* env = arrow::dataset::jni::GetEnvOrAttach(vm_);
+     env->CallObjectMethod(java_reservation_listener_, reserve_memory_method, 
size);
+     RETURN_NOT_OK(arrow::dataset::jni::CheckException(env));
+     return arrow::Status::OK();
+   }
+ 
+   arrow::Status OnRelease(int64_t size) override {
+-    JNIEnv* env;
+-    if (vm_->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION) != JNI_OK) {
+-      return arrow::Status::Invalid("JNIEnv was not attached to current 
thread");
+-    }
++    JNIEnv* env = arrow::dataset::jni::GetEnvOrAttach(vm_);
+     env->CallObjectMethod(java_reservation_listener_, 
unreserve_memory_method, size);
+     RETURN_NOT_OK(arrow::dataset::jni::CheckException(env));
+     return arrow::Status::OK();
 diff --git a/java/pom.xml b/java/pom.xml
 index a8328576b..57f282c6c 100644
 --- a/java/pom.xml
diff --git a/gluten-data/src/main/scala/org/apache/gluten/utils/ArrowUtil.scala 
b/gluten-data/src/main/scala/org/apache/gluten/utils/ArrowUtil.scala
index 4579e015b..26bebcfae 100644
--- a/gluten-data/src/main/scala/org/apache/gluten/utils/ArrowUtil.scala
+++ b/gluten-data/src/main/scala/org/apache/gluten/utils/ArrowUtil.scala
@@ -18,6 +18,7 @@ package org.apache.gluten.utils
 
 import org.apache.gluten.exception.SchemaMismatchException
 import org.apache.gluten.memory.arrow.alloc.ArrowBufferAllocators
+import org.apache.gluten.memory.arrow.pool.ArrowNativeMemoryPool
 import org.apache.gluten.vectorized.ArrowWritableColumnVector
 
 import org.apache.spark.internal.Logging
@@ -33,7 +34,6 @@ import org.apache.spark.sql.vectorized.{ColumnarBatch, 
ColumnVector}
 
 import org.apache.arrow.c.{ArrowSchema, CDataDictionaryProvider, Data}
 import org.apache.arrow.dataset.file.{FileFormat, FileSystemDatasetFactory}
-import org.apache.arrow.dataset.jni.NativeMemoryPool
 import org.apache.arrow.memory.BufferAllocator
 import org.apache.arrow.vector.ipc.message.ArrowRecordBatch
 import org.apache.arrow.vector.types.pojo.{ArrowType, Field, Schema}
@@ -144,7 +144,7 @@ object ArrowUtil extends Logging {
     val allocator = ArrowBufferAllocators.contextInstance()
     val factory = new FileSystemDatasetFactory(
       allocator,
-      NativeMemoryPool.getDefault, // TODO: wait to change
+      ArrowNativeMemoryPool.arrowPool("FileSystemDatasetFactory"),
       format,
       rewriteUri(encodedUri))
     factory


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to