This is an automated email from the ASF dual-hosted git repository.
hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 3f95dbffd [GLUTEN-5414] [VL] Fix and enable arrow native memory pool track in CSV scan (#5683)
3f95dbffd is described below
commit 3f95dbffd79d225af596427494ff7a4690935275
Author: Jin Chengcheng <[email protected]>
AuthorDate: Sat May 11 14:50:39 2024 +0800
[GLUTEN-5414] [VL] Fix and enable arrow native memory pool track in CSV scan (#5683)
---
ep/build-velox/src/modify_arrow.patch | 27 ++++++++++++++++++++++
.../scala/org/apache/gluten/utils/ArrowUtil.scala | 4 ++--
2 files changed, 29 insertions(+), 2 deletions(-)
diff --git a/ep/build-velox/src/modify_arrow.patch b/ep/build-velox/src/modify_arrow.patch
index 64d92725d..5814958a9 100644
--- a/ep/build-velox/src/modify_arrow.patch
+++ b/ep/build-velox/src/modify_arrow.patch
@@ -30,6 +30,33 @@ index a24f272fe..e25f78c85 100644
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+diff --git a/java/dataset/src/main/cpp/jni_wrapper.cc b/java/dataset/src/main/cpp/jni_wrapper.cc
+index d2d976677..d7dd01ecd 100644
+--- a/java/dataset/src/main/cpp/jni_wrapper.cc
++++ b/java/dataset/src/main/cpp/jni_wrapper.cc
+@@ -126,20 +126,14 @@ class ReserveFromJava : public arrow::dataset::jni::ReservationListener {
+ : vm_(vm), java_reservation_listener_(java_reservation_listener) {}
+
+ arrow::Status OnReservation(int64_t size) override {
+- JNIEnv* env;
+- if (vm_->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION) != JNI_OK) {
+- return arrow::Status::Invalid("JNIEnv was not attached to current thread");
+- }
++ JNIEnv* env = arrow::dataset::jni::GetEnvOrAttach(vm_);
+ env->CallObjectMethod(java_reservation_listener_, reserve_memory_method, size);
+ RETURN_NOT_OK(arrow::dataset::jni::CheckException(env));
+ return arrow::Status::OK();
+ }
+
+ arrow::Status OnRelease(int64_t size) override {
+- JNIEnv* env;
+- if (vm_->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION) != JNI_OK) {
+- return arrow::Status::Invalid("JNIEnv was not attached to current thread");
+- }
++ JNIEnv* env = arrow::dataset::jni::GetEnvOrAttach(vm_);
+ env->CallObjectMethod(java_reservation_listener_, unreserve_memory_method, size);
+ RETURN_NOT_OK(arrow::dataset::jni::CheckException(env));
+ return arrow::Status::OK();
diff --git a/java/pom.xml b/java/pom.xml
index a8328576b..57f282c6c 100644
--- a/java/pom.xml
diff --git a/gluten-data/src/main/scala/org/apache/gluten/utils/ArrowUtil.scala b/gluten-data/src/main/scala/org/apache/gluten/utils/ArrowUtil.scala
index 4579e015b..26bebcfae 100644
--- a/gluten-data/src/main/scala/org/apache/gluten/utils/ArrowUtil.scala
+++ b/gluten-data/src/main/scala/org/apache/gluten/utils/ArrowUtil.scala
@@ -18,6 +18,7 @@ package org.apache.gluten.utils
import org.apache.gluten.exception.SchemaMismatchException
import org.apache.gluten.memory.arrow.alloc.ArrowBufferAllocators
+import org.apache.gluten.memory.arrow.pool.ArrowNativeMemoryPool
import org.apache.gluten.vectorized.ArrowWritableColumnVector
import org.apache.spark.internal.Logging
@@ -33,7 +34,6 @@ import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector}
import org.apache.arrow.c.{ArrowSchema, CDataDictionaryProvider, Data}
import org.apache.arrow.dataset.file.{FileFormat, FileSystemDatasetFactory}
-import org.apache.arrow.dataset.jni.NativeMemoryPool
import org.apache.arrow.memory.BufferAllocator
import org.apache.arrow.vector.ipc.message.ArrowRecordBatch
import org.apache.arrow.vector.types.pojo.{ArrowType, Field, Schema}
@@ -144,7 +144,7 @@ object ArrowUtil extends Logging {
val allocator = ArrowBufferAllocators.contextInstance()
val factory = new FileSystemDatasetFactory(
allocator,
- NativeMemoryPool.getDefault, // TODO: wait to change
+ ArrowNativeMemoryPool.arrowPool("FileSystemDatasetFactory"),
format,
rewriteUri(encodedUri))
factory
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]