This is an automated email from the ASF dual-hosted git repository.
felixybw pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 0e4f87014 [VL] [BUG fix] Make the hasNext method can be called multi
times (#5545)
0e4f87014 is described below
commit 0e4f870145ec414c3abb227c3175d6edb5dbcf39
Author: JiaKe <[email protected]>
AuthorDate: Sat Apr 27 03:16:49 2024 +0800
[VL] [BUG fix] Make the hasNext method can be called multi times (#5545)
Currently, the iterator is designed to close once the hasNext() method
returns false. Therefore, if hasNext() is called a second time, it will throw
exception here. This PR adds a protectInvocationFlow() method to ensure that
the iterator's hasNext() can be safely called multiple times without causing an
exception.
---
.../scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala | 1 +
.../scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala | 1 +
.../org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala | 1 +
cpp/core/jni/JniWrapper.cc | 3 ++-
.../org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala | 1 +
.../src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala | 1 +
6 files changed, 7 insertions(+), 1 deletion(-)
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala
index 5b0918976..5a165116c 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala
@@ -225,6 +225,7 @@ class VeloxIteratorApi extends IteratorApi with Logging {
Iterators
.wrap(nativeResultIterator.asScala)
+ .protectInvocationFlow()
.recycleIterator {
updateNativeMetrics(nativeResultIterator.getMetrics)
nativeResultIterator.close()
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala
b/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala
index 32358b079..22ca020d1 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala
@@ -231,6 +231,7 @@ object RowToVeloxColumnarExec {
}
Iterators
.wrap(res)
+ .protectInvocationFlow()
.recycleIterator {
jniWrapper.close(r2cHandle)
}
diff --git
a/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala
b/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala
index ce210b3bf..88678cb5e 100644
---
a/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala
+++
b/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala
@@ -272,6 +272,7 @@ class ColumnarCachedBatchSerializer extends
CachedBatchSerializer with SQLConfHe
}
}
})
+ .protectInvocationFlow()
.recycleIterator {
jniWrapper.close(deserializerHandle)
}
diff --git a/cpp/core/jni/JniWrapper.cc b/cpp/core/jni/JniWrapper.cc
index 748eb5831..7363a9da0 100644
--- a/cpp/core/jni/JniWrapper.cc
+++ b/cpp/core/jni/JniWrapper.cc
@@ -449,7 +449,8 @@ JNIEXPORT jboolean JNICALL
Java_org_apache_gluten_vectorized_ColumnarBatchOutIte
auto iter = ctx->objectStore()->retrieve<ResultIterator>(iterHandle);
if (iter == nullptr) {
- std::string errorMessage = "failed to get batch iterator";
+ std::string errorMessage =
+ "When hasNext() is called on a closed iterator, an exception is
thrown. To prevent this, consider using the protectInvocationFlow() method when
creating the iterator in scala side. This will allow the hasNext() method to be
called multiple times without issue.";
throw gluten::GlutenException(errorMessage);
}
return iter->hasNext();
diff --git
a/gluten-data/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala
b/gluten-data/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala
index c3e39d834..ac94bfa89 100644
---
a/gluten-data/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala
+++
b/gluten-data/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala
@@ -74,6 +74,7 @@ case class ColumnarBuildSideRelation(output: Seq[Attribute],
batches: Array[Arra
ColumnarBatches.create(Runtimes.contextInstance(), handle)
}
})
+ .protectInvocationFlow()
.recycleIterator {
jniWrapper.close(serializeHandle)
}
diff --git
a/gluten-data/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala
b/gluten-data/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala
index 25a2938a8..fd0cf45c8 100644
---
a/gluten-data/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala
+++
b/gluten-data/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala
@@ -67,6 +67,7 @@ object ExecUtil {
row
}
})
+ .protectInvocationFlow()
.recycleIterator {
jniWrapper.nativeClose(c2rHandle)
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]