This is an automated email from the ASF dual-hosted git repository.

felixybw pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 0e4f87014 [VL] [BUG fix] Make the hasNext method can be called multi 
times (#5545)
0e4f87014 is described below

commit 0e4f870145ec414c3abb227c3175d6edb5dbcf39
Author: JiaKe <[email protected]>
AuthorDate: Sat Apr 27 03:16:49 2024 +0800

    [VL] [BUG fix] Make the hasNext method can be called multi times (#5545)
    
    Currently, the iterator is designed to close once the hasNext() method 
returns false. Therefore, if hasNext() is called a second time, it will throw 
exception here. This PR adds a protectInvocationFlow() method to ensure that 
the iterator's hasNext() can be safely called multiple times without causing an 
exception.
---
 .../scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala   | 1 +
 .../scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala     | 1 +
 .../org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala | 1 +
 cpp/core/jni/JniWrapper.cc                                             | 3 ++-
 .../org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala     | 1 +
 .../src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala | 1 +
 6 files changed, 7 insertions(+), 1 deletion(-)

diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala
index 5b0918976..5a165116c 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala
@@ -225,6 +225,7 @@ class VeloxIteratorApi extends IteratorApi with Logging {
 
     Iterators
       .wrap(nativeResultIterator.asScala)
+      .protectInvocationFlow()
       .recycleIterator {
         updateNativeMetrics(nativeResultIterator.getMetrics)
         nativeResultIterator.close()
diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala
index 32358b079..22ca020d1 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala
@@ -231,6 +231,7 @@ object RowToVeloxColumnarExec {
     }
     Iterators
       .wrap(res)
+      .protectInvocationFlow()
       .recycleIterator {
         jniWrapper.close(r2cHandle)
       }
diff --git 
a/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala
 
b/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala
index ce210b3bf..88678cb5e 100644
--- 
a/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala
+++ 
b/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala
@@ -272,6 +272,7 @@ class ColumnarCachedBatchSerializer extends 
CachedBatchSerializer with SQLConfHe
               }
             }
           })
+          .protectInvocationFlow()
           .recycleIterator {
             jniWrapper.close(deserializerHandle)
           }
diff --git a/cpp/core/jni/JniWrapper.cc b/cpp/core/jni/JniWrapper.cc
index 748eb5831..7363a9da0 100644
--- a/cpp/core/jni/JniWrapper.cc
+++ b/cpp/core/jni/JniWrapper.cc
@@ -449,7 +449,8 @@ JNIEXPORT jboolean JNICALL 
Java_org_apache_gluten_vectorized_ColumnarBatchOutIte
 
   auto iter = ctx->objectStore()->retrieve<ResultIterator>(iterHandle);
   if (iter == nullptr) {
-    std::string errorMessage = "failed to get batch iterator";
+    std::string errorMessage =
+        "When hasNext() is called on a closed iterator, an exception is 
thrown. To prevent this, consider using the protectInvocationFlow() method when 
creating the iterator in scala side. This will allow the hasNext() method to be 
called multiple times without issue.";
     throw gluten::GlutenException(errorMessage);
   }
   return iter->hasNext();
diff --git 
a/gluten-data/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala
 
b/gluten-data/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala
index c3e39d834..ac94bfa89 100644
--- 
a/gluten-data/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala
+++ 
b/gluten-data/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala
@@ -74,6 +74,7 @@ case class ColumnarBuildSideRelation(output: Seq[Attribute], 
batches: Array[Arra
           ColumnarBatches.create(Runtimes.contextInstance(), handle)
         }
       })
+      .protectInvocationFlow()
       .recycleIterator {
         jniWrapper.close(serializeHandle)
       }
diff --git 
a/gluten-data/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala
 
b/gluten-data/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala
index 25a2938a8..fd0cf45c8 100644
--- 
a/gluten-data/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala
+++ 
b/gluten-data/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala
@@ -67,6 +67,7 @@ object ExecUtil {
           row
         }
       })
+      .protectInvocationFlow()
       .recycleIterator {
         jniWrapper.nativeClose(c2rHandle)
       }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to