zhangfengcdt commented on code in PR #362:
URL: https://github.com/apache/sedona-db/pull/362#discussion_r2607815351


##########
rust/sedona-spatial-join/src/build_index.rs:
##########
@@ -33,14 +33,68 @@ use crate::{
     spatial_predicate::SpatialPredicate,
 };
 
-pub(crate) async fn build_index(
+/// Sequential version of build_index that doesn't spawn tasks.
+/// Used in execution contexts without async runtime support (e.g., Spark/Comet JNI)
+pub async fn build_index_seq(
     context: Arc<TaskContext>,
     build_schema: SchemaRef,
     build_streams: Vec<SendableRecordBatchStream>,
     spatial_predicate: SpatialPredicate,
     join_type: JoinType,
     probe_threads_count: usize,
     metrics: ExecutionPlanMetricsSet,
+) -> Result<SpatialIndex> {
+    build_index_impl(
+        context,
+        build_schema,
+        build_streams,
+        spatial_predicate,
+        join_type,
+        probe_threads_count,
+        metrics,
+        false, // concurrent = false
+    )
+    .await
+}
+
+/// Concurrent version of build_index that spawns tasks for parallel collection.
+pub async fn build_index(
+    context: Arc<TaskContext>,
+    build_schema: SchemaRef,
+    build_streams: Vec<SendableRecordBatchStream>,
+    spatial_predicate: SpatialPredicate,
+    join_type: JoinType,
+    probe_threads_count: usize,
+    metrics: ExecutionPlanMetricsSet,
+) -> Result<SpatialIndex> {
+    build_index_impl(
+        context,
+        build_schema,
+        build_streams,
+        spatial_predicate,
+        join_type,
+        probe_threads_count,
+        metrics,
+        true, // concurrent = true
+    )
+    .await
+}
+
+/// Internal implementation of build_index with configurable concurrency.
+///
+/// # Arguments
+/// * `concurrent` - If true, uses `collect_all` which spawns tasks for parallel collection.
+///   If false, collects partitions sequentially (for JNI/embedded contexts).
+#[allow(clippy::too_many_arguments)]
+async fn build_index_impl(
+    context: Arc<TaskContext>,
+    build_schema: SchemaRef,
+    build_streams: Vec<SendableRecordBatchStream>,
+    spatial_predicate: SpatialPredicate,
+    join_type: JoinType,
+    probe_threads_count: usize,
+    metrics: ExecutionPlanMetricsSet,
+    concurrent: bool,

Review Comment:
   Cool! I've changed the code to get `concurrent_build_side_collection` directly
   in the `build_index` function, and it looks much cleaner now.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to