Kontinuation commented on code in PR #277:
URL: https://github.com/apache/sedona-db/pull/277#discussion_r2501695361


##########
rust/sedona-spatial-join/src/index/spatial_index_builder.rs:
##########
@@ -0,0 +1,304 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::BooleanBufferBuilder;
+use arrow_schema::SchemaRef;
+use datafusion_physical_plan::metrics::{self, ExecutionPlanMetricsSet, 
MetricBuilder};
+use sedona_common::SpatialJoinOptions;
+use sedona_expr::statistics::GeoStatistics;
+
+use datafusion_common::{utils::proxy::VecAllocExt, Result};
+use datafusion_execution::memory_pool::{MemoryConsumer, MemoryPool, 
MemoryReservation};
+use datafusion_expr::JoinType;
+use futures::StreamExt;
+use geo_index::rtree::{sort::HilbertSort, RTree, RTreeBuilder};
+use parking_lot::Mutex;
+use std::sync::{atomic::AtomicUsize, Arc};
+
+use crate::{
+    collect::{BuildPartition, BuildSideBatch},
+    index::{knn_adapter::KnnComponents, spatial_index::SpatialIndex},
+    operand_evaluator::create_operand_evaluator,
+    refine::create_refiner,
+    spatial_predicate::SpatialPredicate,
+    utils::{
+        concurrent_reservation::ConcurrentReservation, 
join_utils::need_produce_result_in_final,
+    },
+};
+
+// Type aliases for better readability
+type SpatialRTree = RTree<f32>;
+type DataIdToBatchPos = Vec<(i32, i32)>;
+type RTreeBuildResult = (SpatialRTree, DataIdToBatchPos);
+
+/// Rough estimate for in-memory size of the rtree per rect in bytes
+const RTREE_MEMORY_ESTIMATE_PER_RECT: usize = 60;
+
+/// The prealloc size for the refiner reservation. This is used to reduce the 
frequency of growing
+/// the reservation when updating the refiner memory reservation.
+const REFINER_RESERVATION_PREALLOC_SIZE: usize = 10 * 1024 * 1024; // 10MB
+
+/// Builder for constructing a SpatialIndex from geometry batches.
+///
+/// This builder handles:
+/// 1. Accumulating geometry batches to be indexed
+/// 2. Building the spatial R-tree index
+/// 3. Setting up memory tracking and visited bitmaps
+/// 4. Configuring prepared geometries based on execution mode
+pub(crate) struct SpatialIndexBuilder {
+    schema: SchemaRef,
+    spatial_predicate: SpatialPredicate,
+    options: SpatialJoinOptions,
+    join_type: JoinType,
+    probe_threads_count: usize,
+    metrics: SpatialJoinBuildMetrics,
+
+    /// Batches to be indexed
+    indexed_batches: Vec<BuildSideBatch>,
+    /// Memory reservation for tracking the memory usage of the spatial index
+    reservation: MemoryReservation,
+
+    /// Statistics for indexed geometries
+    stats: GeoStatistics,
+
+    /// Memory pool for managing the memory usage of the spatial index
+    memory_pool: Arc<dyn MemoryPool>,
+}
+
+/// Metrics for the build phase of the spatial join.
+#[derive(Clone, Debug, Default)]
+pub(crate) struct SpatialJoinBuildMetrics {
+    /// Total time for collecting build-side of join
+    pub(crate) build_time: metrics::Time,
+    /// Memory used by the spatial-index in bytes
+    pub(crate) build_mem_used: metrics::Gauge,
+}
+
+impl SpatialJoinBuildMetrics {
+    pub fn new(partition: usize, metrics: &ExecutionPlanMetricsSet) -> Self {
+        Self {
+            build_time: MetricBuilder::new(metrics).subset_time("build_time", 
partition),
+            build_mem_used: 
MetricBuilder::new(metrics).gauge("build_mem_used", partition),
+        }
+    }
+}
+
+impl SpatialIndexBuilder {
+    /// Create a new builder with the given configuration.
+    pub fn new(

Review Comment:
   I decided to leave it as-is. Two fields of it need to be initialized with 
default values. I'd like to keep this new method to initialize the fields 
consistently. I've tried switching to builder pattern. However, applying the 
builder pattern to a builder also looks strange.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to