Copilot commented on code in PR #534:
URL: https://github.com/apache/sedona-db/pull/534#discussion_r2712183097
##########
rust/sedona-spatial-join/src/index/spatial_index_builder.rs:
##########
@@ -121,19 +115,55 @@ impl SpatialIndexBuilder {
indexed_batches: Vec::new(),
reservation,
stats: GeoStatistics::empty(),
- memory_pool,
+ memory_used: 0,
})
}
+ /// Estimate the amount of memory required by the R-tree index and
evaluating spatial predicates.
+ /// The estimated memory usage does not include the memory required for
holding the build side
+ /// batches.
+ pub fn estimate_extra_memory_usage(
+ geo_stats: &GeoStatistics,
+ spatial_predicate: &SpatialPredicate,
+ options: &SpatialJoinOptions,
+ ) -> usize {
+ // Estimate the amount of memory needed by the refiner
+ let num_geoms = geo_stats.total_geometries().unwrap_or(0) as usize;
+ let refiner = create_refiner(
+ options.spatial_library,
+ spatial_predicate,
+ options.clone(),
+ num_geoms,
+ geo_stats.clone(),
+ );
+ let refiner_mem_usage = refiner.estimate_max_memory_usage(geo_stats);
+
+ let knn_components_mem_usage =
+ if matches!(spatial_predicate,
SpatialPredicate::KNearestNeighbors(_)) {
+ KnnComponents::estimate_max_memory_usage(geo_stats)
+ } else {
+ 0
+ };
+
+ // Estimate the amount of memory needed for the R-tree
+ let rtree_mem_usage = num_geoms * RTREE_MEMORY_ESTIMATE_PER_RECT;
+
+ // Estimate the amount of memory needed for auxiliary data structures,
such as
+ // batch_pos_vec and geom_idx_vec.
+ let auxiliary = num_geoms * 16;
Review Comment:
The magic number `16` should be extracted to a named constant with
documentation explaining what auxiliary data structures it accounts for
(batch_pos_vec and geom_idx_vec).
##########
rust/sedona-spatial-join/src/index/build_side_collector.rs:
##########
@@ -115,12 +128,23 @@ impl BuildSideBatchesCollector {
in_mem_batches.push(build_side_batch);
}
+ let geo_statistics = analyzer.finish();
+ let extra_mem = SpatialIndexBuilder::estimate_extra_memory_usage(
+ &geo_statistics,
+ &self.spatial_predicate,
+ &self.spatial_join_options,
+ );
+
+ // Try to grow the reservation with a safety buffer to leave room for
additional data structures
+ let additional_reservation = extra_mem + (extra_mem +
reservation.size()) / 5;
Review Comment:
The magic number `/5` (20% safety buffer) should be extracted to a named
constant with documentation explaining the rationale for this specific
percentage.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]