zhangfengcdt commented on code in PR #169:
URL: https://github.com/apache/sedona-db/pull/169#discussion_r2399198849
##########
rust/sedona-spatial-join/src/index.rs:
##########
@@ -937,6 +903,109 @@ async fn collect_build_partition(
/// Rough estimate for in-memory size of the rtree per rect in bytes
const RTREE_MEMORY_ESTIMATE_PER_RECT: usize = 60;
+/// Shared KNN components that can be reused across queries
+struct KnnComponents {
+ euclidean_metric: EuclideanDistance,
+ haversine_metric: HaversineDistance,
+ /// Pre-allocated vector for geometry cache - lock-free access
+ /// Indexed by rtree data index for O(1) access
+ geometry_cache: Vec<OnceCell<Geometry<f64>>>,
+}
+
+impl KnnComponents {
+ fn new(
+ cache_size: usize,
+ indexed_batches: &[IndexedBatch],
+ memory_pool: Arc<dyn MemoryPool>,
+ ) -> datafusion_common::Result<Self> {
+ // Create memory consumer and reservation for geometry cache
+ let consumer = MemoryConsumer::new("SpatialJoinKnnGeometryCache");
+ let mut memory_reservation = consumer.register(&memory_pool);
+
+ // Estimate maximum possible memory usage based on WKB sizes
+ let estimated_memory =
Self::estimate_max_memory_usage(indexed_batches);
+ memory_reservation.try_grow(estimated_memory)?;
+
+ // Pre-allocate OnceCell vector
+ let geometry_cache = (0..cache_size).map(|_|
OnceCell::new()).collect();
+
+ Ok(Self {
+ euclidean_metric: EuclideanDistance,
+ haversine_metric: HaversineDistance::default(),
+ geometry_cache,
+ })
+ }
+
+ /// Estimate the maximum memory usage for decoded geometries based on WKB
sizes
+ fn estimate_max_memory_usage(indexed_batches: &[IndexedBatch]) -> usize {
+ let mut total_wkb_size = 0;
+
+ for batch in indexed_batches {
+ for wkb in batch.geom_array.wkbs().iter().flatten() {
+ total_wkb_size += wkb.buf().len();
+ }
+ }
+ total_wkb_size
Review Comment:
Using the wkb size is conservative and we are using the same approach as
before.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]