Kontinuation commented on code in PR #169:
URL: https://github.com/apache/sedona-db/pull/169#discussion_r2392227047


##########
rust/sedona-spatial-join/src/index.rs:
##########
@@ -937,6 +905,150 @@ async fn collect_build_partition(
 /// Rough estimate for in-memory size of the rtree per rect in bytes
 const RTREE_MEMORY_ESTIMATE_PER_RECT: usize = 60;
 
+/// Shared KNN components that can be reused across queries
+struct KnnComponents {
+    euclidean_metric: EuclideanDistance,
+    haversine_metric: HaversineDistance,
+    /// Pre-allocated vector for geometry cache - lock-free access
+    /// Indexed by rtree data index for O(1) access
+    geometry_cache: Vec<OnceCell<Geometry<f64>>>,
+}
+
+impl KnnComponents {
+    fn new(
+        cache_size: usize,
+        indexed_batches: &[IndexedBatch],
+        memory_pool: Arc<dyn MemoryPool>,
+    ) -> datafusion_common::Result<Self> {
+        // Create memory consumer and reservation for geometry cache
+        let consumer = MemoryConsumer::new("SpatialJoinKnnGeometryCache");
+        let mut memory_reservation = consumer.register(&memory_pool);
+
+        // Estimate maximum possible memory usage based on WKB sizes
+        let estimated_memory = 
Self::estimate_max_memory_usage(indexed_batches);
+        memory_reservation.try_grow(estimated_memory)?;
+
+        // Pre-allocate OnceCell vector
+        let geometry_cache = (0..cache_size).map(|_| 
OnceCell::new()).collect();
+
+        Ok(Self {
+            euclidean_metric: EuclideanDistance,
+            haversine_metric: HaversineDistance::default(),
+            geometry_cache,
+        })
+    }
+
+    /// Estimate the maximum memory usage for decoded geometries based on WKB 
sizes
+    fn estimate_max_memory_usage(indexed_batches: &[IndexedBatch]) -> usize {
+        let mut total_wkb_size = 0;
+
+        for batch in indexed_batches {
+            for wkb in batch.geom_array.wkbs().iter().flatten() {
+                total_wkb_size += wkb.buf().len();
+            }
+        }
+        total_wkb_size
+    }
+}
+
+/// Enhanced indexed distance metric adapter for SedonaDB KNN queries.
+/// This adapter provides on-demand WKB decoding and geometry caching for 
efficient
+/// distance calculations with support for both Euclidean and Haversine 
distance metrics.
+struct SedonaKnnAdapter<'a> {
+    indexed_batches: &'a [IndexedBatch],
+    data_id_to_batch_pos: &'a [(i32, i32)],
+    distance_metric: &'a dyn DistanceMetric<f32>,
+    // Reference to KNN components for cache and memory tracking
+    knn_components: &'a KnnComponents,

Review Comment:
   This is a shared reference to the KNN components owned by the spatial index, 
so the geometry cache is correctly reused across queries. My previous review 
comment was incorrect. This should be OK.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to