Copilot commented on code in PR #521:
URL: https://github.com/apache/sedona-db/pull/521#discussion_r2696620010
##########
rust/sedona-spatial-join/src/utils/arrow_utils.rs:
##########
@@ -15,11 +15,146 @@
// specific language governing permissions and limitations
// under the License.
-use arrow::array::{Array, ArrayData, RecordBatch};
+use std::sync::Arc;
+
+use arrow::array::{Array, ArrayData, BinaryViewArray, ListArray, RecordBatch,
StringViewArray};
+use arrow_array::make_array;
use arrow_array::ArrayRef;
+use arrow_array::StructArray;
use arrow_schema::{ArrowError, DataType};
use datafusion_common::Result;
+/// Reconstruct `batch` to organize the payload buffers of each
`StringViewArray` and
+/// `BinaryViewArray` in sequential order by calling `gc()` on them.
+///
+/// Note this is a workaround until
<https://github.com/apache/arrow-rs/issues/7185> is
+/// available.
+///
+/// # Rationale
+///
+/// The `interleave` kernel does not reconstruct the inner buffers of view
arrays by default,
+/// leading to non-sequential payload locations. A single payload buffer might
be shared by
+/// multiple `RecordBatch`es or multiple rows in the same batch might
reference scattered
+/// locations in a large buffer.
+///
+/// When writing each batch to disk, the writer has to write all referenced
buffers. This
+/// causes extra disk reads and writes, and potentially execution failure
(e.g. No space left
+/// on device).
+///
+/// # Example
+///
+/// Before interleaving:
+/// batch1 -> buffer1 (large)
+/// batch2 -> buffer2 (large)
+///
+/// interleaved_batch -> buffer1 (sparse access)
+/// -> buffer2 (sparse access)
+///
+/// Then when spilling the interleaved batch, the writer has to write both
buffer1 and buffer2
+/// entirely, even if only a few bytes are used.
+pub(crate) fn compact_batch(batch: RecordBatch) -> Result<RecordBatch> {
+ let mut new_columns: Vec<Arc<dyn Array>> =
Vec::with_capacity(batch.num_columns());
+ let mut arr_mutated = false;
+
+ for array in batch.columns() {
+ let (new_array, mutated) = compact_array(Arc::clone(array))?;
+ new_columns.push(new_array);
+ arr_mutated |= mutated;
+ }
+
+ if arr_mutated {
+ Ok(RecordBatch::try_new(batch.schema(), new_columns)?)
+ } else {
+ Ok(batch)
+ }
+}
+
+/// Recursively compacts view arrays in `array` by calling `gc()` on them.
+/// Returns a tuple of the potentially new array and a boolean indicating
+/// whether any compaction was performed.
+pub(crate) fn compact_array(array: ArrayRef) -> Result<(ArrayRef, bool)> {
+ if let Some(view_array) = array.as_any().downcast_ref::<StringViewArray>()
{
+ return Ok((Arc::new(view_array.gc()), true));
+ }
+ if let Some(view_array) = array.as_any().downcast_ref::<BinaryViewArray>()
{
+ return Ok((Arc::new(view_array.gc()), true));
+ }
+
+ // Fast path for non-nested arrays
+ if !array.data_type().is_nested() {
+ return Ok((array, false));
+ }
+
+ // Avoid ArrayData -> ArrayRef roundtrips for commonly used data types,
+ // including StructArray and ListArray.
+
+ if let Some(struct_array) = array.as_any().downcast_ref::<StructArray>() {
+ let mut mutated = false;
+ let mut new_columns: Vec<ArrayRef> =
Vec::with_capacity(struct_array.num_columns());
+ for col in struct_array.columns() {
+ let (new_col, col_mutated) = compact_array(Arc::clone(col))?;
+ mutated |= col_mutated;
+ new_columns.push(new_col);
+ }
+
+ if !mutated {
+ return Ok((array, false));
+ }
+
+ let rebuilt = StructArray::new(
+ struct_array.fields().clone(),
+ new_columns,
+ struct_array.nulls().cloned(),
+ );
+ return Ok((Arc::new(rebuilt), true));
+ }
+
+ if let Some(list_array) = array.as_any().downcast_ref::<ListArray>() {
+ let (new_values, mutated) =
compact_array(list_array.values().clone())?;
+ if !mutated {
+ return Ok((array, false));
+ }
+
+ let DataType::List(field) = list_array.data_type() else {
+ // Defensive: this downcast should only succeed for DataType::List.
+ return Ok((array, false));
Review Comment:
The comment states this is defensive, but it's unclear what condition would
trigger this path given the downcast on line 112 already confirms this is a
ListArray. Consider clarifying whether this handles edge cases like LargeList
or FixedSizeList, or if it's truly unreachable.
```suggestion
        // `ListArray` is expected to always have `DataType::List`. If this is
        // ever not the case, it indicates an internal inconsistency rather than
        // a normal edge case like `LargeList` or `FixedSizeList` (which use
        // different array types).
        let field = match list_array.data_type() {
            DataType::List(field) => field,
            other => {
                debug_assert!(false, "ListArray with non-List DataType: {other:?}");
                // Defensive: fall back to returning the original array to avoid
                // panicking in release builds if invariants are somehow violated.
                return Ok((array, false));
            }
        };
```
##########
rust/sedona-spatial-join/src/evaluated_batch/evaluated_batch_stream/evaluate.rs:
##########
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::pin::Pin;
+use std::sync::Arc;
+use std::task::{Context, Poll};
+
+use arrow_array::RecordBatch;
+use arrow_schema::{DataType, SchemaRef};
+use datafusion_common::Result;
+use datafusion_physical_plan::{metrics, SendableRecordBatchStream};
+use futures::{Stream, StreamExt};
+
+use crate::evaluated_batch::{
+ evaluated_batch_stream::{EvaluatedBatchStream,
SendableEvaluatedBatchStream},
+ EvaluatedBatch,
+};
+use crate::operand_evaluator::{EvaluatedGeometryArray, OperandEvaluator};
+use crate::utils::arrow_utils::compact_batch;
+
/// An evaluator that can evaluate geometry expressions on record batches
/// and produces evaluated geometry arrays.
///
/// Implementations choose which side of the join (build or probe) the
/// underlying [`OperandEvaluator`] is applied to; see `BuildSideEvaluator`
/// and `ProbeSideEvaluator`.
trait Evaluator: Unpin {
    /// Evaluates the geometry expression against `batch`, returning the
    /// evaluated geometry array or any error raised by the underlying
    /// evaluator.
    fn evaluate(&self, batch: &RecordBatch) -> Result<EvaluatedGeometryArray>;
}
+
+/// An evaluator for build-side geometry expressions.
+struct BuildSideEvaluator {
+ evaluator: Arc<dyn OperandEvaluator>,
+}
+
+impl Evaluator for BuildSideEvaluator {
+ fn evaluate(&self, batch: &RecordBatch) -> Result<EvaluatedGeometryArray> {
+ self.evaluator.evaluate_build(batch)
+ }
+}
+
+/// An evaluator for probe-side geometry expressions.
+struct ProbeSideEvaluator {
+ evaluator: Arc<dyn OperandEvaluator>,
+}
+
+impl Evaluator for ProbeSideEvaluator {
+ fn evaluate(&self, batch: &RecordBatch) -> Result<EvaluatedGeometryArray> {
+ self.evaluator.evaluate_probe(batch)
+ }
+}
+
/// Wraps a `SendableRecordBatchStream` and evaluates a geometry expression
/// (build- or probe-side, depending on `E`) eagerly so downstream consumers
/// can operate on `EvaluatedBatch`s.
struct EvaluateOperandBatchStream<E: Evaluator> {
    // Upstream stream of raw record batches.
    inner: SendableRecordBatchStream,
    // Side-specific geometry evaluator applied to each incoming batch.
    evaluator: E,
    // Metric accumulating wall-clock time spent evaluating expressions.
    evaluation_time: metrics::Time,
    // Whether view arrays should be compacted (`gc`'d); only true when the
    // input schema actually contains Utf8View/BinaryView columns.
    gc_view_arrays: bool,
}
+
+impl<E: Evaluator> EvaluateOperandBatchStream<E> {
+ fn new(
+ inner: SendableRecordBatchStream,
+ evaluator: E,
+ evaluation_time: metrics::Time,
+ gc_view_arrays: bool,
+ ) -> Self {
+ let gc_view_arrays = gc_view_arrays &&
schema_contains_view_types(&inner.schema());
+ Self {
+ inner,
+ evaluator,
+ evaluation_time,
+ gc_view_arrays,
+ }
+ }
+}
+
+/// Checks if the schema contains any view types (Utf8View or BinaryView).
+fn schema_contains_view_types(schema: &SchemaRef) -> bool {
+ schema
+ .flattened_fields()
+ .iter()
+ .any(|field| matches!(field.data_type(), DataType::Utf8View |
DataType::BinaryView))
+}
Review Comment:
The `schema_contains_view_types` function lacks test coverage. Consider
adding a test that verifies the function correctly identifies schemas with and
without view types.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]