This is an automated email from the ASF dual-hosted git repository.

kontinuation pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new c6446798 fix(rust/sedona-pointcloud): stabilize flaky round_robin_partitioning test (#690)
c6446798 is described below

commit c64467981e8d803983d3cf06168835622d306a2a
Author: Kristin Cowalcijk <[email protected]>
AuthorDate: Fri Mar 6 17:56:37 2026 +0800

    fix(rust/sedona-pointcloud): stabilize flaky round_robin_partitioning test (#690)
    
    ## Problem
    
    We observed that the following test is flaky in recent GitHub Workflow runs:
    
    ```
    failures:
    
    ---- las::opener::tests::round_robin_partitioning stdout ----
    
    thread 'las::opener::tests::round_robin_partitioning' (43727) panicked at rust/sedona-pointcloud/src/las/opener.rs:306:9:
    assertion `left == right` failed
      left: [RecordBatch { schema: Schema { fields: [Field { name: "geometry", 
data_type: Binary, metadata: {"ARROW:extension:name": "geoarrow.wkb"} }, Field 
{ name: "intensity", data_type: UInt16, nullable: true }, Field { name: 
"return_number", data_type: UInt8 }, Field { name: "number_of_returns", 
data_type: UInt8 }, Field { name: "is_synthetic", data_type: Boolean }, Field { 
name: "is_key_point", data_type: Boolean }, Field { name: "is_withheld", 
data_type: Boolean }, Field { name: "i [...]
    [
    ```
    
    See https://github.com/apache/sedona-db/actions/runs/22707309733/job/65836933921?pr=682 for an example of a failing run.
    
    ## Solution
    
    - The `round_robin_partitioning` test was flaky because it compared two `Vec<RecordBatch>` values with `assert_eq!`, which is order-sensitive. With round-robin partitioning, chunks are distributed across concurrent partitions whose output order is non-deterministic.
    - The fix concatenates each result's batches into a single `RecordBatch` and sorts it by the `geometry` column before comparing, making the assertion independent of batch boundaries and partition ordering.
    - A single-column sort suffices because all rows within each cluster in the test data are identical (see `generate.py`), so rows that tie on `geometry` are interchangeable and their relative order cannot affect the comparison.
---
 rust/sedona-pointcloud/src/las/opener.rs | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/rust/sedona-pointcloud/src/las/opener.rs b/rust/sedona-pointcloud/src/las/opener.rs
index 941788ef..7727369a 100644
--- a/rust/sedona-pointcloud/src/las/opener.rs
+++ b/rust/sedona-pointcloud/src/las/opener.rs
@@ -280,6 +280,26 @@ mod tests {
 
     #[tokio::test]
     async fn round_robin_partitioning() {
+        use datafusion_common::arrow::compute::{
+            concat_batches, sort_to_indices, take_record_batch,
+        };
+
+        /// Concatenate batches and sort by the column at `sort_col` index for
+        /// order-independent comparison.
+        fn concat_and_sort(
+            batches: &[arrow_array::RecordBatch],
+            sort_col: usize,
+        ) -> arrow_array::RecordBatch {
+            assert!(
+                !batches.is_empty(),
+                "expected at least one RecordBatch, but the query returned 
none"
+            );
+            let schema = batches[0].schema();
+            let combined = concat_batches(&schema, batches).unwrap();
+            let indices = sort_to_indices(combined.column(sort_col), None, 
None).unwrap();
+            take_record_batch(&combined, &indices).unwrap()
+        }
+
         // file with two clusters, one at 0.5 one at 1.0
         let path = "tests/data/large.laz";
 
@@ -303,6 +323,13 @@ mod tests {
             .collect()
             .await
             .unwrap();
-        assert_eq!(result1, result2);
+
+        // Compare content independent of batch boundaries and partition ordering.
+        // Sort by the geometry column (index 0) to get a deterministic row order.
+        // Single-column sort suffices because all rows within each cluster in the
+        // test data are identical (see generate.py), so ties are indistinguishable.
+        let batch1 = concat_and_sort(&result1, 0);
+        let batch2 = concat_and_sort(&result2, 0);
+        assert_eq!(batch1, batch2);
     }
 }

Reply via email to