This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new 39c8d905 chore: Use mixed-line-ending pre-commit to enforce LF line ending (#621)
39c8d905 is described below

commit 39c8d905cd358cfdcb33df4003a7154cc58c7723
Author: Hiroaki Yutani <[email protected]>
AuthorDate: Sun Feb 15 12:08:32 2026 +0900

    chore: Use mixed-line-ending pre-commit to enforce LF line ending (#621)
---
 .pre-commit-config.yaml              |   2 +
 rust/sedona-functions/src/st_dump.rs | 820 +++++++++++++++++------------------
 2 files changed, 412 insertions(+), 410 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 84422c3f..91e26f8e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -28,6 +28,8 @@ repos:
         # R snapshot test files may have arbitrary file endings based on test results
         exclude: "_snaps"
       - id: fix-byte-order-marker
+      - id: mixed-line-ending
+        args: [--fix=lf]
       - id: trailing-whitespace
 
   - repo: https://github.com/codespell-project/codespell
diff --git a/rust/sedona-functions/src/st_dump.rs b/rust/sedona-functions/src/st_dump.rs
index aafbc4f6..537211d5 100644
--- a/rust/sedona-functions/src/st_dump.rs
+++ b/rust/sedona-functions/src/st_dump.rs
@@ -1,410 +1,410 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-use arrow_array::{
-    builder::{BinaryBuilder, NullBufferBuilder, OffsetBufferBuilder, 
UInt32Builder},
-    ListArray, StructArray,
-};
-use arrow_schema::{DataType, Field, Fields};
-use datafusion_common::error::Result;
-use datafusion_expr::{
-    scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, 
Volatility,
-};
-use geo_traits::{
-    GeometryCollectionTrait, GeometryTrait, GeometryType, 
MultiLineStringTrait, MultiPointTrait,
-    MultiPolygonTrait,
-};
-use sedona_common::sedona_internal_err;
-use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
-use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
-use sedona_schema::{
-    datatypes::{SedonaType, WKB_GEOMETRY},
-    matchers::ArgMatcher,
-};
-use std::{io::Write, sync::Arc};
-
-use crate::executor::WkbExecutor;
-
-/// ST_Dump() scalar UDF
-///
-/// Native implementation to get all the points of a geometry as MULTIPOINT
-pub fn st_dump_udf() -> SedonaScalarUDF {
-    SedonaScalarUDF::new(
-        "st_dump",
-        vec![Arc::new(STDump)],
-        Volatility::Immutable,
-        Some(st_dump_doc()),
-    )
-}
-
-fn st_dump_doc() -> Documentation {
-    Documentation::builder(
-        DOC_SECTION_OTHER,
-        "Extracts the components of a geometry.",
-        "ST_Dump (geom: Geometry)",
-    )
-    .with_argument("geom", "geometry: Input geometry")
-    .with_sql_example("SELECT ST_Dump(ST_GeomFromWKT('MULTIPOINT (0 1, 2 3, 4 
5)'))")
-    .build()
-}
-
-#[derive(Debug)]
-struct STDump;
-
-// A builder for a list of the structs
-struct STDumpBuilder {
-    path_array_builder: UInt32Builder,
-    path_array_offsets_builder: OffsetBufferBuilder<i32>,
-    geom_builder: BinaryBuilder,
-    struct_offsets_builder: OffsetBufferBuilder<i32>,
-    null_builder: NullBufferBuilder,
-    parent_path: Vec<u32>,
-}
-
-impl STDumpBuilder {
-    fn new(num_iter: usize) -> Self {
-        let path_array_builder = UInt32Builder::with_capacity(num_iter);
-        let path_array_offsets_builder = OffsetBufferBuilder::new(num_iter);
-        let geom_builder =
-            BinaryBuilder::with_capacity(num_iter, WKB_MIN_PROBABLE_BYTES * 
num_iter);
-        let struct_offsets_builder = OffsetBufferBuilder::new(num_iter);
-        let null_builder = NullBufferBuilder::new(num_iter);
-
-        Self {
-            path_array_builder,
-            path_array_offsets_builder,
-            geom_builder,
-            struct_offsets_builder,
-            null_builder,
-            parent_path: Vec::new(), // Reusable buffer to avoid allocation 
per row
-        }
-    }
-
-    // This appends both path and geom at once.
-    fn append_single_struct(&mut self, cur_index: Option<u32>, wkb: &[u8]) -> 
Result<()> {
-        self.path_array_builder.append_slice(&self.parent_path);
-        if let Some(cur_index) = cur_index {
-            self.path_array_builder.append_value(cur_index);
-            self.path_array_offsets_builder
-                .push_length(self.parent_path.len() + 1);
-        } else {
-            self.path_array_offsets_builder
-                .push_length(self.parent_path.len());
-        }
-
-        self.geom_builder.write_all(wkb)?;
-        self.geom_builder.append_value([]);
-
-        Ok(())
-    }
-
-    fn append_structs(&mut self, wkb: &wkb::reader::Wkb<'_>) -> Result<i32> {
-        match wkb.as_type() {
-            GeometryType::Point(point) => {
-                self.append_single_struct(None, point.buf())?;
-                Ok(1)
-            }
-            GeometryType::LineString(line_string) => {
-                self.append_single_struct(None, line_string.buf())?;
-                Ok(1)
-            }
-            GeometryType::Polygon(polygon) => {
-                self.append_single_struct(None, polygon.buf())?;
-                Ok(1)
-            }
-            GeometryType::MultiPoint(multi_point) => {
-                for (index, point) in multi_point.points().enumerate() {
-                    self.append_single_struct(Some((index + 1) as _), 
point.buf())?;
-                }
-                Ok(multi_point.num_points() as _)
-            }
-            GeometryType::MultiLineString(multi_line_string) => {
-                for (index, line_string) in 
multi_line_string.line_strings().enumerate() {
-                    self.append_single_struct(Some((index + 1) as _), 
line_string.buf())?;
-                }
-                Ok(multi_line_string.num_line_strings() as _)
-            }
-            GeometryType::MultiPolygon(multi_polygon) => {
-                for (index, polygon) in multi_polygon.polygons().enumerate() {
-                    self.append_single_struct(Some((index + 1) as _), 
polygon.buf())?;
-                }
-                Ok(multi_polygon.num_polygons() as _)
-            }
-            GeometryType::GeometryCollection(geometry_collection) => {
-                let mut num_geometries: i32 = 0;
-
-                self.parent_path.push(0); // add an index for the next nested 
level
-
-                for geometry in geometry_collection.geometries() {
-                    // increment the index
-                    if let Some(index) = self.parent_path.last_mut() {
-                        *index += 1;
-                    }
-                    num_geometries += self.append_structs(geometry)?;
-                }
-
-                self.parent_path.truncate(self.parent_path.len() - 1); // 
clear the index before returning to the upper level
-
-                Ok(num_geometries)
-            }
-            _ => sedona_internal_err!("Invalid geometry type"),
-        }
-    }
-
-    fn append(&mut self, wkb: &wkb::reader::Wkb<'_>) -> Result<()> {
-        self.parent_path.clear();
-
-        let num_geometries = self.append_structs(wkb)?;
-        self.null_builder.append(true);
-        self.struct_offsets_builder
-            .push_length(num_geometries as usize);
-        Ok(())
-    }
-
-    fn append_null(&mut self) {
-        self.path_array_offsets_builder.push_length(0);
-        self.geom_builder.append_null();
-        self.struct_offsets_builder.push_length(1);
-        self.null_builder.append(false);
-    }
-
-    fn finish(mut self) -> ListArray {
-        let path_array = Arc::new(self.path_array_builder.finish());
-        let path_offsets = self.path_array_offsets_builder.finish();
-        let geom_array = self.geom_builder.finish();
-
-        let path_field = Arc::new(Field::new("item", DataType::UInt32, true));
-        let path_list = ListArray::new(path_field, path_offsets, path_array, 
None);
-
-        let fields = Fields::from(vec![
-            Field::new(
-                "path",
-                DataType::List(Arc::new(Field::new("item", DataType::UInt32, 
true))),
-                true,
-            ),
-            WKB_GEOMETRY.to_storage_field("geom", true).unwrap(),
-        ]);
-        let struct_array = StructArray::try_new(
-            fields.clone(),
-            vec![Arc::new(path_list), Arc::new(geom_array)],
-            None,
-        )
-        .unwrap();
-        let struct_offsets = self.struct_offsets_builder.finish();
-        let struct_field = Arc::new(Field::new("item", 
DataType::Struct(fields), true));
-        let nulls = self.null_builder.finish();
-        ListArray::new(struct_field, struct_offsets, Arc::new(struct_array), 
nulls)
-    }
-}
-
-impl SedonaScalarKernel for STDump {
-    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
-        let matcher = ArgMatcher::new(vec![ArgMatcher::is_geometry()], 
geometry_dump_type());
-        matcher.match_args(args)
-    }
-
-    fn invoke_batch(
-        &self,
-        arg_types: &[SedonaType],
-        args: &[ColumnarValue],
-    ) -> Result<ColumnarValue> {
-        let executor = WkbExecutor::new(arg_types, args);
-
-        let mut builder = STDumpBuilder::new(executor.num_iterations());
-
-        executor.execute_wkb_void(|maybe_wkb| {
-            if let Some(wkb) = maybe_wkb {
-                builder.append(&wkb)?;
-            } else {
-                builder.append_null();
-            }
-
-            Ok(())
-        })?;
-
-        executor.finish(Arc::new(builder.finish()))
-    }
-}
-
-fn geometry_dump_fields() -> Fields {
-    let path = Field::new(
-        "path",
-        DataType::List(Field::new("item", DataType::UInt32, true).into()),
-        true,
-    );
-    let geom = WKB_GEOMETRY.to_storage_field("geom", true).unwrap();
-    vec![path, geom].into()
-}
-
-fn geometry_dump_type() -> SedonaType {
-    let fields = geometry_dump_fields();
-    let struct_type = DataType::Struct(fields);
-
-    SedonaType::Arrow(DataType::List(Field::new("item", struct_type, 
true).into()))
-}
-
-#[cfg(test)]
-mod tests {
-    use arrow_array::{Array, ArrayRef, ListArray, StructArray, UInt32Array};
-    use datafusion_expr::ScalarUDF;
-    use rstest::rstest;
-    use sedona_schema::datatypes::WKB_VIEW_GEOMETRY;
-    use sedona_testing::{
-        compare::assert_array_equal, create::create_array, 
testers::ScalarUdfTester,
-    };
-
-    use super::*;
-
-    #[test]
-    fn udf_metadata() {
-        let st_dump_udf: ScalarUDF = st_dump_udf().into();
-        assert_eq!(st_dump_udf.name(), "st_dump");
-        assert!(st_dump_udf.documentation().is_some());
-    }
-
-    #[rstest]
-    fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) 
{
-        let tester = ScalarUdfTester::new(st_dump_udf().into(), 
vec![sedona_type.clone()]);
-
-        let input = create_array(
-            &[
-                Some("POINT (1 2)"),
-                Some("LINESTRING (1 1, 2 2)"),
-                Some("POLYGON ((1 1, 2 2, 2 1, 1 1))"),
-                Some("MULTIPOINT (1 1, 2 2)"),
-                Some("MULTILINESTRING ((1 1, 2 2), EMPTY, (3 3, 4 4))"),
-                Some("MULTIPOLYGON (((1 1, 2 2, 2 1, 1 1)), EMPTY, ((3 3, 4 4, 
4 3, 3 3)))"),
-                Some("GEOMETRYCOLLECTION (POINT (1 2), MULTILINESTRING ((1 1, 
2 2), EMPTY, (3 3, 4 4)), LINESTRING (1 1, 2 2))"),
-                Some("GEOMETRYCOLLECTION (POINT (1 2), GEOMETRYCOLLECTION 
(MULTILINESTRING ((1 1, 2 2), EMPTY, (3 3, 4 4)), LINESTRING (1 1, 2 2)))"),
-            ],
-            &sedona_type,
-        );
-        let result = tester.invoke_array(input).unwrap();
-        assert_dump_row(&result, 0, &[(&[], Some("POINT (1 2)"))]);
-        assert_dump_row(&result, 1, &[(&[], Some("LINESTRING (1 1, 2 2)"))]);
-        assert_dump_row(&result, 2, &[(&[], Some("POLYGON ((1 1, 2 2, 2 1, 1 
1))"))]);
-        assert_dump_row(
-            &result,
-            3,
-            &[(&[1], Some("POINT (1 1)")), (&[2], Some("POINT (2 2)"))],
-        );
-        assert_dump_row(
-            &result,
-            4,
-            &[
-                (&[1], Some("LINESTRING (1 1, 2 2)")),
-                (&[2], Some("LINESTRING EMPTY")),
-                (&[3], Some("LINESTRING (3 3, 4 4)")),
-            ],
-        );
-        assert_dump_row(
-            &result,
-            5,
-            &[
-                (&[1], Some("POLYGON ((1 1, 2 2, 2 1, 1 1))")),
-                (&[2], Some("POLYGON EMPTY")),
-                (&[3], Some("POLYGON ((3 3, 4 4, 4 3, 3 3)))")),
-            ],
-        );
-        assert_dump_row(
-            &result,
-            6,
-            &[
-                (&[1], Some("POINT (1 2)")),
-                (&[2, 1], Some("LINESTRING (1 1, 2 2)")),
-                (&[2, 2], Some("LINESTRING EMPTY")),
-                (&[2, 3], Some("LINESTRING (3 3, 4 4)")),
-                (&[3], Some("LINESTRING (1 1, 2 2)")),
-            ],
-        );
-        assert_dump_row(
-            &result,
-            7,
-            &[
-                (&[1], Some("POINT (1 2)")),
-                (&[2, 1, 1], Some("LINESTRING (1 1, 2 2)")),
-                (&[2, 1, 2], Some("LINESTRING EMPTY")),
-                (&[2, 1, 3], Some("LINESTRING (3 3, 4 4)")),
-                (&[2, 2], Some("LINESTRING (1 1, 2 2)")),
-            ],
-        );
-
-        let null_input = create_array(&[None], &sedona_type);
-        let result = tester.invoke_array(null_input).unwrap();
-        assert_dump_row_null(&result, 0);
-    }
-
-    fn assert_dump_row(result: &ArrayRef, row: usize, expected: &[(&[u32], 
Option<&str>)]) {
-        let list_array = result
-            .as_ref()
-            .as_any()
-            .downcast_ref::<ListArray>()
-            .expect("result should be a ListArray");
-        assert!(
-            !list_array.is_null(row),
-            "row {row} should not be null in dump result"
-        );
-        let dumped = list_array.value(row);
-        let dumped = dumped
-            .as_ref()
-            .as_any()
-            .downcast_ref::<StructArray>()
-            .expect("list elements should be StructArray");
-        assert_eq!(dumped.len(), expected.len());
-
-        let path_array = dumped
-            .column(0)
-            .as_ref()
-            .as_any()
-            .downcast_ref::<ListArray>()
-            .expect("path should be a ListArray");
-        assert_eq!(path_array.len(), expected.len());
-        for (i, (expected_path, _)) in expected.iter().enumerate() {
-            let path_array_value = path_array.value(i);
-            let path_values = path_array_value
-                .as_ref()
-                .as_any()
-                .downcast_ref::<UInt32Array>()
-                .expect("path values should be UInt32Array");
-            assert_eq!(
-                path_values.len(),
-                expected_path.len(),
-                "unexpected path length at index {i}"
-            );
-            for (j, expected_value) in expected_path.iter().enumerate() {
-                assert_eq!(
-                    path_values.value(j),
-                    *expected_value,
-                    "unexpected path value at index {i}:{j}"
-                );
-            }
-        }
-
-        let expected_geom_values: Vec<Option<&str>> =
-            expected.iter().map(|(_, geom)| *geom).collect();
-        let expected_geom_array = create_array(&expected_geom_values, 
&WKB_GEOMETRY);
-        assert_array_equal(dumped.column(1), &expected_geom_array);
-    }
-
-    fn assert_dump_row_null(result: &ArrayRef, row: usize) {
-        let list_array = result
-            .as_ref()
-            .as_any()
-            .downcast_ref::<ListArray>()
-            .expect("result should be a ListArray");
-        assert!(list_array.is_null(row), "row {row} should be null");
-    }
-}
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use arrow_array::{
+    builder::{BinaryBuilder, NullBufferBuilder, OffsetBufferBuilder, 
UInt32Builder},
+    ListArray, StructArray,
+};
+use arrow_schema::{DataType, Field, Fields};
+use datafusion_common::error::Result;
+use datafusion_expr::{
+    scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, 
Volatility,
+};
+use geo_traits::{
+    GeometryCollectionTrait, GeometryTrait, GeometryType, 
MultiLineStringTrait, MultiPointTrait,
+    MultiPolygonTrait,
+};
+use sedona_common::sedona_internal_err;
+use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
+use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
+use sedona_schema::{
+    datatypes::{SedonaType, WKB_GEOMETRY},
+    matchers::ArgMatcher,
+};
+use std::{io::Write, sync::Arc};
+
+use crate::executor::WkbExecutor;
+
+/// ST_Dump() scalar UDF
+///
+/// Native implementation to get all the points of a geometry as MULTIPOINT
+pub fn st_dump_udf() -> SedonaScalarUDF {
+    SedonaScalarUDF::new(
+        "st_dump",
+        vec![Arc::new(STDump)],
+        Volatility::Immutable,
+        Some(st_dump_doc()),
+    )
+}
+
+fn st_dump_doc() -> Documentation {
+    Documentation::builder(
+        DOC_SECTION_OTHER,
+        "Extracts the components of a geometry.",
+        "ST_Dump (geom: Geometry)",
+    )
+    .with_argument("geom", "geometry: Input geometry")
+    .with_sql_example("SELECT ST_Dump(ST_GeomFromWKT('MULTIPOINT (0 1, 2 3, 4 
5)'))")
+    .build()
+}
+
+#[derive(Debug)]
+struct STDump;
+
+// A builder for a list of the structs
+struct STDumpBuilder {
+    path_array_builder: UInt32Builder,
+    path_array_offsets_builder: OffsetBufferBuilder<i32>,
+    geom_builder: BinaryBuilder,
+    struct_offsets_builder: OffsetBufferBuilder<i32>,
+    null_builder: NullBufferBuilder,
+    parent_path: Vec<u32>,
+}
+
+impl STDumpBuilder {
+    fn new(num_iter: usize) -> Self {
+        let path_array_builder = UInt32Builder::with_capacity(num_iter);
+        let path_array_offsets_builder = OffsetBufferBuilder::new(num_iter);
+        let geom_builder =
+            BinaryBuilder::with_capacity(num_iter, WKB_MIN_PROBABLE_BYTES * 
num_iter);
+        let struct_offsets_builder = OffsetBufferBuilder::new(num_iter);
+        let null_builder = NullBufferBuilder::new(num_iter);
+
+        Self {
+            path_array_builder,
+            path_array_offsets_builder,
+            geom_builder,
+            struct_offsets_builder,
+            null_builder,
+            parent_path: Vec::new(), // Reusable buffer to avoid allocation 
per row
+        }
+    }
+
+    // This appends both path and geom at once.
+    fn append_single_struct(&mut self, cur_index: Option<u32>, wkb: &[u8]) -> 
Result<()> {
+        self.path_array_builder.append_slice(&self.parent_path);
+        if let Some(cur_index) = cur_index {
+            self.path_array_builder.append_value(cur_index);
+            self.path_array_offsets_builder
+                .push_length(self.parent_path.len() + 1);
+        } else {
+            self.path_array_offsets_builder
+                .push_length(self.parent_path.len());
+        }
+
+        self.geom_builder.write_all(wkb)?;
+        self.geom_builder.append_value([]);
+
+        Ok(())
+    }
+
+    fn append_structs(&mut self, wkb: &wkb::reader::Wkb<'_>) -> Result<i32> {
+        match wkb.as_type() {
+            GeometryType::Point(point) => {
+                self.append_single_struct(None, point.buf())?;
+                Ok(1)
+            }
+            GeometryType::LineString(line_string) => {
+                self.append_single_struct(None, line_string.buf())?;
+                Ok(1)
+            }
+            GeometryType::Polygon(polygon) => {
+                self.append_single_struct(None, polygon.buf())?;
+                Ok(1)
+            }
+            GeometryType::MultiPoint(multi_point) => {
+                for (index, point) in multi_point.points().enumerate() {
+                    self.append_single_struct(Some((index + 1) as _), 
point.buf())?;
+                }
+                Ok(multi_point.num_points() as _)
+            }
+            GeometryType::MultiLineString(multi_line_string) => {
+                for (index, line_string) in 
multi_line_string.line_strings().enumerate() {
+                    self.append_single_struct(Some((index + 1) as _), 
line_string.buf())?;
+                }
+                Ok(multi_line_string.num_line_strings() as _)
+            }
+            GeometryType::MultiPolygon(multi_polygon) => {
+                for (index, polygon) in multi_polygon.polygons().enumerate() {
+                    self.append_single_struct(Some((index + 1) as _), 
polygon.buf())?;
+                }
+                Ok(multi_polygon.num_polygons() as _)
+            }
+            GeometryType::GeometryCollection(geometry_collection) => {
+                let mut num_geometries: i32 = 0;
+
+                self.parent_path.push(0); // add an index for the next nested 
level
+
+                for geometry in geometry_collection.geometries() {
+                    // increment the index
+                    if let Some(index) = self.parent_path.last_mut() {
+                        *index += 1;
+                    }
+                    num_geometries += self.append_structs(geometry)?;
+                }
+
+                self.parent_path.truncate(self.parent_path.len() - 1); // 
clear the index before returning to the upper level
+
+                Ok(num_geometries)
+            }
+            _ => sedona_internal_err!("Invalid geometry type"),
+        }
+    }
+
+    fn append(&mut self, wkb: &wkb::reader::Wkb<'_>) -> Result<()> {
+        self.parent_path.clear();
+
+        let num_geometries = self.append_structs(wkb)?;
+        self.null_builder.append(true);
+        self.struct_offsets_builder
+            .push_length(num_geometries as usize);
+        Ok(())
+    }
+
+    fn append_null(&mut self) {
+        self.path_array_offsets_builder.push_length(0);
+        self.geom_builder.append_null();
+        self.struct_offsets_builder.push_length(1);
+        self.null_builder.append(false);
+    }
+
+    fn finish(mut self) -> ListArray {
+        let path_array = Arc::new(self.path_array_builder.finish());
+        let path_offsets = self.path_array_offsets_builder.finish();
+        let geom_array = self.geom_builder.finish();
+
+        let path_field = Arc::new(Field::new("item", DataType::UInt32, true));
+        let path_list = ListArray::new(path_field, path_offsets, path_array, 
None);
+
+        let fields = Fields::from(vec![
+            Field::new(
+                "path",
+                DataType::List(Arc::new(Field::new("item", DataType::UInt32, 
true))),
+                true,
+            ),
+            WKB_GEOMETRY.to_storage_field("geom", true).unwrap(),
+        ]);
+        let struct_array = StructArray::try_new(
+            fields.clone(),
+            vec![Arc::new(path_list), Arc::new(geom_array)],
+            None,
+        )
+        .unwrap();
+        let struct_offsets = self.struct_offsets_builder.finish();
+        let struct_field = Arc::new(Field::new("item", 
DataType::Struct(fields), true));
+        let nulls = self.null_builder.finish();
+        ListArray::new(struct_field, struct_offsets, Arc::new(struct_array), 
nulls)
+    }
+}
+
+impl SedonaScalarKernel for STDump {
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(vec![ArgMatcher::is_geometry()], 
geometry_dump_type());
+        matcher.match_args(args)
+    }
+
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        let executor = WkbExecutor::new(arg_types, args);
+
+        let mut builder = STDumpBuilder::new(executor.num_iterations());
+
+        executor.execute_wkb_void(|maybe_wkb| {
+            if let Some(wkb) = maybe_wkb {
+                builder.append(&wkb)?;
+            } else {
+                builder.append_null();
+            }
+
+            Ok(())
+        })?;
+
+        executor.finish(Arc::new(builder.finish()))
+    }
+}
+
+fn geometry_dump_fields() -> Fields {
+    let path = Field::new(
+        "path",
+        DataType::List(Field::new("item", DataType::UInt32, true).into()),
+        true,
+    );
+    let geom = WKB_GEOMETRY.to_storage_field("geom", true).unwrap();
+    vec![path, geom].into()
+}
+
+fn geometry_dump_type() -> SedonaType {
+    let fields = geometry_dump_fields();
+    let struct_type = DataType::Struct(fields);
+
+    SedonaType::Arrow(DataType::List(Field::new("item", struct_type, 
true).into()))
+}
+
+#[cfg(test)]
+mod tests {
+    use arrow_array::{Array, ArrayRef, ListArray, StructArray, UInt32Array};
+    use datafusion_expr::ScalarUDF;
+    use rstest::rstest;
+    use sedona_schema::datatypes::WKB_VIEW_GEOMETRY;
+    use sedona_testing::{
+        compare::assert_array_equal, create::create_array, 
testers::ScalarUdfTester,
+    };
+
+    use super::*;
+
+    #[test]
+    fn udf_metadata() {
+        let st_dump_udf: ScalarUDF = st_dump_udf().into();
+        assert_eq!(st_dump_udf.name(), "st_dump");
+        assert!(st_dump_udf.documentation().is_some());
+    }
+
+    #[rstest]
+    fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) 
{
+        let tester = ScalarUdfTester::new(st_dump_udf().into(), 
vec![sedona_type.clone()]);
+
+        let input = create_array(
+            &[
+                Some("POINT (1 2)"),
+                Some("LINESTRING (1 1, 2 2)"),
+                Some("POLYGON ((1 1, 2 2, 2 1, 1 1))"),
+                Some("MULTIPOINT (1 1, 2 2)"),
+                Some("MULTILINESTRING ((1 1, 2 2), EMPTY, (3 3, 4 4))"),
+                Some("MULTIPOLYGON (((1 1, 2 2, 2 1, 1 1)), EMPTY, ((3 3, 4 4, 
4 3, 3 3)))"),
+                Some("GEOMETRYCOLLECTION (POINT (1 2), MULTILINESTRING ((1 1, 
2 2), EMPTY, (3 3, 4 4)), LINESTRING (1 1, 2 2))"),
+                Some("GEOMETRYCOLLECTION (POINT (1 2), GEOMETRYCOLLECTION 
(MULTILINESTRING ((1 1, 2 2), EMPTY, (3 3, 4 4)), LINESTRING (1 1, 2 2)))"),
+            ],
+            &sedona_type,
+        );
+        let result = tester.invoke_array(input).unwrap();
+        assert_dump_row(&result, 0, &[(&[], Some("POINT (1 2)"))]);
+        assert_dump_row(&result, 1, &[(&[], Some("LINESTRING (1 1, 2 2)"))]);
+        assert_dump_row(&result, 2, &[(&[], Some("POLYGON ((1 1, 2 2, 2 1, 1 
1))"))]);
+        assert_dump_row(
+            &result,
+            3,
+            &[(&[1], Some("POINT (1 1)")), (&[2], Some("POINT (2 2)"))],
+        );
+        assert_dump_row(
+            &result,
+            4,
+            &[
+                (&[1], Some("LINESTRING (1 1, 2 2)")),
+                (&[2], Some("LINESTRING EMPTY")),
+                (&[3], Some("LINESTRING (3 3, 4 4)")),
+            ],
+        );
+        assert_dump_row(
+            &result,
+            5,
+            &[
+                (&[1], Some("POLYGON ((1 1, 2 2, 2 1, 1 1))")),
+                (&[2], Some("POLYGON EMPTY")),
+                (&[3], Some("POLYGON ((3 3, 4 4, 4 3, 3 3)))")),
+            ],
+        );
+        assert_dump_row(
+            &result,
+            6,
+            &[
+                (&[1], Some("POINT (1 2)")),
+                (&[2, 1], Some("LINESTRING (1 1, 2 2)")),
+                (&[2, 2], Some("LINESTRING EMPTY")),
+                (&[2, 3], Some("LINESTRING (3 3, 4 4)")),
+                (&[3], Some("LINESTRING (1 1, 2 2)")),
+            ],
+        );
+        assert_dump_row(
+            &result,
+            7,
+            &[
+                (&[1], Some("POINT (1 2)")),
+                (&[2, 1, 1], Some("LINESTRING (1 1, 2 2)")),
+                (&[2, 1, 2], Some("LINESTRING EMPTY")),
+                (&[2, 1, 3], Some("LINESTRING (3 3, 4 4)")),
+                (&[2, 2], Some("LINESTRING (1 1, 2 2)")),
+            ],
+        );
+
+        let null_input = create_array(&[None], &sedona_type);
+        let result = tester.invoke_array(null_input).unwrap();
+        assert_dump_row_null(&result, 0);
+    }
+
+    fn assert_dump_row(result: &ArrayRef, row: usize, expected: &[(&[u32], 
Option<&str>)]) {
+        let list_array = result
+            .as_ref()
+            .as_any()
+            .downcast_ref::<ListArray>()
+            .expect("result should be a ListArray");
+        assert!(
+            !list_array.is_null(row),
+            "row {row} should not be null in dump result"
+        );
+        let dumped = list_array.value(row);
+        let dumped = dumped
+            .as_ref()
+            .as_any()
+            .downcast_ref::<StructArray>()
+            .expect("list elements should be StructArray");
+        assert_eq!(dumped.len(), expected.len());
+
+        let path_array = dumped
+            .column(0)
+            .as_ref()
+            .as_any()
+            .downcast_ref::<ListArray>()
+            .expect("path should be a ListArray");
+        assert_eq!(path_array.len(), expected.len());
+        for (i, (expected_path, _)) in expected.iter().enumerate() {
+            let path_array_value = path_array.value(i);
+            let path_values = path_array_value
+                .as_ref()
+                .as_any()
+                .downcast_ref::<UInt32Array>()
+                .expect("path values should be UInt32Array");
+            assert_eq!(
+                path_values.len(),
+                expected_path.len(),
+                "unexpected path length at index {i}"
+            );
+            for (j, expected_value) in expected_path.iter().enumerate() {
+                assert_eq!(
+                    path_values.value(j),
+                    *expected_value,
+                    "unexpected path value at index {i}:{j}"
+                );
+            }
+        }
+
+        let expected_geom_values: Vec<Option<&str>> =
+            expected.iter().map(|(_, geom)| *geom).collect();
+        let expected_geom_array = create_array(&expected_geom_values, 
&WKB_GEOMETRY);
+        assert_array_equal(dumped.column(1), &expected_geom_array);
+    }
+
+    fn assert_dump_row_null(result: &ArrayRef, row: usize) {
+        let list_array = result
+            .as_ref()
+            .as_any()
+            .downcast_ref::<ListArray>()
+            .expect("result should be a ListArray");
+        assert!(list_array.is_null(row), "row {row} should be null");
+    }
+}

Reply via email to