This is an automated email from the ASF dual-hosted git repository.
kontinuation pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 7eac903e feat(rust/sedona-raster-functions): add RS_GeoReference
function (#601)
7eac903e is described below
commit 7eac903e08c84ae9aa6396d2ff1e8101d1e6b8df
Author: Kristin Cowalcijk <[email protected]>
AuthorDate: Tue Feb 17 16:32:12 2026 +0800
feat(rust/sedona-raster-functions): add RS_GeoReference function (#601)
## Summary
- Add `RS_GeoReference(raster, format)` function that returns the
georeference metadata of a raster as a string in GDAL or ESRI format (default:
GDAL)
- Supports both 1-arg (default GDAL) and 2-arg (explicit format) variants
- Includes benchmark for both format variants
---
.../benches/native-raster-functions.rs | 8 +
rust/sedona-raster-functions/src/lib.rs | 1 +
rust/sedona-raster-functions/src/register.rs | 1 +
.../sedona-raster-functions/src/rs_georeference.rs | 329 +++++++++++++++++++++
4 files changed, 339 insertions(+)
diff --git a/rust/sedona-raster-functions/benches/native-raster-functions.rs
b/rust/sedona-raster-functions/benches/native-raster-functions.rs
index b9215c12..5eead05d 100644
--- a/rust/sedona-raster-functions/benches/native-raster-functions.rs
+++ b/rust/sedona-raster-functions/benches/native-raster-functions.rs
@@ -23,6 +23,14 @@ fn criterion_benchmark(c: &mut Criterion) {
benchmark::scalar(c, &f, "native-raster", "rs_convexhull", Raster(64, 64));
benchmark::scalar(c, &f, "native-raster", "rs_crs", Raster(64, 64));
benchmark::scalar(c, &f, "native-raster", "rs_envelope", Raster(64, 64));
+ benchmark::scalar(c, &f, "native-raster", "rs_georeference", Raster(64,
64));
+ benchmark::scalar(
+ c,
+ &f,
+ "native-raster",
+ "rs_georeference",
+ BenchmarkArgs::ArrayScalar(Raster(64, 64), String("ESRI".to_string())),
+ );
benchmark::scalar(c, &f, "native-raster", "rs_height", Raster(64, 64));
benchmark::scalar(c, &f, "native-raster", "rs_numbands", Raster(64, 64));
benchmark::scalar(
diff --git a/rust/sedona-raster-functions/src/lib.rs
b/rust/sedona-raster-functions/src/lib.rs
index c48dbb19..e7c63b03 100644
--- a/rust/sedona-raster-functions/src/lib.rs
+++ b/rust/sedona-raster-functions/src/lib.rs
@@ -20,6 +20,7 @@ pub mod register;
pub mod rs_convexhull;
pub mod rs_envelope;
pub mod rs_example;
+pub mod rs_georeference;
pub mod rs_geotransform;
pub mod rs_numbands;
pub mod rs_rastercoordinate;
diff --git a/rust/sedona-raster-functions/src/register.rs
b/rust/sedona-raster-functions/src/register.rs
index ab5f8990..6f5e2baa 100644
--- a/rust/sedona-raster-functions/src/register.rs
+++ b/rust/sedona-raster-functions/src/register.rs
@@ -41,6 +41,7 @@ pub fn default_function_set() -> FunctionSet {
crate::rs_convexhull::rs_convexhull_udf,
crate::rs_envelope::rs_envelope_udf,
crate::rs_example::rs_example_udf,
+ crate::rs_georeference::rs_georeference_udf,
crate::rs_geotransform::rs_rotation_udf,
crate::rs_geotransform::rs_scalex_udf,
crate::rs_geotransform::rs_scaley_udf,
diff --git a/rust/sedona-raster-functions/src/rs_georeference.rs
b/rust/sedona-raster-functions/src/rs_georeference.rs
new file mode 100644
index 00000000..38f48eef
--- /dev/null
+++ b/rust/sedona-raster-functions/src/rs_georeference.rs
@@ -0,0 +1,329 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use std::{sync::Arc, vec};
+
+use crate::executor::RasterExecutor;
+use arrow_array::builder::StringBuilder;
+use arrow_array::cast::AsArray;
+use arrow_array::Array;
+use arrow_schema::DataType;
+use datafusion_common::error::Result;
+use datafusion_common::DataFusionError;
+use datafusion_expr::{
+ scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation,
Volatility,
+};
+use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
+use sedona_raster::traits::RasterRef;
+use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};
+
+/// Builds the `rs_georeference` scalar UDF.
+///
+/// The UDF renders a raster's georeference metadata as a world-file style
+/// string. Two kernels are registered: a one-argument form that always uses
+/// the GDAL layout and a two-argument form that takes the layout name
+/// ('GDAL' or 'ESRI') explicitly.
+pub fn rs_georeference_udf() -> SedonaScalarUDF {
+    let documentation = Some(rs_georeference_doc());
+
+    SedonaScalarUDF::new(
+        "rs_georeference",
+        vec![
+            Arc::new(RsGeoReferenceOneArg {}),
+            Arc::new(RsGeoReferenceTwoArg {}),
+        ],
+        Volatility::Immutable,
+        documentation,
+    )
+}
+
+/// Assembles the user-facing documentation attached to `rs_georeference`.
+fn rs_georeference_doc() -> Documentation {
+    // The long literals are named up front so the builder chain below stays readable.
+    let description = "Returns the georeference metadata of raster as a string in GDAL or ESRI format as commonly seen in a world file. Default is GDAL if not specified. Both formats output six lines: scalex, skewy, skewx, scaley, upperleftx, upperlefty. In GDAL format the upper-left coordinates refer to the corner of the upper-left pixel, while in ESRI format they are shifted to the center of the upper-left pixel.";
+    let syntax = "RS_GeoReference(raster: Raster, format: String = 'GDAL')";
+    let format_argument = "String: Output format, either 'GDAL' (default) or 'ESRI'. GDAL reports the upper-left corner of the upper-left pixel; ESRI shifts the coordinates to the center of the upper-left pixel.";
+
+    Documentation::builder(
+        DOC_SECTION_OTHER,
+        description.to_string(),
+        syntax.to_string(),
+    )
+    .with_argument("raster", "Raster: Input raster")
+    .with_argument("format", format_argument)
+    .with_sql_example("SELECT RS_GeoReference(RS_Example())".to_string())
+    .build()
+}
+
+/// Format type for GeoReference output as commonly seen in a
+/// [world file](https://en.wikipedia.org/wiki/World_file).
+///
+/// Both formats output six lines: scalex, skewy, skewx, scaley, upperleftx,
upperlefty.
+/// The difference is how the upper-left coordinate is reported:
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum GeoReferenceFormat {
+ /// GDAL format: upperleftx and upperlefty are the coordinates of the
upper-left corner
+ /// of the upper-left pixel.
+ Gdal,
+ /// ESRI format: upperleftx and upperlefty are shifted to the center of
the upper-left
+ /// pixel, i.e. `upperleftx + scalex * 0.5` and `upperlefty + scaley *
0.5`.
+ Esri,
+}
+
+impl GeoReferenceFormat {
+ fn from_str(s: &str) -> Result<Self> {
+ match s.to_uppercase().as_str() {
+ "GDAL" => Ok(GeoReferenceFormat::Gdal),
+ "ESRI" => Ok(GeoReferenceFormat::Esri),
+ _ => Err(DataFusionError::Execution(format!(
+ "Invalid GeoReference format '{}'. Supported formats are
'GDAL' and 'ESRI'.",
+ s
+ ))),
+ }
+ }
+}
+
+/// Estimated bytes per georeference string, used as a `StringBuilder` capacity hint.
+/// Each output is six `{:.10}`-formatted f64 values joined by five newlines.
+/// Budgeting ~20 bytes per value (e.g. "-12345678.1234567890") gives
+/// 6 * 20 + 5 newlines = 125 bytes; this is an estimate, not an upper bound.
+const PREALLOC_BYTES_PER_GEOREF: usize = 125;
+
+/// Kernel for the one-argument form `RS_GeoReference(raster)`.
+///
+/// No format is supplied, so every row is rendered in GDAL format.
+#[derive(Debug)]
+struct RsGeoReferenceOneArg {}
+
+impl SedonaScalarKernel for RsGeoReferenceOneArg {
+    /// Matches a single raster argument and reports a `Utf8` result.
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        ArgMatcher::new(
+            vec![ArgMatcher::is_raster()],
+            SedonaType::Arrow(DataType::Utf8),
+        )
+        .match_args(args)
+    }
+
+    /// Renders each raster of the batch as a GDAL georeference string.
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        let executor = RasterExecutor::new(arg_types, args);
+        let row_count = executor.num_iterations();
+
+        // Size the value buffer up front from the per-row estimate.
+        let mut output =
+            StringBuilder::with_capacity(row_count, PREALLOC_BYTES_PER_GEOREF * row_count);
+
+        executor.execute_raster_void(|_row, raster| {
+            format_georeference(raster, GeoReferenceFormat::Gdal, &mut output)
+        })?;
+
+        executor.finish(Arc::new(output.finish()))
+    }
+}
+
+/// Kernel for the two-argument form `RS_GeoReference(raster, format)`.
+///
+/// The format is read per row, so it may be supplied as a scalar or as a
+/// column. A null format produces a null output for that row. An unrecognised
+/// format name fails the whole batch with an execution error, including on
+/// rows whose raster is null.
+#[derive(Debug)]
+struct RsGeoReferenceTwoArg {}
+
+impl SedonaScalarKernel for RsGeoReferenceTwoArg {
+    /// Matches `(raster, string)` and reports a `Utf8` result.
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(
+            vec![ArgMatcher::is_raster(), ArgMatcher::is_string()],
+            SedonaType::Arrow(DataType::Utf8),
+        );
+        matcher.match_args(args)
+    }
+
+    /// Renders each raster of the batch in the format named on the same row.
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        let executor = RasterExecutor::new(arg_types, args);
+        let num_rows = executor.num_iterations();
+
+        // `as_string::<i32>()` panics unless the array is exactly `Utf8`, and
+        // nothing visible in this file guarantees the string matcher admits
+        // only that encoding (other string encodings or a typed-null scalar
+        // may reach this point -- TODO confirm against `ArgMatcher::is_string`).
+        // Normalise to `Utf8` first so a mismatch becomes a cast, or a proper
+        // error, instead of a panic. `cast_to` borrows the argument, so no
+        // clone is needed before expanding it to an array.
+        let format_array = args[1]
+            .cast_to(&DataType::Utf8, None)?
+            .into_array(num_rows)?;
+        let format_array = format_array.as_string::<i32>();
+
+        let mut builder =
+            StringBuilder::with_capacity(num_rows, PREALLOC_BYTES_PER_GEOREF * num_rows);
+
+        executor.execute_raster_void(|i, raster_opt| {
+            // A null format cannot select a layout, so the row is null.
+            if format_array.is_null(i) {
+                builder.append_null();
+                return Ok(());
+            }
+            let format = GeoReferenceFormat::from_str(format_array.value(i))?;
+            format_georeference(raster_opt, format, &mut builder)
+        })?;
+
+        executor.finish(Arc::new(builder.finish()))
+    }
+}
+
+/// Appends one georeference entry to `builder`.
+///
+/// A missing raster appends a null. Otherwise the six world-file values
+/// (scalex, skewy, skewx, scaley, upperleftx, upperlefty) are written with ten
+/// decimal places each, separated by newlines. For `GeoReferenceFormat::Esri`
+/// the upper-left coordinate is moved by half a pixel scale on each axis.
+fn format_georeference(
+    raster_opt: Option<&sedona_raster::array::RasterRefImpl<'_>>,
+    format: GeoReferenceFormat,
+    builder: &mut StringBuilder,
+) -> Result<()> {
+    let raster = match raster_opt {
+        Some(raster) => raster,
+        None => {
+            builder.append_null();
+            return Ok(());
+        }
+    };
+
+    let meta = raster.metadata();
+    let (scale_x, scale_y) = (meta.scale_x(), meta.scale_y());
+    let (skew_x, skew_y) = (meta.skew_x(), meta.skew_y());
+
+    // Only the reported origin depends on the requested layout.
+    let (origin_x, origin_y) = match format {
+        GeoReferenceFormat::Gdal => (meta.upper_left_x(), meta.upper_left_y()),
+        GeoReferenceFormat::Esri => (
+            meta.upper_left_x() + scale_x * 0.5,
+            meta.upper_left_y() + scale_y * 0.5,
+        ),
+    };
+
+    builder.append_value(format!(
+        "{:.10}\n{:.10}\n{:.10}\n{:.10}\n{:.10}\n{:.10}",
+        scale_x, skew_y, skew_x, scale_y, origin_x, origin_y
+    ));
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow_array::{Array, StringArray};
+    use datafusion_common::ScalarValue;
+    use datafusion_expr::ScalarUDF;
+    use sedona_schema::datatypes::RASTER;
+    use sedona_testing::compare::assert_array_equal;
+    use sedona_testing::rasters::generate_test_rasters;
+    use sedona_testing::testers::ScalarUdfTester;
+
+    // Expected renderings, keyed by layout and by the raster's index in the
+    // array returned from `generate_test_rasters`.
+    const GDAL_RASTER_0: &str =
+        "0.1000000000\n0.0000000000\n0.0000000000\n-0.2000000000\n1.0000000000\n2.0000000000";
+    const GDAL_RASTER_2: &str =
+        "0.2000000000\n0.0800000000\n0.0600000000\n-0.4000000000\n3.0000000000\n4.0000000000";
+    const ESRI_RASTER_0: &str =
+        "0.1000000000\n0.0000000000\n0.0000000000\n-0.2000000000\n1.0500000000\n1.9000000000";
+    const ESRI_RASTER_2: &str =
+        "0.2000000000\n0.0800000000\n0.0600000000\n-0.4000000000\n3.1000000000\n3.8000000000";
+    const ESRI_RASTER_3: &str =
+        "0.3000000000\n0.1200000000\n0.0900000000\n-0.6000000000\n4.1500000000\n4.7000000000";
+
+    /// Builds a tester for the UDF bound to the given argument types.
+    fn tester_for(arg_types: Vec<SedonaType>) -> ScalarUdfTester {
+        let udf: ScalarUDF = rs_georeference_udf().into();
+        ScalarUdfTester::new(udf, arg_types)
+    }
+
+    /// Argument types of the two-argument form.
+    fn raster_and_format() -> Vec<SedonaType> {
+        vec![RASTER, SedonaType::Arrow(DataType::Utf8)]
+    }
+
+    /// Wraps optional strings as the array type the comparisons expect.
+    fn expected_strings(values: Vec<Option<&str>>) -> Arc<dyn Array> {
+        Arc::new(StringArray::from(values))
+    }
+
+    #[test]
+    fn udf_metadata() {
+        let udf: ScalarUDF = rs_georeference_udf().into();
+        assert_eq!(udf.name(), "rs_georeference");
+        assert!(udf.documentation().is_some());
+    }
+
+    #[test]
+    fn udf_georeference_gdal_default() {
+        let rasters = generate_test_rasters(3, Some(1)).unwrap();
+        let expected = expected_strings(vec![Some(GDAL_RASTER_0), None, Some(GDAL_RASTER_2)]);
+
+        // One-argument form: GDAL is the default.
+        let one_arg = tester_for(vec![RASTER]);
+        one_arg.assert_return_type(DataType::Utf8);
+        let result = one_arg.invoke_array(Arc::new(rasters.clone())).unwrap();
+        assert_array_equal(&result, &expected);
+
+        // Two-argument form: an explicit "GDAL" in either case gives the same output.
+        let two_arg = tester_for(raster_and_format());
+        for format in ["GDAL", "gdal"] {
+            let result = two_arg
+                .invoke_array_scalar(Arc::new(rasters.clone()), format)
+                .unwrap();
+            assert_array_equal(&result, &expected);
+        }
+    }
+
+    #[test]
+    fn udf_georeference_esri() {
+        let tester = tester_for(raster_and_format());
+        let expected = expected_strings(vec![Some(ESRI_RASTER_0), None, Some(ESRI_RASTER_2)]);
+
+        for format in ["ESRI", "esri"] {
+            let rasters = generate_test_rasters(3, Some(1)).unwrap();
+            let result = tester
+                .invoke_array_scalar(Arc::new(rasters), format)
+                .unwrap();
+            assert_array_equal(&result, &expected);
+        }
+    }
+
+    #[test]
+    fn udf_georeference_null_scalar() {
+        let tester = tester_for(vec![RASTER]);
+
+        // A null scalar input yields a null string.
+        let result = tester.invoke_scalar(ScalarValue::Null).unwrap();
+        tester.assert_scalar_result_equals(result, ScalarValue::Utf8(None));
+    }
+
+    #[test]
+    fn udf_georeference_with_array_format() {
+        let tester = tester_for(raster_and_format());
+
+        let rasters = generate_test_rasters(4, Some(1)).unwrap();
+        let formats = Arc::new(StringArray::from(vec![
+            Some("GDAL"), // explicit GDAL
+            Some("ESRI"), // won't matter since raster 1 is null
+            None,         // null format -> NULL output
+            Some("ESRI"), // explicit ESRI
+        ]));
+
+        let result = tester
+            .invoke_arrays(vec![Arc::new(rasters), formats])
+            .unwrap();
+        let expected = expected_strings(vec![
+            // explicit GDAL
+            Some(GDAL_RASTER_0),
+            // null raster
+            None,
+            // null format -> NULL output
+            None,
+            // explicit ESRI
+            Some(ESRI_RASTER_3),
+        ]);
+        assert_array_equal(&result, &expected);
+    }
+
+    #[test]
+    fn udf_georeference_invalid_format() {
+        let tester = tester_for(raster_and_format());
+
+        let rasters = generate_test_rasters(3, Some(1)).unwrap();
+        let outcome = tester.invoke_array_scalar(Arc::new(rasters), "INVALID");
+
+        assert!(outcome.is_err());
+        let message = outcome.unwrap_err().to_string();
+        assert!(
+            message.contains("Invalid GeoReference format"),
+            "Expected error about invalid format, got: {}",
+            message
+        );
+    }
+}