Copilot commented on code in PR #630: URL: https://github.com/apache/sedona-db/pull/630#discussion_r2817610729
########## rust/sedona-raster-functions/src/rs_setsrid.rs: ########## @@ -0,0 +1,722 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use arrow_array::{Array, ArrayRef, StringViewArray, StructArray}; +use arrow_buffer::NullBuffer; +use arrow_schema::DataType; +use datafusion_common::cast::{as_int64_array, as_string_view_array}; +use datafusion_common::error::Result; +use datafusion_common::{exec_err, DataFusionError, ScalarValue}; +use datafusion_expr::{ + scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, +}; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_geometry::transform::CrsEngine; +use sedona_schema::crs::{CachedCrsNormalization, CachedSRIDToCrs}; +use sedona_schema::datatypes::SedonaType; +use sedona_schema::matchers::ArgMatcher; +use sedona_schema::raster::{raster_indices, RasterSchema}; + +/// RS_SetSRID() scalar UDF implementation +/// +/// An implementation of RS_SetSRID providing a scalar function implementation +/// based on an optional [CrsEngine]. If provided, it will be used to validate +/// the provided SRID (otherwise, all SRID input is applied without validation). +pub fn rs_set_srid_with_engine_udf( + engine: Option<Arc<dyn CrsEngine + Send + Sync>>, +) -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_setsrid", + vec![Arc::new(RsSetSrid { engine })], + Volatility::Immutable, + Some(rs_set_srid_doc()), + ) +} + +/// RS_SetCRS() scalar UDF implementation +/// +/// An implementation of RS_SetCRS providing a scalar function implementation +/// based on an optional [CrsEngine]. If provided, it will be used to validate +/// the provided CRS (otherwise, all CRS input is applied without validation). +pub fn rs_set_crs_with_engine_udf( + engine: Option<Arc<dyn CrsEngine + Send + Sync>>, +) -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_setcrs", + vec![Arc::new(RsSetCrs { engine })], + Volatility::Immutable, + Some(rs_set_crs_doc()), + ) +} + +/// RS_SetSRID() scalar UDF implementation without CRS validation +/// +/// See [rs_set_srid_with_engine_udf] for a validating version of this function +pub fn rs_set_srid_udf() -> SedonaScalarUDF { + rs_set_srid_with_engine_udf(None) +} + +/// RS_SetCRS() scalar UDF implementation without CRS validation +/// +/// See [rs_set_crs_with_engine_udf] for a validating version of this function +pub fn rs_set_crs_udf() -> SedonaScalarUDF { + rs_set_crs_with_engine_udf(None) +} + +fn rs_set_srid_doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + "Set the spatial reference system identifier (SRID) of the raster".to_string(), + "RS_SetSRID(raster: Raster, srid: Integer)".to_string(), + ) + .with_argument("raster", "Raster: Input raster") + .with_argument("srid", "Integer: EPSG code to set (e.g., 4326)") + .with_sql_example("SELECT RS_SetSRID(RS_Example(), 3857)".to_string()) + .build() +} + +fn rs_set_crs_doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + "Set the coordinate reference system (CRS) of the raster".to_string(), + "RS_SetCRS(raster: Raster, crs: String)".to_string(), + ) + .with_argument("raster", "Raster: Input raster") + .with_argument( + "crs", + "String: Coordinate reference system identifier (e.g., 'OGC:CRS84', 'EPSG:4326')", + ) + .with_sql_example("SELECT RS_SetCRS(RS_Example(), 'EPSG:3857')".to_string()) + .build() +} + +// --------------------------------------------------------------------------- +// RS_SetSRID kernel +// --------------------------------------------------------------------------- + +#[derive(Debug)] +struct RsSetSrid { + engine: Option<Arc<dyn CrsEngine + Send + Sync>>, +} + +impl SedonaScalarKernel for RsSetSrid { + fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster(), ArgMatcher::is_integer()], + SedonaType::Raster, + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + _arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result<ColumnarValue> { + let raster_arg = &args[0]; + let srid_arg = &args[1]; + + let input_nulls = extract_input_nulls(srid_arg); + + // Convert SRID integer(s) to CRS string(s) + let crs_columnar = srid_to_crs_columnar(srid_arg, self.engine.as_ref())?; + + replace_raster_crs(raster_arg, &crs_columnar, input_nulls) + } +} + +// --------------------------------------------------------------------------- +// RS_SetCRS kernel +// --------------------------------------------------------------------------- + +#[derive(Debug)] +struct RsSetCrs { + engine: Option<Arc<dyn CrsEngine + Send + Sync>>, +} + +impl SedonaScalarKernel for RsSetCrs { + fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster(), ArgMatcher::is_string()], + SedonaType::Raster, + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + _arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result<ColumnarValue> { + let raster_arg = &args[0]; + let crs_arg = &args[1]; + + let input_nulls = extract_input_nulls(crs_arg); + + // Normalize the CRS string(s) — abbreviate PROJJSON to authority:code, map "0" to null + let crs_columnar = normalize_crs_columnar(crs_arg, self.engine.as_ref())?; + + replace_raster_crs(raster_arg, &crs_columnar, input_nulls) + } +} + +// --------------------------------------------------------------------------- +// Core: zero-copy CRS column swap +// --------------------------------------------------------------------------- + +/// Replace the CRS column of a raster StructArray with a new CRS value. +/// +/// This is a zero-copy operation for the metadata and bands columns: +/// we clone the Arc pointers for columns 0 (metadata) and 2 (bands), +/// and only rebuild column 1 (CRS) from the provided value. +/// +/// When `input_nulls` is provided, rows where the original SRID/CRS input was +/// null will have the entire raster nulled out (not just the CRS column). +fn replace_raster_crs( + raster_arg: &ColumnarValue, + crs_array: &StringViewArray, + input_nulls: Option<NullBuffer>, +) -> Result<ColumnarValue> { + match raster_arg { + ColumnarValue::Array(raster_array) => { + let raster_struct = raster_array + .as_any() + .downcast_ref::<StructArray>() + .ok_or_else(|| { + datafusion_common::DataFusionError::Internal( + "Expected StructArray for raster data".to_string(), + ) + })?; + + let num_rows = raster_struct.len(); + let new_crs: ArrayRef = broadcast_string_view(crs_array, num_rows); + let new_struct = swap_crs_column(raster_struct, new_crs)?; + + let input_nulls = input_nulls.map(|nulls| { + if nulls.len() == 1 && nulls.len() != num_rows { Review Comment: The condition on line 212 checks `nulls.len() == 1 && nulls.len() != num_rows`, but this is redundant. If `nulls.len() == 1` is true, then `nulls.len() != num_rows` is only true when `num_rows != 1`. The second part of the condition doesn't add meaningful logic - it should just be `if nulls.len() == 1 && num_rows != 1` to make the intent clearer. Alternatively, the condition should just be `if nulls.len() == 1` since the broadcasting is needed whenever the null buffer has length 1 (regardless of whether num_rows is 1 or not, though the latter case is a no-op). ```suggestion if nulls.len() == 1 && num_rows != 1 { ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
