Kontinuation commented on code in PR #831: URL: https://github.com/apache/sedona-db/pull/831#discussion_r3231114792
########## rust/sedona-raster-gdal/src/rs_frompath.rs: ########## @@ -0,0 +1,216 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! RS_FromPath UDF - Load out-db raster from file path. + +use std::sync::Arc; + +use arrow_array::Array; +use arrow_schema::DataType; +use datafusion_common::cast::as_string_array; +use datafusion_common::config::ConfigOptions; +use datafusion_common::error::Result; +use datafusion_expr::{ColumnarValue, Volatility}; +use sedona_common::sedona_internal_err; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_raster::builder::RasterBuilder; +use sedona_schema::datatypes::{SedonaType, RASTER}; +use sedona_schema::matchers::ArgMatcher; + +use crate::gdal_common::with_gdal; +use crate::gdal_dataset_provider::configure_thread_local_options; +use crate::utils::append_as_outdb_raster; + +pub fn rs_from_path_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_frompath", + vec![Arc::new(RsFromPath)], + Volatility::Volatile, + ) +} Review Comment: Kept rs_frompath intentionally. RS_FromPath lowercases to rs_frompath, which matches the existing raster SQL naming convention such as RS_BandPath -> rs_bandpath. I also renamed the Rust helper to rs_frompath_udf and added a test to lock in the registered name. ########## rust/sedona-raster-gdal/src/utils.rs: ########## @@ -109,6 +104,68 @@ pub fn append_as_indb_raster(dataset: &Dataset, builder: &mut RasterBuilder) -> Ok(()) } +/// Append a raster source path as a single out-db raster to the provided [`RasterBuilder`]. +pub fn append_as_outdb_raster(gdal: &Gdal, path: &str, builder: &mut RasterBuilder) -> Result<()> { + let gdal_path = normalize_outdb_source_path(path); + let dataset = gdal + .open_ex_with_options( + &gdal_path, + DatasetOptions { + open_flags: GDAL_OF_RASTER | GDAL_OF_READONLY, + ..Default::default() + }, + ) + .map_err(|e| { + exec_datafusion_err!( + "Failed to open raster file '{}'(GDAL path '{}'): {}", + path, + gdal_path, + e + ) + })?; Review Comment: Fixed in 2756654f. Updated the error message to include the missing space before the GDAL path segment. ########## rust/sedona/src/context.rs: ########## @@ -221,6 +221,10 @@ impl SedonaContext { Arc::new(RandomGeometryFunction::default()), ); + for udf in sedona_raster_gdal::all_gdal_udfs() { + out.ctx.register_udf(udf.into()); + } Review Comment: Leaving this as-is intentionally. Sedona-DB uses a trusted-query security model similar to PostGIS, where file and URL-backed access is available to users who can execute SQL. We do not plan to add an extra opt-in gate for RS_FromPath in this PR. ########## rust/sedona-raster-gdal/src/gdal_common.rs: ########## @@ -313,6 +288,54 @@ pub fn nodata_bytes_to_f64(nodata_bytes: Option<&[u8]>, band_type: &BandDataType bytes_to_f64(bytes, band_type).ok() } +/// Read a GDAL band's nodata value into a byte vector using the band's native type. +pub fn band_nodata_to_bytes(band: &RasterBand<'_>) -> Result<Option<Vec<u8>>> { + let band_type = gdal_to_band_data_type(band.band_type())?; + + Ok(match band_type { + BandDataType::UInt64 => band + .no_data_value_u64() + .map(|nodata| nodata.to_le_bytes().to_vec()), + BandDataType::Int64 => band + .no_data_value_i64() + .map(|nodata| nodata.to_le_bytes().to_vec()), + _ => band + .no_data_value() + .map(|nodata| nodata_f64_to_bytes(nodata, &band_type)), + }) +} Review Comment: Leaving this as-is intentionally. band_nodata_to_bytes is meant to stay self-contained over a GDAL band, and reading the band type from the band object is trivial here. We do not want to push that extraction responsibility onto callers for a negligible optimization. ########## rust/sedona-raster-gdal/src/rs_frompath.rs: ########## @@ -0,0 +1,216 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! RS_FromPath UDF - Load out-db raster from file path. + +use std::sync::Arc; + +use arrow_array::Array; +use arrow_schema::DataType; +use datafusion_common::cast::as_string_array; +use datafusion_common::config::ConfigOptions; +use datafusion_common::error::Result; +use datafusion_expr::{ColumnarValue, Volatility}; +use sedona_common::sedona_internal_err; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_raster::builder::RasterBuilder; +use sedona_schema::datatypes::{SedonaType, RASTER}; +use sedona_schema::matchers::ArgMatcher; + +use crate::gdal_common::with_gdal; +use crate::gdal_dataset_provider::configure_thread_local_options; +use crate::utils::append_as_outdb_raster; + +pub fn rs_from_path_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_frompath", + vec![Arc::new(RsFromPath)], + Volatility::Volatile, + ) +} + +#[derive(Debug)] +pub(crate) struct RsFromPath; + +impl SedonaScalarKernel for RsFromPath { + fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> { + ArgMatcher::new(vec![ArgMatcher::is_string()], RASTER).match_args(args) + } + + fn invoke_batch_from_args( + &self, + _arg_types: &[SedonaType], + args: &[ColumnarValue], + _return_type: &SedonaType, + _num_rows: usize, + config_options: Option<&ConfigOptions>, + ) -> Result<ColumnarValue> { + with_gdal(|gdal| { + configure_thread_local_options(gdal, config_options)?; + + let paths = args[0].cast_to(&DataType::Utf8, None)?.into_array(1)?; + let path_array = as_string_array(&paths)?; + + let len = path_array.len(); + let mut builder = RasterBuilder::new(len); + for i in 0..len { + if path_array.is_null(i) { + builder.append_null()?; + } else { + let path = path_array.value(i); + append_as_outdb_raster(gdal, path, &mut builder)?; + } + } Review Comment: Fixed in 2756654f. Added unit tests for null propagation and invalid-path errors in rs_frompath.rs so the expected semantics are now locked in. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
