Copilot commented on code in PR #590:
URL: https://github.com/apache/sedona-db/pull/590#discussion_r2793083221
##########
rust/sedona-schema/src/crs.rs:
##########
@@ -101,6 +108,55 @@ pub fn deserialize_crs_from_obj(crs_value:
&serde_json::Value) -> Result<Crs> {
Ok(Some(Arc::new(projjson)))
}
+/// Translating CRS into integer SRID with a cache to avoid expensive CRS
deserialization.
+pub struct CachedCrsToSRIDMapping {
+ cache: HashMap<Cow<'static, str>, u32>,
+}
+
+impl Default for CachedCrsToSRIDMapping {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+impl CachedCrsToSRIDMapping {
+ /// Create a new CachedCrsSRIDMapping with an empty cache.
+ pub fn new() -> Self {
+ Self {
+ cache: HashMap::new(),
+ }
+ }
+
+ /// Create a new CachedCrsSRIDMapping with an optional initial capacity
for the cache.
Review Comment:
The docstrings refer to `CachedCrsSRIDMapping` but the type is
`CachedCrsToSRIDMapping`. Also, the behavior description is inconsistent:
`get_srid` currently errors when a CRS is present but has no SRID (it does not
“return 0” in that case). Please update the docs to match the actual type name
and the error/return semantics (or adjust the implementation to match the
stated contract).
##########
rust/sedona-schema/src/crs.rs:
##########
@@ -101,6 +108,55 @@ pub fn deserialize_crs_from_obj(crs_value:
&serde_json::Value) -> Result<Crs> {
Ok(Some(Arc::new(projjson)))
}
+/// Translating CRS into integer SRID with a cache to avoid expensive CRS
deserialization.
+pub struct CachedCrsToSRIDMapping {
+ cache: HashMap<Cow<'static, str>, u32>,
+}
+
+impl Default for CachedCrsToSRIDMapping {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+impl CachedCrsToSRIDMapping {
+ /// Create a new CachedCrsSRIDMapping with an empty cache.
+ pub fn new() -> Self {
+ Self {
+ cache: HashMap::new(),
+ }
+ }
+
+ /// Create a new CachedCrsSRIDMapping with an optional initial capacity
for the cache.
+ pub fn with_capacity(capacity: usize) -> Self {
+ Self {
+ cache: HashMap::with_capacity(capacity),
+ }
+ }
+
+ /// Get the SRID for a given CRS string, using the cache to avoid
expensive deserialization where possible.
+ /// Returns 0 for missing CRS or CRS that don't have an SRID. Errors if
the CRS string is invalid or if the
+ /// CRS can't be deserialized.
+ pub fn get_srid(&mut self, maybe_crs: Option<&str>) -> Result<u32> {
Review Comment:
The docstrings refer to `CachedCrsSRIDMapping` but the type is
`CachedCrsToSRIDMapping`. Also, the behavior description is inconsistent:
`get_srid` currently errors when a CRS is present but has no SRID (it does not
“return 0” in that case). Please update the docs to match the actual type name
and the error/return semantics (or adjust the implementation to match the
stated contract).
##########
rust/sedona-raster/src/array.rs:
##########
@@ -481,6 +458,31 @@ impl<'a> RasterStructArray<'a> {
.as_any()
.downcast_ref::<StructArray>()
.unwrap();
+ let band_nodata_array = band_metadata_struct
+ .column(band_metadata_indices::NODATAVALUE)
+ .as_any()
+ .downcast_ref::<BinaryArray>()
+ .unwrap();
+ let band_storage_type_array = band_metadata_struct
+ .column(band_metadata_indices::STORAGE_TYPE)
+ .as_any()
+ .downcast_ref::<UInt32Array>()
+ .unwrap();
+ let band_datatype_array = band_metadata_struct
+ .column(band_metadata_indices::DATATYPE)
+ .as_any()
+ .downcast_ref::<UInt32Array>()
+ .unwrap();
+ let band_outdb_url_array = band_metadata_struct
+ .column(band_metadata_indices::OUTDB_URL)
+ .as_any()
+ .downcast_ref::<StringArray>()
+ .unwrap();
+ let band_outdb_band_id_array = band_metadata_struct
+ .column(band_metadata_indices::OUTDB_BAND_ID)
+ .as_any()
+ .downcast_ref::<UInt32Array>()
+ .unwrap();
Review Comment:
These `unwrap()`s will panic if the Arrow schema changes or if an unexpected
array type is encountered. Previously, similar downcasts returned an
`ArrowError::SchemaError` instead of crashing. Prefer propagating a structured
`ArrowError` (e.g., via `ok_or` / `ok_or_else`) so schema/type mismatches fail
gracefully rather than panicking.
##########
rust/sedona-schema/src/crs.rs:
##########
@@ -101,6 +108,55 @@ pub fn deserialize_crs_from_obj(crs_value:
&serde_json::Value) -> Result<Crs> {
Ok(Some(Arc::new(projjson)))
}
+/// Translating CRS into integer SRID with a cache to avoid expensive CRS
deserialization.
+pub struct CachedCrsToSRIDMapping {
+ cache: HashMap<Cow<'static, str>, u32>,
+}
+
+impl Default for CachedCrsToSRIDMapping {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+impl CachedCrsToSRIDMapping {
+ /// Create a new CachedCrsSRIDMapping with an empty cache.
Review Comment:
The docstrings refer to `CachedCrsSRIDMapping` but the type is
`CachedCrsToSRIDMapping`. Also, the behavior description is inconsistent:
`get_srid` currently errors when a CRS is present but has no SRID (it does not
“return 0” in that case). Please update the docs to match the actual type name
and the error/return semantics (or adjust the implementation to match the
stated contract).
##########
rust/sedona-raster-functions/src/rs_srid.rs:
##########
@@ -97,45 +96,17 @@ impl SedonaScalarKernel for RsSrid {
let executor = RasterExecutor::new(arg_types, args);
let mut builder =
UInt32Builder::with_capacity(executor.num_iterations());
+ let mut crs_to_srid_mapping =
+ CachedCrsToSRIDMapping::with_capacity(executor.num_iterations());
executor.execute_raster_void(|_i, raster_opt| {
- match raster_opt {
- None => builder.append_null(),
- Some(raster) => {
- match raster.crs() {
- None => {
- // When no CRS is set, SRID is 0
- builder.append_value(0);
- }
- Some(crs_str) => {
- let crs = deserialize_crs(crs_str).map_err(|e| {
- DataFusionError::Execution(format!(
- "Failed to deserialize CRS: {e}"
- ))
- })?;
-
- match crs {
- Some(crs_ref) => {
- let srid = crs_ref.srid().map_err(|e| {
- DataFusionError::Execution(format!(
- "Failed to get SRID from CRS: {e}"
- ))
- })?;
-
- match srid {
- Some(srid_val) =>
builder.append_value(srid_val),
- None => {
- return
Err(DataFusionError::Execution(
- "CRS has no SRID".to_string(),
- ))
- }
- }
- }
- None => builder.append_value(0),
- }
- }
- }
- }
- }
+ let Some(raster) = raster_opt else {
+ builder.append_null();
+ return Ok(());
+ };
+
+ let maybe_crs = raster.crs_str_ref();
Review Comment:
Calling `crs_str_ref()` ties this code to a concrete raster implementation
that provides that method, whereas `raster.crs()` is the trait-level API and
already returns `Option<&str>`. If the goal is zero-copy access, consider
adding an equivalent method to the `RasterRef` trait (or keeping `raster.crs()`
here and relying on the implementation’s zero-copy behavior) to avoid coupling
this UDF to a specific raster ref type.
```suggestion
let maybe_crs = raster.crs();
```
##########
rust/sedona-raster-functions/src/rs_srid.rs:
##########
@@ -167,26 +138,14 @@ impl SedonaScalarKernel for RsCrs {
StringBuilder::with_capacity(executor.num_iterations(),
preallocate_bytes);
executor.execute_raster_void(|_i, raster_opt| {
- match raster_opt {
- None => builder.append_null(),
- Some(raster) => match raster.crs() {
- None => builder.append_null(),
- Some(crs_str) => {
- let crs = deserialize_crs(crs_str).map_err(|e| {
- DataFusionError::Execution(format!("Failed to
deserialize CRS: {e}"))
- })?;
-
- let crs_string = crs
- .ok_or_else(|| {
- DataFusionError::Execution(
- "Failed to parse non-null CRS
string".to_string(),
- )
- })?
- .to_crs_string();
- builder.append_value(crs_string);
- }
- },
- }
+ let Some(raster) = raster_opt else {
+ builder.append_null();
+ return Ok(());
+ };
+
+ // This is similar to ST_CRS: if no CRS is set, return "0"
+ let crs_str = raster.crs().unwrap_or("0");
+ builder.append_value(crs_str);
Review Comment:
`RS_CRS` no longer parses/validates the CRS string (the previous
implementation deserialized and returned `to_crs_string()`), so invalid CRS
strings will now be returned as-is, and canonicalization is lost. If the
intended contract is to return a normalized/validated CRS representation,
consider keeping deserialization (and erroring on invalid CRS) while still
using the early-return structure; otherwise, this is a behavior change that
should be clearly documented as part of the UDF semantics.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]