Copilot commented on code in PR #590:
URL: https://github.com/apache/sedona-db/pull/590#discussion_r2793083221


##########
rust/sedona-schema/src/crs.rs:
##########
@@ -101,6 +108,55 @@ pub fn deserialize_crs_from_obj(crs_value: 
&serde_json::Value) -> Result<Crs> {
     Ok(Some(Arc::new(projjson)))
 }
 
+/// Translating CRS into integer SRID with a cache to avoid expensive CRS 
deserialization.
+pub struct CachedCrsToSRIDMapping {
+    cache: HashMap<Cow<'static, str>, u32>,
+}
+
+impl Default for CachedCrsToSRIDMapping {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl CachedCrsToSRIDMapping {
+    /// Create a new CachedCrsSRIDMapping with an empty cache.
+    pub fn new() -> Self {
+        Self {
+            cache: HashMap::new(),
+        }
+    }
+
+    /// Create a new CachedCrsSRIDMapping with an optional initial capacity 
for the cache.

Review Comment:
   The docstrings refer to `CachedCrsSRIDMapping` but the type is 
`CachedCrsToSRIDMapping`. Also, the behavior description is inconsistent: 
`get_srid` currently errors when a CRS is present but has no SRID (it does not 
“return 0” in that case). Please update the docs to match the actual type name 
and the error/return semantics (or adjust the implementation to match the 
stated contract).



##########
rust/sedona-schema/src/crs.rs:
##########
@@ -101,6 +108,55 @@ pub fn deserialize_crs_from_obj(crs_value: 
&serde_json::Value) -> Result<Crs> {
     Ok(Some(Arc::new(projjson)))
 }
 
+/// Translating CRS into integer SRID with a cache to avoid expensive CRS 
deserialization.
+pub struct CachedCrsToSRIDMapping {
+    cache: HashMap<Cow<'static, str>, u32>,
+}
+
+impl Default for CachedCrsToSRIDMapping {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl CachedCrsToSRIDMapping {
+    /// Create a new CachedCrsSRIDMapping with an empty cache.
+    pub fn new() -> Self {
+        Self {
+            cache: HashMap::new(),
+        }
+    }
+
+    /// Create a new CachedCrsSRIDMapping with an optional initial capacity 
for the cache.
+    pub fn with_capacity(capacity: usize) -> Self {
+        Self {
+            cache: HashMap::with_capacity(capacity),
+        }
+    }
+
+    /// Get the SRID for a given CRS string, using the cache to avoid 
expensive deserialization where possible.
+    /// Returns 0 for missing CRS or CRS that don't have an SRID. Errors if 
the CRS string is invalid or if the
+    /// CRS can't be deserialized.
+    pub fn get_srid(&mut self, maybe_crs: Option<&str>) -> Result<u32> {

Review Comment:
   The docstrings refer to `CachedCrsSRIDMapping` but the type is 
`CachedCrsToSRIDMapping`. Also, the behavior description is inconsistent: 
`get_srid` currently errors when a CRS is present but has no SRID (it does not 
“return 0” in that case). Please update the docs to match the actual type name 
and the error/return semantics (or adjust the implementation to match the 
stated contract).



##########
rust/sedona-raster/src/array.rs:
##########
@@ -481,6 +458,31 @@ impl<'a> RasterStructArray<'a> {
             .as_any()
             .downcast_ref::<StructArray>()
             .unwrap();
+        let band_nodata_array = band_metadata_struct
+            .column(band_metadata_indices::NODATAVALUE)
+            .as_any()
+            .downcast_ref::<BinaryArray>()
+            .unwrap();
+        let band_storage_type_array = band_metadata_struct
+            .column(band_metadata_indices::STORAGE_TYPE)
+            .as_any()
+            .downcast_ref::<UInt32Array>()
+            .unwrap();
+        let band_datatype_array = band_metadata_struct
+            .column(band_metadata_indices::DATATYPE)
+            .as_any()
+            .downcast_ref::<UInt32Array>()
+            .unwrap();
+        let band_outdb_url_array = band_metadata_struct
+            .column(band_metadata_indices::OUTDB_URL)
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+        let band_outdb_band_id_array = band_metadata_struct
+            .column(band_metadata_indices::OUTDB_BAND_ID)
+            .as_any()
+            .downcast_ref::<UInt32Array>()
+            .unwrap();

Review Comment:
   These `unwrap()`s will panic if the Arrow schema changes or if an unexpected 
array type is encountered. Previously, similar downcasts returned an 
`ArrowError::SchemaError` instead of crashing. Prefer propagating a structured 
`ArrowError` (e.g., via `ok_or` / `ok_or_else`) so schema/type mismatches fail 
gracefully rather than panicking.



##########
rust/sedona-schema/src/crs.rs:
##########
@@ -101,6 +108,55 @@ pub fn deserialize_crs_from_obj(crs_value: 
&serde_json::Value) -> Result<Crs> {
     Ok(Some(Arc::new(projjson)))
 }
 
+/// Translating CRS into integer SRID with a cache to avoid expensive CRS 
deserialization.
+pub struct CachedCrsToSRIDMapping {
+    cache: HashMap<Cow<'static, str>, u32>,
+}
+
+impl Default for CachedCrsToSRIDMapping {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl CachedCrsToSRIDMapping {
+    /// Create a new CachedCrsSRIDMapping with an empty cache.

Review Comment:
   The docstrings refer to `CachedCrsSRIDMapping` but the type is 
`CachedCrsToSRIDMapping`. Also, the behavior description is inconsistent: 
`get_srid` currently errors when a CRS is present but has no SRID (it does not 
“return 0” in that case). Please update the docs to match the actual type name 
and the error/return semantics (or adjust the implementation to match the 
stated contract).



##########
rust/sedona-raster-functions/src/rs_srid.rs:
##########
@@ -97,45 +96,17 @@ impl SedonaScalarKernel for RsSrid {
         let executor = RasterExecutor::new(arg_types, args);
         let mut builder = 
UInt32Builder::with_capacity(executor.num_iterations());
 
+        let mut crs_to_srid_mapping =
+            CachedCrsToSRIDMapping::with_capacity(executor.num_iterations());
         executor.execute_raster_void(|_i, raster_opt| {
-            match raster_opt {
-                None => builder.append_null(),
-                Some(raster) => {
-                    match raster.crs() {
-                        None => {
-                            // When no CRS is set, SRID is 0
-                            builder.append_value(0);
-                        }
-                        Some(crs_str) => {
-                            let crs = deserialize_crs(crs_str).map_err(|e| {
-                                DataFusionError::Execution(format!(
-                                    "Failed to deserialize CRS: {e}"
-                                ))
-                            })?;
-
-                            match crs {
-                                Some(crs_ref) => {
-                                    let srid = crs_ref.srid().map_err(|e| {
-                                        DataFusionError::Execution(format!(
-                                            "Failed to get SRID from CRS: {e}"
-                                        ))
-                                    })?;
-
-                                    match srid {
-                                        Some(srid_val) => 
builder.append_value(srid_val),
-                                        None => {
-                                            return 
Err(DataFusionError::Execution(
-                                                "CRS has no SRID".to_string(),
-                                            ))
-                                        }
-                                    }
-                                }
-                                None => builder.append_value(0),
-                            }
-                        }
-                    }
-                }
-            }
+            let Some(raster) = raster_opt else {
+                builder.append_null();
+                return Ok(());
+            };
+
+            let maybe_crs = raster.crs_str_ref();

Review Comment:
   Calling `crs_str_ref()` ties this code to a concrete raster implementation 
that provides that method, whereas `raster.crs()` is the trait-level API and 
already returns `Option<&str>`. If the goal is zero-copy access, consider 
adding an equivalent method to the `RasterRef` trait (or keeping `raster.crs()` 
here and relying on the implementation’s zero-copy behavior) to avoid coupling 
this UDF to a specific raster ref type.
   ```suggestion
               let maybe_crs = raster.crs();
   ```



##########
rust/sedona-raster-functions/src/rs_srid.rs:
##########
@@ -167,26 +138,14 @@ impl SedonaScalarKernel for RsCrs {
             StringBuilder::with_capacity(executor.num_iterations(), 
preallocate_bytes);
 
         executor.execute_raster_void(|_i, raster_opt| {
-            match raster_opt {
-                None => builder.append_null(),
-                Some(raster) => match raster.crs() {
-                    None => builder.append_null(),
-                    Some(crs_str) => {
-                        let crs = deserialize_crs(crs_str).map_err(|e| {
-                            DataFusionError::Execution(format!("Failed to 
deserialize CRS: {e}"))
-                        })?;
-
-                        let crs_string = crs
-                            .ok_or_else(|| {
-                                DataFusionError::Execution(
-                                    "Failed to parse non-null CRS 
string".to_string(),
-                                )
-                            })?
-                            .to_crs_string();
-                        builder.append_value(crs_string);
-                    }
-                },
-            }
+            let Some(raster) = raster_opt else {
+                builder.append_null();
+                return Ok(());
+            };
+
+            // This is similar to ST_CRS: if no CRS is set, return "0"
+            let crs_str = raster.crs().unwrap_or("0");
+            builder.append_value(crs_str);

Review Comment:
   `RS_CRS` no longer parses/validates the CRS string (the previous 
implementation deserialized and returned `to_crs_string()`), so invalid CRS 
strings will now be returned as-is, and canonicalization is lost. If the 
intended contract is to return a normalized/validated CRS representation, 
consider keeping deserialization (and erroring on invalid CRS) while still 
using the early-return structure; otherwise, this is a behavior change that 
should be clearly documented as part of the UDF semantics.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to