james-willis commented on code in PR #749:
URL: https://github.com/apache/sedona-db/pull/749#discussion_r3236899182


##########
rust/sedona-raster/src/traits.rs:
##########
@@ -217,4 +777,208 @@ mod tests {
         let result = nodata_bytes_to_f64(&[1, 2, 3], &BandDataType::Float64);
         assert!(result.is_err());
     }
+
+    #[test]
+    fn test_nodata_as_f64_int64_loses_precision_above_2_pow_53() {
+        // Locks in the documented warning: nodata bytes for Int64 values
+        // beyond f64's 53-bit mantissa silently round on conversion.
+        // The expected f64 is hard-coded — deriving it via `as f64` would
+        // mean the test invokes the same primitive cast it claims to test.
+        let big = (1i64 << 53) + 1; // 2^53 + 1; not representable in f64
+        let bytes = big.to_le_bytes();
+        let val = nodata_bytes_to_f64(&bytes, &BandDataType::Int64).unwrap();
+        assert_eq!(val, 9007199254740992.0_f64);
+        assert_ne!(val as i64, big);
+    }

Review Comment:
   done



##########
rust/sedona-raster/src/array.rs:
##########
@@ -15,445 +15,431 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use std::borrow::Cow;
+
 use arrow_array::{
-    Array, BinaryArray, BinaryViewArray, Float64Array, ListArray, StringArray, 
StringViewArray,
-    StructArray, UInt32Array, UInt64Array,
+    Array, BinaryArray, BinaryViewArray, Float64Array, Int64Array, ListArray, 
StringArray,
+    StringViewArray, StructArray, UInt32Array, UInt64Array,
 };
 use arrow_schema::ArrowError;
 
-use crate::traits::{
-    BandIterator, BandMetadataRef, BandRef, BandsRef, MetadataRef, 
RasterMetadata, RasterRef,
-};
-use sedona_schema::raster::{
-    band_indices, band_metadata_indices, metadata_indices, raster_indices, 
BandDataType,
-    StorageType,
-};
+use crate::traits::{BandRef, Bands, NdBuffer, RasterRef, ViewEntry};
+use sedona_schema::raster::{band_indices, raster_indices, BandDataType};
 
-/// Implement MetadataRef for RasterMetadata to allow direct use with builder
-impl MetadataRef for RasterMetadata {
-    fn width(&self) -> u64 {
-        self.width
-    }
-    fn height(&self) -> u64 {
-        self.height
-    }
-    fn upper_left_x(&self) -> f64 {
-        self.upperleft_x
-    }
-    fn upper_left_y(&self) -> f64 {
-        self.upperleft_y
-    }
-    fn scale_x(&self) -> f64 {
-        self.scale_x
-    }
-    fn scale_y(&self) -> f64 {
-        self.scale_y
-    }
-    fn skew_x(&self) -> f64 {
-        self.skew_x
-    }
-    fn skew_y(&self) -> f64 {
-        self.skew_y
-    }
-}
-
-/// Implementation of MetadataRef for Arrow StructArray
-struct MetadataRefImpl<'a> {
-    width_array: &'a UInt64Array,
-    height_array: &'a UInt64Array,
-    upper_left_x_array: &'a Float64Array,
-    upper_left_y_array: &'a Float64Array,
-    scale_x_array: &'a Float64Array,
-    scale_y_array: &'a Float64Array,
-    skew_x_array: &'a Float64Array,
-    skew_y_array: &'a Float64Array,
-    index: usize,
+/// Arrow-backed implementation of BandRef for a single band within a raster.
+///
+/// Today this handles only the canonical identity view: `view_entries` is
+/// synthesised from `source_shape`, `visible_shape == source_shape`,
+/// and `byte_strides` are plain C-order strides with `byte_offset = 0`.
+struct BandRefImpl<'a> {
+    dim_names_list: &'a ListArray,
+    dim_names_values: &'a StringArray,
+    source_shape_list: &'a ListArray,
+    source_shape_values: &'a UInt64Array,
+    nodata_array: &'a BinaryArray,
+    outdb_uri_array: &'a StringArray,
+    outdb_format_array: &'a StringViewArray,
+    data_array: &'a BinaryViewArray,
+    /// Absolute row index within the flattened bands arrays
+    band_row: usize,
+    /// Resolved at construction so accessors don't re-decode the discriminant.
+    data_type: BandDataType,
+    /// Per-visible-axis view, length = ndim. Always identity today.
+    view_entries: Vec<ViewEntry>,
+    /// Visible shape, length = ndim. Equals `source_shape` today.
+    visible_shape: Vec<u64>,
+    /// Byte strides per visible axis. C-order over `source_shape` today.
+    byte_strides: Vec<i64>,
+    /// Byte offset into `data` of the visible region's `[0,...,0]` element.
+    byte_offset: u64,
 }
 
-impl<'a> MetadataRef for MetadataRefImpl<'a> {
-    #[inline(always)]
-    fn width(&self) -> u64 {
-        self.width_array.value(self.index)
-    }
-
-    #[inline(always)]
-    fn height(&self) -> u64 {
-        self.height_array.value(self.index)
+impl<'a> BandRef for BandRefImpl<'a> {
+    fn ndim(&self) -> usize {
+        self.view_entries.len()
     }
 
-    #[inline(always)]
-    fn upper_left_x(&self) -> f64 {
-        self.upper_left_x_array.value(self.index)
+    fn dim_names(&self) -> Vec<&str> {
+        let start = self.dim_names_list.value_offsets()[self.band_row] as 
usize;
+        let end = self.dim_names_list.value_offsets()[self.band_row + 1] as 
usize;
+        (start..end)
+            .map(|i| self.dim_names_values.value(i))
+            .collect()
     }
 
-    #[inline(always)]
-    fn upper_left_y(&self) -> f64 {
-        self.upper_left_y_array.value(self.index)
+    fn shape(&self) -> &[u64] {
+        &self.visible_shape
     }
 
-    #[inline(always)]
-    fn scale_x(&self) -> f64 {
-        self.scale_x_array.value(self.index)
+    fn raw_source_shape(&self) -> &[u64] {
+        let start = self.source_shape_list.value_offsets()[self.band_row] as 
usize;
+        let end = self.source_shape_list.value_offsets()[self.band_row + 1] as 
usize;
+        &self.source_shape_values.values()[start..end]
     }
 
-    #[inline(always)]
-    fn scale_y(&self) -> f64 {
-        self.scale_y_array.value(self.index)
+    fn view(&self) -> &[ViewEntry] {
+        &self.view_entries
     }
 
-    #[inline(always)]
-    fn skew_x(&self) -> f64 {
-        self.skew_x_array.value(self.index)
+    fn data_type(&self) -> BandDataType {
+        self.data_type
     }
 
-    #[inline(always)]
-    fn skew_y(&self) -> f64 {
-        self.skew_y_array.value(self.index)
+    fn data(&self) -> &[u8] {
+        // Pre-N-D compatibility surface: returns the raw `data` column bytes
+        // verbatim. For InDb identity-view bands this is the row-major buffer
+        // callers from main expect. For OutDb it's `&[]` — same shape as
+        // main, which let callers see "no in-line bytes" without panicking.
+        self.data_array.value(self.band_row)
     }
-}
-
-/// Implementation of BandMetadataRef for Arrow StructArray
-struct BandMetadataRefImpl<'a> {
-    nodata_array: &'a BinaryArray,
-    storage_type_array: &'a UInt32Array,
-    datatype_array: &'a UInt32Array,
-    outdb_url_array: &'a StringArray,
-    outdb_band_id_array: &'a UInt32Array,
-    band_index: usize,
-}
 
-impl<'a> BandMetadataRef for BandMetadataRefImpl<'a> {
-    fn nodata_value(&self) -> Option<&[u8]> {
-        if self.nodata_array.is_null(self.band_index) {
+    fn nodata(&self) -> Option<&[u8]> {
+        if self.nodata_array.is_null(self.band_row) {
             None
         } else {
-            Some(self.nodata_array.value(self.band_index))
+            Some(self.nodata_array.value(self.band_row))
         }
     }
 
-    fn storage_type(&self) -> Result<StorageType, ArrowError> {
-        let value = self.storage_type_array.value(self.band_index);
-        let storage_type = match value {
-            0 => StorageType::InDb,
-            1 => StorageType::OutDbRef,
-            _ => {
-                return Err(ArrowError::InvalidArgumentError(format!(
-                    "Unknown storage type: {}",
-                    value
-                )))
-            }
-        };
-        Ok(storage_type)
-    }
-
-    fn data_type(&self) -> Result<BandDataType, ArrowError> {
-        let value = self.datatype_array.value(self.band_index);
-        let band_data_type = match value {
-            1 => BandDataType::UInt8,
-            2 => BandDataType::UInt16,
-            3 => BandDataType::Int16,
-            4 => BandDataType::UInt32,
-            5 => BandDataType::Int32,
-            6 => BandDataType::Float32,
-            7 => BandDataType::Float64,
-            8 => BandDataType::UInt64,
-            9 => BandDataType::Int64,
-            10 => BandDataType::Int8,
-            _ => {
-                return Err(ArrowError::InvalidArgumentError(format!(
-                    "Unknown band data type: {}",
-                    self.datatype_array.value(self.band_index)
-                )))
-            }
-        };
-        Ok(band_data_type)
-    }
-
-    fn outdb_url(&self) -> Option<&str> {
-        if self.outdb_url_array.is_null(self.band_index) {
+    fn outdb_uri(&self) -> Option<&str> {
+        if self.outdb_uri_array.is_null(self.band_row) {
             None
         } else {
-            Some(self.outdb_url_array.value(self.band_index))
+            Some(self.outdb_uri_array.value(self.band_row))
         }
     }
 
-    fn outdb_band_id(&self) -> Option<u32> {
-        if self.outdb_band_id_array.is_null(self.band_index) {
+    fn outdb_format(&self) -> Option<&str> {
+        if self.outdb_format_array.is_null(self.band_row) {
             None
         } else {
-            Some(self.outdb_band_id_array.value(self.band_index))
+            Some(self.outdb_format_array.value(self.band_row))
         }
     }
-}
 
-/// Implementation of BandRef for accessing individual band data
-struct BandRefImpl<'a> {
-    band_metadata: BandMetadataRefImpl<'a>,
-    band_data: &'a [u8],
-}
+    fn is_indb(&self) -> bool {
+        !self.data_array.value(self.band_row).is_empty()
+    }
 
-impl<'a> BandRef for BandRefImpl<'a> {
-    fn metadata(&self) -> &dyn BandMetadataRef {
-        &self.band_metadata
+    fn nd_buffer(&self) -> Result<NdBuffer<'_>, ArrowError> {
+        if !self.is_indb() {
+            return Err(ArrowError::NotYetImplemented(
+                "OutDb byte access via nd_buffer() is not yet implemented; \
+                 backend-specific OutDb resolvers are tracked separately"
+                    .to_string(),
+            ));
+        }
+        // shape and strides are owned by NdBuffer (see its doc comment).
+        // Cloning here is cheap — both vecs are O(ndim), a handful of values.
+        Ok(NdBuffer {
+            buffer: self.data_array.value(self.band_row),
+            shape: self.visible_shape.clone(),
+            strides: self.byte_strides.clone(),
+            offset: self.byte_offset,
+            data_type: self.data_type,
+        })
     }
 
-    fn data(&self) -> &[u8] {
-        self.band_data
+    fn contiguous_data(&self) -> Result<Cow<'_, [u8]>, ArrowError> {
+        if !self.is_indb() {
+            return Err(ArrowError::NotYetImplemented(
+                "OutDb byte access via contiguous_data() is not yet 
implemented; \
+                 backend-specific OutDb resolvers are tracked separately"
+                    .to_string(),
+            ));
+        }
+        // Identity-view only today, so the data buffer is already row-major
+        // over the visible region.
+        Ok(Cow::Borrowed(self.data_array.value(self.band_row)))
     }
 }
 
-/// Implementation of BandsRef for accessing all bands in a raster
-struct BandsRefImpl<'a> {
+/// Arrow-backed implementation of RasterRef for a single raster row.
+///
+/// Holds flat references to the underlying Arrow arrays so the impl does
+/// not borrow from a `RasterStructArray` wrapper. That keeps
+/// `RasterStructArray::get(&self, ...)` callable without a `&'a self`
+/// constraint, which would otherwise force callers to hoist the
+/// `RasterStructArray` into a `let` binding.
+pub struct RasterRefImpl<'a> {
+    crs_array: &'a StringViewArray,
+    transform_list: &'a ListArray,
+    transform_values: &'a Float64Array,
+    spatial_dims_list: &'a ListArray,
+    spatial_dims_values: &'a StringViewArray,
+    spatial_shape_list: &'a ListArray,
+    spatial_shape_values: &'a Int64Array,
     bands_list: &'a ListArray,
-    raster_index: usize,
-    // Direct references to the metadata and data arrays
-    nodata_array: &'a BinaryArray,
-    storage_type_array: &'a UInt32Array,
-    datatype_array: &'a UInt32Array,
-    outdb_url_array: &'a StringArray,
-    outdb_band_id_array: &'a UInt32Array,
+    band_name_array: &'a StringArray,
+    band_dim_names_list: &'a ListArray,
+    band_dim_names_values: &'a StringArray,
+    band_source_shape_list: &'a ListArray,
+    band_source_shape_values: &'a UInt64Array,
+    band_datatype_array: &'a UInt32Array,
+    band_nodata_array: &'a BinaryArray,
+    band_view_list: &'a ListArray,
+    band_outdb_uri_array: &'a StringArray,
+    band_outdb_format_array: &'a StringViewArray,
     band_data_array: &'a BinaryViewArray,
+    raster_index: usize,
+}
+
+impl<'a> RasterRefImpl<'a> {
+    /// Returns the raw CRS string reference with the array's lifetime.
+    pub fn crs_str_ref(&self) -> Option<&'a str> {
+        if self.crs_array.is_null(self.raster_index) {
+            None
+        } else {
+            Some(self.crs_array.value(self.raster_index))
+        }
+    }
 }
 
-impl<'a> BandsRef for BandsRefImpl<'a> {
-    fn len(&self) -> usize {
+impl<'a> RasterRef for RasterRefImpl<'a> {
+    fn num_bands(&self) -> usize {
         self.bands_list.value_length(self.raster_index) as usize
     }
 
-    /// Get a specific band by number (1-based index)
-    fn band(&self, number: usize) -> Result<Box<dyn BandRef + '_>, ArrowError> 
{
-        if number == 0 {
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "Invalid band number {number}: band numbers must be 1-based"
-            )));
-        }
-        // By convention, band numbers are 1-based.
-        // Convert to zero-based index.
-        let index = number - 1;
-        if index >= self.len() {
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "Band number {} is out of range: this raster has {} bands",
-                number,
-                self.len()
-            )));
-        }
+    fn bands(&self) -> Bands<'_> {
+        Bands::new(self)
+    }
 
+    fn band(&self, index: usize) -> Option<Box<dyn BandRef + '_>> {
+        if index >= self.num_bands() {
+            return None;
+        }
         let start = self.bands_list.value_offsets()[self.raster_index] as 
usize;
         let band_row = start + index;
 
-        let band_metadata = BandMetadataRefImpl {
-            nodata_array: self.nodata_array,
-            storage_type_array: self.storage_type_array,
-            datatype_array: self.datatype_array,
-            outdb_url_array: self.outdb_url_array,
-            outdb_band_id_array: self.outdb_band_id_array,
-            band_index: band_row,
-        };
-
-        let band_data = self.band_data_array.value(band_row);
+        // Read source shape slice.
+        let ss_start = self.band_source_shape_list.value_offsets()[band_row] 
as usize;
+        let ss_end = self.band_source_shape_list.value_offsets()[band_row + 1] 
as usize;
+        let source_shape: &[u64] = 
&self.band_source_shape_values.values()[ss_start..ss_end];
 
-        Ok(Box::new(BandRefImpl {
-            band_metadata,
-            band_data,
-        }))
-    }
-
-    fn iter(&self) -> Box<dyn BandIterator<'_> + '_> {
-        Box::new(BandIteratorImpl {
-            bands: self,
-            current: 1, // Start at 1 for 1-based band numbering
-        })
-    }
-}
+        // Reject 0-D bands at the read boundary. Schema doesn't forbid them
+        // outright but every consumer assumes ndim >= 1.
+        if source_shape.is_empty() {
+            return None;
+        }
 
-/// Concrete implementation of BandIterator trait
-pub struct BandIteratorImpl<'a> {
-    bands: &'a dyn BandsRef,
-    current: usize,
-}
+        // Resolve data type up front; an unknown discriminant is a
+        // schema-corruption bug, not user data, so failing the band is
+        // appropriate.
+        let data_type_value = self.band_datatype_array.value(band_row);
+        let data_type = BandDataType::try_from_u32(data_type_value)?;
+
+        // Only the canonical identity view (null view row) is written today.
+        // A non-null view row would require the view → byte-stride composition
+        // path that is deferred to a follow-up; reject it here so callers see
+        // a clean "no band" rather than a panic.
+        if !self.band_view_list.is_null(band_row) {
+            return None;
+        }

Review Comment:
   done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to