james-willis commented on code in PR #749:
URL: https://github.com/apache/sedona-db/pull/749#discussion_r3236871677
##########
rust/sedona-raster/src/traits.rs:
##########
@@ -73,62 +108,550 @@ pub trait MetadataRef {
/// Y-direction skew/rotation
fn skew_y(&self) -> f64;
}
-/// Trait for accessing all bands in a raster
-pub trait BandsRef {
- /// Number of bands in the raster
- fn len(&self) -> usize;
- /// Check if no bands are present
- fn is_empty(&self) -> bool {
+
+impl MetadataRef for RasterMetadata {
+ fn width(&self) -> u64 {
+ self.width
+ }
+ fn height(&self) -> u64 {
+ self.height
+ }
+ fn upper_left_x(&self) -> f64 {
+ self.upperleft_x
+ }
+ fn upper_left_y(&self) -> f64 {
+ self.upperleft_y
+ }
+ fn scale_x(&self) -> f64 {
+ self.scale_x
+ }
+ fn scale_y(&self) -> f64 {
+ self.scale_y
+ }
+ fn skew_x(&self) -> f64 {
+ self.skew_x
+ }
+ fn skew_y(&self) -> f64 {
+ self.skew_y
+ }
+}
+
+impl RasterMetadata {
+ pub fn width(&self) -> u64 {
+ self.width
+ }
+ pub fn height(&self) -> u64 {
+ self.height
+ }
+ pub fn upper_left_x(&self) -> f64 {
+ self.upperleft_x
+ }
+ pub fn upper_left_y(&self) -> f64 {
+ self.upperleft_y
+ }
+ pub fn scale_x(&self) -> f64 {
+ self.scale_x
+ }
+ pub fn scale_y(&self) -> f64 {
+ self.scale_y
+ }
+ pub fn skew_x(&self) -> f64 {
+ self.skew_x
+ }
+ pub fn skew_y(&self) -> f64 {
+ self.skew_y
+ }
+}
+
+/// Concrete band metadata returned by `BandRef::metadata()`.
+///
+/// Restored from the pre-N-D schema. The `outdb_url` and `outdb_band_id`
+/// fields are eagerly parsed from the N-D `outdb_uri` (which carries a
+/// `#band=N` fragment in the SedonaDB convention) so callers from the
+/// pre-N-D era keep compiling against the same field names.
+#[derive(Debug, Clone)]
+pub struct BandMetadata {
+ pub nodata_value: Option<Vec<u8>>,
+ pub storage_type: sedona_schema::raster::StorageType,
+ pub datatype: BandDataType,
+ pub outdb_url: Option<String>,
+ pub outdb_band_id: Option<u32>,
+}
+
+impl BandMetadata {
+ pub fn nodata_value(&self) -> Option<&[u8]> {
+ self.nodata_value.as_deref()
+ }
+ /// Returns the storage type. Wrapped in `Result` to match main's
+ /// `BandMetadataRef::storage_type()` signature — our shim
+ /// implementation never errors, but the signature is preserved so
+ /// existing `matches!(band.metadata().storage_type(), Ok(...))`
+ /// patterns from before the N-D refactor keep compiling.
+ pub fn storage_type(&self) -> Result<sedona_schema::raster::StorageType, ArrowError> {
+ Ok(self.storage_type)
+ }
+ /// Returns the band data type. Wrapped in `Result` to match main's
+ /// `BandMetadataRef::data_type()` signature — see `storage_type()`.
+ pub fn data_type(&self) -> Result<BandDataType, ArrowError> {
+ Ok(self.datatype)
+ }
+ pub fn outdb_url(&self) -> Option<&str> {
+ self.outdb_url.as_deref()
+ }
+ pub fn outdb_band_id(&self) -> Option<u32> {
+ self.outdb_band_id
+ }
+ /// Nodata value interpreted as f64. Mirrors the pre-N-D
+ /// `BandMetadataRef::nodata_value_as_f64()`. Uses the lossless
+ /// conversion (errors on i64/u64 magnitudes > 2^53) so the shim
+ /// surface picks up the same correctness fix as
+ /// `BandRef::nodata_as_f64()`.
+ pub fn nodata_value_as_f64(&self) -> Result<Option<f64>, ArrowError> {
+ let bytes = match self.nodata_value.as_deref() {
+ Some(b) => b,
+ None => return Ok(None),
+ };
+ nodata_bytes_to_f64_lossless(bytes, &self.datatype).map(Some)
+ }
+}
+
+/// Parse the SedonaDB `#band=N` fragment out of an out-DB URI.
+/// Returns `(base_url, band_id)`; band_id defaults to 1 if absent.
+/// Duplicated (intentionally — and minimally) from
+/// `sedona-raster-gdal::source_uri` because the shim lives in
+/// `sedona-raster` and can't reach across the crate boundary.
+fn split_outdb_band_fragment(uri: &str) -> (String, u32) {
+ if let Some(hash_pos) = uri.rfind('#') {
+ let (base, fragment) = uri.split_at(hash_pos);
+ let fragment = &fragment[1..]; // skip the '#'
+ if let Some(rest) = fragment.strip_prefix("band=") {
+ if let Ok(n) = rest.parse::<u32>() {
+ return (base.to_string(), n);
+ }
+ }
+ }
+ (uri.to_string(), 1)
+}
+
+/// Iteration view over a raster's bands. Returned by `RasterRef::bands()`.
+///
+/// Wraps a borrowed `&dyn RasterRef` and offers the `len()` / `band(1-based)`
+/// / `iter()` shape that callers used before the N-D refactor. New code can
+/// equivalently use `RasterRef::num_bands()` and `RasterRef::band(0-based)`
+/// directly; both call patterns coexist.
+pub struct Bands<'a> {
+ raster: &'a dyn RasterRef,
+}
+
+impl<'a> Bands<'a> {
+ /// Wrap a `&dyn RasterRef` for the legacy 1-based band-access surface.
+ pub fn new(raster: &'a dyn RasterRef) -> Self {
+ Self { raster }
+ }
+}
+
+impl<'a> Bands<'a> {
+ /// Number of bands in the raster.
+ pub fn len(&self) -> usize {
+ self.raster.num_bands()
+ }
+
+ /// True iff the raster has zero bands.
+ pub fn is_empty(&self) -> bool {
self.len() == 0
}
- /// Get a specific band by number (returns Error if out of bounds)
- /// By convention, band numbers are 1-based
- fn band(&self, number: usize) -> Result<Box<dyn BandRef + '_>, ArrowError>;
- /// Iterator over all bands
- fn iter(&self) -> Box<dyn BandIterator<'_> + '_>;
+
+ /// Look up a band by **1-based** number. Returns an error rather than
+ /// `None` so callers can use `?`. For 0-based access, use
+ /// `RasterRef::band` directly.
+ pub fn band(&self, number: usize) -> Result<Box<dyn BandRef + 'a>, ArrowError> {
+ if number == 0 {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "Invalid band number {number}: band numbers must be 1-based"
+ )));
+ }
+ self.raster.band(number - 1).ok_or_else(|| {
+ ArrowError::InvalidArgumentError(format!(
+ "Band number {} is out of range: this raster has {} bands",
+ number,
+ self.raster.num_bands()
+ ))
+ })
+ }
+
+ /// Iterate over every band in 0-based order.
+ pub fn iter(&self) -> impl Iterator<Item = Box<dyn BandRef + 'a>> + 'a {
+ let raster = self.raster;
+ (0..raster.num_bands()).filter_map(move |i| raster.band(i))
+ }
}
-/// Trait for accessing individual band data
+/// Trait for accessing an N-dimensional raster (top level).
+///
+/// Replaces the legacy `RasterRef` + `MetadataRef` + `BandsRef` hierarchy with
+/// a single flat interface. Bands are 0-indexed.
+pub trait RasterRef {
+ /// Number of bands/variables
+ fn num_bands(&self) -> usize;
+
+ /// Access a band by 0-based index
+ fn band(&self, index: usize) -> Option<Box<dyn BandRef + '_>>;
+
+ /// 1-based band-access view used by callers from before the N-D
+ /// refactor. Implementers typically write `Bands::new(self)`.
+ fn bands(&self) -> Bands<'_>;
+
+ /// Band name (e.g., Zarr variable name). None for unnamed bands.
+ fn band_name(&self, index: usize) -> Option<&str>;
+
+ /// Fast path for band data type — reads the scalar `data_type` column
+ /// without materialising a full `BandRef`. UDFs that only need this
+ /// metadata field should prefer this over `band(i)?.data_type()`.
+ /// Returns None if `index` is out of range or the discriminant is invalid.
+ ///
+ /// The default implementation delegates to `band(i)`. Backends with a
+ /// flat columnar layout should override for the no-allocation fast path.
+ fn band_data_type(&self, index: usize) -> Option<BandDataType> {
+ self.band(index).map(|b| b.data_type())
+ }
+
+ /// Fast path for band outdb URI — reads the `outdb_uri` column without
+ /// materialising a `BandRef`. Returns None if the band has no URI or
+ /// if `index` is out of range.
+ ///
+ /// The default implementation must allocate a `Box<dyn BandRef>`; the
+ /// raster-array backend overrides it to read the column directly.
+ /// Default returns None because the borrow can't outlive the boxed band.
+ fn band_outdb_uri(&self, index: usize) -> Option<&str> {
+ let _ = index;
+ None
+ }
+
+ /// Fast path for band outdb format — reads the `outdb_format` column
+ /// without materialising a `BandRef`. Default returns None for the
+ /// same lifetime reason as `band_outdb_uri`.
+ fn band_outdb_format(&self, index: usize) -> Option<&str> {
+ let _ = index;
+ None
+ }
+
+ /// Fast path for band nodata bytes — reads the `nodata` column without
+ /// materialising a `BandRef`. Default returns None for the same
+ /// lifetime reason as `band_outdb_uri`.
+ fn band_nodata(&self, index: usize) -> Option<&[u8]> {
+ let _ = index;
+ None
+ }
+
+ /// CRS string (PROJJSON, WKT, or authority code). None if not set.
+ fn crs(&self) -> Option<&str>;
+
+ /// 6-element affine transform in GDAL GeoTransform order:
+ /// `[origin_x, scale_x, skew_x, origin_y, skew_y, scale_y]`
+ fn transform(&self) -> &[f64];
+
+ /// Eagerly-computed concrete metadata view (width, height, geotransform
+ /// scalars). Mirrors the pre-N-D `RasterRef::metadata()` accessor.
+ ///
+ /// Panics if `spatial_shape` lacks width/height or `transform` is the
+ /// wrong length — those are corrupt-schema cases that error cleanly
+ /// through the `width()`/`height()` trait methods, but the metadata
+ /// accessor predates that contract and is kept infallible for caller
+ /// ergonomics.
+ fn metadata(&self) -> RasterMetadata {
+ let width = self
+ .width()
+ .expect("raster has no width (spatial_shape missing); use width()? for error handling");
+ let height = self
+ .height()
+ .expect("raster has no height; use height()? for error handling");
+ let t = self.transform();
+ if t.len() != 6 {
+ panic!("transform must be 6 elements, got {}", t.len());
+ }
+ RasterMetadata {
+ width,
+ height,
+ upperleft_x: t[0],
+ scale_x: t[1],
+ skew_x: t[2],
+ upperleft_y: t[3],
+ skew_y: t[4],
+ scale_y: t[5],
+ }
+ }
+
+ /// Spatial dimension names, in order (today `["x","y"]`; a future Z phase
+ /// would extend to `["x","y","z"]`). Every band must contain each of these
+ /// names in its own `dim_names`, with matching sizes.
+ fn spatial_dims(&self) -> Vec<&str>;
+
+ /// Spatial dimension sizes, in the same order as `spatial_dims`. Today
+ /// `[width, height]`.
+ fn spatial_shape(&self) -> &[i64];
+
+ /// Name of the X spatial dimension (e.g., "x", "lon", "easting").
+ fn x_dim(&self) -> &str {
+ let dims = self.spatial_dims();
+ dims.into_iter().next().unwrap_or("x")
+ }
+
+ /// Name of the Y spatial dimension (e.g., "y", "lat", "northing").
+ fn y_dim(&self) -> &str {
+ let dims = self.spatial_dims();
+ dims.into_iter().nth(1).unwrap_or("y")
+ }
+
+ /// Width in pixels — size of the X spatial dimension from the top-level
+ /// `spatial_shape`. Errors if `spatial_shape` is empty or the X size is
+ /// negative; both are invariant violations rather than legitimate "no
+ /// value" states.
+ fn width(&self) -> Result<u64, ArrowError> {
+ let shape = self.spatial_shape();
+ let Some(&v) = shape.first() else {
+ return Err(ArrowError::InvalidArgumentError(
+ "raster has no width (spatial_shape is empty)".to_string(),
+ ));
+ };
+ if v < 0 {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "raster width must be non-negative, got {v}"
+ )));
+ }
+ Ok(v as u64)
+ }
+
+ /// Height in pixels — size of the Y spatial dimension from the top-level
+ /// `spatial_shape`. Errors if `spatial_shape` has fewer than two entries
+ /// or the Y size is negative.
+ fn height(&self) -> Result<u64, ArrowError> {
+ let shape = self.spatial_shape();
+ let Some(&v) = shape.get(1) else {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "raster has no height (spatial_shape has {} entries, need >= 2)",
+ shape.len()
+ )));
+ };
+ if v < 0 {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "raster height must be non-negative, got {v}"
+ )));
+ }
+ Ok(v as u64)
+ }
+
+ /// Look up a band by name. Returns None if no band has that name.
+ fn band_by_name(&self, name: &str) -> Option<Box<dyn BandRef + '_>> {
+ (0..self.num_bands())
+ .find(|&i| self.band_name(i) == Some(name))
+ .and_then(|i| self.band(i))
+ }
+}
+
+/// Trait for accessing a single band/variable within an N-D raster.
+///
+ /// This is the consumer interface. Implementations handle storage details.
+/// Two data access paths:
+/// - `contiguous_data()` — flat row-major bytes for consumers that don't need
+/// stride awareness (most RS_* functions, GDAL boundary, serialization).
+/// - `nd_buffer()` — raw buffer + shape + strides + offset for stride-aware
+/// consumers (numpy zero-copy views, Arrow FFI) that want to avoid copies.
pub trait BandRef {
- /// Band metadata accessor
- fn metadata(&self) -> &dyn BandMetadataRef;
- /// Raw band data as bytes (zero-copy access)
- fn data(&self) -> &[u8];
-}
-
-/// Trait for accessing individual band metadata
-pub trait BandMetadataRef {
- /// No-data value as raw bytes (None if null)
- fn nodata_value(&self) -> Option<&[u8]>;
- /// Storage type (InDb, OutDbRef, etc)
- fn storage_type(&self) -> Result<StorageType, ArrowError>;
- /// Band data type (UInt8, Float32, etc.)
- fn data_type(&self) -> Result<BandDataType, ArrowError>;
- /// OutDb URL (only used when storage_type == OutDbRef)
- fn outdb_url(&self) -> Option<&str>;
- /// OutDb band ID (only used when storage_type == OutDbRef)
- fn outdb_band_id(&self) -> Option<u32>;
-
- /// No-data value interpreted as f64.
+ // -- Dimension metadata --
+
+ /// Number of dimensions in this band
+ fn ndim(&self) -> usize;
+
+ /// Dimension names in order (e.g., `["time", "y", "x"]`)
+ fn dim_names(&self) -> Vec<&str>;
+
+ /// Visible shape — size of each dimension in the band's view, in
+ /// `dim_names` order. Derived from `view`: `[v.steps for v in view]`.
+ /// This is what almost all consumers want; use `raw_source_shape()` only
+ /// when you need to address into the raw `data` buffer (e.g. FFI).
+ fn shape(&self) -> &[u64];
+
+ /// **Internal/FFI-only.** Natural C-order extent of the band's
+ /// underlying `data` buffer, indexed by *source* axis (not visible
+ /// axis). Almost every consumer wants `shape()` instead — that is the
+ /// region the band exposes, and is what you compare against
+ /// `spatial_shape`, iterate over for pixels, and compose further views
+ /// against. The two only agree when the band's view is the identity;
+ /// any slice, broadcast, or permutation makes them diverge.
+ ///
+ /// Use this only when you need to index directly into the raw `data`
+ /// bytes (e.g. Arrow C Data Interface, numpy zero-copy views) and you
+ /// also handle `view()` and the byte-stride layout from `nd_buffer()`.
+ fn raw_source_shape(&self) -> &[u64];
+
+ /// Per-visible-dimension view entries describing how the band's
+ /// visible axes map onto its `source_shape`. `view().len() == ndim()`.
+ /// See `ViewEntry` for per-entry semantics.
+ fn view(&self) -> &[ViewEntry];
+
+ /// Size of a named dimension (None if doesn't exist)
+ fn dim_size(&self, name: &str) -> Option<u64> {
+ let idx = self.dim_index(name)?;
+ Some(self.shape()[idx])
+ }
+
+ /// Index of a named dimension (None if doesn't exist)
+ fn dim_index(&self, name: &str) -> Option<usize> {
+ self.dim_names().iter().position(|n| *n == name)
+ }
+
+ /// True iff this band is shaped exactly like a legacy 2-D raster band:
+ /// `dim_names == ["y", "x"]` and the view is the identity over the
+ /// band's `raw_source_shape` (no slice, no broadcast, no permutation).
+ ///
+ /// GDAL-backed SQL functions use this to refuse N-D bands cleanly while
+ /// they wait for an MDArray-aware port.
+ fn is_2d(&self) -> bool {
+ let dims = self.dim_names();
+ if dims.len() != 2 || dims[0] != "y" || dims[1] != "x" {
+ return false;
+ }
+ let view = self.view();
+ let source_shape = self.raw_source_shape();
+ if view.len() != 2 || source_shape.len() != 2 {
+ return false;
+ }
+ view.iter().enumerate().all(|(i, v)| {
+ v.source_axis as usize == i
+ && v.start == 0
+ && v.step == 1
+ && v.steps >= 0
+ && v.steps as u64 == source_shape[i]
+ })
+ }
+
+ // -- Band metadata --
+
+ /// Data type for all elements in this band
+ fn data_type(&self) -> BandDataType;
+
+ /// Nodata value as raw bytes (None if not set)
+ fn nodata(&self) -> Option<&[u8]>;
+
+ /// OutDb URI — location of the external resource (e.g.
+ /// `"s3://bucket/file.tif"`, `"file:///…"`, `"mem://…"`). None for
+ /// in-memory bands. Scheme resolution is delegated to an
+ /// `ObjectStoreRegistry`; it does *not* imply a format.
+ fn outdb_uri(&self) -> Option<&str> {
+ None
+ }
+
+ /// OutDb format — how to interpret the bytes at `outdb_uri`
+ /// (e.g. `"geotiff"`, `"zarr"`). None means in-memory — the band's
+ /// `contiguous_data()` / `nd_buffer()` is authoritative.
+ fn outdb_format(&self) -> Option<&str> {
+ None
+ }
+
+ /// True if this band's bytes live in the `data` buffer (in-database).
+ /// False if the bytes must be fetched from `outdb_uri` (out-of-database).
+ ///
+ /// The discriminator is whether the `data` buffer is non-empty —
+ /// `outdb_uri` and `outdb_format` are orthogonal location/format hints
+ /// that may be set on either kind of band.
+ fn is_indb(&self) -> bool {
+ // Default: materialize via nd_buffer and check buffer emptiness.
+ // Concrete impls should override with a direct buffer check.
+ self.nd_buffer().is_ok_and(|b| !b.buffer.is_empty())
+ }
+
+ /// Eagerly-computed concrete band metadata. Mirrors the pre-N-D
+ /// `BandRef::metadata()` accessor.
+ ///
+ /// `outdb_url` and `outdb_band_id` are parsed from `outdb_uri()`'s
+ /// SedonaDB `#band=N` fragment convention so callers that pattern-match
+ /// on those fields keep compiling.
+ fn metadata(&self) -> BandMetadata {
+ let is_indb = self.is_indb();
+ // Match the pre-N-D contract: outdb_url / outdb_band_id are only
+ // populated when storage_type is OutDbRef. PR-B's schema lets the
+ // URI hint coexist with InDb data; this surface hides that.
+ let (outdb_url, outdb_band_id) = if !is_indb {
+ match self.outdb_uri() {
+ Some(uri) => {
+ let (base, band) = split_outdb_band_fragment(uri);
+ (Some(base), Some(band))
+ }
+ None => (None, None),
+ }
+ } else {
+ (None, None)
+ };
+ BandMetadata {
+ nodata_value: self.nodata().map(|b| b.to_vec()),
+ storage_type: if is_indb {
+ sedona_schema::raster::StorageType::InDb
+ } else {
+ sedona_schema::raster::StorageType::OutDbRef
+ },
+ datatype: self.data_type(),
+ outdb_url,
+ outdb_band_id,
+ }
+ }
+
+ // -- Data access --
+
+ /// Raw backing buffer + visible-region layout. Triggers load for lazy
+ /// impls. The returned `NdBuffer` describes the band's view in
+ /// byte-stride terms — `shape` is the visible shape, `strides` and
+ /// `offset` are computed by composing the view with the source's
+ /// natural C-order byte strides. Strides may be zero (broadcast) or
+ /// negative (reverse iteration).
+ fn nd_buffer(&self) -> Result<NdBuffer<'_>, ArrowError>;
+
+ /// Contiguous row-major bytes covering the *visible* region. Zero-copy
+ /// (`Cow::Borrowed`) when the view is full identity over a C-order
+ /// source buffer; copies into a new buffer when the view slices,
+ /// broadcasts, or permutes. Most RS_* functions use this.
+ fn contiguous_data(&self) -> Result<Cow<'_, [u8]>, ArrowError>;
+
+ /// Pre-N-D compatibility shim: raw row-major bytes for InDb,
+ /// identity-view bands. Panics on anything else (OutDb, non-identity
+ /// view, or a `contiguous_data` error) — corresponds to main's
+ /// infallible `BandRef::data() -> &[u8]` which only ever ran against
+ /// identity-view InDb bands.
+ fn data(&self) -> &[u8] {
+ // Default impl forwards through nd_buffer's borrowed slice. This
+ // only borrows the underlying band buffer for identity-view InDb
+ // bands; everything else is a corrupt-shape call site.
+ self.nd_buffer()
+ .expect("BandRef::data() requires an in-db band with bytes")
+ .buffer
+ }
+
+ /// Nodata value interpreted as f64.
///
/// Returns `Ok(None)` when no nodata value is defined, `Ok(Some(f64))` on
- /// success, or an error when the raw bytes have an unexpected length for
- /// the band's data type.
- fn nodata_value_as_f64(&self) -> Result<Option<f64>, ArrowError> {
- let bytes = match self.nodata_value() {
+ /// success, or an error when the raw bytes have an unexpected length **or**
+ /// when the nodata value cannot be represented exactly in `f64`.
+ ///
+ /// 64-bit integer bands (`Int64`, `UInt64`) error rather than silently
+ /// rounding when the magnitude exceeds 2^53 — values outside
+ /// `[-9_007_199_254_740_992, 9_007_199_254_740_992]` can't round-trip
+ /// through `f64` and a rounded sentinel can collide with a real pixel
+ /// value. Use `nodata()` directly to recover the exact bytes when full
+ /// integer precision matters (e.g. when nodata is the type's extreme
+ /// value like `0xFF…FF`).
+ fn nodata_as_f64(&self) -> Result<Option<f64>, ArrowError> {
+ let bytes = match self.nodata() {
Some(b) => b,
None => return Ok(None),
};
- let dt = self.data_type()?;
- nodata_bytes_to_f64(bytes, &dt).map(Some)
+ nodata_bytes_to_f64_lossless(bytes, &self.data_type()).map(Some)
}
}
/// Convert raw nodata bytes to f64 given a [`BandDataType`].
///
/// The bytes are expected to be in little-endian order and exactly match the
/// byte size of the data type.
-fn nodata_bytes_to_f64(bytes: &[u8], dt: &BandDataType) -> Result<f64, ArrowError> {
+pub fn nodata_bytes_to_f64(bytes: &[u8], dt: &BandDataType) -> Result<f64, ArrowError> {
Review Comment:
done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]