james-willis commented on code in PR #749:
URL: https://github.com/apache/sedona-db/pull/749#discussion_r3228757377
##########
rust/sedona-raster/src/traits.rs:
##########
@@ -15,120 +15,305 @@
// specific language governing permissions and limitations
// under the License.
+use std::borrow::Cow;
+
use arrow_schema::ArrowError;
+use sedona_schema::raster::BandDataType;
-use sedona_schema::raster::{BandDataType, StorageType};
-
-/// Metadata for a raster
-#[derive(Debug, Clone)]
-pub struct RasterMetadata {
- pub width: u64,
- pub height: u64,
- pub upperleft_x: f64,
- pub upperleft_y: f64,
- pub scale_x: f64,
- pub scale_y: f64,
- pub skew_x: f64,
- pub skew_y: f64,
+/// Zero-copy view into a band's N-D data buffer with layout metadata.
+///
+/// `shape`, `strides`, and `offset` describe the *visible* region in
+/// byte-stride terms — they are computed by composing the band's
+/// `source_shape` (the natural extent of `buffer`) with its `view`
+/// (the per-axis `(source_axis, start, step, steps)` slice spec). Stride
+/// can be zero (broadcast) or negative (reverse iteration), and may not be
+/// C-order. Consumers that need a flat row-major buffer should use
+/// `BandRef::contiguous_data()` instead.
+#[derive(Debug)]
+pub struct NdBuffer<'a> {
+ pub buffer: &'a [u8],
+ pub shape: &'a [u64],
+ pub strides: &'a [i64],
+ pub offset: u64,
+ pub data_type: BandDataType,
}
-/// Metadata for a single band
-#[derive(Debug, Clone)]
-pub struct BandMetadata {
- pub nodata_value: Option<Vec<u8>>,
- pub storage_type: StorageType,
- pub datatype: BandDataType,
- /// URL for OutDb reference (only used when storage_type == OutDbRef)
- pub outdb_url: Option<String>,
- /// Band ID within the OutDb resource (only used when storage_type == OutDbRef)
- pub outdb_band_id: Option<u32>,
+/// One per-dimension entry of a band's logical view. Describes how a
+/// visible axis maps onto an axis of the underlying source buffer.
+///
+/// - `source_axis`: index into the band's `source_shape` that this visible
+/// axis reads from. Across a band's full view, `source_axis` values must
+/// form a permutation of `0..ndim` — axis-dropping and axis-introducing
+/// views are not supported today.
+/// - `start`: starting index along the source axis (in elements, not bytes).
+/// - `step`: stride between consecutive visible elements along the source
+/// axis. `step == 0` means broadcast (the same source element is
+/// exposed `steps` times); negative `step` means reverse iteration.
+/// - `steps`: number of visible elements along this axis. `steps == 0` is
+/// allowed (empty axis).
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct ViewEntry {
+ pub source_axis: i64,
+ pub start: i64,
+ pub step: i64,
+ pub steps: i64,
}
-/// Trait for accessing complete raster data
+/// Trait for accessing an N-dimensional raster (top level).
+///
+/// Replaces the legacy `RasterRef` + `MetadataRef` + `BandsRef` hierarchy with
+/// a single flat interface. Bands are 0-indexed.
pub trait RasterRef {
- /// Raster metadata accessor
- fn metadata(&self) -> &dyn MetadataRef;
- /// CRS accessor
+ /// Number of bands/variables
+ fn num_bands(&self) -> usize;
+
+ /// Access a band by 0-based index
+ fn band(&self, index: usize) -> Option<Box<dyn BandRef + '_>>;
+
+ /// Band name (e.g., Zarr variable name). None for unnamed bands.
+ fn band_name(&self, index: usize) -> Option<&str>;
+
+ /// Fast path for band data type — reads the scalar `data_type` column
+ /// without materialising a full `BandRef`. UDFs that only need this
+ /// metadata field should prefer this over `band(i)?.data_type()`.
+ /// Returns None if `index` is out of range or the discriminant is invalid.
+ ///
+ /// The default implementation delegates to `band(i)`. Backends with a
+ /// flat columnar layout should override for the no-allocation fast path.
+ fn band_data_type(&self, index: usize) -> Option<BandDataType> {
+ self.band(index).map(|b| b.data_type())
+ }
+
+ /// Fast path for band outdb URI — reads the `outdb_uri` column without
+ /// materialising a `BandRef`. Returns None if the band has no URI or
+ /// if `index` is out of range.
+ ///
+ /// The default returns None: delegating to `band(index)` would allocate a
+ /// `Box<dyn BandRef>` that the returned `&str` cannot outlive. The
+ /// raster-array backend overrides this to read the column directly.
+ fn band_outdb_uri(&self, index: usize) -> Option<&str> {
+ let _ = index;
+ None
+ }
+
+ /// Fast path for band outdb format — reads the `outdb_format` column
+ /// without materialising a `BandRef`. Default returns None for the
+ /// same lifetime reason as `band_outdb_uri`.
+ fn band_outdb_format(&self, index: usize) -> Option<&str> {
+ let _ = index;
+ None
+ }
+
+ /// Fast path for band nodata bytes — reads the `nodata` column without
+ /// materialising a `BandRef`. Default returns None for the same
+ /// lifetime reason as `band_outdb_uri`.
+ fn band_nodata(&self, index: usize) -> Option<&[u8]> {
+ let _ = index;
+ None
+ }
+
+ /// CRS string (PROJJSON, WKT, or authority code). None if not set.
fn crs(&self) -> Option<&str>;
- /// Bands accessor
- fn bands(&self) -> &dyn BandsRef;
-}
-/// Trait for accessing raster metadata (dimensions, geotransform, bounding box, etc.)
-pub trait MetadataRef {
- /// Width of the raster in pixels
- fn width(&self) -> u64;
- /// Height of the raster in pixels
- fn height(&self) -> u64;
- /// X coordinate of the upper-left corner
- fn upper_left_x(&self) -> f64;
- /// Y coordinate of the upper-left corner
- fn upper_left_y(&self) -> f64;
- /// X-direction pixel size (scale)
- fn scale_x(&self) -> f64;
- /// Y-direction pixel size (scale)
- fn scale_y(&self) -> f64;
- /// X-direction skew/rotation
- fn skew_x(&self) -> f64;
- /// Y-direction skew/rotation
- fn skew_y(&self) -> f64;
-}
-/// Trait for accessing all bands in a raster
-pub trait BandsRef {
- /// Number of bands in the raster
- fn len(&self) -> usize;
- /// Check if no bands are present
- fn is_empty(&self) -> bool {
- self.len() == 0
- }
- /// Get a specific band by number (returns Error if out of bounds)
- /// By convention, band numbers are 1-based
- fn band(&self, number: usize) -> Result<Box<dyn BandRef + '_>, ArrowError>;
- /// Iterator over all bands
- fn iter(&self) -> Box<dyn BandIterator<'_> + '_>;
+ /// 6-element affine transform in GDAL GeoTransform order:
+ /// `[origin_x, scale_x, skew_x, origin_y, skew_y, scale_y]`
+ fn transform(&self) -> &[f64];
+
+ /// Spatial dimension names, in order (today `["x","y"]`; a future Z phase
+ /// would extend to `["x","y","z"]`). Every band must contain each of these
+ /// names in its own `dim_names`, with matching sizes.
+ fn spatial_dims(&self) -> Vec<&str>;
+
+ /// Spatial dimension sizes, in the same order as `spatial_dims`. Today
+ /// `[width, height]`.
+ fn spatial_shape(&self) -> &[i64];
+
+ /// Name of the X spatial dimension (e.g., "x", "lon", "easting").
+ fn x_dim(&self) -> &str {
+ let dims = self.spatial_dims();
+ dims.into_iter().next().unwrap_or("x")
+ }
+
+ /// Name of the Y spatial dimension (e.g., "y", "lat", "northing").
+ fn y_dim(&self) -> &str {
+ let dims = self.spatial_dims();
+ dims.into_iter().nth(1).unwrap_or("y")
+ }
+
+ /// Width in pixels — size of the X spatial dimension from the top-level
+ /// `spatial_shape`.
+ fn width(&self) -> Option<u64> {
+ self.spatial_shape().first().map(|&v| v as u64)
+ }
+
+ /// Height in pixels — size of the Y spatial dimension from the top-level
+ /// `spatial_shape`.
+ fn height(&self) -> Option<u64> {
+ self.spatial_shape().get(1).map(|&v| v as u64)
+ }
+
+ /// Look up a band by name. Returns None if no band has that name.
+ fn band_by_name(&self, name: &str) -> Option<Box<dyn BandRef + '_>> {
+ (0..self.num_bands())
+ .find(|&i| self.band_name(i) == Some(name))
+ .and_then(|i| self.band(i))
+ }
}
-/// Trait for accessing individual band data
+/// Trait for accessing a single band/variable within an N-D raster.
+///
+/// This is the consumer interface. Implementations handle storage details.
+/// Two data access paths:
+/// - `contiguous_data()` — flat row-major bytes for consumers that don't need
+/// stride awareness (most RS_* functions, GDAL boundary, serialization).
+/// - `nd_buffer()` — raw buffer + shape + strides + offset for stride-aware
+/// consumers (numpy zero-copy views, Arrow FFI) that want to avoid copies.
pub trait BandRef {
- /// Band metadata accessor
- fn metadata(&self) -> &dyn BandMetadataRef;
- /// Raw band data as bytes (zero-copy access)
- fn data(&self) -> &[u8];
-}
+ // -- Dimension metadata --
-/// Trait for accessing individual band metadata
-pub trait BandMetadataRef {
- /// No-data value as raw bytes (None if null)
- fn nodata_value(&self) -> Option<&[u8]>;
- /// Storage type (InDb, OutDbRef, etc)
- fn storage_type(&self) -> Result<StorageType, ArrowError>;
- /// Band data type (UInt8, Float32, etc.)
- fn data_type(&self) -> Result<BandDataType, ArrowError>;
- /// OutDb URL (only used when storage_type == OutDbRef)
- fn outdb_url(&self) -> Option<&str>;
- /// OutDb band ID (only used when storage_type == OutDbRef)
- fn outdb_band_id(&self) -> Option<u32>;
-
- /// No-data value interpreted as f64.
+ /// Number of dimensions in this band
+ fn ndim(&self) -> usize;
+
+ /// Dimension names in order (e.g., `["time", "y", "x"]`)
+ fn dim_names(&self) -> Vec<&str>;
+
+ /// Visible shape — size of each dimension in the band's view, in
+ /// `dim_names` order. Derived from `view`: `[v.steps for v in view]`.
+ /// This is what almost all consumers want; use `raw_source_shape()` only
+ /// when you need to address into the raw `data` buffer (e.g. FFI).
+ fn shape(&self) -> &[u64];
+
+ /// **Internal/FFI-only.** Natural C-order extent of the band's
+ /// underlying `data` buffer, indexed by *source* axis (not visible
+ /// axis). Almost every consumer wants `shape()` instead — that is the
+ /// region the band exposes, and is what you compare against
+ /// `spatial_shape`, iterate over for pixels, and compose further views
+ /// against. The two only agree when the band's view is the identity;
+ /// any slice, broadcast, or permutation makes them diverge.
+ ///
+ /// Use this only when you need to index directly into the raw `data`
+ /// bytes (e.g. Arrow C Data Interface, numpy zero-copy views) and you
+ /// also handle `view()` and the byte-stride layout from `nd_buffer()`.
+ fn raw_source_shape(&self) -> &[u64];
+
+ /// Per-visible-dimension view entries describing how the band's
+ /// visible axes map onto its `source_shape`. `view().len() == ndim()`.
+ /// See `ViewEntry` for per-entry semantics.
+ fn view(&self) -> &[ViewEntry];
+
+ /// Size of a named dimension (None if doesn't exist)
+ fn dim_size(&self, name: &str) -> Option<u64> {
+ let idx = self.dim_index(name)?;
+ Some(self.shape()[idx])
+ }
+
+ /// Index of a named dimension (None if doesn't exist)
+ fn dim_index(&self, name: &str) -> Option<usize> {
+ self.dim_names().iter().position(|n| *n == name)
+ }
+
+ /// True iff this band is shaped exactly like a legacy 2-D raster band:
+ /// `dim_names == ["y", "x"]` and the view is the identity over the
+ /// band's `raw_source_shape` (no slice, no broadcast, no permutation).
+ ///
+ /// GDAL-backed SQL functions use this to refuse N-D bands cleanly while
+ /// they wait for an MDArray-aware port.
+ fn is_2d(&self) -> bool {
+ let dims = self.dim_names();
+ if dims.len() != 2 || dims[0] != "y" || dims[1] != "x" {
+ return false;
+ }
+ let view = self.view();
+ let source_shape = self.raw_source_shape();
+ if view.len() != 2 || source_shape.len() != 2 {
+ return false;
+ }
+ view.iter().enumerate().all(|(i, v)| {
+ v.source_axis as usize == i
+ && v.start == 0
+ && v.step == 1
+ && v.steps >= 0
+ && v.steps as u64 == source_shape[i]
+ })
+ }
+
+ // -- Band metadata --
+
+ /// Data type for all elements in this band
+ fn data_type(&self) -> BandDataType;
+
+ /// Nodata value as raw bytes (None if not set)
+ fn nodata(&self) -> Option<&[u8]>;
+
+ /// OutDb URI — location of the external resource (e.g.
+ /// `"s3://bucket/file.tif"`, `"file:///…"`, `"mem://…"`). None for
+ /// in-memory bands. Scheme resolution is delegated to an
+ /// `ObjectStoreRegistry`; it does *not* imply a format.
+ fn outdb_uri(&self) -> Option<&str> {
+ None
+ }
+
+ /// OutDb format — how to interpret the bytes at `outdb_uri`
+ /// (e.g. `"geotiff"`, `"zarr"`). None means in-memory — the band's
+ /// `contiguous_data()` / `nd_buffer()` is authoritative.
+ fn outdb_format(&self) -> Option<&str> {
+ None
+ }
+
+ /// True if this band's bytes live in the `data` buffer (in-database).
+ /// False if the bytes must be fetched from `outdb_uri` (out-of-database).
+ ///
+ /// The discriminator is whether the `data` buffer is non-empty —
+ /// `outdb_uri` and `outdb_format` are orthogonal location/format hints
+ /// that may be set on either kind of band.
+ fn is_indb(&self) -> bool {
+ // Default: materialize via nd_buffer and check buffer emptiness.
+ // Concrete impls should override with a direct buffer check.
+ self.nd_buffer().is_ok_and(|b| !b.buffer.is_empty())
+ }
+
+ // -- Data access --
+
+ /// Raw backing buffer + visible-region layout. Triggers load for lazy
+ /// impls. The returned `NdBuffer` describes the band's view in
+ /// byte-stride terms — `shape` is the visible shape, `strides` and
+ /// `offset` are computed by composing the view with the source's
+ /// natural C-order byte strides. Strides may be zero (broadcast) or
+ /// negative (reverse iteration).
+ fn nd_buffer(&self) -> Result<NdBuffer<'_>, ArrowError>;
+
+ /// Contiguous row-major bytes covering the *visible* region. Zero-copy
+ /// (`Cow::Borrowed`) when the view is full identity over a C-order
+ /// source buffer; copies into a new buffer when the view slices,
+ /// broadcasts, or permutes. Most RS_* functions use this.
+ fn contiguous_data(&self) -> Result<Cow<'_, [u8]>, ArrowError>;
+
+ /// Nodata value interpreted as f64.
///
/// Returns `Ok(None)` when no nodata value is defined, `Ok(Some(f64))` on
- /// success, or an error when the raw bytes have an unexpected length for
- /// the band's data type.
- fn nodata_value_as_f64(&self) -> Result<Option<f64>, ArrowError> {
- let bytes = match self.nodata_value() {
+ /// success, or an error when the raw bytes have an unexpected length.
+ ///
+ /// # Warning
+ ///
+ /// For 64-bit integer bands (`Int64`, `UInt64`), the conversion to `f64`
+ /// is lossy when the magnitude exceeds 2^53 — values outside
+ /// `[-9_007_199_254_740_992, 9_007_199_254_740_992]` will be rounded to
+ /// the nearest representable double. Use `nodata()` directly to recover
+ /// the exact bytes if you need full integer precision.
+ fn nodata_as_f64(&self) -> Result<Option<f64>, ArrowError> {
Review Comment:
done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]