james-willis commented on code in PR #749:
URL: https://github.com/apache/sedona-db/pull/749#discussion_r3228770685


##########
rust/sedona-raster/src/builder.rs:
##########
@@ -17,325 +17,580 @@
 
 use arrow_array::{
     builder::{
-        BinaryBuilder, BinaryViewBuilder, BooleanBuilder, Float64Builder, 
StringBuilder,
-        StringViewBuilder, UInt32Builder, UInt64Builder,
+        ArrayBuilder, BinaryBuilder, BinaryViewBuilder, BooleanBuilder, 
Float64Builder,
+        Int64Builder, StringBuilder, StringViewBuilder, UInt32Builder, 
UInt64Builder,
     },
     Array, ArrayRef, ListArray, StructArray,
 };
-use arrow_buffer::{OffsetBuffer, ScalarBuffer};
-use arrow_schema::{ArrowError, DataType};
+use arrow_buffer::{NullBuffer, OffsetBuffer, ScalarBuffer};
+use arrow_schema::ArrowError;
 use std::sync::Arc;
 
+use sedona_schema::raster::BandDataType;
 use sedona_schema::raster::RasterSchema;
 
-use crate::traits::{BandMetadata, MetadataRef};
+use arrow_schema::DataType;
 
-/// Builder for constructing raster arrays with zero-copy band data writing
+/// Builder for constructing N-D raster arrays.
 ///
-/// Required steps to build a raster:
-/// 1. Create a RasterBuilder with a specified capacity
-/// 2. For each raster to add:
-///    - Call `start_raster` with the appropriate metadata, CRS
-///    - For each band in the raster:
-///       - Call `start_band` with the band metadata
-///       - Use `band_data_writer` to get a BinaryViewBuilder and write the 
band data
-///       - Call `finish_band` to complete the band
-///    - Call `finish_raster` to complete the raster
-/// 3. After all rasters are added, call `finish` to get the final StructArray
+/// # Usage
 ///
-/// Example usage:
 /// ```
-/// use sedona_raster::traits::{RasterMetadata, BandMetadata};
-/// use sedona_schema::raster::{StorageType, BandDataType};
 /// use sedona_raster::builder::RasterBuilder;
+/// use sedona_schema::raster::BandDataType;
 ///
 /// let mut builder = RasterBuilder::new(1);
-/// let metadata = RasterMetadata {
-///     width: 100, height: 100,
-///     upperleft_x: 0.0, upperleft_y: 0.0,
-///     scale_x: 1.0, scale_y: -1.0,
-///     skew_x: 0.0, skew_y: 0.0,
-/// };
-/// // Start a raster from RasterMetadata struct
-/// builder.start_raster(&metadata, Some("EPSG:4326")).unwrap();
 ///
-/// // Add a band:
-/// let band_metadata = BandMetadata {
-///     nodata_value: Some(vec![0u8]),
-///     storage_type: StorageType::InDb,
-///     datatype: BandDataType::UInt8,
-///     outdb_url: None,
-///     outdb_band_id: None,
-/// };
-/// builder.start_band(band_metadata).unwrap();
-/// let band_writer = builder.band_data_writer();
-/// band_writer.append_value(&vec![/* band data bytes */]);
-/// builder.finish_band().unwrap();
+/// // 2D raster convenience: sets transform, spatial_dims=["x","y"], 
spatial_shape=[w,h]
+/// builder.start_raster_2d(100, 100, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0, 
Some("EPSG:4326")).unwrap();
 ///
-/// // Finish the raster
+/// // 2D band convenience: sets dim_names=["y","x"], shape=[h,w], contiguous 
strides
+/// builder.start_band_2d(BandDataType::UInt8, Some(&[0u8])).unwrap();
+/// builder.band_data_writer().append_value(&vec![0u8; 10000]);
+/// builder.finish_band().unwrap();
 /// builder.finish_raster().unwrap();
 ///
-/// // Finish building and get the StructArray
 /// let raster_array = builder.finish().unwrap();
 /// ```
 pub struct RasterBuilder {
-    // Metadata fields
-    width: UInt64Builder,
-    height: UInt64Builder,
-    upper_left_x: Float64Builder,
-    upper_left_y: Float64Builder,
-    scale_x: Float64Builder,
-    scale_y: Float64Builder,
-    skew_x: Float64Builder,
-    skew_y: Float64Builder,
-
-    // CRS field
+    // Top-level raster fields
     crs: StringViewBuilder,
-
-    // Band metadata fields
-    band_nodata: BinaryBuilder,
-    band_storage_type: UInt32Builder,
+    transform_values: Float64Builder,
+    transform_offsets: Vec<i32>,
+    spatial_dims_values: StringViewBuilder,
+    spatial_dims_offsets: Vec<i32>,
+    spatial_shape_values: Int64Builder,
+    spatial_shape_offsets: Vec<i32>,
+
+    // Band fields (flattened across all bands)
+    band_name: StringBuilder,
+    band_dim_names_values: StringBuilder,
+    band_dim_names_offsets: Vec<i32>,
+    band_shape_values: UInt64Builder,
+    band_shape_offsets: Vec<i32>,
     band_datatype: UInt32Builder,
-    band_outdb_url: StringBuilder,
-    band_outdb_band_id: UInt32Builder,
-
-    // Band data field
+    band_nodata: BinaryBuilder,
+    // VIEW field — one entry per visible dimension per band. Stored as four
+    // parallel Int64 columns + a List offset vector; assembled into a
+    // `ListArray<StructArray<Int64,Int64,Int64,Int64>>` in `finish()`.
+    band_view_source_axis_values: Int64Builder,
+    band_view_start_values: Int64Builder,
+    band_view_step_values: Int64Builder,
+    band_view_steps_values: Int64Builder,
+    band_view_offsets: Vec<i32>,
+    // Per-band validity for the view list. `false` means the row is null —
+    // the canonical representation of an identity view. `true` means the row
+    // carries an explicit view in the four parallel value builders.
+    band_view_validity: Vec<bool>,
+    band_outdb_uri: StringBuilder,
+    band_outdb_format: StringViewBuilder,
     band_data: BinaryViewBuilder,
 
     // List structure tracking
     band_offsets: Vec<i32>,  // Track where each raster's bands start/end
     current_band_count: i32, // Track bands in current raster
 
-    raster_validity: BooleanBuilder, // Track which rasters are null
+    // Current raster state (needed for start_band_2d)
+    current_width: u64,
+    current_height: u64,
+
+    // Per-raster validation state: spatial dims/shape and recorded bands so
+    // finish_raster can check every band matches the top-level spatial grid.
+    current_spatial_dims: Vec<String>,
+    current_spatial_shape: Vec<i64>,
+    current_raster_bands: Vec<(Vec<String>, Vec<u64>)>,
+
+    // Track band_data count at the start of each band for finish_band 
validation
+    band_data_count_at_start: usize,
+
+    raster_validity: BooleanBuilder,
 }
 
 impl RasterBuilder {
-    /// Create a new raster builder with the specified capacity
+    /// Create a new raster builder with the specified capacity.
     pub fn new(capacity: usize) -> Self {
         Self {
-            // Metadata builders
-            width: UInt64Builder::with_capacity(capacity),
-            height: UInt64Builder::with_capacity(capacity),
-            upper_left_x: Float64Builder::with_capacity(capacity),
-            upper_left_y: Float64Builder::with_capacity(capacity),
-            scale_x: Float64Builder::with_capacity(capacity),
-            scale_y: Float64Builder::with_capacity(capacity),
-            skew_x: Float64Builder::with_capacity(capacity),
-            skew_y: Float64Builder::with_capacity(capacity),
-
-            // CRS builder
             crs: StringViewBuilder::with_capacity(capacity),
-
-            // Band builders - estimate some bands per raster
-            // The capacity is at raster level, but each raster has multiple 
bands and
-            // are large. We may want to add an optional parameter to control 
expected
-            // bands per raster or even band size in the future
-            band_nodata: BinaryBuilder::with_capacity(capacity, capacity),
-            band_storage_type: UInt32Builder::with_capacity(capacity),
+            transform_values: Float64Builder::with_capacity(capacity * 6),
+            transform_offsets: vec![0],
+            spatial_dims_values: StringViewBuilder::with_capacity(capacity * 
2),
+            spatial_dims_offsets: vec![0],
+            spatial_shape_values: Int64Builder::with_capacity(capacity * 2),
+            spatial_shape_offsets: vec![0],
+
+            band_name: StringBuilder::with_capacity(capacity, capacity),
+            band_dim_names_values: StringBuilder::with_capacity(capacity * 2, 
capacity * 4),
+            band_dim_names_offsets: vec![0],
+            band_shape_values: UInt64Builder::with_capacity(capacity * 2),
+            band_shape_offsets: vec![0],
             band_datatype: UInt32Builder::with_capacity(capacity),
-            band_outdb_url: StringBuilder::with_capacity(capacity, capacity),
-            band_outdb_band_id: UInt32Builder::with_capacity(capacity),
+            band_nodata: BinaryBuilder::with_capacity(capacity, capacity),
+            band_view_source_axis_values: Int64Builder::with_capacity(capacity 
* 2),
+            band_view_start_values: Int64Builder::with_capacity(capacity * 2),
+            band_view_step_values: Int64Builder::with_capacity(capacity * 2),
+            band_view_steps_values: Int64Builder::with_capacity(capacity * 2),
+            band_view_offsets: vec![0],
+            band_view_validity: Vec::with_capacity(capacity),
+            band_outdb_uri: StringBuilder::with_capacity(capacity, capacity),
+            band_outdb_format: StringViewBuilder::with_capacity(capacity),
             band_data: BinaryViewBuilder::with_capacity(capacity),
 
-            // List tracking
             band_offsets: vec![0],
             current_band_count: 0,
+            current_width: 0,
+            current_height: 0,
+
+            current_spatial_dims: Vec::new(),
+            current_spatial_shape: Vec::new(),
+            current_raster_bands: Vec::new(),
+
+            band_data_count_at_start: 0,
 
-            // Raster-level validity (keeps track of null rasters)
             raster_validity: BooleanBuilder::with_capacity(capacity),
         }
     }
 
-    /// Start a new raster with metadata and optional CRS
+    /// Start a new raster with explicit N-D parameters.
+    ///
+    /// `transform` must be a 6-element GDAL GeoTransform:
+    /// `[origin_x, scale_x, skew_x, origin_y, skew_y, scale_y]`
+    ///
+    /// `spatial_dims` names the raster-level spatial dimensions (today always
+    /// length 2, e.g. `["x","y"]`). `spatial_shape` gives their sizes in the
+    /// same order. Every band added to this raster must contain each name in
+    /// `spatial_dims` within its own `dim_names`, with matching size.
     pub fn start_raster(
         &mut self,
-        metadata: &dyn MetadataRef,
+        transform: &[f64; 6],
+        spatial_dims: &[&str],
+        spatial_shape: &[i64],
         crs: Option<&str>,
     ) -> Result<(), ArrowError> {
-        self.append_metadata_from_ref(metadata)?;
-        self.append_crs(crs)?;
+        if spatial_dims.len() != spatial_shape.len() {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "spatial_dims.len() ({}) must equal spatial_shape.len() ({})",
+                spatial_dims.len(),
+                spatial_shape.len()
+            )));
+        }
+
+        // Transform
+        for &v in transform {
+            self.transform_values.append_value(v);
+        }
+        let next = *self.transform_offsets.last().unwrap() + 6;
+        self.transform_offsets.push(next);
+
+        // Spatial dims + shape
+        for d in spatial_dims {
+            self.spatial_dims_values.append_value(d);
+        }
+        let next = *self.spatial_dims_offsets.last().unwrap() + 
spatial_dims.len() as i32;
+        self.spatial_dims_offsets.push(next);
+
+        for &s in spatial_shape {
+            self.spatial_shape_values.append_value(s);
+        }
+        let next = *self.spatial_shape_offsets.last().unwrap() + 
spatial_shape.len() as i32;
+        self.spatial_shape_offsets.push(next);
+
+        // CRS
+        match crs {
+            Some(crs_data) => self.crs.append_value(crs_data),
+            None => self.crs.append_null(),
+        }
 
-        // Reset band count for this raster
         self.current_band_count = 0;
+        self.current_spatial_dims = spatial_dims.iter().map(|s| 
s.to_string()).collect();
+        self.current_spatial_shape = spatial_shape.to_vec();
+        self.current_raster_bands.clear();
+        // Preserve legacy current_width/current_height for start_band_2d (set
+        // by start_raster_2d). Callers using this direct entry point drive
+        // their own shapes via start_band.
+        self.current_width = 0;
+        self.current_height = 0;
+
+        Ok(())
+    }
 
+    /// Convenience: start a 2D raster with the legacy 8-parameter interface.
+    ///
+    /// Sets `spatial_dims=["x","y"]`, `spatial_shape=[width, height]`, and
+    /// builds the 6-element GDAL transform from the individual parameters.
+    #[allow(clippy::too_many_arguments)]
+    pub fn start_raster_2d(
+        &mut self,
+        width: u64,
+        height: u64,
+        origin_x: f64,
+        origin_y: f64,
+        scale_x: f64,
+        scale_y: f64,
+        skew_x: f64,
+        skew_y: f64,
+        crs: Option<&str>,
+    ) -> Result<(), ArrowError> {
+        let transform = [origin_x, scale_x, skew_x, origin_y, skew_y, scale_y];
+        self.start_raster(&transform, &["x", "y"], &[width as i64, height as 
i64], crs)?;
+        self.current_width = width;
+        self.current_height = height;
         Ok(())
     }
 
-    /// Start a new band - this must be called before writing band data
-    pub fn start_band(&mut self, band_metadata: BandMetadata) -> Result<(), 
ArrowError> {
-        // Append band metadata
-        match band_metadata.nodata_value {
-            Some(nodata) => self.band_nodata.append_value(&nodata),
+    /// Start a new band with explicit N-D parameters.
+    ///
+    /// `outdb_uri` is the *location* of the external resource (scheme is
+    /// resolved by an `ObjectStoreRegistry`). `outdb_format` is the *format*
+    /// used to interpret the bytes at that location (e.g. `"geotiff"`,
+    /// `"zarr"`). A null `outdb_format` means the band is in-memory — the
+    /// band's `data` buffer is authoritative.
+    #[allow(clippy::too_many_arguments)]
+    pub fn start_band(
+        &mut self,
+        name: Option<&str>,
+        dim_names: &[&str],
+        shape: &[u64],
+        data_type: BandDataType,
+        nodata: Option<&[u8]>,
+        outdb_uri: Option<&str>,
+        outdb_format: Option<&str>,
+    ) -> Result<(), ArrowError> {
+        if dim_names.is_empty() {
+            return Err(ArrowError::InvalidArgumentError(
+                "start_band: 0-dimensional bands are not supported".into(),
+            ));
+        }
+        if dim_names.len() != shape.len() {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "start_band: dim_names ({}) and shape ({}) must have the same 
length",
+                dim_names.len(),
+                shape.len(),
+            )));
+        }
+        // Name
+        match name {
+            Some(n) => self.band_name.append_value(n),
+            None => self.band_name.append_null(),
+        }
+
+        // Dim names
+        for dn in dim_names {
+            self.band_dim_names_values.append_value(dn);
+        }
+        let next = *self.band_dim_names_offsets.last().unwrap() + 
dim_names.len() as i32;
+        self.band_dim_names_offsets.push(next);
+
+        // Shape
+        for &s in shape {
+            self.band_shape_values.append_value(s);
+        }
+        let next = *self.band_shape_offsets.last().unwrap() + shape.len() as 
i32;
+        self.band_shape_offsets.push(next);
+
+        // Data type
+        self.band_datatype.append_value(data_type as u32);
+
+        // Nodata
+        match nodata {
+            Some(nodata_bytes) => self.band_nodata.append_value(nodata_bytes),
             None => self.band_nodata.append_null(),
         }
 
-        self.band_storage_type
-            .append_value(band_metadata.storage_type as u32);
-        self.band_datatype
-            .append_value(band_metadata.datatype as u32);
+        // VIEW: canonical identity is encoded as a null list entry — no
+        // values appended, offset unchanged, validity bit cleared.
+        let next = *self.band_view_offsets.last().unwrap();
+        self.band_view_offsets.push(next);
+        self.band_view_validity.push(false);
 
-        match band_metadata.outdb_url {
-            Some(url) => self.band_outdb_url.append_value(&url),
-            None => self.band_outdb_url.append_null(),
+        // OutDb URI
+        match outdb_uri {
+            Some(uri) => self.band_outdb_uri.append_value(uri),
+            None => self.band_outdb_uri.append_null(),
         }
 
-        match band_metadata.outdb_band_id {
-            Some(band_id) => self.band_outdb_band_id.append_value(band_id),
-            None => self.band_outdb_band_id.append_null(),
+        // OutDb format
+        match outdb_format {
+            Some(format) => self.band_outdb_format.append_value(format),
+            None => self.band_outdb_format.append_null(),
         }
 
         self.current_band_count += 1;
+        self.band_data_count_at_start = self.band_data.len();
+
+        // Record this band's dims/shape for strict validation at 
finish_raster.
+        self.current_raster_bands.push((
+            dim_names.iter().map(|s| s.to_string()).collect(),
+            shape.to_vec(),
+        ));
 
         Ok(())
     }
 
-    /// Get direct access to the BinaryViewBuilder for writing the current 
band's data
-    /// Must be called after start_band() to write data to the current band
+    /// Convenience: start a 2D band with `dim_names=["y","x"]` and 
`shape=[height, width]`.
+    ///
+    /// Must be called after `start_raster_2d` which sets the current 
width/height.
+    pub fn start_band_2d(

Review Comment:
   renamed the *_2d methods back to their original name.
   
   
   I made a note to myself to come back later and give them a _2d suffix. I 
think its good to give them a clear name but more important to right now keep 
the code churn lower on this PR.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to