Re: [PR] feat: Add array data type support [fluss-rust]

via GitHub Sun, 08 Mar 2026 18:47:19 -0700


fresh-borzoni commented on code in PR #433:
URL: https://github.com/apache/fluss-rust/pull/433#discussion_r2902724242



##########
crates/fluss/src/row/binary_array.rs:
##########
@@ -0,0 +1,734 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Binary array format matching Java's `BinaryArray.java` layout.
+//!
+//! Binary layout:
+//! ```text
+//! [size(4B)] + [null bits (4-byte word aligned)] + [fixed-length part] + 
[variable-length part]
+//! ```
+//!
+//! Java reference: `BinaryArray.java`, `BinaryArrayWriter.java`
+
+use crate::error::Error::IllegalArgument;
+use crate::error::Result;
+use crate::metadata::DataType;
+use crate::row::Decimal;
+use crate::row::datum::{Date, Time, TimestampLtz, TimestampNtz};
+use serde::Serialize;
+use std::fmt;
+use std::hash::{Hash, Hasher};
+
+const MAX_FIX_PART_DATA_SIZE: usize = 7;
+const HIGHEST_FIRST_BIT: u64 = 0x80_u64 << 56;
+const HIGHEST_SECOND_TO_EIGHTH_BIT: u64 = 0x7F_u64 << 56;
+
+/// Calculates the header size in bytes: 4 (for element count) + null bits 
(4-byte word aligned).
+/// Matches Java's `BinaryArray.calculateHeaderInBytes(numFields)`.
+pub fn calculate_header_in_bytes(num_elements: usize) -> usize {
+    4 + num_elements.div_ceil(32) * 4
+}
+
+/// Calculates the fixed-length part size per element for a given data type.
+/// Matches Java's `BinaryArray.calculateFixLengthPartSize(DataType)`.
+pub fn calculate_fix_length_part_size(element_type: &DataType) -> usize {
+    match element_type {
+        DataType::Boolean(_) | DataType::TinyInt(_) => 1,
+        DataType::SmallInt(_) => 2,
+        DataType::Int(_) | DataType::Float(_) | DataType::Date(_) | 
DataType::Time(_) => 4,
+        DataType::BigInt(_)
+        | DataType::Double(_)
+        | DataType::Char(_)
+        | DataType::String(_)
+        | DataType::Binary(_)
+        | DataType::Bytes(_)
+        | DataType::Decimal(_)
+        | DataType::Timestamp(_)
+        | DataType::TimestampLTz(_)
+        | DataType::Array(_)
+        | DataType::Map(_)
+        | DataType::Row(_) => 8,
+    }
+}
+
+/// Rounds a byte count up to the nearest 8-byte word boundary.
+/// Matches Java's `roundNumberOfBytesToNearestWord`.
+fn round_to_nearest_word(num_bytes: usize) -> usize {
+    (num_bytes + 7) & !7
+}
+
+/// A Fluss binary array, wire-compatible with Java's `BinaryArray`.
+///
+/// Stores elements in a flat byte buffer with a header (element count + null 
bitmap)
+/// followed by fixed-length slots and an optional variable-length section.
+#[derive(Clone)]
+pub struct FlussArray {
+    data: Vec<u8>,
+    size: usize,
+    element_offset: usize,
+}
+
+impl fmt::Debug for FlussArray {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("FlussArray")
+            .field("size", &self.size)
+            .field("data_len", &self.data.len())
+            .finish()
+    }
+}
+
+impl fmt::Display for FlussArray {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "FlussArray[size={}]", self.size)
+    }
+}
+
+impl PartialEq for FlussArray {
+    fn eq(&self, other: &Self) -> bool {
+        self.data == other.data
+    }
+}
+
+impl Eq for FlussArray {}
+
+impl PartialOrd for FlussArray {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for FlussArray {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        self.data.cmp(&other.data)
+    }
+}
+
+impl Hash for FlussArray {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.data.hash(state);
+    }
+}
+
+impl Serialize for FlussArray {
+    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, 
S::Error>
+    where
+        S: serde::Serializer,
+    {
+        serializer.serialize_bytes(&self.data)
+    }
+}
+
+impl FlussArray {
+    /// Creates a FlussArray from a byte slice (copies data into owned 
storage).
+    pub fn from_bytes(data: &[u8]) -> Result<Self> {
+        if data.len() < 4 {
+            return Err(IllegalArgument {
+                message: format!(
+                    "FlussArray data too short: need at least 4 bytes, got {}",
+                    data.len()
+                ),
+            });
+        }
+        let raw_size = i32::from_ne_bytes(data[0..4].try_into().unwrap());
+        if raw_size < 0 {
+            return Err(IllegalArgument {
+                message: format!("FlussArray size must be non-negative, got 
{raw_size}"),
+            });
+        }
+        let size = raw_size as usize;
+        let element_offset = calculate_header_in_bytes(size);
+        if element_offset > data.len() {
+            return Err(IllegalArgument {
+                message: format!(
+                    "FlussArray header exceeds payload: header={}, payload={}",
+                    element_offset,
+                    data.len()
+                ),
+            });
+        }
+
+        Ok(FlussArray {
+            data: data.to_vec(),
+            size,
+            element_offset,
+        })
+    }
+
+    /// Returns the number of elements.
+    pub fn size(&self) -> usize {
+        self.size
+    }
+
+    /// Returns the raw bytes of this array (the complete binary 
representation).
+    pub fn as_bytes(&self) -> &[u8] {
+        &self.data
+    }
+
+    /// Returns true if the element at position `pos` is null.
+    pub fn is_null_at(&self, pos: usize) -> bool {
+        let byte_index = pos >> 3;
+        let bit = pos & 7;
+        (self.data[4 + byte_index] & (1u8 << bit)) != 0
+    }
+
+    fn element_offset(&self, ordinal: usize, element_size: usize) -> usize {
+        self.element_offset + ordinal * element_size
+    }
+
+    fn checked_slice(&self, start: usize, len: usize, context: &str) -> 
Result<&[u8]> {
+        let end = start.checked_add(len).ok_or_else(|| IllegalArgument {
+            message: format!("Overflow while reading {context}: start={start}, 
len={len}"),
+        })?;
+        if end > self.data.len() {
+            return Err(IllegalArgument {
+                message: format!(
+                    "Out-of-bounds while reading {context}: start={start}, 
len={len}, payload={}",
+                    self.data.len()
+                ),
+            });
+        }
+        Ok(&self.data[start..end])
+    }
+
+    fn read_var_len_bytes(&self, pos: usize) -> Result<&[u8]> {
+        let field_offset = self.element_offset(pos, 8);
+        let packed = self.get_long(pos) as u64;
+        let mark = packed & HIGHEST_FIRST_BIT;
+
+        if mark == 0 {
+            let offset = (packed >> 32) as usize;
+            let len = (packed & 0xFFFF_FFFF) as usize;
+            self.checked_slice(offset, len, "variable-length array element")
+        } else {
+            let len = ((packed & HIGHEST_SECOND_TO_EIGHTH_BIT) >> 56) as usize;
+            if len > MAX_FIX_PART_DATA_SIZE {
+                return Err(IllegalArgument {
+                    message: format!(
+                        "Inline array element length must be <= 
{MAX_FIX_PART_DATA_SIZE}, got {len}"
+                    ),
+                });
+            }
+            // Java stores inline bytes in the 8-byte slot itself.
+            // On little-endian, bytes start at field_offset; on big-endian 
they start at +1.
+            let start = if cfg!(target_endian = "little") {
+                field_offset
+            } else {
+                field_offset + 1
+            };
+            self.checked_slice(start, len, "inline array element")
+        }
+    }
+
+    pub fn get_boolean(&self, pos: usize) -> bool {
+        let offset = self.element_offset(pos, 1);
+        self.data[offset] != 0
+    }
+
+    pub fn get_byte(&self, pos: usize) -> i8 {
+        let offset = self.element_offset(pos, 1);
+        self.data[offset] as i8
+    }
+
+    pub fn get_short(&self, pos: usize) -> i16 {
+        let offset = self.element_offset(pos, 2);
+        i16::from_ne_bytes(self.data[offset..offset + 2].try_into().unwrap())

Review Comment:
   Ditto; the same applies to all other such cases in this file.



##########
crates/fluss/src/row/mod.rs:
##########
@@ -273,6 +278,15 @@ impl<'a> InternalRow for GenericRow<'a> {
             }),
         }
     }
+
+    fn get_array(&self, pos: usize) -> Result<FlussArray> {
+        match self.get_value(pos)? {
+            Datum::Array(a) => Ok(a.clone()),

Review Comment:
   It clones on every call; maybe we want to use `Bytes` for zero-copy?



##########
crates/fluss/src/row/column.rs:
##########
@@ -407,17 +410,379 @@ impl InternalRow for ColumnarRow {
             })?
             .value(self.row_id))
     }
+
+    fn get_array(&self, pos: usize) -> Result<crate::row::FlussArray> {
+        use crate::record::from_arrow_type;
+        use crate::row::binary_array::FlussArrayWriter;
+
+        let column = self.column(pos)?;
+        let list_array =
+            column
+                .as_any()
+                .downcast_ref::<ListArray>()
+                .ok_or_else(|| IllegalArgument {
+                    message: format!("expected List array at position {pos}"),
+                })?;
+
+        let values = list_array.value(self.row_id);
+        let element_fluss_type = from_arrow_type(values.data_type())?;
+        let mut writer = FlussArrayWriter::new(values.len(), 
&element_fluss_type);
+
+        write_arrow_values_to_fluss_array(&*values, &element_fluss_type, &mut 
writer)?;
+        writer.complete()
+    }
+}
+
+/// Downcast to a primitive Arrow array type, then loop with null checks 
calling a writer method.
+macro_rules! write_primitive_elements {
+    ($values:expr, $arrow_type:ty, $element_type:expr, $writer:expr, 
$write_method:ident) => {{
+        let arr = $values
+            .as_primitive_opt::<$arrow_type>()
+            .ok_or_else(|| IllegalArgument {
+                message: format!(
+                    "Expected {} for {:?} element",
+                    stringify!($arrow_type),
+                    $element_type
+                ),
+            })?;
+        for i in 0..arr.len() {
+            if arr.is_null(i) {
+                $writer.set_null_at(i);
+            } else {
+                $writer.$write_method(i, arr.value(i));
+            }
+        }
+    }};
+}
+
+/// Downcast via `downcast_ref`, then loop with null checks calling a writer 
method.
+macro_rules! write_downcast_elements {
+    ($values:expr, $array_type:ty, $element_type:expr, $writer:expr, 
$write_method:ident) => {{
+        let arr = $values
+            .as_any()
+            .downcast_ref::<$array_type>()
+            .ok_or_else(|| IllegalArgument {
+                message: format!(
+                    "Expected {} for {:?} element",
+                    stringify!($array_type),
+                    $element_type
+                ),
+            })?;
+        for i in 0..arr.len() {
+            if arr.is_null(i) {
+                $writer.set_null_at(i);
+            } else {
+                $writer.$write_method(i, arr.value(i));
+            }
+        }
+    }};
+}
+
+/// Converts all elements of an Arrow array into a `FlussArrayWriter`, 
downcasting
+/// the Arrow array once per call rather than per element.
+fn write_arrow_values_to_fluss_array(
+    values: &dyn Array,
+    element_type: &crate::metadata::DataType,
+    writer: &mut crate::row::binary_array::FlussArrayWriter,
+) -> Result<()> {
+    use crate::metadata::DataType;
+    use crate::record::from_arrow_type;
+    use crate::row::binary_array::FlussArrayWriter;
+
+    let len = values.len();
+
+    match element_type {
+        DataType::Boolean(_) => {
+            write_downcast_elements!(values, BooleanArray, element_type, 
writer, write_boolean)
+        }
+        DataType::TinyInt(_) => {
+            write_primitive_elements!(values, Int8Type, element_type, writer, 
write_byte)
+        }
+        DataType::SmallInt(_) => {
+            write_primitive_elements!(values, Int16Type, element_type, writer, 
write_short)
+        }
+        DataType::Int(_) => {
+            write_primitive_elements!(values, Int32Type, element_type, writer, 
write_int)
+        }
+        DataType::BigInt(_) => {
+            write_primitive_elements!(values, Int64Type, element_type, writer, 
write_long)
+        }
+        DataType::Float(_) => {
+            write_primitive_elements!(values, Float32Type, element_type, 
writer, write_float)
+        }
+        DataType::Double(_) => {
+            write_primitive_elements!(values, Float64Type, element_type, 
writer, write_double)
+        }
+        DataType::Char(_) | DataType::String(_) => {
+            write_downcast_elements!(values, StringArray, element_type, 
writer, write_string)
+        }
+        DataType::Binary(_) => {
+            write_downcast_elements!(
+                values,
+                FixedSizeBinaryArray,
+                element_type,
+                writer,
+                write_binary_bytes
+            )
+        }
+        DataType::Bytes(_) => {
+            write_downcast_elements!(
+                values,
+                BinaryArray,
+                element_type,
+                writer,
+                write_binary_bytes
+            )
+        }
+        DataType::Decimal(dt) => {
+            let arr =
+                values
+                    .as_primitive_opt::<Decimal128Type>()
+                    .ok_or_else(|| IllegalArgument {
+                        message: format!("Expected Decimal128Array for 
{element_type:?} element"),
+                    })?;
+            let arrow_scale = match values.data_type() {
+                ArrowDataType::Decimal128(_p, s) => *s as i64,
+                other => {
+                    return Err(IllegalArgument {
+                        message: format!(
+                            "Expected Decimal128 data type for 
{element_type:?} element, got {other:?}"
+                        ),
+                    });
+                }
+            };
+            let precision = dt.precision();
+            let scale = dt.scale();
+            for i in 0..len {
+                if arr.is_null(i) {
+                    writer.set_null_at(i);
+                } else {
+                    let d = crate::row::Decimal::from_arrow_decimal128(
+                        arr.value(i),
+                        arrow_scale,
+                        precision,
+                        scale,
+                    )?;
+                    writer.write_decimal(i, &d, precision);
+                }
+            }
+        }
+        DataType::Date(_) => {
+            let arr = values
+                .as_primitive_opt::<Date32Type>()
+                .ok_or_else(|| IllegalArgument {
+                    message: format!("Expected Date32Array for 
{element_type:?} element"),
+                })?;
+            for i in 0..len {
+                if arr.is_null(i) {
+                    writer.set_null_at(i);
+                } else {
+                    writer.write_date(i, Date::new(arr.value(i)));
+                }
+            }
+        }
+        DataType::Time(_) => {
+            write_time_elements(values, element_type, writer)?;
+        }
+        DataType::Timestamp(ts_type) => {
+            write_timestamp_elements(
+                values,
+                element_type,
+                writer,
+                ts_type.precision(),
+                TimestampNtz::new,
+                TimestampNtz::from_millis_nanos,
+                |w, i, ts, p| w.write_timestamp_ntz(i, &ts, p),
+            )?;
+        }
+        DataType::TimestampLTz(ts_type) => {
+            write_timestamp_elements(
+                values,
+                element_type,
+                writer,
+                ts_type.precision(),
+                TimestampLtz::new,
+                TimestampLtz::from_millis_nanos,
+                |w, i, ts, p| w.write_timestamp_ltz(i, &ts, p),
+            )?;
+        }
+        DataType::Array(_) => {
+            let list_arr =
+                values
+                    .as_any()
+                    .downcast_ref::<ListArray>()
+                    .ok_or_else(|| IllegalArgument {
+                        message: format!("Expected ListArray for 
{element_type:?} element"),
+                    })?;
+            for i in 0..len {
+                if list_arr.is_null(i) {
+                    writer.set_null_at(i);
+                } else {
+                    let nested_values = list_arr.value(i);
+                    let nested_element_type = 
from_arrow_type(nested_values.data_type())?;

Review Comment:
   We allocate a `DataType` on every loop iteration, which is wasteful. Shall we hoist it out of the loop?



##########
crates/fluss/src/row/encode/compacted_key_encoder.rs:
##########


Review Comment:
   `+ 1` It seems that the server rejects arrays as primary keys, but encoding them is allowed.



##########
crates/fluss/src/row/binary_array.rs:
##########
@@ -0,0 +1,734 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Binary array format matching Java's `BinaryArray.java` layout.
+//!
+//! Binary layout:
+//! ```text
+//! [size(4B)] + [null bits (4-byte word aligned)] + [fixed-length part] + 
[variable-length part]
+//! ```
+//!
+//! Java reference: `BinaryArray.java`, `BinaryArrayWriter.java`
+
+use crate::error::Error::IllegalArgument;
+use crate::error::Result;
+use crate::metadata::DataType;
+use crate::row::Decimal;
+use crate::row::datum::{Date, Time, TimestampLtz, TimestampNtz};
+use serde::Serialize;
+use std::fmt;
+use std::hash::{Hash, Hasher};
+
+const MAX_FIX_PART_DATA_SIZE: usize = 7;
+const HIGHEST_FIRST_BIT: u64 = 0x80_u64 << 56;
+const HIGHEST_SECOND_TO_EIGHTH_BIT: u64 = 0x7F_u64 << 56;
+
+/// Calculates the header size in bytes: 4 (for element count) + null bits 
(4-byte word aligned).
+/// Matches Java's `BinaryArray.calculateHeaderInBytes(numFields)`.
+pub fn calculate_header_in_bytes(num_elements: usize) -> usize {
+    4 + num_elements.div_ceil(32) * 4
+}
+
+/// Calculates the fixed-length part size per element for a given data type.
+/// Matches Java's `BinaryArray.calculateFixLengthPartSize(DataType)`.
+pub fn calculate_fix_length_part_size(element_type: &DataType) -> usize {
+    match element_type {
+        DataType::Boolean(_) | DataType::TinyInt(_) => 1,
+        DataType::SmallInt(_) => 2,
+        DataType::Int(_) | DataType::Float(_) | DataType::Date(_) | 
DataType::Time(_) => 4,
+        DataType::BigInt(_)
+        | DataType::Double(_)
+        | DataType::Char(_)
+        | DataType::String(_)
+        | DataType::Binary(_)
+        | DataType::Bytes(_)
+        | DataType::Decimal(_)
+        | DataType::Timestamp(_)
+        | DataType::TimestampLTz(_)
+        | DataType::Array(_)
+        | DataType::Map(_)
+        | DataType::Row(_) => 8,
+    }
+}
+
+/// Rounds a byte count up to the nearest 8-byte word boundary.
+/// Matches Java's `roundNumberOfBytesToNearestWord`.
+fn round_to_nearest_word(num_bytes: usize) -> usize {
+    (num_bytes + 7) & !7
+}
+
+/// A Fluss binary array, wire-compatible with Java's `BinaryArray`.
+///
+/// Stores elements in a flat byte buffer with a header (element count + null 
bitmap)
+/// followed by fixed-length slots and an optional variable-length section.
+#[derive(Clone)]
+pub struct FlussArray {
+    data: Vec<u8>,
+    size: usize,
+    element_offset: usize,
+}
+
+impl fmt::Debug for FlussArray {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("FlussArray")
+            .field("size", &self.size)
+            .field("data_len", &self.data.len())
+            .finish()
+    }
+}
+
+impl fmt::Display for FlussArray {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "FlussArray[size={}]", self.size)
+    }
+}
+
+impl PartialEq for FlussArray {
+    fn eq(&self, other: &Self) -> bool {
+        self.data == other.data
+    }
+}
+
+impl Eq for FlussArray {}
+
+impl PartialOrd for FlussArray {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for FlussArray {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        self.data.cmp(&other.data)
+    }
+}
+
+impl Hash for FlussArray {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.data.hash(state);
+    }
+}
+
+impl Serialize for FlussArray {
+    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, 
S::Error>
+    where
+        S: serde::Serializer,
+    {
+        serializer.serialize_bytes(&self.data)
+    }
+}
+
+impl FlussArray {
+    /// Creates a FlussArray from a byte slice (copies data into owned 
storage).
+    pub fn from_bytes(data: &[u8]) -> Result<Self> {
+        if data.len() < 4 {
+            return Err(IllegalArgument {
+                message: format!(
+                    "FlussArray data too short: need at least 4 bytes, got {}",
+                    data.len()
+                ),
+            });
+        }
+        let raw_size = i32::from_ne_bytes(data[0..4].try_into().unwrap());
+        if raw_size < 0 {
+            return Err(IllegalArgument {
+                message: format!("FlussArray size must be non-negative, got 
{raw_size}"),
+            });
+        }
+        let size = raw_size as usize;
+        let element_offset = calculate_header_in_bytes(size);
+        if element_offset > data.len() {
+            return Err(IllegalArgument {
+                message: format!(
+                    "FlussArray header exceeds payload: header={}, payload={}",
+                    element_offset,
+                    data.len()
+                ),
+            });
+        }
+
+        Ok(FlussArray {
+            data: data.to_vec(),
+            size,
+            element_offset,
+        })
+    }
+
+    /// Returns the number of elements.
+    pub fn size(&self) -> usize {
+        self.size
+    }
+
+    /// Returns the raw bytes of this array (the complete binary 
representation).
+    pub fn as_bytes(&self) -> &[u8] {
+        &self.data
+    }
+
+    /// Returns true if the element at position `pos` is null.
+    pub fn is_null_at(&self, pos: usize) -> bool {
+        let byte_index = pos >> 3;
+        let bit = pos & 7;
+        (self.data[4 + byte_index] & (1u8 << bit)) != 0
+    }
+
+    fn element_offset(&self, ordinal: usize, element_size: usize) -> usize {
+        self.element_offset + ordinal * element_size
+    }
+
+    fn checked_slice(&self, start: usize, len: usize, context: &str) -> 
Result<&[u8]> {
+        let end = start.checked_add(len).ok_or_else(|| IllegalArgument {
+            message: format!("Overflow while reading {context}: start={start}, 
len={len}"),
+        })?;
+        if end > self.data.len() {
+            return Err(IllegalArgument {
+                message: format!(
+                    "Out-of-bounds while reading {context}: start={start}, 
len={len}, payload={}",
+                    self.data.len()
+                ),
+            });
+        }
+        Ok(&self.data[start..end])
+    }
+
+    fn read_var_len_bytes(&self, pos: usize) -> Result<&[u8]> {
+        let field_offset = self.element_offset(pos, 8);
+        let packed = self.get_long(pos) as u64;
+        let mark = packed & HIGHEST_FIRST_BIT;
+
+        if mark == 0 {
+            let offset = (packed >> 32) as usize;
+            let len = (packed & 0xFFFF_FFFF) as usize;
+            self.checked_slice(offset, len, "variable-length array element")
+        } else {
+            let len = ((packed & HIGHEST_SECOND_TO_EIGHTH_BIT) >> 56) as usize;
+            if len > MAX_FIX_PART_DATA_SIZE {
+                return Err(IllegalArgument {
+                    message: format!(
+                        "Inline array element length must be <= 
{MAX_FIX_PART_DATA_SIZE}, got {len}"
+                    ),
+                });
+            }
+            // Java stores inline bytes in the 8-byte slot itself.
+            // On little-endian, bytes start at field_offset; on big-endian 
they start at +1.
+            let start = if cfg!(target_endian = "little") {
+                field_offset
+            } else {
+                field_offset + 1
+            };
+            self.checked_slice(start, len, "inline array element")
+        }
+    }
+
+    pub fn get_boolean(&self, pos: usize) -> bool {
+        let offset = self.element_offset(pos, 1);
+        self.data[offset] != 0
+    }
+
+    pub fn get_byte(&self, pos: usize) -> i8 {
+        let offset = self.element_offset(pos, 1);
+        self.data[offset] as i8
+    }
+
+    pub fn get_short(&self, pos: usize) -> i16 {
+        let offset = self.element_offset(pos, 2);
+        i16::from_ne_bytes(self.data[offset..offset + 2].try_into().unwrap())
+    }
+
+    pub fn get_int(&self, pos: usize) -> i32 {
+        let offset = self.element_offset(pos, 4);
+        i32::from_ne_bytes(self.data[offset..offset + 4].try_into().unwrap())
+    }
+
+    pub fn get_long(&self, pos: usize) -> i64 {
+        let offset = self.element_offset(pos, 8);
+        i64::from_ne_bytes(self.data[offset..offset + 8].try_into().unwrap())
+    }
+
+    pub fn get_float(&self, pos: usize) -> f32 {
+        let offset = self.element_offset(pos, 4);
+        f32::from_ne_bytes(self.data[offset..offset + 4].try_into().unwrap())
+    }
+
+    pub fn get_double(&self, pos: usize) -> f64 {
+        let offset = self.element_offset(pos, 8);
+        f64::from_ne_bytes(self.data[offset..offset + 8].try_into().unwrap())
+    }
+
+    /// Reads the offset_and_size packed long for variable-length elements.
+    fn get_offset_and_size(&self, pos: usize) -> (usize, usize) {
+        let packed = self.get_long(pos) as u64;
+        let offset = (packed >> 32) as usize;
+        let size = (packed & 0xFFFF_FFFF) as usize;
+        (offset, size)
+    }
+
+    pub fn get_string(&self, pos: usize) -> Result<&str> {

Review Comment:
   Why do half of the getters return `Result` while the other half go unchecked?
   I doubt that Java is split half-way like this; please check.



##########
crates/fluss/src/row/binary_array.rs:
##########
@@ -0,0 +1,734 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Binary array format matching Java's `BinaryArray.java` layout.
+//!
+//! Binary layout:
+//! ```text
+//! [size(4B)] + [null bits (4-byte word aligned)] + [fixed-length part] + 
[variable-length part]
+//! ```
+//!
+//! Java reference: `BinaryArray.java`, `BinaryArrayWriter.java`
+
+use crate::error::Error::IllegalArgument;
+use crate::error::Result;
+use crate::metadata::DataType;
+use crate::row::Decimal;
+use crate::row::datum::{Date, Time, TimestampLtz, TimestampNtz};
+use serde::Serialize;
+use std::fmt;
+use std::hash::{Hash, Hasher};
+
+const MAX_FIX_PART_DATA_SIZE: usize = 7;
+const HIGHEST_FIRST_BIT: u64 = 0x80_u64 << 56;
+const HIGHEST_SECOND_TO_EIGHTH_BIT: u64 = 0x7F_u64 << 56;
+
+/// Calculates the header size in bytes: 4 (for element count) + null bits 
(4-byte word aligned).
+/// Matches Java's `BinaryArray.calculateHeaderInBytes(numFields)`.
+pub fn calculate_header_in_bytes(num_elements: usize) -> usize {
+    4 + num_elements.div_ceil(32) * 4
+}
+
+/// Calculates the fixed-length part size per element for a given data type.
+/// Matches Java's `BinaryArray.calculateFixLengthPartSize(DataType)`.
+pub fn calculate_fix_length_part_size(element_type: &DataType) -> usize {
+    match element_type {
+        DataType::Boolean(_) | DataType::TinyInt(_) => 1,
+        DataType::SmallInt(_) => 2,
+        DataType::Int(_) | DataType::Float(_) | DataType::Date(_) | 
DataType::Time(_) => 4,
+        DataType::BigInt(_)
+        | DataType::Double(_)
+        | DataType::Char(_)
+        | DataType::String(_)
+        | DataType::Binary(_)
+        | DataType::Bytes(_)
+        | DataType::Decimal(_)
+        | DataType::Timestamp(_)
+        | DataType::TimestampLTz(_)
+        | DataType::Array(_)
+        | DataType::Map(_)
+        | DataType::Row(_) => 8,
+    }
+}
+
+/// Rounds a byte count up to the nearest 8-byte word boundary.
+/// Matches Java's `roundNumberOfBytesToNearestWord`.
+fn round_to_nearest_word(num_bytes: usize) -> usize {
+    (num_bytes + 7) & !7
+}
+
+/// A Fluss binary array, wire-compatible with Java's `BinaryArray`.
+///
+/// Stores elements in a flat byte buffer with a header (element count + null 
bitmap)
+/// followed by fixed-length slots and an optional variable-length section.
+#[derive(Clone)]
+pub struct FlussArray {
+    data: Vec<u8>,
+    size: usize,
+    element_offset: usize,
+}
+
+impl fmt::Debug for FlussArray {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("FlussArray")
+            .field("size", &self.size)
+            .field("data_len", &self.data.len())
+            .finish()
+    }
+}
+
+impl fmt::Display for FlussArray {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "FlussArray[size={}]", self.size)
+    }
+}
+
+impl PartialEq for FlussArray {
+    fn eq(&self, other: &Self) -> bool {
+        self.data == other.data
+    }
+}
+
+impl Eq for FlussArray {}
+
+impl PartialOrd for FlussArray {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for FlussArray {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        self.data.cmp(&other.data)
+    }
+}
+
+impl Hash for FlussArray {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.data.hash(state);
+    }
+}
+
+impl Serialize for FlussArray {
+    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, 
S::Error>
+    where
+        S: serde::Serializer,
+    {
+        serializer.serialize_bytes(&self.data)
+    }
+}
+
+impl FlussArray {
+    /// Creates a FlussArray from a byte slice (copies data into owned 
storage).
+    pub fn from_bytes(data: &[u8]) -> Result<Self> {
+        if data.len() < 4 {
+            return Err(IllegalArgument {
+                message: format!(
+                    "FlussArray data too short: need at least 4 bytes, got {}",
+                    data.len()
+                ),
+            });
+        }
+        let raw_size = i32::from_ne_bytes(data[0..4].try_into().unwrap());

Review Comment:
   I think Java uses LE (little-endian) byte order.



##########
crates/fluss/src/row/binary_array.rs:
##########
@@ -0,0 +1,734 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Binary array format matching Java's `BinaryArray.java` layout.
+//!
+//! Binary layout:
+//! ```text
+//! [size(4B)] + [null bits (4-byte word aligned)] + [fixed-length part] + 
[variable-length part]
+//! ```
+//!
+//! Java reference: `BinaryArray.java`, `BinaryArrayWriter.java`
+
+use crate::error::Error::IllegalArgument;
+use crate::error::Result;
+use crate::metadata::DataType;
+use crate::row::Decimal;
+use crate::row::datum::{Date, Time, TimestampLtz, TimestampNtz};
+use serde::Serialize;
+use std::fmt;
+use std::hash::{Hash, Hasher};
+
+const MAX_FIX_PART_DATA_SIZE: usize = 7;
+const HIGHEST_FIRST_BIT: u64 = 0x80_u64 << 56;
+const HIGHEST_SECOND_TO_EIGHTH_BIT: u64 = 0x7F_u64 << 56;
+
+/// Calculates the header size in bytes: 4 (for element count) + null bits 
(4-byte word aligned).
+/// Matches Java's `BinaryArray.calculateHeaderInBytes(numFields)`.
+pub fn calculate_header_in_bytes(num_elements: usize) -> usize {
+    4 + num_elements.div_ceil(32) * 4
+}
+
+/// Calculates the fixed-length part size per element for a given data type.
+/// Matches Java's `BinaryArray.calculateFixLengthPartSize(DataType)`.
+pub fn calculate_fix_length_part_size(element_type: &DataType) -> usize {
+    match element_type {
+        DataType::Boolean(_) | DataType::TinyInt(_) => 1,
+        DataType::SmallInt(_) => 2,
+        DataType::Int(_) | DataType::Float(_) | DataType::Date(_) | 
DataType::Time(_) => 4,
+        DataType::BigInt(_)
+        | DataType::Double(_)
+        | DataType::Char(_)
+        | DataType::String(_)
+        | DataType::Binary(_)
+        | DataType::Bytes(_)
+        | DataType::Decimal(_)
+        | DataType::Timestamp(_)
+        | DataType::TimestampLTz(_)
+        | DataType::Array(_)
+        | DataType::Map(_)
+        | DataType::Row(_) => 8,
+    }
+}
+
+/// Rounds a byte count up to the nearest 8-byte word boundary.
+/// Matches Java's `roundNumberOfBytesToNearestWord`.
+fn round_to_nearest_word(num_bytes: usize) -> usize {
+    (num_bytes + 7) & !7
+}
+
+/// A Fluss binary array, wire-compatible with Java's `BinaryArray`.
+///
+/// Stores elements in a flat byte buffer with a header (element count + null 
bitmap)
+/// followed by fixed-length slots and an optional variable-length section.
+#[derive(Clone)]
+pub struct FlussArray {
+    data: Vec<u8>,
+    size: usize,
+    element_offset: usize,
+}
+
+impl fmt::Debug for FlussArray {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("FlussArray")
+            .field("size", &self.size)
+            .field("data_len", &self.data.len())
+            .finish()
+    }
+}
+
+impl fmt::Display for FlussArray {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "FlussArray[size={}]", self.size)
+    }
+}
+
+impl PartialEq for FlussArray {
+    fn eq(&self, other: &Self) -> bool {
+        self.data == other.data
+    }
+}
+
+impl Eq for FlussArray {}
+
+impl PartialOrd for FlussArray {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for FlussArray {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        self.data.cmp(&other.data)
+    }
+}
+
+impl Hash for FlussArray {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.data.hash(state);
+    }
+}
+
+impl Serialize for FlussArray {
+    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, 
S::Error>
+    where
+        S: serde::Serializer,
+    {
+        serializer.serialize_bytes(&self.data)
+    }
+}
+
+impl FlussArray {
+    /// Creates a FlussArray from a byte slice (copies data into owned 
storage).
+    pub fn from_bytes(data: &[u8]) -> Result<Self> {
+        if data.len() < 4 {
+            return Err(IllegalArgument {
+                message: format!(
+                    "FlussArray data too short: need at least 4 bytes, got {}",
+                    data.len()
+                ),
+            });
+        }
+        let raw_size = i32::from_ne_bytes(data[0..4].try_into().unwrap());
+        if raw_size < 0 {
+            return Err(IllegalArgument {
+                message: format!("FlussArray size must be non-negative, got 
{raw_size}"),
+            });
+        }
+        let size = raw_size as usize;
+        let element_offset = calculate_header_in_bytes(size);
+        if element_offset > data.len() {
+            return Err(IllegalArgument {
+                message: format!(
+                    "FlussArray header exceeds payload: header={}, payload={}",
+                    element_offset,
+                    data.len()
+                ),
+            });
+        }
+
+        Ok(FlussArray {
+            data: data.to_vec(),
+            size,
+            element_offset,
+        })
+    }
+
+    /// Returns the number of elements.
+    pub fn size(&self) -> usize {
+        self.size
+    }
+
+    /// Returns the raw bytes of this array (the complete binary 
representation).
+    pub fn as_bytes(&self) -> &[u8] {
+        &self.data
+    }
+
+    /// Returns true if the element at position `pos` is null.
+    pub fn is_null_at(&self, pos: usize) -> bool {
+        let byte_index = pos >> 3;
+        let bit = pos & 7;
+        (self.data[4 + byte_index] & (1u8 << bit)) != 0
+    }
+
+    fn element_offset(&self, ordinal: usize, element_size: usize) -> usize {
+        self.element_offset + ordinal * element_size
+    }
+
+    fn checked_slice(&self, start: usize, len: usize, context: &str) -> 
Result<&[u8]> {
+        let end = start.checked_add(len).ok_or_else(|| IllegalArgument {
+            message: format!("Overflow while reading {context}: start={start}, 
len={len}"),
+        })?;
+        if end > self.data.len() {
+            return Err(IllegalArgument {
+                message: format!(
+                    "Out-of-bounds while reading {context}: start={start}, 
len={len}, payload={}",
+                    self.data.len()
+                ),
+            });
+        }
+        Ok(&self.data[start..end])
+    }
+
+    fn read_var_len_bytes(&self, pos: usize) -> Result<&[u8]> {
+        let field_offset = self.element_offset(pos, 8);
+        let packed = self.get_long(pos) as u64;
+        let mark = packed & HIGHEST_FIRST_BIT;
+
+        if mark == 0 {
+            let offset = (packed >> 32) as usize;
+            let len = (packed & 0xFFFF_FFFF) as usize;
+            self.checked_slice(offset, len, "variable-length array element")
+        } else {
+            let len = ((packed & HIGHEST_SECOND_TO_EIGHTH_BIT) >> 56) as usize;
+            if len > MAX_FIX_PART_DATA_SIZE {
+                return Err(IllegalArgument {
+                    message: format!(
+                        "Inline array element length must be <= 
{MAX_FIX_PART_DATA_SIZE}, got {len}"
+                    ),
+                });
+            }
+            // Java stores inline bytes in the 8-byte slot itself.
+            // On little-endian, bytes start at field_offset; on big-endian 
they start at +1.
+            let start = if cfg!(target_endian = "little") {
+                field_offset
+            } else {
+                field_offset + 1
+            };
+            self.checked_slice(start, len, "inline array element")
+        }
+    }
+
+    pub fn get_boolean(&self, pos: usize) -> bool {
+        let offset = self.element_offset(pos, 1);
+        self.data[offset] != 0
+    }
+
+    pub fn get_byte(&self, pos: usize) -> i8 {
+        let offset = self.element_offset(pos, 1);
+        self.data[offset] as i8
+    }
+
+    pub fn get_short(&self, pos: usize) -> i16 {
+        let offset = self.element_offset(pos, 2);
+        i16::from_ne_bytes(self.data[offset..offset + 2].try_into().unwrap())
+    }
+
+    pub fn get_int(&self, pos: usize) -> i32 {
+        let offset = self.element_offset(pos, 4);
+        i32::from_ne_bytes(self.data[offset..offset + 4].try_into().unwrap())
+    }
+
+    pub fn get_long(&self, pos: usize) -> i64 {
+        let offset = self.element_offset(pos, 8);
+        i64::from_ne_bytes(self.data[offset..offset + 8].try_into().unwrap())
+    }
+
+    pub fn get_float(&self, pos: usize) -> f32 {
+        let offset = self.element_offset(pos, 4);
+        f32::from_ne_bytes(self.data[offset..offset + 4].try_into().unwrap())
+    }
+
+    pub fn get_double(&self, pos: usize) -> f64 {
+        let offset = self.element_offset(pos, 8);
+        f64::from_ne_bytes(self.data[offset..offset + 8].try_into().unwrap())
+    }
+
+    /// Reads the offset_and_size packed long for variable-length elements.
+    fn get_offset_and_size(&self, pos: usize) -> (usize, usize) {
+        let packed = self.get_long(pos) as u64;
+        let offset = (packed >> 32) as usize;
+        let size = (packed & 0xFFFF_FFFF) as usize;
+        (offset, size)
+    }
+
+    pub fn get_string(&self, pos: usize) -> Result<&str> {
+        let bytes = self.read_var_len_bytes(pos)?;
+        std::str::from_utf8(bytes).map_err(|e| IllegalArgument {
+            message: format!("Invalid UTF-8 in array element at position 
{pos}: {e}"),
+        })
+    }
+
+    pub fn get_binary(&self, pos: usize) -> Result<&[u8]> {
+        self.read_var_len_bytes(pos)
+    }
+
+    pub fn get_decimal(&self, pos: usize, precision: u32, scale: u32) -> 
Result<Decimal> {
+        if Decimal::is_compact_precision(precision) {
+            let unscaled = self.get_long(pos);
+            Decimal::from_unscaled_long(unscaled, precision, scale)
+        } else {
+            let (offset, size) = self.get_offset_and_size(pos);
+            let bytes = self.checked_slice(offset, size, "decimal bytes")?;
+            Decimal::from_unscaled_bytes(bytes, precision, scale)
+        }
+    }
+
+    pub fn get_date(&self, pos: usize) -> Date {
+        Date::new(self.get_int(pos))
+    }
+
+    pub fn get_time(&self, pos: usize) -> Time {
+        Time::new(self.get_int(pos))
+    }
+
+    pub fn get_timestamp_ntz(&self, pos: usize, precision: u32) -> 
Result<TimestampNtz> {
+        if TimestampNtz::is_compact(precision) {
+            Ok(TimestampNtz::new(self.get_long(pos)))
+        } else {
+            let (offset, nanos_of_millis) = self.get_offset_and_size(pos);
+            let millis_bytes = self.checked_slice(offset, 8, "timestamp ntz 
millis")?;
+            let millis = i64::from_ne_bytes(millis_bytes.try_into().unwrap());
+            TimestampNtz::from_millis_nanos(millis, nanos_of_millis as i32)
+        }
+    }
+
+    pub fn get_timestamp_ltz(&self, pos: usize, precision: u32) -> 
Result<TimestampLtz> {
+        if TimestampLtz::is_compact(precision) {
+            Ok(TimestampLtz::new(self.get_long(pos)))
+        } else {
+            let (offset, nanos_of_millis) = self.get_offset_and_size(pos);
+            let millis_bytes = self.checked_slice(offset, 8, "timestamp ltz 
millis")?;
+            let millis = i64::from_ne_bytes(millis_bytes.try_into().unwrap());
+            TimestampLtz::from_millis_nanos(millis, nanos_of_millis as i32)
+        }
+    }
+
+    pub fn get_array(&self, pos: usize) -> Result<FlussArray> {
+        let bytes = self.read_var_len_bytes(pos)?;
+        FlussArray::from_bytes(bytes)
+    }
+}
+
+/// Writer for building a `FlussArray` element by element.
+/// Matches Java's `BinaryArrayWriter`.
+pub struct FlussArrayWriter {
+    data: Vec<u8>,
+    null_bits_offset: usize,
+    element_offset: usize,
+    element_size: usize,
+    cursor: usize,
+    num_elements: usize,
+}
+
+impl FlussArrayWriter {
+    /// Creates a new writer for an array with `num_elements` elements of the 
given element type.
+    pub fn new(num_elements: usize, element_type: &DataType) -> Self {
+        let element_size = calculate_fix_length_part_size(element_type);
+        Self::with_element_size(num_elements, element_size)
+    }
+
+    /// Creates a new writer with an explicit element size (in bytes).
+    pub fn with_element_size(num_elements: usize, element_size: usize) -> Self 
{
+        let header_in_bytes = calculate_header_in_bytes(num_elements);
+        let fixed_size = round_to_nearest_word(header_in_bytes + element_size 
* num_elements);
+        let mut data = vec![0u8; fixed_size];
+
+        // Write element count at offset 0 (native endian, matches Java Unsafe 
behavior)
+        data[0..4].copy_from_slice(&(num_elements as i32).to_ne_bytes());
+
+        FlussArrayWriter {
+            data,
+            null_bits_offset: 4,
+            element_offset: header_in_bytes,
+            element_size,
+            cursor: fixed_size,
+            num_elements,
+        }
+    }
+
+    fn get_element_offset(&self, pos: usize) -> usize {
+        self.element_offset + self.element_size * pos
+    }
+
+    /// Sets the null bit for the element at position `pos`.
+    pub fn set_null_at(&mut self, pos: usize) {
+        let byte_index = pos >> 3;
+        let bit = pos & 7;
+        self.data[self.null_bits_offset + byte_index] |= 1u8 << bit;
+    }
+
+    pub fn write_boolean(&mut self, pos: usize, value: bool) {
+        let offset = self.get_element_offset(pos);
+        self.data[offset] = if value { 1 } else { 0 };
+    }
+
+    pub fn write_byte(&mut self, pos: usize, value: i8) {
+        let offset = self.get_element_offset(pos);
+        self.data[offset] = value as u8;
+    }
+
+    pub fn write_short(&mut self, pos: usize, value: i16) {
+        let offset = self.get_element_offset(pos);
+        self.data[offset..offset + 2].copy_from_slice(&value.to_ne_bytes());
+    }
+
+    pub fn write_int(&mut self, pos: usize, value: i32) {
+        let offset = self.get_element_offset(pos);
+        self.data[offset..offset + 4].copy_from_slice(&value.to_ne_bytes());
+    }
+
+    pub fn write_long(&mut self, pos: usize, value: i64) {
+        let offset = self.get_element_offset(pos);
+        self.data[offset..offset + 8].copy_from_slice(&value.to_ne_bytes());
+    }
+
+    pub fn write_float(&mut self, pos: usize, value: f32) {
+        let offset = self.get_element_offset(pos);
+        self.data[offset..offset + 4].copy_from_slice(&value.to_ne_bytes());
+    }
+
+    pub fn write_double(&mut self, pos: usize, value: f64) {
+        let offset = self.get_element_offset(pos);
+        self.data[offset..offset + 8].copy_from_slice(&value.to_ne_bytes());
+    }
+
+    /// Writes variable-length bytes to the variable part and stores 
offset+size in the fixed slot.
+    fn write_bytes_to_var_len_part(&mut self, pos: usize, bytes: &[u8]) {
+        let rounded = round_to_nearest_word(bytes.len());
+        let var_offset = self.cursor;
+        self.data.resize(self.data.len() + rounded, 0);
+        self.data[var_offset..var_offset + bytes.len()].copy_from_slice(bytes);
+        self.set_offset_and_size(pos, var_offset, bytes.len());
+        self.cursor += rounded;
+    }
+
+    fn set_offset_and_size(&mut self, pos: usize, offset: usize, size: usize) {
+        let packed = ((offset as i64) << 32) | (size as i64);
+        self.write_long(pos, packed);
+    }
+
+    fn write_bytes_to_fix_len_part(&mut self, pos: usize, bytes: &[u8]) {
+        let len = bytes.len();
+        debug_assert!(len <= MAX_FIX_PART_DATA_SIZE);
+        let first_byte = (len as u64) | 0x80;
+        let mut seven_bytes = 0_u64;
+        if cfg!(target_endian = "little") {
+            for (i, b) in bytes.iter().enumerate() {
+                seven_bytes |= ((*b as u64) & 0xFF) << (i * 8);
+            }
+        } else {
+            for (i, b) in bytes.iter().enumerate() {
+                seven_bytes |= ((*b as u64) & 0xFF) << ((6 - i) * 8);
+            }
+        }
+        let packed = ((first_byte << 56) | seven_bytes) as i64;
+        self.write_long(pos, packed);
+    }
+
+    pub fn write_string(&mut self, pos: usize, value: &str) {
+        let bytes = value.as_bytes();
+        if bytes.len() <= MAX_FIX_PART_DATA_SIZE {
+            self.write_bytes_to_fix_len_part(pos, bytes);
+        } else {
+            self.write_bytes_to_var_len_part(pos, bytes);
+        }
+    }
+
+    pub fn write_binary_bytes(&mut self, pos: usize, value: &[u8]) {
+        if value.len() <= MAX_FIX_PART_DATA_SIZE {
+            self.write_bytes_to_fix_len_part(pos, value);
+        } else {
+            self.write_bytes_to_var_len_part(pos, value);
+        }
+    }
+
+    pub fn write_decimal(&mut self, pos: usize, value: &Decimal, precision: 
u32) {
+        if Decimal::is_compact_precision(precision) {
+            self.write_long(
+                pos,
+                value
+                    .to_unscaled_long()
+                    .expect("Decimal should fit in i64 for compact precision"),
+            );
+        } else {
+            let bytes = value.to_unscaled_bytes();
+            self.write_bytes_to_var_len_part(pos, &bytes);
+        }
+    }
+
+    pub fn write_date(&mut self, pos: usize, value: Date) {
+        self.write_int(pos, value.get_inner());
+    }
+
+    pub fn write_time(&mut self, pos: usize, value: Time) {
+        self.write_int(pos, value.get_inner());
+    }
+
+    pub fn write_timestamp_ntz(&mut self, pos: usize, value: &TimestampNtz, 
precision: u32) {
+        if TimestampNtz::is_compact(precision) {
+            self.write_long(pos, value.get_millisecond());
+        } else {
+            let millis_bytes = value.get_millisecond().to_ne_bytes();
+            let var_offset = self.cursor;
+            let rounded = round_to_nearest_word(8);
+            self.data.resize(self.data.len() + rounded, 0);
+            self.data[var_offset..var_offset + 
8].copy_from_slice(&millis_bytes);
+            self.set_offset_and_size(pos, var_offset, 
value.get_nano_of_millisecond() as usize);
+            self.cursor += rounded;
+        }
+    }
+
+    pub fn write_timestamp_ltz(&mut self, pos: usize, value: &TimestampLtz, 
precision: u32) {
+        if TimestampLtz::is_compact(precision) {
+            self.write_long(pos, value.get_epoch_millisecond());
+        } else {
+            let millis_bytes = value.get_epoch_millisecond().to_ne_bytes();
+            let var_offset = self.cursor;
+            let rounded = round_to_nearest_word(8);
+            self.data.resize(self.data.len() + rounded, 0);
+            self.data[var_offset..var_offset + 
8].copy_from_slice(&millis_bytes);
+            self.set_offset_and_size(pos, var_offset, 
value.get_nano_of_millisecond() as usize);
+            self.cursor += rounded;
+        }
+    }
+
+    /// Writes a nested FlussArray into this array at position `pos`.
+    pub fn write_array(&mut self, pos: usize, value: &FlussArray) {
+        self.write_bytes_to_var_len_part(pos, value.as_bytes());
+    }
+
+    /// Finalizes the writer and returns the completed FlussArray.
+    pub fn complete(self) -> Result<FlussArray> {
+        let mut data = self.data;
+        data.truncate(self.cursor);
+        FlussArray::from_bytes(&data)
+    }
+

Review Comment:
   We already own the `data` Vec here, so going through `from_bytes` would make an unnecessary copy.



##########
crates/fluss/src/record/arrow.rs:
##########
@@ -1184,6 +1188,71 @@ pub fn to_arrow_type(fluss_type: &DataType) -> 
Result<ArrowDataType> {
     })
 }
 
+/// Converts an Arrow data type back to a Fluss `DataType`.
+/// Used for reading array elements from Arrow ListArray back into Fluss types.
+pub fn from_arrow_type(arrow_type: &ArrowDataType) -> Result<DataType> {

Review Comment:
   `+1` to `pub(crate)`, but I don't think we have a similar function



##########
crates/fluss/src/record/arrow.rs:
##########
@@ -330,6 +330,10 @@ impl RowAppendRecordBatchBuilder {
             arrow_schema::DataType::Timestamp(arrow_schema::TimeUnit::Second, 
_) => {
                 Ok(Box::new(TimestampSecondBuilder::with_capacity(capacity)))
             }
+            arrow_schema::DataType::List(field) => {
+                let inner_builder = Self::create_builder(field.data_type(), 
capacity)?;
+                Ok(Box::new(ListBuilder::with_capacity(inner_builder, 
capacity)))

Review Comment:
   It's effectively capacity + capacity, because there is both an offsets 
buffer and a values buffer. That's fine, though we are assuming the lower 
bound for each list (one element), which is fine for now, I guess.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] feat: Add array data type support [fluss-rust]

Reply via email to