charlesdong1991 commented on code in PR #433: URL: https://github.com/apache/fluss-rust/pull/433#discussion_r2901684229
########## crates/fluss/src/row/binary_array.rs: ########## @@ -0,0 +1,736 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Binary array format matching Java's `BinaryArray.java` layout. +//! +//! Binary layout: +//! ```text +//! [size(4B)] + [null bits (4-byte word aligned)] + [fixed-length part] + [variable-length part] +//! ``` +//! +//! Java reference: `BinaryArray.java`, `BinaryArrayWriter.java` + +use crate::error::Error::IllegalArgument; +use crate::error::Result; +use crate::metadata::DataType; +use crate::row::Decimal; +use crate::row::datum::{Date, Time, TimestampLtz, TimestampNtz}; +use serde::Serialize; +use std::fmt; +use std::hash::{Hash, Hasher}; + +const MAX_FIX_PART_DATA_SIZE: usize = 7; +const HIGHEST_FIRST_BIT: u64 = 0x80_u64 << 56; +const HIGHEST_SECOND_TO_EIGHTH_BIT: u64 = 0x7F_u64 << 56; + +/// Calculates the header size in bytes: 4 (for element count) + null bits (4-byte word aligned). +/// Matches Java's `BinaryArray.calculateHeaderInBytes(numFields)`. +pub fn calculate_header_in_bytes(num_elements: usize) -> usize { + 4 + num_elements.div_ceil(32) * 4 +} + +/// Calculates the fixed-length part size per element for a given data type. +/// Matches Java's `BinaryArray.calculateFixLengthPartSize(DataType)`. +pub fn calculate_fix_length_part_size(element_type: &DataType) -> usize { + match element_type { + DataType::Boolean(_) | DataType::TinyInt(_) => 1, + DataType::SmallInt(_) => 2, + DataType::Int(_) | DataType::Float(_) | DataType::Date(_) | DataType::Time(_) => 4, + DataType::BigInt(_) + | DataType::Double(_) + | DataType::Char(_) + | DataType::String(_) + | DataType::Binary(_) + | DataType::Bytes(_) + | DataType::Decimal(_) + | DataType::Timestamp(_) + | DataType::TimestampLTz(_) + | DataType::Array(_) + | DataType::Map(_) + | DataType::Row(_) => 8, + } +} + +/// Rounds a byte count up to the nearest 8-byte word boundary. +/// Matches Java's `roundNumberOfBytesToNearestWord`. +fn round_to_nearest_word(num_bytes: usize) -> usize { + (num_bytes + 7) & !7 +} + +/// A Fluss binary array, wire-compatible with Java's `BinaryArray`. +/// +/// Stores elements in a flat byte buffer with a header (element count + null bitmap) +/// followed by fixed-length slots and an optional variable-length section. +#[derive(Clone)] +pub struct FlussArray { + data: Vec<u8>, + size: usize, + element_offset: usize, +} + +impl fmt::Debug for FlussArray { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("FlussArray") + .field("size", &self.size) + .field("data_len", &self.data.len()) + .finish() + } +} + +impl fmt::Display for FlussArray { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "FlussArray[size={}]", self.size) + } +} + +impl PartialEq for FlussArray { + fn eq(&self, other: &Self) -> bool { + self.data == other.data + } +} + +impl Eq for FlussArray {} + +impl PartialOrd for FlussArray { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for FlussArray { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.data.cmp(&other.data) + } +} + +impl Hash for FlussArray { + fn hash<H: Hasher>(&self, state: &mut H) { + self.data.hash(state); + } +} + +impl Serialize for FlussArray { + fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error> + where + S: serde::Serializer, + { + serializer.serialize_bytes(&self.data) + } +} + +impl FlussArray { + /// Creates a FlussArray by pointing to existing bytes. + pub fn from_bytes(data: &[u8]) -> Result<Self> { + if data.len() < 4 { + return Err(IllegalArgument { + message: format!( + "FlussArray data too short: need at least 4 bytes, got {}", + data.len() + ), + }); + } + let raw_size = i32::from_ne_bytes(data[0..4].try_into().unwrap()); + if raw_size < 0 { + return Err(IllegalArgument { + message: format!("FlussArray size must be non-negative, got {raw_size}"), + }); + } + let size = raw_size as usize; + let element_offset = calculate_header_in_bytes(size); + if element_offset > data.len() { + return Err(IllegalArgument { + message: format!( + "FlussArray header exceeds payload: header={}, payload={}", + element_offset, + data.len() + ), + }); + } + + Ok(FlussArray { + data: data.to_vec(), + size, + element_offset, + }) + } + + /// Returns the number of elements. + pub fn size(&self) -> usize { + self.size + } + + /// Returns the raw bytes of this array (the complete binary representation). + pub fn as_bytes(&self) -> &[u8] { + &self.data + } + + /// Returns true if the element at position `pos` is null. + pub fn is_null_at(&self, pos: usize) -> bool { + let byte_index = pos >> 3; + let bit = pos & 7; + (self.data[4 + byte_index] & (1u8 << bit)) != 0 + } + + fn element_offset(&self, ordinal: usize, element_size: usize) -> usize { + self.element_offset + ordinal * element_size + } + + fn checked_slice(&self, start: usize, len: usize, context: &str) -> Result<&[u8]> { + let end = start.checked_add(len).ok_or_else(|| IllegalArgument { + message: format!("Overflow while reading {context}: start={start}, len={len}"), + })?; + if end > self.data.len() { + return Err(IllegalArgument { + message: format!( + "Out-of-bounds while reading {context}: start={start}, len={len}, payload={}", + self.data.len() + ), + }); + } + Ok(&self.data[start..end]) + } + + fn read_var_len_bytes(&self, pos: usize) -> Result<&[u8]> { + let field_offset = self.element_offset(pos, 8); + let packed = self.get_long(pos) as u64; + let mark = packed & HIGHEST_FIRST_BIT; + + if mark == 0 { + let offset = (packed >> 32) as usize; + let len = (packed & 0xFFFF_FFFF) as usize; + self.checked_slice(offset, len, "variable-length array element") + } else { + let len = ((packed & HIGHEST_SECOND_TO_EIGHTH_BIT) >> 56) as usize; + if len > MAX_FIX_PART_DATA_SIZE { + return Err(IllegalArgument { + message: format!( + "Inline array element length must be <= {MAX_FIX_PART_DATA_SIZE}, got {len}" + ), + }); + } + // Java stores inline bytes in the 8-byte slot itself. + // On little-endian, bytes start at field_offset; on big-endian they start at +1. + let start = if cfg!(target_endian = "little") { + field_offset + } else { + field_offset + 1 + }; + self.checked_slice(start, len, "inline array element") + } + } + + pub fn get_boolean(&self, pos: usize) -> bool { + let offset = self.element_offset(pos, 1); + self.data[offset] != 0 + } + + pub fn get_byte(&self, pos: usize) -> i8 { + let offset = self.element_offset(pos, 1); + self.data[offset] as i8 + } + + pub fn get_short(&self, pos: usize) -> i16 { + let offset = self.element_offset(pos, 2); + i16::from_ne_bytes(self.data[offset..offset + 2].try_into().unwrap()) + } + + pub fn get_int(&self, pos: usize) -> i32 { + let offset = self.element_offset(pos, 4); + i32::from_ne_bytes(self.data[offset..offset + 4].try_into().unwrap()) + } + + pub fn get_long(&self, pos: usize) -> i64 { + let offset = self.element_offset(pos, 8); + i64::from_ne_bytes(self.data[offset..offset + 8].try_into().unwrap()) + } + + pub fn get_float(&self, pos: usize) -> f32 { + let offset = self.element_offset(pos, 4); + f32::from_ne_bytes(self.data[offset..offset + 4].try_into().unwrap()) + } + + pub fn get_double(&self, pos: usize) -> f64 { + let offset = self.element_offset(pos, 8); + f64::from_ne_bytes(self.data[offset..offset + 8].try_into().unwrap()) + } + + /// Reads the offset_and_size packed long for variable-length elements. + fn get_offset_and_size(&self, pos: usize) -> (usize, usize) { + let packed = self.get_long(pos) as u64; + let offset = (packed >> 32) as usize; + let size = (packed & 0xFFFF_FFFF) as usize; + (offset, size) + } + + pub fn get_string(&self, pos: usize) -> Result<&str> { + let bytes = self.read_var_len_bytes(pos)?; + std::str::from_utf8(bytes).map_err(|e| IllegalArgument { + message: format!("Invalid UTF-8 in array element at position {pos}: {e}"), + }) + } + + pub fn get_binary(&self, pos: usize) -> Result<&[u8]> { + self.read_var_len_bytes(pos) + } + + pub fn get_decimal(&self, pos: usize, precision: u32, scale: u32) -> Result<Decimal> { + if Decimal::is_compact_precision(precision) { + let unscaled = self.get_long(pos); + Decimal::from_unscaled_long(unscaled, precision, scale) + } else { + let (offset, size) = self.get_offset_and_size(pos); + let bytes = self.checked_slice(offset, size, "decimal bytes")?; + Decimal::from_unscaled_bytes(bytes, precision, scale) + } + } + + pub fn get_date(&self, pos: usize) -> Date { + Date::new(self.get_int(pos)) + } + + pub fn get_time(&self, pos: usize) -> Time { + Time::new(self.get_int(pos)) + } + + pub fn get_timestamp_ntz(&self, pos: usize, precision: u32) -> Result<TimestampNtz> { + if TimestampNtz::is_compact(precision) { + Ok(TimestampNtz::new(self.get_long(pos))) + } else { + let (offset, _size) = self.get_offset_and_size(pos); + let millis_bytes = self.checked_slice(offset, 8, "timestamp ntz millis")?; + let millis = i64::from_ne_bytes(millis_bytes.try_into().unwrap()); + let nanos = _size as i32; + TimestampNtz::from_millis_nanos(millis, nanos) + } + } + + pub fn get_timestamp_ltz(&self, pos: usize, precision: u32) -> Result<TimestampLtz> { + if TimestampLtz::is_compact(precision) { + Ok(TimestampLtz::new(self.get_long(pos))) + } else { + let (offset, _size) = self.get_offset_and_size(pos); + let millis_bytes = self.checked_slice(offset, 8, "timestamp ltz millis")?; + let millis = i64::from_ne_bytes(millis_bytes.try_into().unwrap()); + let nanos = _size as i32; + TimestampLtz::from_millis_nanos(millis, nanos) + } + } + + pub fn get_array(&self, pos: usize) -> Result<FlussArray> { + let bytes = self.read_var_len_bytes(pos)?; + FlussArray::from_bytes(bytes) + } +} + +/// Writer for building a `FlussArray` element by element. +/// Matches Java's `BinaryArrayWriter`. +pub struct FlussArrayWriter { + data: Vec<u8>, + null_bits_offset: usize, + element_offset: usize, + element_size: usize, + cursor: usize, + num_elements: usize, +} + +impl FlussArrayWriter { + /// Creates a new writer for an array with `num_elements` elements of the given element type. + pub fn new(num_elements: usize, element_type: &DataType) -> Self { + let element_size = calculate_fix_length_part_size(element_type); + Self::with_element_size(num_elements, element_size) + } + + /// Creates a new writer with an explicit element size (in bytes). + pub fn with_element_size(num_elements: usize, element_size: usize) -> Self { + let header_in_bytes = calculate_header_in_bytes(num_elements); + let fixed_size = round_to_nearest_word(header_in_bytes + element_size * num_elements); + let mut data = vec![0u8; fixed_size]; + + // Write element count at offset 0 (native endian, matches Java Unsafe behavior) + data[0..4].copy_from_slice(&(num_elements as i32).to_ne_bytes()); + + FlussArrayWriter { + data, + null_bits_offset: 4, + element_offset: header_in_bytes, + element_size, + cursor: fixed_size, + num_elements, + } + } + + fn get_element_offset(&self, pos: usize) -> usize { + self.element_offset + self.element_size * pos + } + + /// Sets the null bit for the element at position `pos`. + pub fn set_null_at(&mut self, pos: usize) { + let byte_index = pos >> 3; + let bit = pos & 7; + self.data[self.null_bits_offset + byte_index] |= 1u8 << bit; + } + + pub fn write_boolean(&mut self, pos: usize, value: bool) { + let offset = self.get_element_offset(pos); + self.data[offset] = if value { 1 } else { 0 }; + } + + pub fn write_byte(&mut self, pos: usize, value: i8) { + let offset = self.get_element_offset(pos); + self.data[offset] = value as u8; + } + + pub fn write_short(&mut self, pos: usize, value: i16) { + let offset = self.get_element_offset(pos); + self.data[offset..offset + 2].copy_from_slice(&value.to_ne_bytes()); + } + + pub fn write_int(&mut self, pos: usize, value: i32) { + let offset = self.get_element_offset(pos); + self.data[offset..offset + 4].copy_from_slice(&value.to_ne_bytes()); + } + + pub fn write_long(&mut self, pos: usize, value: i64) { + let offset = self.get_element_offset(pos); + self.data[offset..offset + 8].copy_from_slice(&value.to_ne_bytes()); + } + + pub fn write_float(&mut self, pos: usize, value: f32) { + let offset = self.get_element_offset(pos); + self.data[offset..offset + 4].copy_from_slice(&value.to_ne_bytes()); + } + + pub fn write_double(&mut self, pos: usize, value: f64) { + let offset = self.get_element_offset(pos); + self.data[offset..offset + 8].copy_from_slice(&value.to_ne_bytes()); + } + + /// Writes variable-length bytes to the variable part and stores offset+size in the fixed slot. + fn write_bytes_to_var_len_part(&mut self, pos: usize, bytes: &[u8]) { + let rounded = round_to_nearest_word(bytes.len()); + let var_offset = self.cursor; + self.data.resize(self.data.len() + rounded, 0); + self.data[var_offset..var_offset + bytes.len()].copy_from_slice(bytes); + self.set_offset_and_size(pos, var_offset, bytes.len()); + self.cursor += rounded; + } + + fn set_offset_and_size(&mut self, pos: usize, offset: usize, size: usize) { + let packed = ((offset as i64) << 32) | (size as i64); + self.write_long(pos, packed); Review Comment: not applicable ########## crates/fluss/src/row/datum.rs: ########## @@ -504,6 +602,16 @@ impl Datum<'_> { append_null_to_arrow!(TimestampMillisecondBuilder); append_null_to_arrow!(TimestampMicrosecondBuilder); append_null_to_arrow!(TimestampNanosecondBuilder); + // For List (Array) type, append null generically + if let arrow_schema::DataType::List(_) = data_type { + if let Some(b) = builder + .as_any_mut() + .downcast_mut::<ListBuilder<Box<dyn ArrayBuilder>>>() + { + b.append_null(); + } Review Comment: updated -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
