houqp commented on a change in pull request #7365:
URL: https://github.com/apache/arrow/pull/7365#discussion_r436303006
##########
File path: rust/arrow/src/array/builder.rs
##########
@@ -841,12 +1048,91 @@ impl ArrayBuilder for StringBuilder {
}
}
+// Helper function for appending Binary and Utf8 data
+fn append_binary_data(
+ builder: &mut ListBuilder<UInt8Builder>,
+ data_type: &DataType,
+ data: &[ArrayDataRef],
+) -> Result<()> {
+ if !check_array_data_type(data_type, data) {
+ return Err(ArrowError::InvalidArgumentError(
+ "Cannot append data to builder if data types are
different".to_string(),
+ ));
+ }
+ for array in data {
+ // convert string to List<u8> to reuse list's cast
+ let int_data = &array.buffers()[1];
+ let int_data = Arc::new(ArrayData::new(
+ DataType::UInt8,
+ int_data.len(),
+ None,
+ None,
+ 0,
+ vec![int_data.clone()],
+ vec![],
+ )) as ArrayDataRef;
+ let list_data = Arc::new(ArrayData::new(
+ DataType::List(Box::new(DataType::UInt8)),
+ array.len(),
+ None,
+ array.null_buffer().map(|buf| buf.clone()),
+ array.offset(),
+ vec![(&array.buffers()[0]).clone()],
+ vec![int_data],
+ ));
+ builder.append_data(&[list_data])?;
+ }
+ Ok(())
+}
+
impl ArrayBuilder for FixedSizeBinaryBuilder {
/// Returns the builder as a non-mutable `Any` reference.
fn as_any(&self) -> &Any {
self
}
+ /// Appends data from other arrays into the builder
+ ///
+ /// This is most useful when concatenating arrays of the same type into a
builder.
+ fn append_data(&mut self, data: &[ArrayDataRef]) -> Result<()> {
+ if !check_array_data_type(&self.data_type(), data) {
+ return Err(ArrowError::InvalidArgumentError(
+ "Cannot append data to builder if data types are
different".to_string(),
+ ));
+ }
+ for array in data {
+ // convert string to FixedSizeList<u8> to reuse list's append
+ let int_data = &array.buffers()[0];
+ let int_data = Arc::new(ArrayData::new(
+ DataType::UInt8,
+ int_data.len(),
+ None,
+ None,
+ 0,
+ vec![int_data.clone()],
+ vec![],
+ )) as ArrayDataRef;
+ let list_data = Arc::new(ArrayData::new(
+ DataType::FixedSizeList(Box::new(DataType::UInt8),
self.builder.list_len),
Review comment:
do we need to validate list_len for each ArrayData as well? Also I
recommend calling value_length() method to get list_len value instead or remove
value_length entirely and use list_len directly everywhere.
##########
File path: rust/arrow/src/array/builder.rs
##########
@@ -1018,6 +1304,48 @@ impl ArrayBuilder for StructBuilder {
self.len
}
+ /// Appends data from other arrays into the builder
+ ///
+ /// This is most useful when concatenating arrays of the same type into a
builder.
+ fn append_data(&mut self, data: &[ArrayDataRef]) -> Result<()> {
+ if !check_array_data_type(&self.data_type(), data) {
+ return Err(ArrowError::InvalidArgumentError(
+ "Cannot append data to builder if data types are
different".to_string(),
+ ));
+ }
+ for array in data {
+ let len = array.len();
+ if len == 0 {
+ continue;
+ }
+ let offset = array.offset();
+ for (builder, child_data) in self
+ .field_builders
+ .iter_mut()
+ .zip(array.child_data().iter())
+ {
+ // slice child_data to account for offsets
+ let child_array = make_array(child_data.clone());
+ let sliced = child_array.slice(offset, len);
+ builder.append_data(&[sliced.data()])?;
+ }
+ // append array length
+ self.len += len;
+ for i in 0..len {
+ // account for offset as `ArrayData` does not
+ self.bitmap_builder.append(array.is_valid(offset + i))?;
Review comment:
looks like we missed the reserve call before the loop here.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]