nevi-me commented on a change in pull request #7365:
URL: https://github.com/apache/arrow/pull/7365#discussion_r436308395
##########
File path: rust/arrow/src/array/builder.rs
##########
@@ -841,12 +1048,91 @@ impl ArrayBuilder for StringBuilder {
}
}
+// Helper function for appending Binary and Utf8 data
+fn append_binary_data(
+ builder: &mut ListBuilder<UInt8Builder>,
+ data_type: &DataType,
+ data: &[ArrayDataRef],
+) -> Result<()> {
+ if !check_array_data_type(data_type, data) {
+ return Err(ArrowError::InvalidArgumentError(
+ "Cannot append data to builder if data types are
different".to_string(),
+ ));
+ }
+ for array in data {
+ // convert string to List<u8> to reuse list's cast
+ let int_data = &array.buffers()[1];
+ let int_data = Arc::new(ArrayData::new(
+ DataType::UInt8,
+ int_data.len(),
+ None,
+ None,
+ 0,
+ vec![int_data.clone()],
+ vec![],
+ )) as ArrayDataRef;
+ let list_data = Arc::new(ArrayData::new(
+ DataType::List(Box::new(DataType::UInt8)),
+ array.len(),
+ None,
+ array.null_buffer().map(|buf| buf.clone()),
+ array.offset(),
+ vec![(&array.buffers()[0]).clone()],
+ vec![int_data],
+ ));
+ builder.append_data(&[list_data])?;
+ }
+ Ok(())
+}
+
impl ArrayBuilder for FixedSizeBinaryBuilder {
/// Returns the builder as a non-mutable `Any` reference.
fn as_any(&self) -> &Any {
self
}
+ /// Appends data from other arrays into the builder
+ ///
+ /// This is most useful when concatenating arrays of the same type into a
builder.
+ fn append_data(&mut self, data: &[ArrayDataRef]) -> Result<()> {
+ if !check_array_data_type(&self.data_type(), data) {
+ return Err(ArrowError::InvalidArgumentError(
+ "Cannot append data to builder if data types are
different".to_string(),
+ ));
+ }
+ for array in data {
+ // convert string to FixedSizeList<u8> to reuse list's append
+ let int_data = &array.buffers()[0];
+ let int_data = Arc::new(ArrayData::new(
+ DataType::UInt8,
+ int_data.len(),
+ None,
+ None,
+ 0,
+ vec![int_data.clone()],
+ vec![],
+ )) as ArrayDataRef;
+ let list_data = Arc::new(ArrayData::new(
+ DataType::FixedSizeList(Box::new(DataType::UInt8),
self.builder.list_len),
Review comment:
I don't understand you entirely here, mind clarifying?
Here I'm reconstructing the data as a FSList<u8, len> from FSBinary<len>,
otherwise I'd get an error about data types not being the same.
I used list_len directly because i32 is a Copy type. I can change to
builder.value_length()
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]