houqp commented on a change in pull request #7365:
URL: https://github.com/apache/arrow/pull/7365#discussion_r436335986



##########
File path: rust/arrow/src/array/builder.rs
##########
@@ -841,12 +1048,91 @@ impl ArrayBuilder for StringBuilder {
     }
 }
 
+// Helper function for appending Binary and Utf8 data
+fn append_binary_data(
+    builder: &mut ListBuilder<UInt8Builder>,
+    data_type: &DataType,
+    data: &[ArrayDataRef],
+) -> Result<()> {
+    if !check_array_data_type(data_type, data) {
+        return Err(ArrowError::InvalidArgumentError(
+            "Cannot append data to builder if data types are different".to_string(),
+        ));
+    }
+    for array in data {
+        // convert string to List<u8> to reuse list's cast
+        let int_data = &array.buffers()[1];
+        let int_data = Arc::new(ArrayData::new(
+            DataType::UInt8,
+            int_data.len(),
+            None,
+            None,
+            0,
+            vec![int_data.clone()],
+            vec![],
+        )) as ArrayDataRef;
+        let list_data = Arc::new(ArrayData::new(
+            DataType::List(Box::new(DataType::UInt8)),
+            array.len(),
+            None,
+            array.null_buffer().map(|buf| buf.clone()),
+            array.offset(),
+            vec![(&array.buffers()[0]).clone()],
+            vec![int_data],
+        ));
+        builder.append_data(&[list_data])?;
+    }
+    Ok(())
+}
+
 impl ArrayBuilder for FixedSizeBinaryBuilder {
     /// Returns the builder as a non-mutable `Any` reference.
     fn as_any(&self) -> &Any {
         self
     }
 
+    /// Appends data from other arrays into the builder
+    ///
+    /// This is most useful when concatenating arrays of the same type into a builder.
+    fn append_data(&mut self, data: &[ArrayDataRef]) -> Result<()> {
+        if !check_array_data_type(&self.data_type(), data) {
+            return Err(ArrowError::InvalidArgumentError(
+                "Cannot append data to builder if data types are different".to_string(),
+            ));
+        }
+        for array in data {
+            // convert string to FixedSizeList<u8> to reuse list's append
+            let int_data = &array.buffers()[0];
+            let int_data = Arc::new(ArrayData::new(
+                DataType::UInt8,
+                int_data.len(),
+                None,
+                None,
+                0,
+                vec![int_data.clone()],
+                vec![],
+            )) as ArrayDataRef;
+            let list_data = Arc::new(ArrayData::new(
+                DataType::FixedSizeList(Box::new(DataType::UInt8), self.builder.list_len),

Review comment:
       Ha, good point, I forgot that `byte_width` is part of the data type signature :+1:
   
   With regard to `value_length()` vs. `list_len`: after thinking more about it, I think you are right to use `list_len` here, since this is all internal module implementation.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to