alamb commented on a change in pull request #8853:
URL: https://github.com/apache/arrow/pull/8853#discussion_r541919322



##########
File path: rust/arrow/src/array/array_binary.rs
##########
@@ -373,6 +376,45 @@ impl From<Vec<Vec<u8>>> for FixedSizeBinaryArray {
     }
 }
 
+impl From<Vec<Option<Vec<u8>>>> for FixedSizeBinaryArray {
+    fn from(data: Vec<Option<Vec<u8>>>) -> Self {
+        let len = data.len();
+        assert!(len > 0);
+        // try to estimate the size. This may not be possible no entry is 
valid => panic

Review comment:
       This behavior (panic'ing') doesn't seem ideal, though I realize there 
isn't much useful to do when converting a Vec of entirely `None` -- maybe we 
could just return a zero length array. 
   
   Could definitely be done as a follow on PR 

##########
File path: rust/arrow/src/array/array_binary.rs
##########
@@ -373,6 +376,45 @@ impl From<Vec<Vec<u8>>> for FixedSizeBinaryArray {
     }
 }
 
+impl From<Vec<Option<Vec<u8>>>> for FixedSizeBinaryArray {
+    fn from(data: Vec<Option<Vec<u8>>>) -> Self {
+        let len = data.len();
+        assert!(len > 0);
+        // try to estimate the size. This may not be possible no entry is 
valid => panic
+        let size = data.iter().filter_map(|e| 
e.as_ref()).next().unwrap().len();
+        assert!(data

Review comment:
       Given this operation can fail (if all the elements are not the same 
length) perhaps we should implement `TryFrom` instead of `From` and `panic` -- 
again, this would be an excellent follow on PR (or maybe file a ticket and it 
could be an excellent first contribution for someone who wanted to contribute)

##########
File path: rust/arrow/src/compute/kernels/concat.rs
##########
@@ -20,154 +20,60 @@
 //! Example:
 //!
 //! ```
-//! use std::sync::Arc;
 //! use arrow::array::{ArrayRef, StringArray};
 //! use arrow::compute::concat;
 //!
-//! let arr = concat(&vec![
-//!     Arc::new(StringArray::from(vec!["hello", "world"])) as ArrayRef,
-//!     Arc::new(StringArray::from(vec!["!"])) as ArrayRef,
+//! let arr = concat(&[
+//!     &StringArray::from(vec!["hello", "world"]),
+//!     &StringArray::from(vec!["!"]),
 //! ]).unwrap();
 //! assert_eq!(arr.len(), 3);
 //! ```
 
+use std::sync::Arc;
+
 use crate::array::*;
-use crate::datatypes::*;
 use crate::error::{ArrowError, Result};
 
-use TimeUnit::*;
-
-/// Concatenate multiple `ArrayRef` with the same type.
-///
-/// Returns a new ArrayRef.
-pub fn concat(array_list: &[ArrayRef]) -> Result<ArrayRef> {
-    if array_list.is_empty() {
+/// Concatenate multiple [Array] of the same type into a single [ArrayRef].
+pub fn concat(arrays: &[&Array]) -> Result<ArrayRef> {
+    if arrays.is_empty() {
         return Err(ArrowError::ComputeError(
             "concat requires input of at least one array".to_string(),
         ));
     }
-    let array_data_list = &array_list
+
+    if arrays
         .iter()
-        .map(|a| a.data_ref().clone())
-        .collect::<Vec<ArrayDataRef>>();
-
-    match array_data_list[0].data_type() {
-        DataType::Utf8 => {
-            let mut builder = StringBuilder::new(0);
-            builder.append_data(array_data_list)?;
-            Ok(ArrayBuilder::finish(&mut builder))
-        }
-        DataType::Boolean => {
-            let mut builder = BooleanArray::builder(0);
-            builder.append_data(array_data_list)?;
-            Ok(ArrayBuilder::finish(&mut builder))
-        }
-        DataType::Int8 => concat_primitive::<Int8Type>(array_data_list),
-        DataType::Int16 => concat_primitive::<Int16Type>(array_data_list),
-        DataType::Int32 => concat_primitive::<Int32Type>(array_data_list),
-        DataType::Int64 => concat_primitive::<Int64Type>(array_data_list),
-        DataType::UInt8 => concat_primitive::<UInt8Type>(array_data_list),
-        DataType::UInt16 => concat_primitive::<UInt16Type>(array_data_list),
-        DataType::UInt32 => concat_primitive::<UInt32Type>(array_data_list),
-        DataType::UInt64 => concat_primitive::<UInt64Type>(array_data_list),
-        DataType::Float32 => concat_primitive::<Float32Type>(array_data_list),
-        DataType::Float64 => concat_primitive::<Float64Type>(array_data_list),
-        DataType::Date32(_) => concat_primitive::<Date32Type>(array_data_list),
-        DataType::Date64(_) => concat_primitive::<Date64Type>(array_data_list),
-        DataType::Time32(Second) => 
concat_primitive::<Time32SecondType>(array_data_list),
-        DataType::Time32(Millisecond) => {
-            concat_primitive::<Time32MillisecondType>(array_data_list)
-        }
-        DataType::Time64(Microsecond) => {
-            concat_primitive::<Time64MicrosecondType>(array_data_list)
-        }
-        DataType::Time64(Nanosecond) => {
-            concat_primitive::<Time64NanosecondType>(array_data_list)
-        }
-        DataType::Timestamp(Second, _) => {
-            concat_primitive::<TimestampSecondType>(array_data_list)
-        }
-        DataType::Timestamp(Millisecond, _) => {
-            concat_primitive::<TimestampMillisecondType>(array_data_list)
-        }
-        DataType::Timestamp(Microsecond, _) => {
-            concat_primitive::<TimestampMicrosecondType>(array_data_list)
-        }
-        DataType::Timestamp(Nanosecond, _) => {
-            concat_primitive::<TimestampNanosecondType>(array_data_list)
-        }
-        DataType::Interval(IntervalUnit::YearMonth) => {
-            concat_primitive::<IntervalYearMonthType>(array_data_list)
-        }
-        DataType::Interval(IntervalUnit::DayTime) => {
-            concat_primitive::<IntervalDayTimeType>(array_data_list)
-        }
-        DataType::Duration(TimeUnit::Second) => {
-            concat_primitive::<DurationSecondType>(array_data_list)
-        }
-        DataType::Duration(TimeUnit::Millisecond) => {
-            concat_primitive::<DurationMillisecondType>(array_data_list)
-        }
-        DataType::Duration(TimeUnit::Microsecond) => {
-            concat_primitive::<DurationMicrosecondType>(array_data_list)
-        }
-        DataType::Duration(TimeUnit::Nanosecond) => {
-            concat_primitive::<DurationNanosecondType>(array_data_list)
-        }
-        DataType::List(nested_field) => {
-            concat_list(array_data_list, nested_field.data_type())
-        }
-        t => Err(ArrowError::ComputeError(format!(
-            "Concat not supported for data type {:?}",
-            t
-        ))),
+        .any(|array| array.data_type() != arrays[0].data_type())
+    {
+        return Err(ArrowError::InvalidArgumentError(
+            "It is not possible to concatenate arrays of different data types."
+                .to_string(),
+        ));
     }
-}
 
-#[inline]
-fn concat_primitive<T>(array_data_list: &[ArrayDataRef]) -> Result<ArrayRef>
-where
-    T: ArrowNumericType,
-{
-    let mut builder = PrimitiveArray::<T>::builder(0);
-    builder.append_data(array_data_list)?;
-    Ok(ArrayBuilder::finish(&mut builder))
-}
+    let lengths = arrays.iter().map(|array| array.len()).collect::<Vec<_>>();
+    let capacity = lengths.iter().sum();
 
-#[inline]
-fn concat_primitive_list<T>(array_data_list: &[ArrayDataRef]) -> 
Result<ArrayRef>
-where
-    T: ArrowNumericType,
-{
-    let mut builder = ListBuilder::new(PrimitiveArray::<T>::builder(0));
-    builder.append_data(array_data_list)?;
-    Ok(ArrayBuilder::finish(&mut builder))
-}
+    let arrays = arrays
+        .iter()
+        .map(|a| a.data_ref().as_ref())
+        .collect::<Vec<_>>();
 
-#[inline]
-fn concat_list(
-    array_data_list: &[ArrayDataRef],
-    data_type: &DataType,
-) -> Result<ArrayRef> {
-    match data_type {
-        DataType::Int8 => concat_primitive_list::<Int8Type>(array_data_list),
-        DataType::Int16 => concat_primitive_list::<Int16Type>(array_data_list),
-        DataType::Int32 => concat_primitive_list::<Int32Type>(array_data_list),
-        DataType::Int64 => concat_primitive_list::<Int64Type>(array_data_list),
-        DataType::UInt8 => concat_primitive_list::<UInt8Type>(array_data_list),
-        DataType::UInt16 => 
concat_primitive_list::<UInt16Type>(array_data_list),
-        DataType::UInt32 => 
concat_primitive_list::<UInt32Type>(array_data_list),
-        DataType::UInt64 => 
concat_primitive_list::<UInt64Type>(array_data_list),
-        t => Err(ArrowError::ComputeError(format!(
-            "Concat not supported for list with data type {:?}",
-            t
-        ))),
+    let mut mutable = MutableArrayData::new(arrays, false, capacity);
+
+    for (i, len) in lengths.iter().enumerate() {
+        mutable.extend(i, 0, *len)

Review comment:
       well, that is certainly nicer




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to