mingmwang commented on code in PR #6003:
URL: https://github.com/apache/arrow-datafusion/pull/6003#discussion_r1169592336
##########
datafusion/core/src/physical_plan/aggregates/row_hash.rs:
##########
@@ -898,199 +873,9 @@ fn col_to_scalar(
return Ok(ScalarValue::Null);
}
}
- match array.data_type() {
- DataType::Null => Ok(ScalarValue::Null),
- DataType::Boolean => {
- typed_cast_to_scalar!(array, row_index, BooleanArray, Boolean)
- }
- DataType::Int8 => typed_cast_to_scalar!(array, row_index, Int8Array,
Int8),
- DataType::Int16 => typed_cast_to_scalar!(array, row_index, Int16Array,
Int16),
- DataType::Int32 => typed_cast_to_scalar!(array, row_index, Int32Array,
Int32),
- DataType::Int64 => typed_cast_to_scalar!(array, row_index, Int64Array,
Int64),
- DataType::UInt8 => typed_cast_to_scalar!(array, row_index, UInt8Array,
UInt8),
- DataType::UInt16 => {
- typed_cast_to_scalar!(array, row_index, UInt16Array, UInt16)
- }
- DataType::UInt32 => {
- typed_cast_to_scalar!(array, row_index, UInt32Array, UInt32)
- }
- DataType::UInt64 => {
- typed_cast_to_scalar!(array, row_index, UInt64Array, UInt64)
- }
- DataType::Float32 => {
- typed_cast_to_scalar!(array, row_index, Float32Array, Float32)
- }
- DataType::Float64 => {
- typed_cast_to_scalar!(array, row_index, Float64Array, Float64)
- }
- DataType::Decimal128(p, s) => {
- let array = as_decimal128_array(array)?;
- Ok(ScalarValue::Decimal128(
- Some(array.value(row_index)),
- *p,
- *s,
- ))
- }
- DataType::Binary => {
- typed_cast_to_scalar!(array, row_index, BinaryArray, Binary)
- }
- DataType::LargeBinary => {
- typed_cast_to_scalar!(array, row_index, LargeBinaryArray,
LargeBinary)
- }
- DataType::Utf8 => typed_cast_to_scalar!(array, row_index, StringArray,
Utf8),
- DataType::LargeUtf8 => {
- typed_cast_to_scalar!(array, row_index, LargeStringArray,
LargeUtf8)
- }
- DataType::List(nested_type) => {
- let list_array = as_list_array(array)?;
-
- let nested_array = list_array.value(row_index);
- let scalar_vec = (0..nested_array.len())
- .map(|i| ScalarValue::try_from_array(&nested_array, i))
- .collect::<Result<Vec<_>>>()?;
- let value = Some(scalar_vec);
- Ok(ScalarValue::new_list(
- value,
- nested_type.data_type().clone(),
- ))
- }
- DataType::Date32 => {
- typed_cast_to_scalar!(array, row_index, Date32Array, Date32)
- }
- DataType::Date64 => {
- typed_cast_to_scalar!(array, row_index, Date64Array, Date64)
- }
- DataType::Time32(TimeUnit::Second) => {
- typed_cast_to_scalar!(array, row_index, Time32SecondArray,
Time32Second)
- }
- DataType::Time32(TimeUnit::Millisecond) => typed_cast_to_scalar!(
- array,
- row_index,
- Time32MillisecondArray,
- Time32Millisecond
- ),
- DataType::Time64(TimeUnit::Microsecond) => typed_cast_to_scalar!(
- array,
- row_index,
- Time64MicrosecondArray,
- Time64Microsecond
- ),
- DataType::Time64(TimeUnit::Nanosecond) => typed_cast_to_scalar!(
- array,
- row_index,
- Time64NanosecondArray,
- Time64Nanosecond
- ),
- DataType::Timestamp(TimeUnit::Second, tz_opt) =>
typed_cast_tz_to_scalar!(
- array,
- row_index,
- TimestampSecondArray,
- TimestampSecond,
- tz_opt
- ),
- DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => {
- typed_cast_tz_to_scalar!(
- array,
- row_index,
- TimestampMillisecondArray,
- TimestampMillisecond,
- tz_opt
- )
- }
- DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => {
- typed_cast_tz_to_scalar!(
- array,
- row_index,
- TimestampMicrosecondArray,
- TimestampMicrosecond,
- tz_opt
- )
- }
- DataType::Timestamp(TimeUnit::Nanosecond, tz_opt) => {
- typed_cast_tz_to_scalar!(
- array,
- row_index,
- TimestampNanosecondArray,
- TimestampNanosecond,
- tz_opt
- )
- }
- DataType::Dictionary(key_type, _) => {
- let (values_array, values_index) = match key_type.as_ref() {
- DataType::Int8 => get_dict_value::<Int8Type>(array, row_index),
- DataType::Int16 => get_dict_value::<Int16Type>(array,
row_index),
- DataType::Int32 => get_dict_value::<Int32Type>(array,
row_index),
- DataType::Int64 => get_dict_value::<Int64Type>(array,
row_index),
- DataType::UInt8 => get_dict_value::<UInt8Type>(array,
row_index),
- DataType::UInt16 => get_dict_value::<UInt16Type>(array,
row_index),
- DataType::UInt32 => get_dict_value::<UInt32Type>(array,
row_index),
- DataType::UInt64 => get_dict_value::<UInt64Type>(array,
row_index),
- _ => unreachable!("Invalid dictionary keys type: {:?}",
key_type),
- };
- // look up the index in the values dictionary
- match values_index {
- Some(values_index) => {
- let value = ScalarValue::try_from_array(values_array,
values_index)?;
- Ok(ScalarValue::Dictionary(key_type.clone(),
Box::new(value)))
- }
- // else entry was null, so return null
- None => Ok(ScalarValue::Null),
- }
- }
- DataType::Struct(fields) => {
- let array = as_struct_array(array)?;
- let mut field_values: Vec<ScalarValue> = Vec::new();
- for col_index in 0..array.num_columns() {
- let col_array = array.column(col_index);
- let col_scalar = ScalarValue::try_from_array(col_array,
row_index)?;
- field_values.push(col_scalar);
- }
- Ok(ScalarValue::Struct(Some(field_values), fields.clone()))
- }
- DataType::FixedSizeList(nested_type, _len) => {
- let list_array = as_fixed_size_list_array(array)?;
- match list_array.is_null(row_index) {
- true => Ok(ScalarValue::Null),
- false => {
- let nested_array = list_array.value(row_index);
- let scalar_vec = (0..nested_array.len())
- .map(|i| ScalarValue::try_from_array(&nested_array, i))
- .collect::<Result<Vec<_>>>()?;
- Ok(ScalarValue::new_list(
- Some(scalar_vec),
- nested_type.data_type().clone(),
- ))
- }
- }
- }
- DataType::FixedSizeBinary(_) => {
- let array = as_fixed_size_binary_array(array)?;
- let size = match array.data_type() {
- DataType::FixedSizeBinary(size) => *size,
- _ => unreachable!(),
- };
- Ok(ScalarValue::FixedSizeBinary(
- size,
- Some(array.value(row_index).into()),
- ))
- }
- DataType::Interval(IntervalUnit::DayTime) => {
- typed_cast_to_scalar!(array, row_index, IntervalDayTimeArray,
IntervalDayTime)
- }
- DataType::Interval(IntervalUnit::YearMonth) => typed_cast_to_scalar!(
- array,
- row_index,
- IntervalYearMonthArray,
- IntervalYearMonth
- ),
- DataType::Interval(IntervalUnit::MonthDayNano) =>
typed_cast_to_scalar!(
- array,
- row_index,
- IntervalMonthDayNanoArray,
- IntervalMonthDayNano
- ),
- other => Err(DataFusionError::NotImplemented(format!(
- "GroupedHashAggregate: can't create a scalar from array of type
\"{other:?}\""
- ))),
+ let mut res = ScalarValue::try_from_array(array, row_index)?;
+ if res.is_null() {
Review Comment:
Yes, agree. Just remove it.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]