jorgecarleitao commented on a change in pull request #8172:
URL: https://github.com/apache/arrow/pull/8172#discussion_r487307067
##########
File path: rust/datafusion/src/physical_plan/common.rs
##########
@@ -205,3 +213,91 @@ pub fn get_scalar_value(array: &ArrayRef, row: usize) ->
Result<Option<ScalarVal
};
Ok(value)
}
+
+/// Converts a scalar value into an array.
+/// This is useful for aggregations.
+///
+/// Every supported variant yields an array built from a one-element `vec!`;
+/// `ScalarValue::Null` and `ScalarValue::Struct` return an `InternalError`.
+pub fn to_array(value: &ScalarValue) -> Result<ArrayRef> {
+ match value {
+ ScalarValue::Boolean(e) => Ok(Arc::new(BooleanArray::from(vec![*e]))
as ArrayRef),
+ ScalarValue::Float64(e) => Ok(Arc::new(Float64Array::from(vec![*e]))
as ArrayRef),
+ ScalarValue::Float32(e) => Ok(Arc::new(Float32Array::from(vec![*e]))),
+ ScalarValue::Int8(e) => Ok(Arc::new(Int8Array::from(vec![*e]))),
+ ScalarValue::Int16(e) => Ok(Arc::new(Int16Array::from(vec![*e]))),
+ ScalarValue::Int32(e) => Ok(Arc::new(Int32Array::from(vec![*e]))),
+ ScalarValue::Int64(e) => Ok(Arc::new(Int64Array::from(vec![*e]))),
+ ScalarValue::UInt8(e) => Ok(Arc::new(UInt8Array::from(vec![*e]))),
+ ScalarValue::UInt16(e) => Ok(Arc::new(UInt16Array::from(vec![*e]))),
+ ScalarValue::UInt32(e) => Ok(Arc::new(UInt32Array::from(vec![*e]))),
+ ScalarValue::UInt64(e) => Ok(Arc::new(UInt64Array::from(vec![*e]))),
+ ScalarValue::Utf8(e) => {
+ // First `v`: an owned copy of the string, or an empty string when `e` is `None`.
+ // Second `v`: `Some(&str)` borrowing that copy when `e` is `Some`, else `None`,
+ // so a missing value stays `None` rather than becoming "".
+ // NOTE(review): presumably this could borrow from `e` directly - confirm.
+ let v = e.as_ref().unwrap_or(&"".to_string()).clone();
+ let v = e.as_ref().and_then(|_| Some(&*v));
+ Ok(Arc::new(StringArray::from(vec![v])))
+ }
+ ScalarValue::LargeUtf8(e) => {
+ // Same two-step construction as the `Utf8` arm: an owned copy first, then an
+ // `Option<&str>` that is `None` exactly when `e` is `None`.
+ let v = e.as_ref().unwrap_or(&"".to_string()).clone();
+ let v = e.as_ref().and_then(|_| Some(&*v));
+ Ok(Arc::new(LargeStringArray::from(vec![v])))
+ }
+ // The remaining variants have no array conversion here and are reported as errors.
+ ScalarValue::Null => Err(ExecutionError::InternalError(format!(
+ "Cannot convert scalar {:?} to array",
+ value
+ ))),
+ ScalarValue::Struct(_) => Err(ExecutionError::InternalError(format!(
+ "Cannot convert scalar {:?} to array",
+ value
+ ))),
+ }
+}
+
+/// creates an empty record batch.
+pub fn create_batch_empty(schema: &Schema) -> Result<Vec<ArrayRef>> {
+ schema
+ .fields()
+ .iter()
+ .map(|f| match f.data_type() {
+ DataType::Float32 => {
+ Ok(Arc::new(Float32Array::from(vec![] as Vec<f32>)) as
ArrayRef)
+ }
+ DataType::Float64 => {
+ Ok(Arc::new(Float64Array::from(vec![] as Vec<f64>)) as
ArrayRef)
+ }
+ DataType::Int64 => {
+ Ok(Arc::new(Int64Array::from(vec![] as Vec<i64>)) as ArrayRef)
+ }
+ DataType::Int32 => {
+ Ok(Arc::new(Int32Array::from(vec![] as Vec<i32>)) as ArrayRef)
+ }
+ DataType::Int16 => {
+ Ok(Arc::new(Int16Array::from(vec![] as Vec<i16>)) as ArrayRef)
+ }
+ DataType::Int8 => {
+ Ok(Arc::new(Int8Array::from(vec![] as Vec<i8>)) as ArrayRef)
+ }
+ DataType::UInt64 => {
+ Ok(Arc::new(UInt64Array::from(vec![] as Vec<u64>)) as ArrayRef)
+ }
+ DataType::UInt32 => {
+ Ok(Arc::new(UInt32Array::from(vec![] as Vec<u32>)) as ArrayRef)
+ }
+ DataType::UInt16 => {
+ Ok(Arc::new(UInt16Array::from(vec![] as Vec<u16>)) as ArrayRef)
+ }
+ DataType::UInt8 => {
+ Ok(Arc::new(UInt8Array::from(vec![] as Vec<u8>)) as ArrayRef)
+ }
+ DataType::Utf8 => {
+ Ok(Arc::new(StringArray::from(vec![] as Vec<&str>)) as
ArrayRef)
+ }
+ DataType::Boolean => {
+ Ok(Arc::new(BooleanArray::from(vec![] as Vec<bool>)) as
ArrayRef)
+ }
+ _ => Err(ExecutionError::NotImplemented(format!(
Review comment:
So far, this set of types has been enough because we do not have aggregations
over other types. The function itself is needed because some batches can have
no entries, in which case we need to build an empty record batch.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]