Jefffrey commented on code in PR #22244:
URL: https://github.com/apache/datafusion/pull/22244#discussion_r3297390646
##########
datafusion/functions/src/string/concat.rs:
##########
@@ -126,7 +137,16 @@ impl ScalarUDFImpl for ConcatFunc {
};
if let ScalarValue::Binary(Some(value)) = scalar {
values.push(value);
+ } else if let ScalarValue::LargeBinary(Some(value)) = scalar {
+ values.push(value);
+ } else if let ScalarValue::BinaryView(Some(value)) = scalar {
+ values.push(value);
+ } else if let ScalarValue::FixedSizeBinary(_, Some(value)) =
scalar {
Review Comment:
Can we have fsb at this point? Shouldn't coercion promote it to largebinary?
##########
datafusion/functions/src/string/concat_ws.rs:
##########
@@ -934,4 +915,116 @@ mod tests {
Ok(())
}
+
+ #[test]
+ fn concat_ws_binary_scalars() -> Result<()> {
+ let c0 =
ColumnarValue::Scalar(ScalarValue::Binary(Some(b"|".to_vec())));
+ let c1 =
ColumnarValue::Scalar(ScalarValue::Binary(Some(b"aa".to_vec())));
+ let c2 = ColumnarValue::Scalar(ScalarValue::Binary(None));
+ let c3 =
ColumnarValue::Scalar(ScalarValue::Binary(Some(b"cc".to_vec())));
+
+ let arg_fields = vec![
+ Field::new("a", Binary, true).into(),
+ Field::new("a", Binary, true).into(),
+ Field::new("a", Binary, true).into(),
+ Field::new("a", Binary, true).into(),
+ ];
+ let args = ScalarFunctionArgs {
+ args: vec![c0, c1, c2, c3],
+ arg_fields,
+ number_rows: 1,
+ return_field: Field::new("f", Binary, true).into(),
+ config_options: Arc::new(ConfigOptions::default()),
+ };
+ let result = ConcatWsFunc::new().invoke_with_args(args)?;
+ match result {
+ ColumnarValue::Scalar(ScalarValue::Binary(Some(v))) => {
+ assert_eq!(v, b"aa|cc");
+ }
+ other => panic!("Expected Binary scalar, got {other:?}"),
+ }
+
+ Ok(())
+ }
+
+ #[test]
+ fn concat_ws_binary_arrays() -> Result<()> {
+ let c0 =
ColumnarValue::Scalar(ScalarValue::Binary(Some(b",".to_vec())));
+ let c1 = ColumnarValue::Array(Arc::new(BinaryArray::from_vec(vec![
+ b"foo".as_ref(),
+ b"bar",
+ b"baz",
+ ])));
+ let c2 =
ColumnarValue::Array(Arc::new(LargeBinaryArray::from_opt_vec(vec![
+ Some(b"x".as_ref()),
+ None,
+ Some(b"z"),
+ ])));
+
+ let arg_fields = vec![
+ Field::new("a", Binary, true).into(),
+ Field::new("a", Binary, true).into(),
+ Field::new("a", LargeBinary, true).into(),
+ ];
+ let args = ScalarFunctionArgs {
+ args: vec![c0, c1, c2],
+ arg_fields,
+ number_rows: 3,
+ return_field: Field::new("f", LargeBinary, true).into(),
+ config_options: Arc::new(ConfigOptions::default()),
+ };
+
+ let result = ConcatWsFunc::new().invoke_with_args(args)?;
+ let expected = Arc::new(LargeBinaryArray::from_opt_vec(vec![
+ Some(b"foo,x".as_ref()),
+ Some(b"bar"),
+ Some(b"baz,z"),
+ ])) as ArrayRef;
+ match &result {
+ ColumnarValue::Array(array) => assert_eq!(&expected, array),
+ _ => panic!("Expected array result"),
+ }
+
+ Ok(())
+ }
+ //
+ // #[test]
Review Comment:
Commented tests?
##########
datafusion/functions/src/strings.rs:
##########
@@ -1190,15 +1081,182 @@ impl ColumnarValueRef<'_> {
| Self::NonNullableArray(_)
| Self::NonNullableStringViewArray(_)
| Self::NonNullableLargeStringArray(_)
- | Self::NonNullableBinaryArray(_) => None,
+ | Self::NonNullableBinaryArray(_)
+ | Self::NonNullableLargeBinaryArray(_)
+ | Self::NonNullableBinaryViewArray(_)
+ | Self::NonNullableFixedSizeBinaryArray(_) => None,
Self::NullableArray(array) => array.nulls().cloned(),
Self::NullableStringViewArray(array) => array.nulls().cloned(),
Self::NullableLargeStringArray(array) => array.nulls().cloned(),
Self::NullableBinaryArray(array) => array.nulls().cloned(),
+ Self::NullableLargeBinaryArray(array) => array.nulls().cloned(),
+ Self::NullableBinaryViewArray(array) => array.nulls().cloned(),
+ Self::NullableFixedSizeBinaryArray(array) =>
array.nulls().cloned(),
+ }
+ }
+
+ /// Parse a [`ColumnarValue`] argument into `ColumnarValueRef`.
+ /// Returns `None` when the argument is null or null scalar
+ /// Returns an error when a columnar value type is not supported.
+ /// Shared by `concat` and `concat_ws`.
+ pub(crate) fn from_columnar_value<'a>(
+ col: &'a ColumnarValue,
+ data_size: &mut usize,
+ len: usize,
+ size_factor: usize,
+ convert_to_str: bool,
+ ) -> Result<Option<ColumnarValueRef<'a>>> {
+ match col {
+ ColumnarValue::Scalar(ScalarValue::Utf8(maybe_value))
+ | ColumnarValue::Scalar(ScalarValue::LargeUtf8(maybe_value))
+ | ColumnarValue::Scalar(ScalarValue::Utf8View(maybe_value)) => {
+ if let Some(s) = maybe_value {
+ *data_size += s.len() * len * size_factor;
+ Ok(Some(ColumnarValueRef::Scalar(s.as_bytes())))
+ } else {
+ Ok(None)
+ }
+ }
+ ColumnarValue::Scalar(ScalarValue::Binary(maybe_value))
+ | ColumnarValue::Scalar(ScalarValue::LargeBinary(maybe_value))
+ | ColumnarValue::Scalar(ScalarValue::BinaryView(maybe_value))
+ | ColumnarValue::Scalar(ScalarValue::FixedSizeBinary(_,
maybe_value)) => {
+ if let Some(b) = maybe_value {
+ *data_size += b.len() * len * size_factor;
+ Ok(Some(ColumnarValueRef::Scalar(b.as_slice())))
+ } else {
+ Ok(None)
+ }
+ }
+ ColumnarValue::Scalar(scalar) if scalar.is_null() => {
+ // null scalar is skipped
+ Ok(None)
+ }
+ ColumnarValue::Scalar(scalar) if convert_to_str => {
+ match scalar.try_as_str() {
+ Some(Some(s)) => {
+ *data_size += s.len() * len * size_factor;
+ Ok(Some(ColumnarValueRef::Scalar(s.as_bytes())))
+ }
+ Some(None) => unreachable!("null handled above"),
+ None => {
+ internal_err!("Expected string or binary, got
{scalar:?}")
+ }
+ }
+ }
+ ColumnarValue::Array(array) => match array.data_type() {
+ DataType::Utf8 => {
+ let string_array = as_string_array(array)?;
+ *data_size += string_array.values().len() * size_factor;
+ let column = if array.is_nullable() {
+ ColumnarValueRef::NullableArray(string_array)
+ } else {
+ ColumnarValueRef::NonNullableArray(string_array)
+ };
+ Ok(Some(column))
+ }
+ DataType::LargeUtf8 => {
+ let string_array = as_largestring_array(array);
+ *data_size += string_array.values().len() * size_factor;
+ let column = if array.is_nullable() {
+
ColumnarValueRef::NullableLargeStringArray(string_array)
+ } else {
+
ColumnarValueRef::NonNullableLargeStringArray(string_array)
+ };
+ Ok(Some(column))
+ }
+ DataType::Utf8View => {
+ let string_array = as_string_view_array(array)?;
+ *data_size += string_array.total_buffer_bytes_used() *
size_factor;
+ let column = if array.is_nullable() {
+ ColumnarValueRef::NullableStringViewArray(string_array)
+ } else {
+
ColumnarValueRef::NonNullableStringViewArray(string_array)
+ };
+ Ok(Some(column))
+ }
+ DataType::Binary => {
+ let binary_array = as_binary_array(array)?;
+ *data_size += binary_array.values().len() * size_factor;
+ let column = if array.is_nullable() {
+ ColumnarValueRef::NullableBinaryArray(binary_array)
+ } else {
+ ColumnarValueRef::NonNullableBinaryArray(binary_array)
+ };
+ Ok(Some(column))
+ }
+ DataType::LargeBinary => {
+ let binary_array = as_large_binary_array(array)?;
+ *data_size += binary_array.values().len() * size_factor;
+ let column = if array.is_nullable() {
+
ColumnarValueRef::NullableLargeBinaryArray(binary_array)
+ } else {
+
ColumnarValueRef::NonNullableLargeBinaryArray(binary_array)
+ };
+ Ok(Some(column))
+ }
+ DataType::BinaryView => {
+ let binary_array = as_binary_view_array(array)?;
+ *data_size += binary_array.total_buffer_bytes_used() *
size_factor;
+ let column = if array.is_nullable() {
+ ColumnarValueRef::NullableBinaryViewArray(binary_array)
+ } else {
+
ColumnarValueRef::NonNullableBinaryViewArray(binary_array)
+ };
+ Ok(Some(column))
+ }
+ DataType::FixedSizeBinary(_) => {
Review Comment:
Do we need this code for fsb if we coerce it to largebinary in signatures?
##########
datafusion/functions/src/string/concat.rs:
##########
@@ -307,7 +274,67 @@ impl ScalarUDFImpl for ConcatFunc {
}
}
+pub(crate) fn deduce_return_type(arg_types: &[DataType]) -> DataType {
+ use DataType::*;
+ if arg_types.contains(&BinaryView) {
+ BinaryView
+ } else if arg_types.contains(&LargeBinary) {
+ LargeBinary
+ } else if arg_types.contains(&Binary) {
+ Binary
+ } else if arg_types.iter().any(|dt| matches!(dt, FixedSizeBinary(_))) {
Review Comment:
I think this arm for fsb would need to be above binary? Also what is this
based on that fsb is promoted to largebinary?
##########
datafusion/functions/src/string/concat.rs:
##########
@@ -112,6 +113,16 @@ impl ScalarUDFImpl for ConcatFunc {
let arg_types: Vec<DataType> = args.iter().map(|c|
c.data_type()).collect();
let return_datatype = deduce_return_type(&arg_types);
+ let with_binary = arg_types.iter().any(|dt| dt.is_binary());
+ let with_string = arg_types.iter().any(|dt| dt.is_string());
+
+ if with_binary && with_string {
+ return plan_err!(
Review Comment:
We can grab the return type from `ScalarFunctionArgs` and not need to do
this check since signature should already guard for us (`coerce_types` would
already have been called)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]