alamb commented on code in PR #12792:
URL: https://github.com/apache/datafusion/pull/12792#discussion_r1798261713
##########
datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/nulls.rs:
##########
@@ -91,3 +100,105 @@ pub fn filtered_null_mask(
let opt_filter = opt_filter.and_then(filter_to_nulls);
NullBuffer::union(opt_filter.as_ref(), input.nulls())
}
+
+/// Applies optional filter to input, returning a new array of the same type
+/// with the same data, but with any values that were filtered out set to null
+pub fn apply_filter_as_nulls(
+ input: &dyn Array,
+ opt_filter: Option<&BooleanArray>,
+) -> Result<ArrayRef> {
+ let nulls = filtered_null_mask(opt_filter, input);
+ set_nulls_dyn(input, nulls)
+}
+
+/// Replaces the nulls in the input array with the given `NullBuffer`
+///
+/// Can replace when upstreamed in arrow-rs: <https://github.com/apache/arrow-rs/issues/6528>
+pub fn set_nulls_dyn(input: &dyn Array, nulls: Option<NullBuffer>) -> Result<ArrayRef> {
+ if let Some(nulls) = nulls.as_ref() {
+ assert_eq!(nulls.len(), input.len());
+ }
+
+ let output: ArrayRef = match input.data_type() {
+ DataType::Utf8 => {
+ let input = input.as_string::<i32>();
+ // safety: values / offsets came from a valid string array, so are valid utf8
+ // and we checked nulls has the same length as values
+ unsafe {
+ Arc::new(StringArray::new_unchecked(
+ input.offsets().clone(),
+ input.values().clone(),
+ nulls,
+ ))
+ }
+ }
+ DataType::LargeUtf8 => {
+ let input = input.as_string::<i64>();
+ // safety: values / offsets came from a valid string array, so are valid utf8
+ // and we checked nulls has the same length as values
+ unsafe {
+ Arc::new(LargeStringArray::new_unchecked(
+ input.offsets().clone(),
+ input.values().clone(),
+ nulls,
+ ))
+ }
+ }
+ DataType::Utf8View => {
+ let input = input.as_string_view();
+ // safety: values / views came from a valid string view array, so are valid utf8
+ // and we checked nulls has the same length as values
+ unsafe {
+ Arc::new(StringViewArray::new_unchecked(
+ input.views().clone(),
+ input.data_buffers().to_vec(),
+ nulls,
+ ))
+ }
+ }
+
+ DataType::Binary => {
+ let input = input.as_binary::<i32>();
+ // safety: values / offsets came from a valid binary array
+ // and we checked nulls has the same length as values
+ unsafe {
+ Arc::new(BinaryArray::new_unchecked(
+ input.offsets().clone(),
+ input.values().clone(),
+ nulls,
+ ))
+ }
+ }
+ DataType::LargeBinary => {
+ let input = input.as_binary::<i64>();
+ // safety: values / offsets came from a valid large binary array
+ // and we checked nulls has the same length as values
+ unsafe {
+ Arc::new(LargeBinaryArray::new_unchecked(
+ input.offsets().clone(),
+ input.values().clone(),
+ nulls,
+ ))
+ }
+ }
+ DataType::BinaryView => {
+ let input = input.as_binary_view();
+ // safety: values / views came from a valid binary view array
+ // and we checked nulls has the same length as values
+ unsafe {
+ Arc::new(BinaryViewArray::new_unchecked(
+ input.views().clone(),
+ input.data_buffers().to_vec(),
+ nulls,
+ ))
+ }
+ }
+ _ => {
+ return not_impl_err!("Applying nulls {:?}", input.data_type());
Review Comment:
Not necessarily -- I am hoping we put this code upstream in arrow-rs and can
remove it entirely from datafusion eventually.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]