ashdnazg commented on code in PR #15924: URL: https://github.com/apache/datafusion/pull/15924#discussion_r2071709763
########## datafusion/common/src/scalar/mod.rs: ########## @@ -3415,6 +3415,100 @@ impl ScalarValue { .map(|sv| sv.size() - size_of_val(sv)) .sum::<usize>() } + + /// Compacts the allocation referenced by `self` to the minimum, copying the data if + /// necessary. + /// + /// This can be relevant when `self` is a list or contains a list as a nested value, as + /// a single list holds an Arc to its entire original array buffer. + pub fn compact(&mut self) { + match self { + ScalarValue::Null + | ScalarValue::Boolean(_) + | ScalarValue::Float16(_) + | ScalarValue::Float32(_) + | ScalarValue::Float64(_) + | ScalarValue::Decimal128(_, _, _) + | ScalarValue::Decimal256(_, _, _) + | ScalarValue::Int8(_) + | ScalarValue::Int16(_) + | ScalarValue::Int32(_) + | ScalarValue::Int64(_) + | ScalarValue::UInt8(_) + | ScalarValue::UInt16(_) + | ScalarValue::UInt32(_) + | ScalarValue::UInt64(_) + | ScalarValue::Date32(_) + | ScalarValue::Date64(_) + | ScalarValue::Time32Second(_) + | ScalarValue::Time32Millisecond(_) + | ScalarValue::Time64Microsecond(_) + | ScalarValue::Time64Nanosecond(_) + | ScalarValue::IntervalYearMonth(_) + | ScalarValue::IntervalDayTime(_) + | ScalarValue::IntervalMonthDayNano(_) + | ScalarValue::DurationSecond(_) + | ScalarValue::DurationMillisecond(_) + | ScalarValue::DurationMicrosecond(_) + | ScalarValue::DurationNanosecond(_) + | ScalarValue::Utf8(_) + | ScalarValue::LargeUtf8(_) + | ScalarValue::Utf8View(_) + | ScalarValue::TimestampSecond(_, _) + | ScalarValue::TimestampMillisecond(_, _) + | ScalarValue::TimestampMicrosecond(_, _) + | ScalarValue::TimestampNanosecond(_, _) + | ScalarValue::Binary(_) + | ScalarValue::FixedSizeBinary(_, _) + | ScalarValue::LargeBinary(_) + | ScalarValue::BinaryView(_) => (), + ScalarValue::FixedSizeList(arr) => { + let taken = arrow::compute::take( + arr.as_ref(), + &Int32Array::from_iter_values([0]), + None, + ) + .unwrap(); + *Arc::make_mut(arr) = cast::as_fixed_size_list_array(&taken).clone(); Review Comment: It's probably possible to make it more performant, but I don't want to invest into that unless we know it's necessary. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org