This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 7dbe58a6e0 feat: support BinaryView in bit_length kernel (#9363)
7dbe58a6e0 is described below
commit 7dbe58a6e0e18985861db1dfa71507174e838cae
Author: Abhishek <[email protected]>
AuthorDate: Sat Feb 7 02:42:34 2026 +0530
feat: support BinaryView in bit_length kernel (#9363)
# Which issue does this PR close?
- Closes #9351.
# Rationale for this change
The `bit_length` kernel supports `Utf8View` but is missing support for
`BinaryView`. This adds parity between string and binary view types.
# What changes are included in this PR?
- Add `DataType::BinaryView` match arm in `bit_length()` function
- Update docstring to reflect supported types
- Add tests for `BinaryView` bit_length (with and without nulls)
# Are these changes tested?
Yes. Added two tests:
- `bit_length_binary_view` - tests basic functionality
- `bit_length_null_binary_view` - tests null handling
# Are there any user-facing changes?
Yes. `bit_length()` now accepts `BinaryViewArray` as input and returns
`Int32Array` containing bit lengths.
---
arrow-string/src/length.rs | 45 ++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 44 insertions(+), 1 deletion(-)
diff --git a/arrow-string/src/length.rs b/arrow-string/src/length.rs
index 94a2d338c2..f7461d37c4 100644
--- a/arrow-string/src/length.rs
+++ b/arrow-string/src/length.rs
@@ -124,7 +124,8 @@ pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
/// Returns an array of Int32/Int64 denoting the number of bits in each value in the array.
///
-/// * this only accepts StringArray/Utf8, LargeString/LargeUtf8, BinaryArray and LargeBinaryArray,
+/// * this only accepts StringArray/Utf8, LargeString/LargeUtf8, StringViewArray/Utf8View,
+/// BinaryArray, LargeBinaryArray, BinaryViewArray, and FixedSizeBinaryArray,
/// or DictionaryArray with above Arrays as values
/// * bit_length of null is null.
/// * bit_length is in number of bits
@@ -167,6 +168,18 @@ pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
vec![*len * 8; array.len()].into(),
array.nulls().cloned(),
)?)),
+ DataType::BinaryView => {
+ let list = array.as_binary_view();
+ let values = list
+ .views()
+ .iter()
+ .map(|view| (*view as i32).wrapping_mul(8))
+ .collect();
+ Ok(Arc::new(Int32Array::try_new(
+ values,
+ array.nulls().cloned(),
+ )?))
+ }
other => Err(ArrowError::ComputeError(format!(
"bit_length not supported for {other:?}"
))),
@@ -586,6 +599,36 @@ mod tests {
length_binary_helper!(i64, Int64Array, bit_length, value, expected)
}
+ #[test]
+ fn bit_length_binary_view() {
+ let value: Vec<&[u8]> = vec![
+ b"zero",
+ &[0xff, 0xf8],
+ b"two",
+ b"this is a longer string to test binary array with",
+ ];
+ let expected: Vec<i32> = vec![32, 16, 24, 392];
+
+ let array = BinaryViewArray::from(value);
+ let result = bit_length(&array).unwrap();
+ let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
+ let expected: Int32Array = expected.into();
+ assert_eq!(&expected, result);
+ }
+
+ #[test]
+ fn bit_length_null_binary_view() {
+ let value: Vec<Option<&[u8]>> =
+ vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
+ let expected: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];
+
+ let array = BinaryViewArray::from(value);
+ let result = bit_length(&array).unwrap();
+ let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
+ let expected: Int32Array = expected.into();
+ assert_eq!(&expected, result);
+ }
+
fn bit_length_null_cases() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
vec![(
vec![Some("one"), None, Some("three"), Some("four")],