This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new fc98d5c282 feat: Implement Spark `bitmap_bucket_number` function (#20288)
fc98d5c282 is described below

commit fc98d5c2826949060ce8ecaddcc0f2113e603c74
Author: Kazantsev Maksim <[email protected]>
AuthorDate: Sat Feb 21 06:08:44 2026 +0400

    feat: Implement Spark `bitmap_bucket_number` function (#20288)
    
    ## Which issue does this PR close?
    
    N/A
    
    ## Rationale for this change
    
    Add new function:
    https://spark.apache.org/docs/latest/api/sql/index.html#bitmap_bucket_number
    
    ## What changes are included in this PR?
    
    - Implementation
    - Unit Tests
    - SLT tests
    
    ## Are these changes tested?
    
    Yes, tests added as part of this PR.
    
    ## Are there any user-facing changes?
    
    No, this is a new function.
    
    ---------
    
    Co-authored-by: Kazantsev Maksim <[email protected]>
---
 .../src/function/bitmap/bitmap_bucket_number.rs    | 141 +++++++++++++++++++++
 datafusion/spark/src/function/bitmap/mod.rs        |  16 ++-
 .../spark/bitmap/bitmap_bucket_number.slt          | 122 ++++++++++++++++++
 3 files changed, 278 insertions(+), 1 deletion(-)

diff --git a/datafusion/spark/src/function/bitmap/bitmap_bucket_number.rs b/datafusion/spark/src/function/bitmap/bitmap_bucket_number.rs
new file mode 100644
index 0000000000..fe72a4fe8a
--- /dev/null
+++ b/datafusion/spark/src/function/bitmap/bitmap_bucket_number.rs
@@ -0,0 +1,141 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{ArrayRef, AsArray, Int64Array};
+use arrow::datatypes::Field;
+use arrow::datatypes::{DataType, FieldRef, Int8Type, Int16Type, Int32Type, Int64Type};
+use datafusion::logical_expr::{ColumnarValue, Signature, TypeSignature, Volatility};
+use datafusion_common::utils::take_function_args;
+use datafusion_common::{Result, internal_err};
+use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl};
+use datafusion_functions::utils::make_scalar_function;
+use std::any::Any;
+use std::sync::Arc;
+
+/// Spark-compatible `bitmap_bucket_number` expression
+/// <https://spark.apache.org/docs/latest/api/sql/index.html#bitmap_bucket_number>
+#[derive(Debug, PartialEq, Eq, Hash)]
+pub struct BitmapBucketNumber {
+    signature: Signature,
+}
+
+impl Default for BitmapBucketNumber {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl BitmapBucketNumber {
+    pub fn new() -> Self {
+        Self {
+            signature: Signature::one_of(
+                vec![
+                    TypeSignature::Exact(vec![DataType::Int8]),
+                    TypeSignature::Exact(vec![DataType::Int16]),
+                    TypeSignature::Exact(vec![DataType::Int32]),
+                    TypeSignature::Exact(vec![DataType::Int64]),
+                ],
+                Volatility::Immutable,
+            ),
+        }
+    }
+}
+
+impl ScalarUDFImpl for BitmapBucketNumber {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "bitmap_bucket_number"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        internal_err!("return_field_from_args should be used instead")
+    }
+
+    fn return_field_from_args(
+        &self,
+        args: datafusion_expr::ReturnFieldArgs,
+    ) -> Result<FieldRef> {
+        Ok(Arc::new(Field::new(
+            self.name(),
+            DataType::Int64,
+            args.arg_fields[0].is_nullable(),
+        )))
+    }
+
+    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
+        make_scalar_function(bitmap_bucket_number_inner, vec![])(&args.args)
+    }
+}
+
+pub fn bitmap_bucket_number_inner(arg: &[ArrayRef]) -> Result<ArrayRef> {
+    let [array] = take_function_args("bitmap_bucket_number", arg)?;
+    match &array.data_type() {
+        DataType::Int8 => {
+            let result: Int64Array = array
+                .as_primitive::<Int8Type>()
+                .iter()
+                .map(|opt| opt.map(|value| bitmap_bucket_number(value.into())))
+                .collect();
+            Ok(Arc::new(result))
+        }
+        DataType::Int16 => {
+            let result: Int64Array = array
+                .as_primitive::<Int16Type>()
+                .iter()
+                .map(|opt| opt.map(|value| bitmap_bucket_number(value.into())))
+                .collect();
+            Ok(Arc::new(result))
+        }
+        DataType::Int32 => {
+            let result: Int64Array = array
+                .as_primitive::<Int32Type>()
+                .iter()
+                .map(|opt| opt.map(|value| bitmap_bucket_number(value.into())))
+                .collect();
+            Ok(Arc::new(result))
+        }
+        DataType::Int64 => {
+            let result: Int64Array = array
+                .as_primitive::<Int64Type>()
+                .iter()
+                .map(|opt| opt.map(bitmap_bucket_number))
+                .collect();
+            Ok(Arc::new(result))
+        }
+        data_type => {
+            internal_err!("bitmap_bucket_number does not support {data_type}")
+        }
+    }
+}
+
+const NUM_BYTES: i64 = 4 * 1024;
+const NUM_BITS: i64 = NUM_BYTES * 8;
+
+fn bitmap_bucket_number(value: i64) -> i64 {
+    if value > 0 {
+        1 + (value - 1) / NUM_BITS
+    } else {
+        value / NUM_BITS
+    }
+}
diff --git a/datafusion/spark/src/function/bitmap/mod.rs b/datafusion/spark/src/function/bitmap/mod.rs
index 1a7dce02db..4992992aea 100644
--- a/datafusion/spark/src/function/bitmap/mod.rs
+++ b/datafusion/spark/src/function/bitmap/mod.rs
@@ -16,6 +16,7 @@
 // under the License.
 
 pub mod bitmap_bit_position;
+pub mod bitmap_bucket_number;
 pub mod bitmap_count;
 
 use datafusion_expr::ScalarUDF;
@@ -24,6 +25,10 @@ use std::sync::Arc;
 
 make_udf_function!(bitmap_count::BitmapCount, bitmap_count);
 make_udf_function!(bitmap_bit_position::BitmapBitPosition, bitmap_bit_position);
+make_udf_function!(
+    bitmap_bucket_number::BitmapBucketNumber,
+    bitmap_bucket_number
+);
 
 pub mod expr_fn {
     use datafusion_functions::export_functions;
@@ -38,8 +43,17 @@ pub mod expr_fn {
         "Returns the bit position for the given input child expression.",
         arg
     ));
+    export_functions!((
+        bitmap_bucket_number,
+        "Returns the bucket number for the given input child expression.",
+        arg
+    ));
 }
 
 pub fn functions() -> Vec<Arc<ScalarUDF>> {
-    vec![bitmap_count(), bitmap_bit_position()]
+    vec![
+        bitmap_count(),
+        bitmap_bit_position(),
+        bitmap_bucket_number(),
+    ]
 }
diff --git a/datafusion/sqllogictest/test_files/spark/bitmap/bitmap_bucket_number.slt b/datafusion/sqllogictest/test_files/spark/bitmap/bitmap_bucket_number.slt
new file mode 100644
index 0000000000..2a6e190b31
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/spark/bitmap/bitmap_bucket_number.slt
@@ -0,0 +1,122 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+
+#   http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(1, 'Int8'));
+----
+1
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(127, 'Int8'));
+----
+1
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(-1, 'Int8'));
+----
+0
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(-64, 'Int8'));
+----
+0
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(-65, 'Int8'));
+----
+0
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(1, 'Int16'));
+----
+1
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(257, 'Int16'));
+----
+1
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(32767, 'Int16'));
+----
+1
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(-1, 'Int16'));
+----
+0
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(-256, 'Int16'));
+----
+0
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(1, 'Int32'));
+----
+1
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(65537, 'Int32'));
+----
+3
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(2147483647, 'Int32'));
+----
+65536
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(-1, 'Int32'));
+----
+0
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(-65536, 'Int32'));
+----
+-2
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(1, 'Int64'));
+----
+1
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(4294967297, 'Int64'));
+----
+131073
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(9223372036854775807, 'Int64'));
+----
+281474976710656
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(-1, 'Int64'));
+----
+0
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(-4294967296, 'Int64'));
+----
+-131072
+
+query I
+SELECT bitmap_bucket_number(arrow_cast(-9223372036854775808, 'Int64'));
+----
+-281474976710656


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to