This is an automated email from the ASF dual-hosted git repository.

zhouyuan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new dd16714827 [VL][Delta] Add roaring bitmap facilities for Delta DV 
support (#12001)
dd16714827 is described below

commit dd167148274a41984860ae36e3a1e3b37cf8ddef
Author: Mohammad Linjawi <[email protected]>
AuthorDate: Tue May 5 17:29:23 2026 +0300

    [VL][Delta] Add roaring bitmap facilities for Delta DV support (#12001)
    
    This PR is the first step in the split Delta deletion-vector (DV) stack.
    
    It introduces the native roaring bitmap facilities that later PRs use for 
Delta DV payload handling, without adding Delta scan integration yet.
    
    Main changes:
    
    add FindRoaring.cmake to discover CRoaring from an existing Velox build, 
pkg-config, or FetchContent
    wire roaring into the native Velox build
    add gluten::delta::RoaringBitmapArray, a small 64-bit roaring bitmap 
wrapper for Delta DV payloads
    use the same payload shape expected by the later native DV reader:
    4-byte magic number
    CRoaring portable Roaring64Map serialization
    add focused native unit coverage for:
    serialization / deserialization round-trip
    invalid magic-number rejection
    This PR is intentionally infrastructure-only:
    
    no Delta scan offload behavior changes yet
    no JVM-side Delta scan integration yet
    Those pieces will be added in follow-up stacked PRs.
    
    issue #11901
    
    Co-authored-by: Mohammad Linjawi <[email protected]>
---
 cpp/CMake/BuildRoaring.cmake                       |  32 ++++
 cpp/CMake/FindRoaring.cmake                        | 180 +++++++++++++++++++++
 cpp/velox/CMakeLists.txt                           |   5 +
 cpp/velox/compute/delta/RoaringBitmapArray.cpp     |  86 ++++++++++
 cpp/velox/compute/delta/RoaringBitmapArray.h       |  60 +++++++
 cpp/velox/compute/delta/tests/CMakeLists.txt       |  24 +++
 .../compute/delta/tests/RoaringBitmapArrayTest.cpp |  53 ++++++
 7 files changed, 440 insertions(+)

diff --git a/cpp/CMake/BuildRoaring.cmake b/cpp/CMake/BuildRoaring.cmake
new file mode 100644
index 0000000000..964a832258
--- /dev/null
+++ b/cpp/CMake/BuildRoaring.cmake
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more 
contributor
+# license agreements.  See the NOTICE file distributed with this work for
+# additional information regarding copyright ownership. The ASF licenses this
+# file to You under the Apache License, Version 2.0 (the "License"); you may 
not
+# use this file except in compliance with the License.  You may obtain a copy 
of
+# the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software 
distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+include_guard(GLOBAL)
+include(FetchContent)
+
+if(NOT DEFINED GLUTEN_ROARING_VERSION)
+  set(GLUTEN_ROARING_VERSION "4.3.11")
+endif()
+
+set(ENABLE_ROARING_TESTS
+    OFF
+    CACHE BOOL "" FORCE)
+
+message(STATUS "Building roaring from source")
+FetchContent_Declare(
+  roaring_fetch
+  GIT_REPOSITORY "https://github.com/RoaringBitmap/CRoaring.git";
+  GIT_TAG "v${GLUTEN_ROARING_VERSION}"
+  GIT_SHALLOW TRUE)
+FetchContent_MakeAvailable(roaring_fetch)
diff --git a/cpp/CMake/FindRoaring.cmake b/cpp/CMake/FindRoaring.cmake
new file mode 100644
index 0000000000..0d36108373
--- /dev/null
+++ b/cpp/CMake/FindRoaring.cmake
@@ -0,0 +1,180 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more 
contributor
+# license agreements.  See the NOTICE file distributed with this work for
+# additional information regarding copyright ownership. The ASF licenses this
+# file to You under the Apache License, Version 2.0 (the "License"); you may 
not
+# use this file except in compliance with the License.  You may obtain a copy 
of
+# the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+set(_roaring_pkgconfig_hints "")
+set(_roaring_include_hints "")
+set(_roaring_library_hints "")
+set(GLUTEN_ROARING_VERSION "4.3.11")
+
+foreach(_root ${VELOX_BUILD_PATH} ${VELOX_HOME}/_build/release
+              ${VELOX_HOME}/_build/debug)
+  if(_root)
+    list(APPEND _roaring_pkgconfig_hints "${_root}/_deps/roaring-build")
+    list(APPEND _roaring_include_hints "${_root}/_deps/roaring-src/include"
+         "${_root}/_deps/roaring-src/cpp"
+         "${_root}/_deps/roaring-build/include")
+    list(APPEND _roaring_library_hints "${_root}/_deps/roaring-build/src"
+         "${_root}/_deps/roaring-build")
+  endif()
+endforeach()
+
+if(DEFINED VELOX_HOME)
+  list(APPEND _roaring_pkgconfig_hints
+       "${VELOX_HOME}/deps-install/lib/pkgconfig"
+       "${VELOX_HOME}/deps-install/lib64/pkgconfig")
+  list(APPEND _roaring_include_hints "${VELOX_HOME}/deps-install/include")
+  list(APPEND _roaring_library_hints "${VELOX_HOME}/deps-install/lib"
+       "${VELOX_HOME}/deps-install/lib64")
+endif()
+
+list(REMOVE_DUPLICATES _roaring_pkgconfig_hints)
+list(REMOVE_DUPLICATES _roaring_include_hints)
+list(REMOVE_DUPLICATES _roaring_library_hints)
+
+function(_gluten_roaring_add_headers target_name)
+  find_path(
+    Roaring_INCLUDE_DIR
+    NAMES roaring/roaring.h
+    HINTS ${_roaring_include_hints})
+  find_path(
+    Roaring_CPP_INCLUDE_DIR
+    NAMES roaring/roaring64map.hh
+    HINTS ${_roaring_include_hints})
+  if(Roaring_INCLUDE_DIR)
+    target_include_directories(${target_name}
+                               INTERFACE "${Roaring_INCLUDE_DIR}")
+  endif()
+  if(Roaring_CPP_INCLUDE_DIR)
+    target_include_directories(${target_name}
+                               INTERFACE "${Roaring_CPP_INCLUDE_DIR}")
+  endif()
+endfunction()
+
+function(_gluten_roaring_enable_pic target_name)
+  if(NOT TARGET ${target_name})
+    return()
+  endif()
+
+  get_target_property(_gluten_roaring_imported ${target_name} IMPORTED)
+  if(NOT _gluten_roaring_imported)
+    set_target_properties(${target_name} PROPERTIES POSITION_INDEPENDENT_CODE
+                                                    ON)
+  endif()
+endfunction()
+
+# Check if roaring target already exists.
+if(TARGET roaring)
+  _gluten_roaring_enable_pic(roaring)
+  _gluten_roaring_add_headers(roaring)
+  set(Roaring_FOUND TRUE)
+  message(STATUS "Target roaring was already found.")
+  return()
+endif()
+
+find_package(PkgConfig QUIET)
+set(_roaring_found_via_pkgconfig OFF)
+
+if(PkgConfig_FOUND)
+  set(_roaring_saved_pkg_config_path "$ENV{PKG_CONFIG_PATH}")
+  list(JOIN _roaring_pkgconfig_hints ":" _roaring_hint_pkg_config_path)
+  if(_roaring_hint_pkg_config_path)
+    if(_roaring_saved_pkg_config_path STREQUAL "")
+      set(ENV{PKG_CONFIG_PATH} "${_roaring_hint_pkg_config_path}")
+    else()
+      set(ENV{PKG_CONFIG_PATH}
+          "${_roaring_hint_pkg_config_path}:${_roaring_saved_pkg_config_path}")
+    endif()
+  endif()
+  pkg_check_modules(Roaring QUIET IMPORTED_TARGET roaring)
+  set(ENV{PKG_CONFIG_PATH} "${_roaring_saved_pkg_config_path}")
+  if(Roaring_FOUND)
+    list(APPEND _roaring_include_hints ${Roaring_INCLUDE_DIRS})
+    list(APPEND _roaring_library_hints ${Roaring_LIBRARY_DIRS})
+    if(DEFINED Roaring_INCLUDEDIR)
+      list(APPEND _roaring_include_hints "${Roaring_INCLUDEDIR}")
+    endif()
+    if(DEFINED Roaring_LIBDIR)
+      list(APPEND _roaring_library_hints "${Roaring_LIBDIR}")
+    endif()
+    set(_roaring_found_via_pkgconfig ON)
+  endif()
+endif()
+
+list(REMOVE_DUPLICATES _roaring_include_hints)
+list(REMOVE_DUPLICATES _roaring_library_hints)
+
+find_path(
+  Roaring_INCLUDE_DIR
+  NAMES roaring/roaring.h
+  HINTS ${_roaring_include_hints})
+find_path(
+  Roaring_CPP_INCLUDE_DIR
+  NAMES roaring/roaring64map.hh
+  HINTS ${_roaring_include_hints})
+find_library(
+  Roaring_LIBRARY
+  NAMES roaring
+  HINTS ${_roaring_library_hints})
+
+if((NOT Roaring_LIBRARY)
+   AND DEFINED pkgcfg_lib_Roaring_roaring
+   AND EXISTS "${pkgcfg_lib_Roaring_roaring}")
+  set(Roaring_LIBRARY "${pkgcfg_lib_Roaring_roaring}")
+endif()
+
+if(Roaring_INCLUDE_DIR
+   AND Roaring_CPP_INCLUDE_DIR
+   AND Roaring_LIBRARY)
+  add_library(roaring UNKNOWN IMPORTED)
+  set_target_properties(
+    roaring
+    PROPERTIES IMPORTED_LOCATION "${Roaring_LIBRARY}"
+               INTERFACE_INCLUDE_DIRECTORIES
+               "${Roaring_INCLUDE_DIR};${Roaring_CPP_INCLUDE_DIR}")
+  _gluten_roaring_add_headers(roaring)
+  set(Roaring_FOUND TRUE)
+  message(STATUS "Found roaring via direct library lookup.")
+  return()
+endif()
+
+if(_roaring_found_via_pkgconfig)
+  add_library(roaring INTERFACE)
+  target_link_libraries(roaring INTERFACE PkgConfig::Roaring)
+  _gluten_roaring_add_headers(roaring)
+  set(Roaring_FOUND TRUE)
+  message(STATUS "Found roaring via pkg-config imported target fallback.")
+  return()
+endif()
+
+include(BuildRoaring)
+
+if(TARGET roaring)
+  _gluten_roaring_enable_pic(roaring)
+  _gluten_roaring_add_headers(roaring)
+  set(Roaring_FOUND TRUE)
+  message(STATUS "Found roaring via FetchContent.")
+  return()
+endif()
+
+set(Roaring_FOUND FALSE)
+
+if(Roaring_FIND_REQUIRED)
+  message(
+    FATAL_ERROR
+      "Failed to find roaring. Set VELOX_HOME/VELOX_BUILD_PATH to a Velox 
build "
+      "that contains roaring in _deps, or provide roaring via 
PKG_CONFIG_PATH.")
+elseif(NOT Roaring_FIND_QUIETLY)
+  message(WARNING "Failed to find roaring.")
+endif()
diff --git a/cpp/velox/CMakeLists.txt b/cpp/velox/CMakeLists.txt
index b9d3e65afe..5034c1601a 100644
--- a/cpp/velox/CMakeLists.txt
+++ b/cpp/velox/CMakeLists.txt
@@ -75,6 +75,8 @@ if(NOT DEFINED VELOX_BUILD_PATH)
   endif()
 endif()
 
+find_package(Roaring REQUIRED)
+
 set(VELOX_PROTO_SRC_DIR
     ${GLUTEN_HOME}/backends-velox/src/main/resources/org/apache/gluten/proto)
 message(STATUS "Set Gluten Proto Directory in ${VELOX_PROTO_SRC_DIR}")
@@ -157,6 +159,7 @@ set(VELOX_SRCS
     compute/VeloxRuntime.cc
     compute/VeloxPlanConverter.cc
     compute/WholeStageResultIterator.cc
+    compute/delta/RoaringBitmapArray.cpp
     compute/iceberg/IcebergPlanConverter.cc
     jni/JniFileSystem.cc
     jni/JniUdf.cc
@@ -323,6 +326,7 @@ endif()
 target_link_libraries(velox PUBLIC facebook::velox)
 
 target_link_libraries(velox PUBLIC Folly::folly)
+target_link_libraries(velox PUBLIC roaring)
 
 find_re2()
 target_link_libraries(velox PUBLIC ${RE2_LIBRARY})
@@ -387,6 +391,7 @@ target_link_libraries(velox PUBLIC ICU::i18n ICU::uc 
ICU::data)
 
 if(BUILD_TESTS)
   add_subdirectory(tests)
+  add_subdirectory(compute/delta/tests)
 endif()
 
 if(BUILD_BENCHMARKS)
diff --git a/cpp/velox/compute/delta/RoaringBitmapArray.cpp 
b/cpp/velox/compute/delta/RoaringBitmapArray.cpp
new file mode 100644
index 0000000000..7da3364d0b
--- /dev/null
+++ b/cpp/velox/compute/delta/RoaringBitmapArray.cpp
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compute/delta/RoaringBitmapArray.h"
+
+#include <cstring>
+
+#include "velox/common/base/Exceptions.h"
+
+namespace gluten::delta {
+
+namespace {
+
+uint32_t readUint32LittleEndian(const char* data) {
+  const auto* bytes = reinterpret_cast<const uint8_t*>(data);
+  return static_cast<uint32_t>(bytes[0]) | (static_cast<uint32_t>(bytes[1]) << 
8) |
+      (static_cast<uint32_t>(bytes[2]) << 16) | 
(static_cast<uint32_t>(bytes[3]) << 24);
+}
+
+void writeUint32LittleEndian(char* data, uint32_t value) {
+  auto* bytes = reinterpret_cast<uint8_t*>(data);
+  bytes[0] = static_cast<uint8_t>(value & 0xFF);
+  bytes[1] = static_cast<uint8_t>((value >> 8) & 0xFF);
+  bytes[2] = static_cast<uint8_t>((value >> 16) & 0xFF);
+  bytes[3] = static_cast<uint8_t>((value >> 24) & 0xFF);
+}
+
+} // namespace
+
+void RoaringBitmapArray::addSafe(uint64_t value) {
+  bitmap_.add(value);
+}
+
+bool RoaringBitmapArray::containsSafe(uint64_t value) const {
+  return bitmap_.contains(value);
+}
+
+void RoaringBitmapArray::serialize(char* buffer) const {
+  VELOX_CHECK_NOT_NULL(buffer, "RoaringBitmapArray serialization buffer is 
null");
+  writeUint32LittleEndian(buffer, kPortableSerializationFormatMagicNumber);
+  bitmap_.write(buffer + sizeof(uint32_t), true);
+}
+
+void RoaringBitmapArray::deserialize(const char* buffer, size_t size) {
+  VELOX_CHECK_NOT_NULL(buffer, "RoaringBitmapArray input buffer is null");
+  VELOX_CHECK_GE(size, sizeof(uint32_t), "RoaringBitmapArray payload is too 
small: {}", size);
+  const auto magic = readUint32LittleEndian(buffer);
+  VELOX_CHECK_EQ(
+      magic, kPortableSerializationFormatMagicNumber, "Unexpected 
RoaringBitmapArray magic number {}", magic);
+  bitmap_ = roaring::Roaring64Map::readSafe(buffer + sizeof(uint32_t), size - 
sizeof(uint32_t));
+}
+
+size_t RoaringBitmapArray::serializedSizeInBytes() const {
+  return sizeof(uint32_t) + bitmap_.getSizeInBytes(true);
+}
+
+} // namespace gluten::delta
diff --git a/cpp/velox/compute/delta/RoaringBitmapArray.h 
b/cpp/velox/compute/delta/RoaringBitmapArray.h
new file mode 100644
index 0000000000..68eb2c9db3
--- /dev/null
+++ b/cpp/velox/compute/delta/RoaringBitmapArray.h
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <roaring/roaring64map.hh>
+
+namespace gluten::delta {
+
+/// Minimal 64-bit roaring bitmap wrapper for Delta deletion-vector payloads.
+/// This intentionally models the payload format consumed by 
DeltaDeletionVectorReader:
+/// a 4-byte magic number followed by CRoaring's portable Roaring64Map 
serialization.
+class RoaringBitmapArray {
+ public:
+  static constexpr uint32_t kPortableSerializationFormatMagicNumber = 
1681511377;
+
+  void addSafe(uint64_t value);
+  bool containsSafe(uint64_t value) const;
+
+  void serialize(char* buffer) const;
+  void deserialize(const char* buffer, size_t size);
+
+  size_t serializedSizeInBytes() const;
+
+ private:
+  roaring::Roaring64Map bitmap_;
+};
+
+} // namespace gluten::delta
diff --git a/cpp/velox/compute/delta/tests/CMakeLists.txt 
b/cpp/velox/compute/delta/tests/CMakeLists.txt
new file mode 100644
index 0000000000..ec86cab0d7
--- /dev/null
+++ b/cpp/velox/compute/delta/tests/CMakeLists.txt
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_executable(velox_roaring_bitmap_array_test RoaringBitmapArrayTest.cpp)
+
+target_link_libraries(velox_roaring_bitmap_array_test velox roaring
+                      GTest::gtest GTest::gtest_main)
+
+add_test(
+  NAME velox_roaring_bitmap_array_test
+  COMMAND velox_roaring_bitmap_array_test
+  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
diff --git a/cpp/velox/compute/delta/tests/RoaringBitmapArrayTest.cpp 
b/cpp/velox/compute/delta/tests/RoaringBitmapArrayTest.cpp
new file mode 100644
index 0000000000..0eca672608
--- /dev/null
+++ b/cpp/velox/compute/delta/tests/RoaringBitmapArrayTest.cpp
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compute/delta/RoaringBitmapArray.h"
+
+#include <memory>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+namespace gluten::delta {
+namespace {
+
+TEST(RoaringBitmapArrayTest, SerializeRoundTrip) {
+  RoaringBitmapArray bitmap;
+  bitmap.addSafe(1);
+  bitmap.addSafe(7);
+  bitmap.addSafe(1ULL << 33);
+
+  std::vector<char> serialized(bitmap.serializedSizeInBytes());
+  bitmap.serialize(serialized.data());
+
+  RoaringBitmapArray restored;
+  restored.deserialize(serialized.data(), serialized.size());
+
+  EXPECT_TRUE(restored.containsSafe(1));
+  EXPECT_TRUE(restored.containsSafe(7));
+  EXPECT_TRUE(restored.containsSafe(1ULL << 33));
+  EXPECT_FALSE(restored.containsSafe(2));
+}
+
+TEST(RoaringBitmapArrayTest, RejectsBadMagic) {
+  RoaringBitmapArray bitmap;
+  std::vector<char> invalid(sizeof(uint32_t), '\0');
+  EXPECT_ANY_THROW(bitmap.deserialize(invalid.data(), invalid.size()));
+}
+
+} // namespace
+} // namespace gluten::delta


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to