This is an automated email from the ASF dual-hosted git repository.
yuxia pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fluss-rust.git
The following commit(s) were added to refs/heads/main by this push:
new 20601fc docs: add cpp build guides with examples (#376)
20601fc is described below
commit 20601fc28416c859b9bfa4810e0da0099677c7e8
Author: AlexZhao <[email protected]>
AuthorDate: Mon Mar 2 10:38:07 2026 +0800
docs: add cpp build guides with examples (#376)
---
MODULE.bazel | 65 ++++
bindings/cpp/.gitignore | 10 +
bindings/cpp/BUILD.bazel | 46 ++-
bindings/cpp/CMakeLists.txt | 141 ++++++++-
.../cpp/{MODULE.bazel => bazel/cpp/BUILD.bazel} | 13 +-
bindings/cpp/bazel/cpp/deps.bzl | 349 +++++++++++++++++++++
.../bazel-consumer/build/BUILD.bazel} | 12 +-
.../bazel-consumer/build}/MODULE.bazel | 24 +-
bindings/cpp/examples/bazel-consumer/build/main.cc | 28 ++
.../bazel-consumer/system/BUILD.bazel} | 12 +-
.../examples/bazel-consumer/system/MODULE.bazel | 44 +++
.../cpp/examples/bazel-consumer/system/main.cc | 27 ++
bindings/cpp/scripts/ensure_protoc.sh | 277 ++++++++++++++++
docs/cpp-bazel-usage.md | 291 +++++++++++++++++
docs/cpp-cmake-usage.md | 129 ++++++++
15 files changed, 1431 insertions(+), 37 deletions(-)
diff --git a/MODULE.bazel b/MODULE.bazel
new file mode 100644
index 0000000..f0e6025
--- /dev/null
+++ b/MODULE.bazel
@@ -0,0 +1,65 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Required at repository root for root module mode
+# (`bazel_dep(name = "fluss-cpp", ...)`).
+# Consumer examples use
+# `local_path_override(..., path = "/path/to/fluss-rust")`, so Bazel resolves
+# the module from the repository root. This also matches the Rust workspace
+# layout used by `bindings/cpp` during cargo-based Bazel/CMake builds.
+# `0.0.0` is a local-development placeholder in this repository branch.
+# Consumers should depend on a published release version.
+# Module identity; the version is a placeholder used for local development.
+module(
+    name = "fluss-cpp",
+    version = "0.0.0",
+)
+
+bazel_dep(name = "rules_cc", version = "0.0.17")
+bazel_dep(name = "platforms", version = "0.0.10")
+bazel_dep(name = "rules_foreign_cc", version = "0.15.1")
+bazel_dep(name = "rules_python", version = "1.2.0")
+
+# Python 3.12 toolchain provided by the rules_python extension.
+python = use_extension("@rules_python//python/extensions:python.bzl", "python")
+python.toolchain(python_version = "3.12")
+use_repo(python, "python_3_12")
+
+# CMake/Ninja toolchain repositories provided by the rules_foreign_cc extension.
+# The assignment and the use_extension() call must stay on one logical line.
+foreign_cc_tools = use_extension(
+    "@rules_foreign_cc//foreign_cc:extensions.bzl",
+    "tools",
+)
+use_repo(
+    foreign_cc_tools,
+    "cmake_3.31.8_toolchains",
+    "cmake_src",
+    "ninja_1.13.0_toolchains",
+    "ninja_build_src",
+    "rules_foreign_cc_framework_toolchains",
+)
+
+# Register every toolchain exposed by the repositories imported above.
+register_toolchains(
+    "@rules_foreign_cc_framework_toolchains//:all",
+    "@cmake_3.31.8_toolchains//:all",
+    "@ninja_1.13.0_toolchains//:all",
+    "@python_3_12//:all",
+    "@rules_foreign_cc//toolchains:all",
+)
+
+# Provision the Arrow C++ dependency through the fluss C++ SDK module extension.
+# `mode = "build"` makes the extension download the Arrow sources and build
+# them with the rules_foreign_cc `cmake` rule; the `ep_cmake_*` values are
+# substituted into the EP_CMAKE_RANLIB / EP_CMAKE_AR / EP_CMAKE_NM cache entries
+# of that build. The resulting repository is imported as `apache_arrow_cpp`.
+cpp_sdk = use_extension("//bindings/cpp/bazel/cpp:deps.bzl", "cpp_sdk")
+cpp_sdk.config(
+ mode = "build",
+ arrow_cpp_version = "19.0.1",
+ protobuf_version = "3.25.5",
+ ep_cmake_ranlib = "/usr/bin/ranlib",
+ ep_cmake_ar = "/usr/bin/ar",
+ ep_cmake_nm = "/usr/bin/nm",
+)
+use_repo(cpp_sdk, "apache_arrow_cpp")
diff --git a/bindings/cpp/.gitignore b/bindings/cpp/.gitignore
index da15a58..1f1632b 100644
--- a/bindings/cpp/.gitignore
+++ b/bindings/cpp/.gitignore
@@ -15,3 +15,13 @@ bazel-testlogs
bazel-cpp
bazel-*
MODULE.bazel.lock
+
+# Keep versioned Bazel consumer examples (name starts with bazel-).
+!examples/bazel-consumer/
+!examples/bazel-consumer/**
+# `build/` is ignored globally above; keep this fixture path visible.
+!examples/bazel-consumer/build/
+!examples/bazel-consumer/build/**
+examples/bazel-consumer/**/MODULE.bazel.lock
+examples/bazel-consumer/**/bazel-*
+examples/bazel-consumer/**/tmp.log
diff --git a/bindings/cpp/BUILD.bazel b/bindings/cpp/BUILD.bazel
index 0ae2ce3..d247baf 100644
--- a/bindings/cpp/BUILD.bazel
+++ b/bindings/cpp/BUILD.bazel
@@ -17,7 +17,7 @@
licenses(["notice"])
-load("@rules_cc//cc:defs.bzl", "cc_library", "cc_binary")
+load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_import", "cc_library")
config_setting(
name = "debug_mode",
@@ -34,6 +34,37 @@ config_setting(
values = {"compilation_mode": "opt"},
)
+# Shell prologue shared by the cargo genrules.
+# - enables `set -e`;
+# - resolves the cargo binary into CARGO_BIN ($CARGO if set, else `cargo` on PATH);
+# - exports PROTOC ($PROTOC if set, else `protoc` on PATH).
+# A missing or non-executable tool aborts the action with a message on stderr.
+# `$$` is the genrule escape for a literal `$`.
+_PROTOC_SETUP_SNIPPET = """
+    set -e
+    if [ -n "$${CARGO:-}" ]; then
+        if [ ! -x "$$CARGO" ]; then
+            echo "Error: CARGO is set but not executable: $$CARGO" >&2
+            exit 1
+        fi
+        CARGO_BIN="$$CARGO"
+    else
+        CARGO_BIN=$$(command -v cargo || true)
+        if [ -z "$$CARGO_BIN" ]; then
+            echo "Error: cargo not found in PATH and CARGO is not set" >&2
+            exit 1
+        fi
+    fi
+    if [ -n "$${PROTOC:-}" ]; then
+        if [ ! -x "$$PROTOC" ]; then
+            echo "Error: PROTOC is set but not executable: $$PROTOC" >&2
+            exit 1
+        fi
+        export PROTOC
+    else
+        PROTOC_BIN=$$(command -v protoc || true)
+        if [ -z "$$PROTOC_BIN" ]; then
+            echo "Error: protoc not found in PATH and PROTOC is not set" >&2
+            exit 1
+        fi
+        export PROTOC="$$PROTOC_BIN"
+    fi
+"""
+
genrule(
name = "cargo_build_debug",
srcs = glob([
@@ -47,8 +78,7 @@ genrule(
"src/lib.rs_debug.h",
"cxxbridge/rust/cxx_debug.h",
],
- cmd = """
- set -e
+ cmd = _PROTOC_SETUP_SNIPPET + """
EXECROOT=$$(pwd)
OUTPUT_LIB=$(location rust_lib_debug.a)
OUTPUT_CC=$(location rust_bridge_cc_debug.cc)
@@ -66,7 +96,7 @@ genrule(
exit 1
fi
cd $$WORKSPACE_ROOT
- cargo build --manifest-path $$CARGO_DIR/Cargo.toml
+ "$$CARGO_BIN" build --manifest-path $$CARGO_DIR/Cargo.toml
CARGO_TARGET_DIR=$$WORKSPACE_ROOT/target
# cxxbridge uses the Cargo package name (with hyphen): fluss-cpp
RUST_BRIDGE_DIR=$$CARGO_TARGET_DIR/cxxbridge/fluss-cpp/src
@@ -114,8 +144,7 @@ genrule(
"src/lib.rs_release.h",
"cxxbridge/rust/cxx_release.h",
],
- cmd = """
- set -e
+ cmd = _PROTOC_SETUP_SNIPPET + """
EXECROOT=$$(pwd)
OUTPUT_LIB=$(location rust_lib_release.a)
OUTPUT_CC=$(location rust_bridge_cc_release.cc)
@@ -133,7 +162,7 @@ genrule(
exit 1
fi
cd $$WORKSPACE_ROOT
- cargo build --release --manifest-path $$CARGO_DIR/Cargo.toml
+ "$$CARGO_BIN" build --release --manifest-path $$CARGO_DIR/Cargo.toml
CARGO_TARGET_DIR=$$WORKSPACE_ROOT/target
# cxxbridge uses the Cargo package name (with hyphen): fluss-cpp
RUST_BRIDGE_DIR=$$CARGO_TARGET_DIR/cxxbridge/fluss-cpp/src
@@ -252,7 +281,6 @@ cc_library(
"src/admin.cpp",
"src/connection.cpp",
"src/table.cpp",
- ":rust_bridge_cc_unified",
],
hdrs = [
"include/fluss.hpp",
@@ -303,6 +331,7 @@ cc_library(
}),
deps = [
":rust_lib",
+ "//bindings/cpp/bazel/cpp:arrow_cpp_dep",
],
visibility = ["//visibility:public"],
)
@@ -405,4 +434,3 @@ cc_binary(
}),
visibility = ["//visibility:public"],
)
-
diff --git a/bindings/cpp/CMakeLists.txt b/bindings/cpp/CMakeLists.txt
index 6bd9fc7..0cedf68 100644
--- a/bindings/cpp/CMakeLists.txt
+++ b/bindings/cpp/CMakeLists.txt
@@ -27,9 +27,21 @@ include(FetchContent)
set(FLUSS_GOOGLETEST_VERSION 1.15.2 CACHE STRING "version of GoogleTest")
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
-find_package(Threads REQUIRED)
+# User-facing knobs for dependency provisioning.
+# FLUSS_CPP_DEP_MODE selects where Arrow C++ comes from: "system" uses
+# find_package(Arrow), "build" fetches the Arrow sources and builds them in-tree.
+set(FLUSS_CPP_DEP_MODE "system" CACHE STRING
+    "Dependency provisioning mode for fluss-cpp (system|build)")
+set_property(CACHE FLUSS_CPP_DEP_MODE PROPERTY STRINGS system build)
+# Version baselines; a mismatch only produces a warning at configure time.
+set(FLUSS_CPP_ARROW_VERSION "19.0.1" CACHE STRING
+    "Arrow C++ version baseline for fluss-cpp")
+set(FLUSS_CPP_PROTOBUF_VERSION "3.25.5" CACHE STRING
+    "Protobuf/protoc version baseline for fluss-cpp")
+set(FLUSS_CPP_ARROW_SYSTEM_ROOT "" CACHE PATH
+    "Optional Arrow installation prefix for system mode")
+# NOTE: the default URL and checksum below pin Arrow 19.0.1 and are not derived
+# from FLUSS_CPP_ARROW_VERSION; override all three together when changing versions.
+set(FLUSS_CPP_ARROW_SOURCE_URL
+    "https://github.com/apache/arrow/archive/refs/tags/apache-arrow-19.0.1.tar.gz"
+    CACHE STRING
+    "Arrow source archive URL used in build mode")
+set(FLUSS_CPP_ARROW_SOURCE_SHA256
+    "4c898504958841cc86b6f8710ecb2919f96b5e10fa8989ac10ac4fca8362d86a"
+    CACHE STRING
+    "SHA256 for the Arrow source archive used in build mode")
-find_package(Arrow REQUIRED)
+find_package(Threads REQUIRED)
if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
@@ -47,11 +59,117 @@ if (FLUSS_DEV)
set(FLUSS_ENABLE_TESTING ON)
endif()
+# Only the two provisioning modes handled further down are accepted.
+if (NOT FLUSS_CPP_DEP_MODE STREQUAL "system" AND NOT FLUSS_CPP_DEP_MODE STREQUAL "build")
+    message(FATAL_ERROR
+        "Unsupported FLUSS_CPP_DEP_MODE='${FLUSS_CPP_DEP_MODE}'. Expected 'system' or 'build'.")
+endif()
+
+# protoc must be discoverable on PATH; its location is handed to cargo later.
+find_program(FLUSS_PROTOC_EXECUTABLE NAMES protoc)
+if (NOT FLUSS_PROTOC_EXECUTABLE)
+    message(FATAL_ERROR
+        "protoc not found. Install protoc or set it in PATH. (Fluss baseline: ${FLUSS_CPP_PROTOBUF_VERSION})")
+endif()
+
+# Resolve cargo: an existing path in $CARGO wins. Otherwise search for `cargo`,
+# using the directory part of a non-existing $CARGO value as an extra hint.
+if (DEFINED ENV{CARGO} AND NOT "$ENV{CARGO}" STREQUAL "" AND EXISTS "$ENV{CARGO}")
+    set(FLUSS_CARGO_EXECUTABLE "$ENV{CARGO}")
+else()
+    if (DEFINED ENV{CARGO} AND NOT "$ENV{CARGO}" STREQUAL "")
+        get_filename_component(_FLUSS_CARGO_HINT_DIR "$ENV{CARGO}" DIRECTORY)
+    endif()
+    find_program(FLUSS_CARGO_EXECUTABLE NAMES cargo HINTS "${_FLUSS_CARGO_HINT_DIR}")
+endif()
+if (NOT FLUSS_CARGO_EXECUTABLE)
+    message(FATAL_ERROR "cargo not found. Install Rust toolchain or set CARGO/PATH.")
+endif()
+
+# Query the protoc version. Older releases print three components (for example
+# "libprotoc 3.21.12"), newer ones print two (for example "libprotoc 25.5"), so
+# the regex accepts any dotted number instead of insisting on three components.
+# With the three-component regex the match was empty for modern protoc and the
+# baseline warning below could never fire.
+execute_process(
+    COMMAND ${FLUSS_PROTOC_EXECUTABLE} --version
+    OUTPUT_VARIABLE FLUSS_PROTOC_VERSION_OUTPUT
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    ERROR_QUIET
+)
+string(REGEX MATCH "[0-9]+(\\.[0-9]+)+" FLUSS_PROTOC_VERSION "${FLUSS_PROTOC_VERSION_OUTPUT}")
+
+# Normalize both versions by dropping a leading "3." so that "3.25.5" (baseline
+# notation) and "25.5" (what protoc reports) compare equal.
+set(FLUSS_PROTOC_VERSION_NORM "${FLUSS_PROTOC_VERSION}")
+set(FLUSS_CPP_PROTOBUF_VERSION_NORM "${FLUSS_CPP_PROTOBUF_VERSION}")
+string(REGEX REPLACE "^3\\." "" FLUSS_PROTOC_VERSION_NORM "${FLUSS_PROTOC_VERSION_NORM}")
+string(REGEX REPLACE "^3\\." "" FLUSS_CPP_PROTOBUF_VERSION_NORM "${FLUSS_CPP_PROTOBUF_VERSION_NORM}")
+
+# Warn (never fail) when neither the raw nor the normalized versions agree.
+if (FLUSS_PROTOC_VERSION AND
+    NOT FLUSS_PROTOC_VERSION VERSION_EQUAL FLUSS_CPP_PROTOBUF_VERSION AND
+    NOT FLUSS_PROTOC_VERSION_NORM VERSION_EQUAL FLUSS_CPP_PROTOBUF_VERSION_NORM)
+    message(WARNING
+        "protoc version (${FLUSS_PROTOC_VERSION}) does not match Fluss baseline "
+        "(${FLUSS_CPP_PROTOBUF_VERSION}). Build may still work, but this is outside the tested baseline.")
+endif()
+
+message(STATUS "Fluss C++ dependency mode: ${FLUSS_CPP_DEP_MODE}")
+message(STATUS "Fluss C++ protoc executable: ${FLUSS_PROTOC_EXECUTABLE} (${FLUSS_PROTOC_VERSION_OUTPUT})")
+message(STATUS "Fluss C++ cargo executable: ${FLUSS_CARGO_EXECUTABLE}")
+
+if (FLUSS_CPP_DEP_MODE STREQUAL "system")
+    # System mode: use an installed Arrow, optionally from an explicit prefix.
+    if (FLUSS_CPP_ARROW_SYSTEM_ROOT)
+        list(APPEND CMAKE_PREFIX_PATH "${FLUSS_CPP_ARROW_SYSTEM_ROOT}")
+        set(Arrow_ROOT "${FLUSS_CPP_ARROW_SYSTEM_ROOT}")
+    endif()
+
+    find_package(Arrow REQUIRED)
+
+    # A version mismatch is reported but does not stop configuration.
+    if (DEFINED Arrow_VERSION AND Arrow_VERSION AND NOT Arrow_VERSION VERSION_EQUAL FLUSS_CPP_ARROW_VERSION)
+        message(WARNING
+            "Arrow version (${Arrow_VERSION}) does not match Fluss baseline "
+            "(${FLUSS_CPP_ARROW_VERSION}). Build may still work, but this is outside the tested baseline.")
+    endif()
+else()
+    # Build mode: provision Arrow C++ from source in-tree.
+    # The options are written with FORCE so they override any cached values
+    # before the Arrow sub-project is configured.
+    set(ARROW_BUILD_SHARED ON CACHE BOOL "" FORCE)
+    set(ARROW_BUILD_STATIC OFF CACHE BOOL "" FORCE)
+    set(ARROW_BUILD_TESTS OFF CACHE BOOL "" FORCE)
+    set(ARROW_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)
+    set(ARROW_BUILD_BENCHMARKS OFF CACHE BOOL "" FORCE)
+    set(ARROW_BUILD_INTEGRATION OFF CACHE BOOL "" FORCE)
+    set(ARROW_BUILD_UTILITIES OFF CACHE BOOL "" FORCE)
+    set(ARROW_COMPUTE OFF CACHE BOOL "" FORCE)
+    set(ARROW_CSV OFF CACHE BOOL "" FORCE)
+    set(ARROW_DATASET OFF CACHE BOOL "" FORCE)
+    set(ARROW_FILESYSTEM OFF CACHE BOOL "" FORCE)
+    set(ARROW_JSON OFF CACHE BOOL "" FORCE)
+    set(ARROW_PARQUET OFF CACHE BOOL "" FORCE)
+    set(ARROW_IPC ON CACHE BOOL "" FORCE)
+    # Reduce third-party sub-build complexity in build mode.
+    set(ARROW_JEMALLOC OFF CACHE BOOL "" FORCE)
+    set(ARROW_MIMALLOC OFF CACHE BOOL "" FORCE)
+    set(ARROW_DEPENDENCY_SOURCE BUNDLED CACHE STRING "" FORCE)
+    set(ARROW_SIMD_LEVEL NONE CACHE STRING "" FORCE)
+    set(ARROW_RUNTIME_SIMD_LEVEL NONE CACHE STRING "" FORCE)
+
+    # Arrow's CMake project lives in the `cpp` sub-directory of the archive.
+    FetchContent_Declare(
+        apache_arrow_src
+        URL ${FLUSS_CPP_ARROW_SOURCE_URL}
+        URL_HASH SHA256=${FLUSS_CPP_ARROW_SOURCE_SHA256}
+        SOURCE_SUBDIR cpp
+    )
+    FetchContent_MakeAvailable(apache_arrow_src)
+    # Include directories later attached to fluss_cpp (source and build trees).
+    set(FLUSS_CPP_ARROW_EXTRA_INCLUDE_DIRS
+        "${apache_arrow_src_SOURCE_DIR}/cpp/src"
+        "${apache_arrow_src_BINARY_DIR}/src")
+
+    # Expose the in-tree target under the same name that system mode links against.
+    if (TARGET arrow_shared AND NOT TARGET Arrow::arrow_shared)
+        add_library(Arrow::arrow_shared ALIAS arrow_shared)
+    endif()
+    if (NOT TARGET Arrow::arrow_shared)
+        message(FATAL_ERROR
+            "Arrow build mode did not produce target Arrow::arrow_shared (or arrow_shared).")
+    endif()
+endif()
+
# Get cargo target dir
-execute_process(COMMAND cargo locate-project --workspace --message-format plain
- OUTPUT_VARIABLE CARGO_TARGET_DIR
+execute_process(COMMAND ${FLUSS_CARGO_EXECUTABLE} locate-project --workspace
--message-format plain
+ OUTPUT_VARIABLE CARGO_MANIFEST_PATH
+ OUTPUT_STRIP_TRAILING_WHITESPACE
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
-string(REGEX REPLACE "/Cargo.toml\n$" "/target" CARGO_TARGET_DIR
"${CARGO_TARGET_DIR}")
+# CARGO_MANIFEST_PATH holds the output of `cargo locate-project --workspace`;
+# an empty value means the lookup produced nothing usable.
+if (NOT CARGO_MANIFEST_PATH)
+    message(FATAL_ERROR
+        "Failed to resolve Cargo workspace target dir via '${FLUSS_CARGO_EXECUTABLE} locate-project'. "
+        "Check Rust toolchain installation and PATH/CARGO.")
+endif()
+# The target directory is taken to be `target/` next to the workspace manifest.
+get_filename_component(CARGO_WORKSPACE_DIR "${CARGO_MANIFEST_PATH}" DIRECTORY)
+set(CARGO_TARGET_DIR "${CARGO_WORKSPACE_DIR}/target")
set(CARGO_MANIFEST ${PROJECT_SOURCE_DIR}/Cargo.toml)
set(RUST_SOURCE_FILE ${PROJECT_SOURCE_DIR}/src/lib.rs)
@@ -77,7 +195,7 @@ if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
endif()
add_custom_target(cargo_build
- COMMAND cargo build --manifest-path ${CARGO_MANIFEST} ${CARGO_BUILD_FLAGS}
+ COMMAND ${CMAKE_COMMAND} -E env PROTOC=${FLUSS_PROTOC_EXECUTABLE}
${FLUSS_CARGO_EXECUTABLE} build --manifest-path ${CARGO_MANIFEST}
${CARGO_BUILD_FLAGS}
BYPRODUCTS ${RUST_BRIDGE_CPP} ${RUST_LIB} ${RUST_HEADER_FILE}
DEPENDS ${RUST_SOURCE_FILE}
USES_TERMINAL
@@ -88,6 +206,9 @@ add_library(fluss_cpp STATIC ${CPP_SOURCE_FILE}
${RUST_BRIDGE_CPP})
target_sources(fluss_cpp PUBLIC ${CPP_HEADER_FILE})
target_sources(fluss_cpp PRIVATE ${RUST_HEADER_FILE})
target_include_directories(fluss_cpp PUBLIC ${CPP_INCLUDE_DIR})
+# Attach the Arrow source/build include directories when they were set
+# (build mode), so that fluss_cpp and its consumers can see the Arrow headers.
+if (FLUSS_CPP_ARROW_EXTRA_INCLUDE_DIRS)
+    target_include_directories(fluss_cpp
+        PUBLIC ${FLUSS_CPP_ARROW_EXTRA_INCLUDE_DIRS})
+endif()
target_link_libraries(fluss_cpp PUBLIC ${RUST_LIB})
target_link_libraries(fluss_cpp PRIVATE ${CMAKE_DL_LIBS} Threads::Threads)
target_link_libraries(fluss_cpp PUBLIC Arrow::arrow_shared)
@@ -114,9 +235,11 @@ target_link_libraries(fluss_cpp_kv_example PRIVATE
Arrow::arrow_shared)
target_compile_definitions(fluss_cpp_kv_example PRIVATE ARROW_FOUND)
target_include_directories(fluss_cpp_kv_example PUBLIC ${CPP_INCLUDE_DIR})
-set_target_properties(fluss_cpp
- PROPERTIES ADDITIONAL_CLEAN_FILES ${CARGO_TARGET_DIR}
-)
+if (CARGO_TARGET_DIR)
+ set_target_properties(fluss_cpp
+ PROPERTIES ADDITIONAL_CLEAN_FILES "${CARGO_TARGET_DIR}"
+ )
+endif()
add_dependencies(fluss_cpp cargo_build)
if (FLUSS_ENABLE_ADDRESS_SANITIZER)
diff --git a/bindings/cpp/MODULE.bazel b/bindings/cpp/bazel/cpp/BUILD.bazel
similarity index 71%
copy from bindings/cpp/MODULE.bazel
copy to bindings/cpp/bazel/cpp/BUILD.bazel
index f75d3e6..e4b730d 100644
--- a/bindings/cpp/MODULE.bazel
+++ b/bindings/cpp/bazel/cpp/BUILD.bazel
@@ -15,9 +15,12 @@
# specific language governing permissions and limitations
# under the License.
-module(
- name = "fluss_cpp",
-)
+package(default_visibility = ["//visibility:public"])
-bazel_dep(name = "rules_cc", version = "0.0.17")
-bazel_dep(name = "platforms", version = "0.0.10")
+# Stable indirection target for the Arrow C++ dependency. The implementation
+# repo name can change across modes (registry/build/system) without touching
+# bindings/cpp/BUILD.bazel.
+alias(
+ name = "arrow_cpp_dep",
+ actual = "@apache_arrow_cpp//:arrow_cpp",
+)
diff --git a/bindings/cpp/bazel/cpp/deps.bzl b/bindings/cpp/bazel/cpp/deps.bzl
new file mode 100644
index 0000000..6dd5e1b
--- /dev/null
+++ b/bindings/cpp/bazel/cpp/deps.bzl
@@ -0,0 +1,349 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Bzlmod extension for fluss C++ SDK dependency provisioning."""
+
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+
+# BUILD file content injected into the downloaded Arrow source archive. It
+# builds Arrow C++ with the rules_foreign_cc `cmake` rule and exposes the result
+# as `:arrow_cpp`. The `__EP_CMAKE_*__` placeholders are replaced by
+# _render_arrow_build_file().
+# NOTE(review): the names listed in `out_shared_libs` carry a "1900" version
+# suffix and look specific to the 19.0.x release — revisit when adding versions.
+_ARROW_BUILD_FILE_TEMPLATE = """
+load("@rules_foreign_cc//foreign_cc:defs.bzl", "cmake")
+
+package(default_visibility = ["//visibility:public"])
+
+filegroup(
+    name = "all_srcs",
+    srcs = glob(
+        ["**"],
+        exclude = [
+            "**/BUILD",
+            "**/BUILD.bazel",
+        ],
+    ),
+)
+
+cmake(
+    name = "arrow_cpp",
+    lib_source = ":all_srcs",
+    working_directory = "cpp",
+    generate_args = ["-GUnix Makefiles"],
+    cache_entries = {
+        "CMAKE_BUILD_TYPE": "Release",
+        "CMAKE_INSTALL_LIBDIR": "lib",
+        "CMAKE_POSITION_INDEPENDENT_CODE": "ON",
+        "ARROW_BUILD_SHARED": "ON",
+        "ARROW_BUILD_STATIC": "OFF",
+        "ARROW_BUILD_TESTS": "OFF",
+        "ARROW_BUILD_EXAMPLES": "OFF",
+        "ARROW_BUILD_BENCHMARKS": "OFF",
+        "ARROW_BUILD_INTEGRATION": "OFF",
+        "ARROW_BUILD_UTILITIES": "OFF",
+        "ARROW_COMPUTE": "OFF",
+        "ARROW_CSV": "OFF",
+        "ARROW_DATASET": "OFF",
+        "ARROW_FILESYSTEM": "OFF",
+        "ARROW_JSON": "OFF",
+        "ARROW_PARQUET": "OFF",
+        "ARROW_IPC": "ON",
+        "ARROW_JEMALLOC": "OFF",
+        "ARROW_MIMALLOC": "OFF",
+        "ARROW_SIMD_LEVEL": "NONE",
+        "ARROW_RUNTIME_SIMD_LEVEL": "NONE",
+        "ARROW_DEPENDENCY_SOURCE": "BUNDLED",
+        # Temporary workarounds for older images / Bazel sandbox toolchain detection.
+        "EP_CMAKE_RANLIB": "__EP_CMAKE_RANLIB__",
+        "EP_CMAKE_AR": "__EP_CMAKE_AR__",
+        "EP_CMAKE_NM": "__EP_CMAKE_NM__",
+    },
+    out_include_dir = "include",
+    out_lib_dir = "lib",
+    out_shared_libs = select({
+        "@platforms//os:macos": [
+            "libarrow.dylib",
+            "libarrow.1900.dylib",
+        ],
+        "//conditions:default": [
+            "libarrow.so",
+            "libarrow.so.1900",
+            "libarrow.so.1900.1.0",
+        ],
+    }),
+)
+"""
+
+# Shell commands run on the extracted Arrow sources (passed as `patch_cmds`).
+# The sed expression rewrites the ARROW_CXX_COMPILER_FLAGS define in
+# config.h.cmake from "@CMAKE_CXX_FLAGS@" to an empty string; the `.bak` file
+# left behind by `sed -i.bak` is removed afterwards.
+# NOTE(review): `-i.bak` is presumably used for GNU/BSD sed portability — confirm.
+_ARROW_PATCH_CMDS = [
+    "sed -i.bak 's|#define ARROW_CXX_COMPILER_FLAGS \"@CMAKE_CXX_FLAGS@\"|#define ARROW_CXX_COMPILER_FLAGS \"\"|' " +
+    "cpp/src/arrow/util/config.h.cmake && rm -f cpp/src/arrow/util/config.h.cmake.bak",
+]
+
+# BUILD file template for the "system" Arrow repository. Placeholders:
+#   __SYSTEM_ARROW_SHARED_LIBRARY__ - sysroot-relative path of the imported shared library
+#   __SYSTEM_ARROW_INCLUDE_DIR__    - sysroot-relative include directory
+#   __SYSTEM_ARROW_RUNTIME_SRCS__   - rendered list entries for the runtime library files
+#   __SYSTEM_ARROW_HDRS__           - rendered list entries for the Arrow headers
+# The first two are filled in by _render_system_arrow_build_file(), the last
+# two by _system_arrow_repo_impl(). The public target is `:arrow_cpp`.
+_SYSTEM_ARROW_BUILD_FILE_TEMPLATE = """
+load("@rules_cc//cc:defs.bzl", "cc_import", "cc_library")
+
+package(default_visibility = ["//visibility:public"])
+
+cc_import(
+ name = "arrow_shared_import",
+ shared_library = "__SYSTEM_ARROW_SHARED_LIBRARY__",
+)
+
+filegroup(
+ name = "arrow_runtime_libs",
+ srcs = [
+__SYSTEM_ARROW_RUNTIME_SRCS__
+ ],
+)
+
+cc_library(
+ name = "arrow_cpp",
+ hdrs = [
+__SYSTEM_ARROW_HDRS__
+ ],
+ includes = ["__SYSTEM_ARROW_INCLUDE_DIR__"],
+ data = [":arrow_runtime_libs"],
+ deps = [":arrow_shared_import"],
+)
+"""
+
+# Arrow releases that build mode can fetch, keyed by `arrow_cpp_version`.
+# Each entry provides the download URLs, the archive's top-level directory to
+# strip, and the SRI integrity string handed to http_archive.
+_ARROW_BUILD_VERSIONS = {
+    "19.0.1": {
+        "urls": [
+            "https://github.com/apache/arrow/archive/refs/tags/apache-arrow-19.0.1.tar.gz",
+        ],
+        "strip_prefix": "arrow-apache-arrow-19.0.1",
+        "integrity": "sha256-TImFBJWIQcyGtvhxDsspGflrXhD6iYmsEKxPyoNi2Go=",
+    },
+}
+
+# Attributes accepted by `cpp_sdk.config(...)`.
+# - mode: "build", "system" or "registry" (checked in _cpp_sdk_impl).
+# - ep_cmake_*: tool paths substituted into the Arrow build (build mode).
+# - system_arrow_*: location of a preinstalled Arrow (system mode); everything
+#   except the prefix is relative to `system_arrow_prefix`.
+_config_tag = tag_class(
+    attrs = {
+        "mode": attr.string(default = "build"),
+        "arrow_cpp_version": attr.string(default = "19.0.1"),
+        "protobuf_version": attr.string(default = "3.25.5"),
+        "ep_cmake_ranlib": attr.string(default = "ranlib"),
+        "ep_cmake_ar": attr.string(default = "ar"),
+        "ep_cmake_nm": attr.string(default = "nm"),
+        "system_arrow_prefix": attr.string(default = "/usr"),
+        "system_arrow_include_dir": attr.string(default = "include"),
+        "system_arrow_shared_library": attr.string(
+            default = "lib/x86_64-linux-gnu/libarrow.so",
+        ),
+        "system_arrow_runtime_glob": attr.string(
+            default = "lib/x86_64-linux-gnu/libarrow.so*",
+        ),
+    },
+)
+
+def _render_arrow_build_file(tag):
+    """Renders the build-mode Arrow BUILD file for the given config tag.
+
+    Args:
+        tag: config tag providing `ep_cmake_ranlib`, `ep_cmake_ar` and `ep_cmake_nm`.
+
+    Returns:
+        _ARROW_BUILD_FILE_TEMPLATE with the three `__EP_CMAKE_*__` placeholders
+        replaced by the tag values.
+    """
+    substitutions = [
+        ("__EP_CMAKE_RANLIB__", tag.ep_cmake_ranlib),
+        ("__EP_CMAKE_AR__", tag.ep_cmake_ar),
+        ("__EP_CMAKE_NM__", tag.ep_cmake_nm),
+    ]
+    rendered = _ARROW_BUILD_FILE_TEMPLATE
+    for placeholder, value in substitutions:
+        rendered = rendered.replace(placeholder, value)
+    return rendered
+
+def _render_system_arrow_build_file(tag, shared_library_override = None):
+    """Fills the library and include placeholders of the system-mode BUILD file.
+
+    Args:
+        tag: object carrying either the `system_arrow_*` attributes (config tag)
+            or the plain `shared_library` / `include_dir` attributes.
+        shared_library_override: optional path used instead of the tag's shared
+            library when it is truthy.
+
+    Returns:
+        The template with the shared-library and include-dir placeholders
+        replaced by `sysroot/`-prefixed paths. The header and runtime-file
+        placeholders are left untouched.
+    """
+    if shared_library_override:
+        shared_library = shared_library_override
+    elif hasattr(tag, "system_arrow_shared_library"):
+        shared_library = tag.system_arrow_shared_library
+    else:
+        shared_library = tag.shared_library
+
+    if hasattr(tag, "system_arrow_include_dir"):
+        include_dir = tag.system_arrow_include_dir
+    else:
+        include_dir = tag.include_dir
+
+    rendered = _SYSTEM_ARROW_BUILD_FILE_TEMPLATE.replace(
+        "__SYSTEM_ARROW_SHARED_LIBRARY__",
+        "sysroot/" + shared_library,
+    )
+    return rendered.replace(
+        "__SYSTEM_ARROW_INCLUDE_DIR__",
+        "sysroot/" + include_dir,
+    )
+
+def _starlark_string_list(items):
+    """Renders items as quoted, comma-terminated list entries, one per line.
+
+    Args:
+        items: iterable of strings; may be empty or None.
+
+    Returns:
+        The entries joined by newlines, or "" when there is nothing to render.
+    """
+    rendered_entries = []
+    for item in items or []:
+        rendered_entries.append(' "%s",' % item)
+    return "\n".join(rendered_entries)
+
+def _list_files(repo_ctx, base_dir, suffixes):
+    """Lists regular files and symlinks below base_dir that end with a given suffix.
+
+    Args:
+        repo_ctx: repository context used to run `/usr/bin/find`.
+        base_dir: directory to search.
+        suffixes: list of accepted path suffixes; "" accepts every path.
+
+    Returns:
+        Sorted list of the matching paths exactly as printed by `find`.
+        Fails the repository rule if `find` exits with a non-zero status.
+    """
+    find_cmd = ["/usr/bin/find", base_dir, "(", "-type", "f", "-o", "-type", "l", ")"]
+    found = repo_ctx.execute(find_cmd)
+    if found.return_code != 0:
+        fail("failed to enumerate files under %s: %s" % (base_dir, found.stderr))
+    matches = [
+        path
+        for path in found.stdout.splitlines()
+        if any([path.endswith(suffix) for suffix in suffixes])
+    ]
+    return sorted(matches)
+
+def _copy_file_to_sysroot(repo_ctx, prefix, rel_path):
+    """Copies `<prefix>/<rel_path>` to `sysroot/<rel_path>` inside the repository.
+
+    Args:
+        repo_ctx: repository context used to run `mkdir` and `cp`.
+        prefix: installation prefix the file is read from.
+        rel_path: path relative to prefix; absolute paths are rejected.
+    """
+    if rel_path.startswith("/"):
+        fail("expected relative path under prefix, got absolute path: %s" % rel_path)
+
+    source_path = prefix + "/" + rel_path
+    target_path = "sysroot/" + rel_path
+    if "/" in target_path:
+        target_dir = target_path.rsplit("/", 1)[0]
+    else:
+        target_dir = "sysroot"
+
+    made_dir = repo_ctx.execute(["/bin/mkdir", "-p", target_dir])
+    if made_dir.return_code != 0:
+        fail("failed to create directory %s: %s" % (target_dir, made_dir.stderr))
+
+    # `cp -L` follows symlinks, so the sysroot holds real files and stays
+    # self-contained.
+    copied = repo_ctx.execute(["/bin/cp", "-L", source_path, target_path])
+    if copied.return_code != 0:
+        fail("failed to copy %s to %s: %s" % (source_path, target_path, copied.stderr))
+
+def _system_arrow_repo_impl(repo_ctx):
+    """Materializes a preinstalled Arrow C++ into a repository-local `sysroot/`.
+
+    Headers below `<prefix>/<include_dir>/arrow` and the runtime libraries
+    selected by `runtime_glob` are copied (symlinks resolved) into `sysroot/`,
+    and a BUILD.bazel exposing them as `arrow_cpp` is written.
+
+    Args:
+        repo_ctx: repository context; reads the `prefix`, `include_dir`,
+            `shared_library` and `runtime_glob` attributes. All but `prefix`
+            are interpreted relative to `prefix`.
+    """
+    prefix = repo_ctx.attr.prefix.rstrip("/")
+    include_dir = repo_ctx.attr.include_dir
+    shared_library = repo_ctx.attr.shared_library
+    runtime_glob = repo_ctx.attr.runtime_glob
+
+    mkdir_res = repo_ctx.execute(["/bin/mkdir", "-p", "sysroot"])
+    if mkdir_res.return_code != 0:
+        fail("failed to create sysroot directory: %s" % mkdir_res.stderr)
+
+    # Collect the Arrow headers (only a single trailing "/" is trimmed).
+    include_dir_for_scan = include_dir
+    if include_dir_for_scan.endswith("/"):
+        include_dir_for_scan = include_dir_for_scan[:-1]
+    header_root = prefix + "/" + include_dir_for_scan + "/arrow"
+    headers = _list_files(repo_ctx, header_root, [".h", ".hpp"])
+    header_srcs_rel = []
+    header_srcs = []
+    for h in headers:
+        if not h.startswith(prefix + "/"):
+            fail("header path %s is outside prefix %s" % (h, prefix))
+        rel = h[len(prefix) + 1:]
+        header_srcs_rel.append(rel)
+        header_srcs.append("sysroot/" + rel)
+
+    # Split the glob into directory and file-name pattern. rpartition also
+    # handles a value without any "/" (library directly under `prefix`), where
+    # rsplit("/", 1)[1] would raise an index error.
+    runtime_dir, _, runtime_pattern = runtime_glob.rpartition("/")
+    runtime_prefix = runtime_pattern.replace("*", "")
+    runtime_scan_dir = prefix + "/" + runtime_dir if runtime_dir else prefix
+    runtime_rel_prefix = runtime_dir + "/" if runtime_dir else ""
+    runtime_files = _list_files(repo_ctx, runtime_scan_dir, [""])
+    runtime_srcs_rel = []
+    runtime_srcs = []
+    for f in runtime_files:
+        if not f.startswith(prefix + "/"):
+            continue
+        rel = f[len(prefix) + 1:]
+        if rel.startswith(runtime_rel_prefix) and rel.rpartition("/")[2].startswith(runtime_prefix):
+            runtime_srcs_rel.append(rel)
+            runtime_srcs.append("sysroot/" + rel)
+    runtime_srcs_rel = sorted(runtime_srcs_rel)
+    runtime_srcs = sorted(runtime_srcs)
+
+    # Prefer a versioned soname file as the imported shared library so Bazel
+    # runfiles contain the exact filename required by the runtime loader.
+    shared_import_rel = "sysroot/" + shared_library
+    shared_basename = shared_library.rpartition("/")[2]
+    soname_candidates = []
+    for rel in runtime_srcs_rel:
+        base = rel.rpartition("/")[2]
+        if base == shared_basename:
+            continue
+        if base.startswith(shared_basename + "."):
+            soname_candidates.append("sysroot/" + rel)
+    if soname_candidates:
+        # Prefer shortest suffix first (e.g. libarrow.so.1900 before
+        # libarrow.so.1900.1.0) to match ELF SONAME naming when available.
+        soname_candidates = sorted(soname_candidates, key = lambda s: (len(s), s))
+        shared_import_rel = soname_candidates[0]
+
+    # Copy only required Arrow artifacts instead of mirroring the full system
+    # prefix. The dict de-duplicates paths that appear in more than one list.
+    copy_rel_paths = {}
+    for rel in header_srcs_rel + runtime_srcs_rel + [shared_library]:
+        copy_rel_paths[rel] = True
+    for rel in sorted(copy_rel_paths.keys()):
+        _copy_file_to_sysroot(repo_ctx, prefix, rel)
+
+    build_file = _render_system_arrow_build_file(
+        repo_ctx.attr,
+        shared_library_override = shared_import_rel[len("sysroot/"):],
+    ).replace(
+        "__SYSTEM_ARROW_HDRS__",
+        _starlark_string_list(header_srcs),
+    ).replace(
+        "__SYSTEM_ARROW_RUNTIME_SRCS__",
+        _starlark_string_list(runtime_srcs),
+    )
+    repo_ctx.file("BUILD.bazel", build_file)
+
+# Repository rule that exposes a preinstalled Arrow C++ (see
+# _system_arrow_repo_impl). All attributes are mandatory strings; `local = True`
+# declares that the rule reads from the local system.
+_system_arrow_repository = repository_rule(
+    implementation = _system_arrow_repo_impl,
+    attrs = {
+        "prefix": attr.string(mandatory = True),
+        "include_dir": attr.string(mandatory = True),
+        "shared_library": attr.string(mandatory = True),
+        "runtime_glob": attr.string(mandatory = True),
+    },
+    local = True,
+)
+
+def _select_config(ctx):
+    """Picks the single `cpp_sdk.config` tag that drives the extension.
+
+    A tag declared by the root module always wins. Without a root tag, the
+    first tag found in a dependency module is used; tags coming from more than
+    one dependency module are rejected because there is no rule for choosing
+    between them. That conflict is only an error when the root module does not
+    override it.
+
+    Args:
+        ctx: module extension context (`ctx.modules` is iterated).
+
+    Returns:
+        The selected tag, or None when no module declares `cpp_sdk.config`.
+    """
+    root_tag = None
+    dep_tag = None
+    dep_owner = None
+    dep_conflict = False
+    for mod in ctx.modules:
+        is_root = hasattr(mod, "is_root") and mod.is_root
+        for tag in mod.tags.config:
+            if is_root:
+                if root_tag != None:
+                    fail("cpp_sdk.config may only be declared once in the root module")
+                root_tag = tag
+            elif dep_tag == None:
+                dep_tag = tag
+                dep_owner = mod.name
+            elif dep_owner != mod.name:
+                # Dependency defaults are tolerated as long as they come from a
+                # single module; remember the conflict and decide after the
+                # loop, once it is known whether the root module overrides it.
+                dep_conflict = True
+    if root_tag != None:
+        return root_tag
+    if dep_conflict:
+        fail("multiple dependency defaults for cpp_sdk.config without root override")
+    return dep_tag
+
+def _cpp_sdk_impl(ctx):
+    """Creates the `apache_arrow_cpp` repository according to the selected config.
+
+    Modes:
+        registry: nothing is created.
+        system:   wraps a preinstalled Arrow via _system_arrow_repository.
+        build:    downloads the pinned Arrow sources and attaches the generated
+                  BUILD file that builds them.
+    Any other mode, or an unknown `arrow_cpp_version` in build mode, fails.
+    When no module declares a config tag, nothing is created.
+
+    Args:
+        ctx: module extension context.
+    """
+    config = _select_config(ctx)
+    if config == None or config.mode == "registry":
+        return
+
+    if config.mode == "system":
+        _system_arrow_repository(
+            name = "apache_arrow_cpp",
+            prefix = config.system_arrow_prefix,
+            include_dir = config.system_arrow_include_dir,
+            shared_library = config.system_arrow_shared_library,
+            runtime_glob = config.system_arrow_runtime_glob,
+        )
+        return
+
+    if config.mode != "build":
+        fail("unsupported cpp_sdk mode: %s" % config.mode)
+
+    release = _ARROW_BUILD_VERSIONS.get(config.arrow_cpp_version)
+    if release == None:
+        fail("unsupported arrow_cpp_version for build mode: %s" % config.arrow_cpp_version)
+
+    http_archive(
+        name = "apache_arrow_cpp",
+        urls = release["urls"],
+        strip_prefix = release["strip_prefix"],
+        integrity = release["integrity"],
+        patch_cmds = _ARROW_PATCH_CMDS,
+        build_file_content = _render_arrow_build_file(config),
+    )
+
+# Public module extension. Consumers load it with
+# `use_extension("...:deps.bzl", "cpp_sdk")` and configure it through the
+# `config` tag class defined by _config_tag.
+cpp_sdk = module_extension(
+ implementation = _cpp_sdk_impl,
+ tag_classes = {
+ "config": _config_tag,
+ },
+)
diff --git a/bindings/cpp/MODULE.bazel
b/bindings/cpp/examples/bazel-consumer/build/BUILD.bazel
similarity index 80%
copy from bindings/cpp/MODULE.bazel
copy to bindings/cpp/examples/bazel-consumer/build/BUILD.bazel
index f75d3e6..afd35ed 100644
--- a/bindings/cpp/MODULE.bazel
+++ b/bindings/cpp/examples/bazel-consumer/build/BUILD.bazel
@@ -15,9 +15,11 @@
# specific language governing permissions and limitations
# under the License.
-module(
- name = "fluss_cpp",
-)
+load("@rules_cc//cc:defs.bzl", "cc_binary")
-bazel_dep(name = "rules_cc", version = "0.0.17")
-bazel_dep(name = "platforms", version = "0.0.10")
+cc_binary(
+ name = "consumer_build",
+ srcs = ["main.cc"],
+ copts = ["-std=c++17"],
+ deps = ["@fluss-cpp//bindings/cpp:fluss_cpp"],
+)
diff --git a/bindings/cpp/MODULE.bazel
b/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel
similarity index 53%
copy from bindings/cpp/MODULE.bazel
copy to bindings/cpp/examples/bazel-consumer/build/MODULE.bazel
index f75d3e6..f31165c 100644
--- a/bindings/cpp/MODULE.bazel
+++ b/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel
@@ -15,9 +15,25 @@
# specific language governing permissions and limitations
# under the License.
-module(
- name = "fluss_cpp",
+module(name = "fluss_cpp_consumer_build")
+
+bazel_dep(name = "rules_cc", version = "0.2.14")
+bazel_dep(name = "fluss-cpp", version = "0.1.0")
+
+# Local override for repository-local validation only.
+local_path_override(
+ module_name = "fluss-cpp",
+ # Repository root path (the directory containing `bindings/cpp`).
+ path = "../../../../../",
)
-bazel_dep(name = "rules_cc", version = "0.0.17")
-bazel_dep(name = "platforms", version = "0.0.10")
+# Configure the fluss C++ SDK extension in build mode and import the Arrow
+# repository it creates.
+fluss_cpp = use_extension(
+    "@fluss-cpp//bindings/cpp/bazel/cpp:deps.bzl",
+    "cpp_sdk",
+)
+fluss_cpp.config(
+    mode = "build",
+    protobuf_version = "3.25.5",
+    arrow_cpp_version = "19.0.1",
+    ep_cmake_ranlib = "/usr/bin/ranlib",
+    ep_cmake_ar = "/usr/bin/ar",
+    ep_cmake_nm = "/usr/bin/nm",
+)
+use_repo(fluss_cpp, "apache_arrow_cpp")
diff --git a/bindings/cpp/examples/bazel-consumer/build/main.cc
b/bindings/cpp/examples/bazel-consumer/build/main.cc
new file mode 100644
index 0000000..87e5b68
--- /dev/null
+++ b/bindings/cpp/examples/bazel-consumer/build/main.cc
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "fluss.hpp"
+
+#include <iostream>
+
+// Smoke test for the build-mode consumer: constructs a TablePath and prints it.
+int main() {
+    fluss::TablePath table_path("demo_db", "demo_table");
+    std::cout << "Bazel build-mode dependency example ready: ";
+    std::cout << table_path.ToString() << std::endl;
+    return 0;
+}
+
+
diff --git a/bindings/cpp/MODULE.bazel
b/bindings/cpp/examples/bazel-consumer/system/BUILD.bazel
similarity index 80%
rename from bindings/cpp/MODULE.bazel
rename to bindings/cpp/examples/bazel-consumer/system/BUILD.bazel
index f75d3e6..2f24e6d 100644
--- a/bindings/cpp/MODULE.bazel
+++ b/bindings/cpp/examples/bazel-consumer/system/BUILD.bazel
@@ -15,9 +15,11 @@
# specific language governing permissions and limitations
# under the License.
-module(
- name = "fluss_cpp",
-)
+load("@rules_cc//cc:defs.bzl", "cc_binary")
-bazel_dep(name = "rules_cc", version = "0.0.17")
-bazel_dep(name = "platforms", version = "0.0.10")
+cc_binary(
+ name = "consumer_system",
+ srcs = ["main.cc"],
+ copts = ["-std=c++17"],
+ deps = ["@fluss-cpp//bindings/cpp:fluss_cpp"],
+)
diff --git a/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel
b/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel
new file mode 100644
index 0000000..2a4d6a6
--- /dev/null
+++ b/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel
@@ -0,0 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module(name = "fluss_cpp_consumer_system")
+
+bazel_dep(name = "rules_cc", version = "0.2.14")
+bazel_dep(name = "fluss-cpp", version = "0.1.0")
+
+# Repository-local example path (repository root containing `bindings/cpp`).
+# If you copy this example out of tree, replace this with an absolute path
+# (for example: /path/to/fluss-rust).
+local_path_override(
+    module_name = "fluss-cpp",
+    path = "../../../../../",
+)
+
+# Intended interface for preinstalled protoc + Arrow C++ environments.
+# The extension is loaded from the fluss-cpp module and configured in
+# `system` mode, i.e. pointing at an Arrow installation that already exists.
+fluss_cpp = use_extension("@fluss-cpp//bindings/cpp/bazel/cpp:deps.bzl", "cpp_sdk")
+fluss_cpp.config(
+    mode = "system",
+    protobuf_version = "3.25.5",
+    arrow_cpp_version = "19.0.1",
+    # Adjust these paths for your environment.
+    # Ubuntu 22.04 (apt / custom package) commonly uses lib/x86_64-linux-gnu.
+    system_arrow_prefix = "/usr",
+    system_arrow_include_dir = "include",
+    system_arrow_shared_library = "lib/x86_64-linux-gnu/libarrow.so",
+    system_arrow_runtime_glob = "lib/x86_64-linux-gnu/libarrow.so*",
+)
+use_repo(fluss_cpp, "apache_arrow_cpp")
diff --git a/bindings/cpp/examples/bazel-consumer/system/main.cc b/bindings/cpp/examples/bazel-consumer/system/main.cc
new file mode 100644
index 0000000..b1f0b70
--- /dev/null
+++ b/bindings/cpp/examples/bazel-consumer/system/main.cc
@@ -0,0 +1,27 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "fluss.hpp"
+
+#include <iostream>
+
+int main() {
+    // Build the demo table path and print its string form so the binary
+    // visibly exercises the Fluss C++ API pulled in through "fluss.hpp".
+    fluss::TablePath demo_path("demo_db", "demo_table");
+    std::cout << "Bazel system-mode dependency example ready: ";
+    std::cout << demo_path.ToString() << std::endl;
+    return 0;
+}
diff --git a/bindings/cpp/scripts/ensure_protoc.sh b/bindings/cpp/scripts/ensure_protoc.sh
new file mode 100755
index 0000000..3210bcc
--- /dev/null
+++ b/bindings/cpp/scripts/ensure_protoc.sh
@@ -0,0 +1,277 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -euo pipefail
+
+# Baseline protobuf version; callers may override it through the environment.
+PROTOBUF_BASELINE_VERSION="${PROTOBUF_BASELINE_VERSION:-3.25.5}"
+
+# Cache base directory, in order of preference:
+# $XDG_CACHE_HOME, then $HOME/.cache, then /tmp.
+if [[ -n "${XDG_CACHE_HOME:-}" ]]; then
+  _PROTOC_DEFAULT_CACHE_BASE="${XDG_CACHE_HOME}"
+elif [[ -n "${HOME:-}" ]]; then
+  _PROTOC_DEFAULT_CACHE_BASE="${HOME}/.cache"
+else
+  _PROTOC_DEFAULT_CACHE_BASE="/tmp"
+fi
+
+# Map the host OS onto the OS token used in protoc archive names. An unknown
+# host is fatal only when PROTOC_OS was not supplied, so that the explicit
+# override recommended by the error message actually takes effect.
+_PROTOC_UNAME_S="$(uname -s | tr '[:upper:]' '[:lower:]')"
+case "${_PROTOC_UNAME_S}" in
+  linux*)
+    _PROTOC_DEFAULT_OS="linux"
+    ;;
+  darwin*)
+    _PROTOC_DEFAULT_OS="osx"
+    ;;
+  *)
+    if [[ -z "${PROTOC_OS:-}" ]]; then
+      echo "ERROR: unsupported host OS '${_PROTOC_UNAME_S}'. Please set PROTOC_OS explicitly." >&2
+      exit 1
+    fi
+    _PROTOC_DEFAULT_OS="${PROTOC_OS}"
+    ;;
+esac
+
+# Same treatment for the CPU architecture token.
+_PROTOC_UNAME_M="$(uname -m)"
+case "${_PROTOC_UNAME_M}" in
+  x86_64|amd64)
+    _PROTOC_DEFAULT_ARCH="x86_64"
+    ;;
+  aarch64|arm64)
+    _PROTOC_DEFAULT_ARCH="aarch_64"
+    ;;
+  *)
+    if [[ -z "${PROTOC_ARCH:-}" ]]; then
+      echo "ERROR: unsupported host arch '${_PROTOC_UNAME_M}'. Please set PROTOC_ARCH explicitly." >&2
+      exit 1
+    fi
+    _PROTOC_DEFAULT_ARCH="${PROTOC_ARCH}"
+    ;;
+esac
+
+# Effective settings: an environment value wins, otherwise the detected default.
+PROTOC_INSTALL_ROOT="${PROTOC_INSTALL_ROOT:-${_PROTOC_DEFAULT_CACHE_BASE}/fluss-cpp-tools}"
+PROTOC_OS="${PROTOC_OS:-${_PROTOC_DEFAULT_OS}}"
+PROTOC_ARCH="${PROTOC_ARCH:-${_PROTOC_DEFAULT_ARCH}}"
+PROTOC_FORCE_INSTALL="${PROTOC_FORCE_INSTALL:-0}"
+PROTOC_PRINT_PATH_ONLY="${PROTOC_PRINT_PATH_ONLY:-0}"
+PROTOC_ALLOW_INSECURE_DOWNLOAD="${PROTOC_ALLOW_INSECURE_DOWNLOAD:-0}"
+PROTOC_SKIP_CHECKSUM_VERIFY="${PROTOC_SKIP_CHECKSUM_VERIFY:-0}"
+
+# Print the command-line help text on stdout.
+usage() {
+  # The heredoc delimiter is quoted, so the body is emitted verbatim: nothing
+  # is expanded and `$` must not be backslash-escaped (the backslash would be
+  # printed literally).
+  cat <<'EOF'
+Usage: bindings/cpp/scripts/ensure_protoc.sh [--print-path]
+
+Ensures a protoc binary matching the configured protobuf baseline is available.
+Installs into a local cache directory (default: $XDG_CACHE_HOME/fluss-cpp-tools or
+$HOME/.cache/fluss-cpp-tools) and prints
+the protoc path on stdout.
+
+Env vars:
+  PROTOBUF_BASELINE_VERSION  Baseline protobuf version (default: 3.25.5)
+  PROTOC_INSTALL_ROOT        Local cache root (default: XDG/HOME cache dir)
+  PROTOC_OS                  protoc package OS (default: auto-detect host: linux/osx)
+  PROTOC_ARCH                protoc package arch (default: auto-detect host: x86_64/aarch_64)
+  PROTOC_FORCE_INSTALL       1 to force re-download
+  PROTOC_ALLOW_INSECURE_DOWNLOAD
+                             1 to disable TLS verification (not recommended)
+  PROTOC_SKIP_CHECKSUM_VERIFY
+                             1 to skip pinned archive checksum verification
+  BAZEL_PROXY_URL            Optional proxy (sets curl/wget proxy envs if present)
+EOF
+}
+
+# Parse command-line flags; any unrecognised argument prints the usage text on
+# stderr and aborts with status 1.
+for cli_arg in "$@"; do
+  if [[ "$cli_arg" == "--print-path" ]]; then
+    PROTOC_PRINT_PATH_ONLY=1
+  elif [[ "$cli_arg" == "-h" || "$cli_arg" == "--help" ]]; then
+    usage
+    exit 0
+  else
+    echo "Unknown argument: $cli_arg" >&2
+    usage >&2
+    exit 1
+  fi
+done
+
+# Seed the standard proxy variables from BAZEL_PROXY_URL when it is set.
+# Non-empty values already present in the environment are left untouched.
+setup_proxy_env() {
+  if [[ -z "${BAZEL_PROXY_URL:-}" ]]; then
+    return 0
+  fi
+  # Lower-case variables are exported first because the upper-case ones
+  # default to them.
+  export http_proxy="${http_proxy:-$BAZEL_PROXY_URL}"
+  export https_proxy="${https_proxy:-$BAZEL_PROXY_URL}"
+  export HTTP_PROXY="${HTTP_PROXY:-$http_proxy}"
+  export HTTPS_PROXY="${HTTPS_PROXY:-$https_proxy}"
+}
+
+# Translate a protobuf version into the form used by protoc release tags.
+#   $1  version string, e.g. "3.25.5"
+# Prints "<minor>.<patch>" for 3.<minor>.<patch> with minor >= 21 (so 3.25.5
+# becomes 25.5); every other input is printed unchanged.
+normalize_version_for_protoc_release() {
+  local version="$1"
+  # Protobuf release packaging switched from v3.x.y to vX.Y for newer versions.
+  # For our current agreed baseline (3.25.5), the protoc archive/tag is 25.5.
+  if [[ ! "$version" =~ ^3\.([0-9]+\.[0-9]+)$ ]]; then
+    echo "$version"
+    return 0
+  fi
+  local without_prefix="${BASH_REMATCH[1]}"
+  if [[ "${without_prefix%%.*}" -ge 21 ]]; then
+    echo "$without_prefix"
+  else
+    echo "$version"
+  fi
+}
+
+version_matches_baseline() {
+ local actual="$1"
+ local baseline="$2"
+ local actual_norm baseline_norm
+ actual_norm="$(normalize_version_for_protoc_release "$actual")"
+ baseline_norm="$(normalize_version_for_protoc_release "$baseline")"
+ [[ "$actual" == "$baseline" || "$actual_norm" == "$baseline_norm" ]]
+}
+
+lookup_protoc_archive_sha256() {
+ local release_version="$1"
+ local os="$2"
+ local arch="$3"
+ case "${release_version}:${os}:${arch}" in
+ 25.5:linux:aarch_64)
+ echo "dc715bb5aab2ebf9653d7d3efbe55e01a035e45c26f391ff6d9b7923e22914b7"
+ ;;
+ 25.5:linux:x86_64)
+ echo "e1ed237a17b2e851cf9662cb5ad02b46e70ff8e060e05984725bc4b4228c6b28"
+ ;;
+ 25.5:osx:aarch_64)
+ echo "781a6fc4c265034872cadc65e63dd3c0fc49245b70917821b60e2d457a6876ab"
+ ;;
+ 25.5:osx:x86_64)
+ echo "c5447e4f0d5caffb18d9ff21eae7bc7faf2bb2000083d6f49e5b6000b30fceae"
+ ;;
+ *)
+ return 1
+ ;;
+ esac
+}
+
+# Check a file against an expected SHA-256 digest.
+#   $1  path of the file to hash
+#   $2  expected hex digest
+# Uses sha256sum when available, otherwise `shasum -a 256`. Returns 1 with a
+# message on stderr when neither tool exists or the digests differ.
+verify_download_sha256() {
+  local target="$1"
+  local wanted="$2"
+  local -a hasher
+  if command -v sha256sum >/dev/null 2>&1; then
+    hasher=(sha256sum)
+  elif command -v shasum >/dev/null 2>&1; then
+    hasher=(shasum -a 256)
+  else
+    echo "ERROR: neither sha256sum nor shasum is available for checksum verification." >&2
+    return 1
+  fi
+
+  # Both tools print "<digest>  <file>"; keep only the first field.
+  local computed=""
+  computed="$("${hasher[@]}" "$target" | awk '{print $1}')"
+  if [[ "$computed" == "$wanted" ]]; then
+    return 0
+  fi
+  echo "ERROR: protoc archive checksum mismatch." >&2
+  echo " expected: $wanted" >&2
+  echo " actual: $computed" >&2
+  return 1
+}
+
+download_file() {
+ local url="$1"
+ local out="$2"
+
+ if command -v curl >/dev/null 2>&1; then
+ local curl_args=(-fL)
+ if [[ "${PROTOC_ALLOW_INSECURE_DOWNLOAD}" == "1" ]]; then
+ curl_args+=(-k)
+ fi
+ curl "${curl_args[@]}" "$url" -o "$out"
+ return 0
+ fi
+
+ if command -v wget >/dev/null 2>&1; then
+ local wget_args=()
+ if [[ -n "${https_proxy:-}" || -n "${http_proxy:-}" ]]; then
+ wget_args+=(-e use_proxy=yes)
+ if [[ -n "${https_proxy:-}" ]]; then
+ wget_args+=(-e "https_proxy=${https_proxy}")
+ fi
+ if [[ -n "${http_proxy:-}" ]]; then
+ wget_args+=(-e "http_proxy=${http_proxy}")
+ fi
+ fi
+ if [[ "${PROTOC_ALLOW_INSECURE_DOWNLOAD}" == "1" ]]; then
+ wget_args+=(--no-check-certificate)
+ fi
+ wget "${wget_args[@]}" -O "$out" "$url"
+ return 0
+ fi
+
+ echo "ERROR: neither curl nor wget is available for downloading protoc." >&2
+ return 1
+}
+
+ensure_zip_tools() {
+ command -v unzip >/dev/null 2>&1 || {
+ echo "ERROR: unzip not found." >&2
+ exit 1
+ }
+}
+
+setup_proxy_env
+ensure_zip_tools
+
+if command -v protoc >/dev/null 2>&1; then
+ existing_out="$(protoc --version 2>/dev/null || true)"
+ if [[ "$existing_out" =~ ([0-9]+\.[0-9]+\.[0-9]+) ]]; then
+ existing_ver="${BASH_REMATCH[1]}"
+ if version_matches_baseline "$existing_ver" "$PROTOBUF_BASELINE_VERSION";
then
+ command -v protoc
+ exit 0
+ fi
+ fi
+fi
+
+PROTOC_RELEASE_VERSION="$(normalize_version_for_protoc_release
"$PROTOBUF_BASELINE_VERSION")"
+PROTOC_ARCHIVE="protoc-${PROTOC_RELEASE_VERSION}-${PROTOC_OS}-${PROTOC_ARCH}.zip"
+PROTOC_URL="https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_RELEASE_VERSION}/${PROTOC_ARCHIVE}"
+PROTOC_PREFIX="${PROTOC_INSTALL_ROOT}/protoc-${PROTOC_RELEASE_VERSION}-${PROTOC_OS}-${PROTOC_ARCH}"
+PROTOC_BIN="${PROTOC_PREFIX}/bin/protoc"
+
+if [[ "${PROTOC_FORCE_INSTALL}" != "1" && -x "${PROTOC_BIN}" ]]; then
+ if [[ "${PROTOC_PRINT_PATH_ONLY}" == "1" ]]; then
+ echo "${PROTOC_BIN}"
+ else
+ echo "${PROTOC_BIN}"
+ fi
+ exit 0
+fi
+
+# Download into a scratch directory inside the install root; the EXIT trap
+# removes it on every exit path.
+mkdir -p "${PROTOC_INSTALL_ROOT}"
+tmpdir="$(mktemp -d "${PROTOC_INSTALL_ROOT}/.protoc-download.XXXXXX")"
+trap 'rm -rf "${tmpdir}"' EXIT
+
+archive_path="${tmpdir}/${PROTOC_ARCHIVE}"
+download_file "${PROTOC_URL}" "${archive_path}"
+
+# Verify the archive against the pinned digest unless explicitly skipped.
+# A release without a pinned digest is an error rather than a silent pass.
+if [[ "${PROTOC_SKIP_CHECKSUM_VERIFY}" != "1" ]]; then
+  if ! expected_sha256="$(lookup_protoc_archive_sha256 "${PROTOC_RELEASE_VERSION}" "${PROTOC_OS}" "${PROTOC_ARCH}")"; then
+    echo "ERROR: no pinned checksum for protoc archive ${PROTOC_ARCHIVE}. Set PROTOC_SKIP_CHECKSUM_VERIFY=1 to bypass." >&2
+    exit 1
+  fi
+  verify_download_sha256 "${archive_path}" "${expected_sha256}"
+fi
+
+# Unpack in the scratch area first, then replace any previous install.
+extract_dir="${tmpdir}/extract"
+mkdir -p "${extract_dir}"
+unzip -q "${archive_path}" -d "${extract_dir}"
+
+rm -rf "${PROTOC_PREFIX}"
+mkdir -p "${PROTOC_PREFIX}"
+cp -a "${extract_dir}/." "${PROTOC_PREFIX}/"
+chmod +x "${PROTOC_BIN}"
+
+# The installed binary's path is the script's only stdout output.
+echo "${PROTOC_BIN}"
diff --git a/docs/cpp-bazel-usage.md b/docs/cpp-bazel-usage.md
new file mode 100644
index 0000000..61d861e
--- /dev/null
+++ b/docs/cpp-bazel-usage.md
@@ -0,0 +1,291 @@
+# Fluss C++ Bazel Usage Guide (System / Build Modes)
+
+This guide is for:
+
+- C++ application teams consuming Fluss C++ bindings via Bazel
+- Maintainers evolving the Bazel integration
+
+For the CMake flow with the same `system` / `build` dependency modes, see
+`docs/cpp-cmake-usage.md`.
+
+Current scope of this guide:
+
+- Only two dependency modes are documented as the mainline path:
+  - `system`
+  - `build`
+- A strict internal-registry-only module flow is deliberately left out of the mainline path for now.
+
+## Scope
+
+- Dependency model: **root module mode**
+- Consumer dependency target: `@fluss-cpp//bindings/cpp:fluss_cpp`
+- Root `MODULE.bazel` is required for root module mode.
+- Build systems covered by this document: **Bazel**
+- Dependency modes covered by this document: **system/build**
+
+Version baseline references currently used by examples:
+
+- `protobuf/protoc`: `3.25.5`
+- `arrow-cpp`: `19.0.1`
+
+## Common Consumer `BUILD.bazel`
+
+Both modes use the same dependency target:
+
+```starlark
+load("@rules_cc//cc:defs.bzl", "cc_binary")
+
+cc_binary(
+ name = "fluss_reader",
+ srcs = ["reader.cc"],
+ deps = ["@fluss-cpp//bindings/cpp:fluss_cpp"],
+)
+```
+
+## Mode 1: `system` (Recommended in preinstalled environments)
+
+Use this mode when your environment already provides:
+
+- `protoc`
+- Arrow C++ (headers + shared libraries)
+
+### Consumer `MODULE.bazel` (pattern)
+
+```starlark
+module(name = "my_cpp_app")
+
+bazel_dep(name = "rules_cc", version = "0.2.14")
+bazel_dep(name = "fluss-cpp", version = "<released-version>")
+
+fluss_cpp = use_extension("@fluss-cpp//bindings/cpp/bazel/cpp:deps.bzl", "cpp_sdk")
+fluss_cpp.config(
+ mode = "system",
+ protobuf_version = "3.25.5",
+ arrow_cpp_version = "19.0.1",
+ # Adjust Arrow paths for your environment
+ system_arrow_prefix = "/usr",
+ system_arrow_include_dir = "include",
+ system_arrow_shared_library = "lib/x86_64-linux-gnu/libarrow.so",
+ system_arrow_runtime_glob = "lib/x86_64-linux-gnu/libarrow.so*",
+)
+use_repo(fluss_cpp, "apache_arrow_cpp")
+```
+
+### Build and run (consumer workspace pattern)
+
+Run from your consumer workspace root (the directory containing
+`MODULE.bazel` and your top-level `BUILD.bazel`).
+
+```bash
+PROTOC_BIN="$(command -v protoc)"
+CARGO_BIN="$(command -v cargo)"
+bazel run \
+ --action_env=PROTOC="$PROTOC_BIN" \
+ --action_env=CARGO="$CARGO_BIN" \
+ --action_env=PATH="$(dirname "$CARGO_BIN"):$PATH" \
+ //:fluss_reader
+```
+
+### Runnable example
+
+- `bindings/cpp/examples/bazel-consumer/system`
+
+```bash
+cd bindings/cpp/examples/bazel-consumer/system
+PROTOC_BIN="$(command -v protoc)"
+CARGO_BIN="$(command -v cargo)"
+bazel run \
+ --action_env=PROTOC="$PROTOC_BIN" \
+ --action_env=CARGO="$CARGO_BIN" \
+ --action_env=PATH="$(dirname "$CARGO_BIN"):$PATH" \
+ //:consumer_system
+```
+
+## Mode 2: `build` (No internal registry / no preinstalled Arrow)
+
+Use this mode when Arrow C++ is not preinstalled and you want Bazel to
+provision it from source.
+
+### Consumer `MODULE.bazel` (pattern)
+
+```starlark
+module(name = "my_cpp_app")
+
+bazel_dep(name = "rules_cc", version = "0.2.14")
+bazel_dep(name = "fluss-cpp", version = "<released-version>")
+
+fluss_cpp = use_extension("@fluss-cpp//bindings/cpp/bazel/cpp:deps.bzl", "cpp_sdk")
+fluss_cpp.config(
+ mode = "build",
+ protobuf_version = "3.25.5",
+ arrow_cpp_version = "19.0.1",
+)
+use_repo(fluss_cpp, "apache_arrow_cpp")
+```
+
+Notes:
+
+- `build` mode in the core Bazel integration still takes `protoc` from `PROTOC` (environment variable) or `PATH`.
+- To auto-download a pinned `protoc` for `build` mode, use
+  `bindings/cpp/scripts/ensure_protoc.sh` and pass the result via `--action_env=PROTOC=...`.
+- `ensure_protoc.sh` auto-detects host OS/arch (`linux`/`osx`, `x86_64`/`aarch_64`).
+- Some environments may require `ep_cmake_ar/ranlib/nm` overrides.
+
+### Build and run (consumer workspace pattern, with auto-downloaded `protoc`)
+
+Run from the `fluss-rust` repository root, or adjust the script path if you
+copied it elsewhere.
+
+```bash
+PROTOC_BIN="$(bash bindings/cpp/scripts/ensure_protoc.sh --print-path)"
+```
+
+```bash
+bazel run --action_env=PROTOC="$PROTOC_BIN" //:fluss_reader
+```
+
+If `cargo` is not on Bazel action `PATH`, also pass:
+
+```bash
+CARGO_BIN="$(command -v cargo)"
+bazel run \
+ --action_env=PROTOC="$PROTOC_BIN" \
+ --action_env=CARGO="$CARGO_BIN" \
+ --action_env=PATH="$(dirname "$CARGO_BIN"):$PATH" \
+ //:fluss_reader
+```
+
+### Runnable example
+
+- `bindings/cpp/examples/bazel-consumer/build`
+
+```bash
+cd bindings/cpp/examples/bazel-consumer/build
+PROTOC_BIN="$(bash ../../../scripts/ensure_protoc.sh --print-path)"
+CARGO_BIN="$(command -v cargo)"
+bazel run \
+ --action_env=PROTOC="$PROTOC_BIN" \
+ --action_env=CARGO="$CARGO_BIN" \
+ --action_env=PATH="$(dirname "$CARGO_BIN"):$PATH" \
+ //:consumer_build
+```
+
+## Local Development Override (Optional)
+
+For repository-local validation only:
+
+```starlark
+local_path_override(
+ module_name = "fluss-cpp",
+ path = "/path/to/fluss-rust",
+)
+```
+
+Do not keep local overrides in long-lived branches.
+
+Repository-local examples in this repo use `version = "0.1.0"` together with
+`local_path_override(...)` for local validation before publishing to the Bazel
+registry.
+
+## Repository-local Validation (Direct Commands)
+
+These commands validate the repository examples directly.
+If your environment requires a proxy for Bazel external downloads, export it
+before running (replace the placeholder URL with your actual proxy):
+
+```bash
+export BAZEL_PROXY_URL="http://proxy.example.com:3128"
+export http_proxy="$BAZEL_PROXY_URL"
+export https_proxy="$BAZEL_PROXY_URL"
+export HTTP_PROXY="$http_proxy"
+export HTTPS_PROXY="$https_proxy"
+unset all_proxy ALL_PROXY
+```
+
+### Validate `build` example
+
+```bash
+cd bindings/cpp/examples/bazel-consumer/build
+PROTOC_BIN="$(bash ../../../scripts/ensure_protoc.sh --print-path)"
+CARGO_BIN="$(command -v cargo)"
+bazel --ignore_all_rc_files run \
+ --registry=https://bcr.bazel.build \
+ --lockfile_mode=off \
+ --repo_env=http_proxy="${http_proxy:-}" \
+ --repo_env=https_proxy="${https_proxy:-}" \
+ --repo_env=HTTP_PROXY="${HTTP_PROXY:-}" \
+ --repo_env=HTTPS_PROXY="${HTTPS_PROXY:-}" \
+ --action_env=http_proxy="${http_proxy:-}" \
+ --action_env=https_proxy="${https_proxy:-}" \
+ --action_env=HTTP_PROXY="${HTTP_PROXY:-}" \
+ --action_env=HTTPS_PROXY="${HTTPS_PROXY:-}" \
+ --action_env=all_proxy= \
+ --action_env=ALL_PROXY= \
+ --action_env=PROTOC="$PROTOC_BIN" \
+ --action_env=CARGO="$CARGO_BIN" \
+ --action_env=PATH="$(dirname "$CARGO_BIN"):$PATH" \
+ --strategy=CcCmakeMakeRule=local \
+ --strategy=BootstrapGNUMake=local \
+ --strategy=BootstrapPkgConfig=local \
+ //:consumer_build
+```
+
+### Validate `system` example (using a local Arrow prefix)
+
+The `system` example defaults to `/usr`. If your Arrow prefix is elsewhere
+(for example a locally built prefix), copy the example to a temp directory and
+patch `MODULE.bazel` before running:
+
+```bash
+tmp_dir="$(mktemp -d /tmp/fluss-bazel-system-doc.XXXXXX)"
+FLUSS_RUST_ROOT="$(pwd)"
+cp -a bindings/cpp/examples/bazel-consumer/system/. "$tmp_dir/"
+sed -i \
+  -e "s|path = \"../../../../../\"|path = \"$FLUSS_RUST_ROOT\"|" \
+  -e 's|system_arrow_prefix = "/usr"|system_arrow_prefix = "/tmp/fluss-system-arrow-19.0.1"|' \
+  -e 's|system_arrow_shared_library = "lib/x86_64-linux-gnu/libarrow.so"|system_arrow_shared_library = "lib/libarrow.so"|' \
+  -e 's|system_arrow_runtime_glob = "lib/x86_64-linux-gnu/libarrow.so\*"|system_arrow_runtime_glob = "lib/libarrow.so*"|' \
+  "$tmp_dir/MODULE.bazel"
+cd "$tmp_dir"
+PROTOC_BIN="$(command -v protoc)"
+CARGO_BIN="$(command -v cargo)"
+bazel --ignore_all_rc_files run \
+ --registry=https://bcr.bazel.build \
+ --lockfile_mode=off \
+ --repo_env=http_proxy="${http_proxy:-}" \
+ --repo_env=https_proxy="${https_proxy:-}" \
+ --repo_env=HTTP_PROXY="${HTTP_PROXY:-}" \
+ --repo_env=HTTPS_PROXY="${HTTPS_PROXY:-}" \
+ --action_env=http_proxy="${http_proxy:-}" \
+ --action_env=https_proxy="${https_proxy:-}" \
+ --action_env=HTTP_PROXY="${HTTP_PROXY:-}" \
+ --action_env=HTTPS_PROXY="${HTTPS_PROXY:-}" \
+ --action_env=all_proxy= \
+ --action_env=ALL_PROXY= \
+ --action_env=PROTOC="$PROTOC_BIN" \
+ --action_env=CARGO="$CARGO_BIN" \
+ --action_env=PATH="$(dirname "$CARGO_BIN"):$PATH" \
+ //:consumer_system
+```
+
+On macOS (BSD `sed`), replace `sed -i` with `sed -i ''` in the patch step above.
+
+## Upgrade Procedure
+
+1. Update `bazel_dep(name = "fluss-cpp", version = "...")`
+2. Update mode version settings if needed (`protobuf_version`, `arrow_cpp_version`)
+3. Run `bazel mod tidy`
+4. Commit `MODULE.bazel` and `MODULE.bazel.lock`
+5. Run build + tests
+6. Verify dependency graph:
+
+```bash
+bazel mod graph | rg "fluss-cpp@"
+```
+
+## Examples and Non-Mainline References
+
+Mainline examples:
+
+- `bindings/cpp/examples/bazel-consumer/build`
+- `bindings/cpp/examples/bazel-consumer/system`
diff --git a/docs/cpp-cmake-usage.md b/docs/cpp-cmake-usage.md
new file mode 100644
index 0000000..3002d1c
--- /dev/null
+++ b/docs/cpp-cmake-usage.md
@@ -0,0 +1,129 @@
+# Fluss C++ CMake Usage Guide (System / Build Modes)
+
+## Audience
+
+- C++ application teams building `bindings/cpp` with CMake
+- Maintainers evolving Fluss C++ dependency provisioning
+
+## Scope
+
+- Build system covered by this document: **CMake**
+- Dependency modes covered by this document: **system/build**
+
+Current tested baselines:
+
+- `protoc`: `3.25.5`
+- `arrow-cpp`: `19.0.1`
+
+Notes:
+
+- CMake currently warns (does not fail) when local `protoc`/Arrow versions differ from the baselines.
+- `protoc` is required because Rust `prost-build` runs during the C++ build.
+
+## Common Prerequisites
+
+- Rust toolchain (`cargo` in `PATH`, or set `CARGO=/path/to/cargo`)
+- `protoc` in `PATH` (required for `system` mode; `build` mode can auto-download via `bindings/cpp/scripts/ensure_protoc.sh`)
+- C++17 compiler
+- CMake 3.22+
+
+Examples below use `bindings/cpp` as the source directory.
+
+## Mode 1: `system`
+
+Use this mode when the environment already provides Arrow C++.
+
+### Configure
+
+```bash
+cmake -S bindings/cpp -B /tmp/fluss-cpp-cmake-system \
+ -DFLUSS_CPP_DEP_MODE=system \
+ -DFLUSS_CPP_ARROW_SYSTEM_ROOT=/path/to/arrow/prefix
+```
+
+Typical prefixes:
+
+- Ubuntu package install: `/usr`
+- Custom install prefix: `/usr/local` or `/opt/arrow`
+
+### Build
+
+```bash
+cmake --build /tmp/fluss-cpp-cmake-system --target fluss_cpp -j
+```
+
+## Mode 2: `build`
+
+Use this mode when Arrow C++ is not preinstalled and CMake should fetch/build it.
+
+### Configure (with auto-downloaded `protoc`)
+
+```bash
+PROTOC_BIN="$(bash bindings/cpp/scripts/ensure_protoc.sh --print-path)"
+export PATH="$(dirname "$PROTOC_BIN"):$PATH"
+```
+
+Then configure:
+
+```bash
+cmake -S bindings/cpp -B /tmp/fluss-cpp-cmake-build \
+ -DFLUSS_CPP_DEP_MODE=build
+```
+
+Optional overrides:
+
+- `-DFLUSS_CPP_ARROW_VERSION=19.0.1`
+- `-DFLUSS_CPP_ARROW_SOURCE_URL=...` (internal mirror or pinned archive)
+- `-DFLUSS_CPP_PROTOBUF_VERSION=3.25.5` (baseline warning only)
+
+If your environment needs a proxy for CMake/FetchContent downloads, export standard proxy vars before configure/build:
+
+```bash
+export http_proxy=http://host:port
+export https_proxy=http://host:port
+export HTTP_PROXY="$http_proxy"
+export HTTPS_PROXY="$https_proxy"
+```
+
+### Build
+
+```bash
+cmake --build /tmp/fluss-cpp-cmake-build --target fluss_cpp -j
+```
+
+This mode is slower on first build because it compiles Arrow C++ from source.
+
+## Repository-local Validation (Direct Commands)
+
+### Validate `system` mode
+
+```bash
+PROTOC_BIN="$(bash bindings/cpp/scripts/ensure_protoc.sh --print-path)"
+export PATH="$(dirname "$PROTOC_BIN"):$PATH"
+cmake -S bindings/cpp -B /tmp/fluss-cpp-cmake-system \
+ -DFLUSS_CPP_DEP_MODE=system \
+ -DFLUSS_CPP_ARROW_SYSTEM_ROOT=/tmp/fluss-system-arrow-19.0.1
+cmake --build /tmp/fluss-cpp-cmake-system --target fluss_cpp -j
+```
+
+### Validate `build` mode
+
+```bash
+PROTOC_BIN="$(bash bindings/cpp/scripts/ensure_protoc.sh --print-path)"
+export PATH="$(dirname "$PROTOC_BIN"):$PATH"
+cmake -S bindings/cpp -B /tmp/fluss-cpp-cmake-build \
+ -DFLUSS_CPP_DEP_MODE=build
+cmake --build /tmp/fluss-cpp-cmake-build --target fluss_cpp -j
+```
+
+## Troubleshooting
+
+- `cargo not found`
+ - Install Rust toolchain or set `CARGO=/path/to/cargo`.
+- `protoc not found`
+  - Install `protoc` and ensure it is in `PATH`.
+  - For `build` mode, run `bindings/cpp/scripts/ensure_protoc.sh` and prepend the directory containing the returned binary to `PATH`.
+- `arrow/c/bridge.h` not found (build mode)
+  - Reconfigure after updating to the latest `bindings/cpp/CMakeLists.txt`; build mode now adds Arrow source/build include dirs explicitly.
+- Long first build in `build` mode
+ - Expected. Arrow C++ source build dominates wall time.