This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new c8fe26898c GH-46272: [C++] Build Arrow libraries with 
`-Wmissing-declarations` on gcc (#47042)
c8fe26898c is described below

commit c8fe26898ce49c58514f511be58afddce176826b
Author: Antoine Pitrou <anto...@python.org>
AuthorDate: Thu Jul 17 12:05:34 2025 +0200

    GH-46272: [C++] Build Arrow libraries with `-Wmissing-declarations` on gcc 
(#47042)
    
    ### Rationale for this change
    
    The warning option `-Wmissing-declarations` allows finding private 
functions that erroneously have global linkage (because they are neither static 
nor in the anonymous namespace).
    
    We only care about this for the public Arrow libraries, not for tests or 
utilities where it's harmless to have private functions that nevertheless have 
global linkage (and changing that would require a lot of pointless code churn).
    
    ### Are these changes tested?
    
    Yes, on builds using gcc.
    
    ### Are there any user-facing changes?
    
    No, because this is only enabled if the warning level is "CHECKIN". Release 
builds will by default use the "PRODUCTION" warning level.
    * GitHub Issue: #46272
    
    Authored-by: Antoine Pitrou <anto...@python.org>
    Signed-off-by: Antoine Pitrou <anto...@python.org>
---
 cpp/cmake_modules/BuildUtils.cmake                 |   3 +
 cpp/cmake_modules/SetupCxxFlags.cmake              |   6 +
 cpp/src/arrow/acero/aggregate_internal.cc          |   1 +
 cpp/src/arrow/acero/asof_join_node.cc              |   1 +
 cpp/src/arrow/acero/exec_plan.cc                   |  18 +--
 .../acero/exec_plan_internal.h}                    |  29 ++--
 cpp/src/arrow/acero/fetch_node.cc                  |   1 +
 cpp/src/arrow/acero/filter_node.cc                 |   1 +
 cpp/src/arrow/acero/hash_join_node.cc              |  33 +++--
 cpp/src/arrow/acero/order_by_node.cc               |   1 +
 cpp/src/arrow/acero/pivot_longer_node.cc           |   1 +
 cpp/src/arrow/acero/project_node.cc                |   1 +
 cpp/src/arrow/acero/sink_node.cc                   |   1 +
 cpp/src/arrow/acero/sorted_merge_node.cc           |   2 +
 cpp/src/arrow/acero/source_node.cc                 |   1 +
 cpp/src/arrow/acero/union_node.cc                  |   1 +
 cpp/src/arrow/array/data.cc                        |   4 +
 cpp/src/arrow/array/diff.cc                        | 134 +++++++++---------
 cpp/src/arrow/buffer.cc                            |   4 -
 cpp/src/arrow/c/bridge.cc                          |   4 +
 cpp/src/arrow/compute/api_aggregate.cc             |   1 +
 cpp/src/arrow/compute/api_scalar.cc                |   1 +
 cpp/src/arrow/compute/api_vector.cc                |   1 +
 cpp/src/arrow/compute/cast.cc                      |   1 +
 cpp/src/arrow/compute/kernels/aggregate_basic.cc   |   9 +-
 cpp/src/arrow/compute/kernels/aggregate_mode.cc    |   1 +
 cpp/src/arrow/compute/kernels/aggregate_pivot.cc   |   1 +
 .../arrow/compute/kernels/aggregate_quantile.cc    |   1 +
 cpp/src/arrow/compute/kernels/aggregate_tdigest.cc |   1 +
 cpp/src/arrow/compute/kernels/aggregate_var_std.cc |   1 +
 cpp/src/arrow/compute/kernels/hash_aggregate.cc    |   1 +
 .../compute/kernels/hash_aggregate_numeric.cc      |   1 +
 .../arrow/compute/kernels/hash_aggregate_pivot.cc  |   1 +
 cpp/src/arrow/compute/kernels/scalar_arithmetic.cc |   1 +
 cpp/src/arrow/compute/kernels/scalar_boolean.cc    |   1 +
 .../compute/kernels/scalar_cast_dictionary.cc      |   4 +
 .../arrow/compute/kernels/scalar_cast_numeric.cc   |   4 +
 .../arrow/compute/kernels/scalar_cast_temporal.cc  |   8 ++
 cpp/src/arrow/compute/kernels/scalar_compare.cc    |   1 +
 cpp/src/arrow/compute/kernels/scalar_if_else.cc    |   1 +
 cpp/src/arrow/compute/kernels/scalar_nested.cc     |   1 +
 cpp/src/arrow/compute/kernels/scalar_random.cc     |   1 +
 cpp/src/arrow/compute/kernels/scalar_round.cc      |   1 +
 cpp/src/arrow/compute/kernels/scalar_set_lookup.cc |   1 +
 .../arrow/compute/kernels/scalar_string_ascii.cc   |   1 +
 .../arrow/compute/kernels/scalar_string_utf8.cc    |   1 +
 .../compute/kernels/scalar_temporal_binary.cc      |   1 +
 .../arrow/compute/kernels/scalar_temporal_unary.cc |   1 +
 cpp/src/arrow/compute/kernels/scalar_validity.cc   |   1 +
 .../arrow/compute/kernels/test_util_internal.cc    |   4 +
 cpp/src/arrow/compute/kernels/vector_array_sort.cc |   1 +
 .../arrow/compute/kernels/vector_cumulative_ops.cc |   1 +
 cpp/src/arrow/compute/kernels/vector_hash.cc       |   1 +
 cpp/src/arrow/compute/kernels/vector_nested.cc     |   1 +
 cpp/src/arrow/compute/kernels/vector_pairwise.cc   |   1 +
 cpp/src/arrow/compute/kernels/vector_rank.cc       |   1 +
 cpp/src/arrow/compute/kernels/vector_replace.cc    |   5 +-
 .../arrow/compute/kernels/vector_run_end_encode.cc |   1 +
 cpp/src/arrow/compute/kernels/vector_select_k.cc   |   1 +
 cpp/src/arrow/compute/kernels/vector_selection.cc  |   1 +
 cpp/src/arrow/compute/kernels/vector_sort.cc       |   1 +
 cpp/src/arrow/compute/kernels/vector_statistics.cc |   1 +
 cpp/src/arrow/compute/kernels/vector_swizzle.cc    |   1 +
 cpp/src/arrow/compute/util_avx2.cc                 |   1 +
 cpp/src/arrow/dataset/discovery.cc                 |  28 ++--
 cpp/src/arrow/dataset/file_base.cc                 |   4 +-
 cpp/src/arrow/dataset/file_csv.cc                  |  12 +-
 cpp/src/arrow/dataset/file_parquet.cc              |   4 +
 .../arrow/engine/substrait/expression_internal.cc  |  26 ++--
 .../arrow/engine/substrait/relation_internal.cc    |  13 +-
 cpp/src/arrow/engine/substrait/serde.cc            |   4 +
 .../arrow/engine/substrait/test_plan_builder.cc    |   6 +-
 cpp/src/arrow/extension/tensor_internal.h          |   3 +-
 cpp/src/arrow/filesystem/filesystem.cc             |   5 -
 cpp/src/arrow/filesystem/filesystem_library.h      |   4 +-
 cpp/src/arrow/filesystem/test_util.cc              |  20 +--
 cpp/src/arrow/flight/CMakeLists.txt                |  30 ++--
 cpp/src/arrow/flight/serialization_internal.cc     |   4 +
 cpp/src/arrow/flight/sql/CMakeLists.txt            |   3 +-
 cpp/src/arrow/flight/sql/client.cc                 |  38 ++---
 cpp/src/arrow/flight/sql/protocol_internal.cc      |   4 +
 cpp/src/arrow/flight/sql/server.cc                 |   2 +-
 cpp/src/arrow/flight/test_definitions.cc           |   6 +-
 cpp/src/arrow/flight/test_util.h                   |   3 +
 .../transport/grpc/serialization_internal.cc       |   8 +-
 cpp/src/arrow/flight/types.cc                      |   3 +-
 cpp/src/arrow/ipc/message.cc                       |  64 +++++----
 cpp/src/arrow/ipc/reader.cc                        | 156 +++++++++++----------
 cpp/src/arrow/ipc/test_common.cc                   |  32 +++--
 cpp/src/arrow/ipc/writer.cc                        |  13 --
 cpp/src/arrow/tensor/csx_converter.cc              |   4 +
 cpp/src/arrow/testing/random.cc                    |  56 ++++----
 cpp/src/arrow/type.cc                              |  10 +-
 cpp/src/arrow/util/bitmap_ops.cc                   |   4 +
 cpp/src/arrow/util/io_util.cc                      |   5 -
 cpp/src/arrow/util/macros.h                        |  13 ++
 cpp/src/arrow/util/memory.cc                       |   4 +
 cpp/src/gandiva/encrypt_utils.cc                   |  12 --
 cpp/src/gandiva/engine.cc                          |  52 +++----
 cpp/src/gandiva/function_registry.cc               |  30 ++--
 cpp/src/gandiva/function_signature.cc              |   4 +
 cpp/src/gandiva/gdv_function_stubs.cc              |   4 +
 cpp/src/gandiva/gdv_hash_function_stubs.cc         |   4 +
 cpp/src/gandiva/gdv_string_function_stubs.cc       |   4 +
 cpp/src/gandiva/regex_functions_holder.cc          |   4 +
 cpp/src/parquet/arrow/reader.cc                    |   9 +-
 cpp/src/parquet/arrow/schema_internal.cc           |   4 +
 cpp/src/parquet/arrow/writer.cc                    |  54 +++----
 cpp/src/parquet/column_writer.cc                   |   4 +
 .../encryption/encryption_internal_nossl.cc        |   4 +
 cpp/src/parquet/file_reader.cc                     |   4 +
 cpp/src/parquet/level_comparison.cc                |   9 +-
 cpp/src/parquet/level_comparison_avx2.cc           |   2 +
 ...on_avx2.cc => level_comparison_avx2_internal.h} |  21 ++-
 cpp/src/parquet/level_conversion.cc                |  12 +-
 cpp/src/parquet/level_conversion_bmi2.cc           |   3 +-
 ...on_bmi2.cc => level_conversion_bmi2_internal.h} |  15 +-
 cpp/src/parquet/metadata.cc                        |   6 +-
 cpp/src/parquet/printer.cc                         |  22 +--
 119 files changed, 689 insertions(+), 504 deletions(-)

diff --git a/cpp/cmake_modules/BuildUtils.cmake 
b/cpp/cmake_modules/BuildUtils.cmake
index d92d3af2e4..db760400f7 100644
--- a/cpp/cmake_modules/BuildUtils.cmake
+++ b/cpp/cmake_modules/BuildUtils.cmake
@@ -265,6 +265,7 @@ function(ADD_ARROW_LIB LIB_NAME)
     if(ARG_DEFINITIONS)
       target_compile_definitions(${LIB_NAME}_objlib PRIVATE ${ARG_DEFINITIONS})
     endif()
+    target_compile_options(${LIB_NAME}_objlib PRIVATE 
${ARROW_LIBRARIES_ONLY_CXX_FLAGS})
     set(LIB_DEPS $<TARGET_OBJECTS:${LIB_NAME}_objlib>)
     set(EXTRA_DEPS)
 
@@ -326,6 +327,7 @@ function(ADD_ARROW_LIB LIB_NAME)
     if(ARG_DEFINITIONS)
       target_compile_definitions(${LIB_NAME}_shared PRIVATE ${ARG_DEFINITIONS})
     endif()
+    target_compile_options(${LIB_NAME}_shared PRIVATE 
${ARROW_LIBRARIES_ONLY_CXX_FLAGS})
 
     if(ARG_OUTPUTS)
       list(APPEND ${ARG_OUTPUTS} ${LIB_NAME}_shared)
@@ -416,6 +418,7 @@ function(ADD_ARROW_LIB LIB_NAME)
     if(ARG_DEFINITIONS)
       target_compile_definitions(${LIB_NAME}_static PRIVATE ${ARG_DEFINITIONS})
     endif()
+    target_compile_options(${LIB_NAME}_static PRIVATE 
${ARROW_LIBRARIES_ONLY_CXX_FLAGS})
 
     if(ARG_OUTPUTS)
       list(APPEND ${ARG_OUTPUTS} ${LIB_NAME}_static)
diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake 
b/cpp/cmake_modules/SetupCxxFlags.cmake
index 15ebfef55d..afc0446a78 100644
--- a/cpp/cmake_modules/SetupCxxFlags.cmake
+++ b/cpp/cmake_modules/SetupCxxFlags.cmake
@@ -159,6 +159,9 @@ set(CMAKE_POSITION_INDEPENDENT_CODE 
${ARROW_POSITION_INDEPENDENT_CODE})
 set(UNKNOWN_COMPILER_MESSAGE
     "Unknown compiler: ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
 
+# Compiler flags used when building Arrow libraries (but not tests, utilities, 
etc.)
+set(ARROW_LIBRARIES_ONLY_CXX_FLAGS)
+
 # compiler flags that are common across debug/release builds
 if(WIN32)
   # TODO(wesm): Change usages of C runtime functions that MSVC says are
@@ -322,6 +325,9 @@ if("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wimplicit-fallthrough")
     string(APPEND CXX_ONLY_FLAGS " -Wredundant-move")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wunused-result")
+    # Flag non-static functions that don't have corresponding declaration in a 
.h file.
+    # Only for Arrow libraries, since this is not a problem in tests or 
utilities.
+    list(APPEND ARROW_LIBRARIES_ONLY_CXX_FLAGS "-Wmissing-declarations")
   elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel" OR CMAKE_CXX_COMPILER_ID 
STREQUAL
                                                    "IntelLLVM")
     if(WIN32)
diff --git a/cpp/src/arrow/acero/aggregate_internal.cc 
b/cpp/src/arrow/acero/aggregate_internal.cc
index 87424ae1bb..ac47921bf4 100644
--- a/cpp/src/arrow/acero/aggregate_internal.cc
+++ b/cpp/src/arrow/acero/aggregate_internal.cc
@@ -23,6 +23,7 @@
 #include "arrow/acero/aggregate_internal.h"
 #include "arrow/acero/aggregate_node.h"
 #include "arrow/acero/exec_plan.h"
+#include "arrow/acero/exec_plan_internal.h"
 #include "arrow/acero/options.h"
 #include "arrow/compute/exec.h"
 #include "arrow/compute/function.h"
diff --git a/cpp/src/arrow/acero/asof_join_node.cc 
b/cpp/src/arrow/acero/asof_join_node.cc
index 1be4110751..55fa45543e 100644
--- a/cpp/src/arrow/acero/asof_join_node.cc
+++ b/cpp/src/arrow/acero/asof_join_node.cc
@@ -32,6 +32,7 @@
 #include <unordered_set>
 
 #include "arrow/acero/exec_plan.h"
+#include "arrow/acero/exec_plan_internal.h"
 #include "arrow/acero/options.h"
 #include "arrow/acero/unmaterialized_table_internal.h"
 #ifndef NDEBUG
diff --git a/cpp/src/arrow/acero/exec_plan.cc b/cpp/src/arrow/acero/exec_plan.cc
index e27ae7b65a..ff5e5d8bdd 100644
--- a/cpp/src/arrow/acero/exec_plan.cc
+++ b/cpp/src/arrow/acero/exec_plan.cc
@@ -23,6 +23,7 @@
 #include <unordered_map>
 #include <unordered_set>
 
+#include "arrow/acero/exec_plan_internal.h"
 #include "arrow/acero/options.h"
 #include "arrow/acero/query_context.h"
 #include "arrow/acero/task_util.h"
@@ -1101,23 +1102,6 @@ Result<std::unique_ptr<RecordBatchReader>> 
DeclarationToReader(
   return DeclarationToReader(std::move(declaration), std::move(options));
 }
 
-namespace internal {
-
-void RegisterSourceNode(ExecFactoryRegistry*);
-void RegisterFetchNode(ExecFactoryRegistry*);
-void RegisterFilterNode(ExecFactoryRegistry*);
-void RegisterOrderByNode(ExecFactoryRegistry*);
-void RegisterPivotLongerNode(ExecFactoryRegistry*);
-void RegisterProjectNode(ExecFactoryRegistry*);
-void RegisterUnionNode(ExecFactoryRegistry*);
-void RegisterAggregateNode(ExecFactoryRegistry*);
-void RegisterSinkNode(ExecFactoryRegistry*);
-void RegisterHashJoinNode(ExecFactoryRegistry*);
-void RegisterAsofJoinNode(ExecFactoryRegistry*);
-void RegisterSortedMergeNode(ExecFactoryRegistry*);
-
-}  // namespace internal
-
 ExecFactoryRegistry* default_exec_factory_registry() {
   class DefaultRegistry : public ExecFactoryRegistry {
    public:
diff --git a/cpp/src/parquet/level_comparison_avx2.cc 
b/cpp/src/arrow/acero/exec_plan_internal.h
similarity index 53%
copy from cpp/src/parquet/level_comparison_avx2.cc
copy to cpp/src/arrow/acero/exec_plan_internal.h
index b33eb2e295..e9fe87b69e 100644
--- a/cpp/src/parquet/level_comparison_avx2.cc
+++ b/cpp/src/arrow/acero/exec_plan_internal.h
@@ -15,20 +15,23 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#define PARQUET_IMPL_NAMESPACE avx2
-#include "parquet/level_comparison_inc.h"
-#undef PARQUET_IMPL_NAMESPACE
+#pragma once
 
-namespace parquet {
-namespace internal {
+#include "arrow/acero/exec_plan.h"
 
-uint64_t GreaterThanBitmapAvx2(const int16_t* levels, int64_t num_levels, 
int16_t rhs) {
-  return avx2::GreaterThanBitmapImpl(levels, num_levels, rhs);
-}
+namespace arrow::acero::internal {
 
-MinMax FindMinMaxAvx2(const int16_t* levels, int64_t num_levels) {
-  return avx2::FindMinMaxImpl(levels, num_levels);
-}
+void RegisterSourceNode(ExecFactoryRegistry*);
+void RegisterFetchNode(ExecFactoryRegistry*);
+void RegisterFilterNode(ExecFactoryRegistry*);
+void RegisterOrderByNode(ExecFactoryRegistry*);
+void RegisterPivotLongerNode(ExecFactoryRegistry*);
+void RegisterProjectNode(ExecFactoryRegistry*);
+void RegisterUnionNode(ExecFactoryRegistry*);
+void RegisterAggregateNode(ExecFactoryRegistry*);
+void RegisterSinkNode(ExecFactoryRegistry*);
+void RegisterHashJoinNode(ExecFactoryRegistry*);
+void RegisterAsofJoinNode(ExecFactoryRegistry*);
+void RegisterSortedMergeNode(ExecFactoryRegistry*);
 
-}  // namespace internal
-}  // namespace parquet
+}  // namespace arrow::acero::internal
diff --git a/cpp/src/arrow/acero/fetch_node.cc 
b/cpp/src/arrow/acero/fetch_node.cc
index 2b168b1e53..bf352698a9 100644
--- a/cpp/src/arrow/acero/fetch_node.cc
+++ b/cpp/src/arrow/acero/fetch_node.cc
@@ -19,6 +19,7 @@
 
 #include "arrow/acero/accumulation_queue.h"
 #include "arrow/acero/exec_plan.h"
+#include "arrow/acero/exec_plan_internal.h"
 #include "arrow/acero/map_node.h"
 #include "arrow/acero/options.h"
 #include "arrow/acero/query_context.h"
diff --git a/cpp/src/arrow/acero/filter_node.cc 
b/cpp/src/arrow/acero/filter_node.cc
index b0d500abac..67de82497e 100644
--- a/cpp/src/arrow/acero/filter_node.cc
+++ b/cpp/src/arrow/acero/filter_node.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include "arrow/acero/exec_plan.h"
+#include "arrow/acero/exec_plan_internal.h"
 #include "arrow/acero/map_node.h"
 #include "arrow/acero/options.h"
 #include "arrow/acero/query_context.h"
diff --git a/cpp/src/arrow/acero/hash_join_node.cc 
b/cpp/src/arrow/acero/hash_join_node.cc
index 89a94d4a16..28e3eb0e04 100644
--- a/cpp/src/arrow/acero/hash_join_node.cc
+++ b/cpp/src/arrow/acero/hash_join_node.cc
@@ -21,6 +21,7 @@
 #include <utility>
 
 #include "arrow/acero/exec_plan.h"
+#include "arrow/acero/exec_plan_internal.h"
 #include "arrow/acero/hash_join.h"
 #include "arrow/acero/hash_join_dict.h"
 #include "arrow/acero/hash_join_node.h"
@@ -45,6 +46,24 @@ using compute::KeyColumnArray;
 
 namespace acero {
 
+namespace {
+
+Status ValidateHashJoinNodeOptions(const HashJoinNodeOptions& join_options) {
+  if (join_options.key_cmp.empty() || join_options.left_keys.empty() ||
+      join_options.right_keys.empty()) {
+    return Status::Invalid("key_cmp and keys cannot be empty");
+  }
+
+  if ((join_options.key_cmp.size() != join_options.left_keys.size()) ||
+      (join_options.key_cmp.size() != join_options.right_keys.size())) {
+    return Status::Invalid("key_cmp and keys must have the same size");
+  }
+
+  return Status::OK();
+}
+
+}  // namespace
+
 // Check if a type is supported in a join (as either a key or non-key column)
 bool HashJoinSchema::IsTypeSupported(const DataType& type) {
   const Type::type id = type.id();
@@ -468,20 +487,6 @@ Status 
HashJoinSchema::CollectFilterColumns(std::vector<FieldRef>& left_filter,
   return Status::OK();
 }
 
-Status ValidateHashJoinNodeOptions(const HashJoinNodeOptions& join_options) {
-  if (join_options.key_cmp.empty() || join_options.left_keys.empty() ||
-      join_options.right_keys.empty()) {
-    return Status::Invalid("key_cmp and keys cannot be empty");
-  }
-
-  if ((join_options.key_cmp.size() != join_options.left_keys.size()) ||
-      (join_options.key_cmp.size() != join_options.right_keys.size())) {
-    return Status::Invalid("key_cmp and keys must have the same size");
-  }
-
-  return Status::OK();
-}
-
 class HashJoinNode;
 
 // This is a struct encapsulating things related to Bloom filters and pushing 
them around
diff --git a/cpp/src/arrow/acero/order_by_node.cc 
b/cpp/src/arrow/acero/order_by_node.cc
index 65aa83247f..213730e6f9 100644
--- a/cpp/src/arrow/acero/order_by_node.cc
+++ b/cpp/src/arrow/acero/order_by_node.cc
@@ -23,6 +23,7 @@
 #include <vector>
 
 #include "arrow/acero/exec_plan.h"
+#include "arrow/acero/exec_plan_internal.h"
 #include "arrow/acero/options.h"
 #include "arrow/acero/query_context.h"
 #include "arrow/acero/util.h"
diff --git a/cpp/src/arrow/acero/pivot_longer_node.cc 
b/cpp/src/arrow/acero/pivot_longer_node.cc
index f261a9c402..c8f2a5c7b0 100644
--- a/cpp/src/arrow/acero/pivot_longer_node.cc
+++ b/cpp/src/arrow/acero/pivot_longer_node.cc
@@ -23,6 +23,7 @@
 #include <vector>
 
 #include "arrow/acero/exec_plan.h"
+#include "arrow/acero/exec_plan_internal.h"
 #include "arrow/acero/options.h"
 #include "arrow/acero/util.h"
 #include "arrow/status.h"
diff --git a/cpp/src/arrow/acero/project_node.cc 
b/cpp/src/arrow/acero/project_node.cc
index 98b11cff00..188a270883 100644
--- a/cpp/src/arrow/acero/project_node.cc
+++ b/cpp/src/arrow/acero/project_node.cc
@@ -18,6 +18,7 @@
 #include <sstream>
 
 #include "arrow/acero/exec_plan.h"
+#include "arrow/acero/exec_plan_internal.h"
 #include "arrow/acero/map_node.h"
 #include "arrow/acero/options.h"
 #include "arrow/acero/query_context.h"
diff --git a/cpp/src/arrow/acero/sink_node.cc b/cpp/src/arrow/acero/sink_node.cc
index ab06dd8ffd..0efb365a51 100644
--- a/cpp/src/arrow/acero/sink_node.cc
+++ b/cpp/src/arrow/acero/sink_node.cc
@@ -23,6 +23,7 @@
 
 #include "arrow/acero/accumulation_queue.h"
 #include "arrow/acero/exec_plan.h"
+#include "arrow/acero/exec_plan_internal.h"
 #include "arrow/acero/options.h"
 #include "arrow/acero/order_by_impl.h"
 #include "arrow/acero/query_context.h"
diff --git a/cpp/src/arrow/acero/sorted_merge_node.cc 
b/cpp/src/arrow/acero/sorted_merge_node.cc
index 374e672a84..37997232cd 100644
--- a/cpp/src/arrow/acero/sorted_merge_node.cc
+++ b/cpp/src/arrow/acero/sorted_merge_node.cc
@@ -23,8 +23,10 @@
 #include <tuple>
 #include <unordered_map>
 #include <vector>
+
 #include "arrow/acero/concurrent_queue_internal.h"
 #include "arrow/acero/exec_plan.h"
+#include "arrow/acero/exec_plan_internal.h"
 #include "arrow/acero/options.h"
 #include "arrow/acero/query_context.h"
 #include "arrow/acero/time_series_util.h"
diff --git a/cpp/src/arrow/acero/source_node.cc 
b/cpp/src/arrow/acero/source_node.cc
index 0f58406760..888f6e23c1 100644
--- a/cpp/src/arrow/acero/source_node.cc
+++ b/cpp/src/arrow/acero/source_node.cc
@@ -20,6 +20,7 @@
 #include <optional>
 
 #include "arrow/acero/exec_plan.h"
+#include "arrow/acero/exec_plan_internal.h"
 #include "arrow/acero/options.h"
 #include "arrow/acero/query_context.h"
 #include "arrow/acero/util.h"
diff --git a/cpp/src/arrow/acero/union_node.cc 
b/cpp/src/arrow/acero/union_node.cc
index 9b1211e9d1..47699b4874 100644
--- a/cpp/src/arrow/acero/union_node.cc
+++ b/cpp/src/arrow/acero/union_node.cc
@@ -18,6 +18,7 @@
 #include <mutex>
 
 #include "arrow/acero/exec_plan.h"
+#include "arrow/acero/exec_plan_internal.h"
 #include "arrow/acero/options.h"
 #include "arrow/acero/util.h"
 #include "arrow/compute/api.h"
diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc
index b2fe52f9bb..1c56a48506 100644
--- a/cpp/src/arrow/array/data.cc
+++ b/cpp/src/arrow/array/data.cc
@@ -100,11 +100,15 @@ bool DictionaryMayHaveLogicalNulls(const ArrayData& data) 
{
   return ArraySpan(data).MayHaveLogicalNulls();
 }
 
+namespace {
+
 BufferSpan PackVariadicBuffers(util::span<const std::shared_ptr<Buffer>> 
buffers) {
   return {const_cast<uint8_t*>(reinterpret_cast<const 
uint8_t*>(buffers.data())),
           static_cast<int64_t>(buffers.size() * 
sizeof(std::shared_ptr<Buffer>))};
 }
 
+}  // namespace
+
 }  // namespace internal
 
 std::shared_ptr<ArrayData> ArrayData::Make(std::shared_ptr<DataType> type, 
int64_t length,
diff --git a/cpp/src/arrow/array/diff.cc b/cpp/src/arrow/array/diff.cc
index cf53c32155..fd907e3c7b 100644
--- a/cpp/src/arrow/array/diff.cc
+++ b/cpp/src/arrow/array/diff.cc
@@ -58,6 +58,8 @@ using internal::checked_cast;
 using internal::checked_pointer_cast;
 using internal::MakeLazyRange;
 
+namespace {
+
 template <typename ArrayType>
 auto GetView(const ArrayType& array, int64_t index) -> 
decltype(array.GetView(index)) {
   return array.GetView(index);
@@ -93,11 +95,11 @@ struct UnitSlice {
 
 // FIXME(bkietz) this is inefficient;
 // StructArray's fields can be diffed independently then merged
-static UnitSlice GetView(const StructArray& array, int64_t index) {
+UnitSlice GetView(const StructArray& array, int64_t index) {
   return UnitSlice{&array, index};
 }
 
-static UnitSlice GetView(const UnionArray& array, int64_t index) {
+UnitSlice GetView(const UnionArray& array, int64_t index) {
   return UnitSlice{&array, index};
 }
 
@@ -583,28 +585,6 @@ Result<std::shared_ptr<StructArray>> NullDiff(const Array& 
base, const Array& ta
                            {field("insert", boolean()), field("run_length", 
int64())});
 }
 
-Result<std::shared_ptr<StructArray>> Diff(const Array& base, const Array& 
target,
-                                          MemoryPool* pool) {
-  if (!base.type()->Equals(target.type())) {
-    return Status::TypeError("only taking the diff of like-typed arrays is 
supported.");
-  }
-
-  if (base.type()->id() == Type::NA) {
-    return NullDiff(base, target, pool);
-  } else if (base.type()->id() == Type::EXTENSION) {
-    auto base_storage = checked_cast<const ExtensionArray&>(base).storage();
-    auto target_storage = checked_cast<const 
ExtensionArray&>(target).storage();
-    return Diff(*base_storage, *target_storage, pool);
-  } else if (base.type()->id() == Type::DICTIONARY) {
-    return Status::NotImplemented("diffing arrays of type ", *base.type());
-  } else if (base.type()->id() == Type::LIST_VIEW ||
-             base.type()->id() == Type::LARGE_LIST_VIEW) {
-    return Status::NotImplemented("diffing arrays of type ", *base.type());
-  } else {
-    return QuadraticSpaceMyersDiff(base, target, pool).Diff();
-  }
-}
-
 using Formatter = std::function<void(const Array&, int64_t index, 
std::ostream*)>;
 
 static Result<Formatter> MakeFormatter(const DataType& type);
@@ -616,10 +596,6 @@ class MakeFormatterImpl {
     return std::move(impl_);
   }
 
- private:
-  template <typename VISITOR, typename... ARGS>
-  friend Status VisitTypeInline(const DataType&, VISITOR*, ARGS&&... args);
-
   // factory implementation
   Status Visit(const BooleanType&) {
     impl_ = [](const Array& array, int64_t index, std::ostream* os) {
@@ -922,48 +898,10 @@ class MakeFormatterImpl {
   Formatter impl_;
 };
 
-static Result<Formatter> MakeFormatter(const DataType& type) {
+Result<Formatter> MakeFormatter(const DataType& type) {
   return MakeFormatterImpl{}.Make(type);
 }
 
-Status VisitEditScript(
-    const Array& edits,
-    const std::function<Status(int64_t delete_begin, int64_t delete_end,
-                               int64_t insert_begin, int64_t insert_end)>& 
visitor) {
-  static const auto edits_type =
-      struct_({field("insert", boolean()), field("run_length", int64())});
-  DCHECK(edits.type()->Equals(*edits_type));
-  DCHECK_GE(edits.length(), 1);
-
-  auto insert = checked_pointer_cast<BooleanArray>(
-      checked_cast<const StructArray&>(edits).field(0));
-  auto run_lengths =
-      checked_pointer_cast<Int64Array>(checked_cast<const 
StructArray&>(edits).field(1));
-
-  DCHECK(!insert->Value(0));
-
-  auto length = run_lengths->Value(0);
-  int64_t base_begin, base_end, target_begin, target_end;
-  base_begin = base_end = target_begin = target_end = length;
-  for (int64_t i = 1; i < edits.length(); ++i) {
-    if (insert->Value(i)) {
-      ++target_end;
-    } else {
-      ++base_end;
-    }
-    length = run_lengths->Value(i);
-    if (length != 0) {
-      RETURN_NOT_OK(visitor(base_begin, base_end, target_begin, target_end));
-      base_begin = base_end = base_end + length;
-      target_begin = target_end = target_end + length;
-    }
-  }
-  if (length == 0) {
-    return visitor(base_begin, base_end, target_begin, target_end);
-  }
-  return Status::OK();
-}
-
 class UnifiedDiffFormatter {
  public:
   UnifiedDiffFormatter(std::ostream* os, Formatter formatter)
@@ -1013,6 +951,8 @@ class UnifiedDiffFormatter {
   Formatter formatter_;
 };
 
+}  // namespace
+
 Result<std::function<Status(const Array& edits, const Array& base, const 
Array& target)>>
 MakeUnifiedDiffFormatter(const DataType& type, std::ostream* os) {
   if (type.id() == Type::NA) {
@@ -1030,4 +970,64 @@ MakeUnifiedDiffFormatter(const DataType& type, 
std::ostream* os) {
   return UnifiedDiffFormatter(os, std::move(formatter));
 }
 
+Status VisitEditScript(
+    const Array& edits,
+    const std::function<Status(int64_t delete_begin, int64_t delete_end,
+                               int64_t insert_begin, int64_t insert_end)>& 
visitor) {
+  static const auto edits_type =
+      struct_({field("insert", boolean()), field("run_length", int64())});
+  DCHECK(edits.type()->Equals(*edits_type));
+  DCHECK_GE(edits.length(), 1);
+
+  auto insert = checked_pointer_cast<BooleanArray>(
+      checked_cast<const StructArray&>(edits).field(0));
+  auto run_lengths =
+      checked_pointer_cast<Int64Array>(checked_cast<const 
StructArray&>(edits).field(1));
+
+  DCHECK(!insert->Value(0));
+
+  auto length = run_lengths->Value(0);
+  int64_t base_begin, base_end, target_begin, target_end;
+  base_begin = base_end = target_begin = target_end = length;
+  for (int64_t i = 1; i < edits.length(); ++i) {
+    if (insert->Value(i)) {
+      ++target_end;
+    } else {
+      ++base_end;
+    }
+    length = run_lengths->Value(i);
+    if (length != 0) {
+      RETURN_NOT_OK(visitor(base_begin, base_end, target_begin, target_end));
+      base_begin = base_end = base_end + length;
+      target_begin = target_end = target_end + length;
+    }
+  }
+  if (length == 0) {
+    return visitor(base_begin, base_end, target_begin, target_end);
+  }
+  return Status::OK();
+}
+
+Result<std::shared_ptr<StructArray>> Diff(const Array& base, const Array& 
target,
+                                          MemoryPool* pool) {
+  if (!base.type()->Equals(target.type())) {
+    return Status::TypeError("only taking the diff of like-typed arrays is 
supported.");
+  }
+
+  if (base.type()->id() == Type::NA) {
+    return NullDiff(base, target, pool);
+  } else if (base.type()->id() == Type::EXTENSION) {
+    auto base_storage = checked_cast<const ExtensionArray&>(base).storage();
+    auto target_storage = checked_cast<const 
ExtensionArray&>(target).storage();
+    return Diff(*base_storage, *target_storage, pool);
+  } else if (base.type()->id() == Type::DICTIONARY) {
+    return Status::NotImplemented("diffing arrays of type ", *base.type());
+  } else if (base.type()->id() == Type::LIST_VIEW ||
+             base.type()->id() == Type::LARGE_LIST_VIEW) {
+    return Status::NotImplemented("diffing arrays of type ", *base.type());
+  } else {
+    return QuadraticSpaceMyersDiff(base, target, pool).Diff();
+  }
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc
index 2254b6e067..ab20ce7fb9 100644
--- a/cpp/src/arrow/buffer.cc
+++ b/cpp/src/arrow/buffer.cc
@@ -203,10 +203,6 @@ Result<std::shared_ptr<Buffer>> 
AllocateEmptyBitmap(int64_t length, int64_t alig
   return std::shared_ptr<Buffer>(std::move(buf));
 }
 
-Status AllocateEmptyBitmap(int64_t length, std::shared_ptr<Buffer>* out) {
-  return AllocateEmptyBitmap(length).Value(out);
-}
-
 Result<std::shared_ptr<Buffer>> ConcatenateBuffers(
     const std::vector<std::shared_ptr<Buffer>>& buffers, MemoryPool* pool) {
   int64_t out_length = 0;
diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc
index 9c01300df4..cf1c9f580a 100644
--- a/cpp/src/arrow/c/bridge.cc
+++ b/cpp/src/arrow/c/bridge.cc
@@ -713,6 +713,8 @@ Status ExportRecordBatch(const RecordBatch& batch, struct 
ArrowArray* out,
 //////////////////////////////////////////////////////////////////////////
 // C device arrays
 
+namespace {
+
 Status ValidateDeviceInfo(const ArrayData& data,
                           std::optional<DeviceAllocationType>* device_type,
                           int64_t* device_id) {
@@ -753,6 +755,8 @@ Result<std::pair<std::optional<DeviceAllocationType>, 
int64_t>> ValidateDeviceIn
   return std::make_pair(device_type, device_id);
 }
 
+}  // namespace
+
 Status ExportDeviceArray(const Array& array, 
std::shared_ptr<Device::SyncEvent> sync,
                          struct ArrowDeviceArray* out, struct ArrowSchema* 
out_schema) {
   void* sync_event = sync ? sync->get_raw() : nullptr;
diff --git a/cpp/src/arrow/compute/api_aggregate.cc 
b/cpp/src/arrow/compute/api_aggregate.cc
index b2ed64dc59..0ed5eb88b7 100644
--- a/cpp/src/arrow/compute/api_aggregate.cc
+++ b/cpp/src/arrow/compute/api_aggregate.cc
@@ -20,6 +20,7 @@
 #include "arrow/compute/exec.h"
 #include "arrow/compute/function_internal.h"
 #include "arrow/compute/registry.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging_internal.h"
 
diff --git a/cpp/src/arrow/compute/api_scalar.cc 
b/cpp/src/arrow/compute/api_scalar.cc
index 68c310cb5b..b43eca542f 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -26,6 +26,7 @@
 #include "arrow/compute/exec.h"
 #include "arrow/compute/function_internal.h"
 #include "arrow/compute/registry.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/status.h"
 #include "arrow/type.h"
 #include "arrow/util/checked_cast.h"
diff --git a/cpp/src/arrow/compute/api_vector.cc 
b/cpp/src/arrow/compute/api_vector.cc
index 06e6cf6c1a..538cdccaf2 100644
--- a/cpp/src/arrow/compute/api_vector.cc
+++ b/cpp/src/arrow/compute/api_vector.cc
@@ -30,6 +30,7 @@
 #include "arrow/compute/function_internal.h"
 #include "arrow/compute/kernels/vector_sort_internal.h"
 #include "arrow/compute/registry.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/datum.h"
 #include "arrow/record_batch.h"
 #include "arrow/result.h"
diff --git a/cpp/src/arrow/compute/cast.cc b/cpp/src/arrow/compute/cast.cc
index 268873788d..4a8bca3f1d 100644
--- a/cpp/src/arrow/compute/cast.cc
+++ b/cpp/src/arrow/compute/cast.cc
@@ -31,6 +31,7 @@
 #include "arrow/compute/kernel.h"
 #include "arrow/compute/kernels/codegen_internal.h"
 #include "arrow/compute/registry.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/util/logging_internal.h"
 #include "arrow/util/reflection_internal.h"
 
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc 
b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
index b19a9f58e5..03fba53ac0 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
@@ -21,6 +21,7 @@
 #include "arrow/compute/kernels/codegen_internal.h"
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/compute/kernels/util_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/util/cpu_info.h"
 #include "arrow/util/hashing.h"
 
@@ -852,6 +853,8 @@ void AddBasicAggKernels(KernelInit init,
   }
 }
 
+namespace {
+
 void AddScalarAggKernels(KernelInit init,
                          const std::vector<std::shared_ptr<DataType>>& types,
                          std::shared_ptr<DataType> out_ty,
@@ -871,16 +874,12 @@ void AddArrayScalarAggKernels(KernelInit init,
   AddScalarAggKernels(init, types, out_ty, func);
 }
 
-namespace {
-
 Result<TypeHolder> MinMaxType(KernelContext*, const std::vector<TypeHolder>& 
types) {
   // T -> struct<min: T, max: T>
   auto ty = types.front().GetSharedPtr();
   return struct_({field("min", ty), field("max", ty)});
 }
 
-}  // namespace
-
 Result<TypeHolder> FirstLastType(KernelContext*, const 
std::vector<TypeHolder>& types) {
   auto ty = types.front().GetSharedPtr();
   return struct_({field("first", ty), field("last", ty)});
@@ -900,6 +899,8 @@ void AddFirstLastKernels(KernelInit init,
   }
 }
 
+}  // namespace
+
 void AddMinMaxKernel(KernelInit init, internal::detail::GetTypeId get_id,
                      ScalarAggregateFunction* func, SimdLevel::type 
simd_level) {
   auto sig = KernelSignature::Make({InputType(get_id.id)}, MinMaxType);
diff --git a/cpp/src/arrow/compute/kernels/aggregate_mode.cc 
b/cpp/src/arrow/compute/kernels/aggregate_mode.cc
index e9723cef7b..fbafa663b8 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_mode.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_mode.cc
@@ -23,6 +23,7 @@
 #include "arrow/compute/kernels/aggregate_internal.h"
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/compute/kernels/util_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/result.h"
 #include "arrow/stl_allocator.h"
 #include "arrow/type_traits.h"
diff --git a/cpp/src/arrow/compute/kernels/aggregate_pivot.cc 
b/cpp/src/arrow/compute/kernels/aggregate_pivot.cc
index f4b8f5ea0b..504c7cdd26 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_pivot.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_pivot.cc
@@ -19,6 +19,7 @@
 #include "arrow/compute/kernels/aggregate_internal.h"
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/compute/kernels/pivot_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/scalar.h"
 #include "arrow/util/bit_run_reader.h"
 #include "arrow/util/logging_internal.h"
diff --git a/cpp/src/arrow/compute/kernels/aggregate_quantile.cc 
b/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
index 4355c32cfa..4e3894d2f2 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
@@ -22,6 +22,7 @@
 #include "arrow/compute/api_aggregate.h"
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/compute/kernels/util_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/stl_allocator.h"
 #include "arrow/util/logging_internal.h"
 
diff --git a/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc 
b/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc
index 10fa00689d..7ebc859411 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc
@@ -18,6 +18,7 @@
 #include "arrow/compute/api_aggregate.h"
 #include "arrow/compute/kernels/aggregate_internal.h"
 #include "arrow/compute/kernels/common_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/util/bit_run_reader.h"
 #include "arrow/util/tdigest_internal.h"
 
diff --git a/cpp/src/arrow/compute/kernels/aggregate_var_std.cc 
b/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
index 021ca712c5..9a43f18864 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
@@ -23,6 +23,7 @@
 #include "arrow/compute/kernels/aggregate_internal.h"
 #include "arrow/compute/kernels/aggregate_var_std_internal.h"
 #include "arrow/compute/kernels/common_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/util/bit_run_reader.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/int128_internal.h"
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc 
b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
index 0e3e359bde..19f7fc2e5b 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
@@ -32,6 +32,7 @@
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/compute/kernels/hash_aggregate_internal.h"
 #include "arrow/compute/kernels/util_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/compute/row/grouper.h"
 #include "arrow/compute/row/row_encoder_internal.h"
 #include "arrow/record_batch.h"
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_numeric.cc 
b/cpp/src/arrow/compute/kernels/hash_aggregate_numeric.cc
index a46f790548..acd485f530 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate_numeric.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate_numeric.cc
@@ -28,6 +28,7 @@
 #include "arrow/compute/kernels/aggregate_var_std_internal.h"
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/compute/kernels/hash_aggregate_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/compute/row/grouper.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/int128_internal.h"
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_pivot.cc 
b/cpp/src/arrow/compute/kernels/hash_aggregate_pivot.cc
index e9ae1dd25d..f60aa367ca 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate_pivot.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate_pivot.cc
@@ -28,6 +28,7 @@
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/compute/kernels/hash_aggregate_internal.h"
 #include "arrow/compute/kernels/pivot_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/compute/row/grouper.h"
 #include "arrow/util/bit_block_counter.h"
 #include "arrow/util/checked_cast.h"
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc 
b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index 96b3864ad2..e536b3d886 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -29,6 +29,7 @@
 #include "arrow/compute/kernels/codegen_internal.h"
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/compute/kernels/util_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/type.h"
 #include "arrow/type_fwd.h"
 #include "arrow/type_traits.h"
diff --git a/cpp/src/arrow/compute/kernels/scalar_boolean.cc 
b/cpp/src/arrow/compute/kernels/scalar_boolean.cc
index a5e2893d77..ce864d1340 100644
--- a/cpp/src/arrow/compute/kernels/scalar_boolean.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_boolean.cc
@@ -18,6 +18,7 @@
 #include <array>
 
 #include "arrow/compute/kernels/common_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/util/bit_util.h"
 #include "arrow/util/bitmap.h"
 #include "arrow/util/bitmap_ops.h"
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_dictionary.cc 
b/cpp/src/arrow/compute/kernels/scalar_cast_dictionary.cc
index 9ea167f555..53eb5c9791 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_dictionary.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_dictionary.cc
@@ -33,6 +33,8 @@ using internal::CopyBitmap;
 namespace compute {
 namespace internal {
 
+namespace {
+
 Status CastToDictionary(KernelContext* ctx, const ExecSpan& batch, ExecResult* 
out) {
   const CastOptions& options = CastState::Get(ctx);
   const auto& out_type = checked_cast<const DictionaryType&>(*out->type());
@@ -95,6 +97,8 @@ void AddDictionaryCast(CastFunction* func) {
   DCHECK_OK(func->AddKernel(SrcType::type_id, std::move(kernel)));
 }
 
+}  // namespace
+
 std::vector<std::shared_ptr<CastFunction>> GetDictionaryCasts() {
   auto cast_dict = std::make_shared<CastFunction>("cast_dictionary", 
Type::DICTIONARY);
   AddCommonCasts(Type::DICTIONARY, kOutputTargetType, cast_dict.get());
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc 
b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
index 9cb2d9a1b2..ba2a8c4b56 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
@@ -41,6 +41,8 @@ using util::Float16;
 namespace compute {
 namespace internal {
 
+namespace {
+
 Status CastIntegerToInteger(KernelContext* ctx, const ExecSpan& batch, 
ExecResult* out) {
   const auto& options = checked_cast<const CastState*>(ctx->state())->options;
   if (!options.allow_int_overflow) {
@@ -276,6 +278,8 @@ Status CastIntegerToFloating(KernelContext* ctx, const 
ExecSpan& batch, ExecResu
   return Status::OK();
 }
 
+}  // namespace
+
 // ----------------------------------------------------------------------
 // Boolean to number
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc 
b/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
index 4c62da5a39..d076186e56 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
@@ -36,6 +36,8 @@ using internal::ParseYYYY_MM_DD;
 namespace compute {
 namespace internal {
 
+namespace {
+
 constexpr int64_t kMillisecondsInDay = 86400000;
 
 // ----------------------------------------------------------------------
@@ -142,6 +144,8 @@ Status ExtractTemporal(KernelContext* ctx, const ExecSpan& 
batch, ExecResult* ou
   return Status::Invalid("Unknown timestamp unit: ", ty);
 }
 
+}  // namespace
+
 // <TimestampType, TimestampType> and <DurationType, DurationType>
 template <typename O, typename I>
 struct CastFunctor<
@@ -491,6 +495,8 @@ struct CastFunctor<O, I,
   }
 };
 
+namespace {
+
 template <typename Type>
 void AddCrossUnitCast(CastFunction* func) {
   ScalarKernel kernel;
@@ -652,6 +658,8 @@ std::shared_ptr<CastFunction> GetTimestampCast() {
   return func;
 }
 
+}  // namespace
+
 std::vector<std::shared_ptr<CastFunction>> GetTemporalCasts() {
   std::vector<std::shared_ptr<CastFunction>> functions;
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_compare.cc 
b/cpp/src/arrow/compute/kernels/scalar_compare.cc
index f40a6d6b28..ef5b6fb4aa 100644
--- a/cpp/src/arrow/compute/kernels/scalar_compare.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_compare.cc
@@ -22,6 +22,7 @@
 
 #include "arrow/compute/api_scalar.h"
 #include "arrow/compute/kernels/common_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/type.h"
 #include "arrow/util/bit_util.h"
 #include "arrow/util/bitmap_ops.h"
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc 
b/cpp/src/arrow/compute/kernels/scalar_if_else.cc
index e6d723661d..753cc4de9f 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc
@@ -23,6 +23,7 @@
 #include "arrow/compute/api.h"
 #include "arrow/compute/kernels/codegen_internal.h"
 #include "arrow/compute/kernels/copy_data_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
 #include "arrow/util/bit_block_counter.h"
diff --git a/cpp/src/arrow/compute/kernels/scalar_nested.cc 
b/cpp/src/arrow/compute/kernels/scalar_nested.cc
index 674ae6050e..1fb0df56bb 100644
--- a/cpp/src/arrow/compute/kernels/scalar_nested.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_nested.cc
@@ -22,6 +22,7 @@
 #include "arrow/array/builder_nested.h"
 #include "arrow/compute/api_scalar.h"
 #include "arrow/compute/kernels/common_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/result.h"
 #include "arrow/type_fwd.h"
 #include "arrow/util/bit_block_counter.h"
diff --git a/cpp/src/arrow/compute/kernels/scalar_random.cc 
b/cpp/src/arrow/compute/kernels/scalar_random.cc
index d6c96f94e9..5bcb7c36da 100644
--- a/cpp/src/arrow/compute/kernels/scalar_random.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_random.cc
@@ -23,6 +23,7 @@
 #include "arrow/compute/kernel.h"
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/compute/registry.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/util/logging_internal.h"
 #include "arrow/util/pcg_random.h"
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_round.cc 
b/cpp/src/arrow/compute/kernels/scalar_round.cc
index 00c448f470..208b9875a1 100644
--- a/cpp/src/arrow/compute/kernels/scalar_round.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_round.cc
@@ -30,6 +30,7 @@
 #include "arrow/compute/kernels/codegen_internal.h"
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/compute/kernels/util_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/decimal.h"
diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc 
b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
index 9a6cc98ca4..c6a3562233 100644
--- a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
@@ -20,6 +20,7 @@
 #include "arrow/compute/cast.h"
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/compute/kernels/util_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/type.h"
 #include "arrow/util/bit_util.h"
 #include "arrow/util/bitmap_writer.h"
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc 
b/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc
index 233facf61c..06e6f4bb50 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc
@@ -24,6 +24,7 @@
 #include "arrow/array/builder_nested.h"
 #include "arrow/array/builder_primitive.h"
 #include "arrow/compute/kernels/scalar_string_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/result.h"
 #include "arrow/util/config.h"
 #include "arrow/util/logging_internal.h"
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_utf8.cc 
b/cpp/src/arrow/compute/kernels/scalar_string_utf8.cc
index 6f62db86f0..fd340bba62 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_utf8.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_utf8.cc
@@ -20,6 +20,7 @@
 #include <string>
 
 #include "arrow/compute/kernels/scalar_string_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/util/config.h"
 #include "arrow/util/logging_internal.h"
 #include "arrow/util/utf8_internal.h"
diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_binary.cc 
b/cpp/src/arrow/compute/kernels/scalar_temporal_binary.cc
index 6ef2a369d9..b14f9193eb 100644
--- a/cpp/src/arrow/compute/kernels/scalar_temporal_binary.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_temporal_binary.cc
@@ -23,6 +23,7 @@
 #include "arrow/compute/api_scalar.h"
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/compute/kernels/temporal_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging_internal.h"
 #include "arrow/util/time.h"
diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc 
b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
index 2864234f8a..2eee308f50 100644
--- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
@@ -24,6 +24,7 @@
 #include "arrow/compute/kernels/codegen_internal.h"
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/compute/kernels/temporal_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging_internal.h"
 #include "arrow/util/time.h"
diff --git a/cpp/src/arrow/compute/kernels/scalar_validity.cc 
b/cpp/src/arrow/compute/kernels/scalar_validity.cc
index 4e4023a862..5913b756f1 100644
--- a/cpp/src/arrow/compute/kernels/scalar_validity.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_validity.cc
@@ -19,6 +19,7 @@
 
 #include "arrow/compute/api_scalar.h"
 #include "arrow/compute/kernels/common_internal.h"
+#include "arrow/compute/registry_internal.h"
 
 #include "arrow/util/bit_util.h"
 #include "arrow/util/bitmap_ops.h"
diff --git a/cpp/src/arrow/compute/kernels/test_util_internal.cc 
b/cpp/src/arrow/compute/kernels/test_util_internal.cc
index d48b3b0781..3b0c9bdd85 100644
--- a/cpp/src/arrow/compute/kernels/test_util_internal.cc
+++ b/cpp/src/arrow/compute/kernels/test_util_internal.cc
@@ -172,6 +172,8 @@ void CheckScalar(std::string func_name, const DatumVector& 
inputs, Datum expecte
   }
 }
 
+namespace {
+
 Datum CheckDictionaryNonRecursive(const std::string& func_name, const 
DatumVector& args,
                                   bool result_is_encoded) {
   EXPECT_OK_AND_ASSIGN(Datum actual, CallFunction(func_name, args));
@@ -204,6 +206,8 @@ Datum CheckDictionaryNonRecursive(const std::string& 
func_name, const DatumVecto
   return actual;
 }
 
+}  // namespace
+
 void CheckDictionary(const std::string& func_name, const DatumVector& args,
                      bool result_is_encoded) {
   auto actual = CheckDictionaryNonRecursive(func_name, args, 
result_is_encoded);
diff --git a/cpp/src/arrow/compute/kernels/vector_array_sort.cc 
b/cpp/src/arrow/compute/kernels/vector_array_sort.cc
index 4a3a5eef89..950de47733 100644
--- a/cpp/src/arrow/compute/kernels/vector_array_sort.cc
+++ b/cpp/src/arrow/compute/kernels/vector_array_sort.cc
@@ -28,6 +28,7 @@
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/compute/kernels/util_internal.h"
 #include "arrow/compute/kernels/vector_sort_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/bit_block_counter.h"
 #include "arrow/util/bitmap.h"
diff --git a/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc 
b/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc
index a9ed720e2d..3c4f833b75 100644
--- a/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc
+++ b/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc
@@ -24,6 +24,7 @@
 #include "arrow/compute/kernels/base_arithmetic_internal.h"
 #include "arrow/compute/kernels/codegen_internal.h"
 #include "arrow/compute/kernels/common_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/result.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/bit_util.h"
diff --git a/cpp/src/arrow/compute/kernels/vector_hash.cc 
b/cpp/src/arrow/compute/kernels/vector_hash.cc
index c969f330b7..e666f2b9f7 100644
--- a/cpp/src/arrow/compute/kernels/vector_hash.cc
+++ b/cpp/src/arrow/compute/kernels/vector_hash.cc
@@ -31,6 +31,7 @@
 #include "arrow/compute/cast.h"
 #include "arrow/compute/kernels/codegen_internal.h"
 #include "arrow/compute/kernels/common_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/result.h"
 #include "arrow/util/hashing.h"
 #include "arrow/util/int_util.h"
diff --git a/cpp/src/arrow/compute/kernels/vector_nested.cc 
b/cpp/src/arrow/compute/kernels/vector_nested.cc
index 1c5cced359..d515e60fbe 100644
--- a/cpp/src/arrow/compute/kernels/vector_nested.cc
+++ b/cpp/src/arrow/compute/kernels/vector_nested.cc
@@ -20,6 +20,7 @@
 #include "arrow/array/array_base.h"
 #include "arrow/compute/api_vector.h"
 #include "arrow/compute/kernels/common_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/result.h"
 #include "arrow/util/bit_run_reader.h"
 #include "arrow/util/bit_util.h"
diff --git a/cpp/src/arrow/compute/kernels/vector_pairwise.cc 
b/cpp/src/arrow/compute/kernels/vector_pairwise.cc
index e5a1eb3c88..2c61afcc25 100644
--- a/cpp/src/arrow/compute/kernels/vector_pairwise.cc
+++ b/cpp/src/arrow/compute/kernels/vector_pairwise.cc
@@ -28,6 +28,7 @@
 #include "arrow/compute/kernels/base_arithmetic_internal.h"
 #include "arrow/compute/kernels/codegen_internal.h"
 #include "arrow/compute/registry.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/compute/util.h"
 #include "arrow/status.h"
 #include "arrow/type.h"
diff --git a/cpp/src/arrow/compute/kernels/vector_rank.cc 
b/cpp/src/arrow/compute/kernels/vector_rank.cc
index 1338ebedbe..ef7419ea7c 100644
--- a/cpp/src/arrow/compute/kernels/vector_rank.cc
+++ b/cpp/src/arrow/compute/kernels/vector_rank.cc
@@ -21,6 +21,7 @@
 #include "arrow/compute/function.h"
 #include "arrow/compute/kernels/vector_sort_internal.h"
 #include "arrow/compute/registry.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/util/logging_internal.h"
 #include "arrow/util/math_internal.h"
 
diff --git a/cpp/src/arrow/compute/kernels/vector_replace.cc 
b/cpp/src/arrow/compute/kernels/vector_replace.cc
index d1d356b43e..3fda3f8425 100644
--- a/cpp/src/arrow/compute/kernels/vector_replace.cc
+++ b/cpp/src/arrow/compute/kernels/vector_replace.cc
@@ -20,6 +20,7 @@
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/compute/kernels/copy_data_internal.h"
 #include "arrow/compute/kernels/util_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/util/bitmap_ops.h"
 #include "arrow/util/logging_internal.h"
 
@@ -800,8 +801,6 @@ struct FillNullBackwardChunked {
   }
 };
 
-}  // namespace
-
 void AddKernel(Type::type type_id, std::shared_ptr<KernelSignature> signature,
                ArrayKernelExec exec, VectorKernel::ChunkedExec exec_chunked,
                FunctionRegistry* registry, VectorFunction* func) {
@@ -869,6 +868,8 @@ void RegisterVectorFunction(FunctionRegistry* registry,
   // TODO(ARROW-9431): "replace_with_indices"
 }
 
+}  // namespace
+
 const FunctionDoc replace_with_mask_doc(
     "Replace items selected with a mask",
     ("Given an array and a boolean mask (either scalar or of equal length),\n"
diff --git a/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc 
b/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc
index f756ee0c22..bc8b25de4e 100644
--- a/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc
+++ b/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc
@@ -21,6 +21,7 @@
 #include "arrow/compute/kernel.h"
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/compute/kernels/ree_util_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging_internal.h"
diff --git a/cpp/src/arrow/compute/kernels/vector_select_k.cc 
b/cpp/src/arrow/compute/kernels/vector_select_k.cc
index eba7873e51..591a250967 100644
--- a/cpp/src/arrow/compute/kernels/vector_select_k.cc
+++ b/cpp/src/arrow/compute/kernels/vector_select_k.cc
@@ -20,6 +20,7 @@
 #include "arrow/compute/function.h"
 #include "arrow/compute/kernels/vector_sort_internal.h"
 #include "arrow/compute/registry.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/util/logging_internal.h"
 
 namespace arrow {
diff --git a/cpp/src/arrow/compute/kernels/vector_selection.cc 
b/cpp/src/arrow/compute/kernels/vector_selection.cc
index 6c6f1b36b8..6f8dd52a4f 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection.cc
+++ b/cpp/src/arrow/compute/kernels/vector_selection.cc
@@ -34,6 +34,7 @@
 #include "arrow/compute/kernels/util_internal.h"
 #include "arrow/compute/kernels/vector_selection_filter_internal.h"
 #include "arrow/compute/kernels/vector_selection_take_internal.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/extension_type.h"
 #include "arrow/record_batch.h"
 #include "arrow/result.h"
diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc 
b/cpp/src/arrow/compute/kernels/vector_sort.cc
index 28868849fc..41cb0a357a 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort.cc
+++ b/cpp/src/arrow/compute/kernels/vector_sort.cc
@@ -20,6 +20,7 @@
 #include "arrow/compute/function.h"
 #include "arrow/compute/kernels/vector_sort_internal.h"
 #include "arrow/compute/registry.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/util/logging_internal.h"
 
 namespace arrow {
diff --git a/cpp/src/arrow/compute/kernels/vector_statistics.cc 
b/cpp/src/arrow/compute/kernels/vector_statistics.cc
index 3965b7f84b..074f2ec0a7 100644
--- a/cpp/src/arrow/compute/kernels/vector_statistics.cc
+++ b/cpp/src/arrow/compute/kernels/vector_statistics.cc
@@ -27,6 +27,7 @@
 #include "arrow/compute/kernel.h"
 #include "arrow/compute/kernels/codegen_internal.h"
 #include "arrow/compute/registry.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/result.h"
 #include "arrow/scalar.h"
 #include "arrow/status.h"
diff --git a/cpp/src/arrow/compute/kernels/vector_swizzle.cc 
b/cpp/src/arrow/compute/kernels/vector_swizzle.cc
index 0e6a4e0a2f..aa82f55c2b 100644
--- a/cpp/src/arrow/compute/kernels/vector_swizzle.cc
+++ b/cpp/src/arrow/compute/kernels/vector_swizzle.cc
@@ -19,6 +19,7 @@
 #include "arrow/compute/function.h"
 #include "arrow/compute/kernels/codegen_internal.h"
 #include "arrow/compute/registry.h"
+#include "arrow/compute/registry_internal.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging_internal.h"
 
diff --git a/cpp/src/arrow/compute/util_avx2.cc 
b/cpp/src/arrow/compute/util_avx2.cc
index f0ff4575bb..a554e0463f 100644
--- a/cpp/src/arrow/compute/util_avx2.cc
+++ b/cpp/src/arrow/compute/util_avx2.cc
@@ -17,6 +17,7 @@
 
 #include <cstring>
 
+#include "arrow/compute/util.h"
 #include "arrow/util/bit_util.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/simd.h"
diff --git a/cpp/src/arrow/dataset/discovery.cc 
b/cpp/src/arrow/dataset/discovery.cc
index b502d1d194..5686e50e3c 100644
--- a/cpp/src/arrow/dataset/discovery.cc
+++ b/cpp/src/arrow/dataset/discovery.cc
@@ -39,6 +39,22 @@ using internal::StartsWith;
 
 namespace dataset {
 
+namespace {
+
+bool StartsWithAnyOf(const std::string& path, const std::vector<std::string>& 
prefixes) {
+  if (prefixes.empty()) {
+    return false;
+  }
+
+  auto parts = fs::internal::SplitAbstractPath(path);
+  return std::any_of(parts.cbegin(), parts.cend(), [&](std::string_view part) {
+    return std::any_of(prefixes.cbegin(), prefixes.cend(),
+                       [&](std::string_view prefix) { return StartsWith(part, 
prefix); });
+  });
+}
+
+}  // namespace
+
 DatasetFactory::DatasetFactory() : root_partition_(compute::literal(true)) {}
 
 Result<std::shared_ptr<Schema>> DatasetFactory::Inspect(InspectOptions 
options) {
@@ -157,18 +173,6 @@ Result<std::shared_ptr<DatasetFactory>> 
FileSystemDatasetFactory::Make(
                                    std::move(format), std::move(options)));
 }
 
-bool StartsWithAnyOf(const std::string& path, const std::vector<std::string>& 
prefixes) {
-  if (prefixes.empty()) {
-    return false;
-  }
-
-  auto parts = fs::internal::SplitAbstractPath(path);
-  return std::any_of(parts.cbegin(), parts.cend(), [&](std::string_view part) {
-    return std::any_of(prefixes.cbegin(), prefixes.cend(),
-                       [&](std::string_view prefix) { return StartsWith(part, 
prefix); });
-  });
-}
-
 Result<std::shared_ptr<DatasetFactory>> FileSystemDatasetFactory::Make(
     std::shared_ptr<fs::FileSystem> filesystem, fs::FileSelector selector,
     std::shared_ptr<FileFormat> format, FileSystemFactoryOptions options) {
diff --git a/cpp/src/arrow/dataset/file_base.cc 
b/cpp/src/arrow/dataset/file_base.cc
index b03818314c..ccc79dfa9b 100644
--- a/cpp/src/arrow/dataset/file_base.cc
+++ b/cpp/src/arrow/dataset/file_base.cc
@@ -489,6 +489,8 @@ Status FileSystemDataset::Write(const 
FileSystemDatasetWriteOptions& write_optio
   return acero::DeclarationToStatus(std::move(plan), 
scanner->options()->use_threads);
 }
 
+namespace {
+
 Result<acero::ExecNode*> MakeWriteNode(acero::ExecPlan* plan,
                                        std::vector<acero::ExecNode*> inputs,
                                        const acero::ExecNodeOptions& options) {
@@ -557,8 +559,6 @@ Result<acero::ExecNode*> MakeWriteNode(acero::ExecPlan* 
plan,
   return node;
 }
 
-namespace {
-
 class TeeNode : public acero::MapNode {
  public:
   TeeNode(acero::ExecPlan* plan, std::vector<acero::ExecNode*> inputs,
diff --git a/cpp/src/arrow/dataset/file_csv.cc 
b/cpp/src/arrow/dataset/file_csv.cc
index 5691e806cc..cede268107 100644
--- a/cpp/src/arrow/dataset/file_csv.cc
+++ b/cpp/src/arrow/dataset/file_csv.cc
@@ -53,6 +53,10 @@ using internal::SerialExecutor;
 
 namespace dataset {
 
+namespace {
+
+using RecordBatchGenerator = 
std::function<Future<std::shared_ptr<RecordBatch>>()>;
+
 struct CsvInspectedFragment : public InspectedFragment {
   CsvInspectedFragment(std::vector<std::string> column_names,
                        std::shared_ptr<io::InputStream> input_stream, int64_t 
num_bytes)
@@ -142,8 +146,6 @@ class CsvFileScanner : public FragmentScanner {
   int scanned_so_far_ = 0;
 };
 
-using RecordBatchGenerator = 
std::function<Future<std::shared_ptr<RecordBatch>>()>;
-
 Result<std::vector<std::string>> GetOrderedColumnNames(
     const csv::ReadOptions& read_options, const csv::ParseOptions& 
parse_options,
     std::string_view first_block, MemoryPool* pool) {
@@ -348,6 +350,8 @@ static RecordBatchGenerator GeneratorFromReader(
   return MakeFromFuture(std::move(gen_fut));
 }
 
+}  // namespace
+
 CsvFileFormat::CsvFileFormat() : 
FileFormat(std::make_shared<CsvFragmentScanOptions>()) {}
 
 bool CsvFileFormat::Equals(const FileFormat& format) const {
@@ -420,6 +424,8 @@ Future<std::shared_ptr<FragmentScanner>> 
CsvFileFormat::BeginScan(
                               exec_context->executor());
 }
 
+namespace {
+
 Result<std::shared_ptr<InspectedFragment>> DoInspectFragment(
     const FileSource& source, const CsvFragmentScanOptions& csv_options,
     compute::ExecContext* exec_context) {
@@ -442,6 +448,8 @@ Result<std::shared_ptr<InspectedFragment>> 
DoInspectFragment(
                                                 source.Size());
 }
 
+}  // namespace
+
 Future<std::shared_ptr<InspectedFragment>> CsvFileFormat::InspectFragment(
     const FileSource& source, const FragmentScanOptions* format_options,
     compute::ExecContext* exec_context) const {
diff --git a/cpp/src/arrow/dataset/file_parquet.cc 
b/cpp/src/arrow/dataset/file_parquet.cc
index 62b8f57ba1..1912da40fc 100644
--- a/cpp/src/arrow/dataset/file_parquet.cc
+++ b/cpp/src/arrow/dataset/file_parquet.cc
@@ -1048,6 +1048,8 @@ static inline Result<std::string> FileFromRowGroup(
   return filesystem->NormalizePath(std::move(path));
 }
 
+namespace {
+
 Result<std::shared_ptr<Schema>> GetSchema(
     const parquet::FileMetaData& metadata,
     const parquet::ArrowReaderProperties& properties) {
@@ -1057,6 +1059,8 @@ Result<std::shared_ptr<Schema>> GetSchema(
   return schema;
 }
 
+}  // namespace
+
 Result<std::shared_ptr<DatasetFactory>> ParquetDatasetFactory::Make(
     const std::string& metadata_path, std::shared_ptr<fs::FileSystem> 
filesystem,
     std::shared_ptr<ParquetFileFormat> format, ParquetFactoryOptions options) {
diff --git a/cpp/src/arrow/engine/substrait/expression_internal.cc 
b/cpp/src/arrow/engine/substrait/expression_internal.cc
index 464e6d2670..f121b8034f 100644
--- a/cpp/src/arrow/engine/substrait/expression_internal.cc
+++ b/cpp/src/arrow/engine/substrait/expression_internal.cc
@@ -88,8 +88,6 @@ Id NormalizeFunctionName(Id id) {
   return {id.uri, func_name};
 }
 
-}  // namespace
-
 Status DecodeArg(const substrait::FunctionArgument& arg, int idx, 
SubstraitCall* call,
                  const ExtensionSet& ext_set,
                  const ConversionOptions& conversion_options) {
@@ -136,15 +134,6 @@ Result<SubstraitCall> DecodeScalarFunction(
   return call;
 }
 
-std::string EnumToString(int value, const google::protobuf::EnumDescriptor* 
descriptor) {
-  const google::protobuf::EnumValueDescriptor* value_desc =
-      descriptor->FindValueByNumber(value);
-  if (value_desc == nullptr) {
-    return "unknown";
-  }
-  return std::string(value_desc->name());
-}
-
 Result<compute::Expression> FromProto(const 
substrait::Expression::ReferenceSegment* ref,
                                       const ExtensionSet& ext_set,
                                       const ConversionOptions& 
conversion_options,
@@ -229,6 +218,8 @@ Result<compute::Expression> FromProto(const 
substrait::Expression::FieldReferenc
   return FromProto(&dref, ext_set, conversion_options, std::move(in_expr));
 }
 
+}  // namespace
+
 Result<FieldRef> DirectReferenceFromProto(
     const substrait::Expression::FieldReference* fref, const ExtensionSet& 
ext_set,
     const ConversionOptions& conversion_options) {
@@ -1128,6 +1119,7 @@ struct ScalarToProtoImpl {
   ExtensionSet* ext_set_;
   const ConversionOptions& conversion_options_;
 };
+
 }  // namespace
 
 Result<std::unique_ptr<substrait::Expression::Literal>> ToProto(
@@ -1152,7 +1144,9 @@ Result<std::unique_ptr<substrait::Expression::Literal>> 
ToProto(
   return out;
 }
 
-static Status AddChildToReferenceSegment(
+namespace {
+
+Status AddChildToReferenceSegment(
     substrait::Expression::ReferenceSegment& segment,
     std::unique_ptr<substrait::Expression::ReferenceSegment>&& child) {
   auto status = Status::Invalid("Attempt to add child to incomplete reference 
segment");
@@ -1197,7 +1191,7 @@ static Status AddChildToReferenceSegment(
 
 // Indexes the given Substrait expression or root (if expr is empty) using the 
given
 // ReferenceSegment.
-static Result<std::unique_ptr<substrait::Expression>> MakeDirectReference(
+Result<std::unique_ptr<substrait::Expression>> MakeDirectReference(
     std::unique_ptr<substrait::Expression>&& expr,
     std::unique_ptr<substrait::Expression::ReferenceSegment>&& ref_segment) {
   // If expr is already a selection expression, add the index to its index 
stack.
@@ -1227,7 +1221,7 @@ static Result<std::unique_ptr<substrait::Expression>> 
MakeDirectReference(
 
 // Indexes the given Substrait struct-typed expression or root (if expr is 
empty) using
 // the given field index.
-static Result<std::unique_ptr<substrait::Expression>> MakeStructFieldReference(
+Result<std::unique_ptr<substrait::Expression>> MakeStructFieldReference(
     std::unique_ptr<substrait::Expression>&& expr, int field) {
   auto struct_field =
       std::make_unique<substrait::Expression::ReferenceSegment::StructField>();
@@ -1240,7 +1234,7 @@ static Result<std::unique_ptr<substrait::Expression>> 
MakeStructFieldReference(
 }
 
 // Indexes the given Substrait list-typed expression using the given offset.
-static Result<std::unique_ptr<substrait::Expression>> MakeListElementReference(
+Result<std::unique_ptr<substrait::Expression>> MakeListElementReference(
     std::unique_ptr<substrait::Expression>&& expr, int offset) {
   auto list_element =
       std::make_unique<substrait::Expression::ReferenceSegment::ListElement>();
@@ -1340,6 +1334,8 @@ 
Result<std::vector<std::unique_ptr<substrait::Expression>>> DatumToLiterals(
   return literals;
 }
 
+}  // namespace
+
 Result<std::unique_ptr<substrait::Expression>> ToProto(
     const compute::Expression& expr, ExtensionSet* ext_set,
     const ConversionOptions& conversion_options) {
diff --git a/cpp/src/arrow/engine/substrait/relation_internal.cc 
b/cpp/src/arrow/engine/substrait/relation_internal.cc
index 4d52ba3820..1ea143f9c5 100644
--- a/cpp/src/arrow/engine/substrait/relation_internal.cc
+++ b/cpp/src/arrow/engine/substrait/relation_internal.cc
@@ -94,6 +94,8 @@ Result<EmitInfo> GetEmitInfo(const RelMessage& rel,
   return emit_info;
 }
 
+namespace {
+
 Result<DeclarationInfo> ProcessEmitProject(
     std::optional<substrait::RelCommon> rel_common_opt,
     const DeclarationInfo& project_declr, const std::shared_ptr<Schema>& 
input_schema) {
@@ -130,6 +132,8 @@ Result<DeclarationInfo> ProcessEmitProject(
   }
 }
 
+}  // namespace
+
 template <typename RelMessage>
 Result<DeclarationInfo> ProcessEmit(const RelMessage& rel,
                                     const DeclarationInfo& no_emit_declr,
@@ -153,6 +157,7 @@ Result<DeclarationInfo> ProcessEmit(const RelMessage& rel,
     return no_emit_declr;
   }
 }
+
 /// In the specialization, a single ProjectNode is being used to
 /// get the Acero relation with or without emit.
 template <>
@@ -163,6 +168,8 @@ Result<DeclarationInfo> ProcessEmit(const 
substrait::ProjectRel& rel,
                             no_emit_declr, schema);
 }
 
+namespace {
+
 Result<DeclarationInfo> ProcessExtensionEmit(const DeclarationInfo& 
no_emit_declr,
                                              const std::vector<int>& 
emit_order) {
   const std::shared_ptr<Schema>& input_schema = no_emit_declr.output_schema;
@@ -289,6 +296,8 @@ Status DiscoverFilesFromDir(const 
std::shared_ptr<fs::LocalFileSystem>& local_fs
   return Status::OK();
 }
 
+}  // namespace
+
 namespace internal {
 
 Result<compute::Aggregate> ParseAggregateMeasure(
@@ -1100,8 +1109,6 @@ Result<std::unique_ptr<substrait::FilterRel>> 
FilterRelationConverter(
   return filter_rel;
 }
 
-}  // namespace
-
 Status SerializeAndCombineRelations(const acero::Declaration& declaration,
                                     ExtensionSet* ext_set,
                                     std::unique_ptr<substrait::Rel>* rel,
@@ -1141,6 +1148,8 @@ Status SerializeAndCombineRelations(const 
acero::Declaration& declaration,
   return Status::OK();
 }
 
+}  // namespace
+
 Result<std::unique_ptr<substrait::Rel>> ToProto(
     const acero::Declaration& declr, ExtensionSet* ext_set,
     const ConversionOptions& conversion_options) {
diff --git a/cpp/src/arrow/engine/substrait/serde.cc 
b/cpp/src/arrow/engine/substrait/serde.cc
index db2dcb5928..5ce97cb0cc 100644
--- a/cpp/src/arrow/engine/substrait/serde.cc
+++ b/cpp/src/arrow/engine/substrait/serde.cc
@@ -48,6 +48,8 @@
 namespace arrow {
 namespace engine {
 
+namespace {
+
 Status ParseFromBufferImpl(const Buffer& buf, const std::string& full_name,
                            google::protobuf::Message* message) {
   google::protobuf::io::ArrayInputStream buf_stream{buf.data(),
@@ -59,6 +61,8 @@ Status ParseFromBufferImpl(const Buffer& buf, const 
std::string& full_name,
   return Status::Invalid("ParseFromZeroCopyStream failed for ", full_name);
 }
 
+}  // namespace
+
 template <typename Message>
 Result<Message> ParseFromBuffer(const Buffer& buf) {
   Message message;
diff --git a/cpp/src/arrow/engine/substrait/test_plan_builder.cc 
b/cpp/src/arrow/engine/substrait/test_plan_builder.cc
index 724c58277e..a8302145f5 100644
--- a/cpp/src/arrow/engine/substrait/test_plan_builder.cc
+++ b/cpp/src/arrow/engine/substrait/test_plan_builder.cc
@@ -42,7 +42,9 @@ namespace arrow {
 namespace engine {
 namespace internal {
 
-static const ConversionOptions kPlanBuilderConversionOptions;
+namespace {
+
+const ConversionOptions kPlanBuilderConversionOptions;
 
 Result<std::unique_ptr<substrait::ReadRel>> CreateRead(const Table& table,
                                                        ExtensionSet* ext_set) {
@@ -185,6 +187,8 @@ Result<std::unique_ptr<substrait::Plan>> 
CreatePlan(std::unique_ptr<substrait::R
   return plan;
 }
 
+}  // namespace
+
 Result<std::shared_ptr<Buffer>> CreateScanProjectSubstrait(
     Id function_id, const std::shared_ptr<Table>& input_table,
     const std::vector<std::string>& arguments,
diff --git a/cpp/src/arrow/extension/tensor_internal.h 
b/cpp/src/arrow/extension/tensor_internal.h
index ed5a7ffc79..62b1dba614 100644
--- a/cpp/src/arrow/extension/tensor_internal.h
+++ b/cpp/src/arrow/extension/tensor_internal.h
@@ -25,8 +25,7 @@
 
 namespace arrow::internal {
 
-ARROW_EXPORT
-Status IsPermutationValid(const std::vector<int64_t>& permutation) {
+inline Status IsPermutationValid(const std::vector<int64_t>& permutation) {
   const auto size = static_cast<int64_t>(permutation.size());
   std::vector<uint8_t> dim_seen(size, 0);
 
diff --git a/cpp/src/arrow/filesystem/filesystem.cc 
b/cpp/src/arrow/filesystem/filesystem.cc
index c02512c493..8281bed7ce 100644
--- a/cpp/src/arrow/filesystem/filesystem.cc
+++ b/cpp/src/arrow/filesystem/filesystem.cc
@@ -991,11 +991,6 @@ Result<std::shared_ptr<FileSystem>> 
FileSystemFromUriOrPath(
   return FileSystemFromUri(uri_string, io_context, out_path);
 }
 
-Status FileSystemFromUri(const std::string& uri, std::shared_ptr<FileSystem>* 
out_fs,
-                         std::string* out_path) {
-  return FileSystemFromUri(uri, out_path).Value(out_fs);
-}
-
 Status Initialize(const FileSystemGlobalOptions& options) {
   internal::global_options = options;
   return Status::OK();
diff --git a/cpp/src/arrow/filesystem/filesystem_library.h 
b/cpp/src/arrow/filesystem/filesystem_library.h
index d610c72237..1d65690130 100644
--- a/cpp/src/arrow/filesystem/filesystem_library.h
+++ b/cpp/src/arrow/filesystem/filesystem_library.h
@@ -26,7 +26,9 @@ extern "C" {
 // _declspec(dllexport)/[[gnu::visibility("default")]] even when
 // this header is #included by a non-arrow source, as in a third
 // party filesystem implementation.
-ARROW_FORCE_EXPORT void* arrow_filesystem_get_registry() {
+ARROW_FORCE_EXPORT void* arrow_filesystem_get_registry();
+
+void* arrow_filesystem_get_registry() {
   // In the case where libarrow is linked statically both to the executable 
and to a
   // dynamically loaded filesystem implementation library, the library 
contains a
   // duplicate definition of the registry into which the library's instances of
diff --git a/cpp/src/arrow/filesystem/test_util.cc 
b/cpp/src/arrow/filesystem/test_util.cc
index efe7cff495..da73a8ec16 100644
--- a/cpp/src/arrow/filesystem/test_util.cc
+++ b/cpp/src/arrow/filesystem/test_util.cc
@@ -97,6 +97,16 @@ void AssertRaisesWithErrno(int expected_errno, const 
Result<T>& result) {
   AssertRaisesWithErrno(expected_errno, result.status());
 }
 
+void GetSortedInfos(FileSystem* fs, FileSelector s, std::vector<FileInfo>& 
infos) {
+  ASSERT_OK_AND_ASSIGN(infos, fs->GetFileInfo(s));
+  // Overwrite mtime & size with kNoTime / kNoSize for easier testing.
+  for_each(infos.begin(), infos.end(), [](FileInfo& info) {
+    info.set_mtime(kNoTime);
+    info.set_size(kNoSize);
+  });
+  SortInfos(&infos);
+}
+
 };  // namespace
 
 void AssertFileContents(FileSystem* fs, const std::string& path,
@@ -862,16 +872,6 @@ void 
GenericFileSystemTest::TestGetFileInfoGenerator(FileSystem* fs) {
   ASSERT_EQ(infos.size(), 0);
 }
 
-void GetSortedInfos(FileSystem* fs, FileSelector s, std::vector<FileInfo>& 
infos) {
-  ASSERT_OK_AND_ASSIGN(infos, fs->GetFileInfo(s));
-  // Clear mtime & size for easier testing.
-  for_each(infos.begin(), infos.end(), [](FileInfo& info) {
-    info.set_mtime(kNoTime);
-    info.set_size(kNoSize);
-  });
-  SortInfos(&infos);
-}
-
 void GenericFileSystemTest::TestGetFileInfoSelectorWithRecursion(FileSystem* 
fs) {
   ASSERT_OK(fs->CreateDir("01/02/03/04"));
   ASSERT_OK(fs->CreateDir("AA"));
diff --git a/cpp/src/arrow/flight/CMakeLists.txt 
b/cpp/src/arrow/flight/CMakeLists.txt
index 663e7e2410..a827a7307f 100644
--- a/cpp/src/arrow/flight/CMakeLists.txt
+++ b/cpp/src/arrow/flight/CMakeLists.txt
@@ -113,7 +113,23 @@ add_custom_command(OUTPUT ${FLIGHT_GENERATED_PROTO_FILES}
                            
"--plugin=protoc-gen-grpc=$<TARGET_FILE:gRPC::grpc_cpp_plugin>"
                            "${FLIGHT_PROTO}")
 
-set_source_files_properties(${FLIGHT_GENERATED_PROTO_FILES} PROPERTIES 
GENERATED TRUE)
+# Set common properties for C++ source files generated by protoc (passed as arguments)
+function(arrow_set_generated_proto_files_properties)
+  set(GENERATED_FILES ${ARGN})
+  set_source_files_properties(${GENERATED_FILES} PROPERTIES GENERATED TRUE)
+  if(MSVC)
+    # Suppress missing dll-interface warning (C4251); also exclude from unity builds
+    set_source_files_properties(${GENERATED_FILES}
+                                PROPERTIES COMPILE_OPTIONS "/wd4251"
+                                           SKIP_UNITY_BUILD_INCLUSION TRUE)
+  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    # Disable -Wmissing-declarations for generated code
+    set_source_files_properties(${GENERATED_FILES} PROPERTIES COMPILE_OPTIONS
+                                                              
"-Wno-missing-declarations")
+  endif()
+endfunction()
+
+arrow_set_generated_proto_files_properties(${FLIGHT_GENERATED_PROTO_FILES})
 
 add_custom_target(flight_grpc_gen ALL DEPENDS ${FLIGHT_GENERATED_PROTO_FILES})
 
@@ -177,18 +193,6 @@ if(ARROW_WITH_OPENTELEMETRY)
   list(APPEND ARROW_FLIGHT_SRCS otel_logging.cc)
 endif()
 
-if(MSVC)
-  # Protobuf generated files trigger spurious warnings on MSVC.
-  foreach(GENERATED_SOURCE "${CMAKE_CURRENT_BINARY_DIR}/Flight.pb.cc"
-                           "${CMAKE_CURRENT_BINARY_DIR}/Flight.pb.h")
-    # Suppress missing dll-interface warning
-    set_source_files_properties("${GENERATED_SOURCE}"
-                                PROPERTIES COMPILE_OPTIONS "/wd4251"
-                                           GENERATED TRUE
-                                           SKIP_UNITY_BUILD_INCLUSION TRUE)
-  endforeach()
-endif()
-
 add_arrow_lib(arrow_flight
               CMAKE_PACKAGE_NAME
               ArrowFlight
diff --git a/cpp/src/arrow/flight/serialization_internal.cc 
b/cpp/src/arrow/flight/serialization_internal.cc
index a64ab713dd..34fcef1f83 100644
--- a/cpp/src/arrow/flight/serialization_internal.cc
+++ b/cpp/src/arrow/flight/serialization_internal.cc
@@ -457,6 +457,8 @@ Status ToPayload(const FlightDescriptor& descr, 
std::shared_ptr<Buffer>* out) {
   return Status::OK();
 }
 
+namespace {
+
 // SessionOptionValue
 
 Status FromProto(const pb::SessionOptionValue& pb_val, SessionOptionValue* 
val) {
@@ -524,6 +526,8 @@ Status ToProto(const std::map<std::string, 
SessionOptionValue>& map,
   return Status::OK();
 }
 
+}  // namespace
+
 // SetSessionOptionsRequest
 
 Status FromProto(const pb::SetSessionOptionsRequest& pb_request,
diff --git a/cpp/src/arrow/flight/sql/CMakeLists.txt 
b/cpp/src/arrow/flight/sql/CMakeLists.txt
index 4b6764bd8c..6fcdaba2ec 100644
--- a/cpp/src/arrow/flight/sql/CMakeLists.txt
+++ b/cpp/src/arrow/flight/sql/CMakeLists.txt
@@ -48,7 +48,8 @@ add_custom_command(OUTPUT ${FLIGHT_SQL_GENERATED_PROTO_FILES}
                    COMMAND ${FLIGHT_SQL_PROTOC_COMMAND}
                    DEPENDS ${PROTO_DEPENDS})
 
-set_source_files_properties(${FLIGHT_SQL_GENERATED_PROTO_FILES} PROPERTIES 
GENERATED TRUE)
+arrow_set_generated_proto_files_properties(${FLIGHT_SQL_GENERATED_PROTO_FILES})
+
 add_custom_target(flight_sql_protobuf_gen ALL DEPENDS 
${FLIGHT_SQL_GENERATED_PROTO_FILES})
 
 set(ARROW_FLIGHT_SQL_SRCS
diff --git a/cpp/src/arrow/flight/sql/client.cc 
b/cpp/src/arrow/flight/sql/client.cc
index fe087cc947..6fe4a40566 100644
--- a/cpp/src/arrow/flight/sql/client.cc
+++ b/cpp/src/arrow/flight/sql/client.cc
@@ -40,6 +40,7 @@ namespace flight {
 namespace sql {
 
 namespace {
+
 arrow::Result<FlightDescriptor> GetFlightDescriptorForCommand(
     const google::protobuf::Message& command) {
   FlightDescriptor descriptor;
@@ -96,6 +97,25 @@ Status ReadResult(ResultStream* results, 
google::protobuf::Message* message) {
   }
   return Status::OK();
 }
+
+arrow::Result<std::shared_ptr<Buffer>> BindParameters(FlightClient* client,
+                                                      const FlightCallOptions& 
options,
+                                                      const FlightDescriptor& 
descriptor,
+                                                      RecordBatchReader* 
params) {
+  ARROW_ASSIGN_OR_RAISE(auto stream,
+                        client->DoPut(options, descriptor, params->schema()));
+  while (true) {
+    ARROW_ASSIGN_OR_RAISE(auto batch, params->Next());
+    if (!batch) break;
+    ARROW_RETURN_NOT_OK(stream.writer->WriteRecordBatch(*batch));
+  }
+  ARROW_RETURN_NOT_OK(stream.writer->DoneWriting());
+  std::shared_ptr<Buffer> metadata;
+  ARROW_RETURN_NOT_OK(stream.reader->ReadMetadata(&metadata));
+  ARROW_RETURN_NOT_OK(stream.writer->Close());
+  return metadata;
+}
+
 }  // namespace
 
 const Transaction& no_transaction() {
@@ -615,24 +635,6 @@ arrow::Result<std::shared_ptr<PreparedStatement>> 
PreparedStatement::ParseRespon
                                              parameter_schema);
 }
 
-arrow::Result<std::shared_ptr<Buffer>> BindParameters(FlightClient* client,
-                                                      const FlightCallOptions& 
options,
-                                                      const FlightDescriptor& 
descriptor,
-                                                      RecordBatchReader* 
params) {
-  ARROW_ASSIGN_OR_RAISE(auto stream,
-                        client->DoPut(options, descriptor, params->schema()));
-  while (true) {
-    ARROW_ASSIGN_OR_RAISE(auto batch, params->Next());
-    if (!batch) break;
-    ARROW_RETURN_NOT_OK(stream.writer->WriteRecordBatch(*batch));
-  }
-  ARROW_RETURN_NOT_OK(stream.writer->DoneWriting());
-  std::shared_ptr<Buffer> metadata;
-  ARROW_RETURN_NOT_OK(stream.reader->ReadMetadata(&metadata));
-  ARROW_RETURN_NOT_OK(stream.writer->Close());
-  return metadata;
-}
-
 arrow::Result<std::unique_ptr<FlightInfo>> PreparedStatement::Execute(
     const FlightCallOptions& options) {
   if (is_closed_) {
diff --git a/cpp/src/arrow/flight/sql/protocol_internal.cc 
b/cpp/src/arrow/flight/sql/protocol_internal.cc
index 984e782223..fdf8a119f7 100644
--- a/cpp/src/arrow/flight/sql/protocol_internal.cc
+++ b/cpp/src/arrow/flight/sql/protocol_internal.cc
@@ -20,9 +20,13 @@
 ARROW_SUPPRESS_DEPRECATION_WARNING
 #include "arrow/flight/sql/protocol_internal.h"
 
+ARROW_SUPPRESS_MISSING_DECLARATIONS_WARNING
+
 // NOTE(lidavidm): Normally this is forbidden, but on Windows to get
 // the dllexport/dllimport macro in the right places, we need to
 // ensure our header gets included (and Protobuf will not insert the
 // include for you)
 #include "arrow/flight/sql/FlightSql.pb.cc"  // NOLINT
+
+ARROW_UNSUPPRESS_MISSING_DECLARATIONS_WARNING
 ARROW_UNSUPPRESS_DEPRECATION_WARNING
diff --git a/cpp/src/arrow/flight/sql/server.cc 
b/cpp/src/arrow/flight/sql/server.cc
index f68d884c62..8471fa8a2b 100644
--- a/cpp/src/arrow/flight/sql/server.cc
+++ b/cpp/src/arrow/flight/sql/server.cc
@@ -1314,7 +1314,7 @@ const std::shared_ptr<Schema>& 
SqlSchema::GetPrimaryKeysSchema() {
   return kSchema;
 }
 
-const std::shared_ptr<Schema>& 
GetImportedExportedKeysAndCrossReferenceSchema() {
+static const std::shared_ptr<Schema>& 
GetImportedExportedKeysAndCrossReferenceSchema() {
   static std::shared_ptr<Schema> kSchema = arrow::schema(
       {field("pk_catalog_name", utf8(), true), field("pk_db_schema_name", 
utf8(), true),
        field("pk_table_name", utf8(), false), field("pk_column_name", utf8(), 
false),
diff --git a/cpp/src/arrow/flight/test_definitions.cc 
b/cpp/src/arrow/flight/test_definitions.cc
index ea6576088f..c6b8e2b422 100644
--- a/cpp/src/arrow/flight/test_definitions.cc
+++ b/cpp/src/arrow/flight/test_definitions.cc
@@ -1194,6 +1194,8 @@ void IpcOptionsTest::TestDoExchangeServerWriteOptions() {
 
 #if defined(ARROW_CUDA)
 
+namespace {
+
 Status CheckBuffersOnDevice(const Array& array, const Device& device) {
   if (array.num_fields() != 0) {
     return Status::NotImplemented("Nested arrays");
@@ -1284,6 +1286,8 @@ class CudaTestServer : public FlightServerBase {
   std::shared_ptr<cuda::CudaContext> context_;
 };
 
+}  // namespace
+
 // Store CUDA objects without exposing them in the public header
 class CudaDataTest::Impl {
  public:
@@ -1750,7 +1754,7 @@ void ErrorHandlingTest::TestGetFlightInfoMetadata() {
                             }));
 }
 
-void CheckErrorDetail(const Status& status) {
+static void CheckErrorDetail(const Status& status) {
   auto detail = FlightStatusDetail::UnwrapStatus(status);
   ASSERT_NE(detail, nullptr) << status.ToString();
   ASSERT_EQ(detail->code(), FlightStatusCode::Unauthorized);
diff --git a/cpp/src/arrow/flight/test_util.h b/cpp/src/arrow/flight/test_util.h
index 02963cd699..fd0f3c88b6 100644
--- a/cpp/src/arrow/flight/test_util.h
+++ b/cpp/src/arrow/flight/test_util.h
@@ -143,6 +143,9 @@ class ARROW_FLIGHT_EXPORT NumberingStream : public 
FlightDataStream {
 ARROW_FLIGHT_EXPORT
 std::shared_ptr<Schema> ExampleIntSchema();
 
+ARROW_FLIGHT_EXPORT
+std::shared_ptr<Schema> ExampleFloatSchema();
+
 ARROW_FLIGHT_EXPORT
 std::shared_ptr<Schema> ExampleStringSchema();
 
diff --git a/cpp/src/arrow/flight/transport/grpc/serialization_internal.cc 
b/cpp/src/arrow/flight/transport/grpc/serialization_internal.cc
index dbd3778066..0b8c90a08e 100644
--- a/cpp/src/arrow/flight/transport/grpc/serialization_internal.cc
+++ b/cpp/src/arrow/flight/transport/grpc/serialization_internal.cc
@@ -69,6 +69,8 @@ using google::protobuf::io::CodedOutputStream;
 
 using ::grpc::ByteBuffer;
 
+namespace {
+
 bool ReadBytesZeroCopy(const std::shared_ptr<Buffer>& source_data,
                        CodedInputStream* input, std::shared_ptr<Buffer>* out) {
   uint32_t length;
@@ -151,7 +153,7 @@ class GrpcBuffer : public MutableBuffer {
 };
 
 // Destructor callback for grpc::Slice
-static void ReleaseBuffer(void* buf_ptr) {
+void ReleaseBuffer(void* buf_ptr) {
   delete reinterpret_cast<std::shared_ptr<Buffer>*>(buf_ptr);
 }
 
@@ -174,7 +176,7 @@ arrow::Result<::grpc::Slice> SliceFromBuffer(const 
std::shared_ptr<Buffer>& buf)
   return slice;
 }
 
-static const uint8_t kPaddingBytes[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+const uint8_t kPaddingBytes[8] = {0, 0, 0, 0, 0, 0, 0, 0};
 
 // Update the sizes of our Protobuf fields based on the given IPC payload.
 ::grpc::Status IpcMessageHeaderSize(const arrow::ipc::IpcPayload& ipc_msg, 
bool has_body,
@@ -195,6 +197,8 @@ static const uint8_t kPaddingBytes[8] = {0, 0, 0, 0, 0, 0, 
0, 0};
   return ::grpc::Status::OK;
 }
 
+}  // namespace
+
 ::grpc::Status FlightDataSerialize(const FlightPayload& msg, ByteBuffer* out,
                                    bool* own_buffer) {
   // Size of the IPC body (protobuf: data_body)
diff --git a/cpp/src/arrow/flight/types.cc b/cpp/src/arrow/flight/types.cc
index 5614c20c6b..759b1410bd 100644
--- a/cpp/src/arrow/flight/types.cc
+++ b/cpp/src/arrow/flight/types.cc
@@ -579,7 +579,8 @@ arrow::Status 
SetSessionOptionsRequest::Deserialize(std::string_view serialized,
 
 // SetSessionOptionsResult
 
-std::ostream& operator<<(std::ostream& os, const 
SetSessionOptionsResult::Error& e) {
+static std::ostream& operator<<(std::ostream& os,
+                                const SetSessionOptionsResult::Error& e) {
   os << '{' << e.value << '}';
   return os;
 }
diff --git a/cpp/src/arrow/ipc/message.cc b/cpp/src/arrow/ipc/message.cc
index bed6085420..f18f15d413 100644
--- a/cpp/src/arrow/ipc/message.cc
+++ b/cpp/src/arrow/ipc/message.cc
@@ -48,6 +48,38 @@ class MemoryPool;
 
 namespace ipc {
 
+namespace {
+
+Status MaybeAlignMetadata(std::shared_ptr<Buffer>* metadata) {
+  if (reinterpret_cast<uintptr_t>((*metadata)->data()) % 8 != 0) {
+    // If the metadata memory is not 8-byte aligned, we copy it here to avoid
+    // potential UBSAN issues from Flatbuffers
+    ARROW_ASSIGN_OR_RAISE(*metadata, (*metadata)->CopySlice(0, 
(*metadata)->size()));
+  }
+  return Status::OK();
+}
+
+Status CheckMetadataAndGetBodyLength(const Buffer& metadata, int64_t* 
body_length) {
+  const flatbuf::Message* fb_message = nullptr;
+  RETURN_NOT_OK(internal::VerifyMessage(metadata.data(), metadata.size(), 
&fb_message));
+  *body_length = fb_message->bodyLength();
+  if (*body_length < 0) {
+    return Status::IOError("Invalid IPC message: negative bodyLength");
+  }
+  return Status::OK();
+}
+
+Status WritePadding(io::OutputStream* stream, int64_t nbytes) {
+  while (nbytes > 0) {
+    const int64_t bytes_to_write = std::min<int64_t>(nbytes, kArrowAlignment);
+    RETURN_NOT_OK(stream->Write(kPaddingBytes, bytes_to_write));
+    nbytes -= bytes_to_write;
+  }
+  return Status::OK();
+}
+
+}  // namespace
+
 class Message::MessageImpl {
  public:
   explicit MessageImpl(std::shared_ptr<Buffer> metadata, 
std::shared_ptr<Buffer> body)
@@ -176,25 +208,6 @@ bool Message::Equals(const Message& other) const {
   }
 }
 
-Status MaybeAlignMetadata(std::shared_ptr<Buffer>* metadata) {
-  if (reinterpret_cast<uintptr_t>((*metadata)->data()) % 8 != 0) {
-    // If the metadata memory is not aligned, we copy it here to avoid
-    // potential UBSAN issues from Flatbuffers
-    ARROW_ASSIGN_OR_RAISE(*metadata, (*metadata)->CopySlice(0, 
(*metadata)->size()));
-  }
-  return Status::OK();
-}
-
-Status CheckMetadataAndGetBodyLength(const Buffer& metadata, int64_t* 
body_length) {
-  const flatbuf::Message* fb_message = nullptr;
-  RETURN_NOT_OK(internal::VerifyMessage(metadata.data(), metadata.size(), 
&fb_message));
-  *body_length = fb_message->bodyLength();
-  if (*body_length < 0) {
-    return Status::IOError("Invalid IPC message: negative bodyLength");
-  }
-  return Status::OK();
-}
-
 Result<std::unique_ptr<Message>> Message::ReadFrom(std::shared_ptr<Buffer> 
metadata,
                                                    io::InputStream* stream) {
   std::unique_ptr<Message> result;
@@ -228,15 +241,6 @@ Result<std::unique_ptr<Message>> Message::ReadFrom(const 
int64_t offset,
   return result;
 }
 
-Status WritePadding(io::OutputStream* stream, int64_t nbytes) {
-  while (nbytes > 0) {
-    const int64_t bytes_to_write = std::min<int64_t>(nbytes, kArrowAlignment);
-    RETURN_NOT_OK(stream->Write(kPaddingBytes, bytes_to_write));
-    nbytes -= bytes_to_write;
-  }
-  return Status::OK();
-}
-
 Status Message::SerializeTo(io::OutputStream* stream, const IpcWriteOptions& 
options,
                             int64_t* output_length) const {
   int32_t metadata_length = 0;
@@ -281,6 +285,8 @@ std::string FormatMessageType(MessageType type) {
   return "unknown";
 }
 
+namespace {
+
 Status ReadFieldsSubset(int64_t offset, int32_t metadata_length,
                         io::RandomAccessFile* file,
                         const FieldsLoaderFunction& fields_loader,
@@ -311,6 +317,8 @@ Status ReadFieldsSubset(int64_t offset, int32_t 
metadata_length,
   return Status::OK();
 }
 
+}  // namespace
+
 Result<std::unique_ptr<Message>> ReadMessage(std::shared_ptr<Buffer> metadata,
                                              std::shared_ptr<Buffer> body) {
   std::unique_ptr<Message> result;
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 00cabf7670..1ec2836626 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -1162,9 +1162,11 @@ Result<std::shared_ptr<RecordBatchStreamReader>> 
RecordBatchStreamReader::Open(
 // ----------------------------------------------------------------------
 // Reader implementation
 
+namespace {
+
 // Common functions used in both the random-access file reader and the
 // asynchronous generator
-static inline FileBlock FileBlockFromFlatbuffer(const flatbuf::Block* block) {
+inline FileBlock FileBlockFromFlatbuffer(const flatbuf::Block* block) {
   return FileBlock{block->offset(), block->metaDataLength(), 
block->bodyLength()};
 }
 
@@ -1177,7 +1179,7 @@ Status CheckAligned(const FileBlock& block) {
   return Status::OK();
 }
 
-static Result<std::unique_ptr<Message>> ReadMessageFromBlock(
+Result<std::unique_ptr<Message>> ReadMessageFromBlock(
     const FileBlock& block, io::RandomAccessFile* file,
     const FieldsLoaderFunction& fields_loader) {
   RETURN_NOT_OK(CheckAligned(block));
@@ -1189,7 +1191,7 @@ static Result<std::unique_ptr<Message>> 
ReadMessageFromBlock(
   return message;
 }
 
-static Future<std::shared_ptr<Message>> ReadMessageFromBlockAsync(
+Future<std::shared_ptr<Message>> ReadMessageFromBlockAsync(
     const FileBlock& block, io::RandomAccessFile* file, const io::IOContext& 
io_context) {
   if (!bit_util::IsMultipleOf8(block.offset) ||
       !bit_util::IsMultipleOf8(block.metadata_length) ||
@@ -1209,7 +1211,7 @@ class RecordBatchFileReaderImpl;
 /// A generator of record batches.
 ///
 /// All batches are yielded in order.
-class ARROW_EXPORT WholeIpcFileRecordBatchGenerator {
+class WholeIpcFileRecordBatchGenerator {
  public:
   using Item = std::shared_ptr<RecordBatch>;
 
@@ -1246,7 +1248,7 @@ class ARROW_EXPORT WholeIpcFileRecordBatchGenerator {
 /// a subset of columns from the file.
 ///
 /// All batches are yielded in order.
-class ARROW_EXPORT SelectiveIpcFileRecordBatchGenerator {
+class SelectiveIpcFileRecordBatchGenerator {
  public:
   using Item = std::shared_ptr<RecordBatch>;
 
@@ -1888,75 +1890,6 @@ class RecordBatchFileReaderImpl : public 
RecordBatchFileReader {
   bool swap_endian_;
 };
 
-Result<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::Open(
-    io::RandomAccessFile* file, const IpcReadOptions& options) {
-  ARROW_ASSIGN_OR_RAISE(int64_t footer_offset, file->GetSize());
-  return Open(file, footer_offset, options);
-}
-
-Result<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::Open(
-    io::RandomAccessFile* file, int64_t footer_offset, const IpcReadOptions& 
options) {
-  auto result = std::make_shared<RecordBatchFileReaderImpl>();
-  RETURN_NOT_OK(result->Open(file, footer_offset, options));
-  return result;
-}
-
-Result<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::Open(
-    const std::shared_ptr<io::RandomAccessFile>& file, const IpcReadOptions& 
options) {
-  ARROW_ASSIGN_OR_RAISE(int64_t footer_offset, file->GetSize());
-  return Open(file, footer_offset, options);
-}
-
-Result<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::Open(
-    const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
-    const IpcReadOptions& options) {
-  auto result = std::make_shared<RecordBatchFileReaderImpl>();
-  RETURN_NOT_OK(result->Open(file, footer_offset, options));
-  return result;
-}
-
-Future<std::shared_ptr<RecordBatchFileReader>> 
RecordBatchFileReader::OpenAsync(
-    const std::shared_ptr<io::RandomAccessFile>& file, const IpcReadOptions& 
options) {
-  ARROW_ASSIGN_OR_RAISE(int64_t footer_offset, file->GetSize());
-  return OpenAsync(file, footer_offset, options);
-}
-
-Future<std::shared_ptr<RecordBatchFileReader>> 
RecordBatchFileReader::OpenAsync(
-    io::RandomAccessFile* file, const IpcReadOptions& options) {
-  ARROW_ASSIGN_OR_RAISE(int64_t footer_offset, file->GetSize());
-  return OpenAsync(file, footer_offset, options);
-}
-
-Future<std::shared_ptr<RecordBatchFileReader>> 
RecordBatchFileReader::OpenAsync(
-    const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
-    const IpcReadOptions& options) {
-  auto result = std::make_shared<RecordBatchFileReaderImpl>();
-  return result->OpenAsync(file, footer_offset, options)
-      .Then([=]() -> Result<std::shared_ptr<RecordBatchFileReader>> { return 
result; });
-}
-
-Future<std::shared_ptr<RecordBatchFileReader>> 
RecordBatchFileReader::OpenAsync(
-    io::RandomAccessFile* file, int64_t footer_offset, const IpcReadOptions& 
options) {
-  auto result = std::make_shared<RecordBatchFileReaderImpl>();
-  return result->OpenAsync(file, footer_offset, options)
-      .Then([=]() -> Result<std::shared_ptr<RecordBatchFileReader>> { return 
result; });
-}
-
-Result<RecordBatchVector> RecordBatchFileReader::ToRecordBatches() {
-  RecordBatchVector batches;
-  const auto n = num_record_batches();
-  for (int i = 0; i < n; ++i) {
-    ARROW_ASSIGN_OR_RAISE(auto batch, ReadRecordBatch(i));
-    batches.emplace_back(std::move(batch));
-  }
-  return batches;
-}
-
-Result<std::shared_ptr<Table>> RecordBatchFileReader::ToTable() {
-  ARROW_ASSIGN_OR_RAISE(auto batches, ToRecordBatches());
-  return Table::FromRecordBatches(schema(), std::move(batches));
-}
-
 Future<SelectiveIpcFileRecordBatchGenerator::Item>
 SelectiveIpcFileRecordBatchGenerator::operator()() {
   int index = index_++;
@@ -2046,6 +1979,77 @@ Result<std::shared_ptr<RecordBatch>> WholeIpcFileRecordBatchGenerator::ReadRecor
   return batch_with_metadata.batch;
 }
 
+}  // namespace
+
+Result<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::Open(
+    io::RandomAccessFile* file, const IpcReadOptions& options) {
+  ARROW_ASSIGN_OR_RAISE(int64_t footer_offset, file->GetSize());
+  return Open(file, footer_offset, options);
+}
+
+Result<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::Open(
+    io::RandomAccessFile* file, int64_t footer_offset, const IpcReadOptions& options) {
+  auto result = std::make_shared<RecordBatchFileReaderImpl>();
+  RETURN_NOT_OK(result->Open(file, footer_offset, options));
+  return result;
+}
+
+Result<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::Open(
+    const std::shared_ptr<io::RandomAccessFile>& file, const IpcReadOptions& options) {
+  ARROW_ASSIGN_OR_RAISE(int64_t footer_offset, file->GetSize());
+  return Open(file, footer_offset, options);
+}
+
+Result<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::Open(
+    const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
+    const IpcReadOptions& options) {
+  auto result = std::make_shared<RecordBatchFileReaderImpl>();
+  RETURN_NOT_OK(result->Open(file, footer_offset, options));
+  return result;
+}
+
+Future<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::OpenAsync(
+    const std::shared_ptr<io::RandomAccessFile>& file, const IpcReadOptions& options) {
+  ARROW_ASSIGN_OR_RAISE(int64_t footer_offset, file->GetSize());
+  return OpenAsync(file, footer_offset, options);
+}
+
+Future<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::OpenAsync(
+    io::RandomAccessFile* file, const IpcReadOptions& options) {
+  ARROW_ASSIGN_OR_RAISE(int64_t footer_offset, file->GetSize());
+  return OpenAsync(file, footer_offset, options);
+}
+
+Future<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::OpenAsync(
+    const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
+    const IpcReadOptions& options) {
+  auto result = std::make_shared<RecordBatchFileReaderImpl>();
+  return result->OpenAsync(file, footer_offset, options)
+      .Then([=]() -> Result<std::shared_ptr<RecordBatchFileReader>> { return result; });
+}
+
+Future<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::OpenAsync(
+    io::RandomAccessFile* file, int64_t footer_offset, const IpcReadOptions& options) {
+  auto result = std::make_shared<RecordBatchFileReaderImpl>();
+  return result->OpenAsync(file, footer_offset, options)
+      .Then([=]() -> Result<std::shared_ptr<RecordBatchFileReader>> { return result; });
+}
+
+Result<RecordBatchVector> RecordBatchFileReader::ToRecordBatches() {
+  RecordBatchVector batches;
+  const auto n = num_record_batches();
+  for (int i = 0; i < n; ++i) {
+    ARROW_ASSIGN_OR_RAISE(auto batch, ReadRecordBatch(i));
+    batches.emplace_back(std::move(batch));
+  }
+  return batches;
+}
+
+Result<std::shared_ptr<Table>> RecordBatchFileReader::ToTable() {
+  ARROW_ASSIGN_OR_RAISE(auto batches, ToRecordBatches());
+  return Table::FromRecordBatches(schema(), std::move(batches));
+}
+
 Status Listener::OnEOS() { return Status::OK(); }
 
 Status Listener::OnSchemaDecoded(std::shared_ptr<Schema> schema) { return Status::OK(); }
@@ -2530,6 +2534,8 @@ Result<std::shared_ptr<SparseTensor>> ReadSparseTensorPayload(const IpcPayload&
 
 }  // namespace internal
 
+namespace {
+
 Result<std::shared_ptr<SparseTensor>> ReadSparseTensor(const Buffer& metadata,
                                                        io::RandomAccessFile* file) {
   std::shared_ptr<DataType> type;
@@ -2580,6 +2586,8 @@ Result<std::shared_ptr<SparseTensor>> ReadSparseTensor(const Buffer& metadata,
   }
 }
 
+}  // namespace
+
 Result<std::shared_ptr<SparseTensor>> ReadSparseTensor(const Message& message) {
   CHECK_HAS_BODY(message);
   ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message.body()));
diff --git a/cpp/src/arrow/ipc/test_common.cc b/cpp/src/arrow/ipc/test_common.cc
index a739990fc9..dea40f18e8 100644
--- a/cpp/src/arrow/ipc/test_common.cc
+++ b/cpp/src/arrow/ipc/test_common.cc
@@ -183,14 +183,6 @@ Status MakeListArray(const std::shared_ptr<Array>& child_array, int num_lists,
   return (**out).Validate();
 }
 
-}  // namespace
-
-Status MakeRandomListArray(const std::shared_ptr<Array>& child_array, int num_lists,
-                           bool include_nulls, MemoryPool* pool,
-                           std::shared_ptr<Array>* out) {
-  return MakeListArray<ListType>(child_array, num_lists, include_nulls, pool, out);
-}
-
 Status MakeRandomListViewArray(const std::shared_ptr<Array>& child_array, int num_lists,
                                bool include_nulls, MemoryPool* pool,
                                std::shared_ptr<Array>* out) {
@@ -217,12 +209,6 @@ Status MakeRandomLargeListViewArray(const std::shared_ptr<Array>& child_array,
   return Status::OK();
 }
 
-Status MakeRandomLargeListArray(const std::shared_ptr<Array>& child_array, int num_lists,
-                                bool include_nulls, MemoryPool* pool,
-                                std::shared_ptr<Array>* out) {
-  return MakeListArray<LargeListType>(child_array, num_lists, include_nulls, pool, out);
-}
-
 Status MakeRandomMapArray(const std::shared_ptr<Array>& key_array,
                           const std::shared_ptr<Array>& item_array, int num_maps,
                           bool include_nulls, MemoryPool* pool,
@@ -240,6 +226,20 @@ Status MakeRandomMapArray(const std::shared_ptr<Array>& key_array,
   return (**out).Validate();
 }
 
+}  // namespace
+
+Status MakeRandomListArray(const std::shared_ptr<Array>& child_array, int num_lists,
+                           bool include_nulls, MemoryPool* pool,
+                           std::shared_ptr<Array>* out) {
+  return MakeListArray<ListType>(child_array, num_lists, include_nulls, pool, out);
+}
+
+Status MakeRandomLargeListArray(const std::shared_ptr<Array>& child_array, int num_lists,
+                                bool include_nulls, MemoryPool* pool,
+                                std::shared_ptr<Array>* out) {
+  return MakeListArray<LargeListType>(child_array, num_lists, include_nulls, pool, out);
+}
+
 Status MakeRandomBooleanArray(const int length, bool include_nulls,
                               std::shared_ptr<Array>* out) {
   std::vector<uint8_t> values(length);
@@ -614,6 +614,8 @@ Status MakeStruct(std::shared_ptr<RecordBatch>* out) {
   return Status::OK();
 }
 
+namespace {
+
 Status AddArtificialOffsetInChildArray(ArrayData* array, int64_t offset) {
   auto& child = array->child_data[1];
   auto builder = MakeBuilder(child->type).ValueOrDie();
@@ -623,6 +625,8 @@ Status AddArtificialOffsetInChildArray(ArrayData* array, int64_t offset) {
   return Status::OK();
 }
 
+}  // namespace
+
 Status MakeRunEndEncoded(std::shared_ptr<RecordBatch>* out) {
   const int64_t logical_length = 10000;
   const int64_t slice_offset = 2000;
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index 4238ecbf3a..cba484af15 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -1549,12 +1549,6 @@ Result<std::shared_ptr<RecordBatchWriter>> MakeStreamWriter(
       options, /*is_file_format=*/false);
 }
 
-Result<std::shared_ptr<RecordBatchWriter>> NewStreamWriter(
-    io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
-    const IpcWriteOptions& options) {
-  return MakeStreamWriter(sink, schema, options);
-}
-
 Result<std::shared_ptr<RecordBatchWriter>> MakeFileWriter(
     io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
     const IpcWriteOptions& options,
@@ -1574,13 +1568,6 @@ Result<std::shared_ptr<RecordBatchWriter>> MakeFileWriter(
       schema, options, /*is_file_format=*/true);
 }
 
-Result<std::shared_ptr<RecordBatchWriter>> NewFileWriter(
-    io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
-    const IpcWriteOptions& options,
-    const std::shared_ptr<const KeyValueMetadata>& metadata) {
-  return MakeFileWriter(sink, schema, options, metadata);
-}
-
 namespace internal {
 
 Result<std::unique_ptr<RecordBatchWriter>> OpenRecordBatchWriter(
diff --git a/cpp/src/arrow/tensor/csx_converter.cc b/cpp/src/arrow/tensor/csx_converter.cc
index f30e71f5c4..679c3a0f1a 100644
--- a/cpp/src/arrow/tensor/csx_converter.cc
+++ b/cpp/src/arrow/tensor/csx_converter.cc
@@ -157,6 +157,8 @@ Status MakeSparseCSXMatrixFromTensor(SparseMatrixCompressedAxis axis,
   return Status::OK();
 }
 
+namespace {
+
 Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSXMatrix(
     SparseMatrixCompressedAxis axis, MemoryPool* pool,
     const std::shared_ptr<Tensor>& indptr, const std::shared_ptr<Tensor>& indices,
@@ -211,6 +213,8 @@ Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSXMatrix(
                                   dim_names);
 }
 
+}  // namespace
+
 Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSRMatrix(
     MemoryPool* pool, const SparseCSRMatrix* sparse_tensor) {
   const auto& sparse_index =
diff --git a/cpp/src/arrow/testing/random.cc b/cpp/src/arrow/testing/random.cc
index 40a67b40d7..2d6ba44d7e 100644
--- a/cpp/src/arrow/testing/random.cc
+++ b/cpp/src/arrow/testing/random.cc
@@ -128,6 +128,35 @@ struct GenerateOptions {
   double nan_probability_;
 };
 
+void GenerateFullDayMillisNoNan(uint8_t* buffer, size_t n) {
+  int64_t* data = reinterpret_cast<int64_t*>(buffer);
+  constexpr int64_t kFullDayMillis = 1000 * 60 * 60 * 24;
+  std::for_each(data, data + n, [&](int64_t& v) { return v *= kFullDayMillis; });
+}
+
+template <typename ArrowType, typename OptionType>
+std::shared_ptr<NumericArray<ArrowType>> GenerateNumericArray(int64_t size,
+                                                              OptionType options,
+                                                              int64_t alignment,
+                                                              MemoryPool* memory_pool) {
+  using CType = typename ArrowType::c_type;
+  auto type = TypeTraits<ArrowType>::type_singleton();
+  BufferVector buffers{2};
+
+  int64_t null_count = 0;
+  buffers[0] = *AllocateEmptyBitmap(size, alignment, memory_pool);
+  options.GenerateBitmap(buffers[0]->mutable_data(), size, &null_count);
+
+  buffers[1] = *AllocateBuffer(sizeof(CType) * size, alignment, memory_pool);
+  options.GenerateData(buffers[1]->mutable_data(), size);
+  if (std::is_same<ArrowType, Date64Type>::value) {
+    GenerateFullDayMillisNoNan(buffers[1]->mutable_data(), size);
+  }
+
+  auto array_data = ArrayData::Make(type, size, buffers, null_count);
+  return std::make_shared<NumericArray<ArrowType>>(array_data);
+}
+
 }  // namespace
 
 std::shared_ptr<Buffer> RandomArrayGenerator::NullBitmap(int64_t size,
@@ -176,33 +205,6 @@ std::shared_ptr<Array> RandomArrayGenerator::Boolean(int64_t size,
   return std::make_shared<BooleanArray>(array_data);
 }
 
-void GenerateFullDayMillisNoNan(uint8_t* buffer, size_t n) {
-  int64_t* data = reinterpret_cast<int64_t*>(buffer);
-  constexpr int64_t kFullDayMillis = 1000 * 60 * 60 * 24;
-  std::for_each(data, data + n, [&](int64_t& v) { return v *= kFullDayMillis; });
-}
-
-template <typename ArrowType, typename OptionType>
-static std::shared_ptr<NumericArray<ArrowType>> GenerateNumericArray(
-    int64_t size, OptionType options, int64_t alignment, MemoryPool* memory_pool) {
-  using CType = typename ArrowType::c_type;
-  auto type = TypeTraits<ArrowType>::type_singleton();
-  BufferVector buffers{2};
-
-  int64_t null_count = 0;
-  buffers[0] = *AllocateEmptyBitmap(size, alignment, memory_pool);
-  options.GenerateBitmap(buffers[0]->mutable_data(), size, &null_count);
-
-  buffers[1] = *AllocateBuffer(sizeof(CType) * size, alignment, memory_pool);
-  options.GenerateData(buffers[1]->mutable_data(), size);
-  if (std::is_same<ArrowType, Date64Type>::value) {
-    GenerateFullDayMillisNoNan(buffers[1]->mutable_data(), size);
-  }
-
-  auto array_data = ArrayData::Make(type, size, buffers, null_count);
-  return std::make_shared<NumericArray<ArrowType>>(array_data);
-}
-
 #define PRIMITIVE_RAND_IMPL(Name, CType, ArrowType, Distribution)              
  \
   std::shared_ptr<Array> RandomArrayGenerator::Name(                           
  \
       int64_t size, CType min, CType max, double probability, int64_t 
alignment, \
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 99e51cdbe2..2e9d860a8d 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -3226,12 +3226,6 @@ std::shared_ptr<DataType> map(std::shared_ptr<DataType> key_type,
                                    keys_sorted);
 }
 
-std::shared_ptr<DataType> map(std::shared_ptr<Field> key_field,
-                              std::shared_ptr<Field> item_field, bool keys_sorted) {
-  return std::make_shared<MapType>(std::move(key_field), std::move(item_field),
-                                   keys_sorted);
-}
-
 std::shared_ptr<DataType> fixed_size_list(std::shared_ptr<DataType> value_type,
                                           int32_t list_size) {
   return std::make_shared<FixedSizeListType>(std::move(value_type), list_size);
@@ -3289,6 +3283,8 @@ std::shared_ptr<DataType> dense_union(FieldVector child_fields,
   return std::make_shared<DenseUnionType>(std::move(child_fields), std::move(type_codes));
 }
 
+namespace {
+
 FieldVector FieldsFromArraysAndNames(std::vector<std::string> names,
                                      const ArrayVector& arrays) {
   FieldVector fields(arrays.size());
@@ -3308,6 +3304,8 @@ FieldVector FieldsFromArraysAndNames(std::vector<std::string> names,
   return fields;
 }
 
+}  // namespace
+
 std::shared_ptr<DataType> sparse_union(const ArrayVector& children,
                                        std::vector<std::string> field_names,
                                        std::vector<int8_t> type_codes) {
diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc
index e623e65911..c27cfd5265 100644
--- a/cpp/src/arrow/util/bitmap_ops.cc
+++ b/cpp/src/arrow/util/bitmap_ops.cc
@@ -101,6 +101,8 @@ int64_t CountAndSetBits(const uint8_t* left_bitmap, int64_t left_offset,
   return count;
 }
 
+namespace {
+
 enum class TransferMode : bool { Copy, Invert };
 
 // Reverse all bits from entire byte(uint8)
@@ -213,6 +215,8 @@ void ReverseBlockOffsets(const uint8_t* data, int64_t offset, int64_t length,
   }
 }
 
+}  // namespace
+
 template <TransferMode mode>
 Result<std::shared_ptr<Buffer>> TransferBitmap(MemoryPool* pool, const uint8_t* data,
                                                int64_t offset, int64_t length,
diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc
index 1ed28d717d..661634e2c3 100644
--- a/cpp/src/arrow/util/io_util.cc
+++ b/cpp/src/arrow/util/io_util.cc
@@ -2141,11 +2141,6 @@ uint64_t GetThreadId() {
   return equiv;
 }
 
-uint64_t GetOptionalThreadId() {
-  auto tid = GetThreadId();
-  return (tid == 0) ? tid - 1 : tid;
-}
-
 // Returns the current resident set size (physical memory use) measured
 // in bytes, or zero if the value cannot be determined on this OS.
 int64_t GetCurrentRSS() {
diff --git a/cpp/src/arrow/util/macros.h b/cpp/src/arrow/util/macros.h
index af29fd636b..55bc1eeb1d 100644
--- a/cpp/src/arrow/util/macros.h
+++ b/cpp/src/arrow/util/macros.h
@@ -171,6 +171,19 @@
 
 // ----------------------------------------------------------------------
 
+// Macros to disable warnings about undeclared global functions
+#if defined(__GNUC__)
+#  define ARROW_SUPPRESS_MISSING_DECLARATIONS_WARNING \
+    _Pragma("GCC diagnostic push");                   \
+    _Pragma("GCC diagnostic ignored \"-Wmissing-declarations\"")
+#  define ARROW_UNSUPPRESS_MISSING_DECLARATIONS_WARNING _Pragma("GCC diagnostic pop")
+#else
+#  define ARROW_SUPPRESS_MISSING_DECLARATIONS_WARNING
+#  define ARROW_UNSUPPRESS_MISSING_DECLARATIONS_WARNING
+#endif
+
+// ----------------------------------------------------------------------
+
 // macros to disable padding
 // these macros are portable across different compilers and platforms
 //[https://github.com/google/flatbuffers/blob/master/include/flatbuffers/flatbuffers.h#L1355]
diff --git a/cpp/src/arrow/util/memory.cc b/cpp/src/arrow/util/memory.cc
index 20f5ca7aae..89e9b32ee1 100644
--- a/cpp/src/arrow/util/memory.cc
+++ b/cpp/src/arrow/util/memory.cc
@@ -29,10 +29,14 @@ inline uint8_t* pointer_logical_and(const uint8_t* address, uintptr_t bits) {
   return reinterpret_cast<uint8_t*>(value & bits);
 }
 
+namespace {
+
 // This function is just for avoiding MinGW-w64 32bit crash.
 // See also: https://sourceforge.net/p/mingw-w64/bugs/767/
 void* wrap_memcpy(void* dst, const void* src, size_t n) { return memcpy(dst, src, n); }
 
+}  // namespace
+
 void parallel_memcopy(uint8_t* dst, const uint8_t* src, int64_t nbytes,
                       uintptr_t block_size, int num_threads) {
   // XXX This function is really using `num_threads + 1` threads.
diff --git a/cpp/src/gandiva/encrypt_utils.cc b/cpp/src/gandiva/encrypt_utils.cc
index 16c195d494..c39cf3cf0b 100644
--- a/cpp/src/gandiva/encrypt_utils.cc
+++ b/cpp/src/gandiva/encrypt_utils.cc
@@ -109,16 +109,4 @@ int32_t aes_decrypt(const char* ciphertext, int32_t ciphertext_len, const char*
   return plaintext_len;
 }
 
-const EVP_CIPHER* get_cipher_algo(int32_t key_length) {
-  switch (key_length) {
-    case 16:
-      return EVP_aes_128_ecb();
-    case 24:
-      return EVP_aes_192_ecb();
-    case 32:
-      return EVP_aes_256_ecb();
-    default:
-      throw std::runtime_error("unsupported key length");
-  }
-}
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc
index 95f7ed70c2..6148cfab74 100644
--- a/cpp/src/gandiva/engine.cc
+++ b/cpp/src/gandiva/engine.cc
@@ -115,10 +115,12 @@ namespace gandiva {
 extern const unsigned char kPrecompiledBitcode[];
 extern const size_t kPrecompiledBitcodeSize;
 
+namespace {
+
 std::once_flag llvm_init_once_flag;
-static bool llvm_init = false;
-static llvm::StringRef cpu_name;
-static std::vector<std::string> cpu_attrs;
+bool llvm_init = false;
+llvm::StringRef cpu_name;
+std::vector<std::string> cpu_attrs;
 std::once_flag register_exported_funcs_flag;
 
 template <typename T>
@@ -241,6 +243,29 @@ Result<std::unique_ptr<llvm::orc::LLJIT>> BuildJIT(
   return jit;
 }
 
+arrow::Status VerifyAndLinkModule(
+    llvm::Module& dest_module,
+    llvm::Expected<std::unique_ptr<llvm::Module>> src_module_or_error) {
+  ARROW_ASSIGN_OR_RAISE(
+      auto src_ir_module,
+      AsArrowResult(src_module_or_error, "Failed to verify and link module: "));
+
+  src_ir_module->setDataLayout(dest_module.getDataLayout());
+
+  std::string error_info;
+  llvm::raw_string_ostream error_stream(error_info);
+  ARROW_RETURN_IF(
+      llvm::verifyModule(*src_ir_module, &error_stream),
+      Status::CodeGenError("verify of IR Module failed: " + error_stream.str()));
+
+  ARROW_RETURN_IF(llvm::Linker::linkModules(dest_module, std::move(src_ir_module)),
+                  Status::CodeGenError("failed to link IR Modules"));
+
+  return Status::OK();
+}
+
+}  // namespace
+
 Status Engine::SetLLVMObjectCache(GandivaObjectCache& object_cache) {
   auto cached_buffer = object_cache.getObject(nullptr);
   if (cached_buffer) {
@@ -348,27 +373,6 @@ Result<std::unique_ptr<Engine>> Engine::Make(
   return engine;
 }
 
-static arrow::Status VerifyAndLinkModule(
-    llvm::Module& dest_module,
-    llvm::Expected<std::unique_ptr<llvm::Module>> src_module_or_error) {
-  ARROW_ASSIGN_OR_RAISE(
-      auto src_ir_module,
-      AsArrowResult(src_module_or_error, "Failed to verify and link module: "));
-
-  src_ir_module->setDataLayout(dest_module.getDataLayout());
-
-  std::string error_info;
-  llvm::raw_string_ostream error_stream(error_info);
-  ARROW_RETURN_IF(
-      llvm::verifyModule(*src_ir_module, &error_stream),
-      Status::CodeGenError("verify of IR Module failed: " + error_stream.str()));
-
-  ARROW_RETURN_IF(llvm::Linker::linkModules(dest_module, std::move(src_ir_module)),
-                  Status::CodeGenError("failed to link IR Modules"));
-
-  return Status::OK();
-}
-
 llvm::Module* Engine::module() {
   DCHECK(!module_finalized_) << "module cannot be accessed after finalized";
   return module_.get();
diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc
index 0955a2e47f..8329c3e12b 100644
--- a/cpp/src/gandiva/function_registry.cc
+++ b/cpp/src/gandiva/function_registry.cc
@@ -32,7 +32,9 @@
 
 namespace gandiva {
 
-static constexpr uint32_t kMaxFunctionSignatures = 2048;
+namespace {
+
+constexpr uint32_t kMaxFunctionSignatures = 2048;
 
 // encapsulates an llvm memory buffer in an arrow buffer
 // this is needed because we don't expose the llvm memory buffer to the outside world in
@@ -48,6 +50,20 @@ class LLVMMemoryArrowBuffer : public arrow::Buffer {
   std::unique_ptr<llvm::MemoryBuffer> llvm_buffer_;
 };
 
+arrow::Result<std::unique_ptr<llvm::MemoryBuffer>> GetBufferFromFile(
+    const std::string& bitcode_file_path) {
+  auto buffer_or_error = llvm::MemoryBuffer::getFile(bitcode_file_path);
+
+  ARROW_RETURN_IF(!buffer_or_error,
+                  Status::IOError("Could not load module from bitcode file: ",
+                                  bitcode_file_path +
+                                      " Error: " + buffer_or_error.getError().message()));
+
+  return std::move(buffer_or_error.get());
+}
+
+}  // namespace
+
 FunctionRegistry::FunctionRegistry() { pc_registry_.reserve(kMaxFunctionSignatures); }
 
 FunctionRegistry::iterator FunctionRegistry::begin() const {
@@ -81,18 +97,6 @@ Status FunctionRegistry::Add(NativeFunction func) {
   return arrow::Status::OK();
 }
 
-arrow::Result<std::unique_ptr<llvm::MemoryBuffer>> GetBufferFromFile(
-    const std::string& bitcode_file_path) {
-  auto buffer_or_error = llvm::MemoryBuffer::getFile(bitcode_file_path);
-
-  ARROW_RETURN_IF(!buffer_or_error,
-                  Status::IOError("Could not load module from bitcode file: ",
-                                  bitcode_file_path +
-                                      " Error: " + buffer_or_error.getError().message()));
-
-  return std::move(buffer_or_error.get());
-}
-
 Status FunctionRegistry::Register(const std::vector<NativeFunction>& funcs,
                                   const std::string& bitcode_path) {
   ARROW_ASSIGN_OR_RAISE(auto llvm_buffer, GetBufferFromFile(bitcode_path));
diff --git a/cpp/src/gandiva/function_signature.cc b/cpp/src/gandiva/function_signature.cc
index 43064b6686..136afca2d9 100644
--- a/cpp/src/gandiva/function_signature.cc
+++ b/cpp/src/gandiva/function_signature.cc
@@ -35,6 +35,8 @@ using arrow::internal::hash_combine;
 
 namespace gandiva {
 
+namespace {
+
 bool DataTypeEquals(const DataTypePtr& left, const DataTypePtr& right) {
   if (left->id() == right->id()) {
     switch (left->id()) {
@@ -53,6 +55,8 @@ bool DataTypeEquals(const DataTypePtr& left, const DataTypePtr& right) {
   }
 }
 
+}  // namespace
+
 FunctionSignature::FunctionSignature(std::string base_name, DataTypeVector param_types,
                                      DataTypePtr ret_type)
     : base_name_(std::move(base_name)),
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc
index 76bbdb902c..dff15e6fd2 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -41,6 +41,8 @@
 
 extern "C" {
 
+ARROW_SUPPRESS_MISSING_DECLARATIONS_WARNING
+
 static char mask_array[256] = {
     (char)0,  (char)1,  (char)2,  (char)3,   (char)4,   (char)5,   (char)6,   (char)7,
     (char)8,  (char)9,  (char)10, (char)11,  (char)12,  (char)13,  (char)14,  (char)15,
@@ -843,6 +845,8 @@ const char* gdv_mask_show_last_n_utf8_int32(int64_t context, const char* data,
   int32_t n_to_mask = num_of_chars - n_to_show;
   return gdv_mask_first_n_utf8_int32(context, data, data_len, n_to_mask, out_len);
 }
+
+ARROW_UNSUPPRESS_MISSING_DECLARATIONS_WARNING
 }
 
 namespace gandiva {
diff --git a/cpp/src/gandiva/gdv_hash_function_stubs.cc b/cpp/src/gandiva/gdv_hash_function_stubs.cc
index aac70a06be..41eef32445 100644
--- a/cpp/src/gandiva/gdv_hash_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_hash_function_stubs.cc
@@ -27,6 +27,8 @@
 
 extern "C" {
 
+ARROW_SUPPRESS_MISSING_DECLARATIONS_WARNING
+
 #define MD5_HASH_FUNCTION(TYPE)                                                
    \
   GANDIVA_EXPORT                                                               
    \
   const char* gdv_fn_md5_##TYPE(int64_t context, gdv_##TYPE value, bool 
validity,  \
@@ -212,6 +214,8 @@ const char* gdv_fn_sha1_decimal128(int64_t context, int64_t x_high, uint64_t x_l
   const gandiva::BasicDecimal128 decimal_128(x_high, x_low);
   return gandiva::gdv_sha1_hash(context, decimal_128.ToBytes().data(), 16, out_length);
 }
+
+ARROW_UNSUPPRESS_MISSING_DECLARATIONS_WARNING
 }
 
 namespace gandiva {
diff --git a/cpp/src/gandiva/gdv_string_function_stubs.cc b/cpp/src/gandiva/gdv_string_function_stubs.cc
index 42af8fde61..855bccb818 100644
--- a/cpp/src/gandiva/gdv_string_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_string_function_stubs.cc
@@ -35,6 +35,8 @@
 
 extern "C" {
 
+ARROW_SUPPRESS_MISSING_DECLARATIONS_WARNING
+
 bool gdv_fn_like_utf8_utf8(int64_t ptr, const char* data, int data_len,
                            const char* pattern, int pattern_len) {
   gandiva::LikeHolder* holder = reinterpret_cast<gandiva::LikeHolder*>(ptr);
@@ -755,6 +757,8 @@ const char* translate_utf8_utf8_utf8(int64_t context, const char* in, int32_t in
   *out_len = result_len;
   return result;
 }
+
+ARROW_UNSUPPRESS_MISSING_DECLARATIONS_WARNING
 }
 
 namespace gandiva {
diff --git a/cpp/src/gandiva/regex_functions_holder.cc b/cpp/src/gandiva/regex_functions_holder.cc
index ef07a9ef0b..1c9e44d61b 100644
--- a/cpp/src/gandiva/regex_functions_holder.cc
+++ b/cpp/src/gandiva/regex_functions_holder.cc
@@ -23,11 +23,15 @@
 
 namespace gandiva {
 
+namespace {
+
 std::string& RemovePatternEscapeChars(const FunctionNode& node, std::string& pattern) {
   pattern.erase(std::remove(pattern.begin(), pattern.end(), '\\'), pattern.end());
   return pattern;
 }
 
+}  // namespace
+
 // Short-circuit pattern matches for the following common sub cases :
 // - starts_with, ends_with and is_substr
 const FunctionNode LikeHolder::TryOptimize(const FunctionNode& node) {
diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc
index d9a2b98914..d42fdc5034 100644
--- a/cpp/src/parquet/arrow/reader.cc
+++ b/cpp/src/parquet/arrow/reader.cc
@@ -1387,11 +1387,6 @@ Result<std::unique_ptr<FileReader>> FileReaderBuilder::Build() {
   return out;
 }
 
-Status OpenFile(std::shared_ptr<::arrow::io::RandomAccessFile> file, MemoryPool* pool,
-                std::unique_ptr<FileReader>* reader) {
-  return OpenFile(std::move(file), pool).Value(reader);
-}
-
 Result<std::unique_ptr<FileReader>> OpenFile(
     std::shared_ptr<::arrow::io::RandomAccessFile> file, MemoryPool* pool) {
   FileReaderBuilder builder;
@@ -1401,6 +1396,8 @@ Result<std::unique_ptr<FileReader>> OpenFile(
 
 namespace internal {
 
+namespace {
+
 Status FuzzReader(std::unique_ptr<FileReader> reader) {
   auto st = Status::OK();
   for (int i = 0; i < reader->num_row_groups(); ++i) {
@@ -1414,6 +1411,8 @@ Status FuzzReader(std::unique_ptr<FileReader> reader) {
   return st;
 }
 
+}  // namespace
+
 Status FuzzReader(const uint8_t* data, int64_t size) {
   auto buffer = std::make_shared<::arrow::Buffer>(data, size);
   Status st;
diff --git a/cpp/src/parquet/arrow/schema_internal.cc b/cpp/src/parquet/arrow/schema_internal.cc
index 2b9576bb43..72b8f0d992 100644
--- a/cpp/src/parquet/arrow/schema_internal.cc
+++ b/cpp/src/parquet/arrow/schema_internal.cc
@@ -37,6 +37,8 @@ using ::arrow::Result;
 using ::arrow::Status;
 using ::arrow::internal::checked_cast;
 
+namespace {
+
 Result<std::shared_ptr<ArrowType>> MakeArrowDecimal(const LogicalType& logical_type) {
   const auto& decimal = checked_cast<const DecimalLogicalType&>(logical_type);
   if (decimal.precision() <= ::arrow::Decimal128Type::kMaxPrecision) {
@@ -208,6 +210,8 @@ Result<std::shared_ptr<ArrowType>> FromFLBA(
   }
 }
 
+}  // namespace
+
 ::arrow::Result<std::shared_ptr<ArrowType>> FromInt32(const LogicalType& logical_type) {
   switch (logical_type.type()) {
     case LogicalType::Type::INT:
diff --git a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc
index b40a32eaf8..4b2b06e5e0 100644
--- a/cpp/src/parquet/arrow/writer.cc
+++ b/cpp/src/parquet/arrow/writer.cc
@@ -107,6 +107,33 @@ bool HasNullableRoot(const SchemaManifest& schema_manifest,
   return nullable;
 }
 
+Status GetSchemaMetadata(const ::arrow::Schema& schema, ::arrow::MemoryPool* pool,
+                         const ArrowWriterProperties& properties,
+                         std::shared_ptr<const KeyValueMetadata>* out) {
+  if (!properties.store_schema()) {
+    *out = nullptr;
+    return Status::OK();
+  }
+
+  static const std::string kArrowSchemaKey = "ARROW:schema";
+  std::shared_ptr<KeyValueMetadata> result;
+  if (schema.metadata()) {
+    result = schema.metadata()->Copy();
+  } else {
+    result = ::arrow::key_value_metadata({}, {});
+  }
+
+  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> serialized,
+                        ::arrow::ipc::SerializeSchema(schema, pool));
+
+  // The serialized schema is not UTF-8, which is required for Thrift
+  std::string schema_as_string = serialized->ToString();
+  std::string schema_base64 = ::arrow::util::base64_encode(schema_as_string);
+  result->Append(kArrowSchemaKey, std::move(schema_base64));
+  *out = std::move(result);
+  return Status::OK();
+}
+
 // Manages writing nested parquet columns with support for all nested types
 // supported by parquet.
 class ArrowColumnWriterV2 {
@@ -522,33 +549,6 @@ Status FileWriter::Make(::arrow::MemoryPool* pool,
   return Status::OK();
 }
 
-Status GetSchemaMetadata(const ::arrow::Schema& schema, ::arrow::MemoryPool* pool,
-                         const ArrowWriterProperties& properties,
-                         std::shared_ptr<const KeyValueMetadata>* out) {
-  if (!properties.store_schema()) {
-    *out = nullptr;
-    return Status::OK();
-  }
-
-  static const std::string kArrowSchemaKey = "ARROW:schema";
-  std::shared_ptr<KeyValueMetadata> result;
-  if (schema.metadata()) {
-    result = schema.metadata()->Copy();
-  } else {
-    result = ::arrow::key_value_metadata({}, {});
-  }
-
-  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> serialized,
-                        ::arrow::ipc::SerializeSchema(schema, pool));
-
-  // The serialized schema is not UTF-8, which is required for Thrift
-  std::string schema_as_string = serialized->ToString();
-  std::string schema_base64 = ::arrow::util::base64_encode(schema_as_string);
-  result->Append(kArrowSchemaKey, std::move(schema_base64));
-  *out = std::move(result);
-  return Status::OK();
-}
-
 Result<std::unique_ptr<FileWriter>> FileWriter::Open(
     const ::arrow::Schema& schema, ::arrow::MemoryPool* pool,
     std::shared_ptr<::arrow::io::OutputStream> sink,
diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
index a56faa3aae..288a656d20 100644
--- a/cpp/src/parquet/column_writer.cc
+++ b/cpp/src/parquet/column_writer.cc
@@ -1211,6 +1211,8 @@ inline void DoInBatches(const int16_t* def_levels, const int16_t* rep_levels,
   }
 }
 
+namespace {
+
 bool DictionaryDirectWriteSupported(const ::arrow::Array& array) {
   DCHECK_EQ(array.type_id(), ::arrow::Type::DICTIONARY);
   const ::arrow::DictionaryType& dict_type =
@@ -1231,6 +1233,8 @@ Status ConvertDictionaryToDense(const ::arrow::Array& array, MemoryPool* pool,
   return Status::OK();
 }
 
+}  // namespace
+
 template <typename ParquetType>
 class TypedColumnWriterImpl : public ColumnWriterImpl,
                               public TypedColumnWriter<ParquetType> {
diff --git a/cpp/src/parquet/encryption/encryption_internal_nossl.cc b/cpp/src/parquet/encryption/encryption_internal_nossl.cc
index 2448d9efa9..2450f8654d 100644
--- a/cpp/src/parquet/encryption/encryption_internal_nossl.cc
+++ b/cpp/src/parquet/encryption/encryption_internal_nossl.cc
@@ -20,11 +20,15 @@
 
 namespace parquet::encryption {
 
+namespace {
+
 void ThrowOpenSSLRequiredException() {
   throw ParquetException(
       "Calling encryption method in Arrow/Parquet built without OpenSSL");
 }
 
+}  // namespace
+
 class AesEncryptor::AesEncryptorImpl {};
 
 AesEncryptor::~AesEncryptor() {}
diff --git a/cpp/src/parquet/file_reader.cc b/cpp/src/parquet/file_reader.cc
index a7f50162da..b246feaf73 100644
--- a/cpp/src/parquet/file_reader.cc
+++ b/cpp/src/parquet/file_reader.cc
@@ -165,6 +165,8 @@ std::unique_ptr<PageReader> RowGroupReader::GetColumnPageReader(int i) {
 // Returns the rowgroup metadata
 const RowGroupMetaData* RowGroupReader::metadata() const { return contents_->metadata(); }
 
+namespace {
+
 /// Compute the section of the file that should be read for the given
 /// row group and column chunk.
 ::arrow::io::ReadRange ComputeColumnChunkRange(FileMetaData* file_metadata,
@@ -206,6 +208,8 @@ const RowGroupMetaData* RowGroupReader::metadata() const { return contents_->met
   return {col_start, col_length};
 }
 
+}  // namespace
+
 // RowGroupReader::Contents implementation for the Parquet file specification
 class SerializedRowGroup : public RowGroupReader::Contents {
  public:
diff --git a/cpp/src/parquet/level_comparison.cc b/cpp/src/parquet/level_comparison.cc
index 199e137cea..f8d972e6c6 100644
--- a/cpp/src/parquet/level_comparison.cc
+++ b/cpp/src/parquet/level_comparison.cc
@@ -17,6 +17,10 @@
 
 #include "parquet/level_comparison.h"
 
+#if defined(ARROW_HAVE_RUNTIME_AVX2)
+#  include "parquet/level_comparison_avx2_internal.h"
+#endif
+
 #define PARQUET_IMPL_NAMESPACE standard
 #include "parquet/level_comparison_inc.h"
 #undef PARQUET_IMPL_NAMESPACE
@@ -27,11 +31,6 @@
 
 namespace parquet::internal {
 
-#if defined(ARROW_HAVE_RUNTIME_AVX2)
-MinMax FindMinMaxAvx2(const int16_t* levels, int64_t num_levels);
-uint64_t GreaterThanBitmapAvx2(const int16_t* levels, int64_t num_levels, int16_t rhs);
-#endif
-
 namespace {
 
 using ::arrow::internal::DispatchLevel;
diff --git a/cpp/src/parquet/level_comparison_avx2.cc b/cpp/src/parquet/level_comparison_avx2.cc
index b33eb2e295..db8935d403 100644
--- a/cpp/src/parquet/level_comparison_avx2.cc
+++ b/cpp/src/parquet/level_comparison_avx2.cc
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include "parquet/level_comparison_avx2_internal.h"
+
 #define PARQUET_IMPL_NAMESPACE avx2
 #include "parquet/level_comparison_inc.h"
 #undef PARQUET_IMPL_NAMESPACE
diff --git a/cpp/src/parquet/level_comparison_avx2.cc b/cpp/src/parquet/level_comparison_avx2_internal.h
similarity index 73%
copy from cpp/src/parquet/level_comparison_avx2.cc
copy to cpp/src/parquet/level_comparison_avx2_internal.h
index b33eb2e295..3f68a6ff05 100644
--- a/cpp/src/parquet/level_comparison_avx2.cc
+++ b/cpp/src/parquet/level_comparison_avx2_internal.h
@@ -15,20 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#define PARQUET_IMPL_NAMESPACE avx2
-#include "parquet/level_comparison_inc.h"
-#undef PARQUET_IMPL_NAMESPACE
+#pragma once
 
-namespace parquet {
-namespace internal {
+#include <cstdint>
 
-uint64_t GreaterThanBitmapAvx2(const int16_t* levels, int64_t num_levels, int16_t rhs) {
-  return avx2::GreaterThanBitmapImpl(levels, num_levels, rhs);
-}
+#include "parquet/level_comparison.h"
 
-MinMax FindMinMaxAvx2(const int16_t* levels, int64_t num_levels) {
-  return avx2::FindMinMaxImpl(levels, num_levels);
-}
+namespace parquet::internal {
 
-}  // namespace internal
-}  // namespace parquet
+MinMax FindMinMaxAvx2(const int16_t* levels, int64_t num_levels);
+uint64_t GreaterThanBitmapAvx2(const int16_t* levels, int64_t num_levels, int16_t rhs);
+
+}  // namespace parquet::internal
diff --git a/cpp/src/parquet/level_conversion.cc b/cpp/src/parquet/level_conversion.cc
index 1271afd866..c84007c5cd 100644
--- a/cpp/src/parquet/level_conversion.cc
+++ b/cpp/src/parquet/level_conversion.cc
@@ -14,6 +14,7 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
+
 #include "parquet/level_conversion.h"
 
 #include <algorithm>
@@ -27,6 +28,10 @@
 #include "parquet/exception.h"
 
 #include "parquet/level_comparison.h"
+#if defined(ARROW_HAVE_RUNTIME_BMI2)
+#  include "parquet/level_conversion_bmi2_internal.h"
+#endif
+
 #define PARQUET_IMPL_NAMESPACE standard
 #include "parquet/level_conversion_inc.h"
 #undef PARQUET_IMPL_NAMESPACE
@@ -123,13 +128,6 @@ void DefRepLevelsToListInfo(const int16_t* def_levels, const int16_t* rep_levels
 
 }  // namespace
 
-#if defined(ARROW_HAVE_RUNTIME_BMI2)
-// defined in level_conversion_bmi2.cc for dynamic dispatch.
-void DefLevelsToBitmapBmi2WithRepeatedParent(const int16_t* def_levels,
-                                             int64_t num_def_levels, LevelInfo level_info,
-                                             ValidityBitmapInputOutput* output);
-#endif
-
 void DefLevelsToBitmap(const int16_t* def_levels, int64_t num_def_levels,
                        LevelInfo level_info, ValidityBitmapInputOutput* output) {
   // It is simpler to rely on rep_level here until PARQUET-1899 is done and the code
diff --git a/cpp/src/parquet/level_conversion_bmi2.cc b/cpp/src/parquet/level_conversion_bmi2.cc
index a39d1fd1eb..faa88e81c6 100644
--- a/cpp/src/parquet/level_conversion_bmi2.cc
+++ b/cpp/src/parquet/level_conversion_bmi2.cc
@@ -14,7 +14,8 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-#include "parquet/level_conversion.h"
+
+#include "parquet/level_conversion_bmi2_internal.h"
 
 #define PARQUET_IMPL_NAMESPACE bmi2
 #include "parquet/level_conversion_inc.h"
diff --git a/cpp/src/parquet/level_conversion_bmi2.cc b/cpp/src/parquet/level_conversion_bmi2_internal.h
similarity index 79%
copy from cpp/src/parquet/level_conversion_bmi2.cc
copy to cpp/src/parquet/level_conversion_bmi2_internal.h
index a39d1fd1eb..81cc8c29d4 100644
--- a/cpp/src/parquet/level_conversion_bmi2.cc
+++ b/cpp/src/parquet/level_conversion_bmi2_internal.h
@@ -14,18 +14,17 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-#include "parquet/level_conversion.h"
 
-#define PARQUET_IMPL_NAMESPACE bmi2
-#include "parquet/level_conversion_inc.h"
-#undef PARQUET_IMPL_NAMESPACE
+#pragma once
+
+#include <cstdint>
+
+#include "parquet/level_conversion.h"
 
 namespace parquet::internal {
+
 void DefLevelsToBitmapBmi2WithRepeatedParent(const int16_t* def_levels,
                                              int64_t num_def_levels, LevelInfo level_info,
-                                             ValidityBitmapInputOutput* output) {
-  bmi2::DefLevelsToBitmapSimd</*has_repeated_parent=*/true>(def_levels, num_def_levels,
-                                                            level_info, output);
-}
+                                             ValidityBitmapInputOutput* output);
 
 }  // namespace parquet::internal
diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc
index 22e3fd4d05..30d69f4db5 100644
--- a/cpp/src/parquet/metadata.cc
+++ b/cpp/src/parquet/metadata.cc
@@ -111,7 +111,9 @@ static std::shared_ptr<Statistics> MakeTypedColumnStats(
       metadata.statistics.__isset.null_count, metadata.statistics.__isset.distinct_count);
 }
 
-static std::shared_ptr<geospatial::GeoStatistics> MakeColumnGeometryStats(
+namespace {
+
+std::shared_ptr<geospatial::GeoStatistics> MakeColumnGeometryStats(
     const format::ColumnMetaData& metadata, const ColumnDescriptor* descr) {
   if (metadata.__isset.geospatial_statistics) {
     geospatial::EncodedGeoStatistics encoded_geo_stats =
@@ -186,6 +188,8 @@ void ToThriftKeyValueMetadata(const KeyValueMetadata& source, Metadata* metadata
   metadata->__set_key_value_metadata(std::move(key_value_metadata));
 }
 
+}  // namespace
+
 // MetaData Accessor
 
 // ColumnCryptoMetaData
diff --git a/cpp/src/parquet/printer.cc b/cpp/src/parquet/printer.cc
index 15dd49dd16..59e8935e14 100644
--- a/cpp/src/parquet/printer.cc
+++ b/cpp/src/parquet/printer.cc
@@ -56,14 +56,6 @@ void PrintPageEncodingStats(std::ostream& stream,
   }
 }
 
-}  // namespace
-
-// ----------------------------------------------------------------------
-// ParquetFilePrinter::DebugPrint
-
-// the fixed initial size is just for an example
-#define COL_WIDTH 30
-
 void PutChars(std::ostream& stream, char c, int n) {
   for (int i = 0; i < n; ++i) {
     stream.put(c);
@@ -83,6 +75,14 @@ void PrintKeyValueMetadata(std::ostream& stream,
   }
 }
 
+// the fixed initial size is just for an example
+constexpr int kColWidth = 30;
+
+}  // namespace
+
+// ----------------------------------------------------------------------
+// ParquetFilePrinter::DebugPrint
+
 void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list<int> selected_columns,
                                     bool print_values, bool format_dump,
                                     bool print_key_value_metadata, const char* filename) {
@@ -196,7 +196,7 @@ void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list<int> selecte
     }
     stream << "--- Values ---\n";
 
-    static constexpr int bufsize = COL_WIDTH + 1;
+    static constexpr int bufsize = kColWidth + 1;
     char buffer[bufsize];
 
     // Create readers for selected columns and print contents
@@ -217,7 +217,7 @@ void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list<int> selecte
         continue;
       }
 
-      snprintf(buffer, bufsize, "%-*s", COL_WIDTH,
+      snprintf(buffer, bufsize, "%-*s", kColWidth,
                file_metadata->schema()->Column(i)->name().c_str());
       stream << buffer << '|';
     }
@@ -232,7 +232,7 @@ void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list<int> selecte
       for (const auto& scanner : scanners) {
         if (scanner->HasNext()) {
           hasRow = true;
-          scanner->PrintNext(stream, COL_WIDTH);
+          scanner->PrintNext(stream, kColWidth);
           stream << '|';
         }
       }


Reply via email to