This is an automated email from the ASF dual-hosted git repository.

raulcd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new baf97fd12a GH-46157: [C++] Move test utility RunEndEncodeTableColumns 
that uses REE to test_util_internal on acero instead of common gtest_util 
(#46161)
baf97fd12a is described below

commit baf97fd12ac0254b3aef8e8329a3373050bcb8f1
Author: Raúl Cumplido <raulcumpl...@gmail.com>
AuthorDate: Wed Apr 30 09:38:59 2025 +0200

    GH-46157: [C++] Move test utility RunEndEncodeTableColumns that uses REE to 
test_util_internal on acero instead of common gtest_util (#46161)
    
    ### Rationale for this change
    
    When compiling without `ARROW_COMPUTE` we should not require the run end 
encode kernel to be available. The current test utility 
`RunEndEncodeTableColumns` should not be on gtest_util as this is going to be 
compiled whether `ARROW_COMPUTE` flag is enabled or not.
    
    ### What changes are included in this PR?
    
    Move the `RunEndEncodeTableColumns` utility that requires REE to be 
available to `arrow/acero/test_util_internal` as is only used there, move 
corresponding test to its own test module and remove the requirement for it to 
be exported with `ARROW_TESTING_EXPORT`
    
    ### Are these changes tested?
    
    Yes, on CI and tested locally that `ARROW_COMPUTE=ON` and 
`ARROW_COMPUTE=OFF` behave as expected.
    
    ### Are there any user-facing changes?
    
    If someone was using the `RunEndEncodeTableColumns` from `libarrow_testing` 
this is not being exported anymore.
    
    * GitHub Issue: #46157
    
    Lead-authored-by: Raúl Cumplido <raulcumpl...@gmail.com>
    Co-authored-by: Antoine Pitrou <pit...@free.fr>
    Signed-off-by: Raúl Cumplido <raulcumpl...@gmail.com>
---
 cpp/src/arrow/CMakeLists.txt                   |  1 +
 cpp/src/arrow/acero/CMakeLists.txt             |  2 ++
 cpp/src/arrow/acero/test_util_internal.cc      | 26 +++++++++++++++++
 cpp/src/arrow/acero/test_util_internal.h       |  4 +++
 cpp/src/arrow/acero/test_util_internal_test.cc | 39 ++++++++++++++++++++++++++
 cpp/src/arrow/testing/gtest_util.cc            | 26 -----------------
 cpp/src/arrow/testing/gtest_util.h             |  4 ---
 cpp/src/arrow/testing/gtest_util_test.cc       | 13 ---------
 8 files changed, 72 insertions(+), 43 deletions(-)

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index a4f67408bf..0168e93626 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -785,6 +785,7 @@ if(ARROW_COMPUTE)
   append_runtime_avx2_src(ARROW_COMPUTE_SRCS 
compute/row/compare_internal_avx2.cc)
   append_runtime_avx2_src(ARROW_COMPUTE_SRCS 
compute/row/encode_internal_avx2.cc)
   append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/util_avx2.cc)
+
 endif()
 
 arrow_add_object_library(ARROW_COMPUTE ${ARROW_COMPUTE_SRCS})
diff --git a/cpp/src/arrow/acero/CMakeLists.txt 
b/cpp/src/arrow/acero/CMakeLists.txt
index 8034ea2570..c3b08af84e 100644
--- a/cpp/src/arrow/acero/CMakeLists.txt
+++ b/cpp/src/arrow/acero/CMakeLists.txt
@@ -181,6 +181,8 @@ add_arrow_acero_test(aggregate_node_test SOURCES 
aggregate_node_test.cc)
 add_arrow_acero_test(util_test SOURCES util_test.cc task_util_test.cc)
 add_arrow_acero_test(hash_aggregate_test SOURCES hash_aggregate_test.cc)
 
+add_arrow_acero_test(test_util_internal_test SOURCES 
test_util_internal_test.cc)
+
 if(ARROW_BUILD_BENCHMARKS)
   function(add_arrow_acero_benchmark REL_BENCHMARK_NAME)
     set(options)
diff --git a/cpp/src/arrow/acero/test_util_internal.cc 
b/cpp/src/arrow/acero/test_util_internal.cc
index 2748d4107e..3e27917581 100644
--- a/cpp/src/arrow/acero/test_util_internal.cc
+++ b/cpp/src/arrow/acero/test_util_internal.cc
@@ -613,5 +613,31 @@ Result<std::shared_ptr<Table>> MakeRandomTimeSeriesTable(
   return Table::Make(schema, columns, num_rows);
 }
 
+Result<std::shared_ptr<Table>> RunEndEncodeTableColumns(
+    const Table& table, const std::vector<int>& column_indices) {
+  const int num_columns = table.num_columns();
+  std::vector<std::shared_ptr<ChunkedArray>> encoded_columns;
+  encoded_columns.reserve(num_columns);
+  std::vector<std::shared_ptr<Field>> encoded_fields;
+  encoded_fields.reserve(num_columns);
+  for (int i = 0; i < num_columns; i++) {
+    const auto& field = table.schema()->field(i);
+    if (std::find(column_indices.begin(), column_indices.end(), i) !=
+        column_indices.end()) {
+      ARROW_ASSIGN_OR_RAISE(auto run_end_encoded,
+                            arrow::compute::RunEndEncode(table.column(i)));
+      ARROW_DCHECK_EQ(run_end_encoded.kind(), Datum::CHUNKED_ARRAY);
+      encoded_columns.push_back(run_end_encoded.chunked_array());
+      auto encoded_type = arrow::run_end_encoded(arrow::int32(), 
field->type());
+      encoded_fields.push_back(field->WithType(encoded_type));
+    } else {
+      encoded_columns.push_back(table.column(i));
+      encoded_fields.push_back(field);
+    }
+  }
+  auto updated_schema = arrow::schema(std::move(encoded_fields));
+  return Table::Make(std::move(updated_schema), std::move(encoded_columns));
+}
+
 }  // namespace acero
 }  // namespace arrow
diff --git a/cpp/src/arrow/acero/test_util_internal.h 
b/cpp/src/arrow/acero/test_util_internal.h
index 2367524a56..5fab9bfcd4 100644
--- a/cpp/src/arrow/acero/test_util_internal.h
+++ b/cpp/src/arrow/acero/test_util_internal.h
@@ -30,6 +30,7 @@
 #include "arrow/acero/exec_plan.h"
 #include "arrow/compute/exec.h"
 #include "arrow/compute/kernel.h"
+#include "arrow/table.h"
 #include "arrow/testing/visibility.h"
 #include "arrow/util/async_generator.h"
 #include "arrow/util/pcg_random.h"
@@ -195,4 +196,7 @@ struct TableGenerationProperties {
 Result<std::shared_ptr<Table>> MakeRandomTimeSeriesTable(
     const TableGenerationProperties& properties);
 
+Result<std::shared_ptr<Table>> RunEndEncodeTableColumns(
+    const Table& table, const std::vector<int>& column_indices);
+
 }  // namespace arrow::acero
diff --git a/cpp/src/arrow/acero/test_util_internal_test.cc 
b/cpp/src/arrow/acero/test_util_internal_test.cc
new file mode 100644
index 0000000000..7a42b9d172
--- /dev/null
+++ b/cpp/src/arrow/acero/test_util_internal_test.cc
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "arrow/acero/test_util_internal.h"
+#include "arrow/testing/gtest_util.h"
+
+namespace arrow::acero {
+
+TEST(RunEndEncodeTableColumnsTest, SchemaTypeIsModified) {
+  std::shared_ptr<Table> table =
+      arrow::TableFromJSON(arrow::schema({arrow::field("col", 
arrow::utf8())}), {R"([
+            {"col": "a"},
+            {"col": "b"},
+            {"col": "c"},
+            {"col": "d"}
+          ])"});
+  ASSERT_OK_AND_ASSIGN(std::shared_ptr<Table> ree_table,
+                       RunEndEncodeTableColumns(*table, {0}));
+  ASSERT_OK(ree_table->ValidateFull());
+  ASSERT_TRUE(ree_table->schema()->field(0)->type()->Equals(
+      arrow::run_end_encoded(arrow::int32(), arrow::utf8())));
+}
+}  // namespace arrow::acero
diff --git a/cpp/src/arrow/testing/gtest_util.cc 
b/cpp/src/arrow/testing/gtest_util.cc
index 9eeca32e72..061956ab1a 100644
--- a/cpp/src/arrow/testing/gtest_util.cc
+++ b/cpp/src/arrow/testing/gtest_util.cc
@@ -47,7 +47,6 @@
 
 #include "arrow/array.h"
 #include "arrow/buffer.h"
-#include "arrow/compute/api_vector.h"
 #include "arrow/datum.h"
 #include "arrow/extension/json.h"
 #include "arrow/io/memory.h"
@@ -473,31 +472,6 @@ std::shared_ptr<Tensor> TensorFromJSON(const 
std::shared_ptr<DataType>& type,
   return *Tensor::Make(type, array->data()->buffers[1], shape, strides, 
dim_names);
 }
 
-Result<std::shared_ptr<Table>> RunEndEncodeTableColumns(
-    const Table& table, const std::vector<int>& column_indices) {
-  const int num_columns = table.num_columns();
-  std::vector<std::shared_ptr<ChunkedArray>> encoded_columns;
-  encoded_columns.reserve(num_columns);
-  std::vector<std::shared_ptr<Field>> encoded_fields;
-  encoded_fields.reserve(num_columns);
-  for (int i = 0; i < num_columns; i++) {
-    const auto& field = table.schema()->field(i);
-    if (std::find(column_indices.begin(), column_indices.end(), i) !=
-        column_indices.end()) {
-      ARROW_ASSIGN_OR_RAISE(auto run_end_encoded, 
compute::RunEndEncode(table.column(i)));
-      DCHECK_EQ(run_end_encoded.kind(), Datum::CHUNKED_ARRAY);
-      encoded_columns.push_back(run_end_encoded.chunked_array());
-      auto encoded_type = arrow::run_end_encoded(arrow::int32(), 
field->type());
-      encoded_fields.push_back(field->WithType(encoded_type));
-    } else {
-      encoded_columns.push_back(table.column(i));
-      encoded_fields.push_back(field);
-    }
-  }
-  auto updated_schema = arrow::schema(std::move(encoded_fields));
-  return Table::Make(std::move(updated_schema), std::move(encoded_columns));
-}
-
 Result<std::optional<std::string>> PrintArrayDiff(const ChunkedArray& expected,
                                                   const ChunkedArray& actual) {
   if (actual.Equals(expected)) {
diff --git a/cpp/src/arrow/testing/gtest_util.h 
b/cpp/src/arrow/testing/gtest_util.h
index 2ff4b72af4..29bb52c216 100644
--- a/cpp/src/arrow/testing/gtest_util.h
+++ b/cpp/src/arrow/testing/gtest_util.h
@@ -373,10 +373,6 @@ std::shared_ptr<Tensor> TensorFromJSON(const 
std::shared_ptr<DataType>& type,
                                        const std::vector<int64_t>& strides = 
{},
                                        const std::vector<std::string>& 
dim_names = {});
 
-ARROW_TESTING_EXPORT
-Result<std::shared_ptr<Table>> RunEndEncodeTableColumns(
-    const Table& table, const std::vector<int>& column_indices);
-
 // Given an array, return a new identical array except for one validity bit
 // set to a new value.
 // This is useful to force the underlying "value" of null entries to otherwise
diff --git a/cpp/src/arrow/testing/gtest_util_test.cc 
b/cpp/src/arrow/testing/gtest_util_test.cc
index 8dc496fa50..4ad5159a6b 100644
--- a/cpp/src/arrow/testing/gtest_util_test.cc
+++ b/cpp/src/arrow/testing/gtest_util_test.cc
@@ -292,17 +292,4 @@ TEST(AssertTestWithinUlp, Basics) {
   EXPECT_FATAL_FAILURE(AssertWithinUlp(123.456f, 123.456085f, 10), "not within 
10 ulps");
 }
 
-TEST(RunEndEncodeGtestUtilTest, SchemaTypeIsModified) {
-  std::shared_ptr<Table> table =
-      arrow::TableFromJSON(arrow::schema({arrow::field("col", 
arrow::utf8())}), {R"([
-      {"col": "a"},
-      {"col": "b"},
-      {"col": "c"},
-      {"col": "d"}
-    ])"});
-  ASSERT_OK_AND_ASSIGN(std::shared_ptr<Table> ree_table,
-                       RunEndEncodeTableColumns(*table, {0}));
-  ASSERT_TRUE(ree_table->schema()->field(0)->type()->Equals(
-      arrow::run_end_encoded(arrow::int32(), arrow::utf8())));
-}
 }  // namespace arrow

Reply via email to