This is an automated email from the ASF dual-hosted git repository.

felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 7aff9d572d GH-41558: [C++] Improve fixed_width_test_util.h  (#41575)
7aff9d572d is described below

commit 7aff9d572d57c3e29702db9b1511c00f04926007
Author: Hyunseok Seo <[email protected]>
AuthorDate: Sat May 18 07:34:08 2024 +0900

    GH-41558: [C++] Improve fixed_width_test_util.h  (#41575)
    
    
    
    ### Rationale for this change
    
    Improve the `fixed_width_test_util.h`.
    
    ### What changes are included in this PR?
    
    - Move the `fixed_width_test_util.h` to `arrow/testing`
    - Split `fixed_width_test_util` into `.cc` and `.h` files
    - Remove unused headers
    
    ### Are these changes tested?
    
    Yes
    
    ### Are there any user-facing changes?
    
    No
    
    * GitHub Issue: #41558
    
    Authored-by: Hyunseok Seo <[email protected]>
    Signed-off-by: Felipe Oliveira Carvalho <[email protected]>
---
 cpp/src/arrow/CMakeLists.txt                       |   1 +
 cpp/src/arrow/compute/kernels/test_util.cc         |   1 +
 .../arrow/compute/kernels/vector_selection_test.cc |   2 +-
 cpp/src/arrow/testing/fixed_width_test_util.cc     | 181 ++++++++++++++++++
 cpp/src/arrow/testing/fixed_width_test_util.h      |  76 ++++++++
 cpp/src/arrow/util/fixed_width_test_util.h         | 203 ---------------------
 6 files changed, 260 insertions(+), 204 deletions(-)

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 0f4824ec99..57a0b383a6 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -637,6 +637,7 @@ endif()
 set(ARROW_TESTING_SRCS
     io/test_common.cc
     ipc/test_common.cc
+    testing/fixed_width_test_util.cc
     testing/gtest_util.cc
     testing/random.cc
     testing/generator.cc
diff --git a/cpp/src/arrow/compute/kernels/test_util.cc b/cpp/src/arrow/compute/kernels/test_util.cc
index 23d0fd18d5..2217787663 100644
--- a/cpp/src/arrow/compute/kernels/test_util.cc
+++ b/cpp/src/arrow/compute/kernels/test_util.cc
@@ -31,6 +31,7 @@
 #include "arrow/datum.h"
 #include "arrow/result.h"
 #include "arrow/table.h"
+#include "arrow/testing/fixed_width_test_util.h"
 #include "arrow/testing/gtest_util.h"
 
 namespace arrow {
diff --git a/cpp/src/arrow/compute/kernels/vector_selection_test.cc b/cpp/src/arrow/compute/kernels/vector_selection_test.cc
index 4c7d85b103..6261fa2dae 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection_test.cc
+++ b/cpp/src/arrow/compute/kernels/vector_selection_test.cc
@@ -30,10 +30,10 @@
 #include "arrow/compute/kernels/test_util.h"
 #include "arrow/table.h"
 #include "arrow/testing/builder.h"
+#include "arrow/testing/fixed_width_test_util.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/random.h"
 #include "arrow/testing/util.h"
-#include "arrow/util/fixed_width_test_util.h"
 #include "arrow/util/logging.h"
 
 namespace arrow {
diff --git a/cpp/src/arrow/testing/fixed_width_test_util.cc b/cpp/src/arrow/testing/fixed_width_test_util.cc
new file mode 100644
index 0000000000..9c305ed1df
--- /dev/null
+++ b/cpp/src/arrow/testing/fixed_width_test_util.cc
@@ -0,0 +1,181 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <functional>
+#include <limits>
+#include <vector>
+
+#include "arrow/array/builder_base.h"
+#include "arrow/array/builder_nested.h"
+#include "arrow/array/builder_primitive.h"
+#include "arrow/testing/fixed_width_test_util.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+
+namespace arrow::util::internal {
+
+namespace {
+template <typename ArrowType>
+inline Status AppendNumeric(ArrayBuilder* builder, int64_t* next_value) {
+  using NumericBuilder = ::arrow::NumericBuilder<ArrowType>;
+  using value_type = typename NumericBuilder::value_type;
+  auto* numeric_builder = 
::arrow::internal::checked_cast<NumericBuilder*>(builder);
+  auto cast_next_value =
+      static_cast<value_type>(*next_value % 
std::numeric_limits<value_type>::max());
+  RETURN_NOT_OK(numeric_builder->Append(cast_next_value));
+  *next_value += 1;
+  return Status::OK();
+}
+}  // namespace
+
+std::shared_ptr<DataType> NestedListGenerator::NestedFSLType(
+    const std::shared_ptr<DataType>& inner_type, const std::vector<int>& 
sizes) {
+  auto type = inner_type;
+  for (auto it = sizes.rbegin(); it != sizes.rend(); it++) {
+    type = fixed_size_list(type, *it);
+  }
+  return type;
+}
+
+std::shared_ptr<DataType> NestedListGenerator::NestedListType(
+    const std::shared_ptr<DataType>& inner_type, size_t depth) {
+  auto list_type = list(inner_type);
+  for (size_t i = 1; i < depth; i++) {
+    list_type = list(std::move(list_type));
+  }
+  return list_type;
+}
+
+Result<std::shared_ptr<Array>> NestedListGenerator::NestedFSLArray(
+    const std::shared_ptr<DataType>& inner_type, const std::vector<int>& 
list_sizes,
+    int64_t length) {
+  auto nested_type = NestedFSLType(inner_type, list_sizes);
+  ARROW_ASSIGN_OR_RAISE(auto builder, MakeBuilder(nested_type));
+  return NestedListArray(builder.get(), list_sizes, length);
+}
+
+Result<std::shared_ptr<Array>> NestedListGenerator::NestedListArray(
+    const std::shared_ptr<DataType>& inner_type, const std::vector<int>& 
list_sizes,
+    int64_t length) {
+  auto nested_type = NestedListType(inner_type, list_sizes.size());
+  ARROW_ASSIGN_OR_RAISE(auto builder, MakeBuilder(nested_type));
+  return NestedListArray(builder.get(), list_sizes, length);
+}
+
+void NestedListGenerator::VisitAllNestedListConfigurations(
+    const std::vector<std::shared_ptr<DataType>>& inner_value_types,
+    const std::function<void(const std::shared_ptr<DataType>&, const 
std::vector<int>&)>&
+        visit,
+    int max_depth, int max_power_of_2_size) {
+  for (int depth = 1; depth <= max_depth; depth++) {
+    for (auto& type : inner_value_types) {
+      assert(is_fixed_width(*type));
+      int value_width = type->byte_width();
+
+      std::vector<int> list_sizes;  // stack of list sizes
+      auto pop = [&]() {            // pop the list_sizes stack
+        assert(!list_sizes.empty());
+        value_width /= list_sizes.back();
+        list_sizes.pop_back();
+      };
+      auto next = [&]() {  // double the top of the stack
+        assert(!list_sizes.empty());
+        value_width *= 2;
+        list_sizes.back() *= 2;
+        return value_width;
+      };
+      auto push_1s = [&]() {  // fill the stack with 1s
+        while (list_sizes.size() < static_cast<size_t>(depth)) {
+          list_sizes.push_back(1);
+        }
+      };
+
+      // Loop invariants:
+      //   value_width == product(list_sizes) * type->byte_width()
+      //   value_width is a power-of-2 (1, 2, 4, 8, 16, max_power_of_2_size=32)
+      push_1s();
+      do {
+        // for (auto x : list_sizes) printf("%d * ", x);
+        // printf("(%s) %d = %2d\n", type->name().c_str(), type->byte_width(),
+        // value_width);
+        visit(type, list_sizes);
+        while (!list_sizes.empty()) {
+          if (next() <= max_power_of_2_size) {
+            push_1s();
+            break;
+          }
+          pop();
+        }
+      } while (!list_sizes.empty());
+    }
+  }
+}
+
+Status NestedListGenerator::AppendNestedList(ArrayBuilder* nested_builder,
+                                             const int* list_sizes,
+                                             int64_t* next_inner_value) {
+  using ::arrow::internal::checked_cast;
+  ArrayBuilder* builder = nested_builder;
+  auto type = builder->type();
+  if (type->id() == Type::FIXED_SIZE_LIST || type->id() == Type::LIST) {
+    const int list_size = *list_sizes;
+    if (type->id() == Type::FIXED_SIZE_LIST) {
+      auto* fsl_builder = checked_cast<FixedSizeListBuilder*>(builder);
+      assert(list_size == checked_cast<FixedSizeListType&>(*type).list_size());
+      RETURN_NOT_OK(fsl_builder->Append());
+      builder = fsl_builder->value_builder();
+    } else {  // type->id() == Type::LIST)
+      auto* list_builder = checked_cast<ListBuilder*>(builder);
+      RETURN_NOT_OK(list_builder->Append(/*is_valid=*/true, list_size));
+      builder = list_builder->value_builder();
+    }
+    list_sizes++;
+    for (int i = 0; i < list_size; i++) {
+      RETURN_NOT_OK(AppendNestedList(builder, list_sizes, next_inner_value));
+    }
+  } else {
+    switch (type->id()) {
+      case Type::INT8:
+        RETURN_NOT_OK(AppendNumeric<Int8Type>(builder, next_inner_value));
+        break;
+      case Type::INT16:
+        RETURN_NOT_OK(AppendNumeric<Int16Type>(builder, next_inner_value));
+        break;
+      case Type::INT32:
+        RETURN_NOT_OK(AppendNumeric<Int32Type>(builder, next_inner_value));
+        break;
+      case Type::INT64:
+        RETURN_NOT_OK(AppendNumeric<Int64Type>(builder, next_inner_value));
+        break;
+      default:
+        return Status::NotImplemented("Unsupported type: ", *type);
+    }
+  }
+  return Status::OK();
+}
+
+Result<std::shared_ptr<Array>> NestedListGenerator::NestedListArray(
+    ArrayBuilder* nested_builder, const std::vector<int>& list_sizes, int64_t 
length) {
+  int64_t next_inner_value = 0;
+  for (int64_t i = 0; i < length; i++) {
+    RETURN_NOT_OK(AppendNestedList(nested_builder, list_sizes.data(), 
&next_inner_value));
+  }
+  return nested_builder->Finish();
+}
+
+}  // namespace arrow::util::internal
diff --git a/cpp/src/arrow/testing/fixed_width_test_util.h b/cpp/src/arrow/testing/fixed_width_test_util.h
new file mode 100644
index 0000000000..9e5e6fa685
--- /dev/null
+++ b/cpp/src/arrow/testing/fixed_width_test_util.h
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <vector>
+
+#include "arrow/testing/visibility.h"
+#include "arrow/type.h"
+#include "arrow/type_fwd.h"
+
+namespace arrow::util::internal {
+
+class ARROW_TESTING_EXPORT NestedListGenerator {
+ public:
+  /// \brief Create a nested FixedSizeListType.
+  ///
+  /// \return `fixed_size_list(fixed_size_list(..., sizes[1]), sizes[0])`
+  static std::shared_ptr<DataType> NestedFSLType(
+      const std::shared_ptr<DataType>& inner_type, const std::vector<int>& 
sizes);
+
+  /// \brief Create a nested ListType.
+  ///
+  /// \return `list(list(...))`
+  static std::shared_ptr<DataType> NestedListType(
+      const std::shared_ptr<DataType>& inner_type, size_t depth);
+
+  static Result<std::shared_ptr<Array>> NestedFSLArray(
+      const std::shared_ptr<DataType>& inner_type, const std::vector<int>& 
list_sizes,
+      int64_t length);
+
+  static Result<std::shared_ptr<Array>> NestedListArray(
+      const std::shared_ptr<DataType>& inner_type, const std::vector<int>& 
list_sizes,
+      int64_t length);
+
+  /// \brief Generate all possible nested list configurations of depth 1 to max_depth.
+  ///
+  /// Each configuration consists of a single inner value type and a list of sizes.
+  /// Both can be used with NestedFSLArray and NestedListArray to generate test data.
+  ///
+  /// The product of the list sizes and the size of the inner value type is always a power
+  /// of 2 no greater than max_power_of_2_size. For max_depth=3 and
+  /// max_power_of_2_size=32, this generates 108 configurations.
+  static void VisitAllNestedListConfigurations(
+      const std::vector<std::shared_ptr<DataType>>& inner_value_types,
+      const std::function<void(const std::shared_ptr<DataType>&,
+                               const std::vector<int>&)>& visit,
+      int max_depth = 3, int max_power_of_2_size = 32);
+
+ private:
+  // Append([...[[*next_inner_value++, *next_inner_value++, ...]]...])
+  static Status AppendNestedList(ArrayBuilder* nested_builder, const int* 
list_sizes,
+                                 int64_t* next_inner_value);
+
+  static Result<std::shared_ptr<Array>> NestedListArray(
+      ArrayBuilder* nested_builder, const std::vector<int>& list_sizes, 
int64_t length);
+};
+
+}  // namespace arrow::util::internal
diff --git a/cpp/src/arrow/util/fixed_width_test_util.h b/cpp/src/arrow/util/fixed_width_test_util.h
deleted file mode 100644
index ca141b7ca2..0000000000
--- a/cpp/src/arrow/util/fixed_width_test_util.h
+++ /dev/null
@@ -1,203 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-#include <vector>
-
-#include "arrow/array/builder_primitive.h"
-#include "arrow/builder.h"
-#include "arrow/type.h"
-#include "arrow/util/checked_cast.h"
-
-namespace arrow::util::internal {
-
-class NestedListGenerator {
- public:
-  /// \brief Create a nested FixedSizeListType.
-  ///
-  /// \return `fixed_size_list(fixed_size_list(..., sizes[1]), sizes[0])`
-  static std::shared_ptr<DataType> NestedFSLType(
-      const std::shared_ptr<DataType>& inner_type, const std::vector<int>& 
sizes) {
-    auto type = inner_type;
-    for (auto it = sizes.rbegin(); it != sizes.rend(); it++) {
-      type = fixed_size_list(std::move(type), *it);
-    }
-    return type;
-  }
-
-  /// \brief Create a nested FixedListType.
-  ///
-  /// \return `list(list(...))`
-  static std::shared_ptr<DataType> NestedListType(
-      const std::shared_ptr<DataType>& inner_type, size_t depth) {
-    auto list_type = list(inner_type);
-    for (size_t i = 1; i < depth; i++) {
-      list_type = list(std::move(list_type));
-    }
-    return list_type;
-  }
-
- private:
-  template <typename ArrowType>
-  static Status AppendNumeric(ArrayBuilder* builder, int64_t* next_value) {
-    using NumericBuilder = ::arrow::NumericBuilder<ArrowType>;
-    using value_type = typename NumericBuilder::value_type;
-    auto* numeric_builder = 
::arrow::internal::checked_cast<NumericBuilder*>(builder);
-    auto cast_next_value =
-        static_cast<value_type>(*next_value % 
std::numeric_limits<value_type>::max());
-    RETURN_NOT_OK(numeric_builder->Append(cast_next_value));
-    *next_value += 1;
-    return Status::OK();
-  }
-
-  // Append([...[[*next_inner_value++, *next_inner_value++, ...]]...])
-  static Status AppendNestedList(ArrayBuilder* nested_builder, const int* 
list_sizes,
-                                 int64_t* next_inner_value) {
-    using ::arrow::internal::checked_cast;
-    ArrayBuilder* builder = nested_builder;
-    auto type = builder->type();
-    if (type->id() == Type::FIXED_SIZE_LIST || type->id() == Type::LIST) {
-      const int list_size = *list_sizes;
-      if (type->id() == Type::FIXED_SIZE_LIST) {
-        auto* fsl_builder = checked_cast<FixedSizeListBuilder*>(builder);
-        assert(list_size == 
checked_cast<FixedSizeListType&>(*type).list_size());
-        RETURN_NOT_OK(fsl_builder->Append());
-        builder = fsl_builder->value_builder();
-      } else {  // type->id() == Type::LIST)
-        auto* list_builder = checked_cast<ListBuilder*>(builder);
-        RETURN_NOT_OK(list_builder->Append(/*is_valid=*/true, list_size));
-        builder = list_builder->value_builder();
-      }
-      list_sizes++;
-      for (int i = 0; i < list_size; i++) {
-        RETURN_NOT_OK(AppendNestedList(builder, list_sizes, next_inner_value));
-      }
-    } else {
-      switch (type->id()) {
-        case Type::INT8:
-          RETURN_NOT_OK(AppendNumeric<Int8Type>(builder, next_inner_value));
-          break;
-        case Type::INT16:
-          RETURN_NOT_OK(AppendNumeric<Int16Type>(builder, next_inner_value));
-          break;
-        case Type::INT32:
-          RETURN_NOT_OK(AppendNumeric<Int32Type>(builder, next_inner_value));
-          break;
-        case Type::INT64:
-          RETURN_NOT_OK(AppendNumeric<Int64Type>(builder, next_inner_value));
-          break;
-        default:
-          return Status::NotImplemented("Unsupported type: ", *type);
-      }
-    }
-    return Status::OK();
-  }
-
-  static Result<std::shared_ptr<Array>> NestedListArray(
-      ArrayBuilder* nested_builder, const std::vector<int>& list_sizes, 
int64_t length) {
-    int64_t next_inner_value = 0;
-    for (int64_t i = 0; i < length; i++) {
-      RETURN_NOT_OK(
-          AppendNestedList(nested_builder, list_sizes.data(), 
&next_inner_value));
-    }
-    return nested_builder->Finish();
-  }
-
- public:
-  static Result<std::shared_ptr<Array>> NestedFSLArray(
-      const std::shared_ptr<DataType>& inner_type, const std::vector<int>& 
list_sizes,
-      int64_t length) {
-    auto nested_type = NestedFSLType(inner_type, list_sizes);
-    ARROW_ASSIGN_OR_RAISE(auto builder, MakeBuilder(nested_type));
-    return NestedListArray(builder.get(), list_sizes, length);
-  }
-
-  static Result<std::shared_ptr<Array>> NestedListArray(
-      const std::shared_ptr<DataType>& inner_type, const std::vector<int>& 
list_sizes,
-      int64_t length) {
-    auto nested_type = NestedListType(inner_type, list_sizes.size());
-    ARROW_ASSIGN_OR_RAISE(auto builder, MakeBuilder(nested_type));
-    return NestedListArray(builder.get(), list_sizes, length);
-  }
-
-  /// \brief Generate all possible nested list configurations of depth 1 to 
max_depth.
-  ///
-  /// Each configuration consists of a single inner value type and a list of 
sizes.
-  /// Both can be used with NestedFSLArray and NestedListArray to generate 
test data.
-  ///
-  /// The product of the list sizes and the size of the inner value type is 
always a power
-  /// of 2 no greater than max_power_of_2_size. For max_depth=3 and
-  /// max_power_of_2_size=32, this generates 108 configurations.
-  ///
-  /// \tparam Visit a function type with signature
-  ///     void(const std::shared_ptr<DataType>& inner_type,
-  ///          const std::vector<int>& list_sizes)
-  template <class Visit>
-  static void VisitAllNestedListConfigurations(
-      const std::vector<std::shared_ptr<DataType>>& inner_value_types, Visit&& 
visit,
-      int max_depth = 3, int max_power_of_2_size = 32) {
-    for (int depth = 1; depth <= max_depth; depth++) {
-      for (auto& type : inner_value_types) {
-        assert(is_fixed_width(*type));
-        int value_width = type->byte_width();
-
-        std::vector<int> list_sizes;  // stack of list sizes
-        auto pop = [&]() {            // pop the list_sizes stack
-          assert(!list_sizes.empty());
-          value_width /= list_sizes.back();
-          list_sizes.pop_back();
-        };
-        auto next = [&]() {  // double the top of the stack
-          assert(!list_sizes.empty());
-          value_width *= 2;
-          list_sizes.back() *= 2;
-          return value_width;
-        };
-        auto push_1s = [&]() {  // fill the stack with 1s
-          while (list_sizes.size() < static_cast<size_t>(depth)) {
-            list_sizes.push_back(1);
-          }
-        };
-
-        // Loop invariants:
-        //   value_width == product(list_sizes) * type->byte_width()
-        //   value_width is a power-of-2 (1, 2, 4, 8, 16, 
max_power_of_2_size=32)
-        push_1s();
-        do {
-          // for (auto x : list_sizes) printf("%d * ", x);
-          // printf("(%s) %d = %2d\n", type->name().c_str(), 
type->byte_width(),
-          // value_width);
-          visit(type, list_sizes);
-          // Advance to the next test case
-          while (!list_sizes.empty()) {
-            if (next() <= max_power_of_2_size) {
-              push_1s();
-              break;
-            }
-            pop();
-          }
-        } while (!list_sizes.empty());
-      }
-    }
-  }
-};
-
-}  // namespace arrow::util::internal

Reply via email to