This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 59f30f0898 GH-36892: [C++] Fix performance regressions in `FieldPath::Get` (#37032)
59f30f0898 is described below
commit 59f30f089879196910c4eca0f0530ec1d039cc71
Author: Ben Harkins <[email protected]>
AuthorDate: Tue Aug 8 13:40:08 2023 -0400
GH-36892: [C++] Fix performance regressions in `FieldPath::Get` (#37032)
### Rationale for this change
https://github.com/apache/arrow/pull/35197 appears to have introduced
significant performance regressions in `FieldPath::Get` - indicated
[here](https://conbench.ursa.dev/compare/runs/9cf73ac83f0a44179e6538b2c1c7babd...3d76cb5ffb8849bf8c3ea9b32d08b3b7/),
in a benchmark that uses a wide (10K column) dataframe.
### What changes are included in this PR?
- Adds basic benchmarks for `FieldPath::Get` across various input types, as
they didn't previously exist
- Addresses several performance issues. These came in the form of extremely
high upfront costs for the `RecordBatch` and `ArrayData` overloads specifically
- Some minor refactoring of `NestedSelector`
### Are these changes tested?
Yes (covered by existing tests)
### Are there any user-facing changes?
No
* Closes: #36892
Lead-authored-by: benibus <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/type.cc | 102 ++++++++++++++++++++------------
cpp/src/arrow/type_benchmark.cc | 125 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 191 insertions(+), 36 deletions(-)
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 68dc2aabe9..9267f1e499 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -1066,17 +1066,29 @@ std::string FieldPath::ToString() const {
return repr;
}
-static Status NonStructError() {
- return Status::NotImplemented("Get child data of non-struct array");
-}
+struct NestedSelectorUtil {
+ static Status NonStructError() {
+ return Status::NotImplemented("Get child data of non-struct array");
+ }
+
+ template <typename T>
+ static const DataType* GetType(const T& input) {
+ if constexpr (std::is_same_v<T, ArrayData>) {
+ return input.type.get();
+ } else {
+ return input.type().get();
+ }
+ }
+};
-// Utility class for retrieving a child field/column from a top-level Field, Array, or
-// ChunkedArray. The "root" value can either be a single parent or a vector of its
-// children.
+// Utility class for retrieving a child field/column from a top-level Field, Array,
+// ArrayData, or ChunkedArray. The "root" value can either be a single parent or a vector
+// of its children.
template <typename T, bool IsFlattening = false>
class NestedSelector {
public:
using ArrowType = T;
+ using Util = NestedSelectorUtil;
explicit NestedSelector(const std::vector<std::shared_ptr<T>>& children)
: parent_or_children_(&children) {}
@@ -1095,7 +1107,18 @@ class NestedSelector {
Result<NestedSelector> GetChild(int i) const {
std::shared_ptr<T> child;
if (auto parent = get_parent()) {
- ARROW_ASSIGN_OR_RAISE(child, GetChild(*parent, i, pool_));
+ const DataType* type = Util::GetType(*parent);
+ // We avoid this check for schema fields since it's inconsequential (plus there are
+ // tests elsewhere that rely on it not happening)
+ if constexpr (!std::is_same_v<T, Field>) {
+ if (ARROW_PREDICT_FALSE(type->id() != Type::STRUCT)) {
+ return Util::NonStructError();
+ }
+ }
+ // Bounds-check the index *once* using the parent's type
+ if (ARROW_PREDICT_TRUE(i >= 0 && i < type->num_fields())) {
+ ARROW_ASSIGN_OR_RAISE(child, GetChild(*parent, i, pool_));
+ }
} else if (auto children = get_children()) {
if (ARROW_PREDICT_TRUE(i >= 0 && static_cast<size_t>(i) <
children->size())) {
child = (*children)[i];
@@ -1129,10 +1152,10 @@ class NestedSelector {
*os << "column types: { ";
if (auto children = get_children()) {
for (const auto& child : *children) {
- *os << *child->type() << ", ";
+ *os << *Util::GetType(*child) << ", ";
}
} else if (auto parent = get_parent()) {
- for (const auto& field : parent->type()->fields()) {
+ for (const auto& field : Util::GetType(*parent)->fields()) {
*os << *field->type() << ", ";
}
}
@@ -1155,21 +1178,33 @@ class NestedSelector {
}
static Result<std::shared_ptr<Field>> GetChild(const Field& field, int i,
MemoryPool*) {
- if (ARROW_PREDICT_FALSE(i < 0 || i >= field.type()->num_fields())) {
- return nullptr;
- }
return field.type()->field(i);
}
- static Result<std::shared_ptr<Array>> GetChild(const Array& array, int i,
- MemoryPool* pool) {
- if (ARROW_PREDICT_FALSE(array.type_id() != Type::STRUCT)) {
- return NonStructError();
- }
- if (ARROW_PREDICT_FALSE(i < 0 || i >= array.num_fields())) {
- return nullptr;
+ static Result<std::shared_ptr<ArrayData>> GetChild(const ArrayData& data,
int i,
+ MemoryPool* pool) {
+ std::shared_ptr<ArrayData> child_data;
+ if constexpr (IsFlattening) {
+ // First, convert to an Array so we can use
StructArray::GetFlattenedField
+ auto array = MakeArray(data.Copy());
+ ARROW_ASSIGN_OR_RAISE(auto child_array, GetChild(*array, i, pool));
+ child_data = child_array->data();
+ } else {
+ // We could achieve the same result by converting to an Array (via
MakeArray),
+ // calling StructArray::field(i), and pulling out the new ArrayData.
However, this
+ // process can be very expensive when there are many columns - so we just
+ // reimplement the functionality that we need
+ child_data = data.child_data[i];
+ if (data.offset != 0 || data.child_data[i]->length != data.length) {
+ child_data = child_data->Slice(data.offset, data.length);
+ }
}
+ return std::move(child_data);
+ }
+
+ static Result<std::shared_ptr<Array>> GetChild(const Array& array, int i,
+ MemoryPool* pool) {
const auto& struct_array = checked_cast<const StructArray&>(array);
if constexpr (IsFlattening) {
return struct_array.GetFlattenedField(i, pool);
@@ -1181,22 +1216,15 @@ class NestedSelector {
static Result<std::shared_ptr<ChunkedArray>> GetChild(const ChunkedArray&
chunked_array,
int i, MemoryPool*
pool) {
const auto& type = *chunked_array.type();
- if (ARROW_PREDICT_FALSE(type.id() != Type::STRUCT)) {
- return NonStructError();
- }
- if (ARROW_PREDICT_FALSE(i < 0 || i >= type.num_fields())) {
- return nullptr;
- }
ArrayVector chunks;
chunks.reserve(chunked_array.num_chunks());
for (const auto& parent_chunk : chunked_array.chunks()) {
ARROW_ASSIGN_OR_RAISE(auto chunk, GetChild(*parent_chunk, i, pool));
- if (!chunk) return nullptr;
chunks.push_back(std::move(chunk));
}
- return ChunkedArray::Make(std::move(chunks), type.field(i)->type());
+ return std::make_shared<ChunkedArray>(std::move(chunks), type.field(i)->type());
}
std::shared_ptr<T> owned_parent_;
@@ -1289,7 +1317,11 @@ Result<std::shared_ptr<Schema>> FieldPath::GetAll(const
Schema& schm,
}
Result<std::shared_ptr<Array>> FieldPath::Get(const RecordBatch& batch) const {
- return FieldPathGetImpl::Get(this, ZeroCopySelector<Array>(batch.columns()));
+ // Deliberately calling `column_data` here because `RecordBatch::columns` is
nontrivial
+ ARROW_ASSIGN_OR_RAISE(
+ auto data,
+ FieldPathGetImpl::Get(this,
ZeroCopySelector<ArrayData>(batch.column_data())));
+ return MakeArray(data);
}
Result<std::shared_ptr<ChunkedArray>> FieldPath::Get(const Table& table) const
{
@@ -1301,11 +1333,7 @@ Result<std::shared_ptr<Array>> FieldPath::Get(const
Array& array) const {
}
Result<std::shared_ptr<ArrayData>> FieldPath::Get(const ArrayData& data) const
{
- // We indirect from ArrayData to Array rather than vice-versa because, when
selecting a
- // nested column, the StructArray::field method does the work of adjusting
the data's
- // offset/length if necessary.
- ARROW_ASSIGN_OR_RAISE(auto array, Get(*MakeArray(data.Copy())));
- return array->data();
+ return FieldPathGetImpl::Get(this, ZeroCopySelector<ArrayData>(data));
}
Result<std::shared_ptr<ChunkedArray>> FieldPath::Get(
@@ -1320,8 +1348,7 @@ Result<std::shared_ptr<Array>>
FieldPath::GetFlattened(const Array& array,
Result<std::shared_ptr<ArrayData>> FieldPath::GetFlattened(const ArrayData&
data,
MemoryPool* pool)
const {
- ARROW_ASSIGN_OR_RAISE(auto array, GetFlattened(*MakeArray(data.Copy()),
pool));
- return array->data();
+ return FieldPathGetImpl::Get(this, FlatteningSelector<ArrayData>(data,
pool));
}
Result<std::shared_ptr<ChunkedArray>> FieldPath::GetFlattened(
@@ -1332,7 +1359,10 @@ Result<std::shared_ptr<ChunkedArray>>
FieldPath::GetFlattened(
Result<std::shared_ptr<Array>> FieldPath::GetFlattened(const RecordBatch&
batch,
MemoryPool* pool) const
{
- return FieldPathGetImpl::Get(this,
FlatteningSelector<Array>(batch.columns(), pool));
+ ARROW_ASSIGN_OR_RAISE(
+ auto data, FieldPathGetImpl::Get(
+ this, FlatteningSelector<ArrayData>(batch.column_data(),
pool)));
+ return MakeArray(data);
}
Result<std::shared_ptr<ChunkedArray>> FieldPath::GetFlattened(const Table&
table,
diff --git a/cpp/src/arrow/type_benchmark.cc b/cpp/src/arrow/type_benchmark.cc
index de90577ffd..17dccfcb33 100644
--- a/cpp/src/arrow/type_benchmark.cc
+++ b/cpp/src/arrow/type_benchmark.cc
@@ -18,15 +18,19 @@
#include <algorithm>
#include <cstdint>
#include <exception>
+#include <optional>
#include <random>
#include <string>
#include <vector>
#include "benchmark/benchmark.h"
+#include "arrow/array.h"
#include "arrow/result.h"
#include "arrow/status.h"
+#include "arrow/table.h"
#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
#include "arrow/type.h"
#include "arrow/util/key_value_metadata.h"
#include "arrow/util/macros.h"
@@ -418,6 +422,120 @@ static void ErrorSchemeExceptionNoInline(
state.SetItemsProcessed(state.iterations() * integers.size());
}
+// ----------------------------------------------------------------------
+// FieldPath::Get benchmarks
+
+static std::shared_ptr<Schema> GenerateTestSchema(int num_columns) {
+ FieldVector fields(num_columns);
+ for (int i = 0; i < num_columns; ++i) {
+ auto name = std::string("f") + std::to_string(i);
+ fields[i] = field(std::move(name), int64());
+ }
+ return schema(std::move(fields));
+}
+
+static std::shared_ptr<Array> GenerateTestArray(int num_columns) {
+ constexpr int64_t kLength = 100;
+
+ auto rand = random::RandomArrayGenerator(0xbeef);
+ auto schm = GenerateTestSchema(num_columns);
+
+ ArrayVector columns(num_columns);
+ for (auto& column : columns) {
+ column = rand.Int64(kLength, 0, std::numeric_limits<int64_t>::max());
+ }
+
+ return *StructArray::Make(columns, schm->fields());
+}
+
+static std::shared_ptr<RecordBatch> ToBatch(const std::shared_ptr<Array>& array) {
+ return *RecordBatch::FromStructArray(array);
+}
+
+static std::shared_ptr<ChunkedArray> ToChunked(const std::shared_ptr<Array>& array,
+                                               double chunk_proportion = 1.0) {
+ auto struct_array = internal::checked_pointer_cast<StructArray>(array);
+ const auto num_rows = struct_array->length();
+ const auto chunk_length = static_cast<int64_t>(std::ceil(num_rows *
chunk_proportion));
+
+ ArrayVector chunks;
+ for (int64_t offset = 0; offset < num_rows;) {
+ int64_t slice_length = std::min(chunk_length, num_rows - offset);
+ chunks.push_back(*struct_array->SliceSafe(offset, slice_length));
+ offset += slice_length;
+ }
+
+ return *ChunkedArray::Make(std::move(chunks));
+}
+
+static std::shared_ptr<Table> ToTable(const std::shared_ptr<Array>& array,
+ double chunk_proportion = 1.0) {
+ return *Table::FromChunkedStructArray(ToChunked(array, chunk_proportion));
+}
+
+template <typename T>
+static void BenchmarkFieldPathGet(benchmark::State& state, // NOLINT
non-const reference
+ const T& input, int num_columns,
+ std::optional<int> num_chunks = {}) {
+ // Reassigning a single FieldPath var within each iteration's scope seems to
be costly
+ // enough to influence the timings, so we preprocess them.
+ std::vector<FieldPath> paths(num_columns);
+ for (int i = 0; i < num_columns; ++i) {
+ paths[i] = {i};
+ }
+
+ for (auto _ : state) {
+ for (const auto& path : paths) {
+ benchmark::DoNotOptimize(path.Get(input).ValueOrDie());
+ }
+ }
+
+ state.SetItemsProcessed(state.iterations() * num_columns);
+ state.counters["num_columns"] = num_columns;
+ if (num_chunks.has_value()) {
+ state.counters["num_chunks"] = num_chunks.value();
+ }
+}
+
+static void FieldPathGetFromWideArray(
+ benchmark::State& state) { // NOLINT non-const reference
+ constexpr int kNumColumns = 10000;
+ auto array = GenerateTestArray(kNumColumns);
+ BenchmarkFieldPathGet(state, *array, kNumColumns);
+}
+
+static void FieldPathGetFromWideArrayData(
+ benchmark::State& state) { // NOLINT non-const reference
+ constexpr int kNumColumns = 10000;
+ auto array = GenerateTestArray(kNumColumns);
+ BenchmarkFieldPathGet(state, *array->data(), kNumColumns);
+}
+
+static void FieldPathGetFromWideBatch(
+ benchmark::State& state) { // NOLINT non-const reference
+ constexpr int kNumColumns = 10000;
+ auto batch = ToBatch(GenerateTestArray(kNumColumns));
+ BenchmarkFieldPathGet(state, *batch, kNumColumns);
+}
+
+static void FieldPathGetFromWideChunkedArray(
+ benchmark::State& state) { // NOLINT non-const reference
+ constexpr int kNumColumns = 10000;
+ // Percentage representing the size of each chunk relative to the total
length (smaller
+ // proportion means more chunks)
+ const double chunk_proportion = state.range(0) / 100.0;
+ auto chunked_array = ToChunked(GenerateTestArray(kNumColumns),
chunk_proportion);
+ BenchmarkFieldPathGet(state, *chunked_array, kNumColumns,
chunked_array->num_chunks());
+}
+
+static void FieldPathGetFromWideTable(
+ benchmark::State& state) { // NOLINT non-const reference
+ constexpr int kNumColumns = 10000;
+ const double chunk_proportion = state.range(0) / 100.0;
+ auto table = ToTable(GenerateTestArray(kNumColumns), chunk_proportion);
+ BenchmarkFieldPathGet(state, *table, kNumColumns,
table->column(0)->num_chunks());
+}
+
BENCHMARK(TypeEqualsSimple);
BENCHMARK(TypeEqualsComplex);
BENCHMARK(TypeEqualsWithMetadata);
@@ -436,4 +554,11 @@ BENCHMARK(ErrorSchemeStatusNoInline);
BENCHMARK(ErrorSchemeResultNoInline);
BENCHMARK(ErrorSchemeExceptionNoInline);
+BENCHMARK(FieldPathGetFromWideArray);
+BENCHMARK(FieldPathGetFromWideArrayData);
+BENCHMARK(FieldPathGetFromWideBatch);
+
+BENCHMARK(FieldPathGetFromWideChunkedArray)->Arg(2)->Arg(10)->Arg(25)->Arg(100);
+BENCHMARK(FieldPathGetFromWideTable)->Arg(2)->Arg(10)->Arg(25)->Arg(100);
+
} // namespace arrow