This is an automated email from the ASF dual-hosted git repository.

maplefu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new a4acb64343 GH-40872: [C++][Parquet] Encoding: Optimize 
DecodeArrow/Decode(bitmap) for PlainBooleanDecoder (#40876)
a4acb64343 is described below

commit a4acb643437af2323f683e51d6043907fed496a9
Author: mwish <[email protected]>
AuthorDate: Thu Apr 4 03:57:41 2024 +0800

    GH-40872: [C++][Parquet] Encoding: Optimize DecodeArrow/Decode(bitmap) for 
PlainBooleanDecoder (#40876)
    
    
    
    ### Rationale for this change
    
    This enhances boolean decoding by optimizing `DecodeArrow` for 
PlainBooleanDecoder.
    
    ### What changes are included in this PR?
    
    Optimize DecodeArrow/Decode(bitmap) for PlainBooleanDecoder, and add 
benchmarks
    
    ### Are these changes tested?
    
    Yes
    
    ### Are there any user-facing changes?
    
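    Minor optimization. The semantics of boolean `Decode` also change: if
    `max_values` is not a multiple of 8, the trailing bits of the last
    output byte are undefined.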
    
    * GitHub Issue: #40872
    
    Lead-authored-by: mwish <[email protected]>
    Co-authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: mwish <[email protected]>
---
 cpp/src/parquet/column_reader.cc      |   3 +-
 cpp/src/parquet/encoding.cc           |  79 ++++++++----
 cpp/src/parquet/encoding.h            |   4 +-
 cpp/src/parquet/encoding_benchmark.cc | 236 ++++++++++++++++++++++++++++------
 4 files changed, 261 insertions(+), 61 deletions(-)

diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc
index 3fb224154c..af489c70a5 100644
--- a/cpp/src/parquet/column_reader.cc
+++ b/cpp/src/parquet/column_reader.cc
@@ -1316,7 +1316,8 @@ class TypedRecordReader : public 
TypedColumnReaderImpl<DType>,
     levels_position_ = 0;
     levels_capacity_ = 0;
     read_dense_for_nullable_ = read_dense_for_nullable;
-    uses_values_ = !(descr->physical_type() == Type::BYTE_ARRAY);
+    // BYTE_ARRAY values are not stored in the `values_` buffer.
+    uses_values_ = descr->physical_type() != Type::BYTE_ARRAY;
 
     if (uses_values_) {
       values_ = AllocateBuffer(pool);
diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc
index 3eed88f08b..f16e9b34fc 100644
--- a/cpp/src/parquet/encoding.cc
+++ b/cpp/src/parquet/encoding.cc
@@ -55,6 +55,7 @@ namespace bit_util = arrow::bit_util;
 using arrow::Status;
 using arrow::VisitNullBitmapInline;
 using arrow::internal::AddWithOverflow;
+using arrow::internal::BitBlockCounter;
 using arrow::internal::checked_cast;
 using arrow::internal::MultiplyWithOverflow;
 using arrow::internal::SafeSignedSubtract;
@@ -1173,13 +1174,15 @@ class PlainBooleanDecoder : public DecoderImpl, virtual 
public BooleanDecoder {
 
  private:
   std::unique_ptr<::arrow::bit_util::BitReader> bit_reader_;
+  int total_num_values_{0};
 };
 
 PlainBooleanDecoder::PlainBooleanDecoder(const ColumnDescriptor* descr)
     : DecoderImpl(descr, Encoding::PLAIN) {}
 
 void PlainBooleanDecoder::SetData(int num_values, const uint8_t* data, int 
len) {
-  num_values_ = num_values;
+  DecoderImpl::SetData(num_values, data, len);
+  total_num_values_ = num_values;
   bit_reader_ = std::make_unique<bit_util::BitReader>(data, len);
 }
 
@@ -1188,19 +1191,52 @@ int PlainBooleanDecoder::DecodeArrow(
     typename EncodingTraits<BooleanType>::Accumulator* builder) {
   int values_decoded = num_values - null_count;
   if (ARROW_PREDICT_FALSE(num_values_ < values_decoded)) {
+    // A too large `num_values` was requested.
+    ParquetException::EofException();
+  }
+  if (ARROW_PREDICT_FALSE(!bit_reader_->Advance(values_decoded))) {
     ParquetException::EofException();
   }
 
-  PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
-
-  VisitNullBitmapInline(
-      valid_bits, valid_bits_offset, num_values, null_count,
-      [&]() {
-        bool value;
-        ARROW_IGNORE_EXPR(bit_reader_->GetValue(1, &value));
-        builder->UnsafeAppend(value);
-      },
-      [&]() { builder->UnsafeAppendNull(); });
+  if (null_count == 0) {
+    // FastPath: can copy the data directly
+    PARQUET_THROW_NOT_OK(builder->AppendValues(data_, values_decoded, NULLPTR,
+                                               total_num_values_ - 
num_values_));
+  } else {
+    // Handle nulls by BitBlockCounter
+    PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
+    BitBlockCounter bit_counter(valid_bits, valid_bits_offset, num_values);
+    int64_t value_position = 0;
+    int64_t valid_bits_offset_position = valid_bits_offset;
+    int64_t previous_value_offset = 0;
+    while (value_position < num_values) {
+      auto block = bit_counter.NextWord();
+      if (block.AllSet()) {
+        // GH-40978: We don't have UnsafeAppendValues for booleans currently,
+        // so using `AppendValues` here.
+        PARQUET_THROW_NOT_OK(
+            builder->AppendValues(data_, block.length, NULLPTR, 
previous_value_offset));
+        previous_value_offset += block.length;
+      } else if (block.NoneSet()) {
+        // GH-40978: We don't have UnsafeAppendNulls for booleans currently,
+        // so using `AppendNulls` here.
+        PARQUET_THROW_NOT_OK(builder->AppendNulls(block.length));
+      } else {
+        for (int64_t i = 0; i < block.length; ++i) {
+          if (bit_util::GetBit(valid_bits, valid_bits_offset_position + i)) {
+            bool value = bit_util::GetBit(
+                data_, total_num_values_ - num_values_ + 
previous_value_offset);
+            builder->UnsafeAppend(value);
+            previous_value_offset += 1;
+          } else {
+            builder->UnsafeAppendNull();
+          }
+        }
+      }
+      value_position += block.length;
+      valid_bits_offset_position += block.length;
+    }
+  }
 
   num_values_ -= values_decoded;
   return values_decoded;
@@ -1214,18 +1250,15 @@ inline int PlainBooleanDecoder::DecodeArrow(
 
 int PlainBooleanDecoder::Decode(uint8_t* buffer, int max_values) {
   max_values = std::min(max_values, num_values_);
-  bool val;
-  ::arrow::internal::BitmapWriter bit_writer(buffer, 0, max_values);
-  for (int i = 0; i < max_values; ++i) {
-    if (!bit_reader_->GetValue(1, &val)) {
-      ParquetException::EofException();
-    }
-    if (val) {
-      bit_writer.Set();
-    }
-    bit_writer.Next();
+  if (ARROW_PREDICT_FALSE(!bit_reader_->Advance(max_values))) {
+    ParquetException::EofException();
   }
-  bit_writer.Finish();
+  // Copy the data directly
+  // Parquet's boolean encoding is bit-packed using LSB. So
+  // we can directly copy the data to the buffer.
+  ::arrow::internal::CopyBitmap(this->data_, /*offset=*/total_num_values_ - 
num_values_,
+                                /*length=*/max_values, /*dest=*/buffer,
+                                /*dest_offset=*/0);
   num_values_ -= max_values;
   return max_values;
 }
@@ -1692,7 +1725,7 @@ class DictDecoderImpl : public DecoderImpl, virtual 
public DictDecoder<Type> {
   }
 
  protected:
-  Status IndexInBounds(int32_t index) {
+  Status IndexInBounds(int32_t index) const {
     if (ARROW_PREDICT_TRUE(0 <= index && index < dictionary_length_)) {
       return Status::OK();
     }
diff --git a/cpp/src/parquet/encoding.h b/cpp/src/parquet/encoding.h
index de47bb7deb..6020091895 100644
--- a/cpp/src/parquet/encoding.h
+++ b/cpp/src/parquet/encoding.h
@@ -400,7 +400,9 @@ class BooleanDecoder : virtual public 
TypedDecoder<BooleanType> {
   /// \brief Decode and bit-pack values into a buffer
   ///
   /// \param[in] buffer destination for decoded values
-  /// This buffer will contain bit-packed values.
+  /// This buffer will contain bit-packed values. If
+  /// max_values is not a multiple of 8, the trailing bits
+  /// of the last byte will be undefined.
   /// \param[in] max_values max values to decode.
   /// \return The number of values decoded. Should be identical to max_values 
except
   /// at the end of the current data page.
diff --git a/cpp/src/parquet/encoding_benchmark.cc 
b/cpp/src/parquet/encoding_benchmark.cc
index 61959b659f..9c07d262b3 100644
--- a/cpp/src/parquet/encoding_benchmark.cc
+++ b/cpp/src/parquet/encoding_benchmark.cc
@@ -66,6 +66,7 @@ static void BM_PlainEncodingBoolean(benchmark::State& state) {
     typed_encoder->FlushValues();
   }
   state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(bool));
+  state.SetItemsProcessed(state.iterations() * state.range(0));
 }
 
 BENCHMARK(BM_PlainEncodingBoolean)->Range(MIN_RANGE, MAX_RANGE);
@@ -86,11 +87,34 @@ static void BM_PlainDecodingBoolean(benchmark::State& 
state) {
   }
 
   state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(bool));
+  state.SetItemsProcessed(state.iterations() * state.range(0));
   delete[] output;
 }
 
 BENCHMARK(BM_PlainDecodingBoolean)->Range(MIN_RANGE, MAX_RANGE);
 
+static void BM_PlainDecodingBooleanToBitmap(benchmark::State& state) {
+  std::vector<bool> values(state.range(0), true);
+  int64_t bitmap_bytes = ::arrow::bit_util::BytesForBits(state.range(0));
+  std::vector<uint8_t> output(bitmap_bytes, 0);
+  auto encoder = MakeEncoder(Type::BOOLEAN, Encoding::PLAIN);
+  auto typed_encoder = dynamic_cast<BooleanEncoder*>(encoder.get());
+  typed_encoder->Put(values, static_cast<int>(values.size()));
+  std::shared_ptr<Buffer> buf = encoder->FlushValues();
+
+  for (auto _ : state) {
+    auto decoder = MakeTypedDecoder<BooleanType>(Encoding::PLAIN);
+    decoder->SetData(static_cast<int>(values.size()), buf->data(),
+                     static_cast<int>(buf->size()));
+    decoder->Decode(output.data(), static_cast<int>(values.size()));
+  }
+  // Still set `BytesProcessed` to byte level.
+  state.SetBytesProcessed(state.iterations() * bitmap_bytes);
+  state.SetItemsProcessed(state.iterations() * state.range(0));
+}
+
+BENCHMARK(BM_PlainDecodingBooleanToBitmap)->Range(MIN_RANGE, MAX_RANGE);
+
 static void BM_PlainEncodingInt64(benchmark::State& state) {
   std::vector<int64_t> values(state.range(0), 64);
   auto encoder = MakeTypedEncoder<Int64Type>(Encoding::PLAIN);
@@ -1097,8 +1121,11 @@ 
BENCHMARK(BM_DictDecodingByteArray)->Apply(ByteArrayCustomArguments);
 using ::arrow::BinaryBuilder;
 using ::arrow::BinaryDictionary32Builder;
 
-class BenchmarkDecodeArrow : public ::benchmark::Fixture {
+template <typename ParquetType>
+class BenchmarkDecodeArrowBase : public ::benchmark::Fixture {
  public:
+  virtual ~BenchmarkDecodeArrowBase() = default;
+
   void SetUp(const ::benchmark::State& state) override {
     num_values_ = static_cast<int>(state.range());
     InitDataInputs();
@@ -1111,37 +1138,18 @@ class BenchmarkDecodeArrow : public 
::benchmark::Fixture {
     values_.clear();
   }
 
-  void InitDataInputs() {
-    // Generate a random string dictionary without any nulls so that this 
dataset can
-    // be used for benchmarking the DecodeArrowNonNull API
-    constexpr int repeat_factor = 8;
-    constexpr int64_t min_length = 2;
-    constexpr int64_t max_length = 10;
-    ::arrow::random::RandomArrayGenerator rag(0);
-    input_array_ = rag.StringWithRepeats(num_values_, num_values_ / 
repeat_factor,
-                                         min_length, max_length, 
/*null_probability=*/0);
-    valid_bits_ = input_array_->null_bitmap_data();
-    total_size_ = input_array_->data()->buffers[2]->size();
-
-    values_.reserve(num_values_);
-    const auto& binary_array = static_cast<const 
::arrow::BinaryArray&>(*input_array_);
-    for (int64_t i = 0; i < binary_array.length(); i++) {
-      auto view = binary_array.GetView(i);
-      values_.emplace_back(static_cast<uint32_t>(view.length()),
-                           reinterpret_cast<const uint8_t*>(view.data()));
-    }
-  }
-
+  virtual void InitDataInputs() = 0;
   virtual void DoEncodeArrow() = 0;
   virtual void DoEncodeLowLevel() = 0;
-
-  virtual std::unique_ptr<ByteArrayDecoder> InitializeDecoder() = 0;
+  virtual std::unique_ptr<TypedDecoder<ParquetType>> InitializeDecoder() = 0;
+  virtual typename EncodingTraits<ParquetType>::Accumulator 
CreateAccumulator() = 0;
 
   void EncodeArrowBenchmark(benchmark::State& state) {
     for (auto _ : state) {
       DoEncodeArrow();
     }
     state.SetBytesProcessed(state.iterations() * total_size_);
+    state.SetItemsProcessed(state.iterations() * num_values_);
   }
 
   void EncodeLowLevelBenchmark(benchmark::State& state) {
@@ -1149,26 +1157,27 @@ class BenchmarkDecodeArrow : public 
::benchmark::Fixture {
       DoEncodeLowLevel();
     }
     state.SetBytesProcessed(state.iterations() * total_size_);
+    state.SetItemsProcessed(state.iterations() * num_values_);
   }
 
   void DecodeArrowDenseBenchmark(benchmark::State& state) {
     for (auto _ : state) {
       auto decoder = InitializeDecoder();
-      typename EncodingTraits<ByteArrayType>::Accumulator acc;
-      acc.builder.reset(new BinaryBuilder);
+      auto acc = CreateAccumulator();
       decoder->DecodeArrow(num_values_, 0, valid_bits_, 0, &acc);
     }
     state.SetBytesProcessed(state.iterations() * total_size_);
+    state.SetItemsProcessed(state.iterations() * num_values_);
   }
 
   void DecodeArrowNonNullDenseBenchmark(benchmark::State& state) {
     for (auto _ : state) {
       auto decoder = InitializeDecoder();
-      typename EncodingTraits<ByteArrayType>::Accumulator acc;
-      acc.builder.reset(new BinaryBuilder);
+      auto acc = CreateAccumulator();
       decoder->DecodeArrowNonNull(num_values_, &acc);
     }
     state.SetBytesProcessed(state.iterations() * total_size_);
+    state.SetItemsProcessed(state.iterations() * num_values_);
   }
 
   void DecodeArrowDictBenchmark(benchmark::State& state) {
@@ -1179,6 +1188,7 @@ class BenchmarkDecodeArrow : public ::benchmark::Fixture {
     }
 
     state.SetBytesProcessed(state.iterations() * total_size_);
+    state.SetItemsProcessed(state.iterations() * num_values_);
   }
 
   void DecodeArrowNonNullDictBenchmark(benchmark::State& state) {
@@ -1189,20 +1199,56 @@ class BenchmarkDecodeArrow : public 
::benchmark::Fixture {
     }
 
     state.SetBytesProcessed(state.iterations() * total_size_);
+    state.SetItemsProcessed(state.iterations() * num_values_);
   }
 
  protected:
-  int num_values_;
+  int num_values_{0};
   std::shared_ptr<::arrow::Array> input_array_;
-  std::vector<ByteArray> values_;
-  uint64_t total_size_;
-  const uint8_t* valid_bits_;
+  uint64_t total_size_{0};
+  const uint8_t* valid_bits_{nullptr};
   std::shared_ptr<Buffer> buffer_;
+  std::vector<typename ParquetType::c_type> values_;
+};
+
+class BenchmarkDecodeArrowByteArray : public 
BenchmarkDecodeArrowBase<ByteArrayType> {
+ public:
+  using ByteArrayAccumulator = typename 
EncodingTraits<ByteArrayType>::Accumulator;
+
+  ByteArrayAccumulator CreateAccumulator() final {
+    ByteArrayAccumulator acc;
+    acc.builder = std::make_unique<BinaryBuilder>(default_memory_pool());
+    return acc;
+  }
+
+  void InitDataInputs() final {
+    // Generate a random string dictionary without any nulls so that this 
dataset can
+    // be used for benchmarking the DecodeArrowNonNull API
+    constexpr int repeat_factor = 8;
+    constexpr int64_t min_length = 2;
+    constexpr int64_t max_length = 10;
+    ::arrow::random::RandomArrayGenerator rag(0);
+    input_array_ = rag.StringWithRepeats(num_values_, num_values_ / 
repeat_factor,
+                                         min_length, max_length, 
/*null_probability=*/0);
+    valid_bits_ = input_array_->null_bitmap_data();
+    total_size_ = input_array_->data()->buffers[2]->size();
+
+    values_.reserve(num_values_);
+    const auto& binary_array = static_cast<const 
::arrow::BinaryArray&>(*input_array_);
+    for (int64_t i = 0; i < binary_array.length(); i++) {
+      auto view = binary_array.GetView(i);
+      values_.emplace_back(static_cast<uint32_t>(view.length()),
+                           reinterpret_cast<const uint8_t*>(view.data()));
+    }
+  }
+
+ protected:
+  std::vector<ByteArray> values_;
 };
 
 // ----------------------------------------------------------------------
 // Benchmark Decoding from Plain Encoding
-class BM_ArrowBinaryPlain : public BenchmarkDecodeArrow {
+class BM_ArrowBinaryPlain : public BenchmarkDecodeArrowByteArray {
  public:
   void DoEncodeArrow() override {
     auto encoder = MakeTypedEncoder<ByteArrayType>(Encoding::PLAIN);
@@ -1251,7 +1297,7 @@ BENCHMARK_REGISTER_F(BM_ArrowBinaryPlain, 
DecodeArrowNonNull_Dict)
 
 // ----------------------------------------------------------------------
 // Benchmark Decoding from Dictionary Encoding
-class BM_ArrowBinaryDict : public BenchmarkDecodeArrow {
+class BM_ArrowBinaryDict : public BenchmarkDecodeArrowByteArray {
  public:
   template <typename PutValuesFunc>
   void DoEncode(PutValuesFunc&& put_values) {
@@ -1319,7 +1365,7 @@ class BM_ArrowBinaryDict : public BenchmarkDecodeArrow {
   }
 
   void TearDown(const ::benchmark::State& state) override {
-    BenchmarkDecodeArrow::TearDown(state);
+    BenchmarkDecodeArrowByteArray::TearDown(state);
     dict_buffer_.reset();
     descr_.reset();
   }
@@ -1327,7 +1373,7 @@ class BM_ArrowBinaryDict : public BenchmarkDecodeArrow {
  protected:
   std::unique_ptr<ColumnDescriptor> descr_;
   std::shared_ptr<Buffer> dict_buffer_;
-  int num_dict_entries_;
+  int num_dict_entries_{0};
 };
 
 BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, EncodeArrow)
@@ -1373,4 +1419,122 @@ BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, 
DecodeArrowNonNull_Dict)
 BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, DecodeArrowNonNull_Dict)
     ->Range(MIN_RANGE, MAX_RANGE);
 
+class BenchmarkDecodeArrowBoolean : public 
BenchmarkDecodeArrowBase<BooleanType> {
+ public:
+  void InitDataInputs() final {
+    // Generate a random boolean array with `null_probability_`.
+    ::arrow::random::RandomArrayGenerator rag(0);
+    input_array_ = rag.Boolean(num_values_, /*true_probability=*/0.5, 
null_probability_);
+    valid_bits_ = input_array_->null_bitmap_data();
+
+    // Arrow uses a bitmap representation for boolean arrays,
+    // so, we uses this as "total_size" for the benchmark.
+    total_size_ = ::arrow::bit_util::BytesForBits(num_values_);
+
+    values_.reserve(num_values_);
+    const auto& boolean_array = static_cast<const 
::arrow::BooleanArray&>(*input_array_);
+    for (int64_t i = 0; i < boolean_array.length(); i++) {
+      values_.push_back(boolean_array.Value(i));
+    }
+  }
+
+  typename EncodingTraits<BooleanType>::Accumulator CreateAccumulator() final {
+    return typename EncodingTraits<BooleanType>::Accumulator();
+  }
+
+  void DoEncodeLowLevel() final { ParquetException::NYI(); }
+
+  void DecodeArrowWithNullDenseBenchmark(benchmark::State& state);
+
+ protected:
+  void DoEncodeArrowImpl(Encoding::type encoding) {
+    auto encoder = MakeTypedEncoder<BooleanType>(encoding);
+    encoder->Put(*input_array_);
+    buffer_ = encoder->FlushValues();
+  }
+
+  std::unique_ptr<TypedDecoder<BooleanType>> InitializeDecoderImpl(
+      Encoding::type encoding) const {
+    auto decoder = MakeTypedDecoder<BooleanType>(encoding);
+    decoder->SetData(num_values_, buffer_->data(), 
static_cast<int>(buffer_->size()));
+    return decoder;
+  }
+
+ protected:
+  double null_probability_ = 0.0;
+};
+
+void BenchmarkDecodeArrowBoolean::DecodeArrowWithNullDenseBenchmark(
+    benchmark::State& state) {
+  // Change null_probability
+  null_probability_ = static_cast<double>(state.range(1)) / 10000;
+  InitDataInputs();
+  this->DoEncodeArrow();
+  int num_values_with_nulls = this->num_values_;
+
+  for (auto _ : state) {
+    auto decoder = this->InitializeDecoder();
+    auto acc = this->CreateAccumulator();
+    decoder->DecodeArrow(
+        num_values_with_nulls,
+        /*null_count=*/static_cast<int>(this->input_array_->null_count()),
+        this->valid_bits_, 0, &acc);
+  }
+  state.SetBytesProcessed(state.iterations() * 
static_cast<int64_t>(total_size_));
+  state.SetItemsProcessed(state.iterations() * state.range(0));
+}
+
+class BM_DecodeArrowBooleanPlain : public BenchmarkDecodeArrowBoolean {
+ public:
+  void DoEncodeArrow() final { DoEncodeArrowImpl(Encoding::PLAIN); }
+
+  std::unique_ptr<TypedDecoder<BooleanType>> InitializeDecoder() override {
+    return InitializeDecoderImpl(Encoding::PLAIN);
+  }
+};
+
+class BM_DecodeArrowBooleanRle : public BenchmarkDecodeArrowBoolean {
+ public:
+  void DoEncodeArrow() final { DoEncodeArrowImpl(Encoding::RLE); }
+
+  std::unique_ptr<TypedDecoder<BooleanType>> InitializeDecoder() override {
+    return InitializeDecoderImpl(Encoding::RLE);
+  }
+};
+
+static void BooleanWithNullCustomArguments(benchmark::internal::Benchmark* b) {
+  b->ArgsProduct({
+                     benchmark::CreateRange(MIN_RANGE, MAX_RANGE, /*multi=*/4),
+                     {1, 100, 1000, 5000, 10000},
+                 })
+      ->ArgNames({"num_values", "null_in_ten_thousand"});
+}
+
+BENCHMARK_DEFINE_F(BM_DecodeArrowBooleanRle, DecodeArrow)(benchmark::State& 
state) {
+  DecodeArrowDenseBenchmark(state);
+}
+BENCHMARK_REGISTER_F(BM_DecodeArrowBooleanRle, DecodeArrow)->Range(MIN_RANGE, 
MAX_RANGE);
+BENCHMARK_DEFINE_F(BM_DecodeArrowBooleanRle, DecodeArrowNonNull)
+(benchmark::State& state) { DecodeArrowNonNullDenseBenchmark(state); }
+BENCHMARK_REGISTER_F(BM_DecodeArrowBooleanRle, DecodeArrowNonNull)
+    ->Range(MIN_RANGE, MAX_RANGE);
+// TODO(mwish): RleBoolean not implemented DecodeArrow with null slots yet.
+// BENCHMARK_DEFINE_F(BM_DecodeArrowBooleanRle, DecodeArrowWithNull)
+//(benchmark::State& state) { DecodeArrowWithNullDenseBenchmark(state); }
+// BENCHMARK_REGISTER_F(BM_DecodeArrowBooleanRle, DecodeArrowWithNull)
+//    ->Apply(BooleanWithNullCustomArguments);
+
+BENCHMARK_DEFINE_F(BM_DecodeArrowBooleanPlain, DecodeArrow)
+(benchmark::State& state) { DecodeArrowDenseBenchmark(state); }
+BENCHMARK_REGISTER_F(BM_DecodeArrowBooleanPlain, DecodeArrow)
+    ->Range(MIN_RANGE, MAX_RANGE);
+BENCHMARK_DEFINE_F(BM_DecodeArrowBooleanPlain, DecodeArrowNonNull)
+(benchmark::State& state) { DecodeArrowNonNullDenseBenchmark(state); }
+BENCHMARK_REGISTER_F(BM_DecodeArrowBooleanPlain, DecodeArrowNonNull)
+    ->Range(MIN_RANGE, MAX_RANGE);
+BENCHMARK_DEFINE_F(BM_DecodeArrowBooleanPlain, DecodeArrowWithNull)
+(benchmark::State& state) { DecodeArrowWithNullDenseBenchmark(state); }
+BENCHMARK_REGISTER_F(BM_DecodeArrowBooleanPlain, DecodeArrowWithNull)
+    ->Apply(BooleanWithNullCustomArguments);
+
 }  // namespace parquet

Reply via email to