jianxind commented on a change in pull request #7213:
URL: https://github.com/apache/arrow/pull/7213#discussion_r427133854
##########
File path: cpp/src/parquet/encoding_benchmark.cc
##########
@@ -199,6 +200,130 @@ static void BM_PlainDecodingFloat(benchmark::State& state) {
BENCHMARK(BM_PlainDecodingFloat)->Range(MIN_RANGE, MAX_RANGE);
+template <typename ParquetType>
+struct BM_SpacedEncodingTraits;
+
+template <>
+struct BM_SpacedEncodingTraits<BooleanType> {
+ // Leverage UInt8 vector array data for Boolean, the input src of PutSpaced is bool*
+ using ArrowType = ::arrow::UInt8Type;
+ using ArrayType = ::arrow::UInt8Array;
+ using CType = bool;
+};
+
+template <>
+struct BM_SpacedEncodingTraits<FloatType> {
+ using ArrowType = typename EncodingTraits<FloatType>::ArrowType;
+ using ArrayType = typename arrow::TypeTraits<ArrowType>::ArrayType;
+ using CType = typename FloatType::c_type;
+};
+
+template <>
+struct BM_SpacedEncodingTraits<DoubleType> {
+ using ArrowType = typename EncodingTraits<DoubleType>::ArrowType;
+ using ArrayType = typename arrow::TypeTraits<ArrowType>::ArrayType;
+ using CType = typename DoubleType::c_type;
+};
+
+static void BM_PlainSpacedArgs(benchmark::internal::Benchmark* bench) {
+ static const auto BM_kPlainSpacedSize =
+ arrow::internal::CpuInfo::GetInstance()->CacheSize(
+ arrow::internal::CpuInfo::L1_CACHE);
+
+ bench->Args({/*size*/ BM_kPlainSpacedSize, /*null_percentage=*/1});
+ bench->Args({/*size*/ BM_kPlainSpacedSize, /*null_percentage=*/10});
+ bench->Args({/*size*/ BM_kPlainSpacedSize, /*null_percentage=*/50});
+ bench->Args({/*size*/ BM_kPlainSpacedSize, /*null_percentage=*/90});
+ bench->Args({/*size*/ BM_kPlainSpacedSize, /*null_percentage=*/99});
+}
+
+template <typename ParquetType>
+static void BM_PlainEncodingSpaced(benchmark::State& state) {
+ using ArrowType = typename BM_SpacedEncodingTraits<ParquetType>::ArrowType;
+ using ArrayType = typename BM_SpacedEncodingTraits<ParquetType>::ArrayType;
+ using CType = typename BM_SpacedEncodingTraits<ParquetType>::CType;
+
+ const auto num_values = state.range(0);
+ const double null_percent = static_cast<double>(state.range(1)) / 100.0;
+
+ auto rand = ::arrow::random::RandomArrayGenerator(1923);
+ const auto array = rand.Numeric<ArrowType>(num_values, -100, 100, null_percent);
+ const auto valid_bits = array->null_bitmap_data();
+ const auto array_actual = arrow::internal::checked_pointer_cast<ArrayType>(array);
+
+ auto encoder = MakeTypedEncoder<ParquetType>(Encoding::PLAIN);
+ for (auto _ : state) {
+ // Cast only happens for special BooleanType as it use UInt8 for the array data to
+ // match a bool* input to PutSpaced.
+ encoder->PutSpaced(reinterpret_cast<const CType*>(array_actual->raw_values()),
+ num_values, valid_bits, 0);
+ encoder->FlushValues();
+ }
+ state.SetBytesProcessed(state.iterations() * num_values * sizeof(CType));
+}
+
+static void BM_PlainEncodingSpacedBoolean(benchmark::State& state) {
+ BM_PlainEncodingSpaced<BooleanType>(state);
+}
+BENCHMARK(BM_PlainEncodingSpacedBoolean)->Apply(BM_PlainSpacedArgs);
+
+static void BM_PlainEncodingSpacedFloat(benchmark::State& state) {
+ BM_PlainEncodingSpaced<FloatType>(state);
+}
+BENCHMARK(BM_PlainEncodingSpacedFloat)->Apply(BM_PlainSpacedArgs);
+
+static void BM_PlainEncodingSpacedDouble(benchmark::State& state) {
+ BM_PlainEncodingSpaced<DoubleType>(state);
+}
+BENCHMARK(BM_PlainEncodingSpacedDouble)->Apply(BM_PlainSpacedArgs);
+
+template <typename ParquetType>
+static void BM_PlainDecodingSpaced(benchmark::State& state) {
+ using ArrowType = typename BM_SpacedEncodingTraits<ParquetType>::ArrowType;
+ using ArrayType = typename BM_SpacedEncodingTraits<ParquetType>::ArrayType;
+ using CType = typename BM_SpacedEncodingTraits<ParquetType>::CType;
+
+ const auto num_values = state.range(0);
+ const double null_percent = static_cast<double>(state.range(1)) / 100.0;
+
+ auto rand = ::arrow::random::RandomArrayGenerator(1923);
+ const auto array = rand.Numeric<ArrowType>(num_values, -100, 100, null_percent);
+ const auto valid_bits = array->null_bitmap_data();
+ const auto null_count = array->null_count();
+ const auto array_actual = arrow::internal::checked_pointer_cast<ArrayType>(array);
+
+ auto encoder = MakeTypedEncoder<ParquetType>(Encoding::PLAIN);
+ // Cast only happens for special BooleanType as it use UInt8 for the array data to match
+ // a bool* input to PutSpaced.
Review comment:
I just added a static_assert to guarantee that the sizes match across the cast.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]