emkornfield commented on a change in pull request #7175:
URL: https://github.com/apache/arrow/pull/7175#discussion_r427457149
##########
File path: cpp/src/parquet/arrow/reader_writer_benchmark.cc
##########
@@ -95,15 +97,37 @@ void SetBytesProcessed(::benchmark::State& state) {
state.SetBytesProcessed(bytes_processed);
}
+constexpr int64_t kAlternatingOrNa = -1;
+
+template <typename T>
+std::vector<T> RandomVector(int64_t true_percentage, int64_t vector_size,
+ const std::array<T, 2>& sample_values) {
+ std::vector<T> values(BENCHMARK_SIZE, {});
+ if (true_percentage == kAlternatingOrNa) {
+ int n = {0};
+ std::generate(values.begin(), values.end(), [&n] { return n++ % 2; });
+ } else {
+ std::default_random_engine rng(500);
+ double true_probability = static_cast<double>(true_percentage) / 100.0;
+ std::bernoulli_distribution dist(true_probability);
+ std::generate(values.begin(), values.end(), [&] { return sample_values[dist(rng)]; });
+ }
+ return values;
+}
+
template <typename ParquetType>
std::shared_ptr<::arrow::Table> TableFromVector(
- const std::vector<typename ParquetType::c_type>& vec, bool nullable) {
+ const std::vector<typename ParquetType::c_type>& vec, bool nullable,
+ int64_t null_percentage = kAlternatingOrNa) {
+ if (!nullable) {
+ ARROW_CHECK_EQ(null_percentage, kAlternatingOrNa);
+ }
std::shared_ptr<::arrow::DataType> type =
std::make_shared<ArrowType<ParquetType>>();
NumericBuilder<ArrowType<ParquetType>> builder;
if (nullable) {
- std::vector<uint8_t> valid_bytes(BENCHMARK_SIZE, 0);
- int n = {0};
- std::generate(valid_bytes.begin(), valid_bytes.end(), [&n] { return n++ % 2; });
+ // Note true values select index 1 of sample_values
+ auto valid_bytes = RandomVector<uint8_t>(/*true_percengate=*/null_percentage,
Review comment:
fixed
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]