wgtmac commented on code in PR #45085: URL: https://github.com/apache/arrow/pull/45085#discussion_r1906833939
##########
cpp/src/parquet/arrow/size_stats_benchmark.cc:
##########
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "benchmark/benchmark.h"
+
+#include <cstdint>
+#include <numeric>
+
+#include "parquet/arrow/writer.h"
+#include "parquet/file_reader.h"
+#include "parquet/metadata.h"
+#include "parquet/platform.h"
+#include "parquet/properties.h"
+
+#include "arrow/array.h"
+#include "arrow/io/buffered.h"
+#include "arrow/io/memory.h"
+#include "arrow/table.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
+
+namespace parquet::benchmark {
+
+// This should result in multiple pages for most primitive types
+constexpr int64_t kBenchmarkSize = 1024 * 1024;
+constexpr double kNullProbability = 0.5;

Review Comment:
kNullProbability = 0.95

```
------------------------------------------------------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations UserCounters...
------------------------------------------------------------------------------------------------------------------------------------------------
BM_WritePrimitiveColumn<SizeStatisticsLevel::None, ::arrow::Int64Type> 61819253 ns 61819750 ns 20 bytes_per_second=131.43Mi/s items_per_second=16.9618M/s output_size=546.091k page_index_size=33
BM_WritePrimitiveColumn<SizeStatisticsLevel::ColumnChunk, ::arrow::Int64Type> 55140742 ns 54834400 ns 10 bytes_per_second=148.173Mi/s items_per_second=19.1226M/s output_size=546.107k page_index_size=33
BM_WritePrimitiveColumn<SizeStatisticsLevel::PageAndColumnChunk, ::arrow::Int64Type> 53924121 ns 53887400 ns 10 bytes_per_second=150.777Mi/s items_per_second=19.4586M/s output_size=546.121k page_index_size=47
BM_WritePrimitiveColumn<SizeStatisticsLevel::None, ::arrow::StringType> 51773791 ns 51774500 ns 10 bytes_per_second=89.4236Mi/s items_per_second=20.2527M/s output_size=864.083k page_index_size=30
BM_WritePrimitiveColumn<SizeStatisticsLevel::ColumnChunk, ::arrow::StringType> 65489058 ns 65488500 ns 10 bytes_per_second=70.6973Mi/s items_per_second=16.0116M/s output_size=864.103k page_index_size=30
BM_WritePrimitiveColumn<SizeStatisticsLevel::PageAndColumnChunk, ::arrow::StringType> 65241288 ns 65241300 ns 10 bytes_per_second=70.9652Mi/s items_per_second=16.0723M/s output_size=864.122k page_index_size=44
BM_WriteListColumn<SizeStatisticsLevel::None, ::arrow::Int64Type> 72174783 ns 72174900 ns 10 bytes_per_second=118.289Mi/s items_per_second=14.5283M/s output_size=625.915k page_index_size=34
BM_WriteListColumn<SizeStatisticsLevel::ColumnChunk, ::arrow::Int64Type> 102759675 ns 102760300 ns 10 bytes_per_second=83.0817Mi/s items_per_second=10.2041M/s output_size=625.937k page_index_size=34
BM_WriteListColumn<SizeStatisticsLevel::PageAndColumnChunk, ::arrow::Int64Type> 105034546 ns 105034000 ns 10 bytes_per_second=81.2832Mi/s items_per_second=9.98321M/s output_size=625.957k page_index_size=54
BM_WriteListColumn<SizeStatisticsLevel::None, ::arrow::StringType> 92049333 ns 92049200 ns 10 bytes_per_second=54.779Mi/s items_per_second=11.3915M/s output_size=944.123k page_index_size=31
BM_WriteListColumn<SizeStatisticsLevel::ColumnChunk, ::arrow::StringType> 122477704 ns 122478100 ns 10 bytes_per_second=41.1695Mi/s items_per_second=8.56133M/s output_size=944.149k page_index_size=31
BM_WriteListColumn<SizeStatisticsLevel::PageAndColumnChunk, ::arrow::StringType> 121217775 ns 121217000 ns 10 bytes_per_second=41.5978Mi/s items_per_second=8.6504M/s output_size=944.174k page_index_size=51
```

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]
