wgtmac commented on code in PR #45085:
URL: https://github.com/apache/arrow/pull/45085#discussion_r1906833939


##########
cpp/src/parquet/arrow/size_stats_benchmark.cc:
##########
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "benchmark/benchmark.h"
+
+#include <cstdint>
+#include <numeric>
+
+#include "parquet/arrow/writer.h"
+#include "parquet/file_reader.h"
+#include "parquet/metadata.h"
+#include "parquet/platform.h"
+#include "parquet/properties.h"
+
+#include "arrow/array.h"
+#include "arrow/io/buffered.h"
+#include "arrow/io/memory.h"
+#include "arrow/table.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
+
+namespace parquet::benchmark {
+
+// This should result in multiple pages for most primitive types
+constexpr int64_t kBenchmarkSize = 1024 * 1024;
+constexpr double kNullProbability = 0.5;

Review Comment:
   Benchmark results with `kNullProbability = 0.95`:
   
   ```
   ----------------------------------------------------------------------------------------------------------------------------------------------------------------
   Benchmark                                                                                      Time             CPU   Iterations UserCounters...
   ----------------------------------------------------------------------------------------------------------------------------------------------------------------
   BM_WritePrimitiveColumn<SizeStatisticsLevel::None, ::arrow::Int64Type>                  61819253 ns     61819750 ns           20 bytes_per_second=131.43Mi/s items_per_second=16.9618M/s output_size=546.091k page_index_size=33
   BM_WritePrimitiveColumn<SizeStatisticsLevel::ColumnChunk, ::arrow::Int64Type>           55140742 ns     54834400 ns           10 bytes_per_second=148.173Mi/s items_per_second=19.1226M/s output_size=546.107k page_index_size=33
   BM_WritePrimitiveColumn<SizeStatisticsLevel::PageAndColumnChunk, ::arrow::Int64Type>    53924121 ns     53887400 ns           10 bytes_per_second=150.777Mi/s items_per_second=19.4586M/s output_size=546.121k page_index_size=47
   BM_WritePrimitiveColumn<SizeStatisticsLevel::None, ::arrow::StringType>                 51773791 ns     51774500 ns           10 bytes_per_second=89.4236Mi/s items_per_second=20.2527M/s output_size=864.083k page_index_size=30
   BM_WritePrimitiveColumn<SizeStatisticsLevel::ColumnChunk, ::arrow::StringType>          65489058 ns     65488500 ns           10 bytes_per_second=70.6973Mi/s items_per_second=16.0116M/s output_size=864.103k page_index_size=30
   BM_WritePrimitiveColumn<SizeStatisticsLevel::PageAndColumnChunk, ::arrow::StringType>   65241288 ns     65241300 ns           10 bytes_per_second=70.9652Mi/s items_per_second=16.0723M/s output_size=864.122k page_index_size=44
   BM_WriteListColumn<SizeStatisticsLevel::None, ::arrow::Int64Type>                       72174783 ns     72174900 ns           10 bytes_per_second=118.289Mi/s items_per_second=14.5283M/s output_size=625.915k page_index_size=34
   BM_WriteListColumn<SizeStatisticsLevel::ColumnChunk, ::arrow::Int64Type>               102759675 ns    102760300 ns           10 bytes_per_second=83.0817Mi/s items_per_second=10.2041M/s output_size=625.937k page_index_size=34
   BM_WriteListColumn<SizeStatisticsLevel::PageAndColumnChunk, ::arrow::Int64Type>        105034546 ns    105034000 ns           10 bytes_per_second=81.2832Mi/s items_per_second=9.98321M/s output_size=625.957k page_index_size=54
   BM_WriteListColumn<SizeStatisticsLevel::None, ::arrow::StringType>                      92049333 ns     92049200 ns           10 bytes_per_second=54.779Mi/s items_per_second=11.3915M/s output_size=944.123k page_index_size=31
   BM_WriteListColumn<SizeStatisticsLevel::ColumnChunk, ::arrow::StringType>              122477704 ns    122478100 ns           10 bytes_per_second=41.1695Mi/s items_per_second=8.56133M/s output_size=944.149k page_index_size=31
   BM_WriteListColumn<SizeStatisticsLevel::PageAndColumnChunk, ::arrow::StringType>       121217775 ns    121217000 ns           10 bytes_per_second=41.5978Mi/s items_per_second=8.6504M/s output_size=944.174k page_index_size=51
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to