mapleFU commented on code in PR #38067:
URL: https://github.com/apache/arrow/pull/38067#discussion_r1369004030
##########
cpp/src/arrow/util/compression_benchmark.cc:
##########
@@ -175,27 +206,56 @@ static void ReferenceStreamingDecompression(
StreamingDecompression(COMPRESSION, data, state);
}
+template <Compression::type COMPRESSION>
+static void ReferenceDecompression(
+ benchmark::State& state) { // NOLINT non-const
reference
+ auto data = MakeCompressibleData(8 * 1024 * 1024); // 8 MB
+
+ auto codec = *Codec::Create(COMPRESSION);
+
+ std::vector<uint8_t> compressed_data;
+ ARROW_UNUSED(NonStreamingCompress(codec.get(), data, &compressed_data));
+ state.counters["ratio"] =
+ static_cast<double>(data.size()) /
static_cast<double>(compressed_data.size());
+
+ std::vector<uint8_t> decompressed_data(data);
+ while (state.KeepRunning()) {
+ auto result = codec->Decompress(compressed_data.size(),
compressed_data.data(),
+ decompressed_data.size(),
decompressed_data.data());
+ ARROW_CHECK(result.ok());
+ ARROW_CHECK(*result == static_cast<int64_t>(decompressed_data.size()));
+ }
+ state.SetBytesProcessed(state.iterations() * data.size());
+}
+
#ifdef ARROW_WITH_ZLIB
BENCHMARK_TEMPLATE(ReferenceStreamingCompression, Compression::GZIP);
+BENCHMARK_TEMPLATE(ReferenceCompression, Compression::GZIP);
BENCHMARK_TEMPLATE(ReferenceStreamingDecompression, Compression::GZIP);
+BENCHMARK_TEMPLATE(ReferenceDecompression, Compression::GZIP);
#endif
#ifdef ARROW_WITH_BROTLI
BENCHMARK_TEMPLATE(ReferenceStreamingCompression, Compression::BROTLI);
+BENCHMARK_TEMPLATE(ReferenceCompression, Compression::BROTLI);
BENCHMARK_TEMPLATE(ReferenceStreamingDecompression, Compression::BROTLI);
+BENCHMARK_TEMPLATE(ReferenceDecompression, Compression::BROTLI);
#endif
#ifdef ARROW_WITH_ZSTD
BENCHMARK_TEMPLATE(ReferenceStreamingCompression, Compression::ZSTD);
+BENCHMARK_TEMPLATE(ReferenceCompression, Compression::ZSTD);
BENCHMARK_TEMPLATE(ReferenceStreamingDecompression, Compression::ZSTD);
+BENCHMARK_TEMPLATE(ReferenceDecompression, Compression::ZSTD);
#endif
#ifdef ARROW_WITH_LZ4
BENCHMARK_TEMPLATE(ReferenceStreamingCompression, Compression::LZ4_FRAME);
+BENCHMARK_TEMPLATE(ReferenceCompression, Compression::LZ4_FRAME);
BENCHMARK_TEMPLATE(ReferenceStreamingDecompression, Compression::LZ4_FRAME);
+BENCHMARK_TEMPLATE(ReferenceDecompression, Compression::LZ4_FRAME);
Review Comment:
> It seems that Parquet doesn't use LZ4_FRAME
Aha, I remember that parquet-mr implemented LZ4 first, and Arrow implemented a
different version (LZ4_FRAME). `LZ4` stores an extra length here.
Maybe https://github.com/apache/parquet-format/pull/168 helps.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]