This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch tpc_preview4 in repository https://gitbox.apache.org/repos/asf/doris.git
commit d2589b7200ce73f6acba765519cec76924c8da5f Author: happenlee <[email protected]> AuthorDate: Wed Dec 10 19:13:32 2025 +0800 plain lz4 null page encode --- be/src/olap/rowset/segment_v2/column_writer.cpp | 32 +++++++++++++++++++++---- be/src/olap/rowset/segment_v2/parsed_page.h | 21 +++++++++++----- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index 19409824612..03bb4bfffdb 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -140,10 +140,34 @@ public: Status finish(OwnedSlice* slice) override { // No need to flush, just build the slice from the buffer RETURN_IF_CATCH_EXCEPTION({ - // Create a new OwnedSlice and copy the data - OwnedSlice result(_bitmap_buf.size()); - memcpy(result.data(), _bitmap_buf.data(), _bitmap_buf.size()); - *slice = std::move(result); + // Check if we should compress the data + if (!_bitmap_buf.empty()) { + // Get LZ4 compression codec + BlockCompressionCodec* codec = nullptr; + RETURN_IF_ERROR( + get_block_compression_codec(segment_v2::CompressionTypePB::LZ4, &codec)); + if (codec != nullptr) { + // Compress the data + faststring compressed_buf; + Slice raw_slice(_bitmap_buf.data(), _bitmap_buf.size()); + Status status = codec->compress(raw_slice, &compressed_buf); + if (status.ok()) { + // Use compressed data if compression is successful and reduces size + // if (compressed_buf.size() < _bitmap_buf.size()) { + // Directly build OwnedSlice from compressed_buf to avoid memory copy + *slice = compressed_buf.build(); + return Status::OK(); + // } + } else { + return status; + } + } + } + // // Fallback to uncompressed data if compression fails or doesn't reduce size + // // Create OwnedSlice directly from _bitmap_buf data + // OwnedSlice result(_bitmap_buf.size()); + // memcpy(result.data(), _bitmap_buf.data(), _bitmap_buf.size()); + // *slice = std::move(result); }); return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/parsed_page.h b/be/src/olap/rowset/segment_v2/parsed_page.h index b654b73b10d..6137ee5e1f2 100644 --- a/be/src/olap/rowset/segment_v2/parsed_page.h +++ b/be/src/olap/rowset/segment_v2/parsed_page.h @@ -29,6 +29,7 @@ #include "olap/rowset/segment_v2/options.h" #include "olap/rowset/segment_v2/page_decoder.h" #include "olap/rowset/segment_v2/page_handle.h" +#include "util/block_compression.h" #include "util/rle_encoding.h" #include "util/slice.h" @@ -52,15 +53,24 @@ struct ParsedPage { if (null_size > 0) { if (footer.has_new_null_map() && footer.new_null_map()) { - page->null_maps = std::span<uint8_t>((uint8_t*)null_bitmap.data, null_size); + // Get LZ4 compression codec + BlockCompressionCodec* codec = nullptr; + RETURN_IF_ERROR( + get_block_compression_codec(segment_v2::CompressionTypePB::LZ4, &codec)); + if (codec != nullptr) { + // Compress the data + faststring compressed_buf; + page->null_maps.resize(footer.num_values()); + auto tmp_slice = Slice(page->null_maps.data(), page->null_maps.size()); + RETURN_IF_ERROR(codec->decompress(null_bitmap, &tmp_slice)); + } } else { auto null_decoder = RleDecoder<bool>((const uint8_t*)null_bitmap.data, null_size, 1); // Decode all null values into null_maps in advance auto num_rows = footer.num_values(); - page->null_bitmap.resize(num_rows); - null_decoder.get_values((bool*)page->null_bitmap.data(), num_rows); - page->null_maps = std::span<uint8_t>(page->null_bitmap.data(), num_rows); + page->null_maps.resize(num_rows); + null_decoder.get_values((bool*)page->null_maps.data(), num_rows); } } @@ -90,8 +100,7 @@ struct ParsedPage { PageHandle page_handle; - std::span<uint8_t> null_maps; - std::vector<uint8_t> null_bitmap; + std::vector<uint8_t> null_maps; std::unique_ptr<PageDecoder> data_decoder; // ordinal of the first value in this page --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
