This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch tpc_preview4 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 9209c20f8737898c39ad047f145a44add474653e Author: happenlee <[email protected]> AuthorDate: Wed Dec 10 16:49:37 2025 +0800 change the null map encode --- be/src/olap/rowset/segment_v2/column_writer.cpp | 87 +++++++++++++++++++++++-- be/src/olap/rowset/segment_v2/column_writer.h | 4 +- be/src/olap/rowset/segment_v2/parsed_page.h | 19 ++++-- gensrc/proto/segment_v2.proto | 2 + 4 files changed, 98 insertions(+), 14 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index b165b2b766a..19409824612 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -22,6 +22,7 @@ #include <algorithm> #include <filesystem> #include <memory> +#include <vector> #include "common/config.h" #include "common/logging.h" @@ -52,7 +53,28 @@ namespace doris::segment_v2 { #include "common/compile_check_begin.h" -class NullBitmapBuilder { +// Abstract base class for null bitmap builders +class NullBitmapBuilderBase { +public: + virtual ~NullBitmapBuilderBase() = default; + + // Add a run of 'run' values, all equal to 'value' + virtual void add_run(bool value, size_t run) = 0; + + // Returns whether the building nullmap contains any null values + virtual bool has_null() const = 0; + + // Finish building the null bitmap and write the result to 'slice' + virtual Status finish(OwnedSlice* slice) = 0; + + // Reset the builder to its initial state + virtual void reset() = 0; + + // Return the current size of the buffer in bytes + virtual uint64_t size() = 0; +}; + +class NullBitmapBuilder : public NullBitmapBuilderBase { public: NullBitmapBuilder() : _has_null(false), _bitmap_buf(512), _rle_encoder(&_bitmap_buf, 1) {} @@ -61,26 +83,26 @@ public: _bitmap_buf(BitmapSize(reserve_bits)), _rle_encoder(&_bitmap_buf, 1) {} - void add_run(bool value, size_t run) { + void add_run(bool value, size_t run) override { _has_null |= value; _rle_encoder.Put(value, run); } // 
Returns whether the building nullmap contains nullptr - bool has_null() const { return _has_null; } + bool has_null() const override { return _has_null; } - Status finish(OwnedSlice* slice) { + Status finish(OwnedSlice* slice) override { _rle_encoder.Flush(); RETURN_IF_CATCH_EXCEPTION({ *slice = _bitmap_buf.build(); }); return Status::OK(); } - void reset() { + void reset() override { _has_null = false; _rle_encoder.Clear(); } - uint64_t size() { return _bitmap_buf.size(); } + uint64_t size() override { return _bitmap_buf.size(); } private: bool _has_null; @@ -88,6 +110,56 @@ private: RleEncoder<bool> _rle_encoder; }; +// PlainNullBitmapBuilder uses std::vector<uint8_t> to store null values directly without RLE encoding +// Each uint8_t represents a single null value: 0 = non-null, 1 = null +class PlainNullBitmapBuilder : public NullBitmapBuilderBase { +public: + PlainNullBitmapBuilder() : _has_null(false), _bitmap_buf() {} + + explicit PlainNullBitmapBuilder(size_t reserve_bits) + : _has_null(false), + _bitmap_buf(reserve_bits, 0) {} // Reserve enough bytes for the given number of bits + + void add_run(bool value, size_t run) override { + _has_null |= value; + const uint8_t val = value ? 
1 : 0; + + // Ensure the buffer has enough bytes to hold all values + const size_t current_size = _bitmap_buf.size(); + _bitmap_buf.resize(current_size + run, 0); + + if (val) { + // Fill the new bytes with the value + std::fill(_bitmap_buf.begin() + current_size, _bitmap_buf.end(), val); + } + } + + // Returns whether the building nullmap contains nullptr + bool has_null() const override { return _has_null; } + + Status finish(OwnedSlice* slice) override { + // No need to flush, just build the slice from the buffer + RETURN_IF_CATCH_EXCEPTION({ + // Create a new OwnedSlice and copy the data + OwnedSlice result(_bitmap_buf.size()); + memcpy(result.data(), _bitmap_buf.data(), _bitmap_buf.size()); + *slice = std::move(result); + }); + return Status::OK(); + } + + void reset() override { + _has_null = false; + _bitmap_buf.clear(); + } + + uint64_t size() override { return _bitmap_buf.size(); } + +private: + bool _has_null; + std::vector<uint8_t> _bitmap_buf; +}; + inline ScalarColumnWriter* get_null_writer(const ColumnWriterOptions& opts, io::FileWriter* file_writer, uint32_t id) { if (!opts.meta->is_nullable()) { @@ -458,7 +530,7 @@ Status ScalarColumnWriter::init() { _ordinal_index_builder = std::make_unique<OrdinalIndexWriter>(); // create null bitmap builder if (is_nullable()) { - _null_bitmap_builder = std::make_unique<NullBitmapBuilder>(); + _null_bitmap_builder = std::make_unique<PlainNullBitmapBuilder>(); } if (_opts.need_zone_map) { RETURN_IF_ERROR(ZoneMapIndexWriter::create(get_field(), _zone_map_index_builder)); @@ -743,6 +815,7 @@ Status ScalarColumnWriter::finish_current_page() { data_page_footer->set_first_ordinal(_first_rowid); data_page_footer->set_num_values(_next_rowid - _first_rowid); data_page_footer->set_nullmap_size(cast_set<uint32_t>(nullmap.slice().size)); + data_page_footer->set_new_null_map(true); if (_new_page_callback != nullptr) { _new_page_callback->put_extra_info_in_page(data_page_footer); } diff --git 
a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index 9e39ef45bb4..89d544ea2e9 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -92,7 +92,9 @@ struct ColumnWriterOptions { class BitmapIndexWriter; class EncodingInfo; +class NullBitmapBuilderBase; class NullBitmapBuilder; +class PlainNullBitmapBuilder; class OrdinalIndexWriter; class PageBuilder; class BloomFilterIndexWriter; @@ -268,7 +270,7 @@ private: private: std::unique_ptr<PageBuilder> _page_builder; - std::unique_ptr<NullBitmapBuilder> _null_bitmap_builder; + std::unique_ptr<NullBitmapBuilderBase> _null_bitmap_builder; ColumnWriterOptions _opts; diff --git a/be/src/olap/rowset/segment_v2/parsed_page.h b/be/src/olap/rowset/segment_v2/parsed_page.h index 7ef20adecfe..b654b73b10d 100644 --- a/be/src/olap/rowset/segment_v2/parsed_page.h +++ b/be/src/olap/rowset/segment_v2/parsed_page.h @@ -51,11 +51,17 @@ struct ParsedPage { auto null_bitmap = Slice(body.data + body.size - null_size, null_size); if (null_size > 0) { - auto null_decoder = RleDecoder<bool>((const uint8_t*)null_bitmap.data, null_size, 1); - // Decode all null values into null_maps in advance - auto num_rows = footer.num_values(); - page->null_maps.resize(num_rows); - null_decoder.get_values((bool*)page->null_maps.data(), num_rows); + if (footer.has_new_null_map() && footer.new_null_map()) { + page->null_maps = std::span<uint8_t>((uint8_t*)null_bitmap.data, null_size); + } else { + auto null_decoder = + RleDecoder<bool>((const uint8_t*)null_bitmap.data, null_size, 1); + // Decode all null values into null_maps in advance + auto num_rows = footer.num_values(); + page->null_bitmap.resize(num_rows); + null_decoder.get_values((bool*)page->null_bitmap.data(), num_rows); + page->null_maps = std::span<uint8_t>(page->null_bitmap.data(), num_rows); + } } Slice data_slice(body.data, body.size - null_size); @@ -84,7 +90,8 @@ struct ParsedPage { 
PageHandle page_handle; - std::vector<uint8_t> null_maps; + std::span<uint8_t> null_maps; + std::vector<uint8_t> null_bitmap; std::unique_ptr<PageDecoder> data_decoder; // ordinal of the first value in this page diff --git a/gensrc/proto/segment_v2.proto b/gensrc/proto/segment_v2.proto index 653d565d546..535c270d40b 100644 --- a/gensrc/proto/segment_v2.proto +++ b/gensrc/proto/segment_v2.proto @@ -74,6 +74,8 @@ message DataPageFooterPB { // only for array column // Save the offset of next page optional uint64 next_array_item_ordinal = 4; + + optional bool new_null_map = 5; } message IndexPageFooterPB { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For additional commands, e-mail: commits-help@doris.apache.org
