This is an automated email from the ASF dual-hosted git repository.
yangzhg pushed a commit to branch support_batch_delete_in_fe
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/support_batch_delete_in_fe by
this push:
new f2b2a0a Add delete bitmap index and base operation (#4242)
f2b2a0a is described below
commit f2b2a0a70bc6d1fed3f5d4f853ede10b35a53f92
Author: ZhangYu0123 <[email protected]>
AuthorDate: Tue Aug 4 14:29:33 2020 +0800
Add delete bitmap index and base operation (#4242)
* add delete bitmap index
* delete index for batch delete
* delete index for batch delete
* add delete bitmap index
* add delete bitmap index
---
be/src/olap/CMakeLists.txt | 1 +
be/src/olap/delete_bitmap_index.cpp | 72 +++++++++++++
be/src/olap/delete_bitmap_index.h | 118 +++++++++++++++++++++
be/src/olap/row_block.cpp | 2 +
be/src/olap/row_block.h | 8 ++
be/src/olap/row_block2.cpp | 12 ++-
be/src/olap/row_block2.h | 8 ++
be/src/olap/row_cursor.cpp | 3 +-
be/src/olap/row_cursor.h | 7 ++
be/src/olap/rowset/segment_v2/segment.cpp | 32 ++++++
be/src/olap/rowset/segment_v2/segment.h | 15 +++
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 22 ++++
be/src/olap/rowset/segment_v2/segment_writer.cpp | 14 +++
be/src/olap/rowset/segment_v2/segment_writer.h | 3 +
be/test/olap/CMakeLists.txt | 1 +
be/test/olap/delete_bitmap_index_test.cpp | 79 ++++++++++++++
docs/zh-CN/internal/doris_storage_optimization.md | 13 +--
gensrc/proto/segment_v2.proto | 10 ++
18 files changed, 409 insertions(+), 11 deletions(-)
diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt
index 3705f59..99da5ef 100644
--- a/be/src/olap/CMakeLists.txt
+++ b/be/src/olap/CMakeLists.txt
@@ -71,6 +71,7 @@ add_library(Olap STATIC
data_dir.cpp
row.cpp
short_key_index.cpp
+ delete_bitmap_index.cpp
snapshot_manager.cpp
stream_index_common.cpp
stream_index_reader.cpp
diff --git a/be/src/olap/delete_bitmap_index.cpp
b/be/src/olap/delete_bitmap_index.cpp
new file mode 100644
index 0000000..92f9e02
--- /dev/null
+++ b/be/src/olap/delete_bitmap_index.cpp
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/delete_bitmap_index.h"
+
+#include <string>
+#include "gutil/strings/substitute.h"
+
+using strings::Substitute;
+
+namespace doris {
+
+/// Records one deleted row in the index.
+/// @param _row_count value inserted into the roaring bitmap (the class comment
+///        describes the bitmap as holding row ids of delete items).
+/// @return always Status::OK().
+Status DeleteBitmapIndexBuilder::add_delete_item(const uint32_t& _row_count) {
+    _delete_bitmap.add(_row_count);
+    // The counter is bumped on every call, independent of what the bitmap already holds.
+    ++_num_items;
+    return Status::OK();
+}
+
+/// Serializes the delete bitmap and fills in the page footer that describes it.
+/// @param body        receives one Slice built from the builder's internal buffer.
+/// @param page_footer receives the page type, the uncompressed size and the
+///                    delete-index specific footer (item count, content bytes).
+/// @return always Status::OK().
+Status DeleteBitmapIndexBuilder::finalize(std::vector<Slice>* body,
+                                          segment_v2::PageFooterPB* page_footer) {
+    // Optimize before measuring so the size matches what write() emits below.
+    _delete_bitmap.runOptimize();
+    // The same `false` flag is passed to getSizeInBytes() and write() so both agree on the format.
+    uint32_t serialized_bytes = _delete_bitmap.getSizeInBytes(false);
+
+    // Describe the page.
+    page_footer->set_type(segment_v2::DELETE_INDEX_PAGE);
+    page_footer->set_uncompressed_size(serialized_bytes);
+    segment_v2::DeleteIndexFooterPB* index_footer = page_footer->mutable_delete_index_page_footer();
+    index_footer->set_num_items(_num_items);
+    index_footer->set_content_bytes(serialized_bytes);
+
+    // Serialize into the builder-owned buffer and hand it back as the page body.
+    _buf.resize(serialized_bytes);
+    _delete_bitmap.write(reinterpret_cast<char*>(_buf.data()), false);
+    body->emplace_back(_buf);
+    return Status::OK();
+}
+
+/// Loads the delete bitmap from a page body.
+/// @param body   serialized bitmap bytes; its size must equal footer.content_bytes().
+/// @param footer delete index footer read alongside the page; a copy is kept.
+/// @return Status::Corruption when the body length disagrees with the footer,
+///         Status::OK() otherwise.
+Status DeleteBitmapIndexDecoder::parse(const Slice& body,
+                                       const segment_v2::DeleteIndexFooterPB& footer) {
+    // The footer is stored before validation, so it is updated even when parsing fails.
+    _footer = footer;
+
+    // The footer records how many bytes the bitmap occupies; reject any other length.
+    const auto expected_bytes = _footer.content_bytes();
+    if (body.size != expected_bytes) {
+        return Status::Corruption(Substitute("Index size not match, need=$0, real=$1",
+                                             expected_bytes, body.size));
+    }
+
+    // The size check above guarantees that the whole body is exactly the bitmap.
+    _delete_bitmap = Roaring::read(body.data, false);
+
+    _parsed = true;
+    return Status::OK();
+}
+
+/// Returns the bitmap held by the decoder this iterator was created from.
+const Roaring& DeleteBitmapIndexIterator::get_delete_bitmap() const {
+    const DeleteBitmapIndexDecoder& decoder = *_decoder;
+    return decoder.get_delete_bitmap();
+}
+}
diff --git a/be/src/olap/delete_bitmap_index.h
b/be/src/olap/delete_bitmap_index.h
new file mode 100644
index 0000000..5d1c553
--- /dev/null
+++ b/be/src/olap/delete_bitmap_index.h
@@ -0,0 +1,118 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <vector>
+#include <vector>
+#include <roaring/roaring.hh>
+
+#include "common/status.h"
+#include "gen_cpp/segment_v2.pb.h"
+#include "util/faststring.h"
+#include "util/slice.h"
+
+#include "util/debug_util.h"
+
+namespace doris {
+
+class DeleteBitmapIndexIterator;
+class DeleteBitmapIndexDecoder;
+
+/// Builder for the delete bitmap index. SegmentWriter can use it to generate
+/// the delete bitmap index page and save it in the segment.
+class DeleteBitmapIndexBuilder {
+public:
+    /// Creates an empty builder: no items recorded, empty bitmap.
+    DeleteBitmapIndexBuilder() : _num_items(0) {}
+
+    /// Adds one delete item to the delete bitmap index.
+    Status add_delete_item(const uint32_t& _row_count);
+
+    /// Number of bytes required to serialize the bitmap in its current state.
+    /// Read-only query, hence const.
+    uint64_t size() const { return _delete_bitmap.getSizeInBytes(false); }
+
+    /// When the segment is flushed, writes the index data into `body` to form
+    /// the index page and records the page metadata in `footer`.
+    Status finalize(std::vector<Slice>* body, segment_v2::PageFooterPB* footer);
+
+private:
+    /// the number of delete items added to the index
+    uint32_t _num_items;
+
+    /// roaring bitmap recording the row ids of delete items
+    Roaring _delete_bitmap;
+
+    /// backing storage for the serialized bitmap produced by finalize()
+    faststring _buf;
+};
+
+/// Read-only accessor for one delete bitmap index.
+/// Clients use it to reach the bitmap held by a DeleteBitmapIndexDecoder.
+class DeleteBitmapIndexIterator {
+public:
+    /// Wraps `decoder`. Only the pointer is stored, so the decoder presumably
+    /// has to outlive the iterator -- NOTE(review): confirm against callers.
+    DeleteBitmapIndexIterator(const DeleteBitmapIndexDecoder* decoder) : _decoder(decoder) {}
+
+    /// Const access to the delete bitmap records.
+    const Roaring& get_delete_bitmap() const;
+
+private:
+    const DeleteBitmapIndexDecoder* _decoder;
+};
+
+/// Decodes a delete bitmap index page (footer plus serialized bitmap).
+/// Usage:
+///     DeleteBitmapIndexDecoder decoder;
+///     decoder.parse(body, footer);
+class DeleteBitmapIndexDecoder {
+public:
+    /// @param parsed initial value of the "parsed" flag; passing true yields a
+    ///        decoder with an empty bitmap that passes the DCHECKs below
+    ///        without parse() ever being called.
+    DeleteBitmapIndexDecoder(bool parsed = false) : _parsed(parsed) {}
+
+    /// client should assure that body is available when this class is used
+    Status parse(const Slice& body, const segment_v2::DeleteIndexFooterPB& footer);
+
+    /// Number of delete items recorded in the footer.
+    uint32_t num_items() const {
+        DCHECK(_parsed);
+        return _footer.num_items();
+    }
+
+    /// Iterator bound to this decoder.
+    DeleteBitmapIndexIterator get_iterator() const {
+        DCHECK(_parsed);
+        return DeleteBitmapIndexIterator(this);
+    }
+
+    /// Const access to the decoded delete bitmap.
+    const Roaring& get_delete_bitmap() const { return _delete_bitmap; }
+
+private:
+    bool _parsed;
+
+    // The fields below are only meaningful once parse() has succeeded.
+    segment_v2::DeleteIndexFooterPB _footer;
+    Roaring _delete_bitmap;
+};
+
+}
diff --git a/be/src/olap/row_block.cpp b/be/src/olap/row_block.cpp
index 09cf48b..e20d1a4 100644
--- a/be/src/olap/row_block.cpp
+++ b/be/src/olap/row_block.cpp
@@ -52,6 +52,7 @@ OLAPStatus RowBlock::init(const RowBlockInfo& block_info) {
_info = block_info;
_null_supported = block_info.null_supported;
_capacity = _info.row_num;
+ _delete_bitmap = std::unique_ptr<Roaring>(new Roaring());
_compute_layout();
_mem_buf = new char[_mem_buf_bytes];
return OLAP_SUCCESS;
@@ -75,6 +76,7 @@ void RowBlock::clear() {
_pos = 0;
_limit = 0;
_mem_pool->clear();
+ _delete_bitmap.reset();
}
void RowBlock::_compute_layout() {
diff --git a/be/src/olap/row_block.h b/be/src/olap/row_block.h
index c9d277f..a439338 100644
--- a/be/src/olap/row_block.h
+++ b/be/src/olap/row_block.h
@@ -68,6 +68,8 @@ public:
inline void get_row(uint32_t row_index, RowCursor* cursor) const {
cursor->attach(_mem_buf + row_index * _mem_row_bytes);
+ // set current row whether it is deleted
+ cursor->set_is_delete(_delete_bitmap->contains(row_index));
}
template<typename RowType>
@@ -93,6 +95,10 @@ public:
return _mem_pool.get();
}
+    // Non-owning pointer to the bitmap of deleted rows. init() allocates it and
+    // clear() resets the owning pointer, so this may be nullptr after clear().
+    Roaring* get_delete_bitmap() const { return _delete_bitmap.get(); }
+
// 重用rowblock之前需调用clear,恢复到init之后的原始状态
void clear();
@@ -139,6 +145,8 @@ private:
std::unique_ptr<MemTracker> _tracker;
std::unique_ptr<MemPool> _mem_pool;
+ // delete bitmap which records deleted rows
+ std::unique_ptr<Roaring> _delete_bitmap;
// 由于内部持有内存资源,所以这里禁止拷贝和赋值
DISALLOW_COPY_AND_ASSIGN(RowBlock);
};
diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp
index 1ab8782..66d480e 100644
--- a/be/src/olap/row_block2.cpp
+++ b/be/src/olap/row_block2.cpp
@@ -32,7 +32,8 @@ RowBlockV2::RowBlockV2(const Schema& schema, uint16_t
capacity)
_column_datas(_schema.num_columns(), nullptr),
_column_null_bitmaps(_schema.num_columns(), nullptr),
_pool(new MemPool(&_tracker)),
- _selection_vector(nullptr) {
+ _selection_vector(nullptr),
+ _delete_bitmap(new Roaring()) {
auto bitmap_size = BitmapSize(capacity);
for (auto cid : _schema.column_ids()) {
size_t data_size = _schema.column(cid)->type_info()->size() *
_capacity;
@@ -68,7 +69,8 @@ Status RowBlockV2::convert_to_row_block(RowCursor* helper,
RowBlock* dst) {
helper->set_null(cid);
} else {
helper->set_not_null(cid);
- helper->set_field_content_shallow(cid,
+ helper->set_field_content_shallow(
+ cid,
reinterpret_cast<const
char*>(column_block(cid).cell_ptr(row_idx)));
}
}
@@ -77,11 +79,13 @@ Status RowBlockV2::convert_to_row_block(RowCursor* helper,
RowBlock* dst) {
uint16_t row_idx = _selection_vector[i];
dst->get_row(i, helper);
helper->set_not_null(cid);
- helper->set_field_content_shallow(cid,
- reinterpret_cast<const
char*>(column_block(cid).cell_ptr(row_idx)));
+ helper->set_field_content_shallow(
+ cid, reinterpret_cast<const
char*>(column_block(cid).cell_ptr(row_idx)));
}
}
}
+
+ dst->get_delete_bitmap()->swap(*(_delete_bitmap.get()));
// swap MemPool to copy string content
dst->mem_pool()->exchange_data(_pool.get());
dst->set_pos(0);
diff --git a/be/src/olap/row_block2.h b/be/src/olap/row_block2.h
index 6716592..c22eca8 100644
--- a/be/src/olap/row_block2.h
+++ b/be/src/olap/row_block2.h
@@ -59,6 +59,7 @@ public:
// all previously returned ColumnBlocks are invalidated after clear(),
accessing them
// will result in undefined behavior.
void clear() {
+ _delete_bitmap.reset();
_num_rows = 0;
_pool->clear();
_selected_size = _capacity;
@@ -110,6 +111,11 @@ public:
_delete_state = delete_state;
}
+    // Non-owning pointer to this block's delete bitmap. clear() resets the
+    // owning pointer, so this may be nullptr after clear().
+    Roaring* get_delete_bitmap() { return _delete_bitmap.get(); }
+
private:
Schema _schema;
size_t _capacity;
@@ -133,6 +139,8 @@ private:
// block delete state
DelCondSatisfied _delete_state;
+ // delete bit map
+ std::unique_ptr<Roaring> _delete_bitmap;
};
// Stands for a row in RowBlockV2. It is consisted of a RowBlockV2 reference
diff --git a/be/src/olap/row_cursor.cpp b/be/src/olap/row_cursor.cpp
index 9fad7ba..22533de 100644
--- a/be/src/olap/row_cursor.cpp
+++ b/be/src/olap/row_cursor.cpp
@@ -30,7 +30,8 @@ using std::vector;
namespace doris {
RowCursor::RowCursor() :
_fixed_len(0),
- _variable_len(0) {}
+ _variable_len(0),
+ _is_delete(false) {}
RowCursor::~RowCursor() {
delete [] _owned_fixed_buf;
diff --git a/be/src/olap/row_cursor.h b/be/src/olap/row_cursor.h
index eb4e537..fb77fe7 100644
--- a/be/src/olap/row_cursor.h
+++ b/be/src/olap/row_cursor.h
@@ -74,6 +74,10 @@ public:
column_schema(index)->to_index(&dst_cell, cell(index));
}
+    // Marks whether the row this cursor is attached to is deleted.
+    void set_is_delete(bool is_delete) { _is_delete = is_delete; }
+
+    // Whether the current row is flagged as deleted. Const so it can be
+    // queried through a const RowCursor reference.
+    bool is_delete() const { return _is_delete; }
+
// deep copy field content (ignore null-byte)
void set_field_content(size_t index, const char* buf, MemPool* mem_pool) {
char* dest = cell_ptr(index);
@@ -166,6 +170,9 @@ private:
char* _variable_buf = nullptr;
size_t _variable_len;
+ // current row is deleted
+ bool _is_delete;
+
DISALLOW_COPY_AND_ASSIGN(RowCursor);
};
} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp
b/be/src/olap/rowset/segment_v2/segment.cpp
index 002f056..b132065 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -80,6 +80,8 @@ Status Segment::new_iterator(const Schema& schema,
}
RETURN_IF_ERROR(_load_index());
+ RETURN_IF_ERROR(_load_delete_index());
+
iter->reset(new SegmentIterator(this->shared_from_this(), schema));
iter->get()->init(read_options);
return Status::OK();
@@ -157,6 +159,36 @@ Status Segment::_load_index() {
});
}
+// Loads the delete bitmap index of this segment at most once (guarded by
+// _delete_index_once). Segments written without a delete index page get an
+// empty decoder that is flagged as already parsed.
+// Returns the status of the first (and only) load attempt.
+Status Segment::_load_delete_index() {
+    return _delete_index_once.call([this] {
+        if (!_footer.has_delete_index_page()) {
+            // Nothing to read: install an empty decoder and stop here.
+            _delete_index_decoder.reset(new DeleteBitmapIndexDecoder(true));
+            return Status::OK();
+        }
+
+        // read and parse delete index page
+        std::unique_ptr<fs::ReadableBlock> rblock;
+        fs::BlockManager* block_mgr = fs::fs_util::block_manager();
+        RETURN_IF_ERROR(block_mgr->open_block(_fname, &rblock));
+
+        PageReadOptions opts;
+        opts.rblock = rblock.get();
+        opts.page_pointer = PagePointer(_footer.delete_index_page());
+        opts.codec = nullptr; // delete index page uses NO_COMPRESSION for now
+        OlapReaderStatistics tmp_stats;
+        opts.stats = &tmp_stats;
+
+        // Use a handle local to this load. _sk_index_handle keeps the short key
+        // index page in memory and must not be overwritten. The page buffer is
+        // only needed until parse() returns, because Roaring::read deserializes
+        // into a bitmap owned by the decoder.
+        PageHandle delete_index_handle;
+        Slice body;
+        PageFooterPB footer;
+        RETURN_IF_ERROR(
+                PageIO::read_and_decompress_page(opts, &delete_index_handle, &body, &footer));
+        DCHECK_EQ(footer.type(), DELETE_INDEX_PAGE);
+        DCHECK(footer.has_delete_index_page_footer());
+
+        _delete_index_decoder.reset(new DeleteBitmapIndexDecoder());
+        return _delete_index_decoder->parse(body, footer.delete_index_page_footer());
+    });
+}
+
Status Segment::_create_column_readers() {
for (uint32_t ordinal = 0; ordinal < _footer.columns().size(); ++ordinal) {
auto& column_pb = _footer.columns(ordinal);
diff --git a/be/src/olap/rowset/segment_v2/segment.h
b/be/src/olap/rowset/segment_v2/segment.h
index 78539fa..d279489 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -28,6 +28,8 @@
#include "olap/iterators.h"
#include "olap/rowset/segment_v2/page_handle.h"
#include "olap/short_key_index.h"
+#include "olap/delete_bitmap_index.h"
+
#include "olap/tablet_schema.h"
#include "util/faststring.h"
#include "util/once.h"
@@ -95,6 +97,11 @@ public:
return _sk_index_decoder->upper_bound(key);
}
+    // Iterator over the delete bitmap index of this segment. The delete index
+    // must already have been loaded successfully (checked in debug builds).
+    DeleteBitmapIndexIterator delete_index_iterator() const {
+        DCHECK(_delete_index_once.has_called() && _delete_index_once.stored_result().ok());
+        const DeleteBitmapIndexDecoder& decoder = *_delete_index_decoder;
+        return decoder.get_iterator();
+    }
+
// This will return the last row block in this segment.
// NOTE: Before call this function , client should assure that
// this segment is not empty.
@@ -120,6 +127,8 @@ private:
// May be called multiple times, subsequent calls will no op.
Status _load_index();
+ Status _load_delete_index();
+
private:
friend class SegmentIterator;
std::string _fname;
@@ -140,10 +149,16 @@ private:
// used to guarantee that short key index will be loaded at most once in a
thread-safe way
DorisCallOnce<Status> _load_index_once;
+
+ // used to guarantee that delete index will be loaded at most once in a
thread-safe way
+ DorisCallOnce<Status> _delete_index_once;
// used to hold short key index page in memory
PageHandle _sk_index_handle;
// short key index decoder
std::unique_ptr<ShortKeyIndexDecoder> _sk_index_decoder;
+ // delete index decoder
+ std::unique_ptr<DeleteBitmapIndexDecoder> _delete_index_decoder;
+
};
}
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index ec8bb47..b039e81 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -561,6 +561,28 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
i += range_size;
}
}
+
+ // phase 4: read delete index, fill in the row whether is delete.
+ {
+ Roaring* current_bitmap = block->get_delete_bitmap();
+
+ // fetch delete index
+ const Roaring& delete_bitmap =
_segment->delete_index_iterator().get_delete_bitmap();
+
+ const uint16_t* sv = block->selection_vector();
+ const uint16_t sv_size = block->selected_size();
+ uint16_t i = 0;
+ // check the delete rows and fill in the current_bitmap
+ // which use i in selection_vector as rowid
+ while (i < sv_size) {
+
+ if(delete_bitmap.contains(_block_rowids[sv[i]])) {
+ current_bitmap->add(i);
+ }
+ i++;
+ }
+ }
+
return Status::OK();
}
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index a977407..a9f7aba 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -77,6 +77,7 @@ Status SegmentWriter::init(uint32_t write_mbytes_per_sec
__attribute__((unused))
_column_writers.push_back(std::move(writer));
}
_index_builder.reset(new ShortKeyIndexBuilder(_segment_id,
_opts.num_rows_per_block));
+ _delete_bitmap_builder.reset(new DeleteBitmapIndexBuilder());
return Status::OK();
}
@@ -111,6 +112,7 @@ uint64_t SegmentWriter::estimate_segment_size() {
size += column_writer->estimate_buffer_size();
}
size += _index_builder->size();
+ size += _delete_bitmap_builder->size();
return size;
}
@@ -125,6 +127,7 @@ Status SegmentWriter::finalize(uint64_t* segment_file_size,
uint64_t* index_size
RETURN_IF_ERROR(_write_bitmap_index());
RETURN_IF_ERROR(_write_bloom_filter_index());
RETURN_IF_ERROR(_write_short_key_index());
+ RETURN_IF_ERROR(_write_delete_index());
*index_size = _wblock->bytes_appended() - index_offset;
RETURN_IF_ERROR(_write_footer());
RETURN_IF_ERROR(_wblock->finalize());
@@ -180,6 +183,17 @@ Status SegmentWriter::_write_short_key_index() {
return Status::OK();
}
+// Finalizes the delete bitmap builder, appends the resulting page to the
+// segment's write block and records where it landed in the segment footer.
+Status SegmentWriter::_write_delete_index() {
+    std::vector<Slice> page_body;
+    PageFooterPB page_footer;
+    RETURN_IF_ERROR(_delete_bitmap_builder->finalize(&page_body, &page_footer));
+
+    // delete index page is not compressed right now
+    PagePointer page_pointer;
+    RETURN_IF_ERROR(PageIO::write_page(_wblock, page_body, page_footer, &page_pointer));
+
+    page_pointer.to_proto(_footer.mutable_delete_index_page());
+    return Status::OK();
+}
+
Status SegmentWriter::_write_footer() {
_footer.set_num_rows(_row_count);
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h
b/be/src/olap/rowset/segment_v2/segment_writer.h
index 4703f68..8eee044 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/segment_writer.h
@@ -25,6 +25,7 @@
#include "common/status.h" // Status
#include "gen_cpp/segment_v2.pb.h"
#include "gutil/macros.h"
+#include "olap/delete_bitmap_index.h"
namespace doris {
@@ -75,6 +76,7 @@ private:
Status _write_bitmap_index();
Status _write_bloom_filter_index();
Status _write_short_key_index();
+ Status _write_delete_index();
Status _write_footer();
Status _write_raw_data(const std::vector<Slice>& slices);
@@ -88,6 +90,7 @@ private:
SegmentFooterPB _footer;
std::unique_ptr<ShortKeyIndexBuilder> _index_builder;
+ std::unique_ptr<DeleteBitmapIndexBuilder> _delete_bitmap_builder;
std::vector<std::unique_ptr<ColumnWriter>> _column_writers;
uint32_t _row_count = 0;
};
diff --git a/be/test/olap/CMakeLists.txt b/be/test/olap/CMakeLists.txt
index 76e0569..2e500af 100644
--- a/be/test/olap/CMakeLists.txt
+++ b/be/test/olap/CMakeLists.txt
@@ -37,6 +37,7 @@ ADD_BE_TEST(in_list_predicate_test)
ADD_BE_TEST(null_predicate_test)
ADD_BE_TEST(file_helper_test)
ADD_BE_TEST(file_utils_test)
+ADD_BE_TEST(delete_bitmap_index_test)
ADD_BE_TEST(delete_handler_test)
ADD_BE_TEST(column_reader_test)
ADD_BE_TEST(schema_change_test)
diff --git a/be/test/olap/delete_bitmap_index_test.cpp
b/be/test/olap/delete_bitmap_index_test.cpp
new file mode 100644
index 0000000..ffc9386
--- /dev/null
+++ b/be/test/olap/delete_bitmap_index_test.cpp
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/delete_bitmap_index.h"
+
+#include <gtest/gtest.h>
+
+#include "olap/tablet_schema_helper.h"
+#include "olap/row_cursor.h"
+#include "util/debug_util.h"
+
+namespace doris {
+
+// Fixture for the delete bitmap index tests; it holds no shared state.
+class DeleteBitmapIndexTest : public testing::Test {
+public:
+    DeleteBitmapIndexTest() {}
+    virtual ~DeleteBitmapIndexTest() {}
+};
+
+// Round trip: build an index, serialize it, parse it back and probe the bitmap.
+TEST_F(DeleteBitmapIndexTest, builder) {
+    DeleteBitmapIndexBuilder builder;
+
+    // Mark every even row id in [1000, 10000) as deleted.
+    uint32_t num_items = 0;
+    for (uint32_t i = 1000; i < 10000; i += 2) {
+        ASSERT_TRUE(builder.add_delete_item(i).ok());
+        num_items++;
+    }
+
+    std::vector<Slice> slices;
+    segment_v2::PageFooterPB footer;
+    auto st = builder.finalize(&slices, &footer);
+    ASSERT_TRUE(st.ok());
+    ASSERT_EQ(segment_v2::DELETE_INDEX_PAGE, footer.type());
+    ASSERT_EQ(num_items, footer.delete_index_page_footer().num_items());
+
+    // Concatenate the page body the same way a reader would see it.
+    std::string buf;
+    for (auto& slice : slices) {
+        buf.append(slice.data, slice.size);
+    }
+
+    DeleteBitmapIndexDecoder decoder;
+    st = decoder.parse(buf, footer.delete_index_page_footer());
+    ASSERT_TRUE(st.ok());
+    ASSERT_EQ(num_items, decoder.num_items());
+
+    // Even ids in range are present, odd ids are not.
+    auto& bitmap = decoder.get_iterator().get_delete_bitmap();
+    ASSERT_TRUE(bitmap.contains(1002));
+    ASSERT_FALSE(bitmap.contains(1003));
+    ASSERT_FALSE(bitmap.contains(5003));
+    ASSERT_TRUE(bitmap.contains(5002));
+}
+}
+
+// Test entry point: hands the command line to gtest, then runs every registered test.
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+    const int exit_code = RUN_ALL_TESTS();
+    return exit_code;
+}
+
diff --git a/docs/zh-CN/internal/doris_storage_optimization.md
b/docs/zh-CN/internal/doris_storage_optimization.md
index 3d65388..a2a49f5 100644
--- a/docs/zh-CN/internal/doris_storage_optimization.md
+++ b/docs/zh-CN/internal/doris_storage_optimization.md
@@ -163,17 +163,18 @@ message ColumnMetaPB {
repeated MetadataPairPB column_meta_datas;
}
-message FileFooterPB {
+message SegmentFooterPB {
optional uint32 version = 2 [default = 1]; // 用于版本兼容和升级使用
repeated ColumnPB schema = 5; // 列Schema
- optional uint64 num_values = 4; // 文件中保存的行数
- optional uint64 index_footprint = 7; // 索引大小
- optional uint64 data_footprint = 8; // 数据大小
+ optional uint64 num_values = 4; // 文件中保存的行数
+ optional uint64 index_footprint = 7; // 索引大小
+ optional uint64 data_footprint = 8; // 数据大小
optional uint64 raw_data_footprint = 8; // 原始数据大小
- optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // 压缩方式
- repeated ColumnMetaPB column_metas = 10; // 列元数据
+ optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // 压缩方式
+ repeated ColumnMetaPB column_metas = 10; // 列元数据
optional PagePointerPB key_index_page; // short key索引page
+ optional PagePointerPB delete_index_page; // 删除索引page
}
```
diff --git a/gensrc/proto/segment_v2.proto b/gensrc/proto/segment_v2.proto
index 93c3f19..9ed7c2d 100644
--- a/gensrc/proto/segment_v2.proto
+++ b/gensrc/proto/segment_v2.proto
@@ -59,6 +59,7 @@ enum PageTypePB {
INDEX_PAGE = 2;
DICTIONARY_PAGE = 3;
SHORT_KEY_PAGE = 4;
+ DELETE_INDEX_PAGE = 5;
}
message DataPageFooterPB {
@@ -106,6 +107,13 @@ message ShortKeyFooterPB {
optional uint32 num_segment_rows = 6;
}
+message DeleteIndexFooterPB {
+ // Number of index items in this index.
+ optional uint32 num_items = 1;
+ // The total bytes occupied by the delete index
+ optional uint32 content_bytes = 2;
+}
+
message PageFooterPB {
// required: indicates which of the *_footer fields is set
optional PageTypePB type = 1;
@@ -120,6 +128,8 @@ message PageFooterPB {
optional DictPageFooterPB dict_page_footer = 9;
// present only when type == SHORT_KEY_PAGE
optional ShortKeyFooterPB short_key_page_footer = 10;
+ // present only when type == DELETE_INDEX_PAGE
+ optional DeleteIndexFooterPB delete_index_page_footer = 11;
}
message ZoneMapPB {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]