This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 5541fd11e91 [branch-2.1](partial update)add logs for partial update
(#35416)
5541fd11e91 is described below
commit 5541fd11e91a3a5708a58d8991e7cb42f6e1f11f
Author: zhannngchen <[email protected]>
AuthorDate: Tue Jun 4 22:47:48 2024 +0800
[branch-2.1](partial update)add logs for partial update (#35416)
Add logs for partial update.
The master PR is #35802.
If this is a relatively large or complex change, kick off the discussion
at [[email protected]](mailto:[email protected]) by explaining why
you chose the solution you did and what alternatives you considered,
etc...
---
be/src/olap/compaction.cpp | 27 +++++++++----
be/src/olap/rowset/beta_rowset_writer.h | 4 ++
be/src/olap/rowset/beta_rowset_writer_v2.h | 4 ++
be/src/olap/rowset/rowset_writer.h | 3 ++
be/src/olap/rowset/segment_creator.cpp | 6 +++
be/src/olap/rowset/segment_creator.h | 11 ++++++
be/src/olap/rowset/segment_v2/segment_writer.cpp | 12 +++++-
be/src/olap/rowset/segment_v2/segment_writer.h | 11 ++++++
.../rowset/segment_v2/vertical_segment_writer.cpp | 12 +++++-
.../rowset/segment_v2/vertical_segment_writer.h | 11 ++++++
be/src/olap/rowset_builder.cpp | 11 ++++++
be/src/olap/schema_change.cpp | 2 +-
be/src/olap/tablet.cpp | 44 +++++++++++++++++-----
be/src/olap/tablet_meta.cpp | 8 ++++
be/src/olap/tablet_meta.h | 5 +++
15 files changed, 151 insertions(+), 20 deletions(-)
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 38f4c998201..05c4cbcb56a 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -912,13 +912,26 @@ Status Compaction::modify_rowsets(const
Merger::Statistics* stats) {
if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION
&&
_tablet->tablet_state() == TABLET_RUNNING && stats != nullptr
&&
stats->merged_rows != missed_rows_size) {
- std::string err_msg = fmt::format(
- "cumulative compaction: the merged rows({}) is not
equal to missed "
- "rows({}) in rowid conversion, tablet_id: {},
table_id:{}",
- stats->merged_rows, missed_rows_size,
_tablet->tablet_id(),
- _tablet->table_id());
- DCHECK(false) << err_msg;
- LOG(WARNING) << err_msg;
+ std::stringstream ss;
+ ss << "cumulative compaction: the merged rows(" <<
stats->merged_rows
+ << ") is not equal to missed rows(" << missed_rows_size
+ << ") in rowid conversion, tablet_id: " <<
_tablet->tablet_id()
+ << ", table_id:" << _tablet->table_id();
+ if (missed_rows_size == 0) {
+ ss << ", debug info: ";
+ DeleteBitmap subset_map(_tablet->tablet_id());
+ for (auto rs : _input_rowsets) {
+ _tablet->tablet_meta()->delete_bitmap().subset(
+ {rs->rowset_id(), 0, 0},
+ {rs->rowset_id(), rs->num_segments(),
version.second + 1},
+ &subset_map);
+ ss << "(rowset id: " << rs->rowset_id()
+ << ", delete bitmap cardinality: " <<
subset_map.cardinality() << ")";
+ }
+ ss << ", version[0-" << version.second + 1 << "]";
+ }
+ DCHECK(false) << ss.str();
+ LOG(WARNING) << ss.str();
}
}
diff --git a/be/src/olap/rowset/beta_rowset_writer.h
b/be/src/olap/rowset/beta_rowset_writer.h
index d67084d1b3a..f169ce055fb 100644
--- a/be/src/olap/rowset/beta_rowset_writer.h
+++ b/be/src/olap/rowset/beta_rowset_writer.h
@@ -95,6 +95,10 @@ public:
int64_t num_rows() const override { return
_segment_creator.num_rows_written(); }
+ // for partial update
+ int64_t num_rows_updated() const override { return
_segment_creator.num_rows_updated(); }
+ int64_t num_rows_deleted() const override { return
_segment_creator.num_rows_deleted(); }
+ int64_t num_rows_new_added() const override { return
_segment_creator.num_rows_new_added(); }
int64_t num_rows_filtered() const override { return
_segment_creator.num_rows_filtered(); }
RowsetId rowset_id() override { return _context.rowset_id; }
diff --git a/be/src/olap/rowset/beta_rowset_writer_v2.h
b/be/src/olap/rowset/beta_rowset_writer_v2.h
index bdcd8a47a98..4b0ab950de4 100644
--- a/be/src/olap/rowset/beta_rowset_writer_v2.h
+++ b/be/src/olap/rowset/beta_rowset_writer_v2.h
@@ -110,6 +110,10 @@ public:
int64_t num_rows() const override { return
_segment_creator.num_rows_written(); }
+ // for partial update
+ int64_t num_rows_updated() const override { return
_segment_creator.num_rows_updated(); }
+ int64_t num_rows_deleted() const override { return
_segment_creator.num_rows_deleted(); }
+ int64_t num_rows_new_added() const override { return
_segment_creator.num_rows_new_added(); }
int64_t num_rows_filtered() const override { return
_segment_creator.num_rows_filtered(); }
RowsetId rowset_id() override { return _context.rowset_id; }
diff --git a/be/src/olap/rowset/rowset_writer.h
b/be/src/olap/rowset/rowset_writer.h
index d7ec494f0d6..40e27b43a56 100644
--- a/be/src/olap/rowset/rowset_writer.h
+++ b/be/src/olap/rowset/rowset_writer.h
@@ -134,6 +134,9 @@ public:
virtual int64_t num_rows() const = 0;
+ virtual int64_t num_rows_updated() const = 0;
+ virtual int64_t num_rows_deleted() const = 0;
+ virtual int64_t num_rows_new_added() const = 0;
virtual int64_t num_rows_filtered() const = 0;
virtual RowsetId rowset_id() = 0;
diff --git a/be/src/olap/rowset/segment_creator.cpp
b/be/src/olap/rowset/segment_creator.cpp
index f7b364a9fd1..126a6548be5 100644
--- a/be/src/olap/rowset/segment_creator.cpp
+++ b/be/src/olap/rowset/segment_creator.cpp
@@ -259,6 +259,9 @@ Status SegmentFlusher::_flush_segment_writer(
std::unique_ptr<segment_v2::VerticalSegmentWriter>& writer,
TabletSchemaSPtr flush_schema,
int64_t* flush_size) {
uint32_t row_num = writer->num_rows_written();
+ _num_rows_updated += writer->num_rows_updated();
+ _num_rows_deleted += writer->num_rows_deleted();
+ _num_rows_new_added += writer->num_rows_new_added();
_num_rows_filtered += writer->num_rows_filtered();
if (row_num == 0) {
@@ -301,6 +304,9 @@ Status SegmentFlusher::_flush_segment_writer(
Status
SegmentFlusher::_flush_segment_writer(std::unique_ptr<segment_v2::SegmentWriter>&
writer,
TabletSchemaSPtr flush_schema,
int64_t* flush_size) {
uint32_t row_num = writer->num_rows_written();
+ _num_rows_updated += writer->num_rows_updated();
+ _num_rows_deleted += writer->num_rows_deleted();
+ _num_rows_new_added += writer->num_rows_new_added();
_num_rows_filtered += writer->num_rows_filtered();
if (row_num == 0) {
diff --git a/be/src/olap/rowset/segment_creator.h
b/be/src/olap/rowset/segment_creator.h
index fe439d3bc7a..214322ed8d5 100644
--- a/be/src/olap/rowset/segment_creator.h
+++ b/be/src/olap/rowset/segment_creator.h
@@ -101,6 +101,10 @@ public:
int64_t num_rows_written() const { return _num_rows_written; }
+ // for partial update
+ int64_t num_rows_updated() const { return _num_rows_updated; }
+ int64_t num_rows_deleted() const { return _num_rows_deleted; }
+ int64_t num_rows_new_added() const { return _num_rows_new_added; }
int64_t num_rows_filtered() const { return _num_rows_filtered; }
io::FileWriter* get_file_writer(int32_t segment_id);
@@ -160,6 +164,9 @@ private:
// written rows by add_block/add_row
std::atomic<int64_t> _num_rows_written = 0;
+ std::atomic<int64_t> _num_rows_updated = 0;
+ std::atomic<int64_t> _num_rows_new_added = 0;
+ std::atomic<int64_t> _num_rows_deleted = 0;
std::atomic<int64_t> _num_rows_filtered = 0;
};
@@ -183,6 +190,10 @@ public:
int64_t num_rows_written() const { return
_segment_flusher.num_rows_written(); }
+ // for partial update
+ int64_t num_rows_updated() const { return
_segment_flusher.num_rows_updated(); }
+ int64_t num_rows_deleted() const { return
_segment_flusher.num_rows_deleted(); }
+ int64_t num_rows_new_added() const { return
_segment_flusher.num_rows_new_added(); }
int64_t num_rows_filtered() const { return
_segment_flusher.num_rows_filtered(); }
// Flush a block into a single segment, with pre-allocated segment_id.
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index ec3bb9c993e..33f4e863824 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -448,8 +448,11 @@ Status
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
}
}
std::vector<std::unique_ptr<SegmentCacheHandle>>
segment_caches(specified_rowsets.size());
- // locate rows in base data
+ // locate rows in base data
+ int64_t num_rows_updated = 0;
+ int64_t num_rows_new_added = 0;
+ int64_t num_rows_deleted = 0;
int64_t num_rows_filtered = 0;
for (size_t block_pos = row_pos; block_pos < row_pos + num_rows;
block_pos++) {
// block segment
@@ -507,6 +510,7 @@ Status
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
error_column);
}
}
+ ++num_rows_new_added;
has_default_or_nullable = true;
use_default_or_null_flag.emplace_back(true);
continue;
@@ -537,9 +541,11 @@ Status
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
_mow_context->delete_bitmap->add(
{_opts.rowset_ctx->rowset_id, _segment_id,
DeleteBitmap::TEMP_VERSION_COMMON},
segment_pos);
+ ++num_rows_deleted;
} else {
_mow_context->delete_bitmap->add(
{loc.rowset_id, loc.segment_id,
DeleteBitmap::TEMP_VERSION_COMMON}, loc.row_id);
+ ++num_rows_updated;
}
}
CHECK_EQ(use_default_or_null_flag.size(), num_rows);
@@ -554,6 +560,7 @@ Status
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
RETURN_IF_ERROR(fill_missing_columns(mutable_full_columns,
use_default_or_null_flag,
has_default_or_nullable,
segment_start_pos, block));
full_block.set_columns(std::move(mutable_full_columns));
+
// row column should be filled here
if (_tablet_schema->store_row_column()) {
// convert block to row store format
@@ -578,6 +585,9 @@ Status
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
num_rows));
}
+ _num_rows_updated += num_rows_updated;
+ _num_rows_deleted += num_rows_deleted;
+ _num_rows_new_added += num_rows_new_added;
_num_rows_filtered += num_rows_filtered;
if (_tablet_schema->has_sequence_col() && !have_input_seq_column) {
DCHECK_NE(seq_column, nullptr);
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h
b/be/src/olap/rowset/segment_v2/segment_writer.h
index 2f26d6158ee..8f9e09ed01a 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/segment_writer.h
@@ -106,7 +106,13 @@ public:
size_t get_inverted_index_file_size() const { return
_inverted_index_file_size; }
uint32_t num_rows_written() const { return _num_rows_written; }
+
+ // for partial update
+ int64_t num_rows_updated() const { return _num_rows_updated; }
+ int64_t num_rows_deleted() const { return _num_rows_deleted; }
+ int64_t num_rows_new_added() const { return _num_rows_new_added; }
int64_t num_rows_filtered() const { return _num_rows_filtered; }
+
uint32_t row_count() const { return _row_count; }
Status finalize(uint64_t* segment_file_size, uint64_t* index_size);
@@ -211,6 +217,11 @@ private:
bool _has_key = true;
// _num_rows_written means row count already written in this current
column group
uint32_t _num_rows_written = 0;
+
+ /** for partial update stats **/
+ int64_t _num_rows_updated = 0;
+ int64_t _num_rows_new_added = 0;
+ int64_t _num_rows_deleted = 0;
// number of rows filtered in strict mode partial update
int64_t _num_rows_filtered = 0;
// _row_count means total row count of this segment
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
index 48b892afc38..26bf6f6ca2e 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
@@ -385,8 +385,11 @@ Status
VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da
}
}
std::vector<std::unique_ptr<SegmentCacheHandle>>
segment_caches(specified_rowsets.size());
- // locate rows in base data
+ // locate rows in base data
+ int64_t num_rows_updated = 0;
+ int64_t num_rows_new_added = 0;
+ int64_t num_rows_deleted = 0;
int64_t num_rows_filtered = 0;
for (size_t block_pos = data.row_pos; block_pos < data.row_pos +
data.num_rows; block_pos++) {
// block segment
@@ -443,6 +446,7 @@ Status
VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da
error_column);
}
}
+ ++num_rows_new_added;
has_default_or_nullable = true;
use_default_or_null_flag.emplace_back(true);
continue;
@@ -473,9 +477,11 @@ Status
VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da
_mow_context->delete_bitmap->add(
{_opts.rowset_ctx->rowset_id, _segment_id,
DeleteBitmap::TEMP_VERSION_COMMON},
segment_pos);
+ ++num_rows_deleted;
} else {
_mow_context->delete_bitmap->add(
{loc.rowset_id, loc.segment_id,
DeleteBitmap::TEMP_VERSION_COMMON}, loc.row_id);
+ ++num_rows_updated;
}
}
CHECK_EQ(use_default_or_null_flag.size(), data.num_rows);
@@ -489,6 +495,7 @@ Status
VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da
auto mutable_full_columns = full_block.mutate_columns();
RETURN_IF_ERROR(_fill_missing_columns(mutable_full_columns,
use_default_or_null_flag,
has_default_or_nullable,
segment_start_pos, data.block));
+
// row column should be filled here
if (_tablet_schema->store_row_column()) {
// convert block to row store format
@@ -513,6 +520,9 @@ Status
VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da
data.num_rows));
}
+ _num_rows_updated += num_rows_updated;
+ _num_rows_deleted += num_rows_deleted;
+ _num_rows_new_added += num_rows_new_added;
_num_rows_filtered += num_rows_filtered;
if (_tablet_schema->has_sequence_col() && !have_input_seq_column) {
DCHECK_NE(seq_column, nullptr);
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.h
b/be/src/olap/rowset/segment_v2/vertical_segment_writer.h
index 02e7170ff51..ffa5f3807ae 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.h
@@ -100,6 +100,11 @@ public:
}
[[nodiscard]] size_t inverted_index_file_size() const { return
_inverted_index_file_size; }
[[nodiscard]] uint32_t num_rows_written() const { return
_num_rows_written; }
+
+ // for partial update
+ [[nodiscard]] int64_t num_rows_updated() const { return _num_rows_updated;
}
+ [[nodiscard]] int64_t num_rows_deleted() const { return _num_rows_deleted;
}
+ [[nodiscard]] int64_t num_rows_new_added() const { return
_num_rows_new_added; }
[[nodiscard]] int64_t num_rows_filtered() const { return
_num_rows_filtered; }
[[nodiscard]] uint32_t row_count() const { return _row_count; }
[[nodiscard]] uint32_t segment_id() const { return _segment_id; }
@@ -176,8 +181,14 @@ private:
// _num_rows_written means row count already written in this current
column group
uint32_t _num_rows_written = 0;
+
+ /** for partial update stats **/
+ int64_t _num_rows_updated = 0;
+ int64_t _num_rows_new_added = 0;
+ int64_t _num_rows_deleted = 0;
// number of rows filtered in strict mode partial update
int64_t _num_rows_filtered = 0;
+
// _row_count means total row count of this segment
// In vertical compaction row count is recorded when key columns group
finish
// and _num_rows_written will be updated in value column group
diff --git a/be/src/olap/rowset_builder.cpp b/be/src/olap/rowset_builder.cpp
index 7ff06b39eb0..23232c4d0a5 100644
--- a/be/src/olap/rowset_builder.cpp
+++ b/be/src/olap/rowset_builder.cpp
@@ -255,6 +255,17 @@ Status RowsetBuilder::submit_calc_delete_bitmap_task() {
// of the delete bitmap. This operation is resource-intensive, and we need
to minimize
// the number of times it occurs. Therefore, we skip this operation here.
if (_partial_update_info->is_partial_update) {
+ // For partial update, the delete bitmap calculation is done during
append_block(),
+ // so we print its summary log here, before commit.
+ LOG(INFO) << fmt::format(
+ "partial update calc delete bitmap summary before commit:
tablet({}), txn_id({}), "
+ "rowset_ids({}), cur max_version({}), bitmap num({}), num rows
updated({}), num "
+ "rows new added({}), num rows deleted({}), total rows({})",
+ tablet()->tablet_id(), _req.txn_id, _rowset_ids.size(),
+ rowset_writer()->context().mow_context->max_version,
+ _delete_bitmap->delete_bitmap.size(),
rowset_writer()->num_rows_updated(),
+ rowset_writer()->num_rows_new_added(),
rowset_writer()->num_rows_deleted(),
+ rowset_writer()->num_rows());
return Status::OK();
}
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index 02f60a993a1..e7ef0464ffa 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -989,7 +989,7 @@ Status
SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletReqV2&
// if failed convert history data, then just remove the new tablet
if (!res) {
LOG(WARNING) << "failed to alter tablet. base_tablet=" <<
base_tablet->tablet_id()
- << ", drop new_tablet=" << new_tablet->tablet_id();
+ << ", drop new_tablet=" << new_tablet->tablet_id() << res;
// do not drop the new tablet and its data. GC thread will
}
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index e5cdc2b5d96..85706668100 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -2904,6 +2904,8 @@ Status Tablet::calc_segment_delete_bitmap(RowsetSharedPtr
rowset,
// use for partial update
PartialUpdateReadPlan read_plan_ori;
PartialUpdateReadPlan read_plan_update;
+ int64_t conflict_rows = 0;
+ int64_t new_generated_rows = 0;
std::map<RowsetId, RowsetSharedPtr> rsid_to_rowset;
rsid_to_rowset[rowset_id] = rowset;
@@ -3009,6 +3011,7 @@ Status Tablet::calc_segment_delete_bitmap(RowsetSharedPtr
rowset,
// of the including columns in the current row into a
new row.
delete_bitmap->add({rowset_id, seg->id(),
DeleteBitmap::TEMP_VERSION_COMMON},
row_id);
+ ++conflict_rows;
continue;
}
if (is_partial_update && rowset_writer != nullptr) {
@@ -3036,11 +3039,14 @@ Status
Tablet::calc_segment_delete_bitmap(RowsetSharedPtr rowset,
loc.row_id);
delete_bitmap->add({rowset_id, seg->id(),
DeleteBitmap::TEMP_VERSION_COMMON},
row_id);
+ ++conflict_rows;
+ ++new_generated_rows;
continue;
}
// when st = ok
delete_bitmap->add({loc.rowset_id, loc.segment_id,
DeleteBitmap::TEMP_VERSION_COMMON},
loc.row_id);
+ ++conflict_rows;
}
remaining -= num_read;
}
@@ -3065,10 +3071,23 @@ Status
Tablet::calc_segment_delete_bitmap(RowsetSharedPtr rowset,
read_plan_ori, read_plan_update, rsid_to_rowset, &block));
RETURN_IF_ERROR(sort_block(block, ordered_block));
RETURN_IF_ERROR(rowset_writer->flush_single_block(&ordered_block));
+ if (new_generated_rows != rowset_writer->num_rows()) {
+ LOG(WARNING) << "partial update correctness warning: conflict new
generated rows ("
+ << new_generated_rows << ") not equal to the new
flushed rows ("
+ << rowset_writer->num_rows() << "), tablet: " <<
tablet_id();
+ }
+ LOG(INFO) << "calc segment delete bitmap for partial update, tablet: "
<< tablet_id()
+ << " rowset: " << rowset_id << " seg_id: " << seg->id()
+ << " dummy_version: " << end_version + 1 << " rows: " <<
seg->num_rows()
+ << " conflict rows: " << conflict_rows
+ << " new generated rows: " << new_generated_rows
+ << " bimap num: " << delete_bitmap->delete_bitmap.size()
+ << " cost: " << watch.get_elapse_time_us() << "(us)";
+ return Status::OK();
}
LOG(INFO) << "calc segment delete bitmap, tablet: " << tablet_id() << "
rowset: " << rowset_id
<< " seg_id: " << seg->id() << " dummy_version: " << end_version
+ 1
- << " rows: " << seg->num_rows()
+ << " rows: " << seg->num_rows() << " conflict rows: " <<
conflict_rows
<< " bimap num: " << delete_bitmap->delete_bitmap.size()
<< " cost: " << watch.get_elapse_time_us() << "(us)";
return Status::OK();
@@ -3434,15 +3453,6 @@ Status Tablet::update_delete_bitmap(TabletTxnInfo*
txn_info, int64_t txn_id) {
<< ", calc delete bitmap: " << watch.get_elapse_time_us() - t3 <<
")";
}
- size_t total_rows = std::accumulate(
- segments.begin(), segments.end(), 0,
- [](size_t sum, const segment_v2::SegmentSharedPtr& s) { return sum
+= s->num_rows(); });
- LOG(INFO) << "[Publish] construct delete bitmap tablet: " << tablet_id()
- << ", rowset_ids to add: " << rowset_ids_to_add.size()
- << ", rowset_ids to del: " << rowset_ids_to_del.size()
- << ", cur version: " << cur_version << ", transaction_id: " <<
txn_id << ","
- << ss.str() << " , total rows: " << total_rows;
-
if (config::enable_merge_on_write_correctness_check && rowset->num_rows()
!= 0) {
// only do correctness check if the rowset has at least one row written
// check if all the rowset has ROWSET_SENTINEL_MARK
@@ -3467,7 +3477,12 @@ Status Tablet::update_delete_bitmap(TabletTxnInfo*
txn_info, int64_t txn_id) {
RETURN_IF_ERROR(rowset_writer->flush());
RowsetSharedPtr transient_rowset;
RETURN_IF_ERROR(rowset_writer->build(transient_rowset));
+ auto old_segments = rowset->num_segments();
rowset->merge_rowset_meta(transient_rowset->rowset_meta());
+ auto new_segments = rowset->num_segments();
+ ss << ", partial update flush rowset (old segment num: " <<
old_segments
+ << ", new segment num: " << new_segments << ")";
+
// update the shared_ptr to new delete bitmap
txn_info->delete_bitmap = delete_bitmap;
@@ -3475,6 +3490,15 @@ Status Tablet::update_delete_bitmap(TabletTxnInfo*
txn_info, int64_t txn_id) {
SegmentLoader::instance()->erase_segments(rowset->rowset_id(),
rowset->num_segments());
}
+ size_t total_rows = std::accumulate(
+ segments.begin(), segments.end(), 0,
+ [](size_t sum, const segment_v2::SegmentSharedPtr& s) { return sum
+= s->num_rows(); });
+ LOG(INFO) << "[Publish] construct delete bitmap tablet: " << tablet_id()
+ << ", rowset_ids to add: " << rowset_ids_to_add.size()
+ << ", rowset_ids to del: " << rowset_ids_to_del.size()
+ << ", cur version: " << cur_version << ", transaction_id: " <<
txn_id << ","
+ << ss.str() << " , total rows: " << total_rows;
+
// update version without write lock, compaction and publish_txn
// will update delete bitmap, handle compaction with _rowset_update_lock
// and publish_txn runs sequential so no need to lock here
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index f8a3bb9e97a..1b242499187 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -1020,6 +1020,14 @@ bool DeleteBitmap::empty() const {
return delete_bitmap.empty();
}
+uint64_t DeleteBitmap::cardinality() const { // returns the sum of cardinality() over every bitmap held in delete_bitmap
+    uint64_t res = 0; // NOTE(review): delete_bitmap is read here without taking `lock`, unlike contains_agg_without_cache(); confirm callers serialize access
+    for (const auto& entry : delete_bitmap) { // bind by const reference: `auto entry` would copy each (key, bitmap) element
+        res += entry.second.cardinality();
+    }
+    return res;
+}
+
bool DeleteBitmap::contains_agg_without_cache(const BitmapKey& bmk, uint32_t
row_id) const {
std::shared_lock l(lock);
DeleteBitmap::BitmapKey start {std::get<0>(bmk), std::get<1>(bmk), 0};
diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h
index f113e22ed07..460851bc772 100644
--- a/be/src/olap/tablet_meta.h
+++ b/be/src/olap/tablet_meta.h
@@ -420,6 +420,11 @@ public:
*/
bool empty() const;
+ /**
+ * return the total cardinality of the Delete Bitmap
+ */
+ uint64_t cardinality() const;
+
/**
* Sets the bitmap of specific segment, it's may be insertion or
replacement
*
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]