This is an automated email from the ASF dual-hosted git repository.
jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 691cb0e4436 [Fix](inverted index) fix data size when drop inverted index (#30327)
691cb0e4436 is described below
commit 691cb0e4436c16e800e0cb8cc4e7359cdf1dc54c
Author: airborne12 <[email protected]>
AuthorDate: Thu Jan 25 10:15:25 2024 +0800
[Fix](inverted index) fix data size when drop inverted index (#30327)
---
be/src/olap/rowset/beta_rowset.cpp | 20 +++++++++++++
be/src/olap/rowset/beta_rowset.h | 1 +
be/src/olap/tablet_schema.cpp | 12 ++++++++
be/src/olap/tablet_schema.h | 2 ++
be/src/olap/task/index_builder.cpp | 34 +++++++++++++++++++---
.../suites/inverted_index_p0/test_show_data.groovy | 14 +++++++--
6 files changed, 77 insertions(+), 6 deletions(-)
diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp
index 44b88296fbd..057e3411f4f 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -92,6 +92,26 @@ Status BetaRowset::do_load(bool /*use_cache*/) {
return Status::OK();
}
+Status BetaRowset::get_inverted_index_size_by_index_id(int64_t index_id,
size_t* index_size) {
+ auto fs = _rowset_meta->fs();
+ if (!fs || _schema == nullptr) {
+ return Status::Error<INIT_FAILED>("get fs failed");
+ }
+ for (int seg_id = 0; seg_id < num_segments(); ++seg_id) {
+ auto seg_path = segment_file_path(seg_id);
+ int64_t file_size = 0;
+ const auto* index =
_schema->get_inverted_index_with_index_id(index_id, "");
+ if (index == nullptr || index->index_type() != IndexType::INVERTED) {
+ continue;
+ }
+ std::string inverted_index_file_path =
InvertedIndexDescriptor::get_index_file_name(
+ seg_path, index_id, index->get_index_suffix());
+ RETURN_IF_ERROR(fs->file_size(inverted_index_file_path, &file_size));
+ *index_size += file_size;
+ }
+ return Status::OK();
+}
+
Status BetaRowset::get_segments_size(std::vector<size_t>* segments_size) {
auto fs = _rowset_meta->fs();
if (!fs || _schema == nullptr) {
diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h
index d404be13ea7..9d87eb6bf7d 100644
--- a/be/src/olap/rowset/beta_rowset.h
+++ b/be/src/olap/rowset/beta_rowset.h
@@ -94,6 +94,7 @@ public:
Status load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment);
Status get_segments_size(std::vector<size_t>* segments_size);
+ Status get_inverted_index_size_by_index_id(int64_t index_id, size_t*
index_size);
[[nodiscard]] virtual Status add_to_binlog() override;
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index 78e7e938caa..bd54af19603 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -1221,6 +1221,18 @@ bool TabletSchema::has_inverted_index_with_index_id(int32_t index_id,
return false;
}
+const TabletIndex* TabletSchema::get_inverted_index_with_index_id(
+ int32_t index_id, const std::string& suffix_name) const {
+ for (size_t i = 0; i < _indexes.size(); i++) {
+ if (_indexes[i].index_type() == IndexType::INVERTED &&
+ _indexes[i].get_index_suffix() == suffix_name &&
_indexes[i].index_id() == index_id) {
+ return &(_indexes[i]);
+ }
+ }
+
+ return nullptr;
+}
+
const TabletIndex* TabletSchema::get_inverted_index(int32_t col_unique_id,
const std::string&
suffix_path) const {
for (size_t i = 0; i < _indexes.size(); i++) {
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index 21970b5cbac..613db2dcbae 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -304,6 +304,8 @@ public:
std::vector<const TabletIndex*> get_indexes_for_column(const TabletColumn&
col) const;
bool has_inverted_index(const TabletColumn& col) const;
bool has_inverted_index_with_index_id(int32_t index_id, const std::string&
suffix_path) const;
+ const TabletIndex* get_inverted_index_with_index_id(int32_t index_id,
+ const std::string&
suffix_name) const;
const TabletIndex* get_inverted_index(const TabletColumn& col) const;
const TabletIndex* get_inverted_index(int32_t col_unique_id,
const std::string& suffix_path)
const;
diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp
index ca516cedbfe..84d61794b42 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -68,12 +68,38 @@ Status IndexBuilder::update_inverted_index_info() {
const auto& input_rs_tablet_schema = input_rowset->tablet_schema();
output_rs_tablet_schema->copy_from(*input_rs_tablet_schema);
if (_is_drop_op) {
- // base on input rowset's tablet_schema to build
- // output rowset's tablet_schema which only remove
- // the indexes specified in this drop index request
- for (auto t_inverted_index : _alter_inverted_indexes) {
+ size_t total_index_size = 0;
+ for (const auto& t_inverted_index : _alter_inverted_indexes) {
+ auto* beta_rowset =
reinterpret_cast<BetaRowset*>(input_rowset.get());
+ size_t index_size = 0;
+
RETURN_IF_ERROR(beta_rowset->get_inverted_index_size_by_index_id(
+ t_inverted_index.index_id, &index_size));
+ total_index_size += index_size;
output_rs_tablet_schema->remove_index(t_inverted_index.index_id);
}
+
+ auto input_rowset_meta = input_rowset->rowset_meta();
+ auto update_disk_size = [&](size_t& disk_size, const std::string&
size_type) {
+ if (disk_size >= total_index_size) {
+ disk_size -= total_index_size;
+ } else {
+ LOG(WARNING) << "rowset " <<
input_rowset_meta->rowset_id() << " " << size_type
+ << " size:" << disk_size
+ << " is less than index size:" <<
total_index_size;
+ }
+ };
+
+ size_t before_size = input_rowset_meta->total_disk_size();
+ update_disk_size(before_size, "total disk");
+ input_rowset_meta->set_total_disk_size(before_size);
+
+ before_size = input_rowset_meta->data_disk_size();
+ update_disk_size(before_size, "data disk");
+ input_rowset_meta->set_data_disk_size(before_size);
+
+ before_size = input_rowset_meta->index_disk_size();
+ update_disk_size(before_size, "index");
+ input_rowset_meta->set_index_disk_size(before_size);
} else {
// base on input rowset's tablet_schema to build
// output rowset's tablet_schema which only add
diff --git a/regression-test/suites/inverted_index_p0/test_show_data.groovy b/regression-test/suites/inverted_index_p0/test_show_data.groovy
index 7f9b43498ee..e59624be801 100644
--- a/regression-test/suites/inverted_index_p0/test_show_data.groovy
+++ b/regression-test/suites/inverted_index_p0/test_show_data.groovy
@@ -103,7 +103,7 @@ suite("test_show_data", "p0") {
if (result.size() > 0) {
logger.info(table_name + " show data, detail: " +
result[0].toString())
def size = result[0][2].replace(" KB", "").toDouble()
- if (size > origin_size) {
+ if (size != origin_size) {
return size
}
}
@@ -169,6 +169,11 @@ suite("test_show_data", "p0") {
def with_index_size = wait_for_show_data_finish(testTableWithoutIndex,
300000, no_index_size)
assertTrue(with_index_size != "wait_timeout")
+ sql """ ALTER TABLE ${testTableWithoutIndex} DROP INDEX idx_request """
+ wait_for_latest_op_on_table_finish(testTableWithoutIndex, timeout)
+ def another_no_index_size =
wait_for_show_data_finish(testTableWithoutIndex, 300000, with_index_size)
+ assertEquals(another_no_index_size, no_index_size)
+
sql "DROP TABLE IF EXISTS ${testTableWithIndex}"
create_httplogs_table_with_index.call(testTableWithIndex)
load_httplogs_data.call(testTableWithIndex,
'test_httplogs_load_with_index', 'true', 'json', 'documents-1000.json')
@@ -267,7 +272,7 @@ suite("test_show_data_for_bkd", "p0") {
if (result.size() > 0) {
logger.info(table_name + " show data, detail: " +
result[0].toString())
def size = result[0][2].replace(" KB", "").toDouble()
- if (size > origin_size) {
+ if (size != origin_size) {
return size
}
}
@@ -333,6 +338,11 @@ suite("test_show_data_for_bkd", "p0") {
def with_index_size =
wait_for_show_data_finish(testTableWithoutBKDIndex, 300000, no_index_size)
assertTrue(with_index_size != "wait_timeout")
+ sql """ ALTER TABLE ${testTableWithoutBKDIndex} DROP INDEX idx_status
"""
+ wait_for_latest_op_on_table_finish(testTableWithoutBKDIndex, timeout)
+ def another_no_index_size =
wait_for_show_data_finish(testTableWithoutBKDIndex, 300000, with_index_size)
+ assertEquals(another_no_index_size, no_index_size)
+
sql "DROP TABLE IF EXISTS ${testTableWithBKDIndex}"
create_httplogs_table_with_bkd_index.call(testTableWithBKDIndex)
load_httplogs_data.call(testTableWithBKDIndex,
'test_httplogs_load_with_bkd_index', 'true', 'json', 'documents-1000.json')
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]