This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new d1ebe957c18 branch-4.0: [fix](compaction) Check schema version before
ordered data compaction #59570 (#59620)
d1ebe957c18 is described below
commit d1ebe957c184df42c159edebbd82651bcf2c13f4
Author: zhiqiang <[email protected]>
AuthorDate: Thu Jan 8 09:43:40 2026 +0800
branch-4.0: [fix](compaction) Check schema version before ordered data
compaction #59570 (#59620)
cherry pick from #59570
---
be/src/olap/compaction.cpp | 23 +++++++++++++++++++++++
be/src/olap/rowset/rowset_meta.h | 2 ++
2 files changed, 25 insertions(+)
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 543fad1f3dd..4ce66300129 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -47,6 +47,7 @@
#include "io/fs/file_writer.h"
#include "io/fs/remote_file_system.h"
#include "io/io_common.h"
+#include "olap/collection_statistics.h"
#include "olap/cumulative_compaction.h"
#include "olap/cumulative_compaction_policy.h"
#include "olap/cumulative_compaction_time_series_policy.h"
@@ -423,6 +424,28 @@ bool CompactionMixin::handle_ordered_data_compaction() {
if (!config::enable_ordered_data_compaction) {
return false;
}
+
+ // If some rowsets has idx files and some rowsets has not, we can not do link file compaction.
+ // Since the output rowset will be broken.
+
+ // Use schema version instead of schema hash to check if they are the same,
+ // because light schema change will not change the schema hash on BE, but will increase the schema version
+ // See fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java::2979
+ std::vector<int32_t> schema_versions_of_rowsets;
+
+ for (auto input_rowset : _input_rowsets) {
+ schema_versions_of_rowsets.push_back(input_rowset->rowset_meta()->schema_version());
+ }
+
+ // If all rowsets has same schema version, then we can do link file compaction directly.
+ bool all_same_schema_version =
+ std::all_of(schema_versions_of_rowsets.begin(), schema_versions_of_rowsets.end(),
+ [&](int32_t v) { return v == schema_versions_of_rowsets.front(); });
+
+ if (!all_same_schema_version) {
+ return false;
+ }
+
if (compaction_type() == ReaderType::READER_COLD_DATA_COMPACTION ||
compaction_type() == ReaderType::READER_FULL_COMPACTION) {
// The remote file system and full compaction does not support to link files.
diff --git a/be/src/olap/rowset/rowset_meta.h b/be/src/olap/rowset/rowset_meta.h
index 4cd346ce414..c0dc1fb8c67 100644
--- a/be/src/olap/rowset/rowset_meta.h
+++ b/be/src/olap/rowset/rowset_meta.h
@@ -424,6 +424,8 @@ public:
RowsetMeta(const RowsetMeta&) = delete;
RowsetMeta operator=(const RowsetMeta&) = delete;
+ int32_t schema_version() const { return _rowset_meta_pb.schema_version(); }
+
private:
bool _deserialize_from_pb(std::string_view value);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]