This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 7d7e3a3 [refactor] Remove snapshot converter and unused Protobuf
Definitions (#8026)
7d7e3a3 is described below
commit 7d7e3a39f56baafa76266d8dc3c26226a5ce2709
Author: yiguolei <[email protected]>
AuthorDate: Sat Feb 12 16:06:04 2022 +0800
[refactor] Remove snapshot converter and unused Protobuf Definitions (#8026)
1. remove snapshot converter
2. remove unused protobuf definitions
3. replace some macros with constexpr variables
---
be/src/olap/CMakeLists.txt | 1 -
be/src/olap/data_dir.cpp | 1 -
be/src/olap/olap_snapshot_converter.cpp | 306 --------------------------
be/src/olap/olap_snapshot_converter.h | 65 ------
be/src/olap/rowset/alpha_rowset.h | 2 -
be/src/olap/snapshot_manager.cpp | 1 -
be/src/olap/storage_engine.cpp | 1 -
be/src/olap/task/engine_clone_task.cpp | 1 -
be/src/vec/io/io_helper.h | 8 +-
be/test/olap/CMakeLists.txt | 1 -
be/test/olap/olap_snapshot_converter_test.cpp | 250 ---------------------
gensrc/proto/olap_file.proto | 90 --------
12 files changed, 5 insertions(+), 722 deletions(-)
diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt
index 36f4515..ced9307 100644
--- a/be/src/olap/CMakeLists.txt
+++ b/be/src/olap/CMakeLists.txt
@@ -120,7 +120,6 @@ add_library(Olap STATIC
task/engine_storage_migration_task.cpp
task/engine_publish_version_task.cpp
task/engine_alter_tablet_task.cpp
- olap_snapshot_converter.cpp
column_vector.cpp
segment_loader.cpp
)
diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp
index a3ddab2..5bb86df 100644
--- a/be/src/olap/data_dir.cpp
+++ b/be/src/olap/data_dir.cpp
@@ -35,7 +35,6 @@
#include "gutil/strings/substitute.h"
#include "olap/file_helper.h"
#include "olap/olap_define.h"
-#include "olap/olap_snapshot_converter.h"
#include "olap/rowset/alpha_rowset_meta.h"
#include "olap/rowset/rowset_factory.h"
#include "olap/rowset/rowset_meta_manager.h"
diff --git a/be/src/olap/olap_snapshot_converter.cpp
b/be/src/olap/olap_snapshot_converter.cpp
deleted file mode 100644
index 334af43..0000000
--- a/be/src/olap/olap_snapshot_converter.cpp
+++ /dev/null
@@ -1,306 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "olap/olap_snapshot_converter.h"
-
-#include "olap/rowset/alpha_rowset.h"
-#include "olap/rowset/alpha_rowset_meta.h"
-#include "olap/rowset/rowset_id_generator.h"
-#include "olap/storage_engine.h"
-
-namespace doris {
-
-OLAPStatus OlapSnapshotConverter::convert_to_pdelta(const RowsetMetaPB&
rowset_meta_pb,
- PDelta* delta) {
- if (!rowset_meta_pb.has_start_version()) {
- LOG(FATAL) << "rowset does not have start_version."
- << " rowset id = " << rowset_meta_pb.rowset_id();
- }
- delta->set_start_version(rowset_meta_pb.start_version());
- if (!rowset_meta_pb.has_end_version()) {
- LOG(FATAL) << "rowset does not have end_version."
- << " rowset id = " << rowset_meta_pb.rowset_id();
- }
- delta->set_end_version(rowset_meta_pb.end_version());
- if (!rowset_meta_pb.has_version_hash()) {
- LOG(FATAL) << "rowset does not have version_hash."
- << " rowset id = " << rowset_meta_pb.rowset_id();
- }
- delta->set_version_hash(rowset_meta_pb.version_hash());
- if (!rowset_meta_pb.has_creation_time()) {
- LOG(FATAL) << "rowset does not have creation_time."
- << " rowset id = " << rowset_meta_pb.rowset_id();
- }
- delta->set_creation_time(rowset_meta_pb.creation_time());
- AlphaRowsetExtraMetaPB extra_meta_pb =
rowset_meta_pb.alpha_rowset_extra_meta_pb();
-
- for (auto& segment_group : extra_meta_pb.segment_groups()) {
- SegmentGroupPB* new_segment_group = delta->add_segment_group();
- *new_segment_group = segment_group;
- }
- if (rowset_meta_pb.has_delete_predicate()) {
- DeletePredicatePB* delete_condition =
delta->mutable_delete_condition();
- *delete_condition = rowset_meta_pb.delete_predicate();
- }
- return OLAP_SUCCESS;
-}
-
-OLAPStatus OlapSnapshotConverter::convert_to_rowset_meta(const PDelta& delta,
- const RowsetId&
rowset_id,
- int64_t tablet_id,
int32_t schema_hash,
- RowsetMetaPB*
rowset_meta_pb) {
- rowset_meta_pb->set_rowset_id(0);
- rowset_meta_pb->set_rowset_id_v2(rowset_id.to_string());
- rowset_meta_pb->set_tablet_id(tablet_id);
- rowset_meta_pb->set_tablet_schema_hash(schema_hash);
- rowset_meta_pb->set_rowset_type(RowsetTypePB::ALPHA_ROWSET);
- rowset_meta_pb->set_rowset_state(RowsetStatePB::VISIBLE);
- rowset_meta_pb->set_start_version(delta.start_version());
- rowset_meta_pb->set_end_version(delta.end_version());
- rowset_meta_pb->set_version_hash(delta.version_hash());
-
- bool empty = true;
- int64_t num_rows = 0;
- int64_t index_size = 0;
- int64_t data_size = 0;
- AlphaRowsetExtraMetaPB* extra_meta_pb =
rowset_meta_pb->mutable_alpha_rowset_extra_meta_pb();
- for (auto& segment_group : delta.segment_group()) {
- SegmentGroupPB* new_segment_group =
extra_meta_pb->add_segment_groups();
- *new_segment_group = segment_group;
- // if segment group does not has empty property, then it is not empty
- // if segment group's empty == false, then it is not empty
- if (!segment_group.has_empty() || !segment_group.empty()) {
- empty = false;
- }
- num_rows += segment_group.num_rows();
- index_size += segment_group.index_size();
- data_size += segment_group.data_size();
- }
-
- rowset_meta_pb->set_empty(empty);
- rowset_meta_pb->set_num_rows(num_rows);
- rowset_meta_pb->set_data_disk_size(data_size);
- rowset_meta_pb->set_index_disk_size(index_size);
- rowset_meta_pb->set_total_disk_size(data_size + index_size);
- if (delta.has_delete_condition()) {
- DeletePredicatePB* delete_condition =
rowset_meta_pb->mutable_delete_predicate();
- *delete_condition = delta.delete_condition();
- }
- rowset_meta_pb->set_creation_time(delta.creation_time());
- LOG(INFO) << "convert visible delta start_version = " <<
delta.start_version()
- << " end_version = " << delta.end_version()
- << " version_hash = " << delta.version_hash() << " to rowset id
= " << rowset_id
- << " tablet_id = " << tablet_id;
- return OLAP_SUCCESS;
-}
-
-OLAPStatus OlapSnapshotConverter::convert_to_rowset_meta(const PPendingDelta&
pending_delta,
- const RowsetId&
rowset_id,
- int64_t tablet_id,
int32_t schema_hash,
- RowsetMetaPB*
rowset_meta_pb) {
- rowset_meta_pb->set_rowset_id(0);
- rowset_meta_pb->set_rowset_id_v2(rowset_id.to_string());
- rowset_meta_pb->set_tablet_id(tablet_id);
- rowset_meta_pb->set_tablet_schema_hash(schema_hash);
- rowset_meta_pb->set_rowset_type(RowsetTypePB::ALPHA_ROWSET);
- rowset_meta_pb->set_rowset_state(RowsetStatePB::COMMITTED);
- rowset_meta_pb->set_partition_id(pending_delta.partition_id());
- rowset_meta_pb->set_txn_id(pending_delta.transaction_id());
- rowset_meta_pb->set_creation_time(pending_delta.creation_time());
-
- bool empty = true;
- int64_t num_rows = 0;
- int64_t index_size = 0;
- int64_t data_size = 0;
- AlphaRowsetExtraMetaPB* extra_meta_pb =
rowset_meta_pb->mutable_alpha_rowset_extra_meta_pb();
- for (auto& pending_segment_group : pending_delta.pending_segment_group()) {
- SegmentGroupPB* new_segment_group =
extra_meta_pb->add_segment_groups();
-
new_segment_group->set_segment_group_id(pending_segment_group.pending_segment_group_id());
-
new_segment_group->set_num_segments(pending_segment_group.num_segments());
- new_segment_group->set_index_size(0);
- new_segment_group->set_data_size(0);
- new_segment_group->set_num_rows(0);
- for (auto& pending_zone_map : pending_segment_group.zone_maps()) {
- ZoneMap* zone_map = new_segment_group->add_zone_maps();
- *zone_map = pending_zone_map;
- }
- new_segment_group->set_empty(pending_segment_group.empty());
- PUniqueId* load_id = new_segment_group->mutable_load_id();
- *load_id = pending_segment_group.load_id();
-
- if (!pending_segment_group.empty()) {
- empty = false;
- }
- }
-
- rowset_meta_pb->set_empty(empty);
- rowset_meta_pb->set_num_rows(num_rows);
- rowset_meta_pb->set_data_disk_size(data_size);
- rowset_meta_pb->set_index_disk_size(index_size);
- rowset_meta_pb->set_total_disk_size(data_size + index_size);
- if (pending_delta.has_delete_condition()) {
- DeletePredicatePB* delete_condition =
rowset_meta_pb->mutable_delete_predicate();
- *delete_condition = pending_delta.delete_condition();
- }
- rowset_meta_pb->set_creation_time(pending_delta.creation_time());
- LOG(INFO) << "convert pending delta txn id = " <<
pending_delta.transaction_id()
- << " tablet_id = " << tablet_id << " schema_hash = " <<
schema_hash
- << " to rowset id = " << rowset_id;
- return OLAP_SUCCESS;
-}
-
-OLAPStatus OlapSnapshotConverter::to_column_pb(const ColumnMessage& column_msg,
- ColumnPB* column_pb) {
- if (column_msg.has_unique_id()) {
- column_pb->set_unique_id(column_msg.unique_id());
- }
- column_pb->set_name(column_msg.name());
- column_pb->set_type(column_msg.type());
- column_pb->set_is_key(column_msg.is_key());
- column_pb->set_aggregation(column_msg.aggregation());
- if (column_msg.has_is_allow_null()) {
- column_pb->set_is_nullable(column_msg.is_allow_null());
- }
- if (column_msg.has_default_value()) {
- column_pb->set_default_value(column_msg.default_value());
- }
- if (column_msg.has_precision()) {
- column_pb->set_precision(column_msg.precision());
- }
- if (column_msg.has_frac()) {
- column_pb->set_frac(column_msg.frac());
- }
- column_pb->set_length(column_msg.length());
- if (column_msg.has_index_length()) {
- column_pb->set_index_length(column_msg.index_length());
- }
- if (column_msg.has_is_bf_column()) {
- column_pb->set_is_bf_column(column_msg.is_bf_column());
- }
- if (column_msg.has_has_bitmap_index()) {
- column_pb->set_has_bitmap_index(column_msg.has_bitmap_index());
- }
- // TODO(ygl) calculate column id from column list
- // column_pb->set_referenced_column_id(column_msg.());
-
- if (column_msg.has_referenced_column()) {
- column_pb->set_referenced_column(column_msg.referenced_column());
- }
- return OLAP_SUCCESS;
-}
-
-OLAPStatus OlapSnapshotConverter::to_column_msg(const ColumnPB& column_pb,
- ColumnMessage* column_msg) {
- if (!column_pb.has_name()) {
- LOG(FATAL) << "column pb does not have name"
- << " column id " << column_pb.unique_id();
- }
- column_msg->set_name(column_pb.name());
- column_msg->set_type(column_pb.type());
- if (!column_pb.has_aggregation()) {
- LOG(FATAL) << "column pb does not have aggregation"
- << " column id " << column_pb.unique_id();
- }
- column_msg->set_aggregation(column_pb.aggregation());
- if (!column_pb.has_length()) {
- LOG(FATAL) << "column pb does not have length"
- << " column id " << column_pb.unique_id();
- }
- column_msg->set_length(column_pb.length());
- if (!column_pb.has_is_key()) {
- LOG(FATAL) << "column pb does not have is_key"
- << " column id " << column_pb.unique_id();
- }
- column_msg->set_is_key(column_pb.is_key());
- if (column_pb.has_default_value()) {
- column_msg->set_default_value(column_pb.default_value());
- }
- if (column_pb.has_referenced_column()) {
- column_msg->set_referenced_column(column_pb.referenced_column());
- }
- if (column_pb.has_index_length()) {
- column_msg->set_index_length(column_pb.index_length());
- }
- if (column_pb.has_precision()) {
- column_msg->set_precision(column_pb.precision());
- }
- if (column_pb.has_frac()) {
- column_msg->set_frac(column_pb.frac());
- }
- if (column_pb.has_is_nullable()) {
- column_msg->set_is_allow_null(column_pb.is_nullable());
- }
- column_msg->set_unique_id(column_pb.unique_id());
- if (column_pb.has_is_bf_column()) {
- column_msg->set_is_bf_column(column_pb.is_bf_column());
- }
- if (column_pb.has_has_bitmap_index()) {
- column_msg->set_has_bitmap_index(column_pb.has_bitmap_index());
- }
- column_msg->set_is_root_column(true);
- return OLAP_SUCCESS;
-}
-
-OLAPStatus OlapSnapshotConverter::save(const string& file_path,
- const OLAPHeaderMessage& olap_header) {
- DCHECK(!file_path.empty());
-
- FileHeader<OLAPHeaderMessage> file_header;
- FileHandler file_handler;
-
- if (file_handler.open_with_mode(file_path.c_str(), O_CREAT | O_WRONLY |
O_TRUNC,
- S_IRUSR | S_IWUSR) != OLAP_SUCCESS) {
- LOG(WARNING) << "fail to open header file. file='" << file_path;
- return OLAP_ERR_IO_ERROR;
- }
-
- try {
- file_header.mutable_message()->CopyFrom(olap_header);
- } catch (...) {
- LOG(WARNING) << "fail to copy protocol buffer object. file='" <<
file_path;
- return OLAP_ERR_OTHER_ERROR;
- }
-
- if (file_header.prepare(&file_handler) != OLAP_SUCCESS ||
- file_header.serialize(&file_handler) != OLAP_SUCCESS) {
- LOG(WARNING) << "fail to serialize to file header. file='" <<
file_path;
- return OLAP_ERR_SERIALIZE_PROTOBUF_ERROR;
- }
-
- return OLAP_SUCCESS;
-}
-
-void OlapSnapshotConverter::_modify_old_segment_group_id(RowsetMetaPB&
rowset_meta) {
- if (!rowset_meta.has_alpha_rowset_extra_meta_pb()) {
- return;
- }
- AlphaRowsetExtraMetaPB* alpha_rowset_extra_meta_pb =
- rowset_meta.mutable_alpha_rowset_extra_meta_pb();
- for (auto& segment_group_pb :
alpha_rowset_extra_meta_pb->segment_groups()) {
- if (segment_group_pb.segment_group_id() == -1) {
- // check if segment groups size == 1
- if (alpha_rowset_extra_meta_pb->segment_groups().size() != 1) {
- LOG(FATAL) << "the rowset has a segment group's id == -1 but
it contains more than "
- "one segment group"
- << " it should not happen";
- }
-
(const_cast<SegmentGroupPB&>(segment_group_pb)).set_segment_group_id(0);
- }
- }
-}
-
-} // namespace doris
diff --git a/be/src/olap/olap_snapshot_converter.h
b/be/src/olap/olap_snapshot_converter.h
deleted file mode 100644
index 05a8233..0000000
--- a/be/src/olap/olap_snapshot_converter.h
+++ /dev/null
@@ -1,65 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef DORIS_BE_SRC_OLAP_OLAP_SNAPSHOT_CONVERTER_H
-#define DORIS_BE_SRC_OLAP_OLAP_SNAPSHOT_CONVERTER_H
-
-#include <functional>
-#include <map>
-#include <string>
-
-#include "gen_cpp/olap_file.pb.h"
-#include "olap/data_dir.h"
-#include "olap/delete_handler.h"
-#include "olap/olap_common.h"
-#include "olap/olap_define.h"
-#include "olap/rowset/rowset.h"
-#include "olap/rowset/rowset_meta.h"
-#include "olap/tablet_schema.h"
-
-using std::ifstream;
-using std::string;
-using std::vector;
-
-namespace doris {
-
-class OlapSnapshotConverter {
-public:
-
- OLAPStatus convert_to_pdelta(const RowsetMetaPB& rowset_meta_pb, PDelta*
delta);
-
- OLAPStatus convert_to_rowset_meta(const PDelta& delta, const RowsetId&
rowset_id,
- int64_t tablet_id, int32_t schema_hash,
- RowsetMetaPB* rowset_meta_pb);
-
- OLAPStatus convert_to_rowset_meta(const PPendingDelta& pending_delta,
const RowsetId& rowset_id,
- int64_t tablet_id, int32_t schema_hash,
- RowsetMetaPB* rowset_meta_pb);
-
- OLAPStatus to_column_pb(const ColumnMessage& column_msg, ColumnPB*
column_pb);
-
- OLAPStatus to_column_msg(const ColumnPB& column_pb, ColumnMessage*
column_msg);
-
- OLAPStatus save(const string& file_path, const OLAPHeaderMessage&
olap_header);
-
-private:
- void _modify_old_segment_group_id(RowsetMetaPB& rowset_meta);
-};
-
-} // namespace doris
-
-#endif // DORIS_BE_SRC_OLAP_OLAP_SNAPSHOT_CONVERTER_H
diff --git a/be/src/olap/rowset/alpha_rowset.h
b/be/src/olap/rowset/alpha_rowset.h
index 84b17ad..78ad5f1 100644
--- a/be/src/olap/rowset/alpha_rowset.h
+++ b/be/src/olap/rowset/alpha_rowset.h
@@ -33,7 +33,6 @@ class AlphaRowset;
using AlphaRowsetSharedPtr = std::shared_ptr<AlphaRowset>;
class AlphaRowsetWriter;
class AlphaRowsetReader;
-class OlapSnapshotConverter;
class RowsetFactory;
class AlphaRowset : public Rowset {
@@ -89,7 +88,6 @@ private:
private:
friend class AlphaRowsetWriter;
friend class AlphaRowsetReader;
- friend class OlapSnapshotConverter;
std::vector<std::shared_ptr<SegmentGroup>> _segment_groups;
};
diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp
index 3789a1e..52836f7 100644
--- a/be/src/olap/snapshot_manager.cpp
+++ b/be/src/olap/snapshot_manager.cpp
@@ -29,7 +29,6 @@
#include "env/env.h"
#include "gen_cpp/Types_constants.h"
-#include "olap/olap_snapshot_converter.h"
#include "olap/rowset/alpha_rowset_meta.h"
#include "olap/rowset/rowset.h"
#include "olap/rowset/rowset_converter.h"
diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp
index aeac350..201bffb 100644
--- a/be/src/olap/storage_engine.cpp
+++ b/be/src/olap/storage_engine.cpp
@@ -42,7 +42,6 @@
#include "olap/fs/file_block_manager.h"
#include "olap/lru_cache.h"
#include "olap/memtable_flush_executor.h"
-#include "olap/olap_snapshot_converter.h"
#include "olap/push_handler.h"
#include "olap/reader.h"
#include "olap/rowset/alpha_rowset.h"
diff --git a/be/src/olap/task/engine_clone_task.cpp
b/be/src/olap/task/engine_clone_task.cpp
index 25a863c..71e73a3 100644
--- a/be/src/olap/task/engine_clone_task.cpp
+++ b/be/src/olap/task/engine_clone_task.cpp
@@ -26,7 +26,6 @@
#include "gutil/strings/stringpiece.h"
#include "gutil/strings/substitute.h"
#include "http/http_client.h"
-#include "olap/olap_snapshot_converter.h"
#include "olap/rowset/rowset.h"
#include "olap/rowset/rowset_factory.h"
#include "olap/snapshot_manager.h"
diff --git a/be/src/vec/io/io_helper.h b/be/src/vec/io/io_helper.h
index 87d3683..bc09fe8 100644
--- a/be/src/vec/io/io_helper.h
+++ b/be/src/vec/io/io_helper.h
@@ -33,11 +33,13 @@
#include "vec/io/var_int.h"
#include "vec/runtime/vdatetime_value.h"
-#define DEFAULT_MAX_STRING_SIZE (1ULL << 30)
-#define WRITE_HELPERS_MAX_INT_WIDTH 40U
-
namespace doris::vectorized {
+// Defined inside the namespace rather than as global macros,
+// because global macros may conflict with other libraries.
+static constexpr size_t DEFAULT_MAX_STRING_SIZE = 1073741824; // 1GB
+static constexpr auto WRITE_HELPERS_MAX_INT_WIDTH = 40U;
+
template <typename T>
inline T decimal_scale_multiplier(UInt32 scale);
template <>
diff --git a/be/test/olap/CMakeLists.txt b/be/test/olap/CMakeLists.txt
index 442314c..5e4d15f 100644
--- a/be/test/olap/CMakeLists.txt
+++ b/be/test/olap/CMakeLists.txt
@@ -77,7 +77,6 @@ ADD_BE_TEST(rowset/alpha_rowset_test)
ADD_BE_TEST(rowset/beta_rowset_test)
ADD_BE_TEST(rowset/unique_rowset_id_generator_test)
ADD_BE_TEST(rowset/rowset_converter_test)
-# ADD_BE_TEST(olap_snapshot_converter_test)
ADD_BE_TEST(txn_manager_test)
ADD_BE_TEST(generic_iterators_test)
ADD_BE_TEST(key_coder_test)
diff --git a/be/test/olap/olap_snapshot_converter_test.cpp
b/be/test/olap/olap_snapshot_converter_test.cpp
deleted file mode 100644
index ab5a4e4..0000000
--- a/be/test/olap/olap_snapshot_converter_test.cpp
+++ /dev/null
@@ -1,250 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "olap/olap_snapshot_converter.h"
-
-#include <boost/algorithm/string.hpp>
-#include <filesystem>
-#include <fstream>
-#include <iostream>
-#include <sstream>
-#include <string>
-
-#include "gmock/gmock.h"
-#include "gtest/gtest.h"
-#include "json2pb/json_to_pb.h"
-#include "olap/lru_cache.h"
-#include "olap/olap_meta.h"
-#include "olap/rowset/alpha_rowset.h"
-#include "olap/rowset/alpha_rowset_meta.h"
-#include "olap/rowset/rowset_meta_manager.h"
-#include "olap/storage_engine.h"
-#include "olap/txn_manager.h"
-#include "util/file_utils.h"
-
-#ifndef BE_TEST
-#define BE_TEST
-#endif
-
-using ::testing::_;
-using ::testing::Return;
-using ::testing::SetArgPointee;
-using std::string;
-
-namespace doris {
-
-static StorageEngine* k_engine = nullptr;
-
-class OlapSnapshotConverterTest : public testing::Test {
-public:
- virtual void SetUp() {
- config::tablet_map_shard_size = 1;
- config::txn_map_shard_size = 1;
- config::txn_shard_size = 1;
- EngineOptions options;
- // won't open engine, options.path is needless
- options.backend_uid = UniqueId::gen_uid();
- if (k_engine == nullptr) {
- k_engine = new StorageEngine(options);
- }
-
- string test_engine_data_path =
"./be/test/olap/test_data/converter_test_data/data";
- _engine_data_path = "./be/test/olap/test_data/converter_test_data/tmp";
- std::filesystem::remove_all(_engine_data_path);
- FileUtils::create_dir(_engine_data_path);
-
- _data_dir = new DataDir(_engine_data_path, 1000000000);
- _data_dir->init();
- _meta_path = "./meta";
- string tmp_data_path = _engine_data_path + "/data";
- if (std::filesystem::exists(tmp_data_path)) {
- std::filesystem::remove_all(tmp_data_path);
- }
- FileUtils::copy_file(test_engine_data_path, tmp_data_path);
- _tablet_id = 15007;
- _schema_hash = 368169781;
- _tablet_data_path = tmp_data_path + "/" + std::to_string(0) + "/" +
- std::to_string(_tablet_id) + "/" +
std::to_string(_schema_hash);
- if (std::filesystem::exists(_meta_path)) {
- std::filesystem::remove_all(_meta_path);
- }
- ASSERT_TRUE(std::filesystem::create_directory(_meta_path));
- ASSERT_TRUE(std::filesystem::exists(_meta_path));
- _meta = new (std::nothrow) OlapMeta(_meta_path);
- ASSERT_NE(nullptr, _meta);
- OLAPStatus st = _meta->init();
- ASSERT_TRUE(st == OLAP_SUCCESS);
- }
-
- virtual void TearDown() {
- delete _meta;
- delete _data_dir;
- if (std::filesystem::exists(_meta_path)) {
- ASSERT_TRUE(std::filesystem::remove_all(_meta_path));
- }
- if (std::filesystem::exists(_engine_data_path)) {
- ASSERT_TRUE(std::filesystem::remove_all(_engine_data_path));
- }
- }
-
-private:
- DataDir* _data_dir;
- OlapMeta* _meta;
- std::string _json_rowset_meta;
- std::string _engine_data_path;
- std::string _meta_path;
- int64_t _tablet_id;
- int32_t _schema_hash;
- string _tablet_data_path;
-};
-
-TEST_F(OlapSnapshotConverterTest, ToNewAndToOldSnapshot) {
- // --- start to convert old snapshot to new snapshot
- string header_file_path = _tablet_data_path + "/" + "olap_header.json";
- std::ifstream infile(header_file_path);
- string buffer;
- std::string json_header;
- while (getline(infile, buffer)) {
- json_header = json_header + buffer;
- }
- boost::algorithm::trim(json_header);
- OLAPHeaderMessage header_msg;
- bool ret = json2pb::JsonToProtoMessage(json_header, &header_msg);
- ASSERT_TRUE(ret);
- OlapSnapshotConverter converter;
- TabletMetaPB tablet_meta_pb;
- std::vector<RowsetMetaPB> pending_rowsets;
- OLAPStatus status = converter.to_new_snapshot(header_msg,
_tablet_data_path, _tablet_data_path,
- &tablet_meta_pb,
&pending_rowsets, true);
- ASSERT_TRUE(status == OLAP_SUCCESS);
-
- TabletSchema tablet_schema;
- tablet_schema.init_from_pb(tablet_meta_pb.schema());
- string data_path_prefix = _data_dir->get_absolute_tablet_path(
- tablet_meta_pb.shard_id(), tablet_meta_pb.tablet_id(),
tablet_meta_pb.schema_hash());
- // check converted new tabletmeta pb and its files
- // check visible delta
- ASSERT_TRUE(tablet_meta_pb.rs_metas().size() == header_msg.delta().size());
- for (auto& pdelta : header_msg.delta()) {
- int64_t start_version = pdelta.start_version();
- int64_t end_version = pdelta.end_version();
- bool found = false;
- for (auto& visible_rowset : tablet_meta_pb.rs_metas()) {
- if (visible_rowset.start_version() == start_version &&
- visible_rowset.end_version() == end_version) {
- found = true;
- }
- }
- ASSERT_TRUE(found);
- }
- for (auto& visible_rowset : tablet_meta_pb.rs_metas()) {
- RowsetMetaSharedPtr alpha_rowset_meta(new AlphaRowsetMeta());
- alpha_rowset_meta->init_from_pb(visible_rowset);
- AlphaRowset rowset(&tablet_schema, data_path_prefix,
alpha_rowset_meta);
- ASSERT_TRUE(rowset.init() == OLAP_SUCCESS);
- ASSERT_TRUE(rowset.load() == OLAP_SUCCESS);
- std::vector<std::string> old_files;
- rowset.remove_old_files(&old_files);
- }
- // check incremental delta
- ASSERT_TRUE(tablet_meta_pb.inc_rs_metas().size() ==
header_msg.incremental_delta().size());
- for (auto& pdelta : header_msg.incremental_delta()) {
- int64_t start_version = pdelta.start_version();
- int64_t end_version = pdelta.end_version();
- int64_t version_hash = pdelta.version_hash();
- bool found = false;
- for (auto& inc_rowset : tablet_meta_pb.inc_rs_metas()) {
- if (inc_rowset.start_version() == start_version &&
- inc_rowset.end_version() == end_version &&
- inc_rowset.version_hash() == version_hash) {
- found = true;
- }
- }
- ASSERT_TRUE(found);
- }
- for (auto& inc_rowset : tablet_meta_pb.inc_rs_metas()) {
- RowsetMetaSharedPtr alpha_rowset_meta(new AlphaRowsetMeta());
- alpha_rowset_meta->init_from_pb(inc_rowset);
- AlphaRowset rowset(&tablet_schema, data_path_prefix,
alpha_rowset_meta);
- ASSERT_TRUE(rowset.init() == OLAP_SUCCESS);
- ASSERT_TRUE(rowset.load() == OLAP_SUCCESS);
- AlphaRowset tmp_rowset(&tablet_schema, data_path_prefix +
"/incremental_delta",
- alpha_rowset_meta);
- ASSERT_TRUE(tmp_rowset.init() == OLAP_SUCCESS);
- std::vector<std::string> old_files;
- tmp_rowset.remove_old_files(&old_files);
- }
- // check pending delta
- ASSERT_TRUE(pending_rowsets.size() == header_msg.pending_delta().size());
- for (auto& pdelta : header_msg.pending_delta()) {
- int64_t partition_id = pdelta.partition_id();
- int64_t transaction_id = pdelta.transaction_id();
- bool found = false;
- for (auto& pending_rowset : pending_rowsets) {
- if (pending_rowset.partition_id() == partition_id &&
- pending_rowset.txn_id() == transaction_id &&
- pending_rowset.tablet_uid().hi() ==
tablet_meta_pb.tablet_uid().hi() &&
- pending_rowset.tablet_uid().lo() ==
tablet_meta_pb.tablet_uid().lo()) {
- found = true;
- }
- }
- ASSERT_TRUE(found);
- }
- for (auto& pending_rowset : pending_rowsets) {
- RowsetMetaSharedPtr alpha_rowset_meta(new AlphaRowsetMeta());
- alpha_rowset_meta->init_from_pb(pending_rowset);
- AlphaRowset rowset(&tablet_schema, data_path_prefix,
alpha_rowset_meta);
- ASSERT_TRUE(rowset.init() == OLAP_SUCCESS);
- ASSERT_TRUE(rowset.load() == OLAP_SUCCESS);
- std::vector<std::string> old_files;
- rowset.remove_old_files(&old_files);
- }
-
- // old files are removed, then convert new snapshot to old snapshot
- OLAPHeaderMessage old_header_msg;
- status = converter.to_old_snapshot(tablet_meta_pb, _tablet_data_path,
_tablet_data_path,
- &old_header_msg);
- ASSERT_TRUE(status == OLAP_SUCCESS);
- for (auto& pdelta : header_msg.delta()) {
- bool found = false;
- for (auto& converted_pdelta : old_header_msg.delta()) {
- if (converted_pdelta.start_version() == pdelta.start_version() &&
- converted_pdelta.end_version() == pdelta.end_version()) {
- found = true;
- }
- }
- ASSERT_TRUE(found);
- }
- for (auto& pdelta : header_msg.incremental_delta()) {
- bool found = false;
- for (auto& converted_pdelta : old_header_msg.incremental_delta()) {
- if (converted_pdelta.start_version() == pdelta.start_version() &&
- converted_pdelta.end_version() == pdelta.end_version() &&
- converted_pdelta.version_hash() == pdelta.version_hash()) {
- found = true;
- }
- }
- ASSERT_TRUE(found);
- }
-}
-
-} // namespace doris
-
-int main(int argc, char** argv) {
- ::testing::InitGoogleTest(&argc, argv);
- return RUN_ALL_TESTS();
-}
diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto
index 8dce011..5f3b226 100644
--- a/gensrc/proto/olap_file.proto
+++ b/gensrc/proto/olap_file.proto
@@ -30,37 +30,6 @@ message ZoneMap {
optional bool null_flag = 3;
}
-message DeltaPruning {
- repeated ZoneMap zone_maps = 1;
-}
-
-// define OLAP FileVersion Message, Base, delta and cumulative delta
-// are both instance of Version
-// Base Version: (start_version == 0 && end_version > start_version) || [0, 0]
-// Cumulative Version: (start_version >= 1 && end_version > start_version)
-// Delta Version: start_version == end_version
-message FileVersionMessage { // Deprecated, Use PDelta instead
- required uint32 num_segments = 1 [default = 0];
- required int32 start_version = 2;
- required int32 end_version = 3;
- required int64 version_hash = 4 [default = 0];
- required int64 max_timestamp = 5 [default = 0];
- required int64 index_size = 6 [default = 0];
- required int64 data_size = 7 [default = 0];
- optional int64 num_rows = 8 [default = 0];
- required int64 creation_time = 9 [default = 0];
- optional DeltaPruning delta_pruning = 10;
-}
-
-message PDelta {
- required int64 start_version = 1; // RowsetMetaPB.start_version
- required int64 end_version = 2; // RowsetMetaPB.end_version
- required int64 version_hash = 3; // RowsetMetaPB.version_hash
- required int64 creation_time = 4; // RowsetMetaPB.creation_time
- repeated SegmentGroupPB segment_group = 5; //
RowsetMetaPB.extra_properties
- optional DeletePredicatePB delete_condition = 6; //
RowsetMetaPB.delete_predicate
-}
-
enum RowsetTypePB {
ALPHA_ROWSET = 0; // doris原有的列存格式
BETA_ROWSET = 1; // 新列存
@@ -140,33 +109,6 @@ message SegmentGroupPB {
optional PUniqueId load_id = 8;
}
-message PPendingDelta {
- required int64 partition_id = 1; // RowsetMetaPB.partition_id
- required int64 transaction_id = 2; // RowsetMetaPB.txn_id
- required int64 creation_time = 3; // RowsetMetaPB.creation_time
- repeated PendingSegmentGroupPB pending_segment_group = 4; //
RowsetMetaPB.extra_properties
- optional DeletePredicatePB delete_condition = 5; //
RowsetMetaPB.delete_predicate
-}
-
-message PendingSegmentGroupPB {
- required int32 pending_segment_group_id = 1;
- required int32 num_segments = 2;
- required PUniqueId load_id = 3;
- repeated ZoneMap zone_maps = 4;
- optional bool empty = 5;
-}
-
-message SchemaChangeStatusMessage {
- required int64 related_tablet_id = 1;
- required int32 related_schema_hash = 2;
-
- repeated FileVersionMessage versions_to_be_changed = 3; // Deprecated. Use
PDelta instead
-
- required int32 schema_change_type = 4;
-
- repeated PDelta versions_to_changed = 5;
-}
-
enum DataFileType {
OLAP_DATA_FILE = 0; //Deprecated. Only columnar-wise format is supported.
COLUMN_ORIENTED_FILE = 1;
@@ -190,38 +132,6 @@ message InPredicatePB {
repeated string values = 3;
}
-message OLAPHeaderMessage {
- required uint32 num_rows_per_data_block = 1; //
TabletSchemaPB.num_rows_per_row_block
-
- repeated FileVersionMessage file_version = 2; // Deprecated. Use PDelta
instead after stream load
-
- required int32 cumulative_layer_point = 3; //
TabletMetaPB.cumulative_layer_point
- required uint32 num_short_key_fields = 4; //
TabletSchemaPB.num_short_key_columns
- repeated ColumnMessage column = 5; // TabletSchemaPB.column
- required int64 creation_time = 6; // TabletMetaPB.creation_time
- repeated int32 selectivity = 7; // Deprecated.
- optional SchemaChangeStatusMessage schema_change_status = 8; //
TabletMetaPB.alter_task
- optional DataFileType data_file_type = 9 [default = OLAP_DATA_FILE]; //
? only column oriented
- optional uint32 next_column_unique_id = 10 [default = 0]; //
TabletSchemaPB.next_column_unique_id
- optional CompressKind compress_kind = 11 [default = COMPRESS_LZO]; //
TabletSchemaPB.compress_kind
- optional uint32 segment_size = 12 [default = 4292870144]; // ? not used
- repeated DeletePredicatePB delete_data_conditions = 13; // not serialized
any more, just read from PDelta
- // bloom filter false positive probability
- optional double bf_fpp = 14; // TabletSchemaPB.bf_fpp
- optional KeysType keys_type = 15; // TabletSchemaPB.keys_type
- repeated PDelta delta = 16; // TabletMetaPB.rs_metas
- repeated PPendingDelta pending_delta = 17; // need write to olap meta
store
- // @Deprecated
- repeated PDelta incremental_delta = 18; // TabletMetaPB.inc_rs_metas
-
- // if true, this tablet will not do compaction,
- // and does not create init version
- optional bool in_restore_mode = 19 [default = false]; //
TabletMetaPB.is_restore_mode
- optional int64 tablet_id = 20; // TabletMetaPB.tablet_id
- optional int32 schema_hash = 21; // TabletMetaPB.schema_hash? int32 vs
int64
- optional uint64 shard_id = 22; // TabletMetaPB.shard_id? int64 vs int32
-}
-
enum AlterTabletState {
ALTER_PREPARED = 0;
ALTER_RUNNING = 1;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]