chaoyli closed pull request #506: add create file hard link for rowset
URL: https://github.com/apache/incubator-doris/pull/506
 
 
   

This pull request was merged from a forked repository. Because GitHub
does not show the original diff of a fork's pull request once it has been
merged, the diff is reproduced below for the sake of provenance:

diff --git a/be/src/olap/rowset/alpha_rowset.cpp b/be/src/olap/rowset/alpha_rowset.cpp
index 1c5e11bc..cbd69fa0 100644
--- a/be/src/olap/rowset/alpha_rowset.cpp
+++ b/be/src/olap/rowset/alpha_rowset.cpp
@@ -30,13 +30,20 @@ AlphaRowset::AlphaRowset(const RowFields& tablet_schema,
         _rowset_path(rowset_path),
         _rowset_meta(rowset_meta),
         _segment_group_size(0),
-        _is_cumulative_rowset(false) {
-    Version version = _rowset_meta->version();
-    if (version.first == version.second) {
-        _is_cumulative_rowset = false;
-    } else {
-        _is_cumulative_rowset = true;
+        _is_cumulative_rowset(false),
+        _is_pending_rowset(false) {
+    if (!_rowset_meta->has_version()) {
+        _is_pending_rowset = true;
     }
+    if (!_is_pending_rowset) {
+        Version version = _rowset_meta->version();
+        if (version.first == version.second) {
+            _is_cumulative_rowset = false;
+        } else {
+            _is_cumulative_rowset = true;
+        }
+    }
+
 }
 
 OLAPStatus AlphaRowset::init() {
@@ -71,6 +78,31 @@ RowsetMetaSharedPtr AlphaRowset::get_meta() {
 
 void AlphaRowset::set_version(Version version) {
     _rowset_meta->set_version(version);
+    _is_pending_rowset = false;
+}
+
+bool AlphaRowset::create_hard_links(std::vector<std::string>* success_links) {
+    for (auto segment_group : _segment_groups) {
+        bool  ret = segment_group->create_hard_links(success_links);
+        if (!ret) {
+            LOG(WARNING) << "create hard links failed for segment group:"
+                << segment_group->segment_group_id();
+            return false;
+        }
+    }
+    return true;
+}
+
+bool AlphaRowset::remove_old_files(std::vector<std::string>* removed_links) {
+    for (auto segment_group : _segment_groups) {
+        bool  ret = segment_group->remove_old_files(removed_links);
+        if (!ret) {
+            LOG(WARNING) << "remove old files failed for segment group:"
+                << segment_group->segment_group_id();
+            return false;
+        }
+    }
+    return true;
 }
 
 OLAPStatus AlphaRowset::_init_segment_groups() {
diff --git a/be/src/olap/rowset/alpha_rowset.h b/be/src/olap/rowset/alpha_rowset.h
index 077ce324..412c4f38 100644
--- a/be/src/olap/rowset/alpha_rowset.h
+++ b/be/src/olap/rowset/alpha_rowset.h
@@ -48,6 +48,10 @@ class AlphaRowset : public Rowset {
 
     virtual void set_version(Version version);
 
+    bool create_hard_links(std::vector<std::string>* success_links);
+
+    bool remove_old_files(std::vector<std::string>* removed_links);
+
 private:
     OLAPStatus _init_segment_groups();
 
@@ -61,6 +65,7 @@ class AlphaRowset : public Rowset {
     std::vector<std::shared_ptr<SegmentGroup>> _segment_groups;
     int _segment_group_size;
     bool _is_cumulative_rowset;
+    bool _is_pending_rowset;
 };
 
 } // namespace doris
diff --git a/be/src/olap/rowset/rowset_meta.h b/be/src/olap/rowset/rowset_meta.h
index 1a4e6719..fd4926a4 100644
--- a/be/src/olap/rowset/rowset_meta.h
+++ b/be/src/olap/rowset/rowset_meta.h
@@ -127,6 +127,11 @@ class RowsetMeta {
         _rowset_meta_pb.set_end_version(version.second);
     }
 
+    virtual bool has_version() {
+        return _rowset_meta_pb.has_start_version()
+            &&  _rowset_meta_pb.has_end_version();
+    }
+
     virtual int start_version() {
         return _rowset_meta_pb.start_version();
     }
diff --git a/be/src/olap/rowset/segment_group.cpp b/be/src/olap/rowset/segment_group.cpp
index 167bb13e..13ae5539 100644
--- a/be/src/olap/rowset/segment_group.cpp
+++ b/be/src/olap/rowset/segment_group.cpp
@@ -80,7 +80,7 @@ SegmentGroup::SegmentGroup(int64_t tablet_id, int64_t rowset_id, const RowFields
     _ref_count = 0;
     _is_pending = false;
     _partition_id = 0;
-    _transaction_id = 0;
+    _txn_id = 0;
     _short_key_length = 0;
     _new_short_key_length = 0;
     _short_key_buf = nullptr;
@@ -114,7 +114,7 @@ SegmentGroup::SegmentGroup(int64_t tablet_id, int64_t rowset_id, const RowFields
         _delete_flag(delete_flag),
         _segment_group_id(segment_group_id), _num_segments(num_segments),
         _is_pending(is_pending), _partition_id(partition_id),
-        _transaction_id(transaction_id) {
+        _txn_id(transaction_id) {
     _version = {-1, -1};
     _version_hash = 0;
     _load_id.set_hi(0);
@@ -155,7 +155,7 @@ std::string SegmentGroup::_construct_pending_file_path(int32_t segment_id, const
     std::string pending_dir_path = _rowset_path_prefix + PENDING_DELTA_PREFIX;
     std::stringstream file_path;
     file_path << pending_dir_path << "/"
-                          << _transaction_id << "_"
+                          << std::to_string(_rowset_id) << "_" +  _txn_id
                           << _segment_group_id << "_" << segment_id << suffix;
     return file_path.str();
 }
@@ -688,4 +688,101 @@ int64_t SegmentGroup::get_tablet_id() {
     return _tablet_id;
 }
 
+bool SegmentGroup::create_hard_links(std::vector<std::string>* success_links) {
+    for (int segment_id = 0; segment_id < _num_segments; segment_id++) {
+        std::string new_data_file_name = construct_data_file_path(segment_id);
+        if (!check_dir_existed(new_data_file_name)) {
+            std::string old_data_file_name = construct_old_data_file_path(segment_id);
+            if (link(new_data_file_name.c_str(), old_data_file_name.c_str()) != 0) {
+                LOG(WARNING) << "fail to create hard link. from=" << old_data_file_name << ", "
+                    << "to=" << new_data_file_name << ", " << "errno=" << Errno::no();
+                return false;
+            }
+        }
+        success_links->push_back(new_data_file_name);
+        std::string new_index_file_name = construct_index_file_path(segment_id);
+        if (!check_dir_existed(new_index_file_name)) {
+            std::string old_index_file_name = construct_old_index_file_path(segment_id);
+            if (link(new_index_file_name.c_str(), old_index_file_name.c_str()) != 0) {
+                LOG(WARNING) << "fail to create hard link. from=" << old_index_file_name << ", "
+                    << "to=" << new_index_file_name << ", " << "errno=" << Errno::no();
+                return false;
+            }
+        }
+        success_links->push_back(new_index_file_name);
+    }
+    return true;
+}
+
+bool SegmentGroup::remove_old_files(std::vector<std::string>* removed_links) {
+    for (int segment_id = 0; segment_id < _num_segments; segment_id++) {
+        std::string old_data_file_name = construct_old_data_file_path(segment_id);
+        OLAPStatus status = remove_dir(old_data_file_name);
+        if (status != OLAP_SUCCESS) {
+            return false;
+        }
+        removed_links->push_back(old_data_file_name);
+        std::string old_index_file_name = construct_old_index_file_path(segment_id);
+        status = remove_dir(old_index_file_name);
+        if (status != OLAP_SUCCESS) {
+            return false;
+        }
+        removed_links->push_back(old_index_file_name);
+    }
+    return true;
+}
+
+std::string SegmentGroup::construct_old_index_file_path(int32_t segment_id) const {
+    if (_is_pending) {
+        return _construct_old_pending_file_path(segment_id, ".idx");
+    } else {
+        return _construct_old_file_path(segment_id, ".idx");
+    }
+}
+    
+std::string SegmentGroup::construct_old_data_file_path(int32_t segment_id) const {
+    if (_is_pending) {
+        return _construct_old_pending_file_path(segment_id, ".dat");
+    } else {
+        return _construct_old_file_path(segment_id, ".dat");
+    }
+}
+
+std::string SegmentGroup::_construct_old_pending_file_path(int32_t segment_id,
+    const std::string& suffix) const {
+    std::string dir_path = _rowset_path_prefix + PENDING_DELTA_PREFIX;
+    std::stringstream file_path;
+    file_path << dir_path << "/"
+                          << _txn_id << "_"
+                          << _segment_group_id << "_" << segment_id << suffix;
+    return file_path.str();
+}
+    
+std::string SegmentGroup::_construct_old_file_path(int32_t segment_id, const std::string& suffix) const {
+    char file_path[OLAP_MAX_PATH_LEN];
+    if (_segment_group_id == -1) {
+        snprintf(file_path,
+                 sizeof(file_path),
+                 "%s_%ld_%ld_%ld_%d.%s",
+                 _rowset_path_prefix.c_str(),
+                 _version.first,
+                 _version.second,
+                 _version_hash,
+                 segment_id,
+                 suffix.c_str());
+    } else {
+        snprintf(file_path,
+                 sizeof(file_path),
+                 "%s_%ld_%ld_%ld_%d_%d.%s",
+                 _rowset_path_prefix.c_str(),
+                 _version.first,
+                 _version.second,
+                 _version_hash,
+                 _segment_group_id, segment_id,
+                 suffix.c_str());
+    }
+
+    return file_path;
+}
+
 } // namespace doris
diff --git a/be/src/olap/rowset/segment_group.h 
b/be/src/olap/rowset/segment_group.h
index 3b6a6885..f54ac00a 100644
--- a/be/src/olap/rowset/segment_group.h
+++ b/be/src/olap/rowset/segment_group.h
@@ -161,7 +161,7 @@ class SegmentGroup {
     inline void set_pending_finished() { _is_pending = false; }
 
     inline TPartitionId partition_id() const { return _partition_id; }
-    inline TTransactionId transaction_id() const { return _transaction_id; }
+    inline TTransactionId transaction_id() const { return _txn_id; }
 
     inline bool delete_flag() const { return _delete_flag; }
 
@@ -227,8 +227,14 @@ class SegmentGroup {
         return _index.get_null_supported(seg_id);
     }
 
-    std::string construct_index_file_path(int32_t segment) const;
-    std::string construct_data_file_path(int32_t segment) const;
+    std::string construct_index_file_path(int32_t segment_id) const;
+    std::string construct_data_file_path(int32_t segment_id) const;
+
+    // these two functions are for compatible, and will be deleted later
+    // so it is better not to use it.
+    std::string construct_old_index_file_path(int32_t segment_id) const;
+    std::string construct_old_data_file_path(int32_t segment_id) const;
+
     size_t current_num_rows_per_row_block() const;
     void publish_version(Version version, VersionHash version_hash);
 
@@ -244,10 +250,18 @@ class SegmentGroup {
 
     int64_t get_tablet_id();
 
+    bool create_hard_links(std::vector<std::string>* success_links);
+
+    bool remove_old_files(std::vector<std::string>* removed_links);
+
 private:
-    std::string _construct_pending_file_path(int32_t segment, const std::string& suffix) const;
+    std::string _construct_pending_file_path(int32_t segment_id, const std::string& suffix) const;
+    
+    std::string _construct_file_path(int32_t segment_id, const std::string& suffix) const;
+
+    std::string _construct_old_pending_file_path(int32_t segment_id, const std::string& suffix) const;
     
-    std::string _construct_file_path(int32_t segment, const std::string& suffix) const;
+    std::string _construct_old_file_path(int32_t segment_id, const std::string& suffix) const;
 
 private:
     int64_t _tablet_id;
@@ -268,7 +282,7 @@ class SegmentGroup {
     MemIndex _index;
     bool _is_pending;
     TPartitionId _partition_id;
-    TTransactionId _transaction_id;
+    TTransactionId _txn_id;
 
     // short key对应的field_info数组
     RowFields _short_key_info_list;


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org
For additional commands, e-mail: dev-h...@doris.apache.org

Reply via email to