kangkaisen commented on a change in pull request #1798: Optimize the load 
performance for large file
URL: https://github.com/apache/incubator-doris/pull/1798#discussion_r323638178
 
 

 ##########
 File path: be/src/runtime/tablets_channel.h
 ##########
 @@ -0,0 +1,129 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <vector>
+#include <unordered_map>
+#include <utility>
+
+#include "runtime/descriptors.h"
+#include "runtime/mem_tracker.h"
+#include "util/bitmap.h"
+#include "util/thread_pool.hpp"
+
+#include "gen_cpp/Types_types.h"
+#include "gen_cpp/PaloInternalService_types.h"
+#include "gen_cpp/internal_service.pb.h"
+
+namespace doris {
+
+struct TabletsChannelKey {  // key type: a UniqueId paired with an index id
+    UniqueId id;  // built from the PUniqueId handed to the constructor
+    int64_t index_id;  // copied from the constructor's index_id_ argument
+
+    TabletsChannelKey(const PUniqueId& pid, int64_t index_id_)
+        : id(pid), index_id(index_id_) { }
+
+    ~TabletsChannelKey() noexcept { }  // empty body: nothing is released here
+
+    bool operator==(const TabletsChannelKey& rhs) const noexcept {  // equal only when both fields match
+        return index_id == rhs.index_id && id == rhs.id;  // index_id is compared first, then id
+    }
+
+    std::string to_string() const;  // declared only; the definition is not visible in this hunk
+};
+
+struct TabletsChannelKeyHasher {  // hash functor taking a TabletsChannelKey
+    std::size_t operator()(const TabletsChannelKey& key) const {
+        size_t seed = key.id.hash();  // hash of the id field is used as the seed below
+        return doris::HashUtil::hash(&key.index_id, sizeof(key.index_id), 
seed);  // hashes the bytes of index_id with that seed, so both fields feed the result
+    }
+};
+
+class DeltaWriter;
+class MemTable;
+class OlapTableSchemaParam;
+
+// channel that process all data for this load
+class TabletsChannel {
+public:
+    TabletsChannel(const TabletsChannelKey& key, size_t tablet_num);
+
+    ~TabletsChannel();
+
+    Status open(const PTabletWriterOpenRequest& params);
+
+    Status add_batch(const PTabletWriterAddBatchRequest& batch);
+
+    Status close(int sender_id, bool* finished,
+        const google::protobuf::RepeatedField<int64_t>& partition_ids,
+        google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec);
+
+    time_t last_updated_time() {
+        return _last_updated_time;
+    }
+
+private:
+    // open all writer
+    Status _open_all_writers(const PTabletWriterOpenRequest& params);
+    // the work function of flush thread.
+    // it will continuously get memtable from flush queue, and flush them to 
disk
+    void _flush_memtable();
+
+private:
+    // id of this load channel, just for 
+    TabletsChannelKey _key;
+
+    // make execute sequece
+    std::mutex _lock;
+
+    // initialized in open function
+    int64_t _txn_id = -1;
+    int64_t _index_id = -1;
+    OlapTableSchemaParam* _schema = nullptr;
+    TupleDescriptor* _tuple_desc = nullptr;
+    // row_desc used to construct
+    RowDescriptor* _row_desc = nullptr;
+    bool _opened = false;
+
+    // next sequence we expect
+    int _num_remaining_senders = 0;
+    std::vector<int64_t> _next_seqs;
+    Bitmap _closed_senders;
+    Status _close_status;
+
+    // tablet_id -> TabletChannel
+    std::unordered_map<int64_t, DeltaWriter*> _tablet_writers;
+
+    std::unordered_set<int64_t> _partition_ids;
+
+    // TODO(zc): to add this tracker to somewhere
+    MemTracker _mem_tracker;
+
+    //use to erase timeout TabletsChannel in _tablets_channels
+    time_t _last_updated_time;
+
+    ThreadPool _flush_pool;
+    // the size of flush queue equals to the number of tablets.
+    // so that each tablet has at least one rotational memtable.
+    // and the over all mem usage is at most 2 times of total memtable's size
+    BlockingQueue<std::shared_ptr<MemTable>> _flush_queue;
 
 Review comment:
   Data is not evenly distributed across tablets. An extreme example: there are 
10 tablets and tablet A holds ninety percent of the data, so flushing tablet A's 
data will be very slow. So I think there may be two or even more of tablet A's 
MemTables in flush_queue at the same time. 
   
   So "the over all mem usage is at most 2 times of total memtable's size" will 
not always be right.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org
For additional commands, e-mail: dev-h...@doris.apache.org

Reply via email to