acelyc111 commented on a change in pull request #603: URL: https://github.com/apache/incubator-pegasus/pull/603#discussion_r489962281
########## File path: src/server/hotkey_coarse_data_collector.h ########## @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "base/pegasus_utils.h" + +namespace pegasus { +namespace server { + +// hotkey_coarse_data_collector handles the first procedure (COARSE) of hotkey detection. +// It captures the data without recording them, but simply divides the incoming requests +// into a number of buckets and counts the accessed times of each bucket. +// If the variance among the buckets exceeds the threshold, the most accessed bucket Review comment: ```suggestion // If the variance among the buckets exceeds the threshold, the most frequently accessed bucket ``` ########## File path: src/server/hotkey_coarse_data_collector.h ########## @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "base/pegasus_utils.h" + +namespace pegasus { +namespace server { + +// hotkey_coarse_data_collector handles the first procedure (COARSE) of hotkey detection. +// It captures the data without recording them, but simply divides the incoming requests +// into a number of buckets and counts the accessed times of each bucket. +// If the variance among the buckets exceeds the threshold, the most accessed bucket +// is regarded to contain the hotkey. +// +// This technique intends to reduce the load of data recording during FINE procedure, +// filtering what's unnecessary to catch. +class hotkey_coarse_data_collector +{ +public: + // Counts `row_cnt` for the bucket of `hash_key`. + void capture_data(const dsn::blob &hash_key, uint64_t size); + + // returns: id of the most accessed bucket. + // -1 if not hot bucket is found. + int analyse_data(); Review comment: 'analyse_data' is too obscure; how about 'get_hottest_bucket'? ########## File path: src/server/hotkey_collector.h ########## @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <dsn/utility/string_view.h> +#include <rrdb/rrdb_types.h> + +namespace pegasus { +namespace server { + +class hotkey_coarse_data_collector; +class hotkey_fine_data_collector; + +// hotkey_collector is responsible to find the hot keys after the partition +// was detected to be hot. The two types of hotkey, READ & WRITE, are detected +// separately. +class hotkey_collector +{ +public: + // size: the cu size of raw_key/hash_key calculated by `capacity_unit_calculator` + void capture_raw_key(const ::dsn::blob &raw_key, uint64_t size); Review comment: ```suggestion void capture_raw_key(const dsn::blob &raw_key, uint64_t size); ``` ########## File path: src/server/hotkey_collector.h ########## @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include <dsn/utility/string_view.h> +#include <rrdb/rrdb_types.h> + +namespace pegasus { +namespace server { + +class hotkey_coarse_data_collector; +class hotkey_fine_data_collector; + +// hotkey_collector is responsible to find the hot keys after the partition +// was detected to be hot. The two types of hotkey, READ & WRITE, are detected +// separately. +class hotkey_collector +{ +public: + // size: the cu size of raw_key/hash_key calculated by `capacity_unit_calculator` + void capture_raw_key(const ::dsn::blob &raw_key, uint64_t size); + void capture_hash_key(const dsn::blob &hash_key, uint64_t size); + // analyse_data is a periodic task, only valid when _state == collector_state::COARSE + // || collector_state::FINE + void analyse_data(); + bool handle_operation(dsn::apps::hotkey_detect_action::type action, std::string &err_hint); + // find outlier in both coarse capture and fine capture + static int outlier_cal(const std::vector<uint64_t> &data_samples, int threshold); Review comment: Unify function name to others, 'verb + noun' ########## File path: src/server/hotkey_coarse_data_collector.h ########## @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "base/pegasus_utils.h" + +namespace pegasus { +namespace server { + +// hotkey_coarse_data_collector handles the first procedure (COARSE) of hotkey detection. +// It captures the data without recording them, but simply divides the incoming requests +// into a number of buckets and counts the accessed times of each bucket. +// If the variance among the buckets exceeds the threshold, the most accessed bucket +// is regarded to contain the hotkey. +// +// This technique intends to reduce the load of data recording during FINE procedure, +// filtering what's unnecessary to catch. +class hotkey_coarse_data_collector +{ +public: + // Counts `row_cnt` for the bucket of `hash_key`. + void capture_data(const dsn::blob &hash_key, uint64_t size); + + // returns: id of the most accessed bucket. + // -1 if not hot bucket is found. + int analyse_data(); + +private: + // hash method is from hotkey_collector::outlier_cal Review comment: No need to explain which hash method it uses, but it is necessary to explain what value it stores. ########## File path: src/server/hotkey_fine_data_collector.h ########## @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "base/pegasus_utils.h" +#include <readerwriterqueue/readerwriterqueue.h> + +namespace pegasus { +namespace server { + +// hotkey_fine_data_collector handles the second procedure (FINE) of hotkey detection. +// It captures only the data mapping to the "hot" bucket. +// +// To prevent locking on the read path, we create one queue per thread of THREAD_POOL_LOCAL_APP. +// The read request is captured right inside its execution thread. +// +// For writes we do not apply this optimization. + +typedef std::vector<moodycamel::ReaderWriterQueue<std::pair<dsn::blob, uint64_t>>> + lockfree_capture_queues; + +class hotkey_fine_data_collector +{ +public: + void capture_data(const dsn::blob &hash_key, uint64_t size); + bool analyse_data(std::string &result); Review comment: Add some comments. ########## File path: src/server/hotkey_coarse_data_collector.h ########## @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "base/pegasus_utils.h" + +namespace pegasus { +namespace server { + +// hotkey_coarse_data_collector handles the first procedure (COARSE) of hotkey detection. 
+// It captures the data without recording them, but simply divides the incoming requests +// into a number of buckets and counts the accessed times of each bucket. +// If the variance among the buckets exceeds the threshold, the most accessed bucket +// is regarded to contain the hotkey. +// +// This technique intends to reduce the load of data recording during FINE procedure, +// filtering what's unnecessary to catch. +class hotkey_coarse_data_collector +{ +public: + // Counts `row_cnt` for the bucket of `hash_key`. Review comment: What is 'row_cnt'? I suggest improving this comment. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
