neverchanje commented on a change in pull request #597: URL: https://github.com/apache/incubator-pegasus/pull/597#discussion_r485488156
########## File path: src/server/hotspot_partition_calculator.cpp ########## @@ -0,0 +1,100 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "hotspot_partition_calculator.h" + +#include <algorithm> +#include <math.h> +#include <dsn/dist/fmt_logging.h> + +namespace pegasus { +namespace server { + +DSN_DEFINE_int64("pegasus.hotspot", + max_hotspot_store_size, + 100, + "the max count of historical data stored in calculator, in order to same Mem"); + +void hotspot_partition_calculator::data_aggregate(const std::vector<row_data> &partitions) +{ + while (_historical_data.size() > FLAGS_max_hotspot_store_size - 1) { + _historical_data.pop(); + } + std::vector<hotspot_partition_data> temp(partitions.size()); + for (int i = 0; i < partitions.size(); i++) { + temp[i] = std::move(hotspot_partition_data(partitions[i])); + } + _historical_data.emplace(temp); +} + +void hotspot_partition_calculator::init_perf_counter(const int perf_counter_count) +{ + std::string counter_name; + std::string counter_desc; + for (int i = 0; i < perf_counter_count; i++) { + string paritition_desc = _app_name + '.' 
+ std::to_string(i); + counter_name = fmt::format("app.stat.hotspots@{}", paritition_desc); + counter_desc = fmt::format("statistic the hotspots of app {}", paritition_desc); + _hot_points[i].init_app_counter( + "app.pegasus", counter_name.c_str(), COUNTER_TYPE_NUMBER, counter_desc.c_str()); + } +} + +void hotspot_partition_calculator::_data_analyse( + const std::queue<std::vector<hotspot_partition_data>> &hotspot_app_data, Review comment: Serious? Why call it `hotspot_app_data`? ########## File path: src/server/hotspot_partition_calculator.h ########## @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "hotspot_partition_data.h" +#include <gtest/gtest_prod.h> +#include <dsn/perf_counter/perf_counter.h> + +namespace pegasus { +namespace server { + +// hotspot_partition_calculator is used to find the hot partition in a table. 
+class hotspot_partition_calculator +{ +public: + hotspot_partition_calculator(const std::string &app_name, int partition_count) + : _app_name(app_name), _hot_points(partition_count) + { + init_perf_counter(partition_count); + } + // aggregate related data of hotspot detection + void data_aggregate(const std::vector<row_data> &partitions); + // analyse the saved data to find hotspot partition + void data_analyse(); + +private: + const std::string _app_name; + + void init_perf_counter(int perf_counter_count); + // usually a partition with "hot-point value" >= 3 can be considered as a hotspot partition. + std::vector<dsn::perf_counter_wrapper> _hot_points; + // saving historical data can improve accuracy + std::queue<std::vector<hotspot_partition_data>> _historical_data; Review comment: ```suggestion std::queue<std::vector<hotspot_partition_data>> _partition_stat_histories; ``` I can't understand even after a deep look at it. "historical_data" for what? I think you mean the statistics histories of every partition. ########## File path: src/server/hotspot_partition_calculator.cpp ########## @@ -0,0 +1,101 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "hotspot_partition_calculator.h" + +#include <algorithm> +#include <math.h> +#include <dsn/dist/fmt_logging.h> +#include <dsn/utility/flags.h> + +namespace pegasus { +namespace server { + +DSN_DEFINE_int64("pegasus.collector", + max_hotspot_store_size, + 100, + "the max count of historical data " + "stored in calculator, The FIFO " + "queue design is used to " + "eliminate outdated historical " + "data"); + +void hotspot_partition_calculator::data_aggregate(const std::vector<row_data> &partitions) +{ + while (_historical_data.size() > FLAGS_max_hotspot_store_size - 1) { + _historical_data.pop(); + } + std::vector<hotspot_partition_data> temp(partitions.size()); + for (int i = 0; i < partitions.size(); i++) { + temp[i] = std::move(hotspot_partition_data(partitions[i])); + } + _historical_data.emplace(temp); +} + +void hotspot_partition_calculator::init_perf_counter(int partition_count) +{ + std::string counter_name; + std::string counter_desc; + for (int i = 0; i < partition_count; i++) { + string partition_desc = _app_name + '.' 
+ std::to_string(i); + counter_name = fmt::format("app.stat.hotspots@{}", partition_desc); + counter_desc = fmt::format("statistic the hotspots of app {}", partition_desc); + _hot_points[i].init_app_counter( + "app.pegasus", counter_name.c_str(), COUNTER_TYPE_NUMBER, counter_desc.c_str()); + } +} + +void hotspot_partition_calculator::data_analyse() +{ + dassert(_historical_data.back().size() == _hot_points.size(), + "partition counts error, please check"); + std::vector<double> data_samples; + data_samples.reserve(_historical_data.size() * _hot_points.size()); + auto temp_data = _historical_data; + double total = 0, standard_deviation = 0, average_number = 0; + int sample_count = 0; + while (!temp_data.empty()) { + for (const auto &partition_data : temp_data.front()) { + if (partition_data.total_qps - 1.00 > 0) { + data_samples.push_back(partition_data.total_qps); + total += partition_data.total_qps; + sample_count++; + } + } + temp_data.pop(); + } Review comment: Use std::deque instead so that you can iterate over the queue directly; std::queue does not support range-based for loops. Your code using std::queue requires copying the data from `_historical_data` into `temp_data`. You should do your best to avoid such a large copy. ``` for (const auto &history : _historical_data) { for (const auto &partition_data : history) { if (partition_data.total_qps - 1.00 > 0) { ... } } } ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
