Copilot commented on code in PR #58636: URL: https://github.com/apache/doris/pull/58636#discussion_r2600823491
########## be/src/runtime_filter/runtime_filter_selectivity.h: ########## @@ -0,0 +1,91 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <cstdint> + +#include "common/config.h" +#include "common/logging.h" + +namespace doris { + +// Used to track the selectivity of runtime filters +// If the selectivity of a runtime filter is very low, it is considered ineffective and can be ignored +// Considering that the selectivity of runtime filters may change with data variations +// A dynamic selectivity tracking mechanism is needed +// Note: this is not a thread-safe class + +class RuntimeFilterSelectivity { +public: + RuntimeFilterSelectivity() = default; + + RuntimeFilterSelectivity(const RuntimeFilterSelectivity&&) = delete; + void update_judge_counter() { + if ((_judge_counter++) >= config::runtime_filter_sampling_frequency) { + reset_judge_selectivity(); + } + } + + void update_judge_selectivity(int filter_id, uint64_t filter_rows, uint64_t input_rows, + double ignore_thredhold) { + if (!_always_true) { + _judge_filter_rows += filter_rows; + _judge_input_rows += input_rows; + judge_selectivity(ignore_thredhold, _judge_filter_rows, _judge_input_rows, + _always_true); + } + + VLOG_ROW << fmt::format( + "Runtime filter[{}] selectivity update: filter_rows: {}, input_rows: {}, filter " + "rate: {}, " + "ignore_thredhold: {}, counter: {} , always_true: {}", Review Comment: Typo in log message: `ignore_thredhold` should be `ignore_threshold` (missing 's'). ```suggestion "ignore_threshold: {}, counter: {} , always_true: {}", ``` ########## be/src/runtime_filter/runtime_filter_selectivity.h: ########## @@ -0,0 +1,91 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <cstdint> + +#include "common/config.h" +#include "common/logging.h" + +namespace doris { + +// Used to track the selectivity of runtime filters +// If the selectivity of a runtime filter is very low, it is considered ineffective and can be ignored +// Considering that the selectivity of runtime filters may change with data variations +// A dynamic selectivity tracking mechanism is needed +// Note: this is not a thread-safe class + +class RuntimeFilterSelectivity { +public: + RuntimeFilterSelectivity() = default; + + RuntimeFilterSelectivity(const RuntimeFilterSelectivity&&) = delete; + void update_judge_counter() { + if ((_judge_counter++) >= config::runtime_filter_sampling_frequency) { + reset_judge_selectivity(); + } + } + + void update_judge_selectivity(int filter_id, uint64_t filter_rows, uint64_t input_rows, + double ignore_thredhold) { + if (!_always_true) { + _judge_filter_rows += filter_rows; + _judge_input_rows += input_rows; + judge_selectivity(ignore_thredhold, _judge_filter_rows, _judge_input_rows, + _always_true); + } + + VLOG_ROW << fmt::format( + "Runtime filter[{}] selectivity update: filter_rows: {}, input_rows: {}, filter " + "rate: {}, " + "ignore_thredhold: {}, counter: {} , always_true: {}", + filter_id, _judge_filter_rows, _judge_input_rows, + static_cast<double>(filter_rows) / static_cast<double>(input_rows), + ignore_thredhold, _judge_counter, _always_true); + } + + bool maybe_always_true_can_ignore() const { + /// TODO: maybe we can use seesion variable to control this behavior ? Review Comment: Typo in comment: `seesion` should be `session`. ```suggestion /// TODO: maybe we can use session variable to control this behavior ? ``` ########## be/src/runtime_filter/runtime_filter_selectivity.h: ########## @@ -0,0 +1,91 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <cstdint> + +#include "common/config.h" +#include "common/logging.h" + +namespace doris { + +// Used to track the selectivity of runtime filters +// If the selectivity of a runtime filter is very low, it is considered ineffective and can be ignored +// Considering that the selectivity of runtime filters may change with data variations +// A dynamic selectivity tracking mechanism is needed +// Note: this is not a thread-safe class + +class RuntimeFilterSelectivity { +public: + RuntimeFilterSelectivity() = default; + + RuntimeFilterSelectivity(const RuntimeFilterSelectivity&&) = delete; + void update_judge_counter() { + if ((_judge_counter++) >= config::runtime_filter_sampling_frequency) { + reset_judge_selectivity(); + } + } + + void update_judge_selectivity(int filter_id, uint64_t filter_rows, uint64_t input_rows, + double ignore_thredhold) { + if (!_always_true) { + _judge_filter_rows += filter_rows; + _judge_input_rows += input_rows; + judge_selectivity(ignore_thredhold, _judge_filter_rows, _judge_input_rows, + _always_true); + } + + VLOG_ROW << fmt::format( + "Runtime filter[{}] selectivity update: filter_rows: {}, input_rows: {}, filter " + "rate: {}, " + "ignore_thredhold: {}, counter: {} , always_true: {}", + filter_id, _judge_filter_rows, _judge_input_rows, + static_cast<double>(filter_rows) / static_cast<double>(input_rows), + ignore_thredhold, _judge_counter, _always_true); Review Comment: Typo in parameter name: `ignore_thredhold` should be `ignore_threshold` (missing 's'). ```suggestion double ignore_threshold) { if (!_always_true) { _judge_filter_rows += filter_rows; _judge_input_rows += input_rows; judge_selectivity(ignore_threshold, _judge_filter_rows, _judge_input_rows, _always_true); } VLOG_ROW << fmt::format( "Runtime filter[{}] selectivity update: filter_rows: {}, input_rows: {}, filter " "rate: {}, " "ignore_threshold: {}, counter: {} , always_true: {}", filter_id, _judge_filter_rows, _judge_input_rows, static_cast<double>(filter_rows) / static_cast<double>(input_rows), ignore_threshold, _judge_counter, _always_true); ``` ########## be/test/runtime_filter/runtime_filter_selectivity_test.cpp: ########## @@ -0,0 +1,120 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "runtime_filter/runtime_filter_selectivity.h" + +#include <glog/logging.h> +#include <gtest/gtest.h> + +namespace doris { + +class RuntimeFilterSelectivityTest : public testing::Test {}; + +TEST_F(RuntimeFilterSelectivityTest, basic) { + RuntimeFilterSelectivity selectivity; + EXPECT_FALSE(selectivity.maybe_always_true_can_ignore()); +} + +TEST_F(RuntimeFilterSelectivityTest, judge_selectivity_low_filter_rate) { + bool always_true = false; + RuntimeFilterSelectivity::judge_selectivity(0.1, 5, 100, always_true); + EXPECT_TRUE(always_true); +} + +TEST_F(RuntimeFilterSelectivityTest, judge_selectivity_high_filter_rate) { + bool always_true = false; + RuntimeFilterSelectivity::judge_selectivity(0.1, 50, 100, always_true); + EXPECT_FALSE(always_true); +} + +TEST_F(RuntimeFilterSelectivityTest, update_judge_selectivity_not_always_true) { + RuntimeFilterSelectivity selectivity; + selectivity.update_judge_selectivity(-1, 50, 100, 0.1); + EXPECT_FALSE(selectivity.maybe_always_true_can_ignore()); +} + +TEST_F(RuntimeFilterSelectivityTest, update_judge_selectivity_always_true) { + RuntimeFilterSelectivity selectivity; + selectivity.update_judge_selectivity(-1, 5, 100, 0.1); + EXPECT_TRUE(selectivity.maybe_always_true_can_ignore()); +} + +TEST_F(RuntimeFilterSelectivityTest, update_judge_selectivity_once_true_always_true) { + RuntimeFilterSelectivity selectivity; + selectivity.update_judge_selectivity(-1, 5, 100, 0.1); + EXPECT_TRUE(selectivity.maybe_always_true_can_ignore()); + + // After marked as always_true, subsequent updates should be ignored + selectivity.update_judge_selectivity(-1, 90, 100, 0.1); + EXPECT_TRUE(selectivity.maybe_always_true_can_ignore()); +} + +TEST_F(RuntimeFilterSelectivityTest, update_judge_counter_reset) { + RuntimeFilterSelectivity selectivity; + + // Set sampling frequency to a small value for testing + config::runtime_filter_sampling_frequency = 2; + + selectivity.update_judge_selectivity(-1, 5, 100, 0.1); + EXPECT_TRUE(selectivity.maybe_always_true_can_ignore()); + + // Trigger reset by calling update_judge_counter multiple times + selectivity.update_judge_counter(); + selectivity.update_judge_counter(); + selectivity.update_judge_counter(); + + // After reset, should be able to re-evaluate + EXPECT_FALSE(selectivity.maybe_always_true_can_ignore()); + + // Now should be able to evaluate again + selectivity.update_judge_selectivity(-1, 90, 100, 0.1); + EXPECT_FALSE(selectivity.maybe_always_true_can_ignore()); +} + +TEST_F(RuntimeFilterSelectivityTest, accumulated_selectivity) { + RuntimeFilterSelectivity selectivity; + + // First update with high filter rate + selectivity.update_judge_selectivity(-1, 80, 100, 0.1); + EXPECT_FALSE(selectivity.maybe_always_true_can_ignore()); + + // Second update, accumulated should still have high filter rate + selectivity.update_judge_selectivity(-1, 70, 100, 0.1); + EXPECT_FALSE(selectivity.maybe_always_true_can_ignore()); + + // Accumulated: 150 filtered out of 200 total (75% filter rate) +} + +TEST_F(RuntimeFilterSelectivityTest, edge_case_zero_input_rows) { + bool always_true = false; + // This will cause division by zero, implementation should handle it + RuntimeFilterSelectivity::judge_selectivity(0.1, 0, 0, always_true); + // Depending on implementation, this might be true or false +} Review Comment: Missing assertion for the edge case of zero input rows. The comment says "Depending on implementation, this might be true or false" but there should be an explicit expectation. Without it, this test doesn't verify any behavior and could hide division-by-zero bugs. ########## be/src/runtime_filter/runtime_filter_selectivity.h: ########## @@ -0,0 +1,91 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <cstdint> + +#include "common/config.h" +#include "common/logging.h" + +namespace doris { + +// Used to track the selectivity of runtime filters +// If the selectivity of a runtime filter is very low, it is considered ineffective and can be ignored +// Considering that the selectivity of runtime filters may change with data variations +// A dynamic selectivity tracking mechanism is needed +// Note: this is not a thread-safe class + +class RuntimeFilterSelectivity { +public: + RuntimeFilterSelectivity() = default; + + RuntimeFilterSelectivity(const RuntimeFilterSelectivity&&) = delete; + void update_judge_counter() { + if ((_judge_counter++) >= config::runtime_filter_sampling_frequency) { + reset_judge_selectivity(); + } + } + + void update_judge_selectivity(int filter_id, uint64_t filter_rows, uint64_t input_rows, + double ignore_thredhold) { + if (!_always_true) { + _judge_filter_rows += filter_rows; + _judge_input_rows += input_rows; + judge_selectivity(ignore_thredhold, _judge_filter_rows, _judge_input_rows, + _always_true); + } + + VLOG_ROW << fmt::format( + "Runtime filter[{}] selectivity update: filter_rows: {}, input_rows: {}, filter " + "rate: {}, " + "ignore_thredhold: {}, counter: {} , always_true: {}", + filter_id, _judge_filter_rows, _judge_input_rows, + static_cast<double>(filter_rows) / static_cast<double>(input_rows), + ignore_thredhold, _judge_counter, _always_true); + } + + bool maybe_always_true_can_ignore() const { + /// TODO: maybe we can use seesion variable to control this behavior ? + if (config::runtime_filter_sampling_frequency <= 0) { + return false; + } else { + return _always_true; + } + } + + static void judge_selectivity(double ignore_threshold, int64_t filter_rows, int64_t input_rows, + bool& always_true) { + always_true = static_cast<double>(filter_rows) / static_cast<double>(input_rows) < Review Comment: Potential division by zero bug on lines 58 and 73. When `input_rows` is 0, the division `filter_rows / input_rows` will cause undefined behavior. The code should handle this edge case, for example by checking if `input_rows > 0` before performing the division. ########## be/src/runtime_filter/runtime_filter_selectivity.h: ########## @@ -0,0 +1,91 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <cstdint> + +#include "common/config.h" +#include "common/logging.h" + +namespace doris { + +// Used to track the selectivity of runtime filters +// If the selectivity of a runtime filter is very low, it is considered ineffective and can be ignored +// Considering that the selectivity of runtime filters may change with data variations +// A dynamic selectivity tracking mechanism is needed +// Note: this is not a thread-safe class + +class RuntimeFilterSelectivity { +public: + RuntimeFilterSelectivity() = default; + + RuntimeFilterSelectivity(const RuntimeFilterSelectivity&&) = delete; + void update_judge_counter() { + if ((_judge_counter++) >= config::runtime_filter_sampling_frequency) { + reset_judge_selectivity(); + } + } + + void update_judge_selectivity(int filter_id, uint64_t filter_rows, uint64_t input_rows, + double ignore_thredhold) { + if (!_always_true) { + _judge_filter_rows += filter_rows; + _judge_input_rows += input_rows; + judge_selectivity(ignore_thredhold, _judge_filter_rows, _judge_input_rows, + _always_true); + } + + VLOG_ROW << fmt::format( + "Runtime filter[{}] selectivity update: filter_rows: {}, input_rows: {}, filter " + "rate: {}, " + "ignore_thredhold: {}, counter: {} , always_true: {}", + filter_id, _judge_filter_rows, _judge_input_rows, + static_cast<double>(filter_rows) / static_cast<double>(input_rows), Review Comment: Potential division by zero bug in log statement. When `input_rows` is 0, the division `filter_rows / input_rows` will cause undefined behavior. This should be guarded with a check for `input_rows > 0`. ```suggestion (input_rows > 0 ? static_cast<double>(filter_rows) / static_cast<double>(input_rows) : 0.0), ``` ########## be/src/runtime_filter/runtime_filter_selectivity.h: ########## @@ -0,0 +1,91 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <cstdint> + +#include "common/config.h" +#include "common/logging.h" + +namespace doris { + +// Used to track the selectivity of runtime filters +// If the selectivity of a runtime filter is very low, it is considered ineffective and can be ignored +// Considering that the selectivity of runtime filters may change with data variations +// A dynamic selectivity tracking mechanism is needed +// Note: this is not a thread-safe class + +class RuntimeFilterSelectivity { +public: + RuntimeFilterSelectivity() = default; + + RuntimeFilterSelectivity(const RuntimeFilterSelectivity&&) = delete; Review Comment: The move constructor is marked as deleted but uses `const RuntimeFilterSelectivity&&`. The `const` qualifier is incorrect for a move constructor - it should be `RuntimeFilterSelectivity(RuntimeFilterSelectivity&&) = delete;` without the `const`. ```suggestion RuntimeFilterSelectivity(RuntimeFilterSelectivity&&) = delete; ``` ########## be/test/runtime_filter/runtime_filter_selectivity_test.cpp: ########## @@ -0,0 +1,120 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "runtime_filter/runtime_filter_selectivity.h" + +#include <glog/logging.h> +#include <gtest/gtest.h> + +namespace doris { + +class RuntimeFilterSelectivityTest : public testing::Test {}; + +TEST_F(RuntimeFilterSelectivityTest, basic) { + RuntimeFilterSelectivity selectivity; + EXPECT_FALSE(selectivity.maybe_always_true_can_ignore()); +} + +TEST_F(RuntimeFilterSelectivityTest, judge_selectivity_low_filter_rate) { + bool always_true = false; + RuntimeFilterSelectivity::judge_selectivity(0.1, 5, 100, always_true); + EXPECT_TRUE(always_true); +} + +TEST_F(RuntimeFilterSelectivityTest, judge_selectivity_high_filter_rate) { + bool always_true = false; + RuntimeFilterSelectivity::judge_selectivity(0.1, 50, 100, always_true); + EXPECT_FALSE(always_true); +} + +TEST_F(RuntimeFilterSelectivityTest, update_judge_selectivity_not_always_true) { + RuntimeFilterSelectivity selectivity; + selectivity.update_judge_selectivity(-1, 50, 100, 0.1); + EXPECT_FALSE(selectivity.maybe_always_true_can_ignore()); +} + +TEST_F(RuntimeFilterSelectivityTest, update_judge_selectivity_always_true) { + RuntimeFilterSelectivity selectivity; + selectivity.update_judge_selectivity(-1, 5, 100, 0.1); + EXPECT_TRUE(selectivity.maybe_always_true_can_ignore()); +} + +TEST_F(RuntimeFilterSelectivityTest, update_judge_selectivity_once_true_always_true) { + RuntimeFilterSelectivity selectivity; + selectivity.update_judge_selectivity(-1, 5, 100, 0.1); + EXPECT_TRUE(selectivity.maybe_always_true_can_ignore()); + + // After marked as always_true, subsequent updates should be ignored + selectivity.update_judge_selectivity(-1, 90, 100, 0.1); + EXPECT_TRUE(selectivity.maybe_always_true_can_ignore()); +} + +TEST_F(RuntimeFilterSelectivityTest, update_judge_counter_reset) { Review Comment: [nitpick] The test name `update_judge_counter_reset` doesn't clearly describe what is being tested. Consider renaming to `counter_reset_clears_always_true_flag` or similar to better describe the expected behavior. ```suggestion TEST_F(RuntimeFilterSelectivityTest, counter_reset_clears_always_true_flag) { ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
