Copilot commented on code in PR #3229:
URL: https://github.com/apache/brpc/pull/3229#discussion_r2853185055


##########
src/brpc/backup_request_policy.cpp:
##########
@@ -0,0 +1,197 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "brpc/backup_request_policy.h"
+
+#include <new>                             // std::nothrow
+#include <gflags/gflags.h>
+#include "butil/logging.h"
+#include "brpc/reloadable_flags.h"
+#include "bvar/reducer.h"
+#include "bvar/window.h"
+#include "butil/atomicops.h"
+#include "butil/time.h"
+
+namespace brpc {
+
+DEFINE_double(backup_request_max_ratio, -1,
+    "Maximum ratio of backup requests to total requests. "
+    "Value in (0, 1] enables rate limiting. Values <= 0 disable it "
+    "(-1 is default). Can be overridden per-channel via "
+    "ChannelOptions.backup_request_max_ratio. "
+    "Note: takes effect at Channel::Init() time; changing this flag "
+    "at runtime does not affect already-created channels.");
+
+static bool validate_backup_request_max_ratio(const char*, double v) {
+    if (v <= 0) return true;  // non-positive means disabled
+    if (v <= 1.0) return true;
+    LOG(ERROR) << "Invalid backup_request_max_ratio=" << v
+               << ", must be <= 0 (disabled) or in (0, 1]";
+    return false;
+}
+BRPC_VALIDATE_GFLAG(backup_request_max_ratio,
+                     validate_backup_request_max_ratio);
+
+DEFINE_int32(backup_request_ratio_window_size_s, 10,
+    "Window size in seconds for computing the backup request ratio. "
+    "Must be in [1, 3600].");
+
+static bool validate_backup_request_ratio_window_size_s(
+        const char*, int32_t v) {
+    if (v >= 1 && v <= 3600) return true;
+    LOG(ERROR) << "Invalid backup_request_ratio_window_size_s=" << v
+               << ", must be in [1, 3600]";
+    return false;
+}
+BRPC_VALIDATE_GFLAG(backup_request_ratio_window_size_s,
+                     validate_backup_request_ratio_window_size_s);
+
+DEFINE_int32(backup_request_ratio_update_interval_s, 5,
+    "Interval in seconds between ratio cache updates. Must be >= 1.");
+
+static bool validate_backup_request_ratio_update_interval_s(
+        const char*, int32_t v) {
+    if (v >= 1) return true;
+    LOG(ERROR) << "Invalid backup_request_ratio_update_interval_s=" << v
+               << ", must be >= 1";
+    return false;
+}
+BRPC_VALIDATE_GFLAG(backup_request_ratio_update_interval_s,
+                     validate_backup_request_ratio_update_interval_s);
+
+// Standalone statistics module for tracking backup/total request ratio
+// within a sliding time window.
+class BackupRateLimiter {
+public:
+    BackupRateLimiter(double max_backup_ratio,
+                      int window_size_seconds,
+                      int update_interval_seconds)
+        : _max_backup_ratio(max_backup_ratio)
+        , _update_interval_us(update_interval_seconds * 1000000LL)
+        , _total_window(&_total_count, window_size_seconds)
+        , _backup_window(&_backup_count, window_size_seconds)
+        , _cached_ratio(0.0)
+        , _last_update_us(0) {
+    }
+
+    // All atomic operations use relaxed ordering intentionally.
+    // This is best-effort rate limiting: a slightly stale ratio is
+    // acceptable for approximate throttling.
+    bool ShouldAllow() const {
+        const int64_t now_us = butil::cpuwide_time_us();
+        int64_t last_us = _last_update_us.load(butil::memory_order_relaxed);
+        double ratio = _cached_ratio.load(butil::memory_order_relaxed);
+
+        if (now_us - last_us >= _update_interval_us) {
+            if (_last_update_us.compare_exchange_strong(
+                    last_us, now_us, butil::memory_order_relaxed)) {
+                int64_t total = _total_window.get_value();
+                int64_t backup = _backup_window.get_value();
+                ratio = (total > 0) ? static_cast<double>(backup) / total : 
0.0;
+                _cached_ratio.store(ratio, butil::memory_order_relaxed);
+            }
+        }
+
+        // max_backup_ratio >= 1.0 means no limit (ratio cannot exceed 1.0).
+        return _max_backup_ratio >= 1.0 || ratio < _max_backup_ratio;
+    }
+
+    void OnRPCEnd(const Controller* controller) {
+        _total_count << 1;
+        if (controller->has_backup_request()) {
+            _backup_count << 1;
+        }
+    }
+
+private:
+    double  _max_backup_ratio;
+    int64_t _update_interval_us;
+
+    bvar::Adder<int64_t>   _total_count;
+    bvar::Adder<int64_t>   _backup_count;
+    bvar::Window<bvar::Adder<int64_t>> _total_window;
+    bvar::Window<bvar::Adder<int64_t>> _backup_window;

Review Comment:
   Each BackupRateLimiter instance constructs two bvar::Adder reducers wrapped 
by bvar::Window, which schedules two sampler tasks that run periodically in the 
global sampler thread. With many channels/policies this adds O(#policies) 
per-second sampling overhead. If high channel counts are expected, consider 
documenting this cost and/or using a lighter-weight sliding-window counter 
implementation without per-instance samplers.



##########
test/brpc_channel_unittest.cpp:
##########
@@ -2803,6 +2807,145 @@ TEST_F(ChannelTest, backup_request_policy) {
     }
 }
 
+TEST_F(ChannelTest, backup_request_rate_limited) {
+    ASSERT_EQ(0, StartAccept(_ep));
+
+    // Test 1: Policy is created when backup_request_max_ratio > 0
+    {
+        brpc::ChannelOptions opt;
+        opt.backup_request_ms = 10;
+        opt.backup_request_max_ratio = 0.3;
+        brpc::Channel channel;
+        ASSERT_EQ(0, channel.Init(_ep, &opt));
+        ASSERT_TRUE(channel.options().backup_request_policy != NULL);
+    }
+
+    // Test 2: No policy when per-channel ratio is -1 and global gflag is also 
disabled
+    {
+        const double saved = brpc::FLAGS_backup_request_max_ratio;
+        brpc::FLAGS_backup_request_max_ratio = -1;
+        BRPC_SCOPE_EXIT { brpc::FLAGS_backup_request_max_ratio = saved; };
+
+        brpc::ChannelOptions opt;
+        opt.backup_request_ms = 10;
+        opt.backup_request_max_ratio = -1;  // fallback to gflag (-1) = 
disabled
+        brpc::Channel channel;
+        ASSERT_EQ(0, channel.Init(_ep, &opt));
+        ASSERT_TRUE(channel.options().backup_request_policy == NULL);
+    }
+
+    // Test 3: max_ratio=0 explicitly disables even when global gflag is set
+    {
+        const double saved = brpc::FLAGS_backup_request_max_ratio;
+        brpc::FLAGS_backup_request_max_ratio = 0.5;
+        BRPC_SCOPE_EXIT { brpc::FLAGS_backup_request_max_ratio = saved; };
+
+        brpc::ChannelOptions opt;
+        opt.backup_request_ms = 10;
+        opt.backup_request_max_ratio = 0;  // explicitly disabled
+        brpc::Channel channel;
+        ASSERT_EQ(0, channel.Init(_ep, &opt));
+        ASSERT_TRUE(channel.options().backup_request_policy == NULL);
+    }
+
+    // Test 4: max_ratio=1.0 creates policy but allows all backups
+    {
+        brpc::ChannelOptions opt;
+        opt.backup_request_ms = 10;
+        opt.backup_request_max_ratio = 1.0;
+        brpc::Channel channel;
+        ASSERT_EQ(0, channel.Init(_ep, &opt));
+        ASSERT_TRUE(channel.options().backup_request_policy != NULL);
+    }
+
+    // Test 5: max_ratio > 1.0 is clamped to 1.0, policy still created
+    {
+        brpc::ChannelOptions opt;
+        opt.backup_request_ms = 10;
+        opt.backup_request_max_ratio = 2.0;
+        brpc::Channel channel;
+        ASSERT_EQ(0, channel.Init(_ep, &opt));
+        ASSERT_TRUE(channel.options().backup_request_policy != NULL);
+    }
+
+    // Test 6: Custom policy is preserved, not replaced
+    {
+        brpc::ChannelOptions opt;
+        opt.backup_request_ms = 10;
+        opt.backup_request_max_ratio = 0.3;
+        opt.backup_request_policy = &_backup_request_policy;
+        brpc::Channel channel;
+        ASSERT_EQ(0, channel.Init(_ep, &opt));
+        ASSERT_EQ(&_backup_request_policy,
+                  channel.options().backup_request_policy);
+    }
+
+    // Test 7: No policy when backup_request_ms < 0 (backup disabled)
+    {
+        brpc::ChannelOptions opt;
+        opt.backup_request_ms = -1;
+        opt.backup_request_max_ratio = 0.3;
+        brpc::Channel channel;
+        ASSERT_EQ(0, channel.Init(_ep, &opt));
+        ASSERT_TRUE(channel.options().backup_request_policy == NULL);
+    }
+
+    // Test 8: Functional — rate limiting reduces backup requests.
+    // Use RAII guard for gflag restore so cleanup runs even on ASSERT failure.
+    {
+        const int32_t saved_interval =
+            brpc::FLAGS_backup_request_ratio_update_interval_s;
+        const int32_t saved_window =
+            brpc::FLAGS_backup_request_ratio_window_size_s;
+        const double saved_ratio = brpc::FLAGS_backup_request_max_ratio;
+        brpc::FLAGS_backup_request_ratio_update_interval_s = 1;
+        brpc::FLAGS_backup_request_ratio_window_size_s = 2;
+        BRPC_SCOPE_EXIT {
+            brpc::FLAGS_backup_request_ratio_update_interval_s = 
saved_interval;
+            brpc::FLAGS_backup_request_ratio_window_size_s = saved_window;
+            brpc::FLAGS_backup_request_max_ratio = saved_ratio;
+        };
+
+        brpc::ChannelOptions opt;
+        opt.backup_request_ms = 10;  // 10ms backup timeout
+        opt.backup_request_max_ratio = 0.3;
+        opt.timeout_ms = 500;
+        opt.max_retry = 1;
+
+        brpc::Channel channel;
+        ASSERT_EQ(0, channel.Init(_ep, &opt));
+        ASSERT_TRUE(channel.options().backup_request_policy != NULL);
+
+        int backup_count = 0;
+        const int N = 40;
+        for (int i = 0; i < N; ++i) {
+            test::EchoRequest req;
+            test::EchoResponse res;
+            brpc::Controller cntl;
+            req.set_message(__FUNCTION__);
+            req.set_sleep_us(50000);  // 50ms > 10ms, triggers backup timer

Review Comment:
   This test’s assertions depend on timing (bvar::Window sampling is driven by 
a background sampler and returns 0 until enough samples exist), and the loop 
adds ~2s wall time (40 * 50ms). To reduce flakiness and runtime, consider 
warming up until the window has samples (sleep/poll) before the loop and/or 
reducing N/sleep while keeping the bound meaningful.



##########
src/brpc/backup_request_policy.cpp:
##########
@@ -0,0 +1,197 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "brpc/backup_request_policy.h"
+
+#include <new>                             // std::nothrow
+#include <gflags/gflags.h>
+#include "butil/logging.h"
+#include "brpc/reloadable_flags.h"
+#include "bvar/reducer.h"
+#include "bvar/window.h"
+#include "butil/atomicops.h"
+#include "butil/time.h"
+
+namespace brpc {
+
+DEFINE_double(backup_request_max_ratio, -1,
+    "Maximum ratio of backup requests to total requests. "
+    "Value in (0, 1] enables rate limiting. Values <= 0 disable it "
+    "(-1 is default). Can be overridden per-channel via "
+    "ChannelOptions.backup_request_max_ratio. "
+    "Note: takes effect at Channel::Init() time; changing this flag "
+    "at runtime does not affect already-created channels.");
+
+static bool validate_backup_request_max_ratio(const char*, double v) {
+    if (v <= 0) return true;  // non-positive means disabled
+    if (v <= 1.0) return true;
+    LOG(ERROR) << "Invalid backup_request_max_ratio=" << v
+               << ", must be <= 0 (disabled) or in (0, 1]";
+    return false;
+}
+BRPC_VALIDATE_GFLAG(backup_request_max_ratio,
+                     validate_backup_request_max_ratio);
+
+DEFINE_int32(backup_request_ratio_window_size_s, 10,
+    "Window size in seconds for computing the backup request ratio. "
+    "Must be in [1, 3600].");
+
+static bool validate_backup_request_ratio_window_size_s(
+        const char*, int32_t v) {
+    if (v >= 1 && v <= 3600) return true;
+    LOG(ERROR) << "Invalid backup_request_ratio_window_size_s=" << v
+               << ", must be in [1, 3600]";
+    return false;
+}
+BRPC_VALIDATE_GFLAG(backup_request_ratio_window_size_s,
+                     validate_backup_request_ratio_window_size_s);
+
+DEFINE_int32(backup_request_ratio_update_interval_s, 5,
+    "Interval in seconds between ratio cache updates. Must be >= 1.");
+
+static bool validate_backup_request_ratio_update_interval_s(
+        const char*, int32_t v) {
+    if (v >= 1) return true;
+    LOG(ERROR) << "Invalid backup_request_ratio_update_interval_s=" << v
+               << ", must be >= 1";
+    return false;
+}
+BRPC_VALIDATE_GFLAG(backup_request_ratio_update_interval_s,
+                     validate_backup_request_ratio_update_interval_s);
+
+// Standalone statistics module for tracking backup/total request ratio
+// within a sliding time window.
+class BackupRateLimiter {
+public:
+    BackupRateLimiter(double max_backup_ratio,
+                      int window_size_seconds,
+                      int update_interval_seconds)
+        : _max_backup_ratio(max_backup_ratio)
+        , _update_interval_us(update_interval_seconds * 1000000LL)
+        , _total_window(&_total_count, window_size_seconds)
+        , _backup_window(&_backup_count, window_size_seconds)
+        , _cached_ratio(0.0)
+        , _last_update_us(0) {
+    }
+
+    // All atomic operations use relaxed ordering intentionally.
+    // This is best-effort rate limiting: a slightly stale ratio is
+    // acceptable for approximate throttling.
+    bool ShouldAllow() const {
+        const int64_t now_us = butil::cpuwide_time_us();
+        int64_t last_us = _last_update_us.load(butil::memory_order_relaxed);
+        double ratio = _cached_ratio.load(butil::memory_order_relaxed);
+
+        if (now_us - last_us >= _update_interval_us) {
+            if (_last_update_us.compare_exchange_strong(
+                    last_us, now_us, butil::memory_order_relaxed)) {
+                int64_t total = _total_window.get_value();
+                int64_t backup = _backup_window.get_value();
+                ratio = (total > 0) ? static_cast<double>(backup) / total : 
0.0;
+                _cached_ratio.store(ratio, butil::memory_order_relaxed);
+            }
+        }
+
+        // max_backup_ratio >= 1.0 means no limit (ratio cannot exceed 1.0).
+        return _max_backup_ratio >= 1.0 || ratio < _max_backup_ratio;
+    }
+
+    void OnRPCEnd(const Controller* controller) {
+        _total_count << 1;
+        if (controller->has_backup_request()) {
+            _backup_count << 1;
+        }

Review Comment:
   Backup/total accounting is updated only in OnRPCEnd(), meaning the limiter 
won’t reflect a surge of backup decisions until RPCs finish. Under downstream 
latency spikes (the storm scenario), many backups can be issued before any 
OnRPCEnd() runs, so suppression may kick in too late. Consider counting backups 
at the time DoBackup() returns true (and adjusting OnRPCEnd to only record 
total), and/or revisiting how the denominator is tracked so the ratio responds 
during long in-flight periods.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to