moonchen commented on code in PR #13219:
URL: https://github.com/apache/trafficserver/pull/13219#discussion_r3336810074
##########
src/iocore/net/P_UnixNet.h:
##########
@@ -105,6 +105,14 @@ check_shedding_warning()
TS_INLINE bool
check_net_throttle(ThrottleType t)
{
+ // Throttle new inbound connections when resident memory exceeds
+ // proxy.config.memory.max_usage (set by the MemoryLimit continuation). Only
+ // ACCEPT is throttled; outbound CONNECTs may be needed to let in-flight
+ // transactions complete and release memory.
+ if (t == ACCEPT && net_memory_throttle) {
+ return true;
+ }
Review Comment:
Good catch. Made `net_memory_throttle` a `std::atomic<bool>` (decl in
P_UnixNet.h, def in UnixNet.cc) and this read now uses
`load(std::memory_order_relaxed)`. The race was dormant only because the flag
had no reader; reviving the accept-path read reintroduced it.
##########
src/traffic_server/traffic_server.cc:
##########
@@ -489,41 +488,43 @@ class MemoryLimit : public Continuation
return EVENT_DONE;
}
- // "reload" the setting, we don't do this often so not expensive
+ // "reload" the setting, we don't do this often so not expensive.
+ // proxy.config.memory.max_usage is in bytes; 0 (the default) disables the
+ // feature. We only schedule this continuation when it is enabled, but
+ // re-check here defensively and stop monitoring if it is ever cleared.
_memory_limit =
RecGetRecordInt("proxy.config.memory.max_usage").value_or(0);
- _memory_limit = _memory_limit >> 10; // divide by 1024
-
- if (getrusage(RUSAGE_SELF, &_usage) == 0) {
- ts::Metrics::Gauge::store(memory_rss, _usage.ru_maxrss << 10); // * 1024
- Dbg(dbg_ctl_server, "memory usage - ru_maxrss: %ld memory limit: %"
PRId64, _usage.ru_maxrss, _memory_limit);
- if (_memory_limit > 0) {
- if (_usage.ru_maxrss > _memory_limit) {
- if (net_memory_throttle == false) {
- net_memory_throttle = true;
- Dbg(dbg_ctl_server, "memory usage exceeded limit - ru_maxrss: %ld
memory limit: %" PRId64, _usage.ru_maxrss,
- _memory_limit);
- }
- } else {
- if (net_memory_throttle == true) {
- net_memory_throttle = false;
- Dbg(dbg_ctl_server, "memory usage under limit - ru_maxrss: %ld
memory limit: %" PRId64, _usage.ru_maxrss,
- _memory_limit);
- }
- }
- } else {
- // this feature has not been enabled
- Dbg(dbg_ctl_server, "limiting connections based on memory usage has
been disabled");
- e->cancel();
- delete this;
- return EVENT_DONE;
+ if (_memory_limit <= 0) {
+ Dbg(dbg_ctl_server, "limiting connections based on memory usage has been
disabled");
+ e->cancel();
+ delete this;
+ return EVENT_DONE;
+ }
+
+ // Sample and publish the *current* RSS only while the feature is enabled,
+ // so we incur the cost of reading RSS only when it is needed.
+ // ink_get_current_rss() reports current (not peak) RSS in bytes, portably,
+ // so the gauge can both rise and fall and the throttle releases correctly.
+ uint64_t rss = ink_get_current_rss();
+ ts::Metrics::Gauge::store(memory_rss, static_cast<int64_t>(rss));
+ Dbg(dbg_ctl_server, "memory usage - current rss: %" PRIu64 " bytes memory
limit: %" PRId64 " bytes", rss, _memory_limit);
+
+ if (rss > static_cast<uint64_t>(_memory_limit)) {
+ if (net_memory_throttle == false) {
+ net_memory_throttle = true;
+ Dbg(dbg_ctl_server, "memory usage exceeded limit - current rss: %"
PRIu64 " memory limit: %" PRId64, rss, _memory_limit);
+ }
Review Comment:
Done — the writes here now use `net_memory_throttle.store(...,
std::memory_order_relaxed)` (and `load` for the compare), matching the atomic
decl. See the paired reply on P_UnixNet.h for the read side.
##########
src/tscore/unit_tests/test_ink_sys_control.cc:
##########
@@ -0,0 +1,63 @@
+/** @file
+
+ test ink_sys_control.h - system resource helpers
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include <tscore/ink_sys_control.h>
+#include <catch2/catch_test_macros.hpp>
+
+#include <cstdlib>
+#include <memory>
+#include <vector>
+
+TEST_CASE("ink_get_current_rss reports a plausible current RSS",
"[ink_sys_control]")
+{
+#if defined(__linux__) || defined(darwin)
+ uint64_t rss = ink_get_current_rss();
+
+ // The test process is obviously resident, so current RSS must be non-zero
+ // and well above a trivially small floor (at least one page).
+ REQUIRE(rss > 0);
+ REQUIRE(rss >= 4096);
+
+ SECTION("RSS grows when we touch a large allocation")
+ {
+ uint64_t before = ink_get_current_rss();
+
+ // Allocate and touch ~64 MiB so the pages become resident. Touching every
+ // page (not just allocating) is what forces them into the RSS.
+ constexpr size_t bytes = 64 * 1024 * 1024;
+ std::unique_ptr<char[]> buf(new char[bytes]);
+ for (size_t i = 0; i < bytes; i += 4096) {
+ buf[i] = static_cast<char>(i);
+ }
+ // Defeat any dead-store elimination.
+ volatile char sink = buf[bytes - 1];
+ (void)sink;
+
+ uint64_t after = ink_get_current_rss();
+ REQUIRE(after > before);
+ }
Review Comment:
Switched to an explicit `mmap(MAP_PRIVATE | MAP_ANONYMOUS)` of fresh pages,
which are guaranteed not previously resident, so touching them
deterministically raises RSS. Removes the allocator-dependent flakiness.
##########
src/tscore/unit_tests/test_ink_sys_control.cc:
##########
@@ -0,0 +1,63 @@
+/** @file
+
+ test ink_sys_control.h - system resource helpers
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include <tscore/ink_sys_control.h>
+#include <catch2/catch_test_macros.hpp>
+
+#include <cstdlib>
+#include <memory>
+#include <vector>
Review Comment:
Cleaned up: dropped the unused `<vector>`/`<memory>`, and now include
exactly what's used (`<cstdint>`, `<sys/mman.h>`, `<unistd.h>`) guarded by the
same platform check as the test body.
##########
src/tscore/ink_sys_control.cc:
##########
@@ -98,3 +104,43 @@ ink_get_max_files()
return RLIM_INFINITY;
}
+
+uint64_t
+ink_get_current_rss()
+{
+#if defined(__linux__)
+ // /proc/self/statm reports sizes in pages. The second field is the resident
+ // set size (number of resident pages).
+ FILE *fp = fopen("/proc/self/statm", "r");
+ if (fp == nullptr) {
+ return 0;
+ }
+
+ unsigned long total_pages = 0;
+ unsigned long resident_pages = 0;
+ int matched = fscanf(fp, "%lu %lu", &total_pages,
&resident_pages);
+ fclose(fp);
+
+ if (matched != 2) {
+ return 0;
+ }
+
+ long page_size = sysconf(_SC_PAGESIZE);
+ if (page_size <= 0) {
+ return 0;
+ }
+
+ return static_cast<uint64_t>(resident_pages) *
static_cast<uint64_t>(page_size);
+#elif defined(darwin)
+ // On macOS, task_info() reports resident_size in bytes.
+ mach_task_basic_info_data_t info;
+ mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;
+ if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
reinterpret_cast<task_info_t>(&info), &count) != KERN_SUCCESS) {
+ return 0;
+ }
+
+ return static_cast<uint64_t>(info.resident_size);
+#else
+ return 0;
+#endif
Review Comment:
Added a FreeBSD branch using `sysctl(KERN_PROC_PID)` and
`kinfo_proc.ki_rssize` (pages * page size), so FreeBSD no longer regresses to
0. Other non-Linux/macOS/FreeBSD platforms still return 0; the metric/throttle
simply stay inactive there.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]