Copilot commented on code in PR #3331:
URL: https://github.com/apache/brpc/pull/3331#discussion_r3360772115
##########
src/bthread/task_control.cpp:
##########
@@ -107,10 +112,12 @@ void* TaskControl::worker_thread(void* arg) {
g->_tid = pthread_self();
- int worker_id = c->_next_worker_id.fetch_add(
- 1, butil::memory_order_relaxed);
- if (!c->_cpus.empty()) {
- bind_thread_to_cpu(pthread_self(), c->_cpus[worker_id %
c->_cpus.size()]);
+ // worker_id is global and monotonically increasing; it doubles as the
+ // round-robin index for CPU affinity (same counter for name and binding).
+ int worker_id = c->_next_worker_id.fetch_add(1,
butil::memory_order_relaxed);
+ if (!c->_tag_cpus[tag].empty()) {
+ const auto& cpus = c->_tag_cpus[tag];
+ bind_thread_to_cpu(pthread_self(), cpus[worker_id % cpus.size()]);
Review Comment:
CPU affinity round-robin uses the global `_next_worker_id` counter, which
can completely break per-tag spreading. Example: with `FLAGS_task_group_ntags
== 2` and `cpus.size() == 2`, workers for tag 0 get worker_id 0,2,4,... so
`worker_id % 2` is always 0 and all tag-0 workers bind to the first CPU only.
This defeats the intended per-tag round-robin binding.
##########
src/bthread/task_control.cpp:
##########
@@ -328,40 +336,104 @@ TaskGroup* TaskControl::choose_one_group(bthread_tag_t
tag) {
return NULL;
}
-int TaskControl::parse_cpuset(std::string value, std::vector<unsigned>& cpus) {
+// Parse a single cpu-range-list such as "0-3,5,7" into a sorted, deduplicated
+// vector of CPU IDs. Returns 0 on success, -1 on error.
+static int parse_one_cpuset(const std::string& value, std::vector<unsigned>&
cpus) {
static std::regex r("(\\d+-)?(\\d+)(,(\\d+-)?(\\d+))*");
std::smatch match;
std::set<unsigned> cpuset;
if (value.empty()) {
return -1;
}
- if (std::regex_match(value, match, r)) {
- for (butil::StringSplitter split(value.data(), ','); split; ++split) {
- butil::StringPiece cpu_ids(split.field(), split.length());
- cpu_ids.trim_spaces();
- butil::StringPiece begin = cpu_ids;
- butil::StringPiece end = cpu_ids;
- auto dash = cpu_ids.find('-');
- if (dash != cpu_ids.npos) {
- begin = cpu_ids.substr(0, dash);
- end = cpu_ids.substr(dash + 1);
+ if (!std::regex_match(value, match, r)) {
+ return -1;
+ }
+ for (butil::StringSplitter split(value.data(), ','); split; ++split) {
+ butil::StringPiece cpu_ids(split.field(), split.length());
+ cpu_ids.trim_spaces();
+ butil::StringPiece begin = cpu_ids;
+ butil::StringPiece end = cpu_ids;
+ auto dash = cpu_ids.find('-');
+ if (dash != cpu_ids.npos) {
+ begin = cpu_ids.substr(0, dash);
+ end = cpu_ids.substr(dash + 1);
+ }
+ unsigned first = UINT_MAX;
+ unsigned last = 0;
+ int ret = butil::StringSplitter(begin, '\t').to_uint(&first);
+ ret = ret | butil::StringSplitter(end, '\t').to_uint(&last);
+ if (ret != 0 || first > last) {
+ return -1;
+ }
+ for (auto i = first; i <= last; ++i) {
+ cpuset.insert(i);
+ }
+ }
+ cpus.assign(cpuset.begin(), cpuset.end());
+ return 0;
+}
+
+int TaskControl::parse_cpuset(const std::string& value,
+ std::vector<std::vector<unsigned>>& tag_cpus,
+ int ntags) {
+ if (value.empty()) {
+ return -1;
+ }
+ // Detect per-tag format by the presence of ':' or ';'.
+ // Legacy format ("0-3,5,7") never contains these characters.
+ bool per_tag_format = (value.find(';') != std::string::npos ||
+ value.find(':') != std::string::npos);
+
+ if (per_tag_format) {
+ // Per-tag format: "0:0-3,5,7;1:6-9,4"
+ for (butil::StringSplitter seg_split(value.data(), ';'); seg_split;
++seg_split) {
+ std::string segment(seg_split.field(), seg_split.length());
+ // Trim leading/trailing spaces.
+ auto s = segment.find_first_not_of(' ');
+ auto e = segment.find_last_not_of(' ');
+ if (s == std::string::npos) { continue; } // blank segment
+ segment = segment.substr(s, e - s + 1);
+
+ auto colon = segment.find(':');
+ if (colon == std::string::npos) {
+ LOG(ERROR) << "cpu_set per-tag segment missing ':': " <<
segment;
+ return -1;
+ }
+ std::string tag_str = segment.substr(0, colon);
+ std::string cpus_str = segment.substr(colon + 1);
+
+ unsigned tag_id = 0;
+ butil::StringPiece tag_sp(tag_str);
+ if (butil::StringSplitter(tag_sp, '\t').to_uint(&tag_id) != 0) {
+ LOG(ERROR) << "cpu_set invalid tag '" << tag_str << "'";
+ return -1;
Review Comment:
Per-tag parsing does not trim whitespace around the tag id or the per-tag
CPU list. Inputs like `--cpu_set="0: 0-3,5,7"` (common when formatting) will
fail because `cpus_str` starts with a space and `parse_one_cpuset()` requires
an exact regex match. Similarly `tag_str` with spaces around ':' will fail to
parse.
##########
src/bthread/task_control.cpp:
##########
@@ -328,40 +336,104 @@ TaskGroup* TaskControl::choose_one_group(bthread_tag_t
tag) {
return NULL;
}
-int TaskControl::parse_cpuset(std::string value, std::vector<unsigned>& cpus) {
+// Parse a single cpu-range-list such as "0-3,5,7" into a sorted, deduplicated
+// vector of CPU IDs. Returns 0 on success, -1 on error.
+static int parse_one_cpuset(const std::string& value, std::vector<unsigned>&
cpus) {
static std::regex r("(\\d+-)?(\\d+)(,(\\d+-)?(\\d+))*");
std::smatch match;
std::set<unsigned> cpuset;
if (value.empty()) {
return -1;
}
- if (std::regex_match(value, match, r)) {
- for (butil::StringSplitter split(value.data(), ','); split; ++split) {
- butil::StringPiece cpu_ids(split.field(), split.length());
- cpu_ids.trim_spaces();
- butil::StringPiece begin = cpu_ids;
- butil::StringPiece end = cpu_ids;
- auto dash = cpu_ids.find('-');
- if (dash != cpu_ids.npos) {
- begin = cpu_ids.substr(0, dash);
- end = cpu_ids.substr(dash + 1);
+ if (!std::regex_match(value, match, r)) {
+ return -1;
+ }
+ for (butil::StringSplitter split(value.data(), ','); split; ++split) {
+ butil::StringPiece cpu_ids(split.field(), split.length());
+ cpu_ids.trim_spaces();
+ butil::StringPiece begin = cpu_ids;
+ butil::StringPiece end = cpu_ids;
+ auto dash = cpu_ids.find('-');
+ if (dash != cpu_ids.npos) {
+ begin = cpu_ids.substr(0, dash);
+ end = cpu_ids.substr(dash + 1);
+ }
+ unsigned first = UINT_MAX;
+ unsigned last = 0;
+ int ret = butil::StringSplitter(begin, '\t').to_uint(&first);
+ ret = ret | butil::StringSplitter(end, '\t').to_uint(&last);
+ if (ret != 0 || first > last) {
+ return -1;
+ }
+ for (auto i = first; i <= last; ++i) {
+ cpuset.insert(i);
+ }
+ }
+ cpus.assign(cpuset.begin(), cpuset.end());
+ return 0;
+}
+
+int TaskControl::parse_cpuset(const std::string& value,
+ std::vector<std::vector<unsigned>>& tag_cpus,
+ int ntags) {
+ if (value.empty()) {
+ return -1;
+ }
+ // Detect per-tag format by the presence of ':' or ';'.
+ // Legacy format ("0-3,5,7") never contains these characters.
+ bool per_tag_format = (value.find(';') != std::string::npos ||
+ value.find(':') != std::string::npos);
+
Review Comment:
`parse_cpuset()` indexes `tag_cpus[tag_id]` / `tag_cpus[i]` without ensuring
`tag_cpus.size() >= ntags`, even though the header comment says the function
resizes the output. This can lead to out-of-bounds writes if a caller passes an
empty vector (which the API contract implies is OK), and it also means tags not
mentioned in the per-tag format may retain old values if `tag_cpus` is reused.
##########
src/bthread/task_control.cpp:
##########
@@ -328,40 +336,104 @@ TaskGroup* TaskControl::choose_one_group(bthread_tag_t
tag) {
return NULL;
}
-int TaskControl::parse_cpuset(std::string value, std::vector<unsigned>& cpus) {
+// Parse a single cpu-range-list such as "0-3,5,7" into a sorted, deduplicated
+// vector of CPU IDs. Returns 0 on success, -1 on error.
+static int parse_one_cpuset(const std::string& value, std::vector<unsigned>&
cpus) {
static std::regex r("(\\d+-)?(\\d+)(,(\\d+-)?(\\d+))*");
std::smatch match;
std::set<unsigned> cpuset;
if (value.empty()) {
return -1;
}
- if (std::regex_match(value, match, r)) {
- for (butil::StringSplitter split(value.data(), ','); split; ++split) {
- butil::StringPiece cpu_ids(split.field(), split.length());
- cpu_ids.trim_spaces();
- butil::StringPiece begin = cpu_ids;
- butil::StringPiece end = cpu_ids;
- auto dash = cpu_ids.find('-');
- if (dash != cpu_ids.npos) {
- begin = cpu_ids.substr(0, dash);
- end = cpu_ids.substr(dash + 1);
+ if (!std::regex_match(value, match, r)) {
+ return -1;
+ }
+ for (butil::StringSplitter split(value.data(), ','); split; ++split) {
+ butil::StringPiece cpu_ids(split.field(), split.length());
+ cpu_ids.trim_spaces();
+ butil::StringPiece begin = cpu_ids;
+ butil::StringPiece end = cpu_ids;
+ auto dash = cpu_ids.find('-');
+ if (dash != cpu_ids.npos) {
+ begin = cpu_ids.substr(0, dash);
+ end = cpu_ids.substr(dash + 1);
+ }
+ unsigned first = UINT_MAX;
+ unsigned last = 0;
+ int ret = butil::StringSplitter(begin, '\t').to_uint(&first);
+ ret = ret | butil::StringSplitter(end, '\t').to_uint(&last);
+ if (ret != 0 || first > last) {
+ return -1;
+ }
+ for (auto i = first; i <= last; ++i) {
+ cpuset.insert(i);
+ }
+ }
+ cpus.assign(cpuset.begin(), cpuset.end());
+ return 0;
+}
+
+int TaskControl::parse_cpuset(const std::string& value,
+ std::vector<std::vector<unsigned>>& tag_cpus,
+ int ntags) {
+ if (value.empty()) {
+ return -1;
+ }
+ // Detect per-tag format by the presence of ':' or ';'.
+ // Legacy format ("0-3,5,7") never contains these characters.
+ bool per_tag_format = (value.find(';') != std::string::npos ||
+ value.find(':') != std::string::npos);
+
Review Comment:
`parse_cpuset()` now has substantially more parsing behavior (legacy vs
per-tag dispatch, tag range validation, etc.) but there are no unit tests
covering the new formats or error cases. Given the large existing bthread test
suite, adding tests for parsing (including invalid inputs and out-of-range
tags) would help prevent regressions.
##########
src/bthread/task_control.h:
##########
@@ -91,7 +91,19 @@ friend bthread_t init_for_pthread_stack_trace();
// If this method is called after init(), it never returns NULL.
TaskGroup* choose_one_group(bthread_tag_t tag);
- static int parse_cpuset(std::string value, std::vector<unsigned>& cpus);
+ // Parse a cpu-set string into a per-tag list of CPU IDs.
+ //
+ // Two formats are accepted:
+ // Legacy (all tags share one set): "0-3,5,7"
+ // Per-tag: "0:0-3,5,7;1:6-9,4"
+ //
+ // On success populates |tag_cpus| (indexed by tag) and returns 0.
+ // |tag_cpus| is resized to max_tag+1; tags that are not mentioned
+ // get an empty cpu list (= no binding).
+ // Returns -1 on parse error.
+ static int parse_cpuset(const std::string& value,
+ std::vector<std::vector<unsigned>>& tag_cpus,
+ int ntags);
Review Comment:
Header comment for `parse_cpuset()` says `|tag_cpus| is resized to
max_tag+1`, but the API takes `ntags` and the implementation (and typical
callers) use `FLAGS_task_group_ntags`. This is misleading for callers and
should describe resizing to `ntags` (and that `ntags` must be > 0).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]