Copilot commented on code in PR #3130:
URL: https://github.com/apache/kvrocks/pull/3130#discussion_r2463866651
##########
src/types/redis_tdigest.cc:
##########
@@ -67,18 +67,18 @@ class DummyCentroids {
if (Valid()) {
std::advance(iter_, 1);
}
- return iter_ != centroids_.cend();
+ return Valid();
}
// The Prev function can only be called for item is not cend,
// because we must guarantee the iterator to be inside the valid range
before iteration.
bool Prev() {
- if (Valid() && iter_ != centroids_.cbegin()) {
+ if (Valid()) {
Review Comment:
After calling `std::advance(iter_, -1)`, the iterator could become invalid
(move before `cbegin()`). The validity check on line 79 will return true even
when `iter_` is before `cbegin()`, allowing subsequent operations on an
out-of-bounds iterator. The validity check should occur before the advance
operation, and the advance should only happen if `iter_ != centroids_.cbegin()`.
```suggestion
if (Valid() && iter_ != centroids_.cbegin()) {
```
##########
src/types/tdigest.h:
##########
@@ -150,3 +152,70 @@ inline StatusOr<double> TDigestQuantile(TD&& td, double q)
{
diff /= (lc.weight / 2 + rc.weight / 2);
return Lerp(lc.mean, rc.mean, diff);
}
+
+inline void AssignRankForEqualInputs(const std::vector<size_t>& indices,
double cumulative_weight,
+ std::vector<int>& result) {
+ for (auto index : indices) {
+ result[index] = static_cast<int>(cumulative_weight);
+ }
+}
+
+template <typename TD>
+inline Status TDigestRevRank(TD&& td, const std::vector<double>& inputs,
std::vector<int>& result) {
+ std::map<double, std::vector<size_t>> value_to_indices;
+ for (size_t i = 0; i < inputs.size(); ++i) {
+ value_to_indices[inputs[i]].push_back(i);
+ }
+
+ double cumulative_weight = 0;
+ result.resize(inputs.size());
+ auto it = value_to_indices.rbegin();
+
+ // handle inputs larger than maximum
+ while (it != value_to_indices.rend() && it->first > td.Max()) {
+ AssignRankForEqualInputs(it->second, -1, result);
+ ++it;
+ }
+
+ auto iter = td.End();
+ while (iter->Valid() && it != value_to_indices.rend()) {
+ auto centroid = GET_OR_RET(iter->GetCentroid());
+ auto input_value = it->first;
+ if (centroid.mean == input_value) {
+ auto current_mean = centroid.mean;
+ auto current_mean_cumulative_weight = cumulative_weight +
centroid.weight / 2;
+ cumulative_weight += centroid.weight;
+
+ // handle all the prev centroids which has the same mean
+ while (iter->Prev()) {
+ auto next_centroid = GET_OR_RET(iter->GetCentroid());
+ if (current_mean != next_centroid.mean) {
+ // move back to the last equal centroid, because we will process it
in the next loop
+ iter->Next();
+ break;
+ }
+ current_mean_cumulative_weight += centroid.weight / 2;
+ cumulative_weight += centroid.weight;
Review Comment:
This line incorrectly adds `centroid.weight / 2` instead of
`next_centroid.weight / 2`. The variable `next_centroid` was retrieved on line
191 but is not being used for the weight calculation, leading to incorrect
cumulative weight when processing multiple centroids with the same mean.
```suggestion
current_mean_cumulative_weight += next_centroid.weight / 2;
cumulative_weight += next_centroid.weight;
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]