Copilot commented on code in PR #3130:
URL: https://github.com/apache/kvrocks/pull/3130#discussion_r2463866651


##########
src/types/redis_tdigest.cc:
##########
@@ -67,18 +67,18 @@ class DummyCentroids {
       if (Valid()) {
         std::advance(iter_, 1);
       }
-      return iter_ != centroids_.cend();
+      return Valid();
     }
 
     // The Prev function can only be called for item is not cend,
     // because we must guarantee the iterator to be inside the valid range 
before iteration.
     bool Prev() {
-      if (Valid() && iter_ != centroids_.cbegin()) {
+      if (Valid()) {

Review Comment:
   After calling `std::advance(iter_, -1)`, the iterator could become invalid 
(move before `cbegin()`). The validity check on line 79 will return true even 
when `iter_` is before `cbegin()`, allowing subsequent operations on an 
out-of-bounds iterator. The validity check should occur before the advance 
operation, and the advance should only happen if `iter_ != centroids_.cbegin()`.
   ```suggestion
         if (Valid() && iter_ != centroids_.cbegin()) {
   ```



##########
src/types/tdigest.h:
##########
@@ -150,3 +152,70 @@ inline StatusOr<double> TDigestQuantile(TD&& td, double q) 
{
   diff /= (lc.weight / 2 + rc.weight / 2);
   return Lerp(lc.mean, rc.mean, diff);
 }
+
+inline void AssignRankForEqualInputs(const std::vector<size_t>& indices, 
double cumulative_weight,
+                                     std::vector<int>& result) {
+  for (auto index : indices) {
+    result[index] = static_cast<int>(cumulative_weight);
+  }
+}
+
+template <typename TD>
+inline Status TDigestRevRank(TD&& td, const std::vector<double>& inputs, 
std::vector<int>& result) {
+  std::map<double, std::vector<size_t>> value_to_indices;
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    value_to_indices[inputs[i]].push_back(i);
+  }
+
+  double cumulative_weight = 0;
+  result.resize(inputs.size());
+  auto it = value_to_indices.rbegin();
+
+  // handle inputs larger than maximum
+  while (it != value_to_indices.rend() && it->first > td.Max()) {
+    AssignRankForEqualInputs(it->second, -1, result);
+    ++it;
+  }
+
+  auto iter = td.End();
+  while (iter->Valid() && it != value_to_indices.rend()) {
+    auto centroid = GET_OR_RET(iter->GetCentroid());
+    auto input_value = it->first;
+    if (centroid.mean == input_value) {
+      auto current_mean = centroid.mean;
+      auto current_mean_cumulative_weight = cumulative_weight + 
centroid.weight / 2;
+      cumulative_weight += centroid.weight;
+
+      // handle all the prev centroids which has the same mean
+      while (iter->Prev()) {
+        auto next_centroid = GET_OR_RET(iter->GetCentroid());
+        if (current_mean != next_centroid.mean) {
+          // move back to the last equal centroid, because we will process it 
in the next loop
+          iter->Next();
+          break;
+        }
+        current_mean_cumulative_weight += centroid.weight / 2;
+        cumulative_weight += centroid.weight;

Review Comment:
   This line incorrectly adds `centroid.weight / 2` instead of 
`next_centroid.weight / 2`. The variable `next_centroid` was retrieved on line 
191 but is not being used for the weight calculation, leading to incorrect 
cumulative weight when processing multiple centroids with the same mean.
   ```suggestion
           current_mean_cumulative_weight += next_centroid.weight / 2;
           cumulative_weight += next_centroid.weight;
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to