Copilot commented on code in PR #3130:
URL: https://github.com/apache/kvrocks/pull/3130#discussion_r2485447892


##########
src/types/tdigest.h:
##########
@@ -150,3 +152,92 @@ inline StatusOr<double> TDigestQuantile(TD&& td, double q) 
{
   diff /= (lc.weight / 2 + rc.weight / 2);
   return Lerp(lc.mean, rc.mean, diff);
 }
+
+inline int DoubleCompare(double a, double b, double rel_eps = 1e-12, double 
abs_eps = 1e-9) {
+  double diff = a - b;
+  double adiff = std::abs(diff);
+  if (adiff <= abs_eps) return 0;
+  double maxab = std::max(std::abs(a), std::abs(b));
+  if (adiff <= maxab * rel_eps) return 0;
+  return (diff < 0) ? -1 : 1;
+}
+
+inline bool DoubleEqual(double a, double b, double rel_eps = 1e-12, double 
abs_eps = 1e-9) {
+  return DoubleCompare(a, b, rel_eps, abs_eps) == 0;
+}
+
+struct DoubleComparator {
+  bool operator()(const double& a, const double& b) const { return 
DoubleCompare(a, b) == -1; }
+};
+
+template <typename TD>
+inline Status TDigestRevRank(TD&& td, const std::vector<double>& inputs, 
std::vector<int>& result) {
+  std::map<double, size_t, DoubleComparator> value_to_indices;
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    value_to_indices[inputs[i]] = i;
+  }
+
+  result.clear();
+  result.resize(inputs.size(), -2);
+  auto it = value_to_indices.rbegin();
+
+  // handle inputs larger than maximum
+  while (it != value_to_indices.rend() && it->first > td.Max()) {
+    result[it->second] = -1;
+    ++it;
+  }
+
+  auto iter = td.End();
+  double cumulative_weight = 0;
+  while (iter->Valid() && it != value_to_indices.rend()) {
+    auto centroid = GET_OR_RET(iter->GetCentroid());
+    auto input_value = it->first;
+    if (DoubleEqual(centroid.mean, input_value)) {
+      auto current_mean = centroid.mean;
+      auto current_mean_cumulative_weight = cumulative_weight + 
centroid.weight / 2;
+      cumulative_weight += centroid.weight;
+
+      // handle all the prev centroids which has the same mean

Review Comment:
   Corrected spelling of 'prev' to 'previous' in comment for clarity.
   ```suggestion
         // handle all the previous centroids which has the same mean
   ```



##########
src/types/tdigest.h:
##########
@@ -150,3 +152,92 @@ inline StatusOr<double> TDigestQuantile(TD&& td, double q) 
{
   diff /= (lc.weight / 2 + rc.weight / 2);
   return Lerp(lc.mean, rc.mean, diff);
 }
+
+inline int DoubleCompare(double a, double b, double rel_eps = 1e-12, double 
abs_eps = 1e-9) {
+  double diff = a - b;
+  double adiff = std::abs(diff);
+  if (adiff <= abs_eps) return 0;
+  double maxab = std::max(std::abs(a), std::abs(b));
+  if (adiff <= maxab * rel_eps) return 0;
+  return (diff < 0) ? -1 : 1;
+}
+
+inline bool DoubleEqual(double a, double b, double rel_eps = 1e-12, double 
abs_eps = 1e-9) {
+  return DoubleCompare(a, b, rel_eps, abs_eps) == 0;
+}
+
+struct DoubleComparator {
+  bool operator()(const double& a, const double& b) const { return 
DoubleCompare(a, b) == -1; }
+};
+
+template <typename TD>
+inline Status TDigestRevRank(TD&& td, const std::vector<double>& inputs, 
std::vector<int>& result) {
+  std::map<double, size_t, DoubleComparator> value_to_indices;
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    value_to_indices[inputs[i]] = i;
+  }

Review Comment:
   When duplicate input values exist, this map will only store the index of the 
last occurrence, causing incorrect results to be returned. The 
`value_to_indices` map should use a multimap or map to vector to store all 
indices for duplicate values, not just the last one.



##########
src/types/tdigest.h:
##########
@@ -150,3 +152,92 @@ inline StatusOr<double> TDigestQuantile(TD&& td, double q) 
{
   diff /= (lc.weight / 2 + rc.weight / 2);
   return Lerp(lc.mean, rc.mean, diff);
 }
+
+inline int DoubleCompare(double a, double b, double rel_eps = 1e-12, double 
abs_eps = 1e-9) {
+  double diff = a - b;
+  double adiff = std::abs(diff);
+  if (adiff <= abs_eps) return 0;
+  double maxab = std::max(std::abs(a), std::abs(b));
+  if (adiff <= maxab * rel_eps) return 0;
+  return (diff < 0) ? -1 : 1;
+}
+
+inline bool DoubleEqual(double a, double b, double rel_eps = 1e-12, double 
abs_eps = 1e-9) {
+  return DoubleCompare(a, b, rel_eps, abs_eps) == 0;
+}
+
+struct DoubleComparator {
+  bool operator()(const double& a, const double& b) const { return 
DoubleCompare(a, b) == -1; }
+};
+
+template <typename TD>
+inline Status TDigestRevRank(TD&& td, const std::vector<double>& inputs, 
std::vector<int>& result) {
+  std::map<double, size_t, DoubleComparator> value_to_indices;
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    value_to_indices[inputs[i]] = i;
+  }
+
+  result.clear();
+  result.resize(inputs.size(), -2);
+  auto it = value_to_indices.rbegin();
+
+  // handle inputs larger than maximum
+  while (it != value_to_indices.rend() && it->first > td.Max()) {
+    result[it->second] = -1;
+    ++it;
+  }
+
+  auto iter = td.End();
+  double cumulative_weight = 0;
+  while (iter->Valid() && it != value_to_indices.rend()) {
+    auto centroid = GET_OR_RET(iter->GetCentroid());
+    auto input_value = it->first;
+    if (DoubleEqual(centroid.mean, input_value)) {
+      auto current_mean = centroid.mean;
+      auto current_mean_cumulative_weight = cumulative_weight + 
centroid.weight / 2;
+      cumulative_weight += centroid.weight;
+
+      // handle all the prev centroids which has the same mean
+      while (!iter->IsBegin() && iter->Prev()) {
+        auto next_centroid = GET_OR_RET(iter->GetCentroid());
+        if (!DoubleEqual(current_mean, next_centroid.mean)) {
+          // move back to the last equal centroid, because we will process it 
in the next loop
+          iter->Next();
+          break;
+        }
+        current_mean_cumulative_weight += next_centroid.weight / 2;
+        cumulative_weight += next_centroid.weight;
+      }
+
+      // handle the prev inputs which has the same value

Review Comment:
   Grammatical error: 'has' should be 'have' to agree with the plural subject 
'inputs'.
   ```suggestion
         // handle the prev inputs which have the same value
   ```



##########
src/commands/cmd_tdigest.cc:
##########
@@ -176,6 +176,53 @@ class CommandTDigestAdd : public Commander {
   std::vector<double> values_;
 };
 
+class CommandTDigestRevRank : public Commander {
+ public:
+  Status Parse(const std::vector<std::string> &args) override {
+    key_name_ = args[1];
+
+    std::unordered_set<std::string> unique_inputs_set(args.begin() + 2, 
args.end());
+    origin_inputs_.assign(args.begin() + 2, args.end());
+
+    unique_inputs_.reserve(unique_inputs_set.size());
+    size_t i = 0;
+    for (const auto &input : unique_inputs_set) {
+      auto value = ParseFloat(input);
+      if (!value) {
+        return {Status::RedisParseErr, errValueIsNotFloat};
+      }
+      unique_inputs_.push_back(*value);
+      unique_inputs_order_[input] = i++;
+    }

Review Comment:
   The iteration order of `std::unordered_set` is not deterministic, which 
means the mapping in `unique_inputs_order_` may not correspond correctly to the 
indices in `unique_inputs_`. This can lead to incorrect results when looking up 
values. Consider using the string representation as the key and ensuring 
consistent ordering, or iterate over `unique_inputs_` after population to build 
the mapping.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to