Re: [PR] feat(tdigest): add the support of TDIGEST.REVRANK command [kvrocks]

via GitHub Wed, 29 Oct 2025 17:09:38 -0700


Copilot commented on code in PR #3130:
URL: https://github.com/apache/kvrocks/pull/3130#discussion_r2476039814



##########
tests/gocase/unit/type/tdigest/tdigest_test.go:
##########
@@ -518,4 +518,73 @@ func tdigestTests(t *testing.T, configs 
util.KvrocksServerConfigs) {
                validation(newDestKey1)
                validation(newDestKey2)
        })
+
+       t.Run("tdigest.revrank with different arguments", func(t *testing.T) {
+               keyPrefix := "tdigest_revrank_"
+
+               // Test invalid arguments
+               require.ErrorContains(t, rdb.Do(ctx, "TDIGEST.REVRANK").Err(), 
errMsgWrongNumberArg)
+               require.ErrorContains(t, rdb.Do(ctx, "TDIGEST.REVRANK", 
keyPrefix+"nonexistent").Err(), errMsgWrongNumberArg)
+
+               // Test Non-existent key
+               require.ErrorContains(t, rdb.Do(ctx, "TDIGEST.REVRANK", 
keyPrefix+"nonexistent", "10").Err(), errMsgKeyNotExist)
+
+               // Test with empty tdigest
+               key := keyPrefix + "test1"
+               require.NoError(t, rdb.Do(ctx, "TDIGEST.CREATE", key, 
"compression", "100").Err())
+               rsp := rdb.Do(ctx, "TDIGEST.REVRANK", key, "10")
+               require.NoError(t, rsp.Err())
+               vals, err := rsp.Slice()
+               require.NoError(t, err)
+               require.Len(t, vals, 1)
+               expected := []int64{-2}
+               for i, v := range vals {
+                       rank, ok := v.(int64)
+                       require.True(t, ok, "expected int64 but got %T at index 
%d", v, i)
+                       require.EqualValues(t, rank, expected[i])
+               }
+
+               // Test with set_contains several identical elements

Review Comment:
   Corrected phrasing from 'set_contains' to 'set containing' for clarity.
   ```suggestion
                // Test with set containing several identical elements
   ```



##########
tests/gocase/unit/type/tdigest/tdigest_test.go:
##########
@@ -518,4 +518,73 @@ func tdigestTests(t *testing.T, configs 
util.KvrocksServerConfigs) {
                validation(newDestKey1)
                validation(newDestKey2)
        })
+
+       t.Run("tdigest.revrank with different arguments", func(t *testing.T) {
+               keyPrefix := "tdigest_revrank_"
+
+               // Test invalid arguments
+               require.ErrorContains(t, rdb.Do(ctx, "TDIGEST.REVRANK").Err(), 
errMsgWrongNumberArg)
+               require.ErrorContains(t, rdb.Do(ctx, "TDIGEST.REVRANK", 
keyPrefix+"nonexistent").Err(), errMsgWrongNumberArg)
+
+               // Test Non-existent key
+               require.ErrorContains(t, rdb.Do(ctx, "TDIGEST.REVRANK", 
keyPrefix+"nonexistent", "10").Err(), errMsgKeyNotExist)
+
+               // Test with empty tdigest
+               key := keyPrefix + "test1"
+               require.NoError(t, rdb.Do(ctx, "TDIGEST.CREATE", key, 
"compression", "100").Err())
+               rsp := rdb.Do(ctx, "TDIGEST.REVRANK", key, "10")
+               require.NoError(t, rsp.Err())
+               vals, err := rsp.Slice()
+               require.NoError(t, err)
+               require.Len(t, vals, 1)
+               expected := []int64{-2}
+               for i, v := range vals {
+                       rank, ok := v.(int64)
+                       require.True(t, ok, "expected int64 but got %T at index 
%d", v, i)
+                       require.EqualValues(t, rank, expected[i])
+               }
+
+               // Test with set_contains several identical elements
+               require.NoError(t, rdb.Do(ctx, "TDIGEST.ADD", key, "10", "10", 
"10", "20", "20").Err())
+               rsp = rdb.Do(ctx, "TDIGEST.REVRANK", key, "10", "20")
+               require.NoError(t, rsp.Err())
+               vals, err = rsp.Slice()
+               require.NoError(t, err)
+               require.Len(t, vals, 2)
+               expected = []int64{3, 1}
+               for i, v := range vals {
+                       rank, ok := v.(int64)
+                       require.True(t, ok, "expected int64 but got %T at index 
%d", v, i)
+                       require.EqualValues(t, rank, expected[i])
+               }
+
+               require.NoError(t, rdb.Do(ctx, "TDIGEST.ADD", key, "10").Err())
+               rsp = rdb.Do(ctx, "TDIGEST.REVRANK", key, "10", "20")
+               require.NoError(t, rsp.Err())
+               vals, err = rsp.Slice()
+               require.NoError(t, err)
+               require.Len(t, vals, 2)
+               expected = []int64{4, 1}
+               for i, v := range vals {
+                       rank, ok := v.(int64)
+                       require.True(t, ok, "expected int64 but got %T at index 
%d", v, i)
+                       require.EqualValues(t, rank, expected[i])
+               }
+
+               // Test with set_contains different elements

Review Comment:
   Corrected phrasing from 'set_contains' to 'set containing' for clarity.
   ```suggestion
                // Test with set containing different elements
   ```



##########
src/types/tdigest.h:
##########
@@ -150,3 +152,93 @@ inline StatusOr<double> TDigestQuantile(TD&& td, double q) 
{
   diff /= (lc.weight / 2 + rc.weight / 2);
   return Lerp(lc.mean, rc.mean, diff);
 }
+
+inline void AssignRankForEqualInputs(const std::vector<size_t>& indices, 
double cumulative_weight,
+                                     std::vector<int>& result) {
+  for (auto index : indices) {
+    result[index] = static_cast<int>(cumulative_weight);
+  }
+}
+
+inline int DoubleCompare(double a, double b, double rel_eps = 1e-12, double 
abs_eps = 1e-9) {
+  double diff = a - b;
+  double adiff = std::abs(diff);
+  if (adiff <= abs_eps) return 0;
+  double maxab = std::max(std::abs(a), std::abs(b));
+  if (adiff <= maxab * rel_eps) return 0;
+  return (diff < 0) ? -1 : 1;
+}
+
+inline bool DoubleEqual(double a, double b, double rel_eps = 1e-12, double 
abs_eps = 1e-9) {
+  return DoubleCompare(a, b, rel_eps, abs_eps) == 0;
+}
+
+struct DoubleComparator {
+  bool operator()(const double& a, const double& b) const { return 
DoubleCompare(a, b) == -1; }
+};
+
+template <typename TD>
+inline Status TDigestRevRank(TD&& td, const std::vector<double>& inputs, 
std::vector<int>& result) {
+  std::map<double, std::vector<size_t>, DoubleComparator> value_to_indices;
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    value_to_indices[inputs[i]].push_back(i);
+  }
+
+  double cumulative_weight = 0;
+  result.resize(inputs.size());
+  auto it = value_to_indices.rbegin();
+
+  // handle inputs larger than maximum
+  while (it != value_to_indices.rend() && it->first > td.Max()) {
+    AssignRankForEqualInputs(it->second, -1, result);
+    ++it;
+  }
+
+  auto iter = td.End();
+  while (iter->Valid() && it != value_to_indices.rend()) {
+    auto centroid = GET_OR_RET(iter->GetCentroid());
+    auto input_value = it->first;
+    if (DoubleEqual(centroid.mean, input_value)) {
+      auto current_mean = centroid.mean;
+      auto current_mean_cumulative_weight = cumulative_weight + 
centroid.weight / 2;
+      cumulative_weight += centroid.weight;
+
+      // handle all the prev centroids which has the same mean
+      while (!iter->IsBegin() && iter->Prev()) {
+        auto next_centroid = GET_OR_RET(iter->GetCentroid());
+        if (current_mean != next_centroid.mean) {

Review Comment:
   Inconsistent comparison method: Line 209 uses direct equality comparison 
(`!=`) between doubles, while line 201 correctly uses `DoubleEqual()` for 
floating-point comparison. This inconsistency could lead to incorrect behavior 
when centroids have nearly equal means. Use `!DoubleEqual(current_mean, 
next_centroid.mean)` instead.
   ```suggestion
           if (!DoubleEqual(current_mean, next_centroid.mean)) {
   ```



##########
tests/cppunit/types/tdigest_test.cc:
##########
@@ -298,3 +298,79 @@ TEST_F(RedisTDigestTest, 
Quantile_returns_nan_on_empty_tdigest) {
   ASSERT_TRUE(status.ok()) << status.ToString();
   ASSERT_FALSE(result.quantiles) << "should not have quantiles with empty 
tdigest";
 }
+
+TEST_F(RedisTDigestTest, RevRank_on_the_set_contains_different_elements) {
+  std::string test_digest_name = "test_digest_revrank" + 
std::to_string(util::GetTimeStampMS());
+  bool exists = false;
+  auto status = tdigest_->Create(*ctx_, test_digest_name, {100}, &exists);
+  ASSERT_FALSE(exists);
+  ASSERT_TRUE(status.ok());
+  std::vector<double> input{10, 20, 30, 40, 50, 60};
+  status = tdigest_->Add(*ctx_, test_digest_name, input);
+  ASSERT_TRUE(status.ok()) << status.ToString();
+
+  std::vector<int> result;
+  result.reserve(input.size());
+  const std::vector<double> value = {0, 10, 20, 30, 40, 50, 60, 70};
+  status = tdigest_->RevRank(*ctx_, test_digest_name, value, result);
+  const auto expect_result = std::vector<double>{6, 5, 4, 3, 2, 1, 0, -1};
+
+  for (size_t i = 0; i < result.size(); i++) {
+    auto got = result[i];
+    EXPECT_EQ(got, expect_result[i]);
+  }
+  ASSERT_TRUE(status.ok()) << status.ToString();
+}
+
+TEST_F(RedisTDigestTest, 
RevRank_on_the_set_contains_several_identical_elements) {

Review Comment:
   Corrected phrasing from 'set_contains' to 'set_containing' for proper naming 
convention.
   ```suggestion
   TEST_F(RedisTDigestTest, 
RevRank_on_the_set_containing_several_identical_elements) {
   ```



##########
tests/cppunit/types/tdigest_test.cc:
##########
@@ -298,3 +298,79 @@ TEST_F(RedisTDigestTest, 
Quantile_returns_nan_on_empty_tdigest) {
   ASSERT_TRUE(status.ok()) << status.ToString();
   ASSERT_FALSE(result.quantiles) << "should not have quantiles with empty 
tdigest";
 }
+
+TEST_F(RedisTDigestTest, RevRank_on_the_set_contains_different_elements) {

Review Comment:
   Corrected phrasing from 'set_contains' to 'set_containing' for proper naming 
convention.
   ```suggestion
   TEST_F(RedisTDigestTest, RevRank_on_the_set_containing_different_elements) {
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] feat(tdigest): add the support of TDIGEST.REVRANK command [kvrocks]

Reply via email to