Copilot commented on code in PR #3130:
URL: https://github.com/apache/kvrocks/pull/3130#discussion_r2476039814
##########
tests/gocase/unit/type/tdigest/tdigest_test.go:
##########
@@ -518,4 +518,73 @@ func tdigestTests(t *testing.T, configs
util.KvrocksServerConfigs) {
validation(newDestKey1)
validation(newDestKey2)
})
+
+ t.Run("tdigest.revrank with different arguments", func(t *testing.T) {
+ keyPrefix := "tdigest_revrank_"
+
+ // Test invalid arguments
+ require.ErrorContains(t, rdb.Do(ctx, "TDIGEST.REVRANK").Err(),
errMsgWrongNumberArg)
+ require.ErrorContains(t, rdb.Do(ctx, "TDIGEST.REVRANK",
keyPrefix+"nonexistent").Err(), errMsgWrongNumberArg)
+
+ // Test Non-existent key
+ require.ErrorContains(t, rdb.Do(ctx, "TDIGEST.REVRANK",
keyPrefix+"nonexistent", "10").Err(), errMsgKeyNotExist)
+
+ // Test with empty tdigest
+ key := keyPrefix + "test1"
+ require.NoError(t, rdb.Do(ctx, "TDIGEST.CREATE", key,
"compression", "100").Err())
+ rsp := rdb.Do(ctx, "TDIGEST.REVRANK", key, "10")
+ require.NoError(t, rsp.Err())
+ vals, err := rsp.Slice()
+ require.NoError(t, err)
+ require.Len(t, vals, 1)
+ expected := []int64{-2}
+ for i, v := range vals {
+ rank, ok := v.(int64)
+ require.True(t, ok, "expected int64 but got %T at index
%d", v, i)
+ require.EqualValues(t, rank, expected[i])
+ }
+
+ // Test with set_contains several identical elements
Review Comment:
Corrected phrasing from 'set_contains' to 'set containing' for clarity.
```suggestion
// Test with set containing several identical elements
```
##########
tests/gocase/unit/type/tdigest/tdigest_test.go:
##########
@@ -518,4 +518,73 @@ func tdigestTests(t *testing.T, configs
util.KvrocksServerConfigs) {
validation(newDestKey1)
validation(newDestKey2)
})
+
+ t.Run("tdigest.revrank with different arguments", func(t *testing.T) {
+ keyPrefix := "tdigest_revrank_"
+
+ // Test invalid arguments
+ require.ErrorContains(t, rdb.Do(ctx, "TDIGEST.REVRANK").Err(),
errMsgWrongNumberArg)
+ require.ErrorContains(t, rdb.Do(ctx, "TDIGEST.REVRANK",
keyPrefix+"nonexistent").Err(), errMsgWrongNumberArg)
+
+ // Test Non-existent key
+ require.ErrorContains(t, rdb.Do(ctx, "TDIGEST.REVRANK",
keyPrefix+"nonexistent", "10").Err(), errMsgKeyNotExist)
+
+ // Test with empty tdigest
+ key := keyPrefix + "test1"
+ require.NoError(t, rdb.Do(ctx, "TDIGEST.CREATE", key,
"compression", "100").Err())
+ rsp := rdb.Do(ctx, "TDIGEST.REVRANK", key, "10")
+ require.NoError(t, rsp.Err())
+ vals, err := rsp.Slice()
+ require.NoError(t, err)
+ require.Len(t, vals, 1)
+ expected := []int64{-2}
+ for i, v := range vals {
+ rank, ok := v.(int64)
+ require.True(t, ok, "expected int64 but got %T at index
%d", v, i)
+ require.EqualValues(t, rank, expected[i])
+ }
+
+ // Test with set_contains several identical elements
+ require.NoError(t, rdb.Do(ctx, "TDIGEST.ADD", key, "10", "10",
"10", "20", "20").Err())
+ rsp = rdb.Do(ctx, "TDIGEST.REVRANK", key, "10", "20")
+ require.NoError(t, rsp.Err())
+ vals, err = rsp.Slice()
+ require.NoError(t, err)
+ require.Len(t, vals, 2)
+ expected = []int64{3, 1}
+ for i, v := range vals {
+ rank, ok := v.(int64)
+ require.True(t, ok, "expected int64 but got %T at index
%d", v, i)
+ require.EqualValues(t, rank, expected[i])
+ }
+
+ require.NoError(t, rdb.Do(ctx, "TDIGEST.ADD", key, "10").Err())
+ rsp = rdb.Do(ctx, "TDIGEST.REVRANK", key, "10", "20")
+ require.NoError(t, rsp.Err())
+ vals, err = rsp.Slice()
+ require.NoError(t, err)
+ require.Len(t, vals, 2)
+ expected = []int64{4, 1}
+ for i, v := range vals {
+ rank, ok := v.(int64)
+ require.True(t, ok, "expected int64 but got %T at index
%d", v, i)
+ require.EqualValues(t, rank, expected[i])
+ }
+
+ // Test with set_contains different elements
Review Comment:
Corrected phrasing from 'set_contains' to 'set containing' for clarity.
```suggestion
// Test with set containing different elements
```
##########
src/types/tdigest.h:
##########
@@ -150,3 +152,93 @@ inline StatusOr<double> TDigestQuantile(TD&& td, double q)
{
diff /= (lc.weight / 2 + rc.weight / 2);
return Lerp(lc.mean, rc.mean, diff);
}
+
+inline void AssignRankForEqualInputs(const std::vector<size_t>& indices,
double cumulative_weight,
+ std::vector<int>& result) {
+ for (auto index : indices) {
+ result[index] = static_cast<int>(cumulative_weight);
+ }
+}
+
+inline int DoubleCompare(double a, double b, double rel_eps = 1e-12, double
abs_eps = 1e-9) {
+ double diff = a - b;
+ double adiff = std::abs(diff);
+ if (adiff <= abs_eps) return 0;
+ double maxab = std::max(std::abs(a), std::abs(b));
+ if (adiff <= maxab * rel_eps) return 0;
+ return (diff < 0) ? -1 : 1;
+}
+
+inline bool DoubleEqual(double a, double b, double rel_eps = 1e-12, double
abs_eps = 1e-9) {
+ return DoubleCompare(a, b, rel_eps, abs_eps) == 0;
+}
+
+struct DoubleComparator {
+ bool operator()(const double& a, const double& b) const { return
DoubleCompare(a, b) == -1; }
+};
+
+template <typename TD>
+inline Status TDigestRevRank(TD&& td, const std::vector<double>& inputs,
std::vector<int>& result) {
+ std::map<double, std::vector<size_t>, DoubleComparator> value_to_indices;
+ for (size_t i = 0; i < inputs.size(); ++i) {
+ value_to_indices[inputs[i]].push_back(i);
+ }
+
+ double cumulative_weight = 0;
+ result.resize(inputs.size());
+ auto it = value_to_indices.rbegin();
+
+ // handle inputs larger than maximum
+ while (it != value_to_indices.rend() && it->first > td.Max()) {
+ AssignRankForEqualInputs(it->second, -1, result);
+ ++it;
+ }
+
+ auto iter = td.End();
+ while (iter->Valid() && it != value_to_indices.rend()) {
+ auto centroid = GET_OR_RET(iter->GetCentroid());
+ auto input_value = it->first;
+ if (DoubleEqual(centroid.mean, input_value)) {
+ auto current_mean = centroid.mean;
+ auto current_mean_cumulative_weight = cumulative_weight +
centroid.weight / 2;
+ cumulative_weight += centroid.weight;
+
+ // handle all the prev centroids which has the same mean
+ while (!iter->IsBegin() && iter->Prev()) {
+ auto next_centroid = GET_OR_RET(iter->GetCentroid());
+ if (current_mean != next_centroid.mean) {
Review Comment:
Inconsistent comparison method: Line 209 uses direct equality comparison
(`!=`) between doubles, while line 201 correctly uses `DoubleEqual()` for
floating-point comparison. This inconsistency could lead to incorrect behavior
when centroids have nearly equal means. Use `!DoubleEqual(current_mean,
next_centroid.mean)` instead.
```suggestion
if (!DoubleEqual(current_mean, next_centroid.mean)) {
```
##########
tests/cppunit/types/tdigest_test.cc:
##########
@@ -298,3 +298,79 @@ TEST_F(RedisTDigestTest,
Quantile_returns_nan_on_empty_tdigest) {
ASSERT_TRUE(status.ok()) << status.ToString();
ASSERT_FALSE(result.quantiles) << "should not have quantiles with empty
tdigest";
}
+
+TEST_F(RedisTDigestTest, RevRank_on_the_set_contains_different_elements) {
+ std::string test_digest_name = "test_digest_revrank" +
std::to_string(util::GetTimeStampMS());
+ bool exists = false;
+ auto status = tdigest_->Create(*ctx_, test_digest_name, {100}, &exists);
+ ASSERT_FALSE(exists);
+ ASSERT_TRUE(status.ok());
+ std::vector<double> input{10, 20, 30, 40, 50, 60};
+ status = tdigest_->Add(*ctx_, test_digest_name, input);
+ ASSERT_TRUE(status.ok()) << status.ToString();
+
+ std::vector<int> result;
+ result.reserve(input.size());
+ const std::vector<double> value = {0, 10, 20, 30, 40, 50, 60, 70};
+ status = tdigest_->RevRank(*ctx_, test_digest_name, value, result);
+ const auto expect_result = std::vector<double>{6, 5, 4, 3, 2, 1, 0, -1};
+
+ for (size_t i = 0; i < result.size(); i++) {
+ auto got = result[i];
+ EXPECT_EQ(got, expect_result[i]);
+ }
+ ASSERT_TRUE(status.ok()) << status.ToString();
+}
+
+TEST_F(RedisTDigestTest,
RevRank_on_the_set_contains_several_identical_elements) {
Review Comment:
Corrected phrasing from 'set_contains' to 'set_containing' for proper naming
convention.
```suggestion
TEST_F(RedisTDigestTest,
RevRank_on_the_set_containing_several_identical_elements) {
```
##########
tests/cppunit/types/tdigest_test.cc:
##########
@@ -298,3 +298,79 @@ TEST_F(RedisTDigestTest,
Quantile_returns_nan_on_empty_tdigest) {
ASSERT_TRUE(status.ok()) << status.ToString();
ASSERT_FALSE(result.quantiles) << "should not have quantiles with empty
tdigest";
}
+
+TEST_F(RedisTDigestTest, RevRank_on_the_set_contains_different_elements) {
Review Comment:
Corrected phrasing from 'set_contains' to 'set_containing' for proper naming
convention.
```suggestion
TEST_F(RedisTDigestTest, RevRank_on_the_set_containing_different_elements) {
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]