This is an automated email from the ASF dual-hosted git repository.

karan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new a28b8c2674 Improve rowkey object size estimate (#13319)
a28b8c2674 is described below

commit a28b8c26745cc9656ce4aa3e0301f6ebd8ed76f4
Author: Adarsh Sanjeev <[email protected]>
AuthorDate: Tue Nov 8 10:12:07 2022 +0530

    Improve rowkey object size estimate (#13319)
    
    * Improve rowkey object size estimate
    
    * Address review comments
    
    * Update comment
    
    * Fix test
---
 .../druid/msq/statistics/DelegateOrMinKeyCollector.java      |  2 +-
 .../apache/druid/msq/statistics/DistinctKeyCollector.java    |  6 +++---
 .../druid/msq/statistics/QuantilesSketchKeyCollector.java    |  2 +-
 .../msq/statistics/QuantilesSketchKeyCollectorFactory.java   |  2 +-
 .../druid/msq/statistics/DelegateOrMinKeyCollectorTest.java  |  8 ++++----
 .../msq/statistics/QuantilesSketchKeyCollectorTest.java      |  6 +++---
 .../src/main/java/org/apache/druid/frame/key/RowKey.java     | 12 ++++++++++--
 .../src/test/java/org/apache/druid/frame/key/RowKeyTest.java |  4 ++--
 8 files changed, 25 insertions(+), 17 deletions(-)

diff --git 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DelegateOrMinKeyCollector.java
 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DelegateOrMinKeyCollector.java
index e2e2282a6b..e076194f2a 100644
--- 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DelegateOrMinKeyCollector.java
+++ 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DelegateOrMinKeyCollector.java
@@ -133,7 +133,7 @@ public class DelegateOrMinKeyCollector<TDelegate extends 
KeyCollector<TDelegate>
     if (delegate != null) {
       return delegate.estimatedRetainedBytes();
     } else {
-      return minKey != null ? minKey.getNumberOfBytes() : 0;
+      return minKey != null ? minKey.estimatedObjectSizeBytes() : 0;
     }
   }
 
diff --git 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DistinctKeyCollector.java
 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DistinctKeyCollector.java
index 9a1716a9fb..f88174b333 100644
--- 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DistinctKeyCollector.java
+++ 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DistinctKeyCollector.java
@@ -121,13 +121,13 @@ public class DistinctKeyCollector implements 
KeyCollector<DistinctKeyCollector>
       if (isNewMin && !retainedKeys.isEmpty() && 
!isKeySelected(retainedKeys.firstKey())) {
         // Old min should be kicked out.
         totalWeightUnadjusted -= 
retainedKeys.removeLong(retainedKeys.firstKey());
-        retainedBytes -= retainedKeys.firstKey().getNumberOfBytes();
+        retainedBytes -= retainedKeys.firstKey().estimatedObjectSizeBytes();
       }
 
       if (retainedKeys.putIfAbsent(key, weight) == MISSING_KEY_WEIGHT) {
         // We did add this key. (Previous value was zero, meaning absent.)
         totalWeightUnadjusted += weight;
-        retainedBytes += key.getNumberOfBytes();
+        retainedBytes += key.estimatedObjectSizeBytes();
       }
 
       while (retainedBytes >= maxBytes) {
@@ -305,7 +305,7 @@ public class DistinctKeyCollector implements 
KeyCollector<DistinctKeyCollector>
 
       if (!isKeySelected(key)) {
         totalWeightUnadjusted -= entry.getLongValue();
-        retainedBytes -= entry.getKey().getNumberOfBytes();
+        retainedBytes -= entry.getKey().estimatedObjectSizeBytes();
         iterator.remove();
       }
     }
diff --git 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollector.java
 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollector.java
index 1620324735..2a5ee777a0 100644
--- 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollector.java
+++ 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollector.java
@@ -64,7 +64,7 @@ public class QuantilesSketchKeyCollector implements 
KeyCollector<QuantilesSketch
   {
     double estimatedTotalSketchSizeInBytes = averageKeyLength * sketch.getN();
     // The key is added "weight" times to the sketch, we can update the total 
weight directly.
-    estimatedTotalSketchSizeInBytes += key.getNumberOfBytes() * weight;
+    estimatedTotalSketchSizeInBytes += key.estimatedObjectSizeBytes() * weight;
     for (int i = 0; i < weight; i++) {
       // Add the same key multiple times to make it "heavier".
       sketch.update(key);
diff --git 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollectorFactory.java
 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollectorFactory.java
index cfc2bd9a54..1682987bec 100644
--- 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollectorFactory.java
+++ 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollectorFactory.java
@@ -106,7 +106,7 @@ public class QuantilesSketchKeyCollectorFactory
       int serializedSize = Integer.BYTES * items.length;
 
       for (final RowKey key : items) {
-        serializedSize += key.getNumberOfBytes();
+        serializedSize += key.array().length;
       }
 
       final byte[] serializedBytes = new byte[serializedSize];
diff --git 
a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/DelegateOrMinKeyCollectorTest.java
 
b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/DelegateOrMinKeyCollectorTest.java
index e054dcf98b..bf27234ecc 100644
--- 
a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/DelegateOrMinKeyCollectorTest.java
+++ 
b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/DelegateOrMinKeyCollectorTest.java
@@ -89,7 +89,7 @@ public class DelegateOrMinKeyCollectorTest
     Assert.assertTrue(collector.getDelegate().isPresent());
     Assert.assertFalse(collector.isEmpty());
     Assert.assertEquals(key, collector.minKey());
-    Assert.assertEquals(key.getNumberOfBytes(), 
collector.estimatedRetainedBytes(), 0);
+    Assert.assertEquals(key.estimatedObjectSizeBytes(), 
collector.estimatedRetainedBytes(), 0);
     Assert.assertEquals(1, collector.estimatedTotalWeight());
   }
 
@@ -110,7 +110,7 @@ public class DelegateOrMinKeyCollectorTest
     Assert.assertTrue(collector.getDelegate().isPresent());
     Assert.assertFalse(collector.isEmpty());
     Assert.assertEquals(key, collector.minKey());
-    Assert.assertEquals(key.getNumberOfBytes(), 
collector.estimatedRetainedBytes(), 0);
+    Assert.assertEquals(key.estimatedObjectSizeBytes(), 
collector.estimatedRetainedBytes(), 0);
     Assert.assertEquals(1, collector.estimatedTotalWeight());
 
     // Should not have actually downsampled, because the quantiles-based 
collector does nothing when
@@ -133,7 +133,7 @@ public class DelegateOrMinKeyCollectorTest
     RowKey key = createKey(1L);
     collector.add(key, 1);
     collector.add(key, 1);
-    int expectedRetainedBytes = 2 * key.getNumberOfBytes();
+    int expectedRetainedBytes = 2 * key.estimatedObjectSizeBytes();
 
     Assert.assertTrue(collector.getDelegate().isPresent());
     Assert.assertFalse(collector.isEmpty());
@@ -144,7 +144,7 @@ public class DelegateOrMinKeyCollectorTest
     while (collector.getDelegate().isPresent()) {
       Assert.assertTrue(collector.downSample());
     }
-    expectedRetainedBytes = key.getNumberOfBytes();
+    expectedRetainedBytes = key.estimatedObjectSizeBytes();
 
     Assert.assertFalse(collector.getDelegate().isPresent());
     Assert.assertFalse(collector.isEmpty());
diff --git 
a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollectorTest.java
 
b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollectorTest.java
index 0f8147eb92..79b8b06b70 100644
--- 
a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollectorTest.java
+++ 
b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollectorTest.java
@@ -195,13 +195,13 @@ public class QuantilesSketchKeyCollectorTest
 
 
     collector.add(smallKey, 3);
-    Assert.assertEquals(smallKey.getNumberOfBytes(), 
collector.getAverageKeyLength(), 0);
+    Assert.assertEquals(smallKey.estimatedObjectSizeBytes(), 
collector.getAverageKeyLength(), 0);
 
     other.add(largeKey, 5);
-    Assert.assertEquals(largeKey.getNumberOfBytes(), 
other.getAverageKeyLength(), 0);
+    Assert.assertEquals(largeKey.estimatedObjectSizeBytes(), 
other.getAverageKeyLength(), 0);
 
     collector.addAll(other);
-    Assert.assertEquals((smallKey.getNumberOfBytes() * 3 + 
largeKey.getNumberOfBytes() * 5) / 8.0, collector.getAverageKeyLength(), 0);
+    Assert.assertEquals((smallKey.estimatedObjectSizeBytes() * 3 + 
largeKey.estimatedObjectSizeBytes() * 5) / 8.0, 
collector.getAverageKeyLength(), 0);
   }
 
   @Test
diff --git a/processing/src/main/java/org/apache/druid/frame/key/RowKey.java 
b/processing/src/main/java/org/apache/druid/frame/key/RowKey.java
index aa3701ba90..89328414b9 100644
--- a/processing/src/main/java/org/apache/druid/frame/key/RowKey.java
+++ b/processing/src/main/java/org/apache/druid/frame/key/RowKey.java
@@ -32,6 +32,10 @@ public class RowKey
 {
   private static final RowKey EMPTY_KEY = new RowKey(new byte[0]);
 
+  // Constant to account for hashcode and object overhead
+  // 24 bytes (header) + 8 bytes (reference) + 8 bytes (hashCode long) + 4 
bytes (safe estimate of hashCodeComputed)
+  static final int OBJECT_OVERHEAD_SIZE_BYTES = 44;
+
   private final byte[] key;
 
   // Cached hashcode. Computed on demand, not in the constructor, to avoid 
unnecessary computation.
@@ -109,8 +113,12 @@ public class RowKey
     return Arrays.toString(key);
   }
 
-  public int getNumberOfBytes()
+  /**
+   * Estimate number of bytes taken by an object of {@link RowKey}. Only 
returns an estimate and does not account for
+   * platform or JVM specific implementation.
+   */
+  public int estimatedObjectSizeBytes()
   {
-    return array().length;
+    return OBJECT_OVERHEAD_SIZE_BYTES + array().length;
   }
 }
diff --git 
a/processing/src/test/java/org/apache/druid/frame/key/RowKeyTest.java 
b/processing/src/test/java/org/apache/druid/frame/key/RowKeyTest.java
index 20e8fb981a..41afa74a2b 100644
--- a/processing/src/test/java/org/apache/druid/frame/key/RowKeyTest.java
+++ b/processing/src/test/java/org/apache/druid/frame/key/RowKeyTest.java
@@ -97,11 +97,11 @@ public class RowKeyTest extends InitializedNullHandlingTest
   {
     final RowSignature signatureLong = RowSignature.builder().add("1", 
ColumnType.LONG).build();
     final RowKey longKey = KeyTestUtils.createKey(signatureLong, 1L, "abc");
-    Assert.assertEquals(longKey.array().length, longKey.getNumberOfBytes());
+    Assert.assertEquals(RowKey.OBJECT_OVERHEAD_SIZE_BYTES + 
longKey.array().length, longKey.estimatedObjectSizeBytes());
 
     final RowSignature signatureLongString =
         RowSignature.builder().add("1", ColumnType.LONG).add("2", 
ColumnType.STRING).build();
     final RowKey longStringKey = KeyTestUtils.createKey(signatureLongString, 
1L, "abc");
-    Assert.assertEquals(longStringKey.array().length, 
longStringKey.getNumberOfBytes());
+    Assert.assertEquals(RowKey.OBJECT_OVERHEAD_SIZE_BYTES + 
longStringKey.array().length, longStringKey.estimatedObjectSizeBytes());
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to