This is an automated email from the ASF dual-hosted git repository.
karan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new a28b8c2674 Improve rowkey object size estimate (#13319)
a28b8c2674 is described below
commit a28b8c26745cc9656ce4aa3e0301f6ebd8ed76f4
Author: Adarsh Sanjeev <[email protected]>
AuthorDate: Tue Nov 8 10:12:07 2022 +0530
Improve rowkey object size estimate (#13319)
* Improve rowkey object size estimate
* Address review comments
* Update comment
* Fix test
---
.../druid/msq/statistics/DelegateOrMinKeyCollector.java | 2 +-
.../apache/druid/msq/statistics/DistinctKeyCollector.java | 6 +++---
.../druid/msq/statistics/QuantilesSketchKeyCollector.java | 2 +-
.../msq/statistics/QuantilesSketchKeyCollectorFactory.java | 2 +-
.../druid/msq/statistics/DelegateOrMinKeyCollectorTest.java | 8 ++++----
.../msq/statistics/QuantilesSketchKeyCollectorTest.java | 6 +++---
.../src/main/java/org/apache/druid/frame/key/RowKey.java | 12 ++++++++++--
.../src/test/java/org/apache/druid/frame/key/RowKeyTest.java | 4 ++--
8 files changed, 25 insertions(+), 17 deletions(-)
diff --git
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DelegateOrMinKeyCollector.java
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DelegateOrMinKeyCollector.java
index e2e2282a6b..e076194f2a 100644
---
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DelegateOrMinKeyCollector.java
+++
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DelegateOrMinKeyCollector.java
@@ -133,7 +133,7 @@ public class DelegateOrMinKeyCollector<TDelegate extends
KeyCollector<TDelegate>
if (delegate != null) {
return delegate.estimatedRetainedBytes();
} else {
- return minKey != null ? minKey.getNumberOfBytes() : 0;
+ return minKey != null ? minKey.estimatedObjectSizeBytes() : 0;
}
}
diff --git
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DistinctKeyCollector.java
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DistinctKeyCollector.java
index 9a1716a9fb..f88174b333 100644
---
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DistinctKeyCollector.java
+++
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DistinctKeyCollector.java
@@ -121,13 +121,13 @@ public class DistinctKeyCollector implements
KeyCollector<DistinctKeyCollector>
if (isNewMin && !retainedKeys.isEmpty() &&
!isKeySelected(retainedKeys.firstKey())) {
// Old min should be kicked out.
totalWeightUnadjusted -=
retainedKeys.removeLong(retainedKeys.firstKey());
- retainedBytes -= retainedKeys.firstKey().getNumberOfBytes();
+ retainedBytes -= retainedKeys.firstKey().estimatedObjectSizeBytes();
}
if (retainedKeys.putIfAbsent(key, weight) == MISSING_KEY_WEIGHT) {
// We did add this key. (Previous value was zero, meaning absent.)
totalWeightUnadjusted += weight;
- retainedBytes += key.getNumberOfBytes();
+ retainedBytes += key.estimatedObjectSizeBytes();
}
while (retainedBytes >= maxBytes) {
@@ -305,7 +305,7 @@ public class DistinctKeyCollector implements
KeyCollector<DistinctKeyCollector>
if (!isKeySelected(key)) {
totalWeightUnadjusted -= entry.getLongValue();
- retainedBytes -= entry.getKey().getNumberOfBytes();
+ retainedBytes -= entry.getKey().estimatedObjectSizeBytes();
iterator.remove();
}
}
diff --git
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollector.java
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollector.java
index 1620324735..2a5ee777a0 100644
---
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollector.java
+++
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollector.java
@@ -64,7 +64,7 @@ public class QuantilesSketchKeyCollector implements
KeyCollector<QuantilesSketch
{
double estimatedTotalSketchSizeInBytes = averageKeyLength * sketch.getN();
// The key is added "weight" times to the sketch, we can update the total
weight directly.
- estimatedTotalSketchSizeInBytes += key.getNumberOfBytes() * weight;
+ estimatedTotalSketchSizeInBytes += key.estimatedObjectSizeBytes() * weight;
for (int i = 0; i < weight; i++) {
// Add the same key multiple times to make it "heavier".
sketch.update(key);
diff --git
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollectorFactory.java
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollectorFactory.java
index cfc2bd9a54..1682987bec 100644
---
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollectorFactory.java
+++
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollectorFactory.java
@@ -106,7 +106,7 @@ public class QuantilesSketchKeyCollectorFactory
int serializedSize = Integer.BYTES * items.length;
for (final RowKey key : items) {
- serializedSize += key.getNumberOfBytes();
+ serializedSize += key.array().length;
}
final byte[] serializedBytes = new byte[serializedSize];
diff --git
a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/DelegateOrMinKeyCollectorTest.java
b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/DelegateOrMinKeyCollectorTest.java
index e054dcf98b..bf27234ecc 100644
---
a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/DelegateOrMinKeyCollectorTest.java
+++
b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/DelegateOrMinKeyCollectorTest.java
@@ -89,7 +89,7 @@ public class DelegateOrMinKeyCollectorTest
Assert.assertTrue(collector.getDelegate().isPresent());
Assert.assertFalse(collector.isEmpty());
Assert.assertEquals(key, collector.minKey());
- Assert.assertEquals(key.getNumberOfBytes(),
collector.estimatedRetainedBytes(), 0);
+ Assert.assertEquals(key.estimatedObjectSizeBytes(),
collector.estimatedRetainedBytes(), 0);
Assert.assertEquals(1, collector.estimatedTotalWeight());
}
@@ -110,7 +110,7 @@ public class DelegateOrMinKeyCollectorTest
Assert.assertTrue(collector.getDelegate().isPresent());
Assert.assertFalse(collector.isEmpty());
Assert.assertEquals(key, collector.minKey());
- Assert.assertEquals(key.getNumberOfBytes(),
collector.estimatedRetainedBytes(), 0);
+ Assert.assertEquals(key.estimatedObjectSizeBytes(),
collector.estimatedRetainedBytes(), 0);
Assert.assertEquals(1, collector.estimatedTotalWeight());
// Should not have actually downsampled, because the quantiles-based
collector does nothing when
@@ -133,7 +133,7 @@ public class DelegateOrMinKeyCollectorTest
RowKey key = createKey(1L);
collector.add(key, 1);
collector.add(key, 1);
- int expectedRetainedBytes = 2 * key.getNumberOfBytes();
+ int expectedRetainedBytes = 2 * key.estimatedObjectSizeBytes();
Assert.assertTrue(collector.getDelegate().isPresent());
Assert.assertFalse(collector.isEmpty());
@@ -144,7 +144,7 @@ public class DelegateOrMinKeyCollectorTest
while (collector.getDelegate().isPresent()) {
Assert.assertTrue(collector.downSample());
}
- expectedRetainedBytes = key.getNumberOfBytes();
+ expectedRetainedBytes = key.estimatedObjectSizeBytes();
Assert.assertFalse(collector.getDelegate().isPresent());
Assert.assertFalse(collector.isEmpty());
diff --git
a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollectorTest.java
b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollectorTest.java
index 0f8147eb92..79b8b06b70 100644
---
a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollectorTest.java
+++
b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollectorTest.java
@@ -195,13 +195,13 @@ public class QuantilesSketchKeyCollectorTest
collector.add(smallKey, 3);
- Assert.assertEquals(smallKey.getNumberOfBytes(),
collector.getAverageKeyLength(), 0);
+ Assert.assertEquals(smallKey.estimatedObjectSizeBytes(),
collector.getAverageKeyLength(), 0);
other.add(largeKey, 5);
- Assert.assertEquals(largeKey.getNumberOfBytes(),
other.getAverageKeyLength(), 0);
+ Assert.assertEquals(largeKey.estimatedObjectSizeBytes(),
other.getAverageKeyLength(), 0);
collector.addAll(other);
- Assert.assertEquals((smallKey.getNumberOfBytes() * 3 +
largeKey.getNumberOfBytes() * 5) / 8.0, collector.getAverageKeyLength(), 0);
+ Assert.assertEquals((smallKey.estimatedObjectSizeBytes() * 3 +
largeKey.estimatedObjectSizeBytes() * 5) / 8.0,
collector.getAverageKeyLength(), 0);
}
@Test
diff --git a/processing/src/main/java/org/apache/druid/frame/key/RowKey.java
b/processing/src/main/java/org/apache/druid/frame/key/RowKey.java
index aa3701ba90..89328414b9 100644
--- a/processing/src/main/java/org/apache/druid/frame/key/RowKey.java
+++ b/processing/src/main/java/org/apache/druid/frame/key/RowKey.java
@@ -32,6 +32,10 @@ public class RowKey
{
private static final RowKey EMPTY_KEY = new RowKey(new byte[0]);
+ // Constant to account for hashcode and object overhead
+ // 24 bytes (header) + 8 bytes (reference) + 8 bytes (hashCode long) + 4
bytes (safe estimate of hashCodeComputed)
+ static final int OBJECT_OVERHEAD_SIZE_BYTES = 44;
+
private final byte[] key;
// Cached hashcode. Computed on demand, not in the constructor, to avoid
unnecessary computation.
@@ -109,8 +113,12 @@ public class RowKey
return Arrays.toString(key);
}
- public int getNumberOfBytes()
+ /**
+ * Estimate number of bytes taken by an object of {@link RowKey}. Only
returns an estimate and does not account for
+ * platform or JVM specific implementation.
+ */
+ public int estimatedObjectSizeBytes()
{
- return array().length;
+ return OBJECT_OVERHEAD_SIZE_BYTES + array().length;
}
}
diff --git
a/processing/src/test/java/org/apache/druid/frame/key/RowKeyTest.java
b/processing/src/test/java/org/apache/druid/frame/key/RowKeyTest.java
index 20e8fb981a..41afa74a2b 100644
--- a/processing/src/test/java/org/apache/druid/frame/key/RowKeyTest.java
+++ b/processing/src/test/java/org/apache/druid/frame/key/RowKeyTest.java
@@ -97,11 +97,11 @@ public class RowKeyTest extends InitializedNullHandlingTest
{
final RowSignature signatureLong = RowSignature.builder().add("1",
ColumnType.LONG).build();
final RowKey longKey = KeyTestUtils.createKey(signatureLong, 1L, "abc");
- Assert.assertEquals(longKey.array().length, longKey.getNumberOfBytes());
+ Assert.assertEquals(RowKey.OBJECT_OVERHEAD_SIZE_BYTES +
longKey.array().length, longKey.estimatedObjectSizeBytes());
final RowSignature signatureLongString =
RowSignature.builder().add("1", ColumnType.LONG).add("2",
ColumnType.STRING).build();
final RowKey longStringKey = KeyTestUtils.createKey(signatureLongString,
1L, "abc");
- Assert.assertEquals(longStringKey.array().length,
longStringKey.getNumberOfBytes());
+ Assert.assertEquals(RowKey.OBJECT_OVERHEAD_SIZE_BYTES +
longStringKey.array().length, longStringKey.estimatedObjectSizeBytes());
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]