This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new b7a4c79 Null handling fixes for DS HLL and Theta sketches. (#11830)
b7a4c79 is described below
commit b7a4c79314fb52e88f53f84b56c362e5abc205c2
Author: Gian Merlino <[email protected]>
AuthorDate: Fri Oct 22 19:09:00 2021 -0700
Null handling fixes for DS HLL and Theta sketches. (#11830)
* Null handling fixes for DS HLL and Theta sketches.
For HLL, this fixes an NPE when processing a null in a multi-value
dimension.
For both, empty strings are now properly treated as nulls (and ignored) in
replace-with-default mode. Behavior in SQL-compatible mode is unchanged.
* Fix expectation.
---
.../datasketches/hll/HllSketchBuildAggregator.java | 13 +-
.../datasketches/theta/SketchAggregator.java | 6 +-
.../hll/HllSketchBuildAggregatorTest.java | 135 +++++++++++++++++++++
.../datasketches/theta/SketchAggregationTest.java | 1 +
4 files changed, 150 insertions(+), 5 deletions(-)
diff --git
a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregator.java
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregator.java
index be60842..de76898 100644
---
a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregator.java
+++
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregator.java
@@ -21,6 +21,7 @@ package org.apache.druid.query.aggregation.datasketches.hll;
import org.apache.datasketches.hll.HllSketch;
import org.apache.datasketches.hll.TgtHllType;
+import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.segment.ColumnValueSelector;
@@ -102,10 +103,14 @@ public class HllSketchBuildAggregator implements
Aggregator
} else if (value instanceof String) {
sketch.update(((String) value).toCharArray());
} else if (value instanceof List) {
- // noinspection unchecked
- List<String> list = (List<String>) value;
- for (String v : list) {
- sketch.update(v.toCharArray());
+ // noinspection rawtypes
+ for (Object entry : (List) value) {
+ if (entry != null) {
+ final String asString = entry.toString();
+ if (!NullHandling.isNullOrEquivalent(asString)) {
+ sketch.update(asString);
+ }
+ }
}
} else if (value instanceof char[]) {
sketch.update((char[]) value);
diff --git
a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregator.java
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregator.java
index cf930bc..b53d89d 100644
---
a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregator.java
+++
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregator.java
@@ -22,6 +22,7 @@ package org.apache.druid.query.aggregation.datasketches.theta;
import org.apache.datasketches.Family;
import org.apache.datasketches.theta.SetOperation;
import org.apache.datasketches.theta.Union;
+import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.segment.BaseObjectColumnValueSelector;
@@ -122,7 +123,10 @@ public class SketchAggregator implements Aggregator
} else if (update instanceof List) {
for (Object entry : (List) update) {
if (entry != null) {
- union.update(entry.toString());
+ final String asString = entry.toString();
+ if (!NullHandling.isNullOrEquivalent(asString)) {
+ union.update(asString);
+ }
}
}
} else {
diff --git
a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorTest.java
b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorTest.java
new file mode 100644
index 0000000..f116340
--- /dev/null
+++
b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorTest.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.hll;
+
+import org.apache.datasketches.hll.HllSketch;
+import org.apache.druid.testing.InitializedNullHandlingTest;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Arrays;
+
+/**
+ * Tests for {@link HllSketchBuildAggregator#updateSketch}.
+ *
+ * Tests of the aggregator generally should go in {@link
HllSketchAggregatorTest} instead.
+ */
+public class HllSketchBuildAggregatorTest extends InitializedNullHandlingTest
+{
+ private final HllSketch sketch = new HllSketch(HllSketch.DEFAULT_LG_K);
+
+ @Test
+ public void testUpdateSketchVariousNumbers()
+ {
+ updateSketch(1L, -2L, 1L, -2, 1L, 2.0, 2f, Double.doubleToLongBits(2.0),
3.0);
+ assertSketchEstimate(4);
+ }
+
+ @Test
+ public void testUpdateSketchStrings()
+ {
+ updateSketch("foo", null, "bar", "");
+ assertSketchEstimate(2);
+ }
+
+ @Test
+ public void testUpdateSketchListsOfStrings()
+ {
+ updateSketch(
+ Arrays.asList("1", "2"),
+ Arrays.asList("2", "", "3", "11"),
+ Arrays.asList("1", null, "3", "12"),
+ Arrays.asList("1", "3", "13")
+ );
+
+ assertSketchEstimate(6);
+ }
+
+ @Test
+ public void testUpdateSketchCharArray()
+ {
+ updateSketch(
+ new char[]{1, 2},
+ new char[]{2, 3, 11},
+ new char[]{1, 2},
+ new char[]{1, 3, 13}
+ );
+
+ assertSketchEstimate(3);
+ }
+
+ @Test
+ public void testUpdateSketchByteArray()
+ {
+ updateSketch(
+ new byte[]{1, 2},
+ new byte[]{2, 3, 11},
+ new byte[]{1, 2},
+ new byte[]{1, 3, 13}
+ );
+
+ assertSketchEstimate(3);
+ }
+
+ @Test
+ public void testUpdateSketchIntArray()
+ {
+ updateSketch(
+ new int[]{1, 2},
+ new int[]{2, 3, 11},
+ new int[]{1, 2},
+ new int[]{1, 3, 13}
+ );
+
+ assertSketchEstimate(3);
+ }
+
+ @Test
+ public void testUpdateSketchLongArray()
+ {
+ updateSketch(
+ new long[]{1, 2},
+ new long[]{2, 3, 11},
+ new long[]{1, 2},
+ new long[]{1, 3, 13}
+ );
+
+ assertSketchEstimate(3);
+ }
+
+ private void updateSketch(final Object first, final Object... others)
+ {
+ // first != null check mimics how updateSketch is called: it's always
guarded by a null check on the outer value.
+ if (first != null) {
+ HllSketchBuildAggregator.updateSketch(sketch, first);
+ }
+
+ for (final Object o : others) {
+ if (o != null) {
+ HllSketchBuildAggregator.updateSketch(sketch, o);
+ }
+ }
+ }
+
+ private void assertSketchEstimate(final long estimate)
+ {
+ Assert.assertEquals((double) estimate, sketch.getEstimate(), 0.1);
+ }
+}
diff --git
a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java
b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java
index 9820b83..054693f 100644
---
a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java
+++
b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java
@@ -517,6 +517,7 @@ public class SketchAggregationTest
List<String> value = new ArrayList<>();
value.add("foo");
value.add(null);
+ value.add("");
value.add("bar");
List[] columnValues = new List[]{value};
final TestObjectColumnSelector selector = new
TestObjectColumnSelector(columnValues);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]