This is an automated email from the ASF dual-hosted git repository.

cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new b7a4c79  Null handling fixes for DS HLL and Theta sketches. (#11830)
b7a4c79 is described below

commit b7a4c79314fb52e88f53f84b56c362e5abc205c2
Author: Gian Merlino <[email protected]>
AuthorDate: Fri Oct 22 19:09:00 2021 -0700

    Null handling fixes for DS HLL and Theta sketches. (#11830)
    
    * Null handling fixes for DS HLL and Theta sketches.
    
    For HLL, this fixes an NPE when processing a null in a multi-value 
dimension.
    
    For both, empty strings are now properly treated as nulls (and ignored) in
    replace-with-default mode. Behavior in SQL-compatible mode is unchanged.
    
    * Fix expectation.
---
 .../datasketches/hll/HllSketchBuildAggregator.java |  13 +-
 .../datasketches/theta/SketchAggregator.java       |   6 +-
 .../hll/HllSketchBuildAggregatorTest.java          | 135 +++++++++++++++++++++
 .../datasketches/theta/SketchAggregationTest.java  |   1 +
 4 files changed, 150 insertions(+), 5 deletions(-)

diff --git 
a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregator.java
 
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregator.java
index be60842..de76898 100644
--- 
a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregator.java
+++ 
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregator.java
@@ -21,6 +21,7 @@ package org.apache.druid.query.aggregation.datasketches.hll;
 
 import org.apache.datasketches.hll.HllSketch;
 import org.apache.datasketches.hll.TgtHllType;
+import org.apache.druid.common.config.NullHandling;
 import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.query.aggregation.Aggregator;
 import org.apache.druid.segment.ColumnValueSelector;
@@ -102,10 +103,14 @@ public class HllSketchBuildAggregator implements 
Aggregator
     } else if (value instanceof String) {
       sketch.update(((String) value).toCharArray());
     } else if (value instanceof List) {
-      // noinspection unchecked
-      List<String> list = (List<String>) value;
-      for (String v : list) {
-        sketch.update(v.toCharArray());
+      // noinspection rawtypes
+      for (Object entry : (List) value) {
+        if (entry != null) {
+          final String asString = entry.toString();
+          if (!NullHandling.isNullOrEquivalent(asString)) {
+            sketch.update(asString);
+          }
+        }
       }
     } else if (value instanceof char[]) {
       sketch.update((char[]) value);
diff --git 
a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregator.java
 
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregator.java
index cf930bc..b53d89d 100644
--- 
a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregator.java
+++ 
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregator.java
@@ -22,6 +22,7 @@ package org.apache.druid.query.aggregation.datasketches.theta;
 import org.apache.datasketches.Family;
 import org.apache.datasketches.theta.SetOperation;
 import org.apache.datasketches.theta.Union;
+import org.apache.druid.common.config.NullHandling;
 import org.apache.druid.java.util.common.ISE;
 import org.apache.druid.query.aggregation.Aggregator;
 import org.apache.druid.segment.BaseObjectColumnValueSelector;
@@ -122,7 +123,10 @@ public class SketchAggregator implements Aggregator
     } else if (update instanceof List) {
       for (Object entry : (List) update) {
         if (entry != null) {
-          union.update(entry.toString());
+          final String asString = entry.toString();
+          if (!NullHandling.isNullOrEquivalent(asString)) {
+            union.update(asString);
+          }
         }
       }
     } else {
diff --git 
a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorTest.java
 
b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorTest.java
new file mode 100644
index 0000000..f116340
--- /dev/null
+++ 
b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorTest.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.hll;
+
+import org.apache.datasketches.hll.HllSketch;
+import org.apache.druid.testing.InitializedNullHandlingTest;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Arrays;
+
+/**
+ * Tests for {@link HllSketchBuildAggregator#updateSketch}.
+ *
+ * Tests of the aggregator generally should go in {@link 
HllSketchAggregatorTest} instead.
+ */
+public class HllSketchBuildAggregatorTest extends InitializedNullHandlingTest
+{
+  private final HllSketch sketch = new HllSketch(HllSketch.DEFAULT_LG_K);
+
+  @Test
+  public void testUpdateSketchVariousNumbers()
+  {
+    updateSketch(1L, -2L, 1L, -2, 1L, 2.0, 2f, Double.doubleToLongBits(2.0), 
3.0);
+    assertSketchEstimate(4);
+  }
+
+  @Test
+  public void testUpdateSketchStrings()
+  {
+    updateSketch("foo", null, "bar", "");
+    assertSketchEstimate(2);
+  }
+
+  @Test
+  public void testUpdateSketchListsOfStrings()
+  {
+    updateSketch(
+        Arrays.asList("1", "2"),
+        Arrays.asList("2", "", "3", "11"),
+        Arrays.asList("1", null, "3", "12"),
+        Arrays.asList("1", "3", "13")
+    );
+
+    assertSketchEstimate(6);
+  }
+
+  @Test
+  public void testUpdateSketchCharArray()
+  {
+    updateSketch(
+        new char[]{1, 2},
+        new char[]{2, 3, 11},
+        new char[]{1, 2},
+        new char[]{1, 3, 13}
+    );
+
+    assertSketchEstimate(3);
+  }
+
+  @Test
+  public void testUpdateSketchByteArray()
+  {
+    updateSketch(
+        new byte[]{1, 2},
+        new byte[]{2, 3, 11},
+        new byte[]{1, 2},
+        new byte[]{1, 3, 13}
+    );
+
+    assertSketchEstimate(3);
+  }
+
+  @Test
+  public void testUpdateSketchIntArray()
+  {
+    updateSketch(
+        new int[]{1, 2},
+        new int[]{2, 3, 11},
+        new int[]{1, 2},
+        new int[]{1, 3, 13}
+    );
+
+    assertSketchEstimate(3);
+  }
+
+  @Test
+  public void testUpdateSketchLongArray()
+  {
+    updateSketch(
+        new long[]{1, 2},
+        new long[]{2, 3, 11},
+        new long[]{1, 2},
+        new long[]{1, 3, 13}
+    );
+
+    assertSketchEstimate(3);
+  }
+
+  private void updateSketch(final Object first, final Object... others)
+  {
+    // first != null check mimics how updateSketch is called: it's always 
guarded by a null check on the outer value.
+    if (first != null) {
+      HllSketchBuildAggregator.updateSketch(sketch, first);
+    }
+
+    for (final Object o : others) {
+      if (o != null) {
+        HllSketchBuildAggregator.updateSketch(sketch, o);
+      }
+    }
+  }
+
+  private void assertSketchEstimate(final long estimate)
+  {
+    Assert.assertEquals((double) estimate, sketch.getEstimate(), 0.1);
+  }
+}
diff --git 
a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java
 
b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java
index 9820b83..054693f 100644
--- 
a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java
+++ 
b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java
@@ -517,6 +517,7 @@ public class SketchAggregationTest
     List<String> value = new ArrayList<>();
     value.add("foo");
     value.add(null);
+    value.add("");
     value.add("bar");
     List[] columnValues = new List[]{value};
     final TestObjectColumnSelector selector = new 
TestObjectColumnSelector(columnValues);

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to