This is an automated email from the ASF dual-hosted git repository.

cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new fee509df2eb fix NestedDataColumnIndexerV4 to not report cardinality 
(#16507)
fee509df2eb is described below

commit fee509df2ebade0d6a17f96ee0637a27acedb45d
Author: Clint Wylie <[email protected]>
AuthorDate: Tue Jun 11 20:58:12 2024 -0700

    fix NestedDataColumnIndexerV4 to not report cardinality (#16507)
    
    * fix NestedDataColumnIndexerV4 to not report cardinality
    changes:
    * fix issue similar to #16489 but for NestedDataColumnIndexerV4, which can 
report STRING type if it only processes a single type of values. this should be 
less common than the auto indexer problem
    * fix some issues with sql benchmarks
---
 .../benchmark/query/SqlExpressionBenchmark.java    | 15 ++++++----
 .../druid/benchmark/query/SqlGroupByBenchmark.java |  9 ++++--
 .../benchmark/query/SqlNestedDataBenchmark.java    | 33 +++++++++++-----------
 .../druid/segment/NestedDataColumnIndexerV4.java   |  2 +-
 .../druid/segment/AutoTypeColumnIndexerTest.java   |  2 ++
 .../segment/NestedDataColumnIndexerV4Test.java     | 30 +++++++++++---------
 .../calcite/SqlVectorizedExpressionSanityTest.java |  8 +++---
 7 files changed, 56 insertions(+), 43 deletions(-)

diff --git 
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
 
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
index c04d4e31f95..d4c2e0906b4 100644
--- 
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
+++ 
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
@@ -360,13 +360,16 @@ public class SqlExpressionBenchmark
 
     try {
       SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
+          engine,
           plannerFactory,
           QUERIES.get(Integer.parseInt(query))
       );
+      log.info("non-vectorized and vectorized results match");
     }
-    catch (Throwable ignored) {
-      // the show must go on
+    catch (Throwable ex) {
+      log.warn(ex, "non-vectorized and vectorized results do not match");
     }
+
     final String sql = QUERIES.get(Integer.parseInt(query));
 
     try (final DruidPlanner planner = 
plannerFactory.createPlannerForTesting(engine, "EXPLAIN PLAN FOR " + sql, 
ImmutableMap.of("useNativeQueryExplain", true))) {
@@ -378,8 +381,8 @@ public class SqlExpressionBenchmark
                          .writeValueAsString(jsonMapper.readValue((String) 
planResult[0], List.class))
       );
     }
-    catch (JsonProcessingException ignored) {
-
+    catch (JsonProcessingException ex) {
+      log.warn(ex, "explain failed");
     }
 
     try (final DruidPlanner planner = 
plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) {
@@ -393,8 +396,8 @@ public class SqlExpressionBenchmark
       }
       log.info("Total result row count:" + rowCounter);
     }
-    catch (Throwable ignored) {
-
+    catch (Throwable ex) {
+      log.warn(ex, "failed to count rows");
     }
   }
 
diff --git 
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlGroupByBenchmark.java
 
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlGroupByBenchmark.java
index 52745e62fb3..80b6647a0ee 100644
--- 
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlGroupByBenchmark.java
+++ 
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlGroupByBenchmark.java
@@ -29,6 +29,7 @@ import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.java.util.common.granularity.Granularities;
 import org.apache.druid.java.util.common.guava.Sequence;
 import org.apache.druid.java.util.common.io.Closer;
+import org.apache.druid.java.util.common.logger.Logger;
 import org.apache.druid.math.expr.ExpressionProcessing;
 import org.apache.druid.query.DruidProcessingConfig;
 import org.apache.druid.query.QueryRunnerFactoryConglomerate;
@@ -91,6 +92,8 @@ public class SqlGroupByBenchmark
     NestedDataModule.registerHandlersAndSerde();
   }
 
+  private static final Logger log = new Logger(SqlGroupByBenchmark.class);
+
   private static final DruidProcessingConfig PROCESSING_CONFIG = new 
DruidProcessingConfig()
   {
     @Override
@@ -349,12 +352,14 @@ public class SqlGroupByBenchmark
 
     try {
       SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
+          engine,
           plannerFactory,
           sqlQuery(groupingDimension)
       );
+      log.info("non-vectorized and vectorized results match");
     }
-    catch (Throwable ignored) {
-      // the show must go on
+    catch (Throwable ex) {
+      log.warn(ex, "non-vectorized and vectorized results do not match");
     }
   }
 
diff --git 
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java
 
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java
index 0be1f4d52e6..c329e9da30e 100644
--- 
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java
+++ 
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java
@@ -20,7 +20,6 @@
 package org.apache.druid.benchmark.query;
 
 import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.JsonMappingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
@@ -301,7 +300,7 @@ public class SqlNestedDataBenchmark
   private SqlEngine engine;
   @Nullable
   private PlannerFactory plannerFactory;
-  private Closer closer = Closer.create();
+  private final Closer closer = Closer.create();
 
   @Setup(Level.Trial)
   public void setup()
@@ -345,16 +344,19 @@ public class SqlNestedDataBenchmark
     }
     final QueryableIndex index;
     if ("auto".equals(schema)) {
-      List<DimensionSchema> columnSchemas = schemaInfo.getDimensionsSpec()
-                                                      .getDimensions()
-                                                      .stream()
-                                                      .map(x -> new 
AutoTypeColumnSchema(x.getName(), null))
-                                                      
.collect(Collectors.toList());
+      Iterable<DimensionSchema> columnSchemas = Iterables.concat(
+          schemaInfo.getDimensionsSpec()
+                    .getDimensions()
+                    .stream()
+                    .map(x -> new AutoTypeColumnSchema(x.getName(), null))
+                    .collect(Collectors.toList()),
+          Collections.singletonList(new AutoTypeColumnSchema("nested", null))
+      );
       index = segmentGenerator.generate(
           dataSegment,
           schemaInfo,
-          DimensionsSpec.builder().setDimensions(columnSchemas).build(),
-          TransformSpec.NONE,
+          
DimensionsSpec.builder().setDimensions(ImmutableList.copyOf(columnSchemas.iterator())).build(),
+          transformSpec,
           
IndexSpec.builder().withStringDictionaryEncoding(encodingStrategy).build(),
           Granularities.NONE,
           rowsPerSegment
@@ -368,7 +370,7 @@ public class SqlNestedDataBenchmark
           dataSegment,
           schemaInfo,
           
DimensionsSpec.builder().setDimensions(ImmutableList.copyOf(columnSchemas.iterator())).build(),
-          TransformSpec.NONE,
+          transformSpec,
           
IndexSpec.builder().withStringDictionaryEncoding(encodingStrategy).build(),
           Granularities.NONE,
           rowsPerSegment
@@ -405,12 +407,14 @@ public class SqlNestedDataBenchmark
 
     try {
       SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
+          engine,
           plannerFactory,
           QUERIES.get(Integer.parseInt(query))
       );
+      log.info("non-vectorized and vectorized results match");
     }
     catch (Throwable ex) {
-      log.warn(ex, "failed to sanity check");
+      log.warn(ex, "non-vectorized and vectorized results do not match");
     }
 
     final String sql = QUERIES.get(Integer.parseInt(query));
@@ -424,11 +428,8 @@ public class SqlNestedDataBenchmark
                          .writeValueAsString(jsonMapper.readValue((String) 
planResult[0], List.class))
       );
     }
-    catch (JsonMappingException e) {
-      throw new RuntimeException(e);
-    }
-    catch (JsonProcessingException e) {
-      throw new RuntimeException(e);
+    catch (JsonProcessingException ex) {
+      log.warn(ex, "explain failed");
     }
 
     try (final DruidPlanner planner = 
plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) {
diff --git 
a/processing/src/main/java/org/apache/druid/segment/NestedDataColumnIndexerV4.java
 
b/processing/src/main/java/org/apache/druid/segment/NestedDataColumnIndexerV4.java
index ecaf89e8483..f3c92806e4f 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/NestedDataColumnIndexerV4.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/NestedDataColumnIndexerV4.java
@@ -154,7 +154,7 @@ public class NestedDataColumnIndexerV4 implements 
DimensionIndexer<StructuredDat
   @Override
   public int getCardinality()
   {
-    return globalDictionary.getCardinality();
+    return DimensionDictionarySelector.CARDINALITY_UNKNOWN;
   }
 
   @Override
diff --git 
a/processing/src/test/java/org/apache/druid/segment/AutoTypeColumnIndexerTest.java
 
b/processing/src/test/java/org/apache/druid/segment/AutoTypeColumnIndexerTest.java
index 3b0e4ec74fe..e397267907e 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/AutoTypeColumnIndexerTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/AutoTypeColumnIndexerTest.java
@@ -69,6 +69,7 @@ public class AutoTypeColumnIndexerTest extends 
InitializedNullHandlingTest
   public void testKeySizeEstimation()
   {
     AutoTypeColumnIndexer indexer = new AutoTypeColumnIndexer("test", null);
+    Assert.assertEquals(DimensionDictionarySelector.CARDINALITY_UNKNOWN, 
indexer.getCardinality());
     int baseCardinality = NullHandling.sqlCompatible() ? 0 : 2;
     Assert.assertEquals(baseCardinality, 
indexer.globalDictionary.getCardinality());
 
@@ -134,6 +135,7 @@ public class AutoTypeColumnIndexerTest extends 
InitializedNullHandlingTest
       Assert.assertEquals(48, key.getEffectiveSizeBytes());
       Assert.assertEquals(baseCardinality + 8, 
indexer.globalDictionary.getCardinality());
     }
+    Assert.assertEquals(DimensionDictionarySelector.CARDINALITY_UNKNOWN, 
indexer.getCardinality());
   }
 
   @Test
diff --git 
a/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerV4Test.java
 
b/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerV4Test.java
index ec691867a31..e670ae10003 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerV4Test.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerV4Test.java
@@ -68,71 +68,73 @@ public class NestedDataColumnIndexerV4Test extends 
InitializedNullHandlingTest
   public void testKeySizeEstimation()
   {
     NestedDataColumnIndexerV4 indexer = new NestedDataColumnIndexerV4();
+    Assert.assertEquals(DimensionDictionarySelector.CARDINALITY_UNKNOWN, 
indexer.getCardinality());
     int baseCardinality = NullHandling.sqlCompatible() ? 0 : 2;
-    Assert.assertEquals(baseCardinality, indexer.getCardinality());
+    Assert.assertEquals(baseCardinality, 
indexer.globalDictionary.getCardinality());
 
     EncodedKeyComponent<StructuredData> key;
     // new raw value, new field, new dictionary entry
     key = 
indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", 
"foo"), false);
     Assert.assertEquals(228, key.getEffectiveSizeBytes());
-    Assert.assertEquals(baseCardinality + 1, indexer.getCardinality());
+    Assert.assertEquals(baseCardinality + 1, 
indexer.globalDictionary.getCardinality());
     // adding same value only adds estimated size of value itself
     key = 
indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", 
"foo"), false);
     Assert.assertEquals(112, key.getEffectiveSizeBytes());
-    Assert.assertEquals(baseCardinality + 1, indexer.getCardinality());
+    Assert.assertEquals(baseCardinality + 1, 
indexer.globalDictionary.getCardinality());
     // new raw value, new field, new dictionary entry
     key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false);
     Assert.assertEquals(94, key.getEffectiveSizeBytes());
-    Assert.assertEquals(baseCardinality + 2, indexer.getCardinality());
+    Assert.assertEquals(baseCardinality + 2, 
indexer.globalDictionary.getCardinality());
     // adding same value only adds estimated size of value itself
     key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false);
     Assert.assertEquals(16, key.getEffectiveSizeBytes());
-    Assert.assertEquals(baseCardinality + 2, indexer.getCardinality());
+    Assert.assertEquals(baseCardinality + 2, 
indexer.globalDictionary.getCardinality());
     // new raw value, new dictionary entry
     key = indexer.processRowValsToUnsortedEncodedKeyComponent(11L, false);
     Assert.assertEquals(48, key.getEffectiveSizeBytes());
-    Assert.assertEquals(baseCardinality + 3, indexer.getCardinality());
+    Assert.assertEquals(baseCardinality + 3, 
indexer.globalDictionary.getCardinality());
 
     // new raw value, new fields
     key = 
indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 
10L), false);
     Assert.assertEquals(276, key.getEffectiveSizeBytes());
-    Assert.assertEquals(baseCardinality + 5, indexer.getCardinality());
+    Assert.assertEquals(baseCardinality + 5, 
indexer.globalDictionary.getCardinality());
     // new raw value, re-use fields and dictionary
     key = 
indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 
10L), false);
     Assert.assertEquals(56, key.getEffectiveSizeBytes());
-    Assert.assertEquals(baseCardinality + 5, indexer.getCardinality());
+    Assert.assertEquals(baseCardinality + 5, 
indexer.globalDictionary.getCardinality());
     // new raw value, new fields
     key = indexer.processRowValsToUnsortedEncodedKeyComponent(
         ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)),
         false
     );
     Assert.assertEquals(286, key.getEffectiveSizeBytes());
-    Assert.assertEquals(baseCardinality + 5, indexer.getCardinality());
+    Assert.assertEquals(baseCardinality + 5, 
indexer.globalDictionary.getCardinality());
     // new raw value
     key = indexer.processRowValsToUnsortedEncodedKeyComponent(
         ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)),
         false
     );
     Assert.assertEquals(118, key.getEffectiveSizeBytes());
-    Assert.assertEquals(baseCardinality + 5, indexer.getCardinality());
+    Assert.assertEquals(baseCardinality + 5, 
indexer.globalDictionary.getCardinality());
 
     key = indexer.processRowValsToUnsortedEncodedKeyComponent("", false);
     if (NullHandling.replaceWithDefault()) {
       Assert.assertEquals(0, key.getEffectiveSizeBytes());
-      Assert.assertEquals(baseCardinality + 6, indexer.getCardinality());
+      Assert.assertEquals(baseCardinality + 6, 
indexer.globalDictionary.getCardinality());
     } else {
       Assert.assertEquals(104, key.getEffectiveSizeBytes());
-      Assert.assertEquals(baseCardinality + 6, indexer.getCardinality());
+      Assert.assertEquals(baseCardinality + 6, 
indexer.globalDictionary.getCardinality());
     }
 
     key = indexer.processRowValsToUnsortedEncodedKeyComponent(0, false);
     if (NullHandling.replaceWithDefault()) {
       Assert.assertEquals(16, key.getEffectiveSizeBytes());
-      Assert.assertEquals(baseCardinality + 6, indexer.getCardinality());
+      Assert.assertEquals(baseCardinality + 6, 
indexer.globalDictionary.getCardinality());
     } else {
       Assert.assertEquals(48, key.getEffectiveSizeBytes());
-      Assert.assertEquals(baseCardinality + 7, indexer.getCardinality());
+      Assert.assertEquals(baseCardinality + 7, 
indexer.globalDictionary.getCardinality());
     }
+    Assert.assertEquals(DimensionDictionarySelector.CARDINALITY_UNKNOWN, 
indexer.getCardinality());
   }
 
   @Test
diff --git 
a/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java
 
b/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java
index 6c87c321a42..304400bc3d8 100644
--- 
a/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java
+++ 
b/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java
@@ -178,10 +178,10 @@ public class SqlVectorizedExpressionSanityTest extends 
InitializedNullHandlingTe
   @Test
   public void testQuery()
   {
-    sanityTestVectorizedSqlQueries(PLANNER_FACTORY, query);
+    sanityTestVectorizedSqlQueries(ENGINE, PLANNER_FACTORY, query);
   }
 
-  public static void sanityTestVectorizedSqlQueries(PlannerFactory 
plannerFactory, String query)
+  public static void sanityTestVectorizedSqlQueries(SqlEngine engine, 
PlannerFactory plannerFactory, String query)
   {
     final Map<String, Object> vector = ImmutableMap.of(
             QueryContexts.VECTORIZE_KEY, "force",
@@ -193,8 +193,8 @@ public class SqlVectorizedExpressionSanityTest extends 
InitializedNullHandlingTe
     );
 
     try (
-        final DruidPlanner vectorPlanner = 
plannerFactory.createPlannerForTesting(ENGINE, query, vector);
-        final DruidPlanner nonVectorPlanner = 
plannerFactory.createPlannerForTesting(ENGINE, query, nonvector)
+        final DruidPlanner vectorPlanner = 
plannerFactory.createPlannerForTesting(engine, query, vector);
+        final DruidPlanner nonVectorPlanner = 
plannerFactory.createPlannerForTesting(engine, query, nonvector)
     ) {
       final PlannerResult vectorPlan = vectorPlanner.plan();
       final PlannerResult nonVectorPlan = nonVectorPlanner.plan();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to