This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new fee509df2eb fix NestedDataColumnIndexerV4 to not report cardinality
(#16507)
fee509df2eb is described below
commit fee509df2ebade0d6a17f96ee0637a27acedb45d
Author: Clint Wylie <[email protected]>
AuthorDate: Tue Jun 11 20:58:12 2024 -0700
fix NestedDataColumnIndexerV4 to not report cardinality (#16507)
* fix NestedDataColumnIndexerV4 to not report cardinality
changes:
* fix issue similar to #16489 but for NestedDataColumnIndexerV4, which can
report STRING type if it only processes a single type of values. this should be
less common than the auto indexer problem
* fix some issues with sql benchmarks
---
.../benchmark/query/SqlExpressionBenchmark.java | 15 ++++++----
.../druid/benchmark/query/SqlGroupByBenchmark.java | 9 ++++--
.../benchmark/query/SqlNestedDataBenchmark.java | 33 +++++++++++-----------
.../druid/segment/NestedDataColumnIndexerV4.java | 2 +-
.../druid/segment/AutoTypeColumnIndexerTest.java | 2 ++
.../segment/NestedDataColumnIndexerV4Test.java | 30 +++++++++++---------
.../calcite/SqlVectorizedExpressionSanityTest.java | 8 +++---
7 files changed, 56 insertions(+), 43 deletions(-)
diff --git
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
index c04d4e31f95..d4c2e0906b4 100644
---
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
+++
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
@@ -360,13 +360,16 @@ public class SqlExpressionBenchmark
try {
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
+ engine,
plannerFactory,
QUERIES.get(Integer.parseInt(query))
);
+ log.info("non-vectorized and vectorized results match");
}
- catch (Throwable ignored) {
- // the show must go on
+ catch (Throwable ex) {
+ log.warn(ex, "non-vectorized and vectorized results do not match");
}
+
final String sql = QUERIES.get(Integer.parseInt(query));
try (final DruidPlanner planner =
plannerFactory.createPlannerForTesting(engine, "EXPLAIN PLAN FOR " + sql,
ImmutableMap.of("useNativeQueryExplain", true))) {
@@ -378,8 +381,8 @@ public class SqlExpressionBenchmark
.writeValueAsString(jsonMapper.readValue((String)
planResult[0], List.class))
);
}
- catch (JsonProcessingException ignored) {
-
+ catch (JsonProcessingException ex) {
+ log.warn(ex, "explain failed");
}
try (final DruidPlanner planner =
plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) {
@@ -393,8 +396,8 @@ public class SqlExpressionBenchmark
}
log.info("Total result row count:" + rowCounter);
}
- catch (Throwable ignored) {
-
+ catch (Throwable ex) {
+ log.warn(ex, "failed to count rows");
}
}
diff --git
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlGroupByBenchmark.java
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlGroupByBenchmark.java
index 52745e62fb3..80b6647a0ee 100644
---
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlGroupByBenchmark.java
+++
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlGroupByBenchmark.java
@@ -29,6 +29,7 @@ import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.io.Closer;
+import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.DruidProcessingConfig;
import org.apache.druid.query.QueryRunnerFactoryConglomerate;
@@ -91,6 +92,8 @@ public class SqlGroupByBenchmark
NestedDataModule.registerHandlersAndSerde();
}
+ private static final Logger log = new Logger(SqlGroupByBenchmark.class);
+
private static final DruidProcessingConfig PROCESSING_CONFIG = new
DruidProcessingConfig()
{
@Override
@@ -349,12 +352,14 @@ public class SqlGroupByBenchmark
try {
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
+ engine,
plannerFactory,
sqlQuery(groupingDimension)
);
+ log.info("non-vectorized and vectorized results match");
}
- catch (Throwable ignored) {
- // the show must go on
+ catch (Throwable ex) {
+ log.warn(ex, "non-vectorized and vectorized results do not match");
}
}
diff --git
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java
index 0be1f4d52e6..c329e9da30e 100644
---
a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java
+++
b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java
@@ -20,7 +20,6 @@
package org.apache.druid.benchmark.query;
import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
@@ -301,7 +300,7 @@ public class SqlNestedDataBenchmark
private SqlEngine engine;
@Nullable
private PlannerFactory plannerFactory;
- private Closer closer = Closer.create();
+ private final Closer closer = Closer.create();
@Setup(Level.Trial)
public void setup()
@@ -345,16 +344,19 @@ public class SqlNestedDataBenchmark
}
final QueryableIndex index;
if ("auto".equals(schema)) {
- List<DimensionSchema> columnSchemas = schemaInfo.getDimensionsSpec()
- .getDimensions()
- .stream()
- .map(x -> new
AutoTypeColumnSchema(x.getName(), null))
-
.collect(Collectors.toList());
+ Iterable<DimensionSchema> columnSchemas = Iterables.concat(
+ schemaInfo.getDimensionsSpec()
+ .getDimensions()
+ .stream()
+ .map(x -> new AutoTypeColumnSchema(x.getName(), null))
+ .collect(Collectors.toList()),
+ Collections.singletonList(new AutoTypeColumnSchema("nested", null))
+ );
index = segmentGenerator.generate(
dataSegment,
schemaInfo,
- DimensionsSpec.builder().setDimensions(columnSchemas).build(),
- TransformSpec.NONE,
+
DimensionsSpec.builder().setDimensions(ImmutableList.copyOf(columnSchemas.iterator())).build(),
+ transformSpec,
IndexSpec.builder().withStringDictionaryEncoding(encodingStrategy).build(),
Granularities.NONE,
rowsPerSegment
@@ -368,7 +370,7 @@ public class SqlNestedDataBenchmark
dataSegment,
schemaInfo,
DimensionsSpec.builder().setDimensions(ImmutableList.copyOf(columnSchemas.iterator())).build(),
- TransformSpec.NONE,
+ transformSpec,
IndexSpec.builder().withStringDictionaryEncoding(encodingStrategy).build(),
Granularities.NONE,
rowsPerSegment
@@ -405,12 +407,14 @@ public class SqlNestedDataBenchmark
try {
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
+ engine,
plannerFactory,
QUERIES.get(Integer.parseInt(query))
);
+ log.info("non-vectorized and vectorized results match");
}
catch (Throwable ex) {
- log.warn(ex, "failed to sanity check");
+ log.warn(ex, "non-vectorized and vectorized results do not match");
}
final String sql = QUERIES.get(Integer.parseInt(query));
@@ -424,11 +428,8 @@ public class SqlNestedDataBenchmark
.writeValueAsString(jsonMapper.readValue((String)
planResult[0], List.class))
);
}
- catch (JsonMappingException e) {
- throw new RuntimeException(e);
- }
- catch (JsonProcessingException e) {
- throw new RuntimeException(e);
+ catch (JsonProcessingException ex) {
+ log.warn(ex, "explain failed");
}
try (final DruidPlanner planner =
plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) {
diff --git
a/processing/src/main/java/org/apache/druid/segment/NestedDataColumnIndexerV4.java
b/processing/src/main/java/org/apache/druid/segment/NestedDataColumnIndexerV4.java
index ecaf89e8483..f3c92806e4f 100644
---
a/processing/src/main/java/org/apache/druid/segment/NestedDataColumnIndexerV4.java
+++
b/processing/src/main/java/org/apache/druid/segment/NestedDataColumnIndexerV4.java
@@ -154,7 +154,7 @@ public class NestedDataColumnIndexerV4 implements
DimensionIndexer<StructuredDat
@Override
public int getCardinality()
{
- return globalDictionary.getCardinality();
+ return DimensionDictionarySelector.CARDINALITY_UNKNOWN;
}
@Override
diff --git
a/processing/src/test/java/org/apache/druid/segment/AutoTypeColumnIndexerTest.java
b/processing/src/test/java/org/apache/druid/segment/AutoTypeColumnIndexerTest.java
index 3b0e4ec74fe..e397267907e 100644
---
a/processing/src/test/java/org/apache/druid/segment/AutoTypeColumnIndexerTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/AutoTypeColumnIndexerTest.java
@@ -69,6 +69,7 @@ public class AutoTypeColumnIndexerTest extends
InitializedNullHandlingTest
public void testKeySizeEstimation()
{
AutoTypeColumnIndexer indexer = new AutoTypeColumnIndexer("test", null);
+ Assert.assertEquals(DimensionDictionarySelector.CARDINALITY_UNKNOWN,
indexer.getCardinality());
int baseCardinality = NullHandling.sqlCompatible() ? 0 : 2;
Assert.assertEquals(baseCardinality,
indexer.globalDictionary.getCardinality());
@@ -134,6 +135,7 @@ public class AutoTypeColumnIndexerTest extends
InitializedNullHandlingTest
Assert.assertEquals(48, key.getEffectiveSizeBytes());
Assert.assertEquals(baseCardinality + 8,
indexer.globalDictionary.getCardinality());
}
+ Assert.assertEquals(DimensionDictionarySelector.CARDINALITY_UNKNOWN,
indexer.getCardinality());
}
@Test
diff --git
a/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerV4Test.java
b/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerV4Test.java
index ec691867a31..e670ae10003 100644
---
a/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerV4Test.java
+++
b/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerV4Test.java
@@ -68,71 +68,73 @@ public class NestedDataColumnIndexerV4Test extends
InitializedNullHandlingTest
public void testKeySizeEstimation()
{
NestedDataColumnIndexerV4 indexer = new NestedDataColumnIndexerV4();
+ Assert.assertEquals(DimensionDictionarySelector.CARDINALITY_UNKNOWN,
indexer.getCardinality());
int baseCardinality = NullHandling.sqlCompatible() ? 0 : 2;
- Assert.assertEquals(baseCardinality, indexer.getCardinality());
+ Assert.assertEquals(baseCardinality,
indexer.globalDictionary.getCardinality());
EncodedKeyComponent<StructuredData> key;
// new raw value, new field, new dictionary entry
key =
indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x",
"foo"), false);
Assert.assertEquals(228, key.getEffectiveSizeBytes());
- Assert.assertEquals(baseCardinality + 1, indexer.getCardinality());
+ Assert.assertEquals(baseCardinality + 1,
indexer.globalDictionary.getCardinality());
// adding same value only adds estimated size of value itself
key =
indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x",
"foo"), false);
Assert.assertEquals(112, key.getEffectiveSizeBytes());
- Assert.assertEquals(baseCardinality + 1, indexer.getCardinality());
+ Assert.assertEquals(baseCardinality + 1,
indexer.globalDictionary.getCardinality());
// new raw value, new field, new dictionary entry
key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false);
Assert.assertEquals(94, key.getEffectiveSizeBytes());
- Assert.assertEquals(baseCardinality + 2, indexer.getCardinality());
+ Assert.assertEquals(baseCardinality + 2,
indexer.globalDictionary.getCardinality());
// adding same value only adds estimated size of value itself
key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false);
Assert.assertEquals(16, key.getEffectiveSizeBytes());
- Assert.assertEquals(baseCardinality + 2, indexer.getCardinality());
+ Assert.assertEquals(baseCardinality + 2,
indexer.globalDictionary.getCardinality());
// new raw value, new dictionary entry
key = indexer.processRowValsToUnsortedEncodedKeyComponent(11L, false);
Assert.assertEquals(48, key.getEffectiveSizeBytes());
- Assert.assertEquals(baseCardinality + 3, indexer.getCardinality());
+ Assert.assertEquals(baseCardinality + 3,
indexer.globalDictionary.getCardinality());
// new raw value, new fields
key =
indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L,
10L), false);
Assert.assertEquals(276, key.getEffectiveSizeBytes());
- Assert.assertEquals(baseCardinality + 5, indexer.getCardinality());
+ Assert.assertEquals(baseCardinality + 5,
indexer.globalDictionary.getCardinality());
// new raw value, re-use fields and dictionary
key =
indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L,
10L), false);
Assert.assertEquals(56, key.getEffectiveSizeBytes());
- Assert.assertEquals(baseCardinality + 5, indexer.getCardinality());
+ Assert.assertEquals(baseCardinality + 5,
indexer.globalDictionary.getCardinality());
// new raw value, new fields
key = indexer.processRowValsToUnsortedEncodedKeyComponent(
ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)),
false
);
Assert.assertEquals(286, key.getEffectiveSizeBytes());
- Assert.assertEquals(baseCardinality + 5, indexer.getCardinality());
+ Assert.assertEquals(baseCardinality + 5,
indexer.globalDictionary.getCardinality());
// new raw value
key = indexer.processRowValsToUnsortedEncodedKeyComponent(
ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)),
false
);
Assert.assertEquals(118, key.getEffectiveSizeBytes());
- Assert.assertEquals(baseCardinality + 5, indexer.getCardinality());
+ Assert.assertEquals(baseCardinality + 5,
indexer.globalDictionary.getCardinality());
key = indexer.processRowValsToUnsortedEncodedKeyComponent("", false);
if (NullHandling.replaceWithDefault()) {
Assert.assertEquals(0, key.getEffectiveSizeBytes());
- Assert.assertEquals(baseCardinality + 6, indexer.getCardinality());
+ Assert.assertEquals(baseCardinality + 6,
indexer.globalDictionary.getCardinality());
} else {
Assert.assertEquals(104, key.getEffectiveSizeBytes());
- Assert.assertEquals(baseCardinality + 6, indexer.getCardinality());
+ Assert.assertEquals(baseCardinality + 6,
indexer.globalDictionary.getCardinality());
}
key = indexer.processRowValsToUnsortedEncodedKeyComponent(0, false);
if (NullHandling.replaceWithDefault()) {
Assert.assertEquals(16, key.getEffectiveSizeBytes());
- Assert.assertEquals(baseCardinality + 6, indexer.getCardinality());
+ Assert.assertEquals(baseCardinality + 6,
indexer.globalDictionary.getCardinality());
} else {
Assert.assertEquals(48, key.getEffectiveSizeBytes());
- Assert.assertEquals(baseCardinality + 7, indexer.getCardinality());
+ Assert.assertEquals(baseCardinality + 7,
indexer.globalDictionary.getCardinality());
}
+ Assert.assertEquals(DimensionDictionarySelector.CARDINALITY_UNKNOWN,
indexer.getCardinality());
}
@Test
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java
index 6c87c321a42..304400bc3d8 100644
---
a/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java
+++
b/sql/src/test/java/org/apache/druid/sql/calcite/SqlVectorizedExpressionSanityTest.java
@@ -178,10 +178,10 @@ public class SqlVectorizedExpressionSanityTest extends
InitializedNullHandlingTe
@Test
public void testQuery()
{
- sanityTestVectorizedSqlQueries(PLANNER_FACTORY, query);
+ sanityTestVectorizedSqlQueries(ENGINE, PLANNER_FACTORY, query);
}
- public static void sanityTestVectorizedSqlQueries(PlannerFactory
plannerFactory, String query)
+ public static void sanityTestVectorizedSqlQueries(SqlEngine engine,
PlannerFactory plannerFactory, String query)
{
final Map<String, Object> vector = ImmutableMap.of(
QueryContexts.VECTORIZE_KEY, "force",
@@ -193,8 +193,8 @@ public class SqlVectorizedExpressionSanityTest extends
InitializedNullHandlingTe
);
try (
- final DruidPlanner vectorPlanner =
plannerFactory.createPlannerForTesting(ENGINE, query, vector);
- final DruidPlanner nonVectorPlanner =
plannerFactory.createPlannerForTesting(ENGINE, query, nonvector)
+ final DruidPlanner vectorPlanner =
plannerFactory.createPlannerForTesting(engine, query, vector);
+ final DruidPlanner nonVectorPlanner =
plannerFactory.createPlannerForTesting(engine, query, nonvector)
) {
final PlannerResult vectorPlan = vectorPlanner.plan();
final PlannerResult nonVectorPlan = nonVectorPlanner.plan();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]