This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new f9b406c add backwards compatibility mode for multi-value string array
null value coercion (#12210)
f9b406c is described below
commit f9b406c8f2c2bc84f54d2e8f5f2a2720f49eeba3
Author: Clint Wylie <[email protected]>
AuthorDate: Mon Jan 31 22:38:15 2022 -0800
add backwards compatibility mode for multi-value string array null value
coercion (#12210)
---
.../druid/math/expr/ExpressionProcessing.java | 52 +++++++++++------
.../math/expr/ExpressionProcessingConfig.java | 47 +++++++++------
.../druid/segment/virtual/ExpressionSelectors.java | 42 ++++++++-----
.../druid/query/MultiValuedDimensionTest.java | 52 +++++++++++++++++
.../druid/sql/calcite/CalciteArraysQueryTest.java | 3 -
.../calcite/CalciteMultiValueStringQueryTest.java | 68 +++++++++++++++++++++-
.../sql/calcite/CalciteParameterQueryTest.java | 3 -
7 files changed, 210 insertions(+), 57 deletions(-)
diff --git
a/core/src/main/java/org/apache/druid/math/expr/ExpressionProcessing.java
b/core/src/main/java/org/apache/druid/math/expr/ExpressionProcessing.java
index c62f7f4..905e085 100644
--- a/core/src/main/java/org/apache/druid/math/expr/ExpressionProcessing.java
+++ b/core/src/main/java/org/apache/druid/math/expr/ExpressionProcessing.java
@@ -48,13 +48,19 @@ public class ExpressionProcessing
@VisibleForTesting
public static void initializeForTests(@Nullable Boolean allowNestedArrays)
{
- INSTANCE = new ExpressionProcessingConfig(allowNestedArrays, null, null);
+ INSTANCE = new ExpressionProcessingConfig(allowNestedArrays, null, null,
null);
}
@VisibleForTesting
public static void initializeForStrictBooleansTests(boolean useStrict)
{
- INSTANCE = new ExpressionProcessingConfig(null, useStrict, null);
+ INSTANCE = new ExpressionProcessingConfig(null, useStrict, null, null);
+ }
+
+ @VisibleForTesting
+ public static void initializeForHomogenizeNullMultiValueStrings()
+ {
+ INSTANCE = new ExpressionProcessingConfig(null, null, null, true);
}
/**
@@ -62,35 +68,47 @@ public class ExpressionProcessing
*/
public static boolean allowNestedArrays()
{
- // this should only be null in a unit test context
- // in production this will be injected by the expression processing module
- if (INSTANCE == null) {
- throw new IllegalStateException(
- "Expressions module not initialized, call
ExpressionProcessing.initializeForTests()"
- );
- }
+ checkInitialized();
return INSTANCE.allowNestedArrays();
}
-
+ /**
+ * All boolean expressions are {@link ExpressionType#LONG}
+ */
public static boolean useStrictBooleans()
{
- // this should only be null in a unit test context, in production this
will be injected by the null handling module
- if (INSTANCE == null) {
- throw new IllegalStateException("ExpressionProcessing module not
initialized, call ExpressionProcessing.initializeForTests()");
- }
+ checkInitialized();
return INSTANCE.isUseStrictBooleans();
}
-
+ /**
+ * All {@link ExprType#ARRAY} values will be converted to {@link
ExpressionType#STRING} by their column selectors
+ * (not within expression processing) to be treated as multi-value strings
instead of native arrays.
+ */
public static boolean processArraysAsMultiValueStrings()
{
+ checkInitialized();
+ return INSTANCE.processArraysAsMultiValueStrings();
+ }
+
+ /**
+ * All multi-value string expression input values of 'null', '[]', and
'[null]' will be coerced to '[null]'. If false,
+ * (the default) this will only be done when single value expressions are
implicitly mapped across multi-value rows,
+ * so that the single valued expression will always be evaluated with an
input value of 'null'
+ */
+ public static boolean isHomogenizeNullMultiValueStringArrays()
+ {
+ checkInitialized();
+ return INSTANCE.isHomogenizeNullMultiValueStringArrays();
+ }
+
+ private static void checkInitialized()
+ {
// this should only be null in a unit test context, in production this
will be injected by the null handling module
if (INSTANCE == null) {
throw new IllegalStateException(
- "ExpressionProcessing module not initialized, call
ExpressionProcessing.initializeForTests()"
+ "ExpressionProcessing module not initialized, call
ExpressionProcessing.initializeForTests() or one of its variants"
);
}
- return INSTANCE.processArraysAsMultiValueStrings();
}
}
diff --git
a/core/src/main/java/org/apache/druid/math/expr/ExpressionProcessingConfig.java
b/core/src/main/java/org/apache/druid/math/expr/ExpressionProcessingConfig.java
index 1009a9d..b832578 100644
---
a/core/src/main/java/org/apache/druid/math/expr/ExpressionProcessingConfig.java
+++
b/core/src/main/java/org/apache/druid/math/expr/ExpressionProcessingConfig.java
@@ -29,8 +29,11 @@ public class ExpressionProcessingConfig
public static final String NESTED_ARRAYS_CONFIG_STRING =
"druid.expressions.allowNestedArrays";
public static final String NULL_HANDLING_LEGACY_LOGICAL_OPS_STRING =
"druid.expressions.useStrictBooleans";
// Coerce arrays to multi value strings
- public static final String
- PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING =
"druid.expressions.processArraysAsMultiValueStrings";
+ public static final String
PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING =
+ "druid.expressions.processArraysAsMultiValueStrings";
+ // Coerce 'null', '[]', and '[null]' into '[null]' for backwards compat with
0.22 and earlier
+ public static final String HOMOGENIZE_NULL_MULTIVALUE_STRING_ARRAYS =
+ "druid.expressions.homogenizeNullMultiValueStringArrays";
@JsonProperty("allowNestedArrays")
private final boolean allowNestedArrays;
@@ -41,27 +44,27 @@ public class ExpressionProcessingConfig
@JsonProperty("processArraysAsMultiValueStrings")
private final boolean processArraysAsMultiValueStrings;
+ @JsonProperty("homogenizeNullMultiValueStringArrays")
+ private final boolean homogenizeNullMultiValueStringArrays;
+
@JsonCreator
public ExpressionProcessingConfig(
@JsonProperty("allowNestedArrays") @Nullable Boolean allowNestedArrays,
@JsonProperty("useStrictBooleans") @Nullable Boolean useStrictBooleans,
- @JsonProperty("processArraysAsMultiValueStrings") @Nullable Boolean
processArraysAsMultiValueStrings
+ @JsonProperty("processArraysAsMultiValueStrings") @Nullable Boolean
processArraysAsMultiValueStrings,
+ @JsonProperty("homogenizeNullMultiValueStringArrays") @Nullable Boolean
homogenizeNullMultiValueStringArrays
)
{
- this.allowNestedArrays = allowNestedArrays == null
- ?
Boolean.valueOf(System.getProperty(NESTED_ARRAYS_CONFIG_STRING, "false"))
- : allowNestedArrays;
- if (useStrictBooleans == null) {
- this.useStrictBooleans = Boolean.parseBoolean(
- System.getProperty(NULL_HANDLING_LEGACY_LOGICAL_OPS_STRING, "false")
- );
- } else {
- this.useStrictBooleans = useStrictBooleans;
- }
- this.processArraysAsMultiValueStrings
- = processArraysAsMultiValueStrings == null
- ?
Boolean.valueOf(System.getProperty(PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING,
"false"))
- : processArraysAsMultiValueStrings;
+ this.allowNestedArrays = getWithPropertyFallbackFalse(allowNestedArrays,
NESTED_ARRAYS_CONFIG_STRING);
+ this.useStrictBooleans = getWithPropertyFallbackFalse(useStrictBooleans,
NULL_HANDLING_LEGACY_LOGICAL_OPS_STRING);
+ this.processArraysAsMultiValueStrings = getWithPropertyFallbackFalse(
+ processArraysAsMultiValueStrings,
+ PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING
+ );
+ this.homogenizeNullMultiValueStringArrays = getWithPropertyFallbackFalse(
+ homogenizeNullMultiValueStringArrays,
+ HOMOGENIZE_NULL_MULTIVALUE_STRING_ARRAYS
+ );
}
public boolean allowNestedArrays()
@@ -78,4 +81,14 @@ public class ExpressionProcessingConfig
{
return processArraysAsMultiValueStrings;
}
+
+ public boolean isHomogenizeNullMultiValueStringArrays()
+ {
+ return homogenizeNullMultiValueStringArrays;
+ }
+
+ private static boolean getWithPropertyFallbackFalse(@Nullable Boolean value,
String property)
+ {
+ return value != null ? value :
Boolean.valueOf(System.getProperty(property, "false"));
+ }
}
diff --git
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java
index f9ff05c..ec5f8bf 100644
---
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java
+++
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java
@@ -28,6 +28,7 @@ import org.apache.druid.java.util.common.NonnullPair;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprEval;
+import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.math.expr.InputBindings;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
@@ -256,30 +257,43 @@ public class ExpressionSelectors
final List<String> columns = plan.getAnalysis().getRequiredBindingsList();
final Map<String, Pair<ExpressionType, Supplier<Object>>> suppliers = new
HashMap<>();
for (String columnName : columns) {
- final ColumnCapabilities columnCapabilities =
columnSelectorFactory.getColumnCapabilities(columnName);
- final boolean multiVal = columnCapabilities != null &&
columnCapabilities.hasMultipleValues().isTrue();
+ final ColumnCapabilities capabilities =
columnSelectorFactory.getColumnCapabilities(columnName);
+ final boolean multiVal = capabilities != null &&
capabilities.hasMultipleValues().isTrue();
final Supplier<Object> supplier;
- final ExpressionType expressionType =
ExpressionType.fromColumnType(columnCapabilities);
-
- if (columnCapabilities == null ||
- columnCapabilities.isArray() ||
- (plan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT) &&
!plan.is(ExpressionPlan.Trait.NEEDS_APPLIED))
- ) {
- // Unknown ValueType or array type. Try making an Object selector and
see if that gives us anything useful.
+ final ExpressionType expressionType =
ExpressionType.fromColumnType(capabilities);
+
+ final boolean useObjectSupplierForMultiValueStringArray =
+ capabilities != null
+ // if homogenizing null multi-value string arrays, or if a single
valued function that must be applied across
+ // multi-value rows, we can just use the dimension selector, which
has the homogenization behavior built-in
+ && ((!capabilities.is(ValueType.STRING))
+ || (capabilities.is(ValueType.STRING)
+ &&
!ExpressionProcessing.isHomogenizeNullMultiValueStringArrays()
+ && !plan.is(ExpressionPlan.Trait.NEEDS_APPLIED)
+ )
+ )
+ // expression has array output
+ && plan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT);
+
+ final boolean homogenizeNullMultiValueStringArrays =
+ plan.is(ExpressionPlan.Trait.NEEDS_APPLIED) ||
ExpressionProcessing.isHomogenizeNullMultiValueStringArrays();
+
+ if (capabilities == null || capabilities.isArray() ||
useObjectSupplierForMultiValueStringArray) {
+ // Unknown type, array type, or output array uses an Object selector
and see if that gives anything useful
supplier = supplierFromObjectSelector(
columnSelectorFactory.makeColumnValueSelector(columnName),
- plan.is(ExpressionPlan.Trait.NEEDS_APPLIED)
+ homogenizeNullMultiValueStringArrays
);
- } else if (columnCapabilities.is(ValueType.FLOAT)) {
+ } else if (capabilities.is(ValueType.FLOAT)) {
ColumnValueSelector<?> selector =
columnSelectorFactory.makeColumnValueSelector(columnName);
supplier = makeNullableNumericSupplier(selector, selector::getFloat);
- } else if (columnCapabilities.is(ValueType.LONG)) {
+ } else if (capabilities.is(ValueType.LONG)) {
ColumnValueSelector<?> selector =
columnSelectorFactory.makeColumnValueSelector(columnName);
supplier = makeNullableNumericSupplier(selector, selector::getLong);
- } else if (columnCapabilities.is(ValueType.DOUBLE)) {
+ } else if (capabilities.is(ValueType.DOUBLE)) {
ColumnValueSelector<?> selector =
columnSelectorFactory.makeColumnValueSelector(columnName);
supplier = makeNullableNumericSupplier(selector, selector::getDouble);
- } else if (columnCapabilities.is(ValueType.STRING)) {
+ } else if (capabilities.is(ValueType.STRING)) {
supplier = supplierFromDimensionSelector(
columnSelectorFactory.makeDimensionSelector(new
DefaultDimensionSpec(columnName, columnName)),
multiVal
diff --git
a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java
b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java
index 80b5768..c6ef667 100644
---
a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java
+++
b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java
@@ -33,6 +33,7 @@ import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.FileUtils;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
+import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.aggregation.AggregationTestHelper;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
@@ -503,6 +504,57 @@ public class MultiValuedDimensionTest extends
InitializedNullHandlingTest
}
@Test
+ public void testGroupByExpressionMultiMultiBackwardsCompat0dot22andOlder()
+ {
+ try {
+ ExpressionProcessing.initializeForHomogenizeNullMultiValueStrings();
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(RuntimeException.class);
+ expectedException.expectMessage("GroupBy v1 does not support dimension
selectors with unknown cardinality.");
+ }
+ GroupByQuery query = GroupByQuery
+ .builder()
+ .setDataSource("xx")
+ .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
+ .setGranularity(Granularities.ALL)
+ .setDimensions(new DefaultDimensionSpec("texpr", "texpr"))
+ .setVirtualColumns(
+ new ExpressionVirtualColumn(
+ "texpr",
+ "cartesian_map((x,y) -> concat(x, y), tags, othertags)",
+ ColumnType.STRING,
+ TestExprMacroTable.INSTANCE
+ )
+ )
+ .setLimit(5)
+ .setAggregatorSpecs(new CountAggregatorFactory("count"))
+ .setContext(context)
+ .build();
+
+ Sequence<ResultRow> result = helper.runQueryOnSegmentsObjs(
+ ImmutableList.of(
+ new QueryableIndexSegment(queryableIndex,
SegmentId.dummy("sid1")),
+ new IncrementalIndexSegment(incrementalIndex,
SegmentId.dummy("sid2"))
+ ),
+ query
+ );
+
+ List<ResultRow> expectedResults = Arrays.asList(
+ GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970",
"texpr", "t1u1", "count", 2L),
+ GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970",
"texpr", "t1u2", "count", 2L),
+ GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970",
"texpr", "t2u1", "count", 2L),
+ GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970",
"texpr", "t2u2", "count", 2L),
+ GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970",
"texpr", "t3u1", "count", 2L)
+ );
+
+ TestHelper.assertExpectedObjects(expectedResults, result.toList(),
"expr-multi-multi");
+ }
+ finally {
+ ExpressionProcessing.initializeForTests(null);
+ }
+ }
+
+ @Test
public void testGroupByExpressionMultiMultiAuto()
{
if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
index 6e42936..0d20bad 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
@@ -21,7 +21,6 @@ package org.apache.druid.sql.calcite;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
-import junitparams.JUnitParamsRunner;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.HumanReadableBytes;
import org.apache.druid.java.util.common.IAE;
@@ -56,7 +55,6 @@ import org.apache.druid.segment.join.JoinType;
import org.apache.druid.sql.calcite.filtration.Filtration;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.junit.Test;
-import org.junit.runner.RunWith;
import java.util.Arrays;
import java.util.Collections;
@@ -65,7 +63,6 @@ import java.util.List;
/**
* Tests for array functions and array types
*/
-@RunWith(JUnitParamsRunner.class)
public class CalciteArraysQueryTest extends BaseCalciteQueryTest
{
// test some query stuffs, sort of limited since no native array column
types so either need to use constructor or
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java
index 49b9591..33b105b 100644
---
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java
+++
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java
@@ -21,9 +21,9 @@ package org.apache.druid.sql.calcite;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
-import junitparams.JUnitParamsRunner;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.granularity.Granularities;
+import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.Druids;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
@@ -42,11 +42,9 @@ import org.apache.druid.sql.SqlPlanningException;
import org.apache.druid.sql.calcite.filtration.Filtration;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.junit.Test;
-import org.junit.runner.RunWith;
import java.util.List;
-@RunWith(JUnitParamsRunner.class)
public class CalciteMultiValueStringQueryTest extends BaseCalciteQueryTest
{
// various queries on multi-valued string dimensions using them like strings
@@ -656,6 +654,70 @@ public class CalciteMultiValueStringQueryTest extends
BaseCalciteQueryTest
}
@Test
+ public void testMultiValueStringConcatBackwardsCompat0dot22andOlder() throws
Exception
+ {
+ try {
+ ExpressionProcessing.initializeForHomogenizeNullMultiValueStrings();
+ // Cannot vectorize due to usage of expressions.
+ cannotVectorize();
+
+ ImmutableList<Object[]> results;
+ if (useDefault) {
+ results = ImmutableList.of(
+ new Object[]{"", 6L},
+ new Object[]{"b", 4L},
+ new Object[]{"a", 2L},
+ new Object[]{"c", 2L},
+ new Object[]{"d", 2L}
+ );
+ } else {
+ results = ImmutableList.of(
+ new Object[]{null, 4L},
+ new Object[]{"b", 4L},
+ new Object[]{"", 2L},
+ new Object[]{"a", 2L},
+ new Object[]{"c", 2L},
+ new Object[]{"d", 2L}
+ );
+ }
+ testQuery(
+ "SELECT MV_CONCAT(dim3, dim3), SUM(cnt) FROM druid.numfoo GROUP BY 1
ORDER BY 2 DESC",
+ ImmutableList.of(
+ GroupByQuery.builder()
+ .setDataSource(CalciteTests.DATASOURCE3)
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setVirtualColumns(expressionVirtualColumn(
+ "v0",
+ "array_concat(\"dim3\",\"dim3\")",
+ ColumnType.STRING
+ ))
+ .setDimensions(
+ dimensions(
+ new DefaultDimensionSpec("v0", "_d0",
ColumnType.STRING)
+ )
+ )
+ .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "cnt")))
+ .setLimitSpec(new DefaultLimitSpec(
+ ImmutableList.of(new OrderByColumnSpec(
+ "a0",
+ OrderByColumnSpec.Direction.DESCENDING,
+ StringComparators.NUMERIC
+ )),
+ Integer.MAX_VALUE
+ ))
+ .setContext(QUERY_CONTEXT_DEFAULT)
+ .build()
+ ),
+ results
+ );
+ }
+ finally {
+ ExpressionProcessing.initializeForTests(null);
+ }
+ }
+
+ @Test
public void testMultiValueStringOffset() throws Exception
{
// Cannot vectorize due to usage of expressions.
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java
index 8994806..2750032 100644
---
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java
+++
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java
@@ -20,7 +20,6 @@
package org.apache.druid.sql.calcite;
import com.google.common.collect.ImmutableList;
-import junitparams.JUnitParamsRunner;
import org.apache.calcite.avatica.SqlType;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.DateTimes;
@@ -43,7 +42,6 @@ import org.apache.druid.sql.calcite.filtration.Filtration;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.apache.druid.sql.http.SqlParameter;
import org.junit.Test;
-import org.junit.runner.RunWith;
import java.util.ArrayList;
import java.util.List;
@@ -54,7 +52,6 @@ import java.util.List;
* were merely chosen to produce a selection of parameter types and positions
within query expressions and have been
* renamed to reflect this
*/
-@RunWith(JUnitParamsRunner.class)
public class CalciteParameterQueryTest extends BaseCalciteQueryTest
{
@Test
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]