This is an automated email from the ASF dual-hosted git repository.
gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new b746bf9129 fix virtual column cycle bug, sql virtual column optimize
bug (#12576)
b746bf9129 is described below
commit b746bf91298e704c16375a60df73bd73fe4e2309
Author: Clint Wylie <[email protected]>
AuthorDate: Mon May 30 23:51:21 2022 -0700
fix virtual column cycle bug, sql virtual column optimize bug (#12576)
* fix virtual column cycle bug, sql virtual column optimize bug
* more test
---
.../org/apache/druid/segment/VirtualColumns.java | 7 +-
.../druid/segment/virtual/VirtualColumnsTest.java | 39 +++++
.../sql/calcite/expression/DruidExpression.java | 2 +-
.../MultiValueStringOperatorConversions.java | 8 +-
.../calcite/CalciteMultiValueStringQueryTest.java | 180 +++++++++++++++++++++
5 files changed, 229 insertions(+), 7 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java
b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java
index bbbd6b0e1f..3384222df7 100644
--- a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java
+++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java
@@ -464,12 +464,11 @@ public class VirtualColumns implements Cacheable
: Sets.newHashSet(columnNames);
for (String columnName : virtualColumn.requiredColumns()) {
- if (!nextSet.add(columnName)) {
- throw new IAE("Self-referential column[%s]", columnName);
- }
-
final VirtualColumn dependency = getVirtualColumn(columnName);
if (dependency != null) {
+ if (!nextSet.add(columnName)) {
+ throw new IAE("Self-referential column[%s]", columnName);
+ }
detectCycles(dependency, nextSet);
}
}
diff --git
a/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java
b/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java
index cb46c634d0..fd3d037057 100644
---
a/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java
@@ -392,6 +392,45 @@ public class VirtualColumnsTest extends
InitializedNullHandlingTest
);
}
+ @Test
+ public void testCompositeVirtualColumnsCycles()
+ {
+ final ExpressionVirtualColumn expr1 = new ExpressionVirtualColumn("v1", "1
+ x", ColumnType.LONG, TestExprMacroTable.INSTANCE);
+ final ExpressionVirtualColumn expr2 = new ExpressionVirtualColumn("v2", "1
+ y", ColumnType.LONG, TestExprMacroTable.INSTANCE);
+ final ExpressionVirtualColumn expr0 = new ExpressionVirtualColumn("v0",
"case_searched(notnull(1 + x), v1, v2)", ColumnType.LONG,
TestExprMacroTable.INSTANCE);
+ final VirtualColumns virtualColumns =
VirtualColumns.create(ImmutableList.of(expr0, expr1, expr2));
+
+ Assert.assertTrue(virtualColumns.exists("v0"));
+ Assert.assertTrue(virtualColumns.exists("v1"));
+ Assert.assertTrue(virtualColumns.exists("v2"));
+ }
+
+ @Test
+ public void testCompositeVirtualColumnsCyclesSiblings()
+ {
+ final ExpressionVirtualColumn expr1 = new ExpressionVirtualColumn("v1", "1
+ x", ColumnType.LONG, TestExprMacroTable.INSTANCE);
+ final ExpressionVirtualColumn expr2 = new ExpressionVirtualColumn("v2", "1
+ y", ColumnType.LONG, TestExprMacroTable.INSTANCE);
+ final ExpressionVirtualColumn expr0 = new ExpressionVirtualColumn("v0",
"case_searched(notnull(v1), v1, v2)", ColumnType.LONG,
TestExprMacroTable.INSTANCE);
+ final VirtualColumns virtualColumns =
VirtualColumns.create(ImmutableList.of(expr0, expr1, expr2));
+
+ Assert.assertTrue(virtualColumns.exists("v0"));
+ Assert.assertTrue(virtualColumns.exists("v1"));
+ Assert.assertTrue(virtualColumns.exists("v2"));
+ }
+
+ @Test
+ public void testCompositeVirtualColumnsCyclesTree()
+ {
+ final ExpressionVirtualColumn expr1 = new ExpressionVirtualColumn("v1", "1
+ x", ColumnType.LONG, TestExprMacroTable.INSTANCE);
+ final ExpressionVirtualColumn expr2 = new ExpressionVirtualColumn("v2", "1
+ v1", ColumnType.LONG, TestExprMacroTable.INSTANCE);
+ final ExpressionVirtualColumn expr0 = new ExpressionVirtualColumn("v0",
"v1 + v2", ColumnType.LONG, TestExprMacroTable.INSTANCE);
+ final VirtualColumns virtualColumns =
VirtualColumns.create(ImmutableList.of(expr0, expr1, expr2));
+
+ Assert.assertTrue(virtualColumns.exists("v0"));
+ Assert.assertTrue(virtualColumns.exists("v1"));
+ Assert.assertTrue(virtualColumns.exists("v2"));
+ }
+
private VirtualColumns makeVirtualColumns()
{
final ExpressionVirtualColumn expr = new ExpressionVirtualColumn(
diff --git
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java
index 030668ce63..950fc93783 100644
---
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java
+++
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java
@@ -495,7 +495,7 @@ public class DruidExpression
{
List<DruidExpression> list = new ArrayList<>(expressions.size());
for (DruidExpression expr : expressions) {
- list.add(visit(expr));
+ list.add(visit(expr.visit(this)));
}
return list;
}
diff --git
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringOperatorConversions.java
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringOperatorConversions.java
index 25d4bb8a10..1d553f7363 100644
---
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringOperatorConversions.java
+++
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringOperatorConversions.java
@@ -19,7 +19,7 @@
package org.apache.druid.sql.calcite.expression.builtin;
-import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Sets;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlFunction;
@@ -44,6 +44,8 @@ import org.apache.druid.sql.calcite.planner.Calcites;
import org.apache.druid.sql.calcite.planner.PlannerContext;
import javax.annotation.Nullable;
+import java.util.Arrays;
+import java.util.HashSet;
import java.util.List;
/**
@@ -340,6 +342,8 @@ public class MultiValueStringOperatorConversions
if (lit == null || lit.length == 0) {
return null;
}
+ HashSet<String> literals = Sets.newHashSetWithExpectedSize(lit.length);
+ literals.addAll(Arrays.asList(lit));
final DruidExpression.ExpressionGenerator builder = (args) -> {
final StringBuilder expressionBuilder;
@@ -364,7 +368,7 @@ public class MultiValueStringOperatorConversions
(name, outputType, expression, macroTable) -> new
ListFilteredVirtualColumn(
name,
druidExpressions.get(0).getSimpleExtraction().toDimensionSpec(druidExpressions.get(0).getDirectColumn(),
outputType),
- ImmutableSet.copyOf(lit),
+ literals,
isAllowList()
)
);
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java
index 0c0937d35f..ca591ead9f 100644
---
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java
+++
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java
@@ -46,8 +46,10 @@ import org.apache.druid.sql.calcite.util.CalciteTests;
import org.junit.Test;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
public class CalciteMultiValueStringQueryTest extends BaseCalciteQueryTest
{
@@ -1195,6 +1197,184 @@ public class CalciteMultiValueStringQueryTest extends
BaseCalciteQueryTest
);
}
+ @Test
+ public void testMultiValueListFilterComposedNested() throws Exception
+ {
+ // Cannot vectorize due to usage of expressions.
+ cannotVectorize();
+
+ testQuery(
+ "SELECT COALESCE(MV_FILTER_ONLY(dim3, ARRAY['b']), 'no b'), SUM(cnt)
FROM druid.numfoo GROUP BY 1 ORDER BY 2 DESC",
+ ImmutableList.of(
+ GroupByQuery.builder()
+ .setDataSource(CalciteTests.DATASOURCE3)
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setVirtualColumns(
+ expressionVirtualColumn(
+ "v0",
+ "case_searched(notnull(\"v1\"),\"v1\",'no b')",
+ ColumnType.STRING
+ ),
+ new ListFilteredVirtualColumn(
+ "v1",
+ DefaultDimensionSpec.of("dim3"),
+ ImmutableSet.of("b"),
+ true
+ )
+ )
+ .setDimensions(
+ dimensions(
+ new DefaultDimensionSpec("v0", "_d0",
ColumnType.STRING)
+ )
+ )
+ .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "cnt")))
+ .setLimitSpec(new DefaultLimitSpec(
+ ImmutableList.of(new OrderByColumnSpec(
+ "a0",
+ OrderByColumnSpec.Direction.DESCENDING,
+ StringComparators.NUMERIC
+ )),
+ Integer.MAX_VALUE
+ ))
+ .setContext(QUERY_CONTEXT_DEFAULT)
+ .build()
+ ),
+ // this is a bug
+ // instead of default values it should actually be 'no b';
+ //
+ // it happens when using 'notnull' on the mv-filtered virtual column
because deferred expression selector
+ // returns a 0 sized row, which is treated as a missing value by the
grouping and the expression which would
+ // evaluate it and return 'no b' is never evaluated (because we don't
add a null value in the forwarding
+ // dictionary even if it exists in the underlying column).
+ //
+ // if the 'notnull' was instead using the array filtering fallback
expression
+ // case_searched(notnull(filter((x) -> array_contains(array('b'), x),
\"dim3\")),\"v1\",'no b')
+ // where it doesn't use the deferred selector because it is no longer
a single input expression, it would still
+ // evaluate to null because the filter expression never returns null,
only an empty array, which is not null,
+ // so it evaluates 'v1' which of course is null because it is an empty
row.
+ useDefault ? ImmutableList.of(
+ new Object[]{"", 4L},
+ new Object[]{"b", 2L}
+ ) : ImmutableList.of(
+ new Object[]{null, 4L},
+ new Object[]{"b", 2L}
+ )
+ );
+ }
+
+ @Test
+ public void testMultiValueListFilterComposedNested2Input() throws Exception
+ {
+ // Cannot vectorize due to usage of expressions.
+ cannotVectorize();
+
+ testQuery(
+ "SELECT COALESCE(MV_FILTER_ONLY(dim3, ARRAY['b']), dim1), SUM(cnt)
FROM druid.numfoo GROUP BY 1 ORDER BY 2 DESC",
+ ImmutableList.of(
+ GroupByQuery.builder()
+ .setDataSource(CalciteTests.DATASOURCE3)
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setVirtualColumns(
+ expressionVirtualColumn(
+ "v0",
+
"case_searched(notnull(\"v1\"),\"v1\",\"dim1\")",
+ ColumnType.STRING
+ ),
+ new ListFilteredVirtualColumn(
+ "v1",
+ DefaultDimensionSpec.of("dim3"),
+ ImmutableSet.of("b"),
+ true
+ )
+ )
+ .setDimensions(
+ dimensions(
+ new DefaultDimensionSpec("v0", "_d0",
ColumnType.STRING)
+ )
+ )
+ .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "cnt")))
+ .setLimitSpec(new DefaultLimitSpec(
+ ImmutableList.of(new OrderByColumnSpec(
+ "a0",
+ OrderByColumnSpec.Direction.DESCENDING,
+ StringComparators.NUMERIC
+ )),
+ Integer.MAX_VALUE
+ ))
+ .setContext(QUERY_CONTEXT_DEFAULT)
+ .build()
+ ),
+ ImmutableList.of(
+ new Object[]{"b", 2L},
+ new Object[]{"1", 1L},
+ new Object[]{"2", 1L},
+ new Object[]{"abc", 1L},
+ new Object[]{"def", 1L}
+ )
+ );
+ }
+
+ @Test
+ public void testMultiValueListFilterComposedNestedNullLiteral() throws
Exception
+ {
+ // Cannot vectorize due to usage of expressions.
+ cannotVectorize();
+ Set<String> filter = new HashSet<>();
+ filter.add(null);
+ filter.add("b");
+
+ testQuery(
+ "SELECT COALESCE(MV_FILTER_ONLY(dim3, ARRAY[NULL, 'b']), 'no b'),
SUM(cnt) FROM druid.numfoo GROUP BY 1 ORDER BY 2 DESC",
+ ImmutableList.of(
+ GroupByQuery.builder()
+ .setDataSource(CalciteTests.DATASOURCE3)
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setVirtualColumns(
+ expressionVirtualColumn(
+ "v0",
+ "case_searched(notnull(\"v1\"),\"v1\",'no b')",
+ ColumnType.STRING
+ ),
+ new ListFilteredVirtualColumn(
+ "v1",
+ DefaultDimensionSpec.of("dim3"),
+ filter,
+ true
+ )
+ )
+ .setDimensions(
+ dimensions(
+ new DefaultDimensionSpec("v0", "_d0",
ColumnType.STRING)
+ )
+ )
+ .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "cnt")))
+ .setLimitSpec(new DefaultLimitSpec(
+ ImmutableList.of(new OrderByColumnSpec(
+ "a0",
+ OrderByColumnSpec.Direction.DESCENDING,
+ StringComparators.NUMERIC
+ )),
+ Integer.MAX_VALUE
+ ))
+ .setContext(QUERY_CONTEXT_DEFAULT)
+ .build()
+ ),
+ // unfortunately, unable to work around the bug by adding nulls to the
allow list
+ useDefault ? ImmutableList.of(
+ new Object[]{"", 2L},
+ new Object[]{"b", 2L},
+ new Object[]{"no b", 2L}
+ ) : ImmutableList.of(
+ new Object[]{null, 3L},
+ new Object[]{"b", 2L},
+ new Object[]{"no b", 1L}
+ )
+ );
+ }
+
@Test
public void testMultiValueListFilterComposedDeny() throws Exception
{
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]