imply-cheddar commented on code in PR #13976:
URL: https://github.com/apache/druid/pull/13976#discussion_r1148702901
##########
sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java:
##########
@@ -94,9 +95,15 @@ public static DruidProjectOnUnnestRule instance()
public boolean matches(RelOptRuleCall call)
{
final Project rightP = call.rel(0);
- final SqlKind rightProjectKind = rightP.getChildExps().get(0).getKind();
- // allow rule to trigger only if there's a string CAST or numeric
literal cast
- return rightP.getProjects().size() == 1 && (rightProjectKind ==
SqlKind.CAST || rightProjectKind == SqlKind.LITERAL);
+ if (rightP.getChildExps().size() > 0) {
+ final SqlKind rightProjectKind =
rightP.getChildExps().get(0).getKind();
+ final SqlTypeName rightType =
rightP.getChildExps().get(0).getType().getSqlTypeName();
+ // allow rule to trigger only if there's a non-decimal CAST or numeric
literal cast
+ return rightP.getProjects().size() == 1 && ((rightProjectKind ==
SqlKind.CAST
+ && rightType !=
SqlTypeName.DECIMAL)
+ || rightProjectKind ==
SqlKind.LITERAL);
Review Comment:
Why? What's wrong with a decimal cast?
##########
sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java:
##########
@@ -4168,4 +4169,118 @@ public void
testUnnestWithMultipleOrFiltersOnUnnestedColumnsAndOnOriginalColumnD
)
);
}
+
+ @Test
+ public void testUnnestWithCountOnColumn()
+ {
+ skipVectorize();
+ cannotVectorize();
+ testQuery(
+ "SELECT count(*) d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as
unnested (d3)",
+ QUERY_CONTEXT_UNNEST,
+ ImmutableList.of(
+ Druids.newTimeseriesQueryBuilder()
+ .dataSource(UnnestDataSource.create(
+ new TableDataSource(CalciteTests.DATASOURCE3),
+ expressionVirtualColumn("j0.unnest", "\"dim3\"",
ColumnType.STRING),
+ null
+ ))
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .context(QUERY_CONTEXT_UNNEST)
+ .aggregators(aggregators(new CountAggregatorFactory("a0")))
+ .build()
+ ),
+ ImmutableList.of(
+ new Object[]{8L}
+ )
+ );
+ }
+
+ @Test
+ public void testUnnestWithGroupByHavingSelector()
+ {
+ skipVectorize();
+ cannotVectorize();
+ testQuery(
+ "SELECT d3, COUNT(*) FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) AS
unnested(d3) GROUP BY d3 HAVING d3='b'",
+ QUERY_CONTEXT_UNNEST,
+ ImmutableList.of(
+ GroupByQuery.builder()
+ .setDataSource(UnnestDataSource.create(
+ new TableDataSource(CalciteTests.DATASOURCE3),
+ expressionVirtualColumn("j0.unnest", "\"dim3\"",
ColumnType.STRING),
+ null
+ ))
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setContext(QUERY_CONTEXT_UNNEST)
+ .setDimensions(new DefaultDimensionSpec("j0.unnest",
"_d0", ColumnType.STRING))
+ .setGranularity(Granularities.ALL)
+ .setDimFilter(selector("j0.unnest", "b", null))
Review Comment:
Why does the HAVING clause get pushed down to a DimFilter here, but not all
the way down to the UnnestDataSource? If it's safe to push it down from the
HAVING to the WHERE, then it should be safe to push it down to the RHS of the
LogicalCorrelate as well.
##########
processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java:
##########
@@ -359,6 +359,59 @@ public void
test_pushdown_filters_unnested_dimension_with_unnest_adapters()
return null;
});
}
+
+
+ @Test
+ public void test_pushdown_filters_unnested_dimension_outside()
+ {
+ final UnnestStorageAdapter unnestStorageAdapter = new UnnestStorageAdapter(
+ new TestStorageAdapter(INCREMENTAL_INDEX),
+ new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME +
"\"", null, ExprMacroTable.nil()),
+ null
+ );
+
+ final VirtualColumn vc = unnestStorageAdapter.getUnnestColumn();
+
+ final String inputColumn =
unnestStorageAdapter.getUnnestInputIfDirectAccess(vc);
+
+ final Filter expectedPushDownFilter =
+ new SelectorDimFilter(inputColumn, "1", null).toFilter();
+
+
+ final Filter queryFilter = new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1",
null).toFilter();
+ final Sequence<Cursor> cursorSequence = unnestStorageAdapter.makeCursors(
+ queryFilter,
+ unnestStorageAdapter.getInterval(),
+ VirtualColumns.EMPTY,
+ Granularities.ALL,
+ false,
+ null
+ );
+
+ final TestStorageAdapter base = (TestStorageAdapter)
unnestStorageAdapter.getBaseAdapter();
+ final Filter pushDownFilter = base.getPushDownFilter();
+
+ Assert.assertEquals(expectedPushDownFilter, pushDownFilter);
+ cursorSequence.accumulate(null, (accumulated, cursor) -> {
+ Assert.assertEquals(cursor.getClass(), PostJoinCursor.class);
+ final Filter postFilter = ((PostJoinCursor) cursor).getPostJoinFilter();
+ Assert.assertEquals(queryFilter, postFilter);
+
+ ColumnSelectorFactory factory = cursor.getColumnSelectorFactory();
+ DimensionSelector dimSelector =
factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME));
+ int count = 0;
+ while (!cursor.isDone()) {
+ Object dimSelectorVal = dimSelector.getObject();
+ if (dimSelectorVal == null) {
+ Assert.assertNull(dimSelectorVal);
+ }
Review Comment:
if it's null, assert that it's null? I'm not sure this is actually testing
anything?
##########
sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java:
##########
@@ -4168,4 +4169,118 @@ public void
testUnnestWithMultipleOrFiltersOnUnnestedColumnsAndOnOriginalColumnD
)
);
}
+
+ @Test
+ public void testUnnestWithCountOnColumn()
+ {
+ skipVectorize();
+ cannotVectorize();
+ testQuery(
+ "SELECT count(*) d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as
unnested (d3)",
+ QUERY_CONTEXT_UNNEST,
+ ImmutableList.of(
+ Druids.newTimeseriesQueryBuilder()
+ .dataSource(UnnestDataSource.create(
+ new TableDataSource(CalciteTests.DATASOURCE3),
+ expressionVirtualColumn("j0.unnest", "\"dim3\"",
ColumnType.STRING),
+ null
+ ))
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .context(QUERY_CONTEXT_UNNEST)
+ .aggregators(aggregators(new CountAggregatorFactory("a0")))
+ .build()
+ ),
+ ImmutableList.of(
+ new Object[]{8L}
+ )
+ );
+ }
+
+ @Test
+ public void testUnnestWithGroupByHavingSelector()
+ {
+ skipVectorize();
+ cannotVectorize();
+ testQuery(
+ "SELECT d3, COUNT(*) FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) AS
unnested(d3) GROUP BY d3 HAVING d3='b'",
Review Comment:
Maybe add a test that also has a WHERE clause along with the HAVING?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]