cryptoe commented on a change in pull request #12078:
URL: https://github.com/apache/druid/pull/12078#discussion_r782133152
##########
File path:
processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java
##########
@@ -1306,6 +1309,531 @@ public void testMultiValueDimension()
TestHelper.assertExpectedObjects(expectedResults, results,
"multi-value-dim");
}
+ @Test
+ public void testMultiValueDimensionAsArray()
+ {
+ // array types don't work with group by v1
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage(
+ "GroupBy v1 only supports dimensions with an outputType of STRING.");
+ }
+
+ // Cannot vectorize due to multi-value dimensions.
+ cannotVectorize();
+
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setVirtualColumns(new ExpressionVirtualColumn(
+ "v0",
+ "mv_to_array(placementish)",
+ ColumnType.STRING_ARRAY,
+ ExprMacroTable.nil()
+ ))
+ .setDimensions(
+ new DefaultDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY))
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new
LongSumAggregatorFactory("idx", "index"))
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ List<ResultRow> expectedResults = Arrays.asList(
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("a",
"preferred"), "rows", 2L, "idx", 282L),
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("b",
"preferred"), "rows", 2L, "idx", 230L),
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("e",
"preferred"), "rows", 2L, "idx", 324L),
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("h",
"preferred"), "rows", 2L, "idx", 233L),
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("m",
"preferred"), "rows", 6L, "idx", 5317L),
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("n",
"preferred"), "rows", 2L, "idx", 235L),
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("p",
"preferred"), "rows", 6L, "idx", 5405L),
+ makeRow(query, "2011-04-01", "alias",
ComparableStringArray.of("preferred", "t"), "rows", 4L, "idx", 420L)
+ );
+
+ Iterable<ResultRow> results =
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ TestHelper.assertExpectedObjects(expectedResults, results,
"multi-value-dim-groupby-arrays");
+ }
+
+ @Test
+ public void testSingleValueDimensionAsArray()
+ {
+ // array types don't work with group by v1
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage(
+ "GroupBy v1 only supports dimensions with an outputType of STRING");
+ }
+
+ // Cannot vectorize due to multi-value dimensions.
+ cannotVectorize();
+
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setVirtualColumns(new ExpressionVirtualColumn(
+ "v0",
+ "mv_to_array(placement)",
+ ColumnType.STRING_ARRAY,
+ ExprMacroTable.nil()
+ ))
+ .setDimensions(
+ new DefaultDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY))
+ .setAggregatorSpecs(
+ QueryRunnerTestHelper.ROWS_COUNT,
+ new LongSumAggregatorFactory("idx", "index")
+ )
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ List<ResultRow> expectedResults = ImmutableList.of(
+ makeRow(query, "2011-04-01", "alias",
+ ComparableStringArray.of("preferred"), "rows", 26L, "idx",
12446L
+ )
+ );
+
+ Iterable<ResultRow> results =
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ TestHelper.assertExpectedObjects(expectedResults, results,
"multi-value-dim-groupby-arrays");
+ }
+
+ @Test
+ public void testMultiValueDimensionAsArrayWithOtherDims()
+ {
+ // array types don't work with group by v1
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage(
+ "GroupBy v1 only supports dimensions with an outputType of STRING");
+ }
+
+
+ // Cannot vectorize due to multi-value dimensions.
+ cannotVectorize();
+
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setVirtualColumns(new ExpressionVirtualColumn(
+ "v0",
+ "mv_to_array(placementish)",
+ ColumnType.STRING_ARRAY,
+ ExprMacroTable.nil()
+ ))
+ .setDimensions(
+ new DefaultDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY),
+ new DefaultDimensionSpec("quality", "quality")
+ )
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new
LongSumAggregatorFactory("idx", "index"))
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ List<ResultRow> expectedResults = Arrays.asList(
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("a", "preferred"),
+ "quality",
+ "automotive",
+ "rows",
+ 2L,
+ "idx",
+ 282L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("b", "preferred"),
+ "quality",
+ "business",
+ "rows",
+ 2L,
+ "idx",
+ 230L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("e", "preferred"),
+ "quality",
+ "entertainment",
+ "rows",
+ 2L,
+ "idx",
+ 324L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("h", "preferred"),
+ "quality",
+ "health",
+ "rows",
+ 2L,
+ "idx",
+ 233L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("m", "preferred"),
+ "quality",
+ "mezzanine",
+ "rows",
+ 6L,
+ "idx",
+ 5317L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("n", "preferred"),
+ "quality",
+ "news",
+ "rows",
+ 2L,
+ "idx",
+ 235L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("p", "preferred"),
+ "quality",
+ "premium",
+ "rows",
+ 6L,
+ "idx",
+ 5405L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("preferred", "t"),
+ "quality",
+ "technology",
+ "rows",
+ 2L,
+ "idx",
+ 175L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("preferred", "t"),
+ "quality",
+ "travel",
+ "rows",
+ 2L,
+ "idx",
+ 245L
+ )
+ );
+
+ Iterable<ResultRow> results =
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ TestHelper.assertExpectedObjects(expectedResults, results,
"multi-value-dims-groupby-arrays");
+ }
+
+ @Test
+ public void testMultiValueDimensionAsStringArrayWithoutExpression()
+ {
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING");
+ } else if (!vectorize) {
+ expectedException.expect(RuntimeException.class);
+ expectedException.expectMessage("Not supported for multi-value dimensions");
+ }
+
+ cannotVectorize();
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setDimensions(
+ new DefaultDimensionSpec("placementish", "alias",
ColumnType.STRING_ARRAY)
+ )
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new
LongSumAggregatorFactory("idx", "index"))
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ }
+
+ @Test
+ public void testSingleValueDimensionAsStringArrayWithoutExpression()
+ {
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING");
+ } else if (!vectorize) {
+ expectedException.expect(RuntimeException.class);
+ expectedException.expectMessage("java.lang.String cannot be cast to [Ljava.util.Objects");
+ }
+ cannotVectorize();
+
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setDimensions(
+ new DefaultDimensionSpec("placement", "alias",
ColumnType.STRING_ARRAY)
+ )
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new
LongSumAggregatorFactory("idx", "index"))
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ Iterable<ResultRow> results =
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ List<ResultRow> expectedResults = ImmutableList.of(
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("preferred"),
+ "rows",
+ 26L,
+ "idx",
+ 12446L
+ ));
+ TestHelper.assertExpectedObjects(
+ expectedResults,
+ results,
+ "single-value-dims-groupby-arrays-as-string-arrays"
+ );
+ }
+
+
+ @Test
+ public void testNumericDimAsStringArrayWithoutExpression()
+ {
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING");
+ } else if (!vectorize) {
+ expectedException.expect(RuntimeException.class);
+ expectedException.expectMessage("java.lang.Double cannot be cast to [Ljava.util.Objects");
+ }
+
+ cannotVectorize();
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setDimensions(
+ new DefaultDimensionSpec("index", "alias", ColumnType.STRING_ARRAY)
+ )
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new
LongSumAggregatorFactory("idx", "index"))
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ }
+
+
+ @Test
+ public void testMultiValueVirtualDimAsString()
+ {
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING");
+ } else if (!vectorize) {
+ expectedException.expect(RuntimeException.class);
+ expectedException.expectMessage("java.lang.Double cannot be cast to [Ljava.util.Objects");
+ }
+
+ cannotVectorize();
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setVirtualColumns(new ExpressionVirtualColumn(
+ "v0",
+ "mv_to_array(placementish)",
+ ColumnType.STRING_ARRAY,
+ ExprMacroTable.nil()
+ ))
+ .setDimensions(
+ new DefaultDimensionSpec("vo", "alias", ColumnType.STRING)
+ )
+ .setDimensions(
+ new DefaultDimensionSpec("index", "alias", ColumnType.STRING_ARRAY)
+ )
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new
LongSumAggregatorFactory("idx", "index"))
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ }
+
+ @Test
+ public void testExtractionStringSpecWithMultiValueVirtualDimAsInput()
+ {
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality");
+ }
+ cannotVectorize();
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setVirtualColumns(new ExpressionVirtualColumn(
+ "v0",
+ "mv_to_array(placementish)",
+ ColumnType.STRING_ARRAY,
+ ExprMacroTable.nil()
+ ))
+ .setDimensions(
+ new ExtractionDimensionSpec("v0", "alias", ColumnType.STRING,
+ new SubstringDimExtractionFn(1, 1)
+ )
+ )
+
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new
LongSumAggregatorFactory("idx", "index"))
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ List<ResultRow> expectedResults = Arrays.asList(
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ null,
+ "rows",
+ 26L,
+ "idx",
+ 12446L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ "r",
+ "rows",
+ 26L,
+ "idx",
+ 12446L
+ )
+ );
+
+ Iterable<ResultRow> results =
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ TestHelper.assertExpectedObjects(
+ expectedResults,
+ results,
+ "multi-value-extraction-spec-as-string-dim-groupby-arrays"
+ );
+ }
+
+
+ @Test
+ public void testExtractionStringArraySpecWithMultiValueVirtualDimAsInput()
+ {
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING");
+ } else if (!vectorize) {
+ expectedException.expect(RuntimeException.class);
+ expectedException.expectMessage("Not supported for multi-value dimensions");
+ }
+
+ cannotVectorize();
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setVirtualColumns(new ExpressionVirtualColumn(
+ "v0",
+ "mv_to_array(placementish)",
+ ColumnType.STRING_ARRAY,
+ ExprMacroTable.nil()
+ ))
+ .setDimensions(
+ new ExtractionDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY,
+ new SubstringDimExtractionFn(1, 1)
+ )
+ )
+
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new
LongSumAggregatorFactory("idx", "index"))
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ }
+
+ @Test
+ public void testVirtualColumnNumericTypeAsStringArray()
+ {
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING");
+ } else if (!vectorize) {
+ expectedException.expect(RuntimeException.class);
+ expectedException.expectMessage(
+ "org.apache.druid.segment.data.ComparableList cannot be cast to [Ljava.util.Objects");
+ }
+
+ cannotVectorize();
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setVirtualColumns(new ExpressionVirtualColumn(
+ "v0",
+ "array(index)",
+ ColumnType.STRING_ARRAY,
+ ExprMacroTable.nil()
+ ))
+ .setDimensions(
+ new DefaultDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY
+ )
+ )
+
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ }
+
+ @Test
+ public void testNestedGroupByWithArrays()
+ {
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING");
+ } else {
+ expectedException.expect(IAE.class);
+ expectedException.expectMessage("Cannot create query type helper from invalid type [ARRAY<STRING>]");
Review comment:
fixed
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]