clintropolis commented on a change in pull request #12078:
URL: https://github.com/apache/druid/pull/12078#discussion_r781661764
##########
File path: core/src/main/java/org/apache/druid/math/expr/Function.java
##########
@@ -2977,6 +2977,67 @@ public ExpressionType
getOutputType(Expr.InputBindingInspector inspector, List<E
}
}
+ class MVToArrayFunction implements Function
+ {
+ @Override
+ public String name()
+ {
+ return "mv_to_array";
+ }
+
+ @Override
+ public ExprEval apply(List<Expr> args, Expr.ObjectBinding bindings)
+ {
+ return args.get(0).eval(bindings);
+ }
+
+ @Override
+ public void validateArguments(List<Expr> args)
+ {
+ if (args.size() != 1) {
+ throw new IAE("Function[%s] needs exactly 1 argument of type String",
name());
+ }
+ IdentifierExpr expr = args.get(0).getIdentifierExprIfIdentifierExpr();
+
+ if (expr == null) {
+ throw new IAE(
+ "Arg %s should be an identifier expression ie refer to columns
directally. Use array[] instead",
Review comment:
```suggestion
"Arg %s should be an identifier expression ie refer to columns
directly. Use array[] instead",
```
Also, in the native expression layer the syntax is actually `array(...)`, not
`array[]`, so the message should point users at that instead.
##########
File path: core/src/main/java/org/apache/druid/math/expr/Function.java
##########
@@ -2977,6 +2977,67 @@ public ExpressionType
getOutputType(Expr.InputBindingInspector inspector, List<E
}
}
+ class MVToArrayFunction implements Function
+ {
+ @Override
+ public String name()
+ {
+ return "mv_to_array";
+ }
+
+ @Override
+ public ExprEval apply(List<Expr> args, Expr.ObjectBinding bindings)
+ {
+ return args.get(0).eval(bindings);
Review comment:
does this need to cast the output to string array for the cases where
the input was not multi-valued?
```suggestion
return args.get(0).eval(bindings).castTo(ExpressionType.STRING_ARRAY);
```
`assertArrayExpr` in the tests calls `asArray` on the output before comparing,
but I'm not sure every selector that could be built on top of this expression
would necessarily do the same. Since we explicitly want to always return an
array, it would probably be safer to do the cast here (or just eliminate this
expression and use cast directly).
##########
File path:
processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java
##########
@@ -1306,6 +1309,531 @@ public void testMultiValueDimension()
TestHelper.assertExpectedObjects(expectedResults, results,
"multi-value-dim");
}
+ @Test
+ public void testMultiValueDimensionAsArray()
+ {
+ // array types don't work with group by v1
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage(
+ "GroupBy v1 only supports dimensions with an outputType of STRING.");
+ }
+
+ // Cannot vectorize due to multi-value dimensions.
+ cannotVectorize();
+
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setVirtualColumns(new ExpressionVirtualColumn(
+ "v0",
+ "mv_to_array(placementish)",
+ ColumnType.STRING_ARRAY,
+ ExprMacroTable.nil()
+ ))
+ .setDimensions(
+ new DefaultDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY))
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new
LongSumAggregatorFactory("idx", "index"))
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ List<ResultRow> expectedResults = Arrays.asList(
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("a",
"preferred"), "rows", 2L, "idx", 282L),
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("b",
"preferred"), "rows", 2L, "idx", 230L),
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("e",
"preferred"), "rows", 2L, "idx", 324L),
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("h",
"preferred"), "rows", 2L, "idx", 233L),
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("m",
"preferred"), "rows", 6L, "idx", 5317L),
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("n",
"preferred"), "rows", 2L, "idx", 235L),
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("p",
"preferred"), "rows", 6L, "idx", 5405L),
+ makeRow(query, "2011-04-01", "alias",
ComparableStringArray.of("preferred", "t"), "rows", 4L, "idx", 420L)
+ );
+
+ Iterable<ResultRow> results =
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ TestHelper.assertExpectedObjects(expectedResults, results,
"multi-value-dim-groupby-arrays");
+ }
+
+ @Test
+ public void testSingleValueDimensionAsArray()
+ {
+ // array types don't work with group by v1
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage(
+ "GroupBy v1 only supports dimensions with an outputType of STRING");
+ }
+
+ // Cannot vectorize due to multi-value dimensions.
+ cannotVectorize();
+
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setVirtualColumns(new ExpressionVirtualColumn(
+ "v0",
+ "mv_to_array(placement)",
+ ColumnType.STRING_ARRAY,
+ ExprMacroTable.nil()
+ ))
+ .setDimensions(
+ new DefaultDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY))
+ .setAggregatorSpecs(
+ QueryRunnerTestHelper.ROWS_COUNT,
+ new LongSumAggregatorFactory("idx", "index")
+ )
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ List<ResultRow> expectedResults = ImmutableList.of(
+ makeRow(query, "2011-04-01", "alias",
+ ComparableStringArray.of("preferred"), "rows", 26L, "idx",
12446L
+ )
+ );
+
+ Iterable<ResultRow> results =
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ TestHelper.assertExpectedObjects(expectedResults, results,
"multi-value-dim-groupby-arrays");
+ }
+
+ @Test
+ public void testMultiValueDimensionAsArrayWithOtherDims()
+ {
+ // array types don't work with group by v1
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage(
+ "GroupBy v1 only supports dimensions with an outputType of STRING");
+ }
+
+
+ // Cannot vectorize due to multi-value dimensions.
+ cannotVectorize();
+
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setVirtualColumns(new ExpressionVirtualColumn(
+ "v0",
+ "mv_to_array(placementish)",
+ ColumnType.STRING_ARRAY,
+ ExprMacroTable.nil()
+ ))
+ .setDimensions(
+ new DefaultDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY),
+ new DefaultDimensionSpec("quality", "quality")
+ )
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new
LongSumAggregatorFactory("idx", "index"))
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ List<ResultRow> expectedResults = Arrays.asList(
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("a", "preferred"),
+ "quality",
+ "automotive",
+ "rows",
+ 2L,
+ "idx",
+ 282L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("b", "preferred"),
+ "quality",
+ "business",
+ "rows",
+ 2L,
+ "idx",
+ 230L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("e", "preferred"),
+ "quality",
+ "entertainment",
+ "rows",
+ 2L,
+ "idx",
+ 324L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("h", "preferred"),
+ "quality",
+ "health",
+ "rows",
+ 2L,
+ "idx",
+ 233L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("m", "preferred"),
+ "quality",
+ "mezzanine",
+ "rows",
+ 6L,
+ "idx",
+ 5317L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("n", "preferred"),
+ "quality",
+ "news",
+ "rows",
+ 2L,
+ "idx",
+ 235L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("p", "preferred"),
+ "quality",
+ "premium",
+ "rows",
+ 6L,
+ "idx",
+ 5405L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("preferred", "t"),
+ "quality",
+ "technology",
+ "rows",
+ 2L,
+ "idx",
+ 175L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("preferred", "t"),
+ "quality",
+ "travel",
+ "rows",
+ 2L,
+ "idx",
+ 245L
+ )
+ );
+
+ Iterable<ResultRow> results =
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ TestHelper.assertExpectedObjects(expectedResults, results,
"multi-value-dims-groupby-arrays");
+ }
+
+ @Test
+ public void testMultiValueDimensionAsStringArrayWithoutExpression()
+ {
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage("GroupBy v1 only supports dimensions
with an outputType of STRING");
+ } else if (!vectorize) {
+ expectedException.expect(RuntimeException.class);
+ expectedException.expectMessage("Not supported for multi-value
dimensions");
+ }
+
+ cannotVectorize();
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setDimensions(
+ new DefaultDimensionSpec("placementish", "alias",
ColumnType.STRING_ARRAY)
+ )
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new
LongSumAggregatorFactory("idx", "index"))
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ }
+
+ @Test
+ public void testSingleValueDimensionAsStringArrayWithoutExpression()
+ {
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage("GroupBy v1 only supports dimensions
with an outputType of STRING");
+ } else if (!vectorize) {
+ expectedException.expect(RuntimeException.class);
+ expectedException.expectMessage("java.lang.String cannot be cast to
[Ljava.util.Objects");
+ }
+ cannotVectorize();
+
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setDimensions(
+ new DefaultDimensionSpec("placement", "alias",
ColumnType.STRING_ARRAY)
+ )
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new
LongSumAggregatorFactory("idx", "index"))
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ Iterable<ResultRow> results =
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ List<ResultRow> expectedResults = ImmutableList.of(
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("preferred"),
+ "rows",
+ 26L,
+ "idx",
+ 12446L
+ ));
+ TestHelper.assertExpectedObjects(
+ expectedResults,
+ results,
+ "single-value-dims-groupby-arrays-as-string-arrays"
+ );
+ }
+
+
+ @Test
+ public void testNumericDimAsStringArrayWithoutExpression()
+ {
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage("GroupBy v1 only supports dimensions
with an outputType of STRING");
+ } else if (!vectorize) {
+ expectedException.expect(RuntimeException.class);
+ expectedException.expectMessage("java.lang.Double cannot be cast to
[Ljava.util.Objects");
+ }
+
+ cannotVectorize();
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setDimensions(
+ new DefaultDimensionSpec("index", "alias", ColumnType.STRING_ARRAY)
+ )
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new
LongSumAggregatorFactory("idx", "index"))
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ }
+
+
+ @Test
+ public void testMultiValueVirtualDimAsString()
+ {
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage("GroupBy v1 only supports dimensions
with an outputType of STRING");
+ } else if (!vectorize) {
+ expectedException.expect(RuntimeException.class);
+ expectedException.expectMessage("java.lang.Double cannot be cast to
[Ljava.util.Objects");
+ }
+
+ cannotVectorize();
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setVirtualColumns(new ExpressionVirtualColumn(
+ "v0",
+ "mv_to_array(placementish)",
+ ColumnType.STRING_ARRAY,
+ ExprMacroTable.nil()
+ ))
+ .setDimensions(
+ new DefaultDimensionSpec("vo", "alias", ColumnType.STRING)
+ )
+ .setDimensions(
+ new DefaultDimensionSpec("index", "alias", ColumnType.STRING_ARRAY)
+ )
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new
LongSumAggregatorFactory("idx", "index"))
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ }
+
+ @Test
+ public void testExtractionStringSpecWithMultiValueVirtualDimAsInput()
+ {
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage("GroupBy v1 does not support dimension
selectors with unknown cardinality");
+ }
+ cannotVectorize();
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setVirtualColumns(new ExpressionVirtualColumn(
+ "v0",
+ "mv_to_array(placementish)",
+ ColumnType.STRING_ARRAY,
+ ExprMacroTable.nil()
+ ))
+ .setDimensions(
+ new ExtractionDimensionSpec("v0", "alias", ColumnType.STRING,
+ new SubstringDimExtractionFn(1, 1)
+ )
+ )
+
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new
LongSumAggregatorFactory("idx", "index"))
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ List<ResultRow> expectedResults = Arrays.asList(
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ null,
+ "rows",
+ 26L,
+ "idx",
+ 12446L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ "r",
+ "rows",
+ 26L,
+ "idx",
+ 12446L
+ )
+ );
+
+ Iterable<ResultRow> results =
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ TestHelper.assertExpectedObjects(
+ expectedResults,
+ results,
+ "multi-value-extraction-spec-as-string-dim-groupby-arrays"
+ );
+ }
+
+
+ @Test
+ public void testExtractionStringArraySpecWithMultiValueVirtualDimAsInput()
+ {
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage("GroupBy v1 only supports dimensions
with an outputType of STRING");
+ } else if (!vectorize) {
+ expectedException.expect(RuntimeException.class);
+ expectedException.expectMessage("Not supported for multi-value
dimensions");
+ }
+
+ cannotVectorize();
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setVirtualColumns(new ExpressionVirtualColumn(
+ "v0",
+ "mv_to_array(placementish)",
+ ColumnType.STRING_ARRAY,
+ ExprMacroTable.nil()
+ ))
+ .setDimensions(
+ new ExtractionDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY,
+ new SubstringDimExtractionFn(1, 1)
+ )
+ )
+
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new
LongSumAggregatorFactory("idx", "index"))
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ }
+
+ @Test
+ public void testVirtualColumnNumericTypeAsStringArray()
+ {
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage("GroupBy v1 only supports dimensions
with an outputType of STRING");
+ } else if (!vectorize) {
+ expectedException.expect(RuntimeException.class);
+ expectedException.expectMessage(
+ "org.apache.druid.segment.data.ComparableList cannot be cast to
[Ljava.util.Objects");
+ }
+
+ cannotVectorize();
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setVirtualColumns(new ExpressionVirtualColumn(
+ "v0",
+ "array(index)",
+ ColumnType.STRING_ARRAY,
+ ExprMacroTable.nil()
+ ))
+ .setDimensions(
+ new DefaultDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY
+ )
+ )
+
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ }
+
+ @Test
+ public void testNestedGroupByWithArrays()
+ {
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage("GroupBy v1 only supports dimensions
with an outputType of STRING");
+ } else {
+ expectedException.expect(IAE.class);
+ expectedException.expectMessage("Cannot create query type helper from
invalid type [ARRAY<STRING>]");
Review comment:
why doesn't this query work?
##########
File path:
sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java
##########
@@ -138,6 +138,12 @@ public static void setupNullValues()
NULL_STRING = NullHandling.defaultStringValue();
NULL_FLOAT = NullHandling.defaultFloatValue();
NULL_LONG = NullHandling.defaultLongValue();
+ NULL_LIST = new ArrayList<Object>()
+ {
+ {
+ add(null);
+ }
+ };
Review comment:
Hmm, I know this is because of the coercion behavior in `ExprEval`, but I'm
having some regret about the decision to homogenize `null`, `[]`, and `[null]`
to `[null]`. I think it made the most sense when mapping back to `STRING` type,
but the coercion logic should probably be different when we are grouping on a
multi-value string as an `ARRAY<STRING>` instead of a `STRING`; in that case the
input bindings should homogenize the value to `null` instead.
I don't think we need to change this right now, since this PR still isn't
documenting this functionality, but it is something to think about for the
future, since `[null]` is sort of strange on the result side of things when
stuff is left as an actual array.
##########
File path:
processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryQueryToolChest.java
##########
@@ -432,7 +432,6 @@ public void serialize(
}
}
};
-
Review comment:
nit: unnecessary change
##########
File path: sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
##########
@@ -404,6 +403,7 @@ private static Grouping computeGrouping(
final RelDataType dataType = rexNode.getType();
final ColumnType outputType =
Calcites.getColumnTypeForRelDataType(dataType);
+
Review comment:
nit: unnecessary changes
##########
File path:
sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
##########
@@ -1640,13 +1834,65 @@ public void testArrayAggAsArrayFromJoin() throws
Exception
public void testArrayAggGroupByArrayAggFromSubquery() throws Exception
{
cannotVectorize();
- // yo, can't group on array types right now so expect failure
- expectedException.expect(RuntimeException.class);
- expectedException.expectMessage("Cannot create query type helper from
invalid type [ARRAY<STRING>]");
Review comment:
:rocket:
##########
File path:
processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java
##########
@@ -1306,6 +1309,531 @@ public void testMultiValueDimension()
TestHelper.assertExpectedObjects(expectedResults, results,
"multi-value-dim");
}
+ @Test
+ public void testMultiValueDimensionAsArray()
+ {
+ // array types don't work with group by v1
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage(
+ "GroupBy v1 only supports dimensions with an outputType of STRING.");
+ }
+
+ // Cannot vectorize due to multi-value dimensions.
+ cannotVectorize();
+
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setVirtualColumns(new ExpressionVirtualColumn(
+ "v0",
+ "mv_to_array(placementish)",
+ ColumnType.STRING_ARRAY,
+ ExprMacroTable.nil()
+ ))
+ .setDimensions(
+ new DefaultDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY))
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new
LongSumAggregatorFactory("idx", "index"))
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ List<ResultRow> expectedResults = Arrays.asList(
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("a",
"preferred"), "rows", 2L, "idx", 282L),
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("b",
"preferred"), "rows", 2L, "idx", 230L),
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("e",
"preferred"), "rows", 2L, "idx", 324L),
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("h",
"preferred"), "rows", 2L, "idx", 233L),
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("m",
"preferred"), "rows", 6L, "idx", 5317L),
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("n",
"preferred"), "rows", 2L, "idx", 235L),
+ makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("p",
"preferred"), "rows", 6L, "idx", 5405L),
+ makeRow(query, "2011-04-01", "alias",
ComparableStringArray.of("preferred", "t"), "rows", 4L, "idx", 420L)
+ );
+
+ Iterable<ResultRow> results =
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ TestHelper.assertExpectedObjects(expectedResults, results,
"multi-value-dim-groupby-arrays");
+ }
+
+ @Test
+ public void testSingleValueDimensionAsArray()
+ {
+ // array types don't work with group by v1
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage(
+ "GroupBy v1 only supports dimensions with an outputType of STRING");
+ }
+
+ // Cannot vectorize due to multi-value dimensions.
+ cannotVectorize();
+
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setVirtualColumns(new ExpressionVirtualColumn(
+ "v0",
+ "mv_to_array(placement)",
+ ColumnType.STRING_ARRAY,
+ ExprMacroTable.nil()
+ ))
+ .setDimensions(
+ new DefaultDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY))
+ .setAggregatorSpecs(
+ QueryRunnerTestHelper.ROWS_COUNT,
+ new LongSumAggregatorFactory("idx", "index")
+ )
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ List<ResultRow> expectedResults = ImmutableList.of(
+ makeRow(query, "2011-04-01", "alias",
+ ComparableStringArray.of("preferred"), "rows", 26L, "idx",
12446L
+ )
+ );
+
+ Iterable<ResultRow> results =
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ TestHelper.assertExpectedObjects(expectedResults, results,
"multi-value-dim-groupby-arrays");
+ }
+
+ @Test
+ public void testMultiValueDimensionAsArrayWithOtherDims()
+ {
+ // array types don't work with group by v1
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage(
+ "GroupBy v1 only supports dimensions with an outputType of STRING");
+ }
+
+
+ // Cannot vectorize due to multi-value dimensions.
+ cannotVectorize();
+
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setVirtualColumns(new ExpressionVirtualColumn(
+ "v0",
+ "mv_to_array(placementish)",
+ ColumnType.STRING_ARRAY,
+ ExprMacroTable.nil()
+ ))
+ .setDimensions(
+ new DefaultDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY),
+ new DefaultDimensionSpec("quality", "quality")
+ )
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new
LongSumAggregatorFactory("idx", "index"))
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ List<ResultRow> expectedResults = Arrays.asList(
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("a", "preferred"),
+ "quality",
+ "automotive",
+ "rows",
+ 2L,
+ "idx",
+ 282L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("b", "preferred"),
+ "quality",
+ "business",
+ "rows",
+ 2L,
+ "idx",
+ 230L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("e", "preferred"),
+ "quality",
+ "entertainment",
+ "rows",
+ 2L,
+ "idx",
+ 324L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("h", "preferred"),
+ "quality",
+ "health",
+ "rows",
+ 2L,
+ "idx",
+ 233L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("m", "preferred"),
+ "quality",
+ "mezzanine",
+ "rows",
+ 6L,
+ "idx",
+ 5317L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("n", "preferred"),
+ "quality",
+ "news",
+ "rows",
+ 2L,
+ "idx",
+ 235L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("p", "preferred"),
+ "quality",
+ "premium",
+ "rows",
+ 6L,
+ "idx",
+ 5405L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("preferred", "t"),
+ "quality",
+ "technology",
+ "rows",
+ 2L,
+ "idx",
+ 175L
+ ),
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("preferred", "t"),
+ "quality",
+ "travel",
+ "rows",
+ 2L,
+ "idx",
+ 245L
+ )
+ );
+
+ Iterable<ResultRow> results =
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ TestHelper.assertExpectedObjects(expectedResults, results,
"multi-value-dims-groupby-arrays");
+ }
+
+ @Test
+ public void testMultiValueDimensionAsStringArrayWithoutExpression()
+ {
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage("GroupBy v1 only supports dimensions
with an outputType of STRING");
+ } else if (!vectorize) {
+ expectedException.expect(RuntimeException.class);
+ expectedException.expectMessage("Not supported for multi-value
dimensions");
+ }
+
+ cannotVectorize();
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setDimensions(
+ new DefaultDimensionSpec("placementish", "alias",
ColumnType.STRING_ARRAY)
+ )
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new
LongSumAggregatorFactory("idx", "index"))
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ }
+
+ @Test
+ public void testSingleValueDimensionAsStringArrayWithoutExpression()
+ {
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage("GroupBy v1 only supports dimensions
with an outputType of STRING");
+ } else if (!vectorize) {
+ expectedException.expect(RuntimeException.class);
+ expectedException.expectMessage("java.lang.String cannot be cast to
[Ljava.util.Objects");
+ }
+ cannotVectorize();
+
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+ .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+ .setDimensions(
+ new DefaultDimensionSpec("placement", "alias",
ColumnType.STRING_ARRAY)
+ )
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new
LongSumAggregatorFactory("idx", "index"))
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ Iterable<ResultRow> results =
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+ List<ResultRow> expectedResults = ImmutableList.of(
+ makeRow(
+ query,
+ "2011-04-01",
+ "alias",
+ ComparableStringArray.of("preferred"),
+ "rows",
+ 26L,
+ "idx",
+ 12446L
+ ));
+ TestHelper.assertExpectedObjects(
+ expectedResults,
+ results,
+ "single-value-dims-groupby-arrays-as-string-arrays"
+ );
+ }
+
+
+ @Test
+ public void testNumericDimAsStringArrayWithoutExpression()
+ {
+ if
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+ expectedException.expect(UnsupportedOperationException.class);
+ expectedException.expectMessage("GroupBy v1 only supports dimensions
with an outputType of STRING");
+ } else if (!vectorize) {
+ expectedException.expect(RuntimeException.class);
+ expectedException.expectMessage("java.lang.Double cannot be cast to
[Ljava.util.Objects");
Review comment:
I wonder if we should have tighter validation that the dimension spec matches
the column type, if we intend to treat the dimension spec output type as a sort
of cast, instead of letting it hit actual class cast exceptions.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]