clintropolis commented on a change in pull request #12078:
URL: https://github.com/apache/druid/pull/12078#discussion_r781661764



##########
File path: core/src/main/java/org/apache/druid/math/expr/Function.java
##########
@@ -2977,6 +2977,67 @@ public ExpressionType 
getOutputType(Expr.InputBindingInspector inspector, List<E
     }
   }
 
+  class MVToArrayFunction implements Function
+  {
+    @Override
+    public String name()
+    {
+      return "mv_to_array";
+    }
+
+    @Override
+    public ExprEval apply(List<Expr> args, Expr.ObjectBinding bindings)
+    {
+      return args.get(0).eval(bindings);
+    }
+
+    @Override
+    public void validateArguments(List<Expr> args)
+    {
+      if (args.size() != 1) {
+        throw new IAE("Function[%s] needs exactly 1 argument of type String", 
name());
+      }
+      IdentifierExpr expr = args.get(0).getIdentifierExprIfIdentifierExpr();
+
+      if (expr == null) {
+        throw new IAE(
+            "Arg %s should be an identifier expression ie refer to columns 
directally. Use array[] instead",

Review comment:
       ```suggestion
               "Arg %s should be an identifier expression ie refer to columns 
directly. Use array[] instead",
   ```
   
   also, in the native expression layer the syntax is actually `array(...)`

##########
File path: core/src/main/java/org/apache/druid/math/expr/Function.java
##########
@@ -2977,6 +2977,67 @@ public ExpressionType 
getOutputType(Expr.InputBindingInspector inspector, List<E
     }
   }
 
+  class MVToArrayFunction implements Function
+  {
+    @Override
+    public String name()
+    {
+      return "mv_to_array";
+    }
+
+    @Override
+    public ExprEval apply(List<Expr> args, Expr.ObjectBinding bindings)
+    {
+      return args.get(0).eval(bindings);

Review comment:
       does this need to cast the output to string array for the cases where 
the input was not multi-valued?
   
   ```suggestion
         return args.get(0).eval(bindings).castTo(ExpressionType.STRING_ARRAY);
   ```
   
   assertArrayExpr in the tests calls `asArray` on the output before comparing 
them, but I'm not sure every selector that could be built on top of this 
expression necessarily would. Since we explicitly want to always return an 
array, it would probably be safer to do the cast here (or just eliminate this 
expression and use cast directly).

##########
File path: 
processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java
##########
@@ -1306,6 +1309,531 @@ public void testMultiValueDimension()
     TestHelper.assertExpectedObjects(expectedResults, results, 
"multi-value-dim");
   }
 
+  @Test
+  public void testMultiValueDimensionAsArray()
+  {
+    // array types don't work with group by v1
+    if 
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+      expectedException.expect(UnsupportedOperationException.class);
+      expectedException.expectMessage(
+          "GroupBy v1 only supports dimensions with an outputType of STRING.");
+    }
+
+    // Cannot vectorize due to multi-value dimensions.
+    cannotVectorize();
+
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+        .setVirtualColumns(new ExpressionVirtualColumn(
+            "v0",
+            "mv_to_array(placementish)",
+            ColumnType.STRING_ARRAY,
+            ExprMacroTable.nil()
+        ))
+        .setDimensions(
+            new DefaultDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY))
+        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new 
LongSumAggregatorFactory("idx", "index"))
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    List<ResultRow> expectedResults = Arrays.asList(
+        makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("a", 
"preferred"), "rows", 2L, "idx", 282L),
+        makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("b", 
"preferred"), "rows", 2L, "idx", 230L),
+        makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("e", 
"preferred"), "rows", 2L, "idx", 324L),
+        makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("h", 
"preferred"), "rows", 2L, "idx", 233L),
+        makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("m", 
"preferred"), "rows", 6L, "idx", 5317L),
+        makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("n", 
"preferred"), "rows", 2L, "idx", 235L),
+        makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("p", 
"preferred"), "rows", 6L, "idx", 5405L),
+        makeRow(query, "2011-04-01", "alias", 
ComparableStringArray.of("preferred", "t"), "rows", 4L, "idx", 420L)
+    );
+
+    Iterable<ResultRow> results = 
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"multi-value-dim-groupby-arrays");
+  }
+
+  @Test
+  public void testSingleValueDimensionAsArray()
+  {
+    // array types don't work with group by v1
+    if 
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+      expectedException.expect(UnsupportedOperationException.class);
+      expectedException.expectMessage(
+          "GroupBy v1 only supports dimensions with an outputType of STRING");
+    }
+
+    // Cannot vectorize due to multi-value dimensions.
+    cannotVectorize();
+
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+        .setVirtualColumns(new ExpressionVirtualColumn(
+            "v0",
+            "mv_to_array(placement)",
+            ColumnType.STRING_ARRAY,
+            ExprMacroTable.nil()
+        ))
+        .setDimensions(
+            new DefaultDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY))
+        .setAggregatorSpecs(
+            QueryRunnerTestHelper.ROWS_COUNT,
+            new LongSumAggregatorFactory("idx", "index")
+        )
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    List<ResultRow> expectedResults = ImmutableList.of(
+        makeRow(query, "2011-04-01", "alias",
+                ComparableStringArray.of("preferred"), "rows", 26L, "idx", 
12446L
+        )
+    );
+
+    Iterable<ResultRow> results = 
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"multi-value-dim-groupby-arrays");
+  }
+
+  @Test
+  public void testMultiValueDimensionAsArrayWithOtherDims()
+  {
+    // array types don't work with group by v1
+    if 
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+      expectedException.expect(UnsupportedOperationException.class);
+      expectedException.expectMessage(
+          "GroupBy v1 only supports dimensions with an outputType of STRING");
+    }
+
+
+    // Cannot vectorize due to multi-value dimensions.
+    cannotVectorize();
+
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+        .setVirtualColumns(new ExpressionVirtualColumn(
+            "v0",
+            "mv_to_array(placementish)",
+            ColumnType.STRING_ARRAY,
+            ExprMacroTable.nil()
+        ))
+        .setDimensions(
+            new DefaultDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY),
+            new DefaultDimensionSpec("quality", "quality")
+        )
+        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new 
LongSumAggregatorFactory("idx", "index"))
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    List<ResultRow> expectedResults = Arrays.asList(
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            ComparableStringArray.of("a", "preferred"),
+            "quality",
+            "automotive",
+            "rows",
+            2L,
+            "idx",
+            282L
+        ),
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            ComparableStringArray.of("b", "preferred"),
+            "quality",
+            "business",
+            "rows",
+            2L,
+            "idx",
+            230L
+        ),
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            ComparableStringArray.of("e", "preferred"),
+            "quality",
+            "entertainment",
+            "rows",
+            2L,
+            "idx",
+            324L
+        ),
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            ComparableStringArray.of("h", "preferred"),
+            "quality",
+            "health",
+            "rows",
+            2L,
+            "idx",
+            233L
+        ),
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            ComparableStringArray.of("m", "preferred"),
+            "quality",
+            "mezzanine",
+            "rows",
+            6L,
+            "idx",
+            5317L
+        ),
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            ComparableStringArray.of("n", "preferred"),
+            "quality",
+            "news",
+            "rows",
+            2L,
+            "idx",
+            235L
+        ),
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            ComparableStringArray.of("p", "preferred"),
+            "quality",
+            "premium",
+            "rows",
+            6L,
+            "idx",
+            5405L
+        ),
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            ComparableStringArray.of("preferred", "t"),
+            "quality",
+            "technology",
+            "rows",
+            2L,
+            "idx",
+            175L
+        ),
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            ComparableStringArray.of("preferred", "t"),
+            "quality",
+            "travel",
+            "rows",
+            2L,
+            "idx",
+            245L
+        )
+    );
+
+    Iterable<ResultRow> results = 
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"multi-value-dims-groupby-arrays");
+  }
+
+  @Test
+  public void testMultiValueDimensionAsStringArrayWithoutExpression()
+  {
+    if 
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+      expectedException.expect(UnsupportedOperationException.class);
+      expectedException.expectMessage("GroupBy v1 only supports dimensions 
with an outputType of STRING");
+    } else if (!vectorize) {
+      expectedException.expect(RuntimeException.class);
+      expectedException.expectMessage("Not supported for multi-value 
dimensions");
+    }
+
+    cannotVectorize();
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+        .setDimensions(
+            new DefaultDimensionSpec("placementish", "alias", 
ColumnType.STRING_ARRAY)
+        )
+        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new 
LongSumAggregatorFactory("idx", "index"))
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+  }
+
+  @Test
+  public void testSingleValueDimensionAsStringArrayWithoutExpression()
+  {
+    if 
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+      expectedException.expect(UnsupportedOperationException.class);
+      expectedException.expectMessage("GroupBy v1 only supports dimensions 
with an outputType of STRING");
+    } else if (!vectorize) {
+      expectedException.expect(RuntimeException.class);
+      expectedException.expectMessage("java.lang.String cannot be cast to 
[Ljava.util.Objects");
+    }
+    cannotVectorize();
+
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+        .setDimensions(
+            new DefaultDimensionSpec("placement", "alias", 
ColumnType.STRING_ARRAY)
+        )
+        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new 
LongSumAggregatorFactory("idx", "index"))
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    Iterable<ResultRow> results = 
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+    List<ResultRow> expectedResults = ImmutableList.of(
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            ComparableStringArray.of("preferred"),
+            "rows",
+            26L,
+            "idx",
+            12446L
+        ));
+    TestHelper.assertExpectedObjects(
+        expectedResults,
+        results,
+        "single-value-dims-groupby-arrays-as-string-arrays"
+    );
+  }
+
+
+  @Test
+  public void testNumericDimAsStringArrayWithoutExpression()
+  {
+    if 
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+      expectedException.expect(UnsupportedOperationException.class);
+      expectedException.expectMessage("GroupBy v1 only supports dimensions 
with an outputType of STRING");
+    } else if (!vectorize) {
+      expectedException.expect(RuntimeException.class);
+      expectedException.expectMessage("java.lang.Double cannot be cast to 
[Ljava.util.Objects");
+    }
+
+    cannotVectorize();
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+        .setDimensions(
+            new DefaultDimensionSpec("index", "alias", ColumnType.STRING_ARRAY)
+        )
+        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new 
LongSumAggregatorFactory("idx", "index"))
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+  }
+
+
+  @Test
+  public void testMultiValueVirtualDimAsString()
+  {
+    if 
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+      expectedException.expect(UnsupportedOperationException.class);
+      expectedException.expectMessage("GroupBy v1 only supports dimensions 
with an outputType of STRING");
+    } else if (!vectorize) {
+      expectedException.expect(RuntimeException.class);
+      expectedException.expectMessage("java.lang.Double cannot be cast to 
[Ljava.util.Objects");
+    }
+
+    cannotVectorize();
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+        .setVirtualColumns(new ExpressionVirtualColumn(
+            "v0",
+            "mv_to_array(placementish)",
+            ColumnType.STRING_ARRAY,
+            ExprMacroTable.nil()
+        ))
+        .setDimensions(
+            new DefaultDimensionSpec("vo", "alias", ColumnType.STRING)
+        )
+        .setDimensions(
+            new DefaultDimensionSpec("index", "alias", ColumnType.STRING_ARRAY)
+        )
+        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new 
LongSumAggregatorFactory("idx", "index"))
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+  }
+
+  @Test
+  public void testExtractionStringSpecWithMultiValueVirtualDimAsInput()
+  {
+    if 
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+      expectedException.expect(UnsupportedOperationException.class);
+      expectedException.expectMessage("GroupBy v1 does not support dimension 
selectors with unknown cardinality");
+    }
+    cannotVectorize();
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+        .setVirtualColumns(new ExpressionVirtualColumn(
+            "v0",
+            "mv_to_array(placementish)",
+            ColumnType.STRING_ARRAY,
+            ExprMacroTable.nil()
+        ))
+        .setDimensions(
+            new ExtractionDimensionSpec("v0", "alias", ColumnType.STRING,
+                                        new SubstringDimExtractionFn(1, 1)
+            )
+        )
+
+        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new 
LongSumAggregatorFactory("idx", "index"))
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    List<ResultRow> expectedResults = Arrays.asList(
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            null,
+            "rows",
+            26L,
+            "idx",
+            12446L
+        ),
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            "r",
+            "rows",
+            26L,
+            "idx",
+            12446L
+        )
+    );
+
+    Iterable<ResultRow> results = 
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+    TestHelper.assertExpectedObjects(
+        expectedResults,
+        results,
+        "multi-value-extraction-spec-as-string-dim-groupby-arrays"
+    );
+  }
+
+
+  @Test
+  public void testExtractionStringArraySpecWithMultiValueVirtualDimAsInput()
+  {
+    if 
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+      expectedException.expect(UnsupportedOperationException.class);
+      expectedException.expectMessage("GroupBy v1 only supports dimensions 
with an outputType of STRING");
+    } else if (!vectorize) {
+      expectedException.expect(RuntimeException.class);
+      expectedException.expectMessage("Not supported for multi-value 
dimensions");
+    }
+
+    cannotVectorize();
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+        .setVirtualColumns(new ExpressionVirtualColumn(
+            "v0",
+            "mv_to_array(placementish)",
+            ColumnType.STRING_ARRAY,
+            ExprMacroTable.nil()
+        ))
+        .setDimensions(
+            new ExtractionDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY,
+                                        new SubstringDimExtractionFn(1, 1)
+            )
+        )
+
+        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new 
LongSumAggregatorFactory("idx", "index"))
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+  }
+
+  @Test
+  public void testVirtualColumnNumericTypeAsStringArray()
+  {
+    if 
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+      expectedException.expect(UnsupportedOperationException.class);
+      expectedException.expectMessage("GroupBy v1 only supports dimensions 
with an outputType of STRING");
+    } else if (!vectorize) {
+      expectedException.expect(RuntimeException.class);
+      expectedException.expectMessage(
+          "org.apache.druid.segment.data.ComparableList cannot be cast to 
[Ljava.util.Objects");
+    }
+
+    cannotVectorize();
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+        .setVirtualColumns(new ExpressionVirtualColumn(
+            "v0",
+            "array(index)",
+            ColumnType.STRING_ARRAY,
+            ExprMacroTable.nil()
+        ))
+        .setDimensions(
+            new DefaultDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY
+            )
+        )
+
+        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+  }
+
+  @Test
+  public void testNestedGroupByWithArrays()
+  {
+    if 
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+      expectedException.expect(UnsupportedOperationException.class);
+      expectedException.expectMessage("GroupBy v1 only supports dimensions 
with an outputType of STRING");
+    } else {
+      expectedException.expect(IAE.class);
+      expectedException.expectMessage("Cannot create query type helper from 
invalid type [ARRAY<STRING>]");

Review comment:
       why doesn't this query work?

##########
File path: 
sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java
##########
@@ -138,6 +138,12 @@ public static void setupNullValues()
     NULL_STRING = NullHandling.defaultStringValue();
     NULL_FLOAT = NullHandling.defaultFloatValue();
     NULL_LONG = NullHandling.defaultLongValue();
+    NULL_LIST = new ArrayList<Object>()
+    {
+      {
+        add(null);
+      }
+    };

Review comment:
       hmm, I know this is because of the coercion behavior in `ExprEval`, but 
I'm having some regret about the decision to homogenize `null`, `[]`, and 
`[null]` to `[null]`. I think it made the most sense when mapping back to 
`STRING` type, but I think the coercion logic should probably be different when 
we are grouping on a multi-value string as an ARRAY<STRING> instead of a 
STRING, and the input bindings should homogenize the value to `null` instead in 
that case.
   
   I don't think we need to change this right now, since this PR still isn't 
documenting this functionality, but it is something to think about for the 
future, since `[null]` is sort of strange on the result side of things when 
the value is left as an actual array.

##########
File path: 
processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryQueryToolChest.java
##########
@@ -432,7 +432,6 @@ public void serialize(
         }
       }
     };
-

Review comment:
       nit: unnecessary change

##########
File path: sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
##########
@@ -404,6 +403,7 @@ private static Grouping computeGrouping(
 
       final RelDataType dataType = rexNode.getType();
       final ColumnType outputType = 
Calcites.getColumnTypeForRelDataType(dataType);
+

Review comment:
       nit: unnecessary changes

##########
File path: 
sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
##########
@@ -1640,13 +1834,65 @@ public void testArrayAggAsArrayFromJoin() throws 
Exception
   public void testArrayAggGroupByArrayAggFromSubquery() throws Exception
   {
     cannotVectorize();
-    // yo, can't group on array types right now so expect failure
-    expectedException.expect(RuntimeException.class);
-    expectedException.expectMessage("Cannot create query type helper from 
invalid type [ARRAY<STRING>]");

Review comment:
       :rocket:

##########
File path: 
processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java
##########
@@ -1306,6 +1309,531 @@ public void testMultiValueDimension()
     TestHelper.assertExpectedObjects(expectedResults, results, 
"multi-value-dim");
   }
 
+  @Test
+  public void testMultiValueDimensionAsArray()
+  {
+    // array types don't work with group by v1
+    if 
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+      expectedException.expect(UnsupportedOperationException.class);
+      expectedException.expectMessage(
+          "GroupBy v1 only supports dimensions with an outputType of STRING.");
+    }
+
+    // Cannot vectorize due to multi-value dimensions.
+    cannotVectorize();
+
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+        .setVirtualColumns(new ExpressionVirtualColumn(
+            "v0",
+            "mv_to_array(placementish)",
+            ColumnType.STRING_ARRAY,
+            ExprMacroTable.nil()
+        ))
+        .setDimensions(
+            new DefaultDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY))
+        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new 
LongSumAggregatorFactory("idx", "index"))
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    List<ResultRow> expectedResults = Arrays.asList(
+        makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("a", 
"preferred"), "rows", 2L, "idx", 282L),
+        makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("b", 
"preferred"), "rows", 2L, "idx", 230L),
+        makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("e", 
"preferred"), "rows", 2L, "idx", 324L),
+        makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("h", 
"preferred"), "rows", 2L, "idx", 233L),
+        makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("m", 
"preferred"), "rows", 6L, "idx", 5317L),
+        makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("n", 
"preferred"), "rows", 2L, "idx", 235L),
+        makeRow(query, "2011-04-01", "alias", ComparableStringArray.of("p", 
"preferred"), "rows", 6L, "idx", 5405L),
+        makeRow(query, "2011-04-01", "alias", 
ComparableStringArray.of("preferred", "t"), "rows", 4L, "idx", 420L)
+    );
+
+    Iterable<ResultRow> results = 
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"multi-value-dim-groupby-arrays");
+  }
+
+  @Test
+  public void testSingleValueDimensionAsArray()
+  {
+    // array types don't work with group by v1
+    if 
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+      expectedException.expect(UnsupportedOperationException.class);
+      expectedException.expectMessage(
+          "GroupBy v1 only supports dimensions with an outputType of STRING");
+    }
+
+    // Cannot vectorize due to multi-value dimensions.
+    cannotVectorize();
+
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+        .setVirtualColumns(new ExpressionVirtualColumn(
+            "v0",
+            "mv_to_array(placement)",
+            ColumnType.STRING_ARRAY,
+            ExprMacroTable.nil()
+        ))
+        .setDimensions(
+            new DefaultDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY))
+        .setAggregatorSpecs(
+            QueryRunnerTestHelper.ROWS_COUNT,
+            new LongSumAggregatorFactory("idx", "index")
+        )
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    List<ResultRow> expectedResults = ImmutableList.of(
+        makeRow(query, "2011-04-01", "alias",
+                ComparableStringArray.of("preferred"), "rows", 26L, "idx", 
12446L
+        )
+    );
+
+    Iterable<ResultRow> results = 
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"multi-value-dim-groupby-arrays");
+  }
+
+  @Test
+  public void testMultiValueDimensionAsArrayWithOtherDims()
+  {
+    // array types don't work with group by v1
+    if 
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+      expectedException.expect(UnsupportedOperationException.class);
+      expectedException.expectMessage(
+          "GroupBy v1 only supports dimensions with an outputType of STRING");
+    }
+
+
+    // Cannot vectorize due to multi-value dimensions.
+    cannotVectorize();
+
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+        .setVirtualColumns(new ExpressionVirtualColumn(
+            "v0",
+            "mv_to_array(placementish)",
+            ColumnType.STRING_ARRAY,
+            ExprMacroTable.nil()
+        ))
+        .setDimensions(
+            new DefaultDimensionSpec("v0", "alias", ColumnType.STRING_ARRAY),
+            new DefaultDimensionSpec("quality", "quality")
+        )
+        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new 
LongSumAggregatorFactory("idx", "index"))
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    List<ResultRow> expectedResults = Arrays.asList(
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            ComparableStringArray.of("a", "preferred"),
+            "quality",
+            "automotive",
+            "rows",
+            2L,
+            "idx",
+            282L
+        ),
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            ComparableStringArray.of("b", "preferred"),
+            "quality",
+            "business",
+            "rows",
+            2L,
+            "idx",
+            230L
+        ),
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            ComparableStringArray.of("e", "preferred"),
+            "quality",
+            "entertainment",
+            "rows",
+            2L,
+            "idx",
+            324L
+        ),
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            ComparableStringArray.of("h", "preferred"),
+            "quality",
+            "health",
+            "rows",
+            2L,
+            "idx",
+            233L
+        ),
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            ComparableStringArray.of("m", "preferred"),
+            "quality",
+            "mezzanine",
+            "rows",
+            6L,
+            "idx",
+            5317L
+        ),
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            ComparableStringArray.of("n", "preferred"),
+            "quality",
+            "news",
+            "rows",
+            2L,
+            "idx",
+            235L
+        ),
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            ComparableStringArray.of("p", "preferred"),
+            "quality",
+            "premium",
+            "rows",
+            6L,
+            "idx",
+            5405L
+        ),
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            ComparableStringArray.of("preferred", "t"),
+            "quality",
+            "technology",
+            "rows",
+            2L,
+            "idx",
+            175L
+        ),
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            ComparableStringArray.of("preferred", "t"),
+            "quality",
+            "travel",
+            "rows",
+            2L,
+            "idx",
+            245L
+        )
+    );
+
+    Iterable<ResultRow> results = 
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"multi-value-dims-groupby-arrays");
+  }
+
+  @Test
+  public void testMultiValueDimensionAsStringArrayWithoutExpression()
+  {
+    if 
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+      expectedException.expect(UnsupportedOperationException.class);
+      expectedException.expectMessage("GroupBy v1 only supports dimensions 
with an outputType of STRING");
+    } else if (!vectorize) {
+      expectedException.expect(RuntimeException.class);
+      expectedException.expectMessage("Not supported for multi-value 
dimensions");
+    }
+
+    cannotVectorize();
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+        .setDimensions(
+            new DefaultDimensionSpec("placementish", "alias", 
ColumnType.STRING_ARRAY)
+        )
+        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new 
LongSumAggregatorFactory("idx", "index"))
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+  }
+
+  @Test
+  public void testSingleValueDimensionAsStringArrayWithoutExpression()
+  {
+    if 
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+      expectedException.expect(UnsupportedOperationException.class);
+      expectedException.expectMessage("GroupBy v1 only supports dimensions 
with an outputType of STRING");
+    } else if (!vectorize) {
+      expectedException.expect(RuntimeException.class);
+      expectedException.expectMessage("java.lang.String cannot be cast to 
[Ljava.util.Objects");
+    }
+    cannotVectorize();
+
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
+        .setDimensions(
+            new DefaultDimensionSpec("placement", "alias", 
ColumnType.STRING_ARRAY)
+        )
+        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new 
LongSumAggregatorFactory("idx", "index"))
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    Iterable<ResultRow> results = 
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+    List<ResultRow> expectedResults = ImmutableList.of(
+        makeRow(
+            query,
+            "2011-04-01",
+            "alias",
+            ComparableStringArray.of("preferred"),
+            "rows",
+            26L,
+            "idx",
+            12446L
+        ));
+    TestHelper.assertExpectedObjects(
+        expectedResults,
+        results,
+        "single-value-dims-groupby-arrays-as-string-arrays"
+    );
+  }
+
+
+  @Test
+  public void testNumericDimAsStringArrayWithoutExpression()
+  {
+    if 
(config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
+      expectedException.expect(UnsupportedOperationException.class);
+      expectedException.expectMessage("GroupBy v1 only supports dimensions 
with an outputType of STRING");
+    } else if (!vectorize) {
+      expectedException.expect(RuntimeException.class);
+      expectedException.expectMessage("java.lang.Double cannot be cast to 
[Ljava.util.Objects");

Review comment:
       i wonder if we should have tighter validation on matching dimension spec 
with column type, if we intend to treat dimension spec output type as a sort of 
cast, instead of letting it hit actual class cast exceptions




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to