(druid) branch master updated: GroupBy: Vector processing for complex dimensions. (#18504)

gian Thu, 30 Oct 2025 15:16:54 -0700

This is an automated email from the ASF dual-hosted git repository.

gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git



The following commit(s) were added to refs/heads/master by this push:
     new 45295ca58e7 GroupBy: Vector processing for complex dimensions. (#18504)
45295ca58e7 is described below

commit 45295ca58e788cc5ce1e248aea43bec304099404
Author: Gian Merlino <[email protected]>
AuthorDate: Thu Oct 30 15:15:36 2025 -0700

    GroupBy: Vector processing for complex dimensions. (#18504)
    
    This patch adds DictionaryBuildingComplexGroupByVectorColumnSelector,
    and refactors DictionaryBuildingSingleValueStringGroupByVectorColumnSelector
    to share a common base class. The design is similar to the classes
    used by the nonvectorized dictionary-building selectors, and reuses
    the DimensionIdCodec component.
---
 ...onaryBuildingGroupByColumnSelectorStrategy.java |   2 +-
 .../epinephelinae/column/MemoryFootprint.java      |   3 +-
 ...BuildingComplexGroupByVectorColumnSelector.java |  51 ++++++++
 ...tionaryBuildingGroupByVectorColumnSelector.java | 101 +++++++++++++++
 ...ngleValueStringGroupByVectorColumnSelector.java | 110 +++++++---------
 .../GroupByVectorColumnProcessorFactory.java       |   5 +-
 .../epinephelinae/vector/VectorGroupByEngine.java  |   4 +-
 .../sql/calcite/CalciteNestedDataQueryTest.java    | 140 ++++++++-------------
 .../apache/druid/sql/calcite/CalciteQueryTest.java |   4 -
 9 files changed, 257 insertions(+), 163 deletions(-)

diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DictionaryBuildingGroupByColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DictionaryBuildingGroupByColumnSelectorStrategy.java
index cf033eaa65d..e40db65b33e 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DictionaryBuildingGroupByColumnSelectorStrategy.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DictionaryBuildingGroupByColumnSelectorStrategy.java
@@ -110,7 +110,7 @@ public class DictionaryBuildingGroupByColumnSelectorStrategy<DimensionType>
     );
   }
 
-  private static class UniValueDimensionIdCodec implements DimensionIdCodec<Object>
+  public static class UniValueDimensionIdCodec implements DimensionIdCodec<Object>
   {
     /**
      * Dictionary for mapping the dimension value to an index. i-th position in the dictionary holds the value represented
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/MemoryFootprint.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/MemoryFootprint.java
index 64303770d62..32cd2999e5c 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/MemoryFootprint.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/MemoryFootprint.java
@@ -27,8 +27,7 @@ public class MemoryFootprint<T>
   private final T value;
   private final int footprintIncrease;
 
-  // Reduced visibility
-  MemoryFootprint(T value, int footprintIncrease)
+  public MemoryFootprint(T value, int footprintIncrease)
   {
     this.value = value;
     this.footprintIncrease = footprintIncrease;
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingComplexGroupByVectorColumnSelector.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingComplexGroupByVectorColumnSelector.java
new file mode 100644
index 00000000000..24d101dafb5
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingComplexGroupByVectorColumnSelector.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.groupby.epinephelinae.vector;
+
+import org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingGroupByColumnSelectorStrategy;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+
+/**
+ * Selector that groups complex columns using a dictionary.
+ *
+ * @see DictionaryBuildingSingleValueStringGroupByVectorColumnSelector similar selector for non-dict-encoded strings
+ * @see DictionaryBuildingGroupByColumnSelectorStrategy#forType(ColumnType) which creates the nonvectorized version
+ */
+public class DictionaryBuildingComplexGroupByVectorColumnSelector
+    extends DictionaryBuildingGroupByVectorColumnSelector<Object>
+{
+  public DictionaryBuildingComplexGroupByVectorColumnSelector(
+      final VectorObjectSelector selector,
+      final ColumnType columnType
+  )
+  {
+    super(
+        selector,
+        new DictionaryBuildingGroupByColumnSelectorStrategy.UniValueDimensionIdCodec(columnType.getNullableStrategy())
+    );
+  }
+
+  @Override
+  protected Object convertValue(final Object rawValue)
+  {
+    return rawValue;
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingGroupByVectorColumnSelector.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingGroupByVectorColumnSelector.java
new file mode 100644
index 00000000000..928e904df55
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingGroupByVectorColumnSelector.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.groupby.epinephelinae.vector;
+
+import org.apache.datasketches.memory.WritableMemory;
+import org.apache.druid.query.groupby.ResultRow;
+import org.apache.druid.query.groupby.epinephelinae.collection.MemoryPointer;
+import org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingGroupByColumnSelectorStrategy;
+import org.apache.druid.query.groupby.epinephelinae.column.DimensionIdCodec;
+import org.apache.druid.query.groupby.epinephelinae.column.MemoryFootprint;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+
+/**
+ * Base class for {@link GroupByVectorColumnSelector} that build dictionaries for values that are not
+ * natively dictionary-encoded.
+ *
+ * @see DictionaryBuildingGroupByColumnSelectorStrategy the nonvectorized version
+ */
+public abstract class DictionaryBuildingGroupByVectorColumnSelector<T> implements GroupByVectorColumnSelector
+{
+  protected final VectorObjectSelector selector;
+  protected final DimensionIdCodec<T> dimensionIdCodec;
+
+  protected DictionaryBuildingGroupByVectorColumnSelector(
+      final VectorObjectSelector selector,
+      final DimensionIdCodec<T> dimensionIdCodec
+  )
+  {
+    this.selector = selector;
+    this.dimensionIdCodec = dimensionIdCodec;
+  }
+
+  @Override
+  public final int getGroupingKeySize()
+  {
+    return Integer.BYTES;
+  }
+
+  @Override
+  public final int writeKeys(
+      final WritableMemory keySpace,
+      final int keySize,
+      final int keyOffset,
+      final int startRow,
+      final int endRow
+  )
+  {
+    final Object[] vector = selector.getObjectVector();
+    int stateFootprintIncrease = 0;
+
+    for (int i = startRow, j = keyOffset; i < endRow; i++, j += keySize) {
+      final T value = convertValue(vector[i]);
+      final MemoryFootprint<Integer> idAndMemoryIncrease = dimensionIdCodec.lookupId(value);
+      keySpace.putInt(j, idAndMemoryIncrease.value());
+      stateFootprintIncrease += idAndMemoryIncrease.memoryIncrease();
+    }
+
+    return stateFootprintIncrease;
+  }
+
+  @Override
+  public final void writeKeyToResultRow(
+      final MemoryPointer keyMemory,
+      final int keyOffset,
+      final ResultRow resultRow,
+      final int resultRowPosition
+  )
+  {
+    final int id = keyMemory.memory().getInt(keyMemory.position() + keyOffset);
+    final T value = dimensionIdCodec.idToKey(id);
+    resultRow.set(resultRowPosition, value);
+  }
+
+  @Override
+  public final void reset()
+  {
+    dimensionIdCodec.reset();
+  }
+
+  /**
+   * Convert raw value from the vector to the appropriate type for this selector.
+   */
+  protected abstract T convertValue(Object rawValue);
+}
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java
index 0c4640c85f9..301821d6a6c 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java
@@ -19,12 +19,14 @@
 
 package org.apache.druid.query.groupby.epinephelinae.vector;
 
+import it.unimi.dsi.fastutil.objects.Object2IntMap;
 import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
-import org.apache.datasketches.memory.WritableMemory;
-import org.apache.druid.query.groupby.ResultRow;
 import org.apache.druid.query.groupby.epinephelinae.DictionaryBuildingUtils;
-import org.apache.druid.query.groupby.epinephelinae.collection.MemoryPointer;
+import org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingGroupByColumnSelectorStrategy;
+import org.apache.druid.query.groupby.epinephelinae.column.DimensionIdCodec;
+import org.apache.druid.query.groupby.epinephelinae.column.MemoryFootprint;
 import org.apache.druid.segment.DimensionHandlerUtils;
+import org.apache.druid.segment.column.ColumnType;
 import org.apache.druid.segment.vector.VectorObjectSelector;
 
 import java.util.ArrayList;
@@ -34,85 +36,65 @@ import java.util.List;
  * A {@link GroupByVectorColumnSelector} that builds an internal String<->Integer dictionary, used for grouping
  * single-valued STRING columns which are not natively dictionary encoded, e.g. expression virtual columns.
  *
- * This is effectively the {@link VectorGroupByEngine} analog of
- * {@link org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingGroupByColumnSelectorStrategy} for
- * String columns
+ * @see DictionaryBuildingComplexGroupByVectorColumnSelector similar selector for complex columns
+ * @see DictionaryBuildingGroupByColumnSelectorStrategy#forType(ColumnType) which creates the nonvectorized version
  */
-public class DictionaryBuildingSingleValueStringGroupByVectorColumnSelector implements GroupByVectorColumnSelector
+public class DictionaryBuildingSingleValueStringGroupByVectorColumnSelector
+    extends DictionaryBuildingGroupByVectorColumnSelector<String>
 {
-  private static final int GROUP_BY_MISSING_VALUE = -1;
-
-  private final VectorObjectSelector selector;
-
-  private final List<String> dictionary = new ArrayList<>();
-  private final Object2IntOpenHashMap<String> reverseDictionary = new Object2IntOpenHashMap<>();
-
-  public DictionaryBuildingSingleValueStringGroupByVectorColumnSelector(VectorObjectSelector selector)
+  public DictionaryBuildingSingleValueStringGroupByVectorColumnSelector(final VectorObjectSelector selector)
   {
-    this.selector = selector;
-    this.reverseDictionary.defaultReturnValue(-1);
+    super(selector, new StringDimensionIdCodec());
   }
 
   @Override
-  public int getGroupingKeySize()
+  protected String convertValue(final Object rawValue)
   {
-    return Integer.BYTES;
+    return DimensionHandlerUtils.convertObjectToString(rawValue);
   }
 
-  @Override
-  public int writeKeys(
-      final WritableMemory keySpace,
-      final int keySize,
-      final int keyOffset,
-      final int startRow,
-      final int endRow
-  )
+  private static class StringDimensionIdCodec implements DimensionIdCodec<String>
   {
-    final Object[] vector = selector.getObjectVector();
-    int stateFootprintIncrease = 0;
+    private final List<String> dictionary = new ArrayList<>();
+    private final Object2IntMap<String> reverseDictionary = new Object2IntOpenHashMap<>();
 
-    for (int i = startRow, j = keyOffset; i < endRow; i++, j += keySize) {
-      final String value = DimensionHandlerUtils.convertObjectToString(vector[i]);
-      final int dictId = reverseDictionary.getInt(value);
+    StringDimensionIdCodec()
+    {
+      reverseDictionary.defaultReturnValue(-1);
+    }
+
+    @Override
+    public MemoryFootprint<Integer> lookupId(final String value)
+    {
+      int dictId = reverseDictionary.getInt(value);
+      int footprintIncrease = 0;
       if (dictId < 0) {
-        final int nextId = dictionary.size();
+        dictId = dictionary.size();
         dictionary.add(value);
-        reverseDictionary.put(value, nextId);
-        keySpace.putInt(j, nextId);
-
-        // Use same ROUGH_OVERHEAD_PER_DICTIONARY_ENTRY as the nonvectorized version; dictionary structure is the same.
-        stateFootprintIncrease +=
-            DictionaryBuildingUtils.estimateEntryFootprint((value == null ? 0 : value.length()) * Character.BYTES);
-      } else {
-        keySpace.putInt(j, dictId);
+        reverseDictionary.put(value, dictId);
+        footprintIncrease =
+            DictionaryBuildingUtils.estimateEntryFootprint(value == null ? 0 : value.length() * Character.BYTES);
       }
+      return new MemoryFootprint<>(dictId, footprintIncrease);
     }
 
-    return stateFootprintIncrease;
-  }
+    @Override
+    public String idToKey(final int id)
+    {
+      return dictionary.get(id);
+    }
 
-  @Override
-  public void writeKeyToResultRow(
-      final MemoryPointer keyMemory,
-      final int keyOffset,
-      final ResultRow resultRow,
-      final int resultRowPosition
-  )
-  {
-    final int id = keyMemory.memory().getInt(keyMemory.position() + keyOffset);
-    // GROUP_BY_MISSING_VALUE is used to indicate empty rows, which are omitted from the result map.
-    if (id != GROUP_BY_MISSING_VALUE) {
-      final String value = dictionary.get(id);
-      resultRow.set(resultRowPosition, value);
-    } else {
-      resultRow.set(resultRowPosition, null);
+    @Override
+    public boolean canCompareIds()
+    {
+      return false;
     }
-  }
 
-  @Override
-  public void reset()
-  {
-    dictionary.clear();
-    reverseDictionary.clear();
+    @Override
+    public void reset()
+    {
+      dictionary.clear();
+      reverseDictionary.clear();
+    }
   }
 }
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java
index 9c73d071408..2721ce75277 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java
@@ -127,8 +127,11 @@ public class GroupByVectorColumnProcessorFactory implements VectorColumnProcesso
         );
       }
       return new DictionaryBuildingSingleValueStringGroupByVectorColumnSelector(selector);
+    } else if (capabilities.is(ValueType.COMPLEX)) {
+      return new DictionaryBuildingComplexGroupByVectorColumnSelector(selector, capabilities.toColumnType());
+    } else {
+      return NilGroupByVectorColumnSelector.INSTANCE;
     }
-    return NilGroupByVectorColumnSelector.INSTANCE;
   }
 
   /**
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java
index 957a5f4189e..f63dd862c4f 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java
@@ -185,8 +185,8 @@ public class VectorGroupByEngine
         return false;
       }
 
-      if (!dimension.getOutputType().isPrimitive()) {
-        // group by on arrays and complex types is not currently supported in the vector processing engine
+      if (dimension.getOutputType().isArray()) {
+        // group by on arrays is not currently supported in the vector processing engine
         return false;
       }
 
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
index 95ff5b28b85..64c17a82f4d 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
@@ -52,6 +52,7 @@ import org.apache.druid.query.filter.ExpressionDimFilter;
 import org.apache.druid.query.filter.LikeDimFilter;
 import org.apache.druid.query.groupby.GroupByQuery;
 import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
+import org.apache.druid.query.groupby.orderby.NoopLimitSpec;
 import org.apache.druid.query.groupby.orderby.OrderByColumnSpec;
 import org.apache.druid.query.ordering.StringComparators;
 import org.apache.druid.query.scan.ScanQuery;
@@ -75,6 +76,7 @@ import 
org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFacto
 import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
 import 
org.apache.druid.sql.calcite.CalciteNestedDataQueryTest.NestedComponentSupplier;
 import org.apache.druid.sql.calcite.filtration.Filtration;
+import org.apache.druid.sql.calcite.run.EngineFeature;
 import 
org.apache.druid.sql.calcite.util.SqlTestFramework.StandardComponentSupplier;
 import org.apache.druid.sql.calcite.util.TestDataBuilder;
 import org.apache.druid.timeline.DataSegment;
@@ -619,33 +621,24 @@ public abstract class CalciteNestedDataQueryTest extends 
BaseCalciteQueryTest
   @Test
   public void testGroupByOnNestedColumn()
   {
-    cannotVectorize();
     testQuery(
-        "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1",
+        "SELECT nester, SUM(\"long\") FROM druid.nested GROUP BY 1",
         ImmutableList.of(
             GroupByQuery.builder()
                         .setDataSource(DATA_SOURCE)
                         .setInterval(querySegmentSpec(Filtration.eternity()))
                         .setGranularity(Granularities.ALL)
-                        .setVirtualColumns(
-                            new ExpressionVirtualColumn(
-                                "v0",
-                                "strlen(\"string\")",
-                                ColumnType.LONG,
-                                queryFramework().macroTable()
-                            )
-                        )
                         .setDimensions(dimensions(new 
DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA)))
-                        .setAggregatorSpecs(aggregators(new 
LongSumAggregatorFactory("a0", "v0")))
+                        .setAggregatorSpecs(aggregators(new 
LongSumAggregatorFactory("a0", "long")))
                         .setContext(QUERY_CONTEXT_DEFAULT)
                         .build()
         ),
         ImmutableList.of(
-            new Object[]{null, 9L},
-            new Object[]{"\"hello\"", 3L},
-            new Object[]{"2", 3L},
-            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}", 
3L},
-            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 3L}
+            new Object[]{null, 6L},
+            new Object[]{"\"hello\"", 4L},
+            new Object[]{"2", 2L},
+            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}", 
5L},
+            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 5L}
         )
     );
   }
@@ -653,33 +646,38 @@ public abstract class CalciteNestedDataQueryTest extends 
BaseCalciteQueryTest
   @Test
   public void testGroupByOnNestedColumnWithOrderBy()
   {
-    cannotVectorize();
     testQuery(
-        "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1",
+        "SELECT nester, SUM(\"long\") FROM druid.nested GROUP BY 1 ORDER BY 1",
         ImmutableList.of(
             GroupByQuery.builder()
                         .setDataSource(DATA_SOURCE)
                         .setInterval(querySegmentSpec(Filtration.eternity()))
                         .setGranularity(Granularities.ALL)
-                        .setVirtualColumns(
-                            new ExpressionVirtualColumn(
-                                "v0",
-                                "strlen(\"string\")",
-                                ColumnType.LONG,
-                                queryFramework().macroTable()
+                        .setDimensions(dimensions(new 
DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA)))
+                        .setAggregatorSpecs(aggregators(new 
LongSumAggregatorFactory("a0", "long")))
+                        .setLimitSpec(
+                            
queryFramework().engine().featureAvailable(EngineFeature.GROUPBY_IMPLICITLY_SORTS)
+                            ? NoopLimitSpec.instance()
+                            : new DefaultLimitSpec(
+                                ImmutableList.of(
+                                    new OrderByColumnSpec(
+                                        "d0",
+                                        OrderByColumnSpec.Direction.ASCENDING,
+                                        StringComparators.NATURAL
+                                    )
+                                ),
+                                Integer.MAX_VALUE
                             )
                         )
-                        .setDimensions(dimensions(new 
DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA)))
-                        .setAggregatorSpecs(aggregators(new 
LongSumAggregatorFactory("a0", "v0")))
                         .setContext(QUERY_CONTEXT_DEFAULT)
                         .build()
         ),
         ImmutableList.of(
-            new Object[]{null, 9L},
-            new Object[]{"\"hello\"", 3L},
-            new Object[]{"2", 3L},
-            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}", 
3L},
-            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 3L}
+            new Object[]{null, 6L},
+            new Object[]{"\"hello\"", 4L},
+            new Object[]{"2", 2L},
+            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}", 
5L},
+            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 5L}
         )
     );
   }
@@ -687,24 +685,15 @@ public abstract class CalciteNestedDataQueryTest extends 
BaseCalciteQueryTest
   @Test
   public void testGroupByOnNestedColumnWithOrderByAndLimit()
   {
-    cannotVectorize();
     testQuery(
-        "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1 ORDER 
BY 1 LIMIT 100",
+        "SELECT nester, SUM(\"long\") FROM druid.nested GROUP BY 1 ORDER BY 1 
LIMIT 100",
         ImmutableList.of(
             GroupByQuery.builder()
                         .setDataSource(DATA_SOURCE)
                         .setInterval(querySegmentSpec(Filtration.eternity()))
                         .setGranularity(Granularities.ALL)
-                        .setVirtualColumns(
-                            new ExpressionVirtualColumn(
-                                "v0",
-                                "strlen(\"string\")",
-                                ColumnType.LONG,
-                                queryFramework().macroTable()
-                            )
-                        )
                         .setDimensions(dimensions(new 
DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA)))
-                        .setAggregatorSpecs(aggregators(new 
LongSumAggregatorFactory("a0", "v0")))
+                        .setAggregatorSpecs(aggregators(new 
LongSumAggregatorFactory("a0", "long")))
                         .setLimitSpec(new DefaultLimitSpec(
                             ImmutableList.of(new OrderByColumnSpec(
                                 "d0",
@@ -717,11 +706,11 @@ public abstract class CalciteNestedDataQueryTest extends 
BaseCalciteQueryTest
                         .build()
         ),
         ImmutableList.of(
-            new Object[]{null, 9L},
-            new Object[]{"\"hello\"", 3L},
-            new Object[]{"2", 3L},
-            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}", 
3L},
-            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 3L}
+            new Object[]{null, 6L},
+            new Object[]{"\"hello\"", 4L},
+            new Object[]{"2", 2L},
+            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}", 
5L},
+            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 5L}
         )
     );
   }
@@ -729,24 +718,15 @@ public abstract class CalciteNestedDataQueryTest extends 
BaseCalciteQueryTest
   @Test
   public void testGroupByOnNestedColumnWithOrderByAndLimit2()
   {
-    cannotVectorize();
     testQuery(
-        "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1 ORDER 
BY 1 LIMIT 2",
+        "SELECT nester, SUM(\"long\") FROM druid.nested GROUP BY 1 ORDER BY 1 
LIMIT 2",
         ImmutableList.of(
             GroupByQuery.builder()
                         .setDataSource(DATA_SOURCE)
                         .setInterval(querySegmentSpec(Filtration.eternity()))
                         .setGranularity(Granularities.ALL)
-                        .setVirtualColumns(
-                            new ExpressionVirtualColumn(
-                                "v0",
-                                "strlen(\"string\")",
-                                ColumnType.LONG,
-                                queryFramework().macroTable()
-                            )
-                        )
                         .setDimensions(dimensions(new 
DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA)))
-                        .setAggregatorSpecs(aggregators(new 
LongSumAggregatorFactory("a0", "v0")))
+                        .setAggregatorSpecs(aggregators(new 
LongSumAggregatorFactory("a0", "long")))
                         .setLimitSpec(new DefaultLimitSpec(
                             ImmutableList.of(new OrderByColumnSpec(
                                 "d0",
@@ -759,8 +739,8 @@ public abstract class CalciteNestedDataQueryTest extends 
BaseCalciteQueryTest
                         .build()
         ),
         ImmutableList.of(
-            new Object[]{null, 9L},
-            new Object[]{"\"hello\"", 3L}
+            new Object[]{null, 6L},
+            new Object[]{"\"hello\"", 4L}
         )
     );
   }
@@ -768,34 +748,25 @@ public abstract class CalciteNestedDataQueryTest extends 
BaseCalciteQueryTest
   @Test
   public void testGroupByOnNestedColumnWithLimit()
   {
-    cannotVectorize();
     testQuery(
-        "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1 LIMIT 
100",
+        "SELECT nester, SUM(\"long\") FROM druid.nested GROUP BY 1 LIMIT 100",
         ImmutableList.of(
             GroupByQuery.builder()
                         .setDataSource(DATA_SOURCE)
                         .setInterval(querySegmentSpec(Filtration.eternity()))
                         .setGranularity(Granularities.ALL)
-                        .setVirtualColumns(
-                            new ExpressionVirtualColumn(
-                                "v0",
-                                "strlen(\"string\")",
-                                ColumnType.LONG,
-                                queryFramework().macroTable()
-                            )
-                        )
                         .setDimensions(dimensions(new 
DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA)))
-                        .setAggregatorSpecs(aggregators(new 
LongSumAggregatorFactory("a0", "v0")))
+                        .setAggregatorSpecs(aggregators(new 
LongSumAggregatorFactory("a0", "long")))
                         .setLimitSpec(new DefaultLimitSpec(null, 100))
                         .setContext(QUERY_CONTEXT_DEFAULT)
                         .build()
         ),
         ImmutableList.of(
-            new Object[]{null, 9L},
-            new Object[]{"\"hello\"", 3L},
-            new Object[]{"2", 3L},
-            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}", 
3L},
-            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 3L}
+            new Object[]{null, 6L},
+            new Object[]{"\"hello\"", 4L},
+            new Object[]{"2", 2L},
+            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}", 
5L},
+            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 5L}
         )
     );
   }
@@ -803,31 +774,22 @@ public abstract class CalciteNestedDataQueryTest extends 
BaseCalciteQueryTest
   @Test
   public void testGroupByOnNestedColumnWithLimit2()
   {
-    cannotVectorize();
     testQuery(
-        "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1 LIMIT 
2",
+        "SELECT nester, SUM(\"long\") FROM druid.nested GROUP BY 1 LIMIT 2",
         ImmutableList.of(
             GroupByQuery.builder()
                         .setDataSource(DATA_SOURCE)
                         .setInterval(querySegmentSpec(Filtration.eternity()))
                         .setGranularity(Granularities.ALL)
-                        .setVirtualColumns(
-                            new ExpressionVirtualColumn(
-                                "v0",
-                                "strlen(\"string\")",
-                                ColumnType.LONG,
-                                queryFramework().macroTable()
-                            )
-                        )
                         .setDimensions(dimensions(new 
DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA)))
-                        .setAggregatorSpecs(aggregators(new 
LongSumAggregatorFactory("a0", "v0")))
+                        .setAggregatorSpecs(aggregators(new 
LongSumAggregatorFactory("a0", "long")))
                         .setLimitSpec(new DefaultLimitSpec(null, 2))
                         .setContext(QUERY_CONTEXT_DEFAULT)
                         .build()
         ),
         ImmutableList.of(
-            new Object[]{null, 9L},
-            new Object[]{"\"hello\"", 3L}
+            new Object[]{null, 6L},
+            new Object[]{"\"hello\"", 4L}
         )
     );
   }
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
index edd78ae2007..269f5863374 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
@@ -751,7 +751,6 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
   @Test
   public void testGroupingOnStringSerializablePairLongString()
   {
-    cannotVectorize();
     testQuery(
         "SELECT COUNT(*) FROM (SELECT string_first_added FROM 
druid.wikipedia_first_last GROUP BY 1)",
         ImmutableList.of(
@@ -783,7 +782,6 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
   @Test
   public void testGroupingOnStringSerializablePairLongLong()
   {
-    cannotVectorize();
     testQuery(
         "SELECT COUNT(*) FROM (SELECT long_first_added FROM 
druid.wikipedia_first_last GROUP BY 1)",
         ImmutableList.of(
@@ -815,7 +813,6 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
   @Test
   public void testGroupingOnStringSerializablePairLongDouble()
   {
-    cannotVectorize();
     testQuery(
         "SELECT COUNT(*) FROM (SELECT double_first_added FROM 
druid.wikipedia_first_last GROUP BY 1)",
         ImmutableList.of(
@@ -847,7 +844,6 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
   @Test
   public void testGroupingOnStringSerializablePairLongFloat()
   {
-    cannotVectorize();
     testQuery(
         "SELECT COUNT(*) FROM (SELECT float_first_added FROM 
druid.wikipedia_first_last GROUP BY 1)",
         ImmutableList.of(


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(druid) branch master updated: GroupBy: Vector processing for complex dimensions. (#18504)

Reply via email to