This is an automated email from the ASF dual-hosted git repository.
gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 45295ca58e7 GroupBy: Vector processing for complex dimensions. (#18504)
45295ca58e7 is described below
commit 45295ca58e788cc5ce1e248aea43bec304099404
Author: Gian Merlino <[email protected]>
AuthorDate: Thu Oct 30 15:15:36 2025 -0700
GroupBy: Vector processing for complex dimensions. (#18504)
This patch adds DictionaryBuildingComplexGroupByVectorColumnSelector,
and refactors DictionaryBuildingSingleValueStringGroupByVectorColumnSelector
to share a common base class. The design is similar to the classes
used by the nonvectorized dictionary-building selectors, and reuses
the DimensionIdCodec component.
---
...onaryBuildingGroupByColumnSelectorStrategy.java | 2 +-
.../epinephelinae/column/MemoryFootprint.java | 3 +-
...BuildingComplexGroupByVectorColumnSelector.java | 51 ++++++++
...tionaryBuildingGroupByVectorColumnSelector.java | 101 +++++++++++++++
...ngleValueStringGroupByVectorColumnSelector.java | 110 +++++++---------
.../GroupByVectorColumnProcessorFactory.java | 5 +-
.../epinephelinae/vector/VectorGroupByEngine.java | 4 +-
.../sql/calcite/CalciteNestedDataQueryTest.java | 140 ++++++++-------------
.../apache/druid/sql/calcite/CalciteQueryTest.java | 4 -
9 files changed, 257 insertions(+), 163 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DictionaryBuildingGroupByColumnSelectorStrategy.java
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DictionaryBuildingGroupByColumnSelectorStrategy.java
index cf033eaa65d..e40db65b33e 100644
---
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DictionaryBuildingGroupByColumnSelectorStrategy.java
+++
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DictionaryBuildingGroupByColumnSelectorStrategy.java
@@ -110,7 +110,7 @@ public class
DictionaryBuildingGroupByColumnSelectorStrategy<DimensionType>
);
}
- private static class UniValueDimensionIdCodec implements
DimensionIdCodec<Object>
+ public static class UniValueDimensionIdCodec implements
DimensionIdCodec<Object>
{
/**
* Dictionary for mapping the dimension value to an index. i-th position
in the dictionary holds the value represented
diff --git
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/MemoryFootprint.java
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/MemoryFootprint.java
index 64303770d62..32cd2999e5c 100644
---
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/MemoryFootprint.java
+++
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/MemoryFootprint.java
@@ -27,8 +27,7 @@ public class MemoryFootprint<T>
private final T value;
private final int footprintIncrease;
- // Reduced visibility
- MemoryFootprint(T value, int footprintIncrease)
+ public MemoryFootprint(T value, int footprintIncrease)
{
this.value = value;
this.footprintIncrease = footprintIncrease;
diff --git
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingComplexGroupByVectorColumnSelector.java
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingComplexGroupByVectorColumnSelector.java
new file mode 100644
index 00000000000..24d101dafb5
--- /dev/null
+++
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingComplexGroupByVectorColumnSelector.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.groupby.epinephelinae.vector;
+
+import
org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingGroupByColumnSelectorStrategy;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+
+/**
+ * Selector that groups complex columns using a dictionary.
+ *
+ * @see DictionaryBuildingSingleValueStringGroupByVectorColumnSelector similar
selector for non-dict-encoded strings
+ * @see DictionaryBuildingGroupByColumnSelectorStrategy#forType(ColumnType)
which creates the nonvectorized version
+ */
+public class DictionaryBuildingComplexGroupByVectorColumnSelector
+ extends DictionaryBuildingGroupByVectorColumnSelector<Object>
+{
+ public DictionaryBuildingComplexGroupByVectorColumnSelector(
+ final VectorObjectSelector selector,
+ final ColumnType columnType
+ )
+ {
+ super(
+ selector,
+ new
DictionaryBuildingGroupByColumnSelectorStrategy.UniValueDimensionIdCodec(columnType.getNullableStrategy())
+ );
+ }
+
+ @Override
+ protected Object convertValue(final Object rawValue)
+ {
+ return rawValue;
+ }
+}
diff --git
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingGroupByVectorColumnSelector.java
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingGroupByVectorColumnSelector.java
new file mode 100644
index 00000000000..928e904df55
--- /dev/null
+++
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingGroupByVectorColumnSelector.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.groupby.epinephelinae.vector;
+
+import org.apache.datasketches.memory.WritableMemory;
+import org.apache.druid.query.groupby.ResultRow;
+import org.apache.druid.query.groupby.epinephelinae.collection.MemoryPointer;
+import
org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingGroupByColumnSelectorStrategy;
+import org.apache.druid.query.groupby.epinephelinae.column.DimensionIdCodec;
+import org.apache.druid.query.groupby.epinephelinae.column.MemoryFootprint;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+
+/**
+ * Base class for {@link GroupByVectorColumnSelector} implementations that build dictionaries for values that are not
+ * natively dictionary-encoded.
+ *
+ * @see DictionaryBuildingGroupByColumnSelectorStrategy the nonvectorized
version
+ */
+public abstract class DictionaryBuildingGroupByVectorColumnSelector<T>
implements GroupByVectorColumnSelector
+{
+ protected final VectorObjectSelector selector;
+ protected final DimensionIdCodec<T> dimensionIdCodec;
+
+ protected DictionaryBuildingGroupByVectorColumnSelector(
+ final VectorObjectSelector selector,
+ final DimensionIdCodec<T> dimensionIdCodec
+ )
+ {
+ this.selector = selector;
+ this.dimensionIdCodec = dimensionIdCodec;
+ }
+
+ @Override
+ public final int getGroupingKeySize()
+ {
+ return Integer.BYTES;
+ }
+
+ @Override
+ public final int writeKeys(
+ final WritableMemory keySpace,
+ final int keySize,
+ final int keyOffset,
+ final int startRow,
+ final int endRow
+ )
+ {
+ final Object[] vector = selector.getObjectVector();
+ int stateFootprintIncrease = 0;
+
+ for (int i = startRow, j = keyOffset; i < endRow; i++, j += keySize) {
+ final T value = convertValue(vector[i]);
+ final MemoryFootprint<Integer> idAndMemoryIncrease =
dimensionIdCodec.lookupId(value);
+ keySpace.putInt(j, idAndMemoryIncrease.value());
+ stateFootprintIncrease += idAndMemoryIncrease.memoryIncrease();
+ }
+
+ return stateFootprintIncrease;
+ }
+
+ @Override
+ public final void writeKeyToResultRow(
+ final MemoryPointer keyMemory,
+ final int keyOffset,
+ final ResultRow resultRow,
+ final int resultRowPosition
+ )
+ {
+ final int id = keyMemory.memory().getInt(keyMemory.position() + keyOffset);
+ final T value = dimensionIdCodec.idToKey(id);
+ resultRow.set(resultRowPosition, value);
+ }
+
+ @Override
+ public final void reset()
+ {
+ dimensionIdCodec.reset();
+ }
+
+ /**
+ * Convert raw value from the vector to the appropriate type for this
selector.
+ */
+ protected abstract T convertValue(Object rawValue);
+}
diff --git
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java
index 0c4640c85f9..301821d6a6c 100644
---
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java
+++
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java
@@ -19,12 +19,14 @@
package org.apache.druid.query.groupby.epinephelinae.vector;
+import it.unimi.dsi.fastutil.objects.Object2IntMap;
import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
-import org.apache.datasketches.memory.WritableMemory;
-import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.query.groupby.epinephelinae.DictionaryBuildingUtils;
-import org.apache.druid.query.groupby.epinephelinae.collection.MemoryPointer;
+import
org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingGroupByColumnSelectorStrategy;
+import org.apache.druid.query.groupby.epinephelinae.column.DimensionIdCodec;
+import org.apache.druid.query.groupby.epinephelinae.column.MemoryFootprint;
import org.apache.druid.segment.DimensionHandlerUtils;
+import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.vector.VectorObjectSelector;
import java.util.ArrayList;
@@ -34,85 +36,65 @@ import java.util.List;
* A {@link GroupByVectorColumnSelector} that builds an internal
String<->Integer dictionary, used for grouping
* single-valued STRING columns which are not natively dictionary encoded,
e.g. expression virtual columns.
*
- * This is effectively the {@link VectorGroupByEngine} analog of
- * {@link
org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingGroupByColumnSelectorStrategy}
for
- * String columns
+ * @see DictionaryBuildingComplexGroupByVectorColumnSelector similar selector
for complex columns
+ * @see DictionaryBuildingGroupByColumnSelectorStrategy#forType(ColumnType)
which creates the nonvectorized version
*/
-public class DictionaryBuildingSingleValueStringGroupByVectorColumnSelector
implements GroupByVectorColumnSelector
+public class DictionaryBuildingSingleValueStringGroupByVectorColumnSelector
+ extends DictionaryBuildingGroupByVectorColumnSelector<String>
{
- private static final int GROUP_BY_MISSING_VALUE = -1;
-
- private final VectorObjectSelector selector;
-
- private final List<String> dictionary = new ArrayList<>();
- private final Object2IntOpenHashMap<String> reverseDictionary = new
Object2IntOpenHashMap<>();
-
- public
DictionaryBuildingSingleValueStringGroupByVectorColumnSelector(VectorObjectSelector
selector)
+ public DictionaryBuildingSingleValueStringGroupByVectorColumnSelector(final
VectorObjectSelector selector)
{
- this.selector = selector;
- this.reverseDictionary.defaultReturnValue(-1);
+ super(selector, new StringDimensionIdCodec());
}
@Override
- public int getGroupingKeySize()
+ protected String convertValue(final Object rawValue)
{
- return Integer.BYTES;
+ return DimensionHandlerUtils.convertObjectToString(rawValue);
}
- @Override
- public int writeKeys(
- final WritableMemory keySpace,
- final int keySize,
- final int keyOffset,
- final int startRow,
- final int endRow
- )
+ private static class StringDimensionIdCodec implements
DimensionIdCodec<String>
{
- final Object[] vector = selector.getObjectVector();
- int stateFootprintIncrease = 0;
+ private final List<String> dictionary = new ArrayList<>();
+ private final Object2IntMap<String> reverseDictionary = new
Object2IntOpenHashMap<>();
- for (int i = startRow, j = keyOffset; i < endRow; i++, j += keySize) {
- final String value =
DimensionHandlerUtils.convertObjectToString(vector[i]);
- final int dictId = reverseDictionary.getInt(value);
+ StringDimensionIdCodec()
+ {
+ reverseDictionary.defaultReturnValue(-1);
+ }
+
+ @Override
+ public MemoryFootprint<Integer> lookupId(final String value)
+ {
+ int dictId = reverseDictionary.getInt(value);
+ int footprintIncrease = 0;
if (dictId < 0) {
- final int nextId = dictionary.size();
+ dictId = dictionary.size();
dictionary.add(value);
- reverseDictionary.put(value, nextId);
- keySpace.putInt(j, nextId);
-
- // Use same ROUGH_OVERHEAD_PER_DICTIONARY_ENTRY as the nonvectorized
version; dictionary structure is the same.
- stateFootprintIncrease +=
- DictionaryBuildingUtils.estimateEntryFootprint((value == null ? 0
: value.length()) * Character.BYTES);
- } else {
- keySpace.putInt(j, dictId);
+ reverseDictionary.put(value, dictId);
+ footprintIncrease =
+ DictionaryBuildingUtils.estimateEntryFootprint(value == null ? 0 :
value.length() * Character.BYTES);
}
+ return new MemoryFootprint<>(dictId, footprintIncrease);
}
- return stateFootprintIncrease;
- }
+ @Override
+ public String idToKey(final int id)
+ {
+ return dictionary.get(id);
+ }
- @Override
- public void writeKeyToResultRow(
- final MemoryPointer keyMemory,
- final int keyOffset,
- final ResultRow resultRow,
- final int resultRowPosition
- )
- {
- final int id = keyMemory.memory().getInt(keyMemory.position() + keyOffset);
- // GROUP_BY_MISSING_VALUE is used to indicate empty rows, which are
omitted from the result map.
- if (id != GROUP_BY_MISSING_VALUE) {
- final String value = dictionary.get(id);
- resultRow.set(resultRowPosition, value);
- } else {
- resultRow.set(resultRowPosition, null);
+ @Override
+ public boolean canCompareIds()
+ {
+ return false;
}
- }
- @Override
- public void reset()
- {
- dictionary.clear();
- reverseDictionary.clear();
+ @Override
+ public void reset()
+ {
+ dictionary.clear();
+ reverseDictionary.clear();
+ }
}
}
diff --git
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java
index 9c73d071408..2721ce75277 100644
---
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java
+++
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java
@@ -127,8 +127,11 @@ public class GroupByVectorColumnProcessorFactory
implements VectorColumnProcesso
);
}
return new
DictionaryBuildingSingleValueStringGroupByVectorColumnSelector(selector);
+ } else if (capabilities.is(ValueType.COMPLEX)) {
+ return new
DictionaryBuildingComplexGroupByVectorColumnSelector(selector,
capabilities.toColumnType());
+ } else {
+ return NilGroupByVectorColumnSelector.INSTANCE;
}
- return NilGroupByVectorColumnSelector.INSTANCE;
}
/**
diff --git
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java
index 957a5f4189e..f63dd862c4f 100644
---
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java
+++
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java
@@ -185,8 +185,8 @@ public class VectorGroupByEngine
return false;
}
- if (!dimension.getOutputType().isPrimitive()) {
- // group by on arrays and complex types is not currently supported in
the vector processing engine
+ if (dimension.getOutputType().isArray()) {
+ // group by on arrays is not currently supported in the vector
processing engine
return false;
}
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
index 95ff5b28b85..64c17a82f4d 100644
---
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
+++
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
@@ -52,6 +52,7 @@ import org.apache.druid.query.filter.ExpressionDimFilter;
import org.apache.druid.query.filter.LikeDimFilter;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
+import org.apache.druid.query.groupby.orderby.NoopLimitSpec;
import org.apache.druid.query.groupby.orderby.OrderByColumnSpec;
import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.query.scan.ScanQuery;
@@ -75,6 +76,7 @@ import
org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFacto
import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
import
org.apache.druid.sql.calcite.CalciteNestedDataQueryTest.NestedComponentSupplier;
import org.apache.druid.sql.calcite.filtration.Filtration;
+import org.apache.druid.sql.calcite.run.EngineFeature;
import
org.apache.druid.sql.calcite.util.SqlTestFramework.StandardComponentSupplier;
import org.apache.druid.sql.calcite.util.TestDataBuilder;
import org.apache.druid.timeline.DataSegment;
@@ -619,33 +621,24 @@ public abstract class CalciteNestedDataQueryTest extends
BaseCalciteQueryTest
@Test
public void testGroupByOnNestedColumn()
{
- cannotVectorize();
testQuery(
- "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1",
+ "SELECT nester, SUM(\"long\") FROM druid.nested GROUP BY 1",
ImmutableList.of(
GroupByQuery.builder()
.setDataSource(DATA_SOURCE)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
- .setVirtualColumns(
- new ExpressionVirtualColumn(
- "v0",
- "strlen(\"string\")",
- ColumnType.LONG,
- queryFramework().macroTable()
- )
- )
.setDimensions(dimensions(new
DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA)))
- .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "v0")))
+ .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "long")))
.setContext(QUERY_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
- new Object[]{null, 9L},
- new Object[]{"\"hello\"", 3L},
- new Object[]{"2", 3L},
- new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}",
3L},
- new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 3L}
+ new Object[]{null, 6L},
+ new Object[]{"\"hello\"", 4L},
+ new Object[]{"2", 2L},
+ new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}",
5L},
+ new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 5L}
)
);
}
@@ -653,33 +646,38 @@ public abstract class CalciteNestedDataQueryTest extends
BaseCalciteQueryTest
@Test
public void testGroupByOnNestedColumnWithOrderBy()
{
- cannotVectorize();
testQuery(
- "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1",
+ "SELECT nester, SUM(\"long\") FROM druid.nested GROUP BY 1 ORDER BY 1",
ImmutableList.of(
GroupByQuery.builder()
.setDataSource(DATA_SOURCE)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
- .setVirtualColumns(
- new ExpressionVirtualColumn(
- "v0",
- "strlen(\"string\")",
- ColumnType.LONG,
- queryFramework().macroTable()
+ .setDimensions(dimensions(new
DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA)))
+ .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "long")))
+ .setLimitSpec(
+
queryFramework().engine().featureAvailable(EngineFeature.GROUPBY_IMPLICITLY_SORTS)
+ ? NoopLimitSpec.instance()
+ : new DefaultLimitSpec(
+ ImmutableList.of(
+ new OrderByColumnSpec(
+ "d0",
+ OrderByColumnSpec.Direction.ASCENDING,
+ StringComparators.NATURAL
+ )
+ ),
+ Integer.MAX_VALUE
)
)
- .setDimensions(dimensions(new
DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA)))
- .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "v0")))
.setContext(QUERY_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
- new Object[]{null, 9L},
- new Object[]{"\"hello\"", 3L},
- new Object[]{"2", 3L},
- new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}",
3L},
- new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 3L}
+ new Object[]{null, 6L},
+ new Object[]{"\"hello\"", 4L},
+ new Object[]{"2", 2L},
+ new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}",
5L},
+ new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 5L}
)
);
}
@@ -687,24 +685,15 @@ public abstract class CalciteNestedDataQueryTest extends
BaseCalciteQueryTest
@Test
public void testGroupByOnNestedColumnWithOrderByAndLimit()
{
- cannotVectorize();
testQuery(
- "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1 ORDER
BY 1 LIMIT 100",
+ "SELECT nester, SUM(\"long\") FROM druid.nested GROUP BY 1 ORDER BY 1
LIMIT 100",
ImmutableList.of(
GroupByQuery.builder()
.setDataSource(DATA_SOURCE)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
- .setVirtualColumns(
- new ExpressionVirtualColumn(
- "v0",
- "strlen(\"string\")",
- ColumnType.LONG,
- queryFramework().macroTable()
- )
- )
.setDimensions(dimensions(new
DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA)))
- .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "v0")))
+ .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "long")))
.setLimitSpec(new DefaultLimitSpec(
ImmutableList.of(new OrderByColumnSpec(
"d0",
@@ -717,11 +706,11 @@ public abstract class CalciteNestedDataQueryTest extends
BaseCalciteQueryTest
.build()
),
ImmutableList.of(
- new Object[]{null, 9L},
- new Object[]{"\"hello\"", 3L},
- new Object[]{"2", 3L},
- new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}",
3L},
- new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 3L}
+ new Object[]{null, 6L},
+ new Object[]{"\"hello\"", 4L},
+ new Object[]{"2", 2L},
+ new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}",
5L},
+ new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 5L}
)
);
}
@@ -729,24 +718,15 @@ public abstract class CalciteNestedDataQueryTest extends
BaseCalciteQueryTest
@Test
public void testGroupByOnNestedColumnWithOrderByAndLimit2()
{
- cannotVectorize();
testQuery(
- "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1 ORDER
BY 1 LIMIT 2",
+ "SELECT nester, SUM(\"long\") FROM druid.nested GROUP BY 1 ORDER BY 1
LIMIT 2",
ImmutableList.of(
GroupByQuery.builder()
.setDataSource(DATA_SOURCE)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
- .setVirtualColumns(
- new ExpressionVirtualColumn(
- "v0",
- "strlen(\"string\")",
- ColumnType.LONG,
- queryFramework().macroTable()
- )
- )
.setDimensions(dimensions(new
DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA)))
- .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "v0")))
+ .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "long")))
.setLimitSpec(new DefaultLimitSpec(
ImmutableList.of(new OrderByColumnSpec(
"d0",
@@ -759,8 +739,8 @@ public abstract class CalciteNestedDataQueryTest extends
BaseCalciteQueryTest
.build()
),
ImmutableList.of(
- new Object[]{null, 9L},
- new Object[]{"\"hello\"", 3L}
+ new Object[]{null, 6L},
+ new Object[]{"\"hello\"", 4L}
)
);
}
@@ -768,34 +748,25 @@ public abstract class CalciteNestedDataQueryTest extends
BaseCalciteQueryTest
@Test
public void testGroupByOnNestedColumnWithLimit()
{
- cannotVectorize();
testQuery(
- "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1 LIMIT
100",
+ "SELECT nester, SUM(\"long\") FROM druid.nested GROUP BY 1 LIMIT 100",
ImmutableList.of(
GroupByQuery.builder()
.setDataSource(DATA_SOURCE)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
- .setVirtualColumns(
- new ExpressionVirtualColumn(
- "v0",
- "strlen(\"string\")",
- ColumnType.LONG,
- queryFramework().macroTable()
- )
- )
.setDimensions(dimensions(new
DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA)))
- .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "v0")))
+ .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "long")))
.setLimitSpec(new DefaultLimitSpec(null, 100))
.setContext(QUERY_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
- new Object[]{null, 9L},
- new Object[]{"\"hello\"", 3L},
- new Object[]{"2", 3L},
- new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}",
3L},
- new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 3L}
+ new Object[]{null, 6L},
+ new Object[]{"\"hello\"", 4L},
+ new Object[]{"2", 2L},
+ new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}",
5L},
+ new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 5L}
)
);
}
@@ -803,31 +774,22 @@ public abstract class CalciteNestedDataQueryTest extends
BaseCalciteQueryTest
@Test
public void testGroupByOnNestedColumnWithLimit2()
{
- cannotVectorize();
testQuery(
- "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1 LIMIT
2",
+ "SELECT nester, SUM(\"long\") FROM druid.nested GROUP BY 1 LIMIT 2",
ImmutableList.of(
GroupByQuery.builder()
.setDataSource(DATA_SOURCE)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
- .setVirtualColumns(
- new ExpressionVirtualColumn(
- "v0",
- "strlen(\"string\")",
- ColumnType.LONG,
- queryFramework().macroTable()
- )
- )
.setDimensions(dimensions(new
DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA)))
- .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "v0")))
+ .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "long")))
.setLimitSpec(new DefaultLimitSpec(null, 2))
.setContext(QUERY_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
- new Object[]{null, 9L},
- new Object[]{"\"hello\"", 3L}
+ new Object[]{null, 6L},
+ new Object[]{"\"hello\"", 4L}
)
);
}
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
index edd78ae2007..269f5863374 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
@@ -751,7 +751,6 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
@Test
public void testGroupingOnStringSerializablePairLongString()
{
- cannotVectorize();
testQuery(
"SELECT COUNT(*) FROM (SELECT string_first_added FROM
druid.wikipedia_first_last GROUP BY 1)",
ImmutableList.of(
@@ -783,7 +782,6 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
@Test
public void testGroupingOnStringSerializablePairLongLong()
{
- cannotVectorize();
testQuery(
"SELECT COUNT(*) FROM (SELECT long_first_added FROM
druid.wikipedia_first_last GROUP BY 1)",
ImmutableList.of(
@@ -815,7 +813,6 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
@Test
public void testGroupingOnStringSerializablePairLongDouble()
{
- cannotVectorize();
testQuery(
"SELECT COUNT(*) FROM (SELECT double_first_added FROM
druid.wikipedia_first_last GROUP BY 1)",
ImmutableList.of(
@@ -847,7 +844,6 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
@Test
public void testGroupingOnStringSerializablePairLongFloat()
{
- cannotVectorize();
testQuery(
"SELECT COUNT(*) FROM (SELECT float_first_added FROM
druid.wikipedia_first_last GROUP BY 1)",
ImmutableList.of(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]