This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-druid.git
The following commit(s) were added to refs/heads/master by this push:
new 9b499df Fix ExpressionVirtualColumn capabilities; fix groupBy's
improper uses of StorageAdapter#getColumnCapabilities. (#8013)
9b499df is described below
commit 9b499df14e8e06837a847b5b82f9d48dc38f7556
Author: Gian Merlino <[email protected]>
AuthorDate: Fri Jul 5 13:17:05 2019 -0700
Fix ExpressionVirtualColumn capabilities; fix groupBy's improper uses of
StorageAdapter#getColumnCapabilities. (#8013)
* GroupBy: Fix improper uses of StorageAdapter#getColumnCapabilities.
1) A usage in "isArrayAggregateApplicable" that would potentially
incorrectly use
array-based aggregation on a virtual column that shadows a real column.
2) A usage in "process" that would potentially use the more expensive
multi-value
aggregation path on a singly-valued virtual column. (No correctness
issue, but
a performance issue.)
* Add addl javadoc.
* ExpressionVirtualColumn: Set multi-value flag.
---
.../epinephelinae/GroupByQueryEngineV2.java | 55 +++++++++++++++-------
.../org/apache/druid/segment/StorageAdapter.java | 5 ++
.../segment/virtual/ExpressionVirtualColumn.java | 5 +-
3 files changed, 48 insertions(+), 17 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java
index e3c609e..7594b90 100644
---
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java
+++
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java
@@ -48,11 +48,13 @@ import
org.apache.druid.query.groupby.epinephelinae.column.LongGroupByColumnSele
import
org.apache.druid.query.groupby.epinephelinae.column.NullableValueGroupByColumnSelectorStrategy;
import
org.apache.druid.query.groupby.epinephelinae.column.StringGroupByColumnSelectorStrategy;
import org.apache.druid.query.groupby.strategy.GroupByStrategyV2;
+import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.DimensionHandlerUtils;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.StorageAdapter;
+import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.IndexedInts;
@@ -115,14 +117,6 @@ public class GroupByQueryEngineV2
null
);
- final boolean allSingleValueDims = query
- .getDimensions()
- .stream()
- .allMatch(dimension -> {
- final ColumnCapabilities columnCapabilities =
storageAdapter.getColumnCapabilities(dimension.getDimension());
- return columnCapabilities != null &&
!columnCapabilities.hasMultipleValues();
- });
-
final ResourceHolder<ByteBuffer> bufferHolder =
intermediateResultsBufferPool.take();
final String fudgeTimestampString = NullHandling.emptyToNullIfNeeded(
@@ -140,18 +134,38 @@ public class GroupByQueryEngineV2
@Override
public GroupByEngineIterator make()
{
+ final ColumnSelectorFactory columnSelectorFactory =
cursor.getColumnSelectorFactory();
+ final boolean allSingleValueDims = query
+ .getDimensions()
+ .stream()
+ .allMatch(dimension -> {
+ final ColumnCapabilities columnCapabilities =
columnSelectorFactory.getColumnCapabilities(
+ dimension.getDimension()
+ );
+ return columnCapabilities != null &&
!columnCapabilities.hasMultipleValues();
+ });
+
ColumnSelectorPlus<GroupByColumnSelectorStrategy>[]
selectorPlus = DimensionHandlerUtils
.createColumnSelectorPluses(
STRATEGY_FACTORY,
query.getDimensions(),
- cursor.getColumnSelectorFactory()
+ columnSelectorFactory
);
GroupByColumnSelectorPlus[] dims =
createGroupBySelectorPlus(selectorPlus);
final ByteBuffer buffer = bufferHolder.get();
- // Check array-based aggregation is applicable
- if (isArrayAggregateApplicable(querySpecificConfig, query,
dims, storageAdapter, buffer)) {
+ // Check if array-based aggregation is applicable
+ final boolean useArrayAggregation = isArrayAggregateApplicable(
+ querySpecificConfig,
+ query,
+ dims,
+ storageAdapter,
+ query.getVirtualColumns(),
+ buffer
+ );
+
+ if (useArrayAggregation) {
return new ArrayAggregateIterator(
query,
querySpecificConfig,
@@ -191,6 +205,7 @@ public class GroupByQueryEngineV2
GroupByQuery query,
GroupByColumnSelectorPlus[] dims,
StorageAdapter storageAdapter,
+ VirtualColumns virtualColumns,
ByteBuffer buffer
)
{
@@ -206,11 +221,19 @@ public class GroupByQueryEngineV2
columnCapabilities = null;
cardinality = 1;
} else if (dims.length == 1) {
+ // Only real columns can use array-based aggregation, since virtual
columns cannot currently report their
+ // cardinality. We need to check if a virtual column exists with the
same name, since virtual columns can shadow
+ // real columns, and we might miss that since we're going directly to
the StorageAdapter (which only knows about
+ // real columns).
+ if (virtualColumns.exists(dims[0].getName())) {
+ return false;
+ }
+
columnCapabilities =
storageAdapter.getColumnCapabilities(dims[0].getName());
cardinality = storageAdapter.getDimensionCardinality(dims[0].getName());
} else {
- columnCapabilities = null;
- cardinality = -1; // ArrayAggregateIterator is not available
+ // Cannot use array-based aggregation with more than one dimension.
+ return false;
}
// Choose array-based aggregation if the grouping key is a single string
dimension of a
@@ -225,11 +248,11 @@ public class GroupByQueryEngineV2
aggregatorFactories
);
- // Check that all keys and aggregated values can be contained the buffer
+ // Check that all keys and aggregated values can be contained in the
buffer
return requiredBufferCapacity <= buffer.capacity();
+ } else {
+ return false;
}
-
- return false;
}
private static class GroupByStrategyFactory implements
ColumnSelectorStrategyFactory<GroupByColumnSelectorStrategy>
diff --git
a/processing/src/main/java/org/apache/druid/segment/StorageAdapter.java
b/processing/src/main/java/org/apache/druid/segment/StorageAdapter.java
index 3319d09..1dfc00e 100644
--- a/processing/src/main/java/org/apache/druid/segment/StorageAdapter.java
+++ b/processing/src/main/java/org/apache/druid/segment/StorageAdapter.java
@@ -56,6 +56,11 @@ public interface StorageAdapter extends CursorFactory
* the column does exist but the capabilities are unknown. The latter is
possible with dynamically discovered
* columns.
*
+ * Note that StorageAdapters are representations of "real" segments, so they
are not aware of any virtual columns
+ * that may be involved in a query. In general, query engines should instead
use the method
+ * {@link ColumnSelectorFactory#getColumnCapabilities(String)}, which
returns capabilities for virtual columns as
+ * well.
+ *
* @param column column name
*
* @return capabilities, or null
diff --git
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
index 871682d4..c322fff 100644
---
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
+++
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
@@ -105,7 +105,10 @@ public class ExpressionVirtualColumn implements
VirtualColumn
@Override
public ColumnCapabilities capabilities(String columnName)
{
- return new ColumnCapabilitiesImpl().setType(outputType);
+ // Note: Ideally we would only "setHasMultipleValues(true)" if the
expression in question could potentially return
+ // multiple values. However, we don't currently have a good way of
determining this, so to be safe we always
+ // set the flag.
+ return new
ColumnCapabilitiesImpl().setType(outputType).setHasMultipleValues(true);
}
@Override
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]