This is an automated email from the ASF dual-hosted git repository.

cwylie pushed a commit to branch 0.15.1-incubating
in repository https://gitbox.apache.org/repos/asf/incubator-druid.git

commit 21d3600e61ccbb052e1618d1ac4f6be53c6687dd
Author: Gian Merlino <[email protected]>
AuthorDate: Fri Jul 5 13:17:05 2019 -0700

    Fix ExpressionVirtualColumn capabilities; fix groupBy's improper uses of 
StorageAdapter#getColumnCapabilities. (#8013)
    
    * GroupBy: Fix improper uses of StorageAdapter#getColumnCapabilities.
    
    1) A usage in "isArrayAggregateApplicable" that would potentially 
incorrectly use
       array-based aggregation on a virtual column that shadows a real column.
    2) A usage in "process" that would potentially use the more expensive 
multi-value
       aggregation path on a singly-valued virtual column. (No correctness 
issue, but
       a performance issue.)
    
    * Add addl javadoc.
    
    * ExpressionVirtualColumn: Set multi-value flag.
---
 .../epinephelinae/GroupByQueryEngineV2.java        | 55 +++++++++++++++-------
 .../org/apache/druid/segment/StorageAdapter.java   |  5 ++
 .../segment/virtual/ExpressionVirtualColumn.java   |  5 +-
 3 files changed, 48 insertions(+), 17 deletions(-)

diff --git 
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java
 
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java
index e3c609e..7594b90 100644
--- 
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java
+++ 
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java
@@ -48,11 +48,13 @@ import 
org.apache.druid.query.groupby.epinephelinae.column.LongGroupByColumnSele
 import 
org.apache.druid.query.groupby.epinephelinae.column.NullableValueGroupByColumnSelectorStrategy;
 import 
org.apache.druid.query.groupby.epinephelinae.column.StringGroupByColumnSelectorStrategy;
 import org.apache.druid.query.groupby.strategy.GroupByStrategyV2;
+import org.apache.druid.segment.ColumnSelectorFactory;
 import org.apache.druid.segment.ColumnValueSelector;
 import org.apache.druid.segment.Cursor;
 import org.apache.druid.segment.DimensionHandlerUtils;
 import org.apache.druid.segment.DimensionSelector;
 import org.apache.druid.segment.StorageAdapter;
+import org.apache.druid.segment.VirtualColumns;
 import org.apache.druid.segment.column.ColumnCapabilities;
 import org.apache.druid.segment.column.ValueType;
 import org.apache.druid.segment.data.IndexedInts;
@@ -115,14 +117,6 @@ public class GroupByQueryEngineV2
         null
     );
 
-    final boolean allSingleValueDims = query
-        .getDimensions()
-        .stream()
-        .allMatch(dimension -> {
-          final ColumnCapabilities columnCapabilities = 
storageAdapter.getColumnCapabilities(dimension.getDimension());
-          return columnCapabilities != null && 
!columnCapabilities.hasMultipleValues();
-        });
-
     final ResourceHolder<ByteBuffer> bufferHolder = 
intermediateResultsBufferPool.take();
 
     final String fudgeTimestampString = NullHandling.emptyToNullIfNeeded(
@@ -140,18 +134,38 @@ public class GroupByQueryEngineV2
               @Override
               public GroupByEngineIterator make()
               {
+                final ColumnSelectorFactory columnSelectorFactory = 
cursor.getColumnSelectorFactory();
+                final boolean allSingleValueDims = query
+                    .getDimensions()
+                    .stream()
+                    .allMatch(dimension -> {
+                      final ColumnCapabilities columnCapabilities = 
columnSelectorFactory.getColumnCapabilities(
+                          dimension.getDimension()
+                      );
+                      return columnCapabilities != null && 
!columnCapabilities.hasMultipleValues();
+                    });
+
                 ColumnSelectorPlus<GroupByColumnSelectorStrategy>[] 
selectorPlus = DimensionHandlerUtils
                     .createColumnSelectorPluses(
                         STRATEGY_FACTORY,
                         query.getDimensions(),
-                        cursor.getColumnSelectorFactory()
+                        columnSelectorFactory
                     );
                 GroupByColumnSelectorPlus[] dims = 
createGroupBySelectorPlus(selectorPlus);
 
                 final ByteBuffer buffer = bufferHolder.get();
 
-                // Check array-based aggregation is applicable
-                if (isArrayAggregateApplicable(querySpecificConfig, query, 
dims, storageAdapter, buffer)) {
+                // Check if array-based aggregation is applicable
+                final boolean useArrayAggregation = isArrayAggregateApplicable(
+                    querySpecificConfig,
+                    query,
+                    dims,
+                    storageAdapter,
+                    query.getVirtualColumns(),
+                    buffer
+                );
+
+                if (useArrayAggregation) {
                   return new ArrayAggregateIterator(
                       query,
                       querySpecificConfig,
@@ -191,6 +205,7 @@ public class GroupByQueryEngineV2
       GroupByQuery query,
       GroupByColumnSelectorPlus[] dims,
       StorageAdapter storageAdapter,
+      VirtualColumns virtualColumns,
       ByteBuffer buffer
   )
   {
@@ -206,11 +221,19 @@ public class GroupByQueryEngineV2
       columnCapabilities = null;
       cardinality = 1;
     } else if (dims.length == 1) {
+      // Only real columns can use array-based aggregation, since virtual 
columns cannot currently report their
+      // cardinality. We need to check if a virtual column exists with the 
same name, since virtual columns can shadow
+      // real columns, and we might miss that since we're going directly to 
the StorageAdapter (which only knows about
+      // real columns).
+      if (virtualColumns.exists(dims[0].getName())) {
+        return false;
+      }
+
       columnCapabilities = 
storageAdapter.getColumnCapabilities(dims[0].getName());
       cardinality = storageAdapter.getDimensionCardinality(dims[0].getName());
     } else {
-      columnCapabilities = null;
-      cardinality = -1; // ArrayAggregateIterator is not available
+      // Cannot use array-based aggregation with more than one dimension.
+      return false;
     }
 
     // Choose array-based aggregation if the grouping key is a single string 
dimension of a
@@ -225,11 +248,11 @@ public class GroupByQueryEngineV2
           aggregatorFactories
       );
 
-      // Check that all keys and aggregated values can be contained the buffer
+      // Check that all keys and aggregated values can be contained in the 
buffer
       return requiredBufferCapacity <= buffer.capacity();
+    } else {
+      return false;
     }
-
-    return false;
   }
 
   private static class GroupByStrategyFactory implements 
ColumnSelectorStrategyFactory<GroupByColumnSelectorStrategy>
diff --git 
a/processing/src/main/java/org/apache/druid/segment/StorageAdapter.java 
b/processing/src/main/java/org/apache/druid/segment/StorageAdapter.java
index 3319d09..1dfc00e 100644
--- a/processing/src/main/java/org/apache/druid/segment/StorageAdapter.java
+++ b/processing/src/main/java/org/apache/druid/segment/StorageAdapter.java
@@ -56,6 +56,11 @@ public interface StorageAdapter extends CursorFactory
    * the column does exist but the capabilities are unknown. The latter is 
possible with dynamically discovered
    * columns.
    *
+   * Note that StorageAdapters are representations of "real" segments, so they 
are not aware of any virtual columns
+   * that may be involved in a query. In general, query engines should instead 
use the method
+   * {@link ColumnSelectorFactory#getColumnCapabilities(String)}, which 
returns capabilities for virtual columns as
+   * well.
+   *
    * @param column column name
    *
    * @return capabilities, or null
diff --git 
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
 
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
index 45a25c4..f8988d6 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
@@ -103,7 +103,10 @@ public class ExpressionVirtualColumn implements 
VirtualColumn
   @Override
   public ColumnCapabilities capabilities(String columnName)
   {
-    return new ColumnCapabilitiesImpl().setType(outputType);
+    // Note: Ideally we would only "setHasMultipleValues(true)" if the 
expression in question could potentially return
+    // multiple values. However, we don't currently have a good way of 
determining this, so to be safe we always
+    // set the flag.
+    return new 
ColumnCapabilitiesImpl().setType(outputType).setHasMultipleValues(true);
   }
 
   @Override


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to