This is an automated email from the ASF dual-hosted git repository.

cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 77dd5b0  ColumnCapabilities.hasMultipleValues refactor (#9731)
77dd5b0 is described below

commit 77dd5b06ae8fd7c81474cb03c0fc399cdf21646a
Author: Clint Wylie <[email protected]>
AuthorDate: Thu Jun 4 23:52:37 2020 -0700

    ColumnCapabilities.hasMultipleValues refactor (#9731)
    
    * transition ColumnCapabilities.hasMultipleValues to Capable enum, remove 
ColumnCapabilities.isComplete
    
    * remove artificial, always multi-value capabilities from 
IncrementalIndexStorageAdapter and fix up fallout from that, fix 
ColumnCapabilities merge in index merger
    
    * fix typo
    
    * remove unused method
    
    * review stuffs, revert IncrementalIndexStorageAdapter capabilities 
change, plumb lame workaround to SegmentAnalyzer
    
    * more comment
    
    * use volatile booleans
    
    * fix line length
    
    * correctly handle missing columns for vector processors
    
    * return ColumnCapabilities.Capable for 
BitmapIndexSelector.hasMultipleValues, fix vector processor selection for 
complex
    
    * false on non-existent
---
 .../druid/benchmark/BoundFilterBenchmark.java      |  3 +-
 .../DimensionPredicateFilterBenchmark.java         |  3 +-
 .../druid/benchmark/LikeFilterBenchmark.java       |  3 +-
 .../druid/query/filter/BitmapIndexSelector.java    |  3 +-
 .../epinephelinae/GroupByQueryEngineV2.java        |  8 +-
 .../druid/query/metadata/SegmentAnalyzer.java      | 28 ++++---
 .../org/apache/druid/segment/ColumnProcessors.java |  2 +-
 .../segment/ColumnSelectorBitmapIndexSelector.java |  9 ++-
 .../druid/segment/DimensionHandlerUtils.java       | 18 +++--
 .../org/apache/druid/segment/DimensionIndexer.java | 13 ++++
 .../druid/segment/DoubleDimensionIndexer.java      |  6 ++
 .../druid/segment/FloatDimensionIndexer.java       |  6 ++
 .../org/apache/druid/segment/IndexMergerV9.java    | 16 ++--
 .../apache/druid/segment/LongDimensionIndexer.java |  6 ++
 .../segment/RowBasedColumnSelectorFactory.java     |  9 ++-
 .../druid/segment/StringDimensionIndexer.java      | 13 +++-
 .../druid/segment/StringDimensionMergerV9.java     |  4 +-
 .../apache/druid/segment/column/ColumnBuilder.java |  1 -
 .../druid/segment/column/ColumnCapabilities.java   | 63 ++++++++++++----
 .../segment/column/ColumnCapabilitiesImpl.java     | 87 +++++++++++++++-------
 .../druid/segment/filter/ExpressionFilter.java     |  2 +-
 .../org/apache/druid/segment/filter/Filters.java   |  2 +-
 .../segment/incremental/IncrementalIndex.java      | 66 ++++++++--------
 .../IncrementalIndexStorageAdapter.java            | 26 ++++---
 .../table/IndexedTableColumnSelectorFactory.java   |  3 +-
 .../QueryableIndexVectorColumnSelectorFactory.java |  4 +-
 .../druid/segment/virtual/ExpressionSelectors.java | 11 ++-
 .../segment/virtual/ExpressionVirtualColumn.java   |  8 +-
 .../epinephelinae/GroupByQueryEngineV2Test.java    | 18 ++---
 .../druid/query/lookup/LookupSegmentTest.java      |  6 +-
 .../query/metadata/SegmentMetadataQueryTest.java   | 24 +++---
 .../metadata/SegmentMetadataUnionQueryTest.java    |  2 +-
 .../segment/IndexMergerV9WithSpatialIndexTest.java |  3 +-
 .../QueryableIndexColumnCapabilitiesTest.java      | 20 ++---
 .../segment/RowBasedColumnSelectorFactoryTest.java | 18 ++---
 .../druid/segment/RowBasedStorageAdapterTest.java  | 15 ++--
 .../segment/column/ColumnCapabilitiesImplTest.java |  4 +-
 .../segment/filter/ExtractionDimFilterTest.java    |  5 +-
 .../join/table/IndexedTableJoinableTest.java       |  6 +-
 .../virtual/ExpressionVirtualColumnTest.java       | 11 ++-
 .../druid/segment/virtual/VirtualColumnsTest.java  |  2 +-
 41 files changed, 347 insertions(+), 210 deletions(-)

diff --git 
a/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java 
b/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java
index 6c0b6d9..f9a920a 100644
--- 
a/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java
+++ 
b/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java
@@ -33,6 +33,7 @@ import org.apache.druid.query.filter.BitmapIndexSelector;
 import org.apache.druid.query.filter.BoundDimFilter;
 import org.apache.druid.query.ordering.StringComparators;
 import org.apache.druid.segment.column.BitmapIndex;
+import org.apache.druid.segment.column.ColumnCapabilities;
 import org.apache.druid.segment.data.BitmapSerdeFactory;
 import org.apache.druid.segment.data.CloseableIndexed;
 import org.apache.druid.segment.data.GenericIndexed;
@@ -195,7 +196,7 @@ public class BoundFilterBenchmark
       }
 
       @Override
-      public boolean hasMultipleValues(final String dimension)
+      public ColumnCapabilities.Capable hasMultipleValues(final String 
dimension)
       {
         throw new UnsupportedOperationException();
       }
diff --git 
a/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java
 
b/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java
index 8dd177f..a845bb1 100644
--- 
a/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java
+++ 
b/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java
@@ -35,6 +35,7 @@ import org.apache.druid.query.filter.DruidFloatPredicate;
 import org.apache.druid.query.filter.DruidLongPredicate;
 import org.apache.druid.query.filter.DruidPredicateFactory;
 import org.apache.druid.segment.column.BitmapIndex;
+import org.apache.druid.segment.column.ColumnCapabilities;
 import org.apache.druid.segment.data.BitmapSerdeFactory;
 import org.apache.druid.segment.data.CloseableIndexed;
 import org.apache.druid.segment.data.GenericIndexed;
@@ -166,7 +167,7 @@ public class DimensionPredicateFilterBenchmark
       }
 
       @Override
-      public boolean hasMultipleValues(final String dimension)
+      public ColumnCapabilities.Capable hasMultipleValues(final String 
dimension)
       {
         throw new UnsupportedOperationException();
       }
diff --git 
a/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java 
b/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java
index 8355114..5c4a0f3 100644
--- 
a/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java
+++ 
b/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java
@@ -35,6 +35,7 @@ import org.apache.druid.query.filter.RegexDimFilter;
 import org.apache.druid.query.filter.SelectorDimFilter;
 import org.apache.druid.query.ordering.StringComparators;
 import org.apache.druid.segment.column.BitmapIndex;
+import org.apache.druid.segment.column.ColumnCapabilities;
 import org.apache.druid.segment.data.BitmapSerdeFactory;
 import org.apache.druid.segment.data.CloseableIndexed;
 import org.apache.druid.segment.data.GenericIndexed;
@@ -166,7 +167,7 @@ public class LikeFilterBenchmark
       }
 
       @Override
-      public boolean hasMultipleValues(final String dimension)
+      public ColumnCapabilities.Capable hasMultipleValues(final String 
dimension)
       {
         throw new UnsupportedOperationException();
       }
diff --git 
a/processing/src/main/java/org/apache/druid/query/filter/BitmapIndexSelector.java
 
b/processing/src/main/java/org/apache/druid/query/filter/BitmapIndexSelector.java
index 90307eb..fd90e74 100644
--- 
a/processing/src/main/java/org/apache/druid/query/filter/BitmapIndexSelector.java
+++ 
b/processing/src/main/java/org/apache/druid/query/filter/BitmapIndexSelector.java
@@ -24,6 +24,7 @@ import org.apache.druid.collections.bitmap.BitmapFactory;
 import org.apache.druid.collections.bitmap.ImmutableBitmap;
 import org.apache.druid.collections.spatial.ImmutableRTree;
 import org.apache.druid.segment.column.BitmapIndex;
+import org.apache.druid.segment.column.ColumnCapabilities;
 import org.apache.druid.segment.data.CloseableIndexed;
 
 import javax.annotation.Nullable;
@@ -35,7 +36,7 @@ public interface BitmapIndexSelector
   @MustBeClosed
   @Nullable
   CloseableIndexed<String> getDimensionValues(String dimension);
-  boolean hasMultipleValues(String dimension);
+  ColumnCapabilities.Capable hasMultipleValues(String dimension);
   int getNumRows();
   BitmapFactory getBitmapFactory();
   @Nullable
diff --git 
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java
 
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java
index 9a417c9..f19ae25 100644
--- 
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java
+++ 
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java
@@ -321,13 +321,13 @@ public class GroupByQueryEngineV2
   /**
    * Checks whether all "dimensions" are either single-valued, or if allowed, 
nonexistent. Since non-existent column
    * selectors will show up as full of nulls they are effectively single 
valued, however they can also be null during
-   * broker merge, for example with an 'inline' datasource subquery. 
'missingMeansNonexistent' is sort of a hack to let
+   * broker merge, for example with an 'inline' datasource subquery. 
'missingMeansNonExistent' is sort of a hack to let
    * the vectorized engine, which only operates on actual segments, to still 
work in this case for non-existent columns.
    */
   public static boolean isAllSingleValueDims(
       final Function<String, ColumnCapabilities> capabilitiesFunction,
       final List<DimensionSpec> dimensions,
-      final boolean missingMeansNonexistent
+      final boolean missingMeansNonExistent
   )
   {
     return dimensions
@@ -342,8 +342,8 @@ public class GroupByQueryEngineV2
 
               // Now check column capabilities.
               final ColumnCapabilities columnCapabilities = 
capabilitiesFunction.apply(dimension.getDimension());
-              return (columnCapabilities != null && 
!columnCapabilities.hasMultipleValues()) ||
-                     (missingMeansNonexistent && columnCapabilities == null);
+              return (columnCapabilities != null && 
!columnCapabilities.hasMultipleValues().isMaybeTrue()) ||
+                     (missingMeansNonExistent && columnCapabilities == null);
             });
   }
 
diff --git 
a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java 
b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java
index c5f1800..659b55b 100644
--- 
a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java
+++ 
b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java
@@ -45,6 +45,7 @@ import org.apache.druid.segment.column.ComplexColumn;
 import org.apache.druid.segment.column.DictionaryEncodedColumn;
 import org.apache.druid.segment.column.ValueType;
 import org.apache.druid.segment.data.IndexedInts;
+import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter;
 import org.apache.druid.segment.serde.ComplexMetricSerde;
 import org.apache.druid.segment.serde.ComplexMetrics;
 import org.joda.time.DateTime;
@@ -101,9 +102,18 @@ public class SegmentAnalyzer
 
     for (String columnName : columnNames) {
       final ColumnHolder columnHolder = index == null ? null : 
index.getColumnHolder(columnName);
-      final ColumnCapabilities capabilities = columnHolder != null
-                                              ? columnHolder.getCapabilities()
-                                              : 
storageAdapter.getColumnCapabilities(columnName);
+      final ColumnCapabilities capabilities;
+      if (columnHolder != null) {
+        capabilities = columnHolder.getCapabilities();
+      } else {
+        // this can be removed if we get to the point where 
IncrementalIndexStorageAdapter.getColumnCapabilities
+        // accurately reports the capabilities
+        if (storageAdapter instanceof IncrementalIndexStorageAdapter) {
+          capabilities = ((IncrementalIndexStorageAdapter) 
storageAdapter).getSnapshotColumnCapabilities(columnName);
+        } else {
+          capabilities = storageAdapter.getColumnCapabilities(columnName);
+        }
+      }
 
       final ColumnAnalysis analysis;
       final ValueType type = capabilities.getType();
@@ -138,7 +148,7 @@ public class SegmentAnalyzer
     // Add time column too
     ColumnCapabilities timeCapabilities = 
storageAdapter.getColumnCapabilities(ColumnHolder.TIME_COLUMN_NAME);
     if (timeCapabilities == null) {
-      timeCapabilities = new 
ColumnCapabilitiesImpl().setType(ValueType.LONG).setHasMultipleValues(false);
+      timeCapabilities = 
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG);
     }
     columns.put(
         ColumnHolder.TIME_COLUMN_NAME,
@@ -172,7 +182,7 @@ public class SegmentAnalyzer
     long size = 0;
 
     if (analyzingSize()) {
-      if (capabilities.hasMultipleValues()) {
+      if (capabilities.hasMultipleValues().isTrue()) {
         return ColumnAnalysis.error("multi_value");
       }
 
@@ -181,7 +191,7 @@ public class SegmentAnalyzer
 
     return new ColumnAnalysis(
         capabilities.getType().name(),
-        capabilities.hasMultipleValues(),
+        capabilities.hasMultipleValues().isTrue(),
         size,
         null,
         null,
@@ -231,7 +241,7 @@ public class SegmentAnalyzer
 
     return new ColumnAnalysis(
         capabilities.getType().name(),
-        capabilities.hasMultipleValues(),
+        capabilities.hasMultipleValues().isTrue(),
         size,
         analyzingCardinality() ? cardinality : 0,
         min,
@@ -308,7 +318,7 @@ public class SegmentAnalyzer
 
     return new ColumnAnalysis(
         capabilities.getType().name(),
-        capabilities.hasMultipleValues(),
+        capabilities.hasMultipleValues().isTrue(),
         size,
         cardinality,
         min,
@@ -324,7 +334,7 @@ public class SegmentAnalyzer
   )
   {
     try (final ComplexColumn complexColumn = columnHolder != null ? 
(ComplexColumn) columnHolder.getColumn() : null) {
-      final boolean hasMultipleValues = capabilities != null && 
capabilities.hasMultipleValues();
+      final boolean hasMultipleValues = capabilities != null && 
capabilities.hasMultipleValues().isTrue();
       long size = 0;
 
       if (analyzingSize() && complexColumn != null) {
diff --git 
a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java 
b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java
index 5fb698a..05e85fd 100644
--- a/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java
+++ b/processing/src/main/java/org/apache/druid/segment/ColumnProcessors.java
@@ -197,6 +197,6 @@ public class ColumnProcessors
    */
   private static boolean mayBeMultiValue(@Nullable final ColumnCapabilities 
capabilities)
   {
-    return capabilities == null || !capabilities.isComplete() || 
capabilities.hasMultipleValues();
+    return capabilities == null || 
capabilities.hasMultipleValues().isMaybeTrue();
   }
 }
diff --git 
a/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java
 
b/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java
index 79b6e89..bd6de7a 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/ColumnSelectorBitmapIndexSelector.java
@@ -27,6 +27,7 @@ import org.apache.druid.query.filter.BitmapIndexSelector;
 import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
 import org.apache.druid.segment.column.BaseColumn;
 import org.apache.druid.segment.column.BitmapIndex;
+import org.apache.druid.segment.column.ColumnCapabilities;
 import org.apache.druid.segment.column.ColumnHolder;
 import org.apache.druid.segment.column.DictionaryEncodedColumn;
 import org.apache.druid.segment.column.NumericColumn;
@@ -157,14 +158,18 @@ public class ColumnSelectorBitmapIndexSelector implements 
BitmapIndexSelector
   }
 
   @Override
-  public boolean hasMultipleValues(final String dimension)
+  public ColumnCapabilities.Capable hasMultipleValues(final String dimension)
   {
     if (isVirtualColumn(dimension)) {
       return 
virtualColumns.getVirtualColumn(dimension).capabilities(dimension).hasMultipleValues();
     }
 
     final ColumnHolder columnHolder = index.getColumnHolder(dimension);
-    return columnHolder != null && 
columnHolder.getCapabilities().hasMultipleValues();
+    // if ColumnHolder is null, the column doesn't exist, but report as not 
having multiple values so that
+    // the empty bitmap will be used
+    return columnHolder != null
+           ? columnHolder.getCapabilities().hasMultipleValues()
+           : ColumnCapabilities.Capable.FALSE;
   }
 
   @Override
diff --git 
a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java 
b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java
index 0c8b3c2..9e5a129 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java
@@ -289,15 +289,23 @@ public final class DimensionHandlerUtils
       final VectorColumnSelectorFactory selectorFactory
   )
   {
-    final ColumnCapabilities capabilities = getEffectiveCapabilities(
+    final ColumnCapabilities originalCapabilities =
+        selectorFactory.getColumnCapabilities(dimensionSpec.getDimension());
+
+    final ColumnCapabilities effectiveCapabilites = getEffectiveCapabilities(
         dimensionSpec,
-        selectorFactory.getColumnCapabilities(dimensionSpec.getDimension())
+        originalCapabilities
     );
 
-    final ValueType type = capabilities.getType();
+    final ValueType type = effectiveCapabilites.getType();
+
+    // vector selectors should never have null column capabilities, these 
signify a non-existent column, and complex
+    // columns should never be treated as a multi-value column, so always use 
single value string processor
+    final boolean forceSingleValue =
+        originalCapabilities == null || 
ValueType.COMPLEX.equals(originalCapabilities.getType());
 
     if (type == ValueType.STRING) {
-      if (capabilities.hasMultipleValues()) {
+      if (!forceSingleValue && 
effectiveCapabilites.hasMultipleValues().isMaybeTrue()) {
         return strategyFactory.makeMultiValueDimensionProcessor(
             selectorFactory.makeMultiValueDimensionSelector(dimensionSpec)
         );
@@ -328,7 +336,7 @@ public final class DimensionHandlerUtils
             selectorFactory.makeValueSelector(dimensionSpec.getDimension())
         );
       } else {
-        throw new ISE("Unsupported type[%s]", capabilities.getType());
+        throw new ISE("Unsupported type[%s]", effectiveCapabilites.getType());
       }
     }
   }
diff --git 
a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java 
b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java
index 99921eb..cf7631d 100644
--- a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java
@@ -128,6 +128,19 @@ public interface DimensionIndexer
   EncodedKeyComponentType 
processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean 
reportParseExceptions);
 
   /**
+   * This method will be called while building an {@link IncrementalIndex} 
whenever a known dimension column (either
+   * through an explicit schema on the ingestion spec, or auto-discovered 
while processing rows) is absent in any row
+   * that is processed, to allow an indexer to account for any missing rows if 
necessary. Useful so that a string
+   * {@link DimensionSelector} built on top of an {@link IncrementalIndex} may 
accurately report
+   * {@link DimensionSelector#nameLookupPossibleInAdvance()} by allowing it to 
track if it has any implicit null valued
+   * rows.
+   *
+   * At index persist/merge time all missing columns for a row will be 
explicitly replaced with the appropriate
+   * null or default value.
+   */
+  void setSparseIndexed();
+
+  /**
    * Gives the estimated size in bytes for the given key
    *
    * @param key dimension value array from a TimeAndDims key
diff --git 
a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java 
b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java
index 6645e1f..b802f75 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java
@@ -48,6 +48,12 @@ public class DoubleDimensionIndexer implements 
DimensionIndexer<Double, Double,
   }
 
   @Override
+  public void setSparseIndexed()
+  {
+    // no-op, double columns do not have a dictionary to track null values
+  }
+
+  @Override
   public long estimateEncodedKeyComponentSize(Double key)
   {
     return Double.BYTES;
diff --git 
a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java 
b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java
index 4132882..dce58a2 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java
@@ -49,6 +49,12 @@ public class FloatDimensionIndexer implements 
DimensionIndexer<Float, Float, Flo
   }
 
   @Override
+  public void setSparseIndexed()
+  {
+    // no-op, float columns do not have a dictionary to track null values
+  }
+
+  @Override
   public long estimateEncodedKeyComponentSize(Float key)
   {
     return Float.BYTES;
diff --git 
a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java 
b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java
index bdf0bcc..066b8dc 100644
--- a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java
+++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java
@@ -165,7 +165,7 @@ public class IndexMergerV9 implements IndexMerger
       progress.progress();
       final Map<String, ValueType> metricsValueTypes = new 
TreeMap<>(Comparators.naturalNullsFirst());
       final Map<String, String> metricTypeNames = new 
TreeMap<>(Comparators.naturalNullsFirst());
-      final List<ColumnCapabilitiesImpl> dimCapabilities = 
Lists.newArrayListWithCapacity(mergedDimensions.size());
+      final List<ColumnCapabilities> dimCapabilities = 
Lists.newArrayListWithCapacity(mergedDimensions.size());
       mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, 
metricTypeNames, dimCapabilities);
 
       final Map<String, DimensionHandler> handlers = 
makeDimensionHandlers(mergedDimensions, dimCapabilities);
@@ -716,18 +716,22 @@ public class IndexMergerV9 implements IndexMerger
       final List<String> mergedDimensions,
       final Map<String, ValueType> metricsValueTypes,
       final Map<String, String> metricTypeNames,
-      final List<ColumnCapabilitiesImpl> dimCapabilities
+      final List<ColumnCapabilities> dimCapabilities
   )
   {
-    final Map<String, ColumnCapabilitiesImpl> capabilitiesMap = new 
HashMap<>();
+    final Map<String, ColumnCapabilities> capabilitiesMap = new HashMap<>();
     for (IndexableAdapter adapter : adapters) {
       for (String dimension : adapter.getDimensionNames()) {
         ColumnCapabilities capabilities = adapter.getCapabilities(dimension);
-        capabilitiesMap.computeIfAbsent(dimension, d -> new 
ColumnCapabilitiesImpl().setIsComplete(true)).merge(capabilities);
+        capabilitiesMap.compute(dimension, (d, existingCapabilities) ->
+            ColumnCapabilitiesImpl.snapshot(capabilities)
+                                  
.merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities)));
       }
       for (String metric : adapter.getMetricNames()) {
         ColumnCapabilities capabilities = adapter.getCapabilities(metric);
-        capabilitiesMap.computeIfAbsent(metric, m -> new 
ColumnCapabilitiesImpl().setIsComplete(true)).merge(capabilities);
+        capabilitiesMap.compute(metric, (m, existingCapabilities) ->
+            ColumnCapabilitiesImpl.snapshot(capabilities)
+                                  
.merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities)));
         metricsValueTypes.put(metric, capabilities.getType());
         metricTypeNames.put(metric, adapter.getMetricType(metric));
       }
@@ -1002,7 +1006,7 @@ public class IndexMergerV9 implements IndexMerger
 
   private Map<String, DimensionHandler> makeDimensionHandlers(
       final List<String> mergedDimensions,
-      final List<ColumnCapabilitiesImpl> dimCapabilities
+      final List<ColumnCapabilities> dimCapabilities
   )
   {
     Map<String, DimensionHandler> handlers = new LinkedHashMap<>();
diff --git 
a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java 
b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java
index d357243..f2a9127 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java
@@ -49,6 +49,12 @@ public class LongDimensionIndexer implements 
DimensionIndexer<Long, Long, Long>
   }
 
   @Override
+  public void setSparseIndexed()
+  {
+    // no-op, long columns do not have a dictionary to track null values
+  }
+
+  @Override
   public long estimateEncodedKeyComponentSize(Long key)
   {
     return Long.BYTES;
diff --git 
a/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java
 
b/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java
index 7b21d1f..77e8978 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java
@@ -96,7 +96,7 @@ public class RowBasedColumnSelectorFactory<T> implements 
ColumnSelectorFactory
   {
     if (ColumnHolder.TIME_COLUMN_NAME.equals(columnName)) {
       // TIME_COLUMN_NAME is handled specially; override the provided 
rowSignature.
-      return new 
ColumnCapabilitiesImpl().setType(ValueType.LONG).setIsComplete(true);
+      return 
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG);
     } else {
       final ValueType valueType = 
rowSignature.getColumnType(columnName).orElse(null);
 
@@ -105,12 +105,13 @@ public class RowBasedColumnSelectorFactory<T> implements 
ColumnSelectorFactory
       // causes expression selectors to always treat us as arrays. If we might 
have multiple values (i.e. if our type
       // is nonnumeric), set isComplete false to compensate.
       if (valueType != null) {
+        if (valueType.isNumeric()) {
+          return 
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(valueType);
+        }
         return new ColumnCapabilitiesImpl()
             .setType(valueType)
             .setDictionaryValuesUnique(false)
-            .setDictionaryValuesSorted(false)
-            // Numeric types should be reported as complete, but not STRING or 
COMPLEX (because we don't have full info)
-            .setIsComplete(valueType.isNumeric());
+            .setDictionaryValuesSorted(false);
       } else {
         return null;
       }
diff --git 
a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java 
b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
index ada146d..c0200e1 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
@@ -233,7 +233,8 @@ public class StringDimensionIndexer implements 
DimensionIndexer<Integer, int[],
   private final DimensionDictionary dimLookup;
   private final MultiValueHandling multiValueHandling;
   private final boolean hasBitmapIndexes;
-  private boolean hasMultipleValues = false;
+  private volatile boolean hasMultipleValues = false;
+  private volatile boolean isSparse = false;
 
   @Nullable
   private SortedDimensionDictionary sortedLookup;
@@ -302,6 +303,12 @@ public class StringDimensionIndexer implements 
DimensionIndexer<Integer, int[],
   }
 
   @Override
+  public void setSparseIndexed()
+  {
+    isSparse = true;
+  }
+
+  @Override
   public long estimateEncodedKeyComponentSize(int[] key)
   {
     // string length is being accounted for each time they are referenced, 
based on dimension handler interface,
@@ -623,7 +630,9 @@ public class StringDimensionIndexer implements 
DimensionIndexer<Integer, int[],
       @Override
       public boolean nameLookupPossibleInAdvance()
       {
-        return true;
+        // name lookup is possible in advance if we got a value for every row 
(setSparseIndexed was not called on this
+        // column) or we've encountered an actual null value and it is present 
in our dictionary
+        return !isSparse || dimLookup.idForNull != ABSENT_VALUE_ID;
       }
 
       @Nullable
diff --git 
a/processing/src/main/java/org/apache/druid/segment/StringDimensionMergerV9.java
 
b/processing/src/main/java/org/apache/druid/segment/StringDimensionMergerV9.java
index cff7ef9..abb4637 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/StringDimensionMergerV9.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/StringDimensionMergerV9.java
@@ -221,7 +221,7 @@ public class StringDimensionMergerV9 implements 
DimensionMergerV9
     final CompressionStrategy compressionStrategy = 
indexSpec.getDimensionCompression();
 
     String filenameBase = StringUtils.format("%s.forward_dim", dimensionName);
-    if (capabilities.hasMultipleValues()) {
+    if (capabilities.hasMultipleValues().isTrue()) {
       if (compressionStrategy != CompressionStrategy.UNCOMPRESSED) {
         encodedValueSerializer = 
V3CompressedVSizeColumnarMultiIntsSerializer.create(
             dimensionName,
@@ -533,7 +533,7 @@ public class StringDimensionMergerV9 implements 
DimensionMergerV9
   public ColumnDescriptor makeColumnDescriptor()
   {
     // Now write everything
-    boolean hasMultiValue = capabilities.hasMultipleValues();
+    boolean hasMultiValue = capabilities.hasMultipleValues().isTrue();
     final CompressionStrategy compressionStrategy = 
indexSpec.getDimensionCompression();
     final BitmapSerdeFactory bitmapSerdeFactory = 
indexSpec.getBitmapSerdeFactory();
 
diff --git 
a/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java 
b/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java
index 1b7163e..cde454b 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java
@@ -118,7 +118,6 @@ public class ColumnBuilder
             .setDictionaryValuesUnique(dictionaryEncoded)
             .setHasSpatialIndexes(spatialIndex != null)
             .setHasMultipleValues(hasMultipleValues)
-            .setIsComplete(true)
             .setFilterable(filterable),
         columnSupplier,
         bitmapIndex,
diff --git 
a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java
 
b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java
index 53f7440..a9af25b 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilities.java
@@ -19,33 +19,26 @@
 
 package org.apache.druid.segment.column;
 
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonValue;
+import org.apache.druid.java.util.common.StringUtils;
+
+import javax.annotation.Nullable;
+
 /**
  */
 public interface ColumnCapabilities
 {
   ValueType getType();
-
   boolean isDictionaryEncoded();
   Capable areDictionaryValuesSorted();
   Capable areDictionaryValuesUnique();
   boolean isRunLengthEncoded();
   boolean hasBitmapIndexes();
   boolean hasSpatialIndexes();
-  boolean hasMultipleValues();
+  Capable hasMultipleValues();
   boolean isFilterable();
 
-  /**
-   * This property indicates that this {@link ColumnCapabilities} is 
"complete" in that all properties can be expected
-   * to supply valid responses. This is mostly a hack to work around {@link 
ColumnCapabilities} generators that
-   * fail to set {@link #hasMultipleValues()} even when the associated column 
really could have multiple values.
-   * Until this situation is sorted out, if this method returns false, callers 
are encouraged to ignore
-   * {@link #hasMultipleValues()} and treat that property as if it were 
unknown.
-   *
-   * todo: replace all booleans with {@link Capable} and this method can be 
dropped
-   */
-  boolean isComplete();
-
-
   enum Capable
   {
     FALSE,
@@ -57,6 +50,21 @@ public interface ColumnCapabilities
       return this == TRUE;
     }
 
+    public boolean isMaybeTrue()
+    {
+      return isTrue() || isUnknown();
+    }
+
+    public boolean isUnknown()
+    {
+      return this == UNKNOWN;
+    }
+
+    public Capable coerceUnknownToBoolean(boolean unknownIsTrue)
+    {
+      return this == UNKNOWN ? Capable.of(unknownIsTrue) : this;
+    }
+
     public Capable and(Capable other)
     {
       if (this == UNKNOWN || other == UNKNOWN) {
@@ -65,9 +73,36 @@ public interface ColumnCapabilities
       return this == TRUE && other == TRUE ? TRUE : FALSE;
     }
 
+    public Capable or(Capable other)
+    {
+      if (this == TRUE) {
+        return TRUE;
+      }
+      return other;
+    }
+
     public static Capable of(boolean bool)
     {
       return bool ? TRUE : FALSE;
     }
+
+    @JsonCreator
+    public static Capable ofNullable(@Nullable Boolean bool)
+    {
+      return bool == null ? Capable.UNKNOWN : of(bool);
+    }
+
+    @JsonValue
+    @Nullable
+    public Boolean toJson()
+    {
+      return this == UNKNOWN ? null : isTrue();
+    }
+
+    @Override
+    public String toString()
+    {
+      return StringUtils.toLowerCase(super.toString());
+    }
   }
 }
diff --git 
a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java
 
b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java
index bee28eb..9ddbd04 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java
@@ -31,15 +31,65 @@ import javax.annotation.Nullable;
  */
 public class ColumnCapabilitiesImpl implements ColumnCapabilities
 {
-  public static ColumnCapabilitiesImpl copyOf(final ColumnCapabilities other)
+  public static ColumnCapabilitiesImpl copyOf(@Nullable final 
ColumnCapabilities other)
   {
     final ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl();
-    capabilities.merge(other);
-    capabilities.setFilterable(other.isFilterable());
-    capabilities.setIsComplete(other.isComplete());
+    if (other != null) {
+      capabilities.type = other.getType();
+      capabilities.dictionaryEncoded = other.isDictionaryEncoded();
+      capabilities.runLengthEncoded = other.isRunLengthEncoded();
+      capabilities.hasInvertedIndexes = other.hasBitmapIndexes();
+      capabilities.hasSpatialIndexes = other.hasSpatialIndexes();
+      capabilities.hasMultipleValues = other.hasMultipleValues();
+      capabilities.dictionaryValuesSorted = other.areDictionaryValuesSorted();
+      capabilities.dictionaryValuesUnique = other.areDictionaryValuesUnique();
+      capabilities.filterable = other.isFilterable();
+    }
     return capabilities;
   }
 
+  /**
+   * Used at indexing time to finalize all {@link Capable#UNKNOWN} values to
+   * {@link Capable#FALSE}, in order to present a snapshot of the state of the 
column
+   */
+  @Nullable
+  public static ColumnCapabilitiesImpl snapshot(@Nullable final 
ColumnCapabilities capabilities)
+  {
+    return snapshot(capabilities, false);
+  }
+
+  /**
+   * Used at indexing time to finalize all {@link Capable#UNKNOWN} values to
+   * {@link Capable#FALSE} or {@link Capable#TRUE}, in order to present a 
snapshot of the state of this column
+   */
+  @Nullable
+  public static ColumnCapabilitiesImpl snapshot(@Nullable final 
ColumnCapabilities capabilities, boolean unknownIsTrue)
+  {
+    if (capabilities == null) {
+      return null;
+    }
+    ColumnCapabilitiesImpl copy = copyOf(capabilities);
+    copy.hasMultipleValues = 
copy.hasMultipleValues.coerceUnknownToBoolean(unknownIsTrue);
+    copy.dictionaryValuesSorted = 
copy.dictionaryValuesSorted.coerceUnknownToBoolean(unknownIsTrue);
+    copy.dictionaryValuesUnique = 
copy.dictionaryValuesUnique.coerceUnknownToBoolean(unknownIsTrue);
+    return copy;
+  }
+
+
+  /**
+   * Create a no frills, simple column with {@link ValueType} set and 
everything else false
+   */
+  public static ColumnCapabilitiesImpl 
createSimpleNumericColumnCapabilities(ValueType valueType)
+  {
+    return new ColumnCapabilitiesImpl().setType(valueType)
+                                       .setHasMultipleValues(false)
+                                       .setHasBitmapIndexes(false)
+                                       .setDictionaryEncoded(false)
+                                       .setDictionaryValuesSorted(false)
+                                       .setDictionaryValuesUnique(false)
+                                       .setHasSpatialIndexes(false);
+  }
+
   @Nullable
   private ValueType type = null;
 
@@ -47,7 +97,7 @@ public class ColumnCapabilitiesImpl implements 
ColumnCapabilities
   private boolean runLengthEncoded = false;
   private boolean hasInvertedIndexes = false;
   private boolean hasSpatialIndexes = false;
-  private boolean hasMultipleValues = false;
+  private Capable hasMultipleValues = Capable.UNKNOWN;
 
   // These capabilities are computed at query time and not persisted in the 
segment files.
   @JsonIgnore
@@ -56,8 +106,6 @@ public class ColumnCapabilitiesImpl implements 
ColumnCapabilities
   private Capable dictionaryValuesUnique = Capable.UNKNOWN;
   @JsonIgnore
   private boolean filterable;
-  @JsonIgnore
-  private boolean complete = false;
 
   @Override
   @JsonProperty
@@ -144,14 +192,14 @@ public class ColumnCapabilitiesImpl implements 
ColumnCapabilities
 
   @Override
   @JsonProperty("hasMultipleValues")
-  public boolean hasMultipleValues()
+  public Capable hasMultipleValues()
   {
     return hasMultipleValues;
   }
 
   public ColumnCapabilitiesImpl setHasMultipleValues(boolean hasMultipleValues)
   {
-    this.hasMultipleValues = hasMultipleValues;
+    this.hasMultipleValues = Capable.of(hasMultipleValues);
     return this;
   }
 
@@ -171,22 +219,10 @@ public class ColumnCapabilitiesImpl implements 
ColumnCapabilities
     return this;
   }
 
-  @Override
-  public boolean isComplete()
-  {
-    return complete;
-  }
-
-  public ColumnCapabilitiesImpl setIsComplete(boolean complete)
-  {
-    this.complete = complete;
-    return this;
-  }
-
-  public void merge(ColumnCapabilities other)
+  public ColumnCapabilities merge(@Nullable ColumnCapabilities other)
   {
     if (other == null) {
-      return;
+      return this;
     }
 
     if (type == null) {
@@ -201,10 +237,11 @@ public class ColumnCapabilitiesImpl implements 
ColumnCapabilities
     this.runLengthEncoded |= other.isRunLengthEncoded();
     this.hasInvertedIndexes |= other.hasBitmapIndexes();
     this.hasSpatialIndexes |= other.hasSpatialIndexes();
-    this.hasMultipleValues |= other.hasMultipleValues();
-    this.complete &= other.isComplete(); // these should always be the same?
     this.filterable &= other.isFilterable();
+    this.hasMultipleValues = 
this.hasMultipleValues.or(other.hasMultipleValues());
     this.dictionaryValuesSorted = 
this.dictionaryValuesSorted.and(other.areDictionaryValuesSorted());
     this.dictionaryValuesUnique = 
this.dictionaryValuesUnique.and(other.areDictionaryValuesUnique());
+
+    return this;
   }
 }
diff --git 
a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
 
b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
index ef7613c..0baa594 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java
@@ -115,7 +115,7 @@ public class ExpressionFilter implements Filter
       // multiple values. The lack of multiple values is important because 
expression filters treat multi-value
       // arrays as nulls, which doesn't permit index based filtering.
       final String column = Iterables.getOnlyElement(requiredBindings.get());
-      return selector.getBitmapIndex(column) != null && 
!selector.hasMultipleValues(column);
+      return selector.getBitmapIndex(column) != null && 
!selector.hasMultipleValues(column).isMaybeTrue();
     } else {
       // Multi-column expression.
       return false;
diff --git 
a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java 
b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java
index b6a717b..990127f 100644
--- a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java
+++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java
@@ -414,7 +414,7 @@ public class Filters
     if (filter.supportsBitmapIndex(indexSelector)) {
       final ColumnHolder columnHolder = 
columnSelector.getColumnHolder(dimension);
       if (columnHolder != null) {
-        return !columnHolder.getCapabilities().hasMultipleValues();
+        return 
!columnHolder.getCapabilities().hasMultipleValues().isMaybeTrue();
       }
     }
     return false;
diff --git 
a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java
 
b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java
index c169d15..649aea9 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java
@@ -27,6 +27,7 @@ import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Iterators;
 import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
 import com.google.common.primitives.Ints;
 import com.google.common.primitives.Longs;
 import com.google.errorprone.annotations.concurrent.GuardedBy;
@@ -89,6 +90,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentLinkedDeque;
 import java.util.concurrent.ConcurrentMap;
@@ -327,9 +329,10 @@ public abstract class IncrementalIndex<AggregatorType> 
extends AbstractIndex imp
     }
 
     //__time capabilities
-    ColumnCapabilitiesImpl timeCapabilities = new 
ColumnCapabilitiesImpl().setIsComplete(true);
-    timeCapabilities.setType(ValueType.LONG);
-    columnCapabilities.put(ColumnHolder.TIME_COLUMN_NAME, timeCapabilities);
+    columnCapabilities.put(
+        ColumnHolder.TIME_COLUMN_NAME,
+        
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG)
+    );
 
     // This should really be more generic
     List<SpatialDimensionSchema> spatialDimensions = 
dimensionsSpec.getSpatialDimensions();
@@ -640,12 +643,15 @@ public abstract class IncrementalIndex<AggregatorType> 
extends AbstractIndex imp
     }
 
     final List<String> rowDimensions = row.getDimensions();
-
     Object[] dims;
     List<Object> overflow = null;
     long dimsKeySize = 0;
     List<String> parseExceptionMessages = new ArrayList<>();
     synchronized (dimensionDescs) {
+      // all known dimensions are assumed missing until we encounter them in the 
rowDimensions
+      Set<String> absentDimensions = Sets.newHashSet(dimensionDescs.keySet());
+
+      // first, process dimension values present in the row
       dims = new Object[dimensionDescs.size()];
       for (String dimension : rowDimensions) {
         if (Strings.isNullOrEmpty(dimension)) {
@@ -656,18 +662,13 @@ public abstract class IncrementalIndex<AggregatorType> 
extends AbstractIndex imp
         DimensionDesc desc = dimensionDescs.get(dimension);
         if (desc != null) {
           capabilities = desc.getCapabilities();
+          absentDimensions.remove(dimension);
         } else {
           wasNewDim = true;
           capabilities = columnCapabilities.get(dimension);
           if (capabilities == null) {
-            capabilities = new ColumnCapabilitiesImpl();
             // For schemaless type discovery, assume everything is a String 
for now, can change later.
-            capabilities.setType(ValueType.STRING);
-            capabilities.setDictionaryEncoded(true);
-            capabilities.setHasBitmapIndexes(true);
-            capabilities.setDictionaryValuesSorted(false);
-            capabilities.setDictionaryValuesUnique(true);
-            capabilities.setIsComplete(true);
+            capabilities = makeCapabilitiesFromValueType(ValueType.STRING);
             columnCapabilities.put(dimension, capabilities);
           }
           DimensionHandler handler = 
DimensionHandlerUtils.getHandlerFromCapabilities(dimension, capabilities, null);
@@ -677,23 +678,24 @@ public abstract class IncrementalIndex<AggregatorType> 
extends AbstractIndex imp
         DimensionIndexer indexer = desc.getIndexer();
         Object dimsKey = null;
         try {
-          dimsKey = indexer.processRowValsToUnsortedEncodedKeyComponent(
-              row.getRaw(dimension),
-              true
-          );
+          dimsKey = 
indexer.processRowValsToUnsortedEncodedKeyComponent(row.getRaw(dimension), 
true);
         }
         catch (ParseException pe) {
           parseExceptionMessages.add(pe.getMessage());
         }
         dimsKeySize += indexer.estimateEncodedKeyComponentSize(dimsKey);
         // Set column capabilities as data is coming in
-        if (!capabilities.hasMultipleValues() &&
+        if (!capabilities.hasMultipleValues().isTrue() &&
             dimsKey != null &&
             handler.getLengthOfEncodedKeyComponent(dimsKey) > 1) {
           capabilities.setHasMultipleValues(true);
         }
 
         if (wasNewDim) {
+          // unless this is the first row we are processing, all newly 
discovered columns will be sparse
+          if (maxIngestedEventTime != null) {
+            indexer.setSparseIndexed();
+          }
           if (overflow == null) {
             overflow = new ArrayList<>();
           }
@@ -713,6 +715,11 @@ public abstract class IncrementalIndex<AggregatorType> 
extends AbstractIndex imp
           dims[desc.getIndex()] = dimsKey;
         }
       }
+
+      // process any dimensions with missing values in the row
+      for (String missing : absentDimensions) {
+        dimensionDescs.get(missing).getIndexer().setSparseIndexed();
+      }
     }
 
     if (overflow != null) {
@@ -923,16 +930,16 @@ public abstract class IncrementalIndex<AggregatorType> 
extends AbstractIndex imp
 
   private ColumnCapabilitiesImpl makeCapabilitiesFromValueType(ValueType type)
   {
-    ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl();
-    capabilities.setDictionaryEncoded(type == ValueType.STRING);
-    capabilities.setHasBitmapIndexes(type == ValueType.STRING);
     if (type == ValueType.STRING) {
-      capabilities.setDictionaryValuesUnique(true);
-      capabilities.setDictionaryValuesSorted(false);
+      // we start out as not having multiple values, but this might change as 
we encounter them
+      return new ColumnCapabilitiesImpl().setType(type)
+                                         .setHasBitmapIndexes(true)
+                                         .setDictionaryEncoded(true)
+                                         .setDictionaryValuesUnique(true)
+                                         .setDictionaryValuesSorted(false);
+    } else {
+      return 
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(type);
     }
-    capabilities.setType(type);
-    capabilities.setIsComplete(true);
-    return capabilities;
   }
 
   /**
@@ -988,6 +995,7 @@ public abstract class IncrementalIndex<AggregatorType> 
extends AbstractIndex imp
     return new IncrementalIndexStorageAdapter(this);
   }
 
+  @Nullable
   public ColumnCapabilities getCapabilities(String column)
   {
     return columnCapabilities.get(column);
@@ -1124,18 +1132,18 @@ public abstract class IncrementalIndex<AggregatorType> 
extends AbstractIndex imp
       this.name = factory.getName();
 
       String typeInfo = factory.getTypeName();
-      this.capabilities = new ColumnCapabilitiesImpl().setIsComplete(true);
       if ("float".equalsIgnoreCase(typeInfo)) {
-        capabilities.setType(ValueType.FLOAT);
+        capabilities = 
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT);
         this.type = typeInfo;
       } else if ("long".equalsIgnoreCase(typeInfo)) {
-        capabilities.setType(ValueType.LONG);
+        capabilities = 
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG);
         this.type = typeInfo;
       } else if ("double".equalsIgnoreCase(typeInfo)) {
-        capabilities.setType(ValueType.DOUBLE);
+        capabilities = 
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE);
         this.type = typeInfo;
       } else {
-        capabilities.setType(ValueType.COMPLEX);
+        // in an ideal world complex type reports its actual column 
capabilities...
+        capabilities = 
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.COMPLEX);
         this.type = ComplexMetrics.getSerdeForType(typeInfo).getTypeName();
       }
     }
diff --git 
a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java
 
b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java
index cc792a5..8e8520d 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java
@@ -39,7 +39,6 @@ import org.apache.druid.segment.VirtualColumns;
 import org.apache.druid.segment.column.ColumnCapabilities;
 import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
 import org.apache.druid.segment.column.ColumnHolder;
-import org.apache.druid.segment.column.ValueType;
 import org.apache.druid.segment.data.Indexed;
 import org.apache.druid.segment.data.ListIndexed;
 import org.apache.druid.segment.filter.BooleanValueMatcher;
@@ -150,16 +149,23 @@ public class IncrementalIndexStorageAdapter implements 
StorageAdapter
     // at index-persisting time to determine if we need a multi-value column 
or not. However, that means we
     // need to tweak the capabilities here in the StorageAdapter (a query-time 
construct), so at query time
     // they appear multi-valued.
+    //
+    // Note that this could be improved if we snapshot the capabilities at 
cursor creation time and feed those through
+    // to the StringDimensionIndexer so the selector built on top of it can 
produce values from the snapshot state of
+    // multi-valuedness at cursor creation time, instead of the latest state, 
and getSnapshotColumnCapabilities could
+    // be removed.
+    return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), 
true);
+  }
 
-    final ColumnCapabilities capabilitiesFromIndex = 
index.getCapabilities(column);
-    final IncrementalIndex.DimensionDesc dimensionDesc = 
index.getDimension(column);
-    if (dimensionDesc != null && dimensionDesc.getCapabilities().getType() == 
ValueType.STRING) {
-      final ColumnCapabilitiesImpl retVal = 
ColumnCapabilitiesImpl.copyOf(capabilitiesFromIndex);
-      retVal.setHasMultipleValues(true);
-      return retVal;
-    } else {
-      return capabilitiesFromIndex;
-    }
+  /**
+   * Sad workaround for {@link 
org.apache.druid.query.metadata.SegmentAnalyzer} to deal with the fact that the
+   * response from {@link #getColumnCapabilities} is not accurate for string 
columns, in that it reports all string
+   * columns as having multiple values. This method returns the actual 
capabilities of the underlying
+   * {@link IncrementalIndex} at the time this method is called.
+   */
+  public ColumnCapabilities getSnapshotColumnCapabilities(String column)
+  {
+    return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column));
   }
 
   @Override
diff --git 
a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableColumnSelectorFactory.java
 
b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableColumnSelectorFactory.java
index 7e6466d..00cb51f 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableColumnSelectorFactory.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableColumnSelectorFactory.java
@@ -58,8 +58,9 @@ public class IndexedTableColumnSelectorFactory implements 
ColumnSelectorFactory
 
       capabilities.setDictionaryValuesSorted(false);
       capabilities.setDictionaryValuesUnique(false);
+      capabilities.setHasMultipleValues(false);
 
-      return capabilities.setIsComplete(true);
+      return capabilities;
     } else {
       return null;
     }
diff --git 
a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java
 
b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java
index b9cfe4f..269ac38 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java
@@ -85,7 +85,7 @@ public class QueryableIndexVectorColumnSelectorFactory 
implements VectorColumnSe
           if (holder == null
               || !holder.getCapabilities().isDictionaryEncoded()
               || holder.getCapabilities().getType() != ValueType.STRING
-              || !holder.getCapabilities().hasMultipleValues()) {
+              || !holder.getCapabilities().hasMultipleValues().isMaybeTrue()) {
             throw new ISE(
                 "Column[%s] is not a multi-value string column, do not ask for 
a multi-value selector",
                 spec.getDimension()
@@ -125,7 +125,7 @@ public class QueryableIndexVectorColumnSelectorFactory 
implements VectorColumnSe
             return NilVectorSelector.create(offset);
           }
 
-          if (holder.getCapabilities().hasMultipleValues()) {
+          if (holder.getCapabilities().hasMultipleValues().isMaybeTrue()) {
             // Asking for a single-value dimension selector on a multi-value 
column gets you an error.
             throw new ISE("Column[%s] is multi-value, do not ask for a 
single-value selector", spec.getDimension());
           }
diff --git 
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java
 
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java
index 5ae6987..5ab4e46 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java
@@ -154,8 +154,7 @@ public class ExpressionSelectors
       } else if (capabilities != null
                  && capabilities.getType() == ValueType.STRING
                  && capabilities.isDictionaryEncoded()
-                 && capabilities.isComplete()
-                 && !capabilities.hasMultipleValues()
+                 && !capabilities.hasMultipleValues().isMaybeTrue()
                  && exprDetails.getArrayBindings().isEmpty()) {
         // Optimization for expressions that hit one scalar string column and 
nothing else.
         return new SingleStringInputCachingExpressionColumnValueSelector(
@@ -227,7 +226,7 @@ public class ExpressionSelectors
       if (capabilities != null
           && capabilities.getType() == ValueType.STRING
           && capabilities.isDictionaryEncoded()
-          && capabilities.isComplete()
+          && !capabilities.hasMultipleValues().isUnknown()
           && !exprDetails.hasInputArrays()
           && !exprDetails.isOutputArray()
       ) {
@@ -356,7 +355,7 @@ public class ExpressionSelectors
       final ColumnCapabilities columnCapabilities = columnSelectorFactory
           .getColumnCapabilities(columnName);
       final ValueType nativeType = columnCapabilities != null ? 
columnCapabilities.getType() : null;
-      final boolean multiVal = columnCapabilities != null && 
columnCapabilities.hasMultipleValues();
+      final boolean multiVal = columnCapabilities != null && 
columnCapabilities.hasMultipleValues().isTrue();
       final Supplier<Object> supplier;
 
       if (nativeType == ValueType.FLOAT) {
@@ -597,11 +596,11 @@ public class ExpressionSelectors
     for (String column : columns) {
       final ColumnCapabilities capabilities = 
columnSelectorFactory.getColumnCapabilities(column);
       if (capabilities != null) {
-        if (capabilities.hasMultipleValues()) {
+        if (capabilities.hasMultipleValues().isTrue()) {
           actualArrays.add(column);
         } else if (
-            !capabilities.isComplete() &&
             capabilities.getType().equals(ValueType.STRING) &&
+            capabilities.hasMultipleValues().isMaybeTrue() &&
             !exprDetails.getArrayBindings().contains(column)
         ) {
           unknownIfArrays.add(column);
diff --git 
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
 
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
index 4823651..59b846f 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java
@@ -131,10 +131,10 @@ public class ExpressionVirtualColumn implements 
VirtualColumn
   @Override
   public ColumnCapabilities capabilities(String columnName)
   {
-    // Note: Ideally we would only "setHasMultipleValues(true)" if the 
expression in question could potentially return
-    // multiple values. However, we don't currently have a good way of 
determining this, so to be safe we always
-    // set the flag.
-    return new 
ColumnCapabilitiesImpl().setType(outputType).setHasMultipleValues(true);
+    // Note: Ideally we would fill out additional information instead of 
leaving capabilities as 'unknown', e.g. examine
+    // if the expression in question could potentially return multiple values 
and anything else. However, we don't
+    // currently have a good way of determining this, so fill this out more 
once we do
+    return new ColumnCapabilitiesImpl().setType(outputType);
   }
 
   @Override
diff --git 
a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2Test.java
 
b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2Test.java
index d71d70a..75a12a9 100644
--- 
a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2Test.java
+++ 
b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2Test.java
@@ -47,8 +47,7 @@ public class GroupByQueryEngineV2Test
                                                                   
.setHasMultipleValues(false)
                                                                   
.setDictionaryEncoded(true)
                                                                   
.setDictionaryValuesSorted(true)
-                                                                  
.setDictionaryValuesUnique(true)
-                                                                  
.setIsComplete(true);
+                                                                  
.setDictionaryValuesUnique(true);
     
EasyMock.expect(factory.getColumnCapabilities(DIM)).andReturn(capabilities).once();
     EasyMock.replay(factory);
     Assert.assertTrue(GroupByQueryEngineV2.canPushDownLimit(factory, DIM));
@@ -63,8 +62,7 @@ public class GroupByQueryEngineV2Test
                                                                   
.setHasMultipleValues(false)
                                                                   
.setDictionaryEncoded(false)
                                                                   
.setDictionaryValuesSorted(false)
-                                                                  
.setDictionaryValuesUnique(true)
-                                                                  
.setIsComplete(true);
+                                                                  
.setDictionaryValuesUnique(true);
     
EasyMock.expect(factory.getColumnCapabilities(DIM)).andReturn(capabilities).once();
     EasyMock.replay(factory);
     Assert.assertFalse(GroupByQueryEngineV2.canPushDownLimit(factory, DIM));
@@ -79,8 +77,7 @@ public class GroupByQueryEngineV2Test
                                                                   
.setHasMultipleValues(false)
                                                                   
.setDictionaryEncoded(false)
                                                                   
.setDictionaryValuesSorted(false)
-                                                                  
.setDictionaryValuesUnique(false)
-                                                                  
.setIsComplete(true);
+                                                                  
.setDictionaryValuesUnique(false);
     
EasyMock.expect(factory.getColumnCapabilities(DIM)).andReturn(capabilities).once();
     EasyMock.replay(factory);
     Assert.assertFalse(GroupByQueryEngineV2.canPushDownLimit(factory, DIM));
@@ -95,8 +92,7 @@ public class GroupByQueryEngineV2Test
                                                                   
.setHasMultipleValues(false)
                                                                   
.setDictionaryEncoded(true)
                                                                   
.setDictionaryValuesSorted(false)
-                                                                  
.setDictionaryValuesUnique(false)
-                                                                  
.setIsComplete(true);
+                                                                  
.setDictionaryValuesUnique(false);
     
EasyMock.expect(factory.getColumnCapabilities(DIM)).andReturn(capabilities).once();
     EasyMock.replay(factory);
     Assert.assertFalse(GroupByQueryEngineV2.canPushDownLimit(factory, DIM));
@@ -111,8 +107,7 @@ public class GroupByQueryEngineV2Test
                                                                       
.setHasMultipleValues(false)
                                                                       
.setDictionaryEncoded(false)
                                                                       
.setDictionaryValuesSorted(false)
-                                                                      
.setDictionaryValuesUnique(false)
-                                                                      
.setIsComplete(true);
+                                                                      
.setDictionaryValuesUnique(false);
     
EasyMock.expect(factory.getColumnCapabilities(DIM)).andReturn(capabilities).anyTimes();
     EasyMock.replay(factory);
     Assert.assertTrue(GroupByQueryEngineV2.canPushDownLimit(factory, DIM));
@@ -131,8 +126,7 @@ public class GroupByQueryEngineV2Test
                                                                       
.setHasMultipleValues(false)
                                                                       
.setDictionaryEncoded(false)
                                                                       
.setDictionaryValuesSorted(false)
-                                                                      
.setDictionaryValuesUnique(false)
-                                                                      
.setIsComplete(true);
+                                                                      
.setDictionaryValuesUnique(false);
     
EasyMock.expect(factory.getColumnCapabilities(DIM)).andReturn(capabilities).once();
     EasyMock.replay(factory);
     Assert.assertTrue(GroupByQueryEngineV2.canPushDownLimit(factory, DIM));
diff --git 
a/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java 
b/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java
index ce60351..3ca72aa 100644
--- 
a/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java
+++ 
b/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java
@@ -137,9 +137,8 @@ public class LookupSegmentTest
     // Note: the "k" column does not actually have multiple values, but the RowBasedStorageAdapter doesn't allow
     // reporting complete single-valued capabilities. It would be good to change this in the future, so query engines
     // running on top of lookups can take advantage of singly-valued optimizations.
-    Assert.assertFalse(capabilities.hasMultipleValues());
+    Assert.assertTrue(capabilities.hasMultipleValues().isUnknown());
     Assert.assertFalse(capabilities.isDictionaryEncoded());
-    Assert.assertFalse(capabilities.isComplete());
   }
 
   @Test
@@ -151,9 +150,8 @@ public class LookupSegmentTest
     // reporting complete single-valued capabilities. It would be good to change this in the future, so query engines
     // running on top of lookups can take advantage of singly-valued optimizations.
     Assert.assertEquals(ValueType.STRING, capabilities.getType());
-    Assert.assertFalse(capabilities.hasMultipleValues());
+    Assert.assertTrue(capabilities.hasMultipleValues().isUnknown());
     Assert.assertFalse(capabilities.isDictionaryEncoded());
-    Assert.assertFalse(capabilities.isComplete());
   }
 
   @Test
diff --git 
a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java
 
b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java
index 5f044ec..9da0671 100644
--- 
a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java
+++ 
b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java
@@ -227,7 +227,7 @@ public class SegmentMetadataQueryTest
             "placement",
             new ColumnAnalysis(
                 ValueType.STRING.toString(),
-                !mmap1,
+                false,
                 preferedSize1,
                 1,
                 "preferred",
@@ -268,7 +268,7 @@ public class SegmentMetadataQueryTest
             "placement",
             new ColumnAnalysis(
                 ValueType.STRING.toString(),
-                !mmap2,
+                false,
                 placementSize2,
                 1,
                 null,
@@ -304,7 +304,7 @@ public class SegmentMetadataQueryTest
             "placement",
             new ColumnAnalysis(
                 ValueType.STRING.toString(),
-                !mmap1 || !mmap2,
+                false,
                 0,
                 0,
                 null,
@@ -372,7 +372,7 @@ public class SegmentMetadataQueryTest
             "placement",
             new ColumnAnalysis(
                 ValueType.STRING.toString(),
-                !mmap1 || !mmap2,
+                false,
                 0,
                 1,
                 null,
@@ -440,7 +440,7 @@ public class SegmentMetadataQueryTest
             "placement",
             new ColumnAnalysis(
                 ValueType.STRING.toString(),
-                !mmap1 || !mmap2,
+                false,
                 0,
                 1,
                 null,
@@ -509,7 +509,7 @@ public class SegmentMetadataQueryTest
     }
     ColumnAnalysis analysis = new ColumnAnalysis(
         ValueType.STRING.toString(),
-        !mmap1 || !mmap2,
+        false,
         size1 + size2,
         1,
         "preferred",
@@ -530,7 +530,7 @@ public class SegmentMetadataQueryTest
     }
     ColumnAnalysis analysis = new ColumnAnalysis(
         ValueType.STRING.toString(),
-        !mmap1 || !mmap2,
+        false,
         size1 + size2,
         3,
         "spot",
@@ -551,7 +551,7 @@ public class SegmentMetadataQueryTest
     }
     ColumnAnalysis analysis = new ColumnAnalysis(
         ValueType.STRING.toString(),
-        !mmap1 || !mmap2,
+        false,
         size1 + size2,
         9,
         "automotive",
@@ -637,7 +637,7 @@ public class SegmentMetadataQueryTest
             "placement",
             new ColumnAnalysis(
                 ValueType.STRING.toString(),
-                !mmap1 || !mmap2,
+                false,
                 0,
                 0,
                 null,
@@ -699,7 +699,7 @@ public class SegmentMetadataQueryTest
             "placement",
             new ColumnAnalysis(
                 ValueType.STRING.toString(),
-                !mmap1 || !mmap2,
+                false,
                 0,
                 0,
                 null,
@@ -757,7 +757,7 @@ public class SegmentMetadataQueryTest
             "placement",
             new ColumnAnalysis(
                 ValueType.STRING.toString(),
-                !mmap1 || !mmap2,
+                false,
                 0,
                 0,
                 null,
@@ -815,7 +815,7 @@ public class SegmentMetadataQueryTest
             "placement",
             new ColumnAnalysis(
                 ValueType.STRING.toString(),
-                !mmap1 || !mmap2,
+                false,
                 0,
                 0,
                 null,
diff --git 
a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java
 
b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java
index ccfcce7..55d8b18 100644
--- 
a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java
+++ 
b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java
@@ -102,7 +102,7 @@ public class SegmentMetadataUnionQueryTest extends 
InitializedNullHandlingTest
             "placement",
             new ColumnAnalysis(
                 ValueType.STRING.toString(),
-                !mmap,
+                false,
                 mmap ? 43524 : 43056,
                 1,
                 "preferred",
diff --git 
a/processing/src/test/java/org/apache/druid/segment/IndexMergerV9WithSpatialIndexTest.java
 
b/processing/src/test/java/org/apache/druid/segment/IndexMergerV9WithSpatialIndexTest.java
index 1c68a21..b4525c0 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/IndexMergerV9WithSpatialIndexTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/IndexMergerV9WithSpatialIndexTest.java
@@ -48,6 +48,7 @@ import 
org.apache.druid.query.timeseries.TimeseriesResultValue;
 import org.apache.druid.segment.incremental.IncrementalIndex;
 import org.apache.druid.segment.incremental.IncrementalIndexSchema;
 import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory;
+import org.apache.druid.testing.InitializedNullHandlingTest;
 import org.joda.time.Interval;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -66,7 +67,7 @@ import java.util.concurrent.ThreadLocalRandom;
 /**
  */
 @RunWith(Parameterized.class)
-public class IndexMergerV9WithSpatialIndexTest
+public class IndexMergerV9WithSpatialIndexTest extends 
InitializedNullHandlingTest
 {
 
   public static final int NUM_POINTS = 5000;
diff --git 
a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java
 
b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java
index dc102e6..c7783e9 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java
@@ -38,6 +38,7 @@ import 
org.apache.druid.query.aggregation.FloatSumAggregatorFactory;
 import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
 import 
org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory;
 import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
 import org.apache.druid.segment.column.ColumnHolder;
 import org.apache.druid.segment.column.ValueType;
 import org.apache.druid.segment.incremental.IncrementalIndex;
@@ -152,9 +153,12 @@ public class QueryableIndexColumnCapabilitiesTest extends 
InitializedNullHandlin
     Assert.assertTrue(caps.isDictionaryEncoded());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue());
-    Assert.assertFalse(caps.hasMultipleValues());
+    // multi-value is unknown unless explicitly set to 'true'
+    Assert.assertTrue(caps.hasMultipleValues().isUnknown());
+    // at index merge or query time we 'complete' the capabilities to take a snapshot of the current state,
+    // coercing any 'UNKNOWN' values to false
+    Assert.assertFalse(ColumnCapabilitiesImpl.snapshot(caps).hasMultipleValues().isMaybeTrue());
     Assert.assertFalse(caps.hasSpatialIndexes());
-    Assert.assertTrue(caps.isComplete());
 
     caps = MMAP_INDEX.getColumnHolder("d1").getCapabilities();
     Assert.assertEquals(ValueType.STRING, caps.getType());
@@ -162,9 +166,8 @@ public class QueryableIndexColumnCapabilitiesTest extends 
InitializedNullHandlin
     Assert.assertTrue(caps.isDictionaryEncoded());
     Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue());
-    Assert.assertFalse(caps.hasMultipleValues());
+    Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
     Assert.assertFalse(caps.hasSpatialIndexes());
-    Assert.assertTrue(caps.isComplete());
   }
 
   @Test
@@ -176,9 +179,8 @@ public class QueryableIndexColumnCapabilitiesTest extends 
InitializedNullHandlin
     Assert.assertTrue(caps.isDictionaryEncoded());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue());
-    Assert.assertTrue(caps.hasMultipleValues());
+    Assert.assertTrue(caps.hasMultipleValues().isTrue());
     Assert.assertFalse(caps.hasSpatialIndexes());
-    Assert.assertTrue(caps.isComplete());
 
     caps = MMAP_INDEX.getColumnHolder("d2").getCapabilities();
     Assert.assertEquals(ValueType.STRING, caps.getType());
@@ -186,9 +188,8 @@ public class QueryableIndexColumnCapabilitiesTest extends 
InitializedNullHandlin
     Assert.assertTrue(caps.isDictionaryEncoded());
     Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue());
-    Assert.assertTrue(caps.hasMultipleValues());
+    Assert.assertTrue(caps.hasMultipleValues().isTrue());
     Assert.assertFalse(caps.hasSpatialIndexes());
-    Assert.assertTrue(caps.isComplete());
   }
 
   @Test
@@ -206,8 +207,7 @@ public class QueryableIndexColumnCapabilitiesTest extends 
InitializedNullHandlin
     Assert.assertFalse(caps.isDictionaryEncoded());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
-    Assert.assertFalse(caps.hasMultipleValues());
+    Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
     Assert.assertFalse(caps.hasSpatialIndexes());
-    Assert.assertTrue(caps.isComplete());
   }
 }
diff --git 
a/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java
 
b/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java
index 8886c7d..e12dac4 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java
@@ -54,9 +54,8 @@ public class RowBasedColumnSelectorFactoryTest
     Assert.assertFalse(caps.isDictionaryEncoded());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
-    Assert.assertFalse(caps.hasMultipleValues());
+    Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
     Assert.assertFalse(caps.hasSpatialIndexes());
-    Assert.assertTrue(caps.isComplete());
   }
 
   @Test
@@ -69,9 +68,8 @@ public class RowBasedColumnSelectorFactoryTest
     Assert.assertFalse(caps.isDictionaryEncoded());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
-    Assert.assertFalse(caps.hasMultipleValues());
+    Assert.assertTrue(caps.hasMultipleValues().isUnknown());
     Assert.assertFalse(caps.hasSpatialIndexes());
-    Assert.assertFalse(caps.isComplete());
   }
 
   @Test
@@ -84,9 +82,8 @@ public class RowBasedColumnSelectorFactoryTest
     Assert.assertFalse(caps.isDictionaryEncoded());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
-    Assert.assertFalse(caps.hasMultipleValues());
+    Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
     Assert.assertFalse(caps.hasSpatialIndexes());
-    Assert.assertTrue(caps.isComplete());
   }
 
   @Test
@@ -99,9 +96,8 @@ public class RowBasedColumnSelectorFactoryTest
     Assert.assertFalse(caps.isDictionaryEncoded());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
-    Assert.assertFalse(caps.hasMultipleValues());
+    Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
     Assert.assertFalse(caps.hasSpatialIndexes());
-    Assert.assertTrue(caps.isComplete());
   }
 
   @Test
@@ -114,9 +110,8 @@ public class RowBasedColumnSelectorFactoryTest
     Assert.assertFalse(caps.isDictionaryEncoded());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
-    Assert.assertFalse(caps.hasMultipleValues());
+    Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
     Assert.assertFalse(caps.hasSpatialIndexes());
-    Assert.assertTrue(caps.isComplete());
   }
 
   @Test
@@ -129,9 +124,8 @@ public class RowBasedColumnSelectorFactoryTest
     Assert.assertFalse(caps.isDictionaryEncoded());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
-    Assert.assertFalse(caps.hasMultipleValues());
+    Assert.assertTrue(caps.hasMultipleValues().isUnknown());
     Assert.assertFalse(caps.hasSpatialIndexes());
-    Assert.assertFalse(caps.isComplete());
   }
 
   @Test
diff --git 
a/processing/src/test/java/org/apache/druid/segment/RowBasedStorageAdapterTest.java
 
b/processing/src/test/java/org/apache/druid/segment/RowBasedStorageAdapterTest.java
index a4fc2f2..652a590 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/RowBasedStorageAdapterTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/RowBasedStorageAdapterTest.java
@@ -332,8 +332,7 @@ public class RowBasedStorageAdapterTest
 
     final ColumnCapabilities capabilities = 
adapter.getColumnCapabilities(ValueType.FLOAT.name());
     Assert.assertEquals(ValueType.FLOAT, capabilities.getType());
-    Assert.assertFalse(capabilities.hasMultipleValues());
-    Assert.assertTrue(capabilities.isComplete());
+    Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue());
   }
 
   @Test
@@ -343,8 +342,7 @@ public class RowBasedStorageAdapterTest
 
     final ColumnCapabilities capabilities = 
adapter.getColumnCapabilities(ValueType.DOUBLE.name());
     Assert.assertEquals(ValueType.DOUBLE, capabilities.getType());
-    Assert.assertFalse(capabilities.hasMultipleValues());
-    Assert.assertTrue(capabilities.isComplete());
+    Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue());
   }
 
   @Test
@@ -354,8 +352,7 @@ public class RowBasedStorageAdapterTest
 
     final ColumnCapabilities capabilities = 
adapter.getColumnCapabilities(ValueType.LONG.name());
     Assert.assertEquals(ValueType.LONG, capabilities.getType());
-    Assert.assertFalse(capabilities.hasMultipleValues());
-    Assert.assertTrue(capabilities.isComplete());
+    Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue());
   }
 
   @Test
@@ -369,8 +366,7 @@ public class RowBasedStorageAdapterTest
     // Note: unlike numeric types, STRING-typed columns might have multiple values, so they report as incomplete. It
     // would be good in the future to support some way of changing this, when it is known ahead of time that
     // multi-valuedness is definitely happening or is definitely impossible.
-    Assert.assertFalse(capabilities.hasMultipleValues());
-    Assert.assertFalse(capabilities.isComplete());
+    Assert.assertTrue(capabilities.hasMultipleValues().isUnknown());
   }
 
   @Test
@@ -382,8 +378,7 @@ public class RowBasedStorageAdapterTest
 
     // Note: unlike numeric types, COMPLEX-typed columns report that they are incomplete.
     Assert.assertEquals(ValueType.COMPLEX, capabilities.getType());
-    Assert.assertFalse(capabilities.hasMultipleValues());
-    Assert.assertFalse(capabilities.isComplete());
+    Assert.assertTrue(capabilities.hasMultipleValues().isUnknown());
   }
 
   @Test
diff --git 
a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java
 
b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java
index 53e93c7..e221edd 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java
@@ -47,7 +47,7 @@ public class ColumnCapabilitiesImplTest
     Assert.assertTrue(cc.isDictionaryEncoded());
     Assert.assertFalse(cc.isRunLengthEncoded());
     Assert.assertTrue(cc.hasSpatialIndexes());
-    Assert.assertTrue(cc.hasMultipleValues());
+    Assert.assertTrue(cc.hasMultipleValues().isTrue());
     Assert.assertTrue(cc.hasBitmapIndexes());
     Assert.assertFalse(cc.isFilterable());
   }
@@ -72,7 +72,7 @@ public class ColumnCapabilitiesImplTest
     Assert.assertTrue(cc.isDictionaryEncoded());
     Assert.assertTrue(cc.isRunLengthEncoded());
     Assert.assertTrue(cc.hasSpatialIndexes());
-    Assert.assertTrue(cc.hasMultipleValues());
+    Assert.assertTrue(cc.hasMultipleValues().isTrue());
     Assert.assertTrue(cc.hasBitmapIndexes());
     Assert.assertFalse(cc.isFilterable());
   }
diff --git 
a/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java
 
b/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java
index c7a560c..3ed33b8 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java
@@ -36,6 +36,7 @@ import org.apache.druid.query.filter.Filter;
 import org.apache.druid.query.filter.SelectorDimFilter;
 import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
 import org.apache.druid.segment.column.BitmapIndex;
+import org.apache.druid.segment.column.ColumnCapabilities;
 import org.apache.druid.segment.data.BitmapSerdeFactory;
 import org.apache.druid.segment.data.CloseableIndexed;
 import org.apache.druid.segment.data.ConciseBitmapSerdeFactory;
@@ -146,9 +147,9 @@ public class ExtractionDimFilterTest
     }
 
     @Override
-    public boolean hasMultipleValues(final String dimension)
+    public ColumnCapabilities.Capable hasMultipleValues(final String dimension)
     {
-      return true;
+      return ColumnCapabilities.Capable.TRUE;
     }
 
     @Override
diff --git 
a/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java
 
b/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java
index 20cfdb7..13cc45f 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java
@@ -137,9 +137,8 @@ public class IndexedTableJoinableTest
     Assert.assertEquals(ValueType.STRING, capabilities.getType());
     Assert.assertTrue(capabilities.isDictionaryEncoded());
     Assert.assertFalse(capabilities.hasBitmapIndexes());
-    Assert.assertFalse(capabilities.hasMultipleValues());
+    Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue());
     Assert.assertFalse(capabilities.hasSpatialIndexes());
-    Assert.assertTrue(capabilities.isComplete());
   }
 
   @Test
@@ -149,9 +148,8 @@ public class IndexedTableJoinableTest
     Assert.assertEquals(ValueType.LONG, capabilities.getType());
     Assert.assertFalse(capabilities.isDictionaryEncoded());
     Assert.assertFalse(capabilities.hasBitmapIndexes());
-    Assert.assertFalse(capabilities.hasMultipleValues());
+    Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue());
     Assert.assertFalse(capabilities.hasSpatialIndexes());
-    Assert.assertTrue(capabilities.isComplete());
   }
 
   @Test
diff --git 
a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
 
b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
index 526c63c..16e090d 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
@@ -357,8 +357,7 @@ public class ExpressionVirtualColumnTest extends 
InitializedNullHandlingTest
       {
         return new ColumnCapabilitiesImpl().setType(ValueType.STRING)
                                            .setHasMultipleValues(true)
-                                           .setDictionaryEncoded(true)
-                                           .setIsComplete(true);
+                                           .setDictionaryEncoded(true);
       }
     };
     final BaseObjectColumnValueSelector selectorImplicit =
@@ -814,9 +813,9 @@ public class ExpressionVirtualColumnTest extends 
InitializedNullHandlingTest
     Assert.assertFalse(caps.isDictionaryEncoded());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
-    Assert.assertTrue(caps.hasMultipleValues());
+    Assert.assertTrue(caps.hasMultipleValues().isUnknown());
+    Assert.assertTrue(caps.hasMultipleValues().isMaybeTrue());
     Assert.assertFalse(caps.hasSpatialIndexes());
-    Assert.assertFalse(caps.isComplete());
 
     caps = Z_CONCAT_X.capabilities("expr");
     Assert.assertEquals(ValueType.STRING, caps.getType());
@@ -824,8 +823,8 @@ public class ExpressionVirtualColumnTest extends 
InitializedNullHandlingTest
     Assert.assertFalse(caps.isDictionaryEncoded());
     Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
     Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
-    Assert.assertTrue(caps.hasMultipleValues());
+    Assert.assertTrue(caps.hasMultipleValues().isUnknown());
+    Assert.assertTrue(caps.hasMultipleValues().isMaybeTrue());
     Assert.assertFalse(caps.hasSpatialIndexes());
-    Assert.assertFalse(caps.isComplete());
   }
 }
diff --git 
a/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java
 
b/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java
index b1a9eee..597dbb6 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/virtual/VirtualColumnsTest.java
@@ -417,7 +417,7 @@ public class VirtualColumnsTest extends 
InitializedNullHandlingTest
     @Override
     public ColumnCapabilities capabilities(String columnName)
     {
-      return new ColumnCapabilitiesImpl().setType(ValueType.LONG);
+      return 
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG);
     }
 
     @Override


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to