This is an automated email from the ASF dual-hosted git repository.

cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new b3c238457ff fix unnest bugs (#16723)
b3c238457ff is described below

commit b3c238457ff8ecc33118f8b45e212a50f5ec95d4
Author: Clint Wylie <[email protected]>
AuthorDate: Thu Jul 11 13:48:15 2024 -0700

    fix unnest bugs (#16723)
    
    changes:
    * fixes a bug with unnest storage adapter not preserving underlying columns 
dictionary uniqueness when allowing dimension selector cursor
    * fixes a bug with unnest on realtime segments with empty rows incorrectly 
specifying index 0 as the row dictionary value
---
 .../druid/segment/UnnestDimensionCursor.java       |  83 ++--
 .../apache/druid/segment/UnnestStorageAdapter.java |   4 +-
 .../apache/druid/query/QueryRunnerTestHelper.java  |   3 +-
 .../groupby/UnnestGroupByQueryRunnerTest.java      | 430 ++++++++++++++++++++-
 .../druid/segment/UnnestStorageAdapterTest.java    |   5 +-
 5 files changed, 455 insertions(+), 70 deletions(-)

diff --git 
a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java 
b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java
index 98f0d0949c8..4d4aeaf7046 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java
@@ -19,6 +19,7 @@
 
 package org.apache.druid.segment;
 
+import com.google.common.base.Preconditions;
 import org.apache.druid.query.BaseQuery;
 import org.apache.druid.query.dimension.DefaultDimensionSpec;
 import org.apache.druid.query.dimension.DimensionSpec;
@@ -27,6 +28,7 @@ import org.apache.druid.query.filter.ValueMatcher;
 import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
 import org.apache.druid.segment.column.ColumnCapabilities;
 import org.apache.druid.segment.data.IndexedInts;
+import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
 import org.joda.time.DateTime;
 
 import javax.annotation.Nullable;
@@ -69,10 +71,12 @@ public class UnnestDimensionCursor implements Cursor
   private final String outputName;
   private final ColumnSelectorFactory baseColumnSelectorFactory;
   private int index;
-  @Nullable
+  @MonotonicNonNull
   private IndexedInts indexedIntsForCurrentRow;
   private boolean needInitialization;
   private SingleIndexInts indexIntsForRow;
+  private final int nullId;
+  private final int idOffset;
 
   public UnnestDimensionCursor(
       Cursor cursor,
@@ -91,11 +95,22 @@ public class UnnestDimensionCursor implements Cursor
     this.index = 0;
     this.outputName = outputColumnName;
     this.needInitialization = true;
+    // this shouldn't happen, but just in case...
+    final IdLookup lookup = Preconditions.checkNotNull(dimSelector.idLookup());
+    final int nullId = lookup.lookupId(null);
+    if (nullId < 0) {
+      this.idOffset = 1;
+      this.nullId = 0;
+    } else {
+      this.idOffset = 0;
+      this.nullId = nullId;
+    }
   }
 
   @Override
   public ColumnSelectorFactory getColumnSelectorFactory()
   {
+
     return new ColumnSelectorFactory()
     {
       @Override
@@ -110,15 +125,13 @@ public class UnnestDimensionCursor implements Cursor
           @Override
           public IndexedInts getRow()
           {
-            // This object reference has been created
-            // during the call to initialize and referenced henceforth
             return indexIntsForRow;
           }
 
           @Override
           public ValueMatcher makeValueMatcher(@Nullable String value)
           {
-            final int idForLookup = idLookup().lookupId(value);
+            final int idForLookup = dimSelector.idLookup().lookupId(value);
             if (idForLookup < 0) {
               return new ValueMatcher()
               {
@@ -131,7 +144,7 @@ public class UnnestDimensionCursor implements Cursor
                       return true;
                     }
                     final int rowId = indexedIntsForCurrentRow.get(index);
-                    return lookupName(rowId) == null;
+                    return dimSelector.lookupName(rowId) == null;
                   }
                   return false;
                 }
@@ -156,7 +169,7 @@ public class UnnestDimensionCursor implements Cursor
                   return includeUnknown;
                 }
                 final int rowId = indexedIntsForCurrentRow.get(index);
-                return (includeUnknown && lookupName(rowId) == null) || 
idForLookup == rowId;
+                return (includeUnknown && dimSelector.lookupName(rowId) == 
null) || idForLookup == rowId;
               }
 
               @Override
@@ -183,10 +196,10 @@ public class UnnestDimensionCursor implements Cursor
           @Override
           public Object getObject()
           {
-            if (indexedIntsForCurrentRow == null || 
indexedIntsForCurrentRow.size() == 0) {
+            if (indexedIntsForCurrentRow.size() == 0) {
               return null;
             }
-            return lookupName(indexedIntsForCurrentRow.get(index));
+            return dimSelector.lookupName(indexedIntsForCurrentRow.get(index));
           }
 
           @Override
@@ -198,14 +211,14 @@ public class UnnestDimensionCursor implements Cursor
           @Override
           public int getValueCardinality()
           {
-            return dimSelector.getValueCardinality();
+            return dimSelector.getValueCardinality() + idOffset;
           }
 
           @Nullable
           @Override
           public String lookupName(int id)
           {
-            return dimSelector.lookupName(id);
+            return dimSelector.lookupName(id - idOffset);
           }
 
           @Override
@@ -218,21 +231,19 @@ public class UnnestDimensionCursor implements Cursor
           @Override
           public IdLookup idLookup()
           {
-            return dimSelector.idLookup();
+            return name -> name == null ? nullId : 
dimSelector.idLookup().lookupId(name) + idOffset;
           }
         };
       }
 
-      /*
-      This ideally should not be called. If called delegate using the 
makeDimensionSelector
-       */
       @Override
       public ColumnValueSelector makeColumnValueSelector(String columnName)
       {
-        if (!outputName.equals(columnName)) {
-          return baseColumnSelectorFactory.makeColumnValueSelector(columnName);
+        if (outputName.equals(columnName)) {
+          return makeDimensionSelector(DefaultDimensionSpec.of(columnName));
         }
-        return makeDimensionSelector(DefaultDimensionSpec.of(columnName));
+
+        return baseColumnSelectorFactory.makeColumnValueSelector(columnName);
       }
 
       @Nullable
@@ -304,11 +315,7 @@ public class UnnestDimensionCursor implements Cursor
   {
     index = 0;
     this.indexIntsForRow = new SingleIndexInts();
-
-    if (dimSelector.getObject() != null) {
-      this.indexedIntsForCurrentRow = dimSelector.getRow();
-    }
-
+    this.indexedIntsForCurrentRow = dimSelector.getRow();
     needInitialization = false;
   }
 
@@ -320,30 +327,19 @@ public class UnnestDimensionCursor implements Cursor
    */
   private void advanceAndUpdate()
   {
-    if (indexedIntsForCurrentRow == null) {
-      index = 0;
+    if (index >= indexedIntsForCurrentRow.size() - 1) {
       if (!baseCursor.isDone()) {
         baseCursor.advanceUninterruptibly();
-        if (!baseCursor.isDone()) {
-          indexedIntsForCurrentRow = dimSelector.getRow();
-        }
       }
-    } else {
-      if (index >= indexedIntsForCurrentRow.size() - 1) {
-        if (!baseCursor.isDone()) {
-          baseCursor.advanceUninterruptibly();
-        }
-        if (!baseCursor.isDone()) {
-          indexedIntsForCurrentRow = dimSelector.getRow();
-        }
-        index = 0;
-      } else {
-        ++index;
+      if (!baseCursor.isDone()) {
+        indexedIntsForCurrentRow = dimSelector.getRow();
       }
+      index = 0;
+    } else {
+      ++index;
     }
   }
 
-
   // Helper class to help in returning
   // getRow from the dimensionSelector
   // This is set in the initialize method
@@ -366,12 +362,11 @@ public class UnnestDimensionCursor implements Cursor
     @Override
     public int get(int idx)
     {
-      // need to get value from the indexed ints
-      // only if it is non null and has at least 1 value
-      if (indexedIntsForCurrentRow != null && indexedIntsForCurrentRow.size() 
> 0) {
-        return indexedIntsForCurrentRow.get(index);
+      // everything that calls get also checks size
+      if (indexedIntsForCurrentRow.size() == 0) {
+        return nullId;
       }
-      return 0;
+      return idOffset + indexedIntsForCurrentRow.get(index);
     }
   }
 }
diff --git 
a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java 
b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java
index 8735844c946..ff4994210e1 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java
@@ -582,10 +582,12 @@ public class UnnestStorageAdapter implements 
StorageAdapter
       final TypeSignature<ValueType> outputType =
           capabilities.isArray() ? capabilities.getElementType() : 
capabilities.toColumnType();
 
+      final boolean useDimensionCursor = useDimensionCursor(capabilities);
       return ColumnCapabilitiesImpl.createDefault()
                                    .setType(outputType)
                                    .setHasMultipleValues(false)
-                                   
.setDictionaryEncoded(useDimensionCursor(capabilities));
+                                   .setDictionaryEncoded(useDimensionCursor)
+                                   
.setDictionaryValuesUnique(useDimensionCursor);
     }
   }
 
diff --git 
a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java 
b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java
index f4699639af1..e562f72955e 100644
--- a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java
+++ b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java
@@ -524,8 +524,7 @@ public class QueryRunnerTestHelper
     final DataSource base = query.getDataSource();
 
     final SegmentReference segmentReference = 
base.createSegmentMapFunction(query, new AtomicLong())
-                                                  
.apply(ReferenceCountingSegment.wrapRootGenerationSegment(
-                                                      adapter));
+                                                  
.apply(ReferenceCountingSegment.wrapRootGenerationSegment(adapter));
     return makeQueryRunner(factory, segmentReference, runnerName);
   }
 
diff --git 
a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java
 
b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java
index 13a33191e8e..3976a20bd2d 100644
--- 
a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java
+++ 
b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java
@@ -25,7 +25,10 @@ import com.google.common.base.Suppliers;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.data.input.ListBasedInputRow;
+import org.apache.druid.java.util.common.DateTimes;
 import org.apache.druid.java.util.common.ISE;
+import org.apache.druid.java.util.common.Intervals;
 import org.apache.druid.math.expr.ExprMacroTable;
 import org.apache.druid.query.DataSource;
 import org.apache.druid.query.DirectQueryProcessingPool;
@@ -43,21 +46,30 @@ import 
org.apache.druid.query.extraction.StringFormatExtractionFn;
 import org.apache.druid.query.filter.EqualityFilter;
 import org.apache.druid.query.filter.NotDimFilter;
 import org.apache.druid.query.groupby.orderby.OrderByColumnSpec;
+import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
 import org.apache.druid.segment.IncrementalIndexSegment;
+import org.apache.druid.segment.IndexBuilder;
+import org.apache.druid.segment.QueryableIndex;
+import org.apache.druid.segment.QueryableIndexSegment;
 import org.apache.druid.segment.TestHelper;
 import org.apache.druid.segment.TestIndex;
 import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.RowSignature;
 import org.apache.druid.segment.incremental.IncrementalIndex;
+import org.apache.druid.segment.incremental.IncrementalIndexSchema;
 import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
 import org.apache.druid.testing.InitializedNullHandlingTest;
+import org.joda.time.DateTime;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.ExpectedException;
+import org.junit.rules.TemporaryFolder;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -78,6 +90,9 @@ public class UnnestGroupByQueryRunnerTest extends 
InitializedNullHandlingTest
   @Rule
   public ExpectedException expectedException = ExpectedException.none();
 
+  @Rule
+  public final TemporaryFolder tempFolder = new TemporaryFolder();
+
   public UnnestGroupByQueryRunnerTest(
       GroupByQueryConfig config,
       GroupByQueryRunnerFactory factory,
@@ -214,6 +229,11 @@ public class UnnestGroupByQueryRunnerTest extends 
InitializedNullHandlingTest
     return GroupByQueryRunnerTestHelper.createExpectedRow(query, timestamp, 
vals);
   }
 
+  private static ResultRow makeRow(final GroupByQuery query, final DateTime 
timestamp, final Object... vals)
+  {
+    return GroupByQueryRunnerTestHelper.createExpectedRow(query, timestamp, 
vals);
+  }
+
   @Test
   public void testGroupBy()
   {
@@ -423,6 +443,9 @@ public class UnnestGroupByQueryRunnerTest extends 
InitializedNullHandlingTest
 
     Iterable<ResultRow> results = runQuery(query, 
TestIndex.getIncrementalTestIndex());
     TestHelper.assertExpectedObjects(expectedResults, results, "groupBy");
+
+    results = runQuery(query, TestIndex.getMMappedTestIndex());
+    TestHelper.assertExpectedObjects(expectedResults, results, "groupBy");
   }
 
   @Test
@@ -462,6 +485,9 @@ public class UnnestGroupByQueryRunnerTest extends 
InitializedNullHandlingTest
 
     Iterable<ResultRow> results = runQuery(query, 
TestIndex.getIncrementalTestIndex());
     TestHelper.assertExpectedObjects(expectedResults, results, 
"missing-column");
+
+    results = runQuery(query, TestIndex.getMMappedTestIndex());
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"missing-column");
   }
 
   @Test
@@ -538,6 +564,9 @@ public class UnnestGroupByQueryRunnerTest extends 
InitializedNullHandlingTest
 
     Iterable<ResultRow> results = runQuery(query, 
TestIndex.getIncrementalTestIndex());
     TestHelper.assertExpectedObjects(expectedResults, results, 
"groupBy-on-unnested-column");
+
+    results = runQuery(query, TestIndex.getMMappedTestIndex());
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"groupBy-on-unnested-column");
   }
 
   @Test
@@ -627,6 +656,9 @@ public class UnnestGroupByQueryRunnerTest extends 
InitializedNullHandlingTest
 
     Iterable<ResultRow> results = runQuery(query, 
TestIndex.getIncrementalTestIndex());
     TestHelper.assertExpectedObjects(expectedResults, results, 
"groupBy-on-unnested-virtual-column");
+
+    results = runQuery(query, TestIndex.getMMappedTestIndex());
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"groupBy-on-unnested-virtual-column");
   }
 
   @Test
@@ -678,35 +710,335 @@ public class UnnestGroupByQueryRunnerTest extends 
InitializedNullHandlingTest
 
     Iterable<ResultRow> results = runQuery(query, 
TestIndex.getIncrementalTestIndex());
     TestHelper.assertExpectedObjects(expectedResults, results, 
"groupBy-on-unnested-virtual-columns");
+
+    results = runQuery(query, TestIndex.getMMappedTestIndex());
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"groupBy-on-unnested-virtual-column");
   }
 
-  /**
-   * Use this method instead of makeQueryBuilder() to make sure the context is 
set properly. Also, avoid
-   * setContext in tests. Only use overrideContext.
-   */
-  private GroupByQuery.Builder makeQueryBuilder()
+  @Test
+  public void testGroupByOnUnnestedStringColumnWithNullStuff() throws 
IOException
   {
-    return GroupByQuery.builder().overrideContext(makeContext());
+    cannotVectorize();
+
+    final String dim = "mvd";
+    final DateTime timestamp = DateTimes.nowUtc();
+    final RowSignature signature = RowSignature.builder()
+                                               .add(dim, ColumnType.STRING)
+                                               .build();
+    List<String> dims = Collections.singletonList(dim);
+    IndexBuilder bob =
+        IndexBuilder.create()
+                    .schema(
+                        IncrementalIndexSchema.builder()
+                                              .withRollup(false)
+                                              .build()
+                    )
+                    .rows(
+                        ImmutableList.of(
+                            new ListBasedInputRow(signature, timestamp, dims, 
ImmutableList.of(ImmutableList.of("a", "b", "c"))),
+                            new ListBasedInputRow(signature, timestamp, dims, 
ImmutableList.of()),
+                            new ListBasedInputRow(signature, timestamp, dims, 
ImmutableList.of(ImmutableList.of())),
+                            new ListBasedInputRow(signature, timestamp, dims, 
ImmutableList.of(""))
+                        )
+                    );
+
+    final DataSource unnestDataSource = UnnestDataSource.create(
+        new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
+        new ExpressionVirtualColumn(
+            "v0",
+            "mvd",
+            ColumnType.STRING,
+            TestExprMacroTable.INSTANCE
+        ),
+        null
+    );
+
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(unnestDataSource)
+        .setQuerySegmentSpec(new 
MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY)))
+        .setDimensions(new DefaultDimensionSpec("v0", "v0"))
+        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    List<ResultRow> expectedResults = NullHandling.sqlCompatible() ? 
Arrays.asList(
+        makeRow(query, timestamp, "v0", null, "rows", 2L),
+        makeRow(query, timestamp, "v0", "", "rows", 1L),
+        makeRow(query, timestamp, "v0", "a", "rows", 1L),
+        makeRow(query, timestamp, "v0", "b", "rows", 1L),
+        makeRow(query, timestamp, "v0", "c", "rows", 1L)
+    ) : Arrays.asList(
+        makeRow(query, timestamp, "v0", null, "rows", 3L),
+        makeRow(query, timestamp, "v0", "a", "rows", 1L),
+        makeRow(query, timestamp, "v0", "b", "rows", 1L),
+        makeRow(query, timestamp, "v0", "c", "rows", 1L)
+    );
+
+    Iterable<ResultRow> results = runQuery(query, bob.buildIncrementalIndex());
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"group-by-unnested-string-nulls");
+
+    results = runQuery(query, 
bob.tmpDir(tempFolder.newFolder()).buildMMappedIndex());
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"group-by-unnested-string-nulls");
   }
 
-  private Iterable<ResultRow> runQuery(final GroupByQuery query, final 
IncrementalIndex index)
+  @Test
+  public void testGroupByOnUnnestedStringColumnWithMoreNullStuff() throws 
IOException
   {
-    final QueryRunner<?> queryRunner = factory.mergeRunners(
-        DirectQueryProcessingPool.INSTANCE,
-        Collections.singletonList(
-            QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn(
-                factory,
-                new IncrementalIndexSegment(
-                    index,
-                    QueryRunnerTestHelper.SEGMENT_ID
-                ),
-                query,
-                "rtIndexvc"
-            )
-        )
+    cannotVectorize();
+
+    final String dim = "mvd";
+    final DateTime timestamp = DateTimes.nowUtc();
+    final RowSignature signature = RowSignature.builder()
+                                               .add(dim, ColumnType.STRING)
+                                               .build();
+    List<String> dims = Collections.singletonList(dim);
+    IndexBuilder bob =
+        IndexBuilder.create()
+                    .schema(
+                        IncrementalIndexSchema.builder()
+                                              .withRollup(false)
+                                              .build()
+                    )
+                    .rows(
+                        ImmutableList.of(
+                            new ListBasedInputRow(signature, timestamp, dims, 
Collections.singletonList(Arrays.asList("a", "b", "c"))),
+                            new ListBasedInputRow(signature, timestamp, dims, 
Collections.emptyList()),
+                            new ListBasedInputRow(signature, timestamp, dims, 
Collections.singletonList(null)),
+                            new ListBasedInputRow(signature, timestamp, dims, 
Collections.singletonList(Collections.emptyList())),
+                            new ListBasedInputRow(signature, timestamp, dims, 
Collections.singletonList(Arrays.asList(null, null))),
+                            new ListBasedInputRow(signature, timestamp, dims, 
Collections.singletonList(Collections.singletonList(null))),
+                            new ListBasedInputRow(signature, timestamp, dims, 
Collections.singletonList(Collections.singletonList("")))
+                        )
+                    );
+
+    final DataSource unnestDataSource = UnnestDataSource.create(
+        new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
+        new ExpressionVirtualColumn(
+            "v0",
+            "mvd",
+            ColumnType.STRING,
+            TestExprMacroTable.INSTANCE
+        ),
+        null
     );
 
-    return GroupByQueryRunnerTestHelper.runQuery(factory, queryRunner, query);
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(unnestDataSource)
+        .setQuerySegmentSpec(new 
MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY)))
+        .setDimensions(new DefaultDimensionSpec("v0", "v0"))
+        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    // make sure results are consistent with grouping directly on the column 
with implicit unnest
+    GroupByQuery regularQuery = makeQueryBuilder()
+        .setDataSource(new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE))
+        .setQuerySegmentSpec(new 
MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY)))
+        .setDimensions(new DefaultDimensionSpec("mvd", "v0"))
+        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    List<ResultRow> expectedResults = NullHandling.sqlCompatible() ? 
Arrays.asList(
+        makeRow(query, timestamp, "v0", null, "rows", 6L),
+        makeRow(query, timestamp, "v0", "", "rows", 1L),
+        makeRow(query, timestamp, "v0", "a", "rows", 1L),
+        makeRow(query, timestamp, "v0", "b", "rows", 1L),
+        makeRow(query, timestamp, "v0", "c", "rows", 1L)
+    ) : Arrays.asList(
+        makeRow(query, timestamp, "v0", null, "rows", 7L),
+        makeRow(query, timestamp, "v0", "a", "rows", 1L),
+        makeRow(query, timestamp, "v0", "b", "rows", 1L),
+        makeRow(query, timestamp, "v0", "c", "rows", 1L)
+    );
+
+    Iterable<ResultRow> results = runQuery(query, bob.buildIncrementalIndex());
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"group-by-unnested-string-nulls");
+
+    results = runQuery(regularQuery, bob.buildIncrementalIndex());
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"group-by-unnested-string-nulls");
+
+    results = runQuery(query, 
bob.tmpDir(tempFolder.newFolder()).buildMMappedIndex());
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"group-by-unnested-string-nulls");
+  }
+
+  @Test
+  public void testGroupByOnUnnestEmptyTable()
+  {
+    cannotVectorize();
+    IndexBuilder bob =
+        IndexBuilder.create()
+                    .rows(ImmutableList.of());
+
+    final DataSource unnestDataSource = UnnestDataSource.create(
+        new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
+        new ExpressionVirtualColumn(
+            "v0",
+            "mvd",
+            ColumnType.STRING,
+            TestExprMacroTable.INSTANCE
+        ),
+        null
+    );
+
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(unnestDataSource)
+        .setQuerySegmentSpec(new 
MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY)))
+        .setDimensions(new DefaultDimensionSpec("v0", "v0"))
+        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    List<ResultRow> expectedResults = Collections.emptyList();
+
+    Iterable<ResultRow> results = runQuery(query, bob.buildIncrementalIndex());
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"group-by-unnested-empty");
+
+    // can only test realtime since empty cannot be persisted
+  }
+
+  @Test
+  public void testGroupByOnUnnestEmptyRows()
+  {
+    cannotVectorize();
+    final String dim = "mvd";
+    final DateTime timestamp = DateTimes.nowUtc();
+    final RowSignature signature = RowSignature.builder()
+                                               .add(dim, ColumnType.STRING)
+                                               .build();
+    List<String> dims = Collections.singletonList(dim);
+    IndexBuilder bob =
+        IndexBuilder.create()
+                    .schema(
+                        IncrementalIndexSchema.builder()
+                                              .withRollup(false)
+                                              .build()
+                    )
+                    .rows(
+                        ImmutableList.of(
+                            new ListBasedInputRow(signature, timestamp, dims, 
Collections.singletonList(Collections.emptyList()))
+                        )
+                    );
+
+    final DataSource unnestDataSource = UnnestDataSource.create(
+        new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
+        new ExpressionVirtualColumn(
+            "v0",
+            "mvd",
+            ColumnType.STRING,
+            TestExprMacroTable.INSTANCE
+        ),
+        null
+    );
+
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(unnestDataSource)
+        .setQuerySegmentSpec(new 
MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY)))
+        .setDimensions(new DefaultDimensionSpec("v0", "v0"))
+        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    // make sure results are consistent with grouping directly on the column 
with implicit unnest
+    GroupByQuery regularQuery = makeQueryBuilder()
+        .setDataSource(new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE))
+        .setQuerySegmentSpec(new 
MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY)))
+        .setDimensions(new DefaultDimensionSpec("mvd", "v0"))
+        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    List<ResultRow> expectedResults = Collections.singletonList(
+        makeRow(query, timestamp, "v0", null, "rows", 1L)
+    );
+
+    Iterable<ResultRow> results = runQuery(query, bob.buildIncrementalIndex());
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"group-by-unnested-empty");
+
+    results = runQuery(regularQuery, bob.buildIncrementalIndex());
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"group-by-unnested-empty");
+
+    // can only test realtime since empty cannot be persisted
+  }
+
+  @Test
+  public void testGroupByOnUnnestedStringColumnDoubleUnnest() throws 
IOException
+  {
+    // not really a sane query to write, but it shouldn't behave differently 
than a single unnest
+    // the issue is that the dimension selector handles null differently than 
if arrays are used from a column value
+    // selector. the dimension selector cursor puts nulls in the output to be 
compatible with implict unnest used by
+    // group-by, while the column selector cursor
+    cannotVectorize();
+
+    final String dim = "mvd";
+    final DateTime timestamp = DateTimes.nowUtc();
+    final RowSignature signature = RowSignature.builder()
+                                               .add(dim, ColumnType.STRING)
+                                               .build();
+    List<String> dims = Collections.singletonList(dim);
+    IndexBuilder bob =
+        IndexBuilder.create()
+                    .schema(
+                        IncrementalIndexSchema.builder()
+                                              .withRollup(false)
+                                              .build()
+                    )
+                    .rows(
+                        ImmutableList.of(
+                            new ListBasedInputRow(signature, timestamp, dims, 
ImmutableList.of(ImmutableList.of("a", "b", "c"))),
+                            new ListBasedInputRow(signature, timestamp, dims, 
ImmutableList.of()),
+                            new ListBasedInputRow(signature, timestamp, dims, 
ImmutableList.of(ImmutableList.of())),
+                            new ListBasedInputRow(signature, timestamp, dims, 
ImmutableList.of(""))
+                        )
+                    );
+
+    final DataSource unnestDataSource = UnnestDataSource.create(
+        new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
+        new ExpressionVirtualColumn(
+            "v0",
+            "mvd",
+            ColumnType.STRING,
+            TestExprMacroTable.INSTANCE
+        ),
+        null
+    );
+    final DataSource extraUnnested = UnnestDataSource.create(
+        unnestDataSource,
+        new ExpressionVirtualColumn(
+            "v1",
+            "v0",
+            ColumnType.STRING,
+            TestExprMacroTable.INSTANCE
+        ),
+        null
+    );
+
+    GroupByQuery query = makeQueryBuilder()
+        .setDataSource(extraUnnested)
+        .setQuerySegmentSpec(new 
MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY)))
+        .setDimensions(new DefaultDimensionSpec("v1", "v1"))
+        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
+        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+        .build();
+
+    List<ResultRow> expectedResults = NullHandling.sqlCompatible() ? 
Arrays.asList(
+        makeRow(query, timestamp, "v1", null, "rows", 2L),
+        makeRow(query, timestamp, "v1", "", "rows", 1L),
+        makeRow(query, timestamp, "v1", "a", "rows", 1L),
+        makeRow(query, timestamp, "v1", "b", "rows", 1L),
+        makeRow(query, timestamp, "v1", "c", "rows", 1L)
+    ) : Arrays.asList(
+        makeRow(query, timestamp, "v1", null, "rows", 3L),
+        makeRow(query, timestamp, "v1", "a", "rows", 1L),
+        makeRow(query, timestamp, "v1", "b", "rows", 1L),
+        makeRow(query, timestamp, "v1", "c", "rows", 1L)
+    );
+
+    Iterable<ResultRow> results = runQuery(query, bob.buildIncrementalIndex());
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"group-by-unnested-string-nulls-double-unnest");
+
+    results = runQuery(query, 
bob.tmpDir(tempFolder.newFolder()).buildMMappedIndex());
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"group-by-unnested-string-nulls-double-unnest");
   }
 
   @Test
@@ -751,6 +1083,9 @@ public class UnnestGroupByQueryRunnerTest extends 
InitializedNullHandlingTest
 
     Iterable<ResultRow> results = runQuery(query, 
TestIndex.getIncrementalTestIndex());
     TestHelper.assertExpectedObjects(expectedResults, results, 
"groupBy-on-unnested-virtual-column");
+
+    results = runQuery(query, TestIndex.getMMappedTestIndex());
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"groupBy-on-unnested-virtual-column");
   }
 
   @Test
@@ -837,6 +1172,9 @@ public class UnnestGroupByQueryRunnerTest extends 
InitializedNullHandlingTest
 
     Iterable<ResultRow> results = runQuery(query, 
TestIndex.getIncrementalTestIndex());
     TestHelper.assertExpectedObjects(expectedResults, results, 
"groupBy-on-unnested-virtual-column");
+
+    results = runQuery(query, TestIndex.getMMappedTestIndex());
+    TestHelper.assertExpectedObjects(expectedResults, results, 
"groupBy-on-unnested-virtual-column");
   }
 
   @Test
@@ -931,6 +1269,56 @@ public class UnnestGroupByQueryRunnerTest extends 
InitializedNullHandlingTest
     TestHelper.assertExpectedObjects(expectedResults, results, 
"groupBy-on-unnested-virtual-column");
   }
 
+
+  /**
+   * Use this method instead of makeQueryBuilder() to make sure the context is 
set properly. Also, avoid
+   * setContext in tests. Only use overrideContext.
+   */
+  private GroupByQuery.Builder makeQueryBuilder()
+  {
+    return GroupByQuery.builder().overrideContext(makeContext());
+  }
+
+  private Iterable<ResultRow> runQuery(final GroupByQuery query, final 
IncrementalIndex index)
+  {
+    final QueryRunner<?> queryRunner = factory.mergeRunners(
+        DirectQueryProcessingPool.INSTANCE,
+        Collections.singletonList(
+            QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn(
+                factory,
+                new IncrementalIndexSegment(
+                    index,
+                    QueryRunnerTestHelper.SEGMENT_ID
+                ),
+                query,
+                "rtIndexvc"
+            )
+        )
+    );
+
+    return GroupByQueryRunnerTestHelper.runQuery(factory, queryRunner, query);
+  }
+
+  private Iterable<ResultRow> runQuery(final GroupByQuery query, 
QueryableIndex index)
+  {
+    final QueryRunner<?> queryRunner = factory.mergeRunners(
+        DirectQueryProcessingPool.INSTANCE,
+        Collections.singletonList(
+            QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn(
+                factory,
+                new QueryableIndexSegment(
+                    index,
+                    QueryRunnerTestHelper.SEGMENT_ID
+                ),
+                query,
+                "mmapIndexvc"
+            )
+        )
+    );
+
+    return GroupByQueryRunnerTestHelper.runQuery(factory, queryRunner, query);
+  }
+
   private Map<String, Object> makeContext()
   {
     return ImmutableMap.<String, Object>builder()
diff --git 
a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java
 
b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java
index 8dd81a94ce9..b467bc6c938 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java
@@ -299,11 +299,12 @@ public class UnnestStorageAdapterTest extends 
InitializedNullHandlingTest
       }
       /*
       each row has 8 entries.
-      unnest 2 rows -> 16 entries also the value cardinality
+      unnest 2 rows -> 16 entries also the value cardinality, but null is not 
present in the dictionary and so is
+                       fabricated so cardinality is 17
       unnest of unnest -> 16*8 = 128 rows
        */
       Assert.assertEquals(count, 128);
-      Assert.assertEquals(dimSelector.getValueCardinality(), 16);
+      Assert.assertEquals(dimSelector.getValueCardinality(), 17);
       return null;
     });
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to