This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new b3c238457ff fix unnest bugs (#16723)
b3c238457ff is described below
commit b3c238457ff8ecc33118f8b45e212a50f5ec95d4
Author: Clint Wylie <[email protected]>
AuthorDate: Thu Jul 11 13:48:15 2024 -0700
fix unnest bugs (#16723)
changes:
* fixes a bug with unnest storage adapter not preserving underlying columns
dictionary uniqueness when allowing dimension selector cursor
* fixes a bug with unnest on realtime segments with empty rows incorrectly
specifying index 0 as the row dictionary value
---
.../druid/segment/UnnestDimensionCursor.java | 83 ++--
.../apache/druid/segment/UnnestStorageAdapter.java | 4 +-
.../apache/druid/query/QueryRunnerTestHelper.java | 3 +-
.../groupby/UnnestGroupByQueryRunnerTest.java | 430 ++++++++++++++++++++-
.../druid/segment/UnnestStorageAdapterTest.java | 5 +-
5 files changed, 455 insertions(+), 70 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java
b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java
index 98f0d0949c8..4d4aeaf7046 100644
---
a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java
+++
b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java
@@ -19,6 +19,7 @@
package org.apache.druid.segment;
+import com.google.common.base.Preconditions;
import org.apache.druid.query.BaseQuery;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.dimension.DimensionSpec;
@@ -27,6 +28,7 @@ import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.data.IndexedInts;
+import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
import org.joda.time.DateTime;
import javax.annotation.Nullable;
@@ -69,10 +71,12 @@ public class UnnestDimensionCursor implements Cursor
private final String outputName;
private final ColumnSelectorFactory baseColumnSelectorFactory;
private int index;
- @Nullable
+ @MonotonicNonNull
private IndexedInts indexedIntsForCurrentRow;
private boolean needInitialization;
private SingleIndexInts indexIntsForRow;
+ private final int nullId;
+ private final int idOffset;
public UnnestDimensionCursor(
Cursor cursor,
@@ -91,11 +95,22 @@ public class UnnestDimensionCursor implements Cursor
this.index = 0;
this.outputName = outputColumnName;
this.needInitialization = true;
+ // this shouldn't happen, but just in case...
+ final IdLookup lookup = Preconditions.checkNotNull(dimSelector.idLookup());
+ final int nullId = lookup.lookupId(null);
+ if (nullId < 0) {
+ this.idOffset = 1;
+ this.nullId = 0;
+ } else {
+ this.idOffset = 0;
+ this.nullId = nullId;
+ }
}
@Override
public ColumnSelectorFactory getColumnSelectorFactory()
{
+
return new ColumnSelectorFactory()
{
@Override
@@ -110,15 +125,13 @@ public class UnnestDimensionCursor implements Cursor
@Override
public IndexedInts getRow()
{
- // This object reference has been created
- // during the call to initialize and referenced henceforth
return indexIntsForRow;
}
@Override
public ValueMatcher makeValueMatcher(@Nullable String value)
{
- final int idForLookup = idLookup().lookupId(value);
+ final int idForLookup = dimSelector.idLookup().lookupId(value);
if (idForLookup < 0) {
return new ValueMatcher()
{
@@ -131,7 +144,7 @@ public class UnnestDimensionCursor implements Cursor
return true;
}
final int rowId = indexedIntsForCurrentRow.get(index);
- return lookupName(rowId) == null;
+ return dimSelector.lookupName(rowId) == null;
}
return false;
}
@@ -156,7 +169,7 @@ public class UnnestDimensionCursor implements Cursor
return includeUnknown;
}
final int rowId = indexedIntsForCurrentRow.get(index);
- return (includeUnknown && lookupName(rowId) == null) ||
idForLookup == rowId;
+ return (includeUnknown && dimSelector.lookupName(rowId) ==
null) || idForLookup == rowId;
}
@Override
@@ -183,10 +196,10 @@ public class UnnestDimensionCursor implements Cursor
@Override
public Object getObject()
{
- if (indexedIntsForCurrentRow == null ||
indexedIntsForCurrentRow.size() == 0) {
+ if (indexedIntsForCurrentRow.size() == 0) {
return null;
}
- return lookupName(indexedIntsForCurrentRow.get(index));
+ return dimSelector.lookupName(indexedIntsForCurrentRow.get(index));
}
@Override
@@ -198,14 +211,14 @@ public class UnnestDimensionCursor implements Cursor
@Override
public int getValueCardinality()
{
- return dimSelector.getValueCardinality();
+ return dimSelector.getValueCardinality() + idOffset;
}
@Nullable
@Override
public String lookupName(int id)
{
- return dimSelector.lookupName(id);
+ return dimSelector.lookupName(id - idOffset);
}
@Override
@@ -218,21 +231,19 @@ public class UnnestDimensionCursor implements Cursor
@Override
public IdLookup idLookup()
{
- return dimSelector.idLookup();
+ return name -> name == null ? nullId :
dimSelector.idLookup().lookupId(name) + idOffset;
}
};
}
- /*
- This ideally should not be called. If called delegate using the
makeDimensionSelector
- */
@Override
public ColumnValueSelector makeColumnValueSelector(String columnName)
{
- if (!outputName.equals(columnName)) {
- return baseColumnSelectorFactory.makeColumnValueSelector(columnName);
+ if (outputName.equals(columnName)) {
+ return makeDimensionSelector(DefaultDimensionSpec.of(columnName));
}
- return makeDimensionSelector(DefaultDimensionSpec.of(columnName));
+
+ return baseColumnSelectorFactory.makeColumnValueSelector(columnName);
}
@Nullable
@@ -304,11 +315,7 @@ public class UnnestDimensionCursor implements Cursor
{
index = 0;
this.indexIntsForRow = new SingleIndexInts();
-
- if (dimSelector.getObject() != null) {
- this.indexedIntsForCurrentRow = dimSelector.getRow();
- }
-
+ this.indexedIntsForCurrentRow = dimSelector.getRow();
needInitialization = false;
}
@@ -320,30 +327,19 @@ public class UnnestDimensionCursor implements Cursor
*/
private void advanceAndUpdate()
{
- if (indexedIntsForCurrentRow == null) {
- index = 0;
+ if (index >= indexedIntsForCurrentRow.size() - 1) {
if (!baseCursor.isDone()) {
baseCursor.advanceUninterruptibly();
- if (!baseCursor.isDone()) {
- indexedIntsForCurrentRow = dimSelector.getRow();
- }
}
- } else {
- if (index >= indexedIntsForCurrentRow.size() - 1) {
- if (!baseCursor.isDone()) {
- baseCursor.advanceUninterruptibly();
- }
- if (!baseCursor.isDone()) {
- indexedIntsForCurrentRow = dimSelector.getRow();
- }
- index = 0;
- } else {
- ++index;
+ if (!baseCursor.isDone()) {
+ indexedIntsForCurrentRow = dimSelector.getRow();
}
+ index = 0;
+ } else {
+ ++index;
}
}
-
// Helper class to help in returning
// getRow from the dimensionSelector
// This is set in the initialize method
@@ -366,12 +362,11 @@ public class UnnestDimensionCursor implements Cursor
@Override
public int get(int idx)
{
- // need to get value from the indexed ints
- // only if it is non null and has at least 1 value
- if (indexedIntsForCurrentRow != null && indexedIntsForCurrentRow.size()
> 0) {
- return indexedIntsForCurrentRow.get(index);
+ // everything that calls get also checks size
+ if (indexedIntsForCurrentRow.size() == 0) {
+ return nullId;
}
- return 0;
+ return idOffset + indexedIntsForCurrentRow.get(index);
}
}
}
diff --git
a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java
b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java
index 8735844c946..ff4994210e1 100644
---
a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java
+++
b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java
@@ -582,10 +582,12 @@ public class UnnestStorageAdapter implements
StorageAdapter
final TypeSignature<ValueType> outputType =
capabilities.isArray() ? capabilities.getElementType() :
capabilities.toColumnType();
+ final boolean useDimensionCursor = useDimensionCursor(capabilities);
return ColumnCapabilitiesImpl.createDefault()
.setType(outputType)
.setHasMultipleValues(false)
-
.setDictionaryEncoded(useDimensionCursor(capabilities));
+ .setDictionaryEncoded(useDimensionCursor)
+
.setDictionaryValuesUnique(useDimensionCursor);
}
}
diff --git
a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java
b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java
index f4699639af1..e562f72955e 100644
--- a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java
+++ b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java
@@ -524,8 +524,7 @@ public class QueryRunnerTestHelper
final DataSource base = query.getDataSource();
final SegmentReference segmentReference =
base.createSegmentMapFunction(query, new AtomicLong())
-
.apply(ReferenceCountingSegment.wrapRootGenerationSegment(
- adapter));
+
.apply(ReferenceCountingSegment.wrapRootGenerationSegment(adapter));
return makeQueryRunner(factory, segmentReference, runnerName);
}
diff --git
a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java
b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java
index 13a33191e8e..3976a20bd2d 100644
---
a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java
+++
b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java
@@ -25,7 +25,10 @@ import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.data.input.ListBasedInputRow;
+import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.ISE;
+import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.DataSource;
import org.apache.druid.query.DirectQueryProcessingPool;
@@ -43,21 +46,30 @@ import
org.apache.druid.query.extraction.StringFormatExtractionFn;
import org.apache.druid.query.filter.EqualityFilter;
import org.apache.druid.query.filter.NotDimFilter;
import org.apache.druid.query.groupby.orderby.OrderByColumnSpec;
+import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.segment.IncrementalIndexSegment;
+import org.apache.druid.segment.IndexBuilder;
+import org.apache.druid.segment.QueryableIndex;
+import org.apache.druid.segment.QueryableIndexSegment;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.TestIndex;
import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.incremental.IncrementalIndex;
+import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
import org.apache.druid.testing.InitializedNullHandlingTest;
+import org.joda.time.DateTime;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
+import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -78,6 +90,9 @@ public class UnnestGroupByQueryRunnerTest extends
InitializedNullHandlingTest
@Rule
public ExpectedException expectedException = ExpectedException.none();
+ @Rule
+ public final TemporaryFolder tempFolder = new TemporaryFolder();
+
public UnnestGroupByQueryRunnerTest(
GroupByQueryConfig config,
GroupByQueryRunnerFactory factory,
@@ -214,6 +229,11 @@ public class UnnestGroupByQueryRunnerTest extends
InitializedNullHandlingTest
return GroupByQueryRunnerTestHelper.createExpectedRow(query, timestamp,
vals);
}
+ private static ResultRow makeRow(final GroupByQuery query, final DateTime
timestamp, final Object... vals)
+ {
+ return GroupByQueryRunnerTestHelper.createExpectedRow(query, timestamp,
vals);
+ }
+
@Test
public void testGroupBy()
{
@@ -423,6 +443,9 @@ public class UnnestGroupByQueryRunnerTest extends
InitializedNullHandlingTest
Iterable<ResultRow> results = runQuery(query,
TestIndex.getIncrementalTestIndex());
TestHelper.assertExpectedObjects(expectedResults, results, "groupBy");
+
+ results = runQuery(query, TestIndex.getMMappedTestIndex());
+ TestHelper.assertExpectedObjects(expectedResults, results, "groupBy");
}
@Test
@@ -462,6 +485,9 @@ public class UnnestGroupByQueryRunnerTest extends
InitializedNullHandlingTest
Iterable<ResultRow> results = runQuery(query,
TestIndex.getIncrementalTestIndex());
TestHelper.assertExpectedObjects(expectedResults, results,
"missing-column");
+
+ results = runQuery(query, TestIndex.getMMappedTestIndex());
+ TestHelper.assertExpectedObjects(expectedResults, results,
"missing-column");
}
@Test
@@ -538,6 +564,9 @@ public class UnnestGroupByQueryRunnerTest extends
InitializedNullHandlingTest
Iterable<ResultRow> results = runQuery(query,
TestIndex.getIncrementalTestIndex());
TestHelper.assertExpectedObjects(expectedResults, results,
"groupBy-on-unnested-column");
+
+ results = runQuery(query, TestIndex.getMMappedTestIndex());
+ TestHelper.assertExpectedObjects(expectedResults, results,
"groupBy-on-unnested-column");
}
@Test
@@ -627,6 +656,9 @@ public class UnnestGroupByQueryRunnerTest extends
InitializedNullHandlingTest
Iterable<ResultRow> results = runQuery(query,
TestIndex.getIncrementalTestIndex());
TestHelper.assertExpectedObjects(expectedResults, results,
"groupBy-on-unnested-virtual-column");
+
+ results = runQuery(query, TestIndex.getMMappedTestIndex());
+ TestHelper.assertExpectedObjects(expectedResults, results,
"groupBy-on-unnested-virtual-column");
}
@Test
@@ -678,35 +710,335 @@ public class UnnestGroupByQueryRunnerTest extends
InitializedNullHandlingTest
Iterable<ResultRow> results = runQuery(query,
TestIndex.getIncrementalTestIndex());
TestHelper.assertExpectedObjects(expectedResults, results,
"groupBy-on-unnested-virtual-columns");
+
+ results = runQuery(query, TestIndex.getMMappedTestIndex());
+ TestHelper.assertExpectedObjects(expectedResults, results,
"groupBy-on-unnested-virtual-column");
}
- /**
- * Use this method instead of makeQueryBuilder() to make sure the context is
set properly. Also, avoid
- * setContext in tests. Only use overrideContext.
- */
- private GroupByQuery.Builder makeQueryBuilder()
+ @Test
+ public void testGroupByOnUnnestedStringColumnWithNullStuff() throws
IOException
{
- return GroupByQuery.builder().overrideContext(makeContext());
+ cannotVectorize();
+
+ final String dim = "mvd";
+ final DateTime timestamp = DateTimes.nowUtc();
+ final RowSignature signature = RowSignature.builder()
+ .add(dim, ColumnType.STRING)
+ .build();
+ List<String> dims = Collections.singletonList(dim);
+ IndexBuilder bob =
+ IndexBuilder.create()
+ .schema(
+ IncrementalIndexSchema.builder()
+ .withRollup(false)
+ .build()
+ )
+ .rows(
+ ImmutableList.of(
+ new ListBasedInputRow(signature, timestamp, dims,
ImmutableList.of(ImmutableList.of("a", "b", "c"))),
+ new ListBasedInputRow(signature, timestamp, dims,
ImmutableList.of()),
+ new ListBasedInputRow(signature, timestamp, dims,
ImmutableList.of(ImmutableList.of())),
+ new ListBasedInputRow(signature, timestamp, dims,
ImmutableList.of(""))
+ )
+ );
+
+ final DataSource unnestDataSource = UnnestDataSource.create(
+ new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
+ new ExpressionVirtualColumn(
+ "v0",
+ "mvd",
+ ColumnType.STRING,
+ TestExprMacroTable.INSTANCE
+ ),
+ null
+ );
+
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(unnestDataSource)
+ .setQuerySegmentSpec(new
MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY)))
+ .setDimensions(new DefaultDimensionSpec("v0", "v0"))
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ List<ResultRow> expectedResults = NullHandling.sqlCompatible() ?
Arrays.asList(
+ makeRow(query, timestamp, "v0", null, "rows", 2L),
+ makeRow(query, timestamp, "v0", "", "rows", 1L),
+ makeRow(query, timestamp, "v0", "a", "rows", 1L),
+ makeRow(query, timestamp, "v0", "b", "rows", 1L),
+ makeRow(query, timestamp, "v0", "c", "rows", 1L)
+ ) : Arrays.asList(
+ makeRow(query, timestamp, "v0", null, "rows", 3L),
+ makeRow(query, timestamp, "v0", "a", "rows", 1L),
+ makeRow(query, timestamp, "v0", "b", "rows", 1L),
+ makeRow(query, timestamp, "v0", "c", "rows", 1L)
+ );
+
+ Iterable<ResultRow> results = runQuery(query, bob.buildIncrementalIndex());
+ TestHelper.assertExpectedObjects(expectedResults, results,
"group-by-unnested-string-nulls");
+
+ results = runQuery(query,
bob.tmpDir(tempFolder.newFolder()).buildMMappedIndex());
+ TestHelper.assertExpectedObjects(expectedResults, results,
"group-by-unnested-string-nulls");
}
- private Iterable<ResultRow> runQuery(final GroupByQuery query, final
IncrementalIndex index)
+ @Test
+ public void testGroupByOnUnnestedStringColumnWithMoreNullStuff() throws
IOException
{
- final QueryRunner<?> queryRunner = factory.mergeRunners(
- DirectQueryProcessingPool.INSTANCE,
- Collections.singletonList(
- QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn(
- factory,
- new IncrementalIndexSegment(
- index,
- QueryRunnerTestHelper.SEGMENT_ID
- ),
- query,
- "rtIndexvc"
- )
- )
+ cannotVectorize();
+
+ final String dim = "mvd";
+ final DateTime timestamp = DateTimes.nowUtc();
+ final RowSignature signature = RowSignature.builder()
+ .add(dim, ColumnType.STRING)
+ .build();
+ List<String> dims = Collections.singletonList(dim);
+ IndexBuilder bob =
+ IndexBuilder.create()
+ .schema(
+ IncrementalIndexSchema.builder()
+ .withRollup(false)
+ .build()
+ )
+ .rows(
+ ImmutableList.of(
+ new ListBasedInputRow(signature, timestamp, dims,
Collections.singletonList(Arrays.asList("a", "b", "c"))),
+ new ListBasedInputRow(signature, timestamp, dims,
Collections.emptyList()),
+ new ListBasedInputRow(signature, timestamp, dims,
Collections.singletonList(null)),
+ new ListBasedInputRow(signature, timestamp, dims,
Collections.singletonList(Collections.emptyList())),
+ new ListBasedInputRow(signature, timestamp, dims,
Collections.singletonList(Arrays.asList(null, null))),
+ new ListBasedInputRow(signature, timestamp, dims,
Collections.singletonList(Collections.singletonList(null))),
+ new ListBasedInputRow(signature, timestamp, dims,
Collections.singletonList(Collections.singletonList("")))
+ )
+ );
+
+ final DataSource unnestDataSource = UnnestDataSource.create(
+ new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
+ new ExpressionVirtualColumn(
+ "v0",
+ "mvd",
+ ColumnType.STRING,
+ TestExprMacroTable.INSTANCE
+ ),
+ null
);
- return GroupByQueryRunnerTestHelper.runQuery(factory, queryRunner, query);
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(unnestDataSource)
+ .setQuerySegmentSpec(new
MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY)))
+ .setDimensions(new DefaultDimensionSpec("v0", "v0"))
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ // make sure results are consistent with grouping directly on the column
with implicit unnest
+ GroupByQuery regularQuery = makeQueryBuilder()
+ .setDataSource(new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE))
+ .setQuerySegmentSpec(new
MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY)))
+ .setDimensions(new DefaultDimensionSpec("mvd", "v0"))
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ List<ResultRow> expectedResults = NullHandling.sqlCompatible() ?
Arrays.asList(
+ makeRow(query, timestamp, "v0", null, "rows", 6L),
+ makeRow(query, timestamp, "v0", "", "rows", 1L),
+ makeRow(query, timestamp, "v0", "a", "rows", 1L),
+ makeRow(query, timestamp, "v0", "b", "rows", 1L),
+ makeRow(query, timestamp, "v0", "c", "rows", 1L)
+ ) : Arrays.asList(
+ makeRow(query, timestamp, "v0", null, "rows", 7L),
+ makeRow(query, timestamp, "v0", "a", "rows", 1L),
+ makeRow(query, timestamp, "v0", "b", "rows", 1L),
+ makeRow(query, timestamp, "v0", "c", "rows", 1L)
+ );
+
+ Iterable<ResultRow> results = runQuery(query, bob.buildIncrementalIndex());
+ TestHelper.assertExpectedObjects(expectedResults, results,
"group-by-unnested-string-nulls");
+
+ results = runQuery(regularQuery, bob.buildIncrementalIndex());
+ TestHelper.assertExpectedObjects(expectedResults, results,
"group-by-unnested-string-nulls");
+
+ results = runQuery(query,
bob.tmpDir(tempFolder.newFolder()).buildMMappedIndex());
+ TestHelper.assertExpectedObjects(expectedResults, results,
"group-by-unnested-string-nulls");
+ }
+
+ @Test
+ public void testGroupByOnUnnestEmptyTable()
+ {
+ cannotVectorize();
+ IndexBuilder bob =
+ IndexBuilder.create()
+ .rows(ImmutableList.of());
+
+ final DataSource unnestDataSource = UnnestDataSource.create(
+ new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
+ new ExpressionVirtualColumn(
+ "v0",
+ "mvd",
+ ColumnType.STRING,
+ TestExprMacroTable.INSTANCE
+ ),
+ null
+ );
+
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(unnestDataSource)
+ .setQuerySegmentSpec(new
MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY)))
+ .setDimensions(new DefaultDimensionSpec("v0", "v0"))
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ List<ResultRow> expectedResults = Collections.emptyList();
+
+ Iterable<ResultRow> results = runQuery(query, bob.buildIncrementalIndex());
+ TestHelper.assertExpectedObjects(expectedResults, results,
"group-by-unnested-empty");
+
+ // can only test realtime since empty cannot be persisted
+ }
+
+ @Test
+ public void testGroupByOnUnnestEmptyRows()
+ {
+ cannotVectorize();
+ final String dim = "mvd";
+ final DateTime timestamp = DateTimes.nowUtc();
+ final RowSignature signature = RowSignature.builder()
+ .add(dim, ColumnType.STRING)
+ .build();
+ List<String> dims = Collections.singletonList(dim);
+ IndexBuilder bob =
+ IndexBuilder.create()
+ .schema(
+ IncrementalIndexSchema.builder()
+ .withRollup(false)
+ .build()
+ )
+ .rows(
+ ImmutableList.of(
+ new ListBasedInputRow(signature, timestamp, dims,
Collections.singletonList(Collections.emptyList()))
+ )
+ );
+
+ final DataSource unnestDataSource = UnnestDataSource.create(
+ new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
+ new ExpressionVirtualColumn(
+ "v0",
+ "mvd",
+ ColumnType.STRING,
+ TestExprMacroTable.INSTANCE
+ ),
+ null
+ );
+
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(unnestDataSource)
+ .setQuerySegmentSpec(new
MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY)))
+ .setDimensions(new DefaultDimensionSpec("v0", "v0"))
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ // make sure results are consistent with grouping directly on the column
with implicit unnest
+ GroupByQuery regularQuery = makeQueryBuilder()
+ .setDataSource(new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE))
+ .setQuerySegmentSpec(new
MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY)))
+ .setDimensions(new DefaultDimensionSpec("mvd", "v0"))
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ List<ResultRow> expectedResults = Collections.singletonList(
+ makeRow(query, timestamp, "v0", null, "rows", 1L)
+ );
+
+ Iterable<ResultRow> results = runQuery(query, bob.buildIncrementalIndex());
+ TestHelper.assertExpectedObjects(expectedResults, results,
"group-by-unnested-empty");
+
+ results = runQuery(regularQuery, bob.buildIncrementalIndex());
+ TestHelper.assertExpectedObjects(expectedResults, results,
"group-by-unnested-empty");
+
+ // can only test realtime since empty cannot be persisted
+ }
+
+ @Test
+ public void testGroupByOnUnnestedStringColumnDoubleUnnest() throws
IOException
+ {
+ // not really a sane query to write, but it shouldn't behave differently
than a single unnest
+ // the issue is that the dimension selector handles null differently than
if arrays are used from a column value
+ // selector. the dimension selector cursor puts nulls in the output to be
compatible with implicit unnest used by
+ // group-by, while the column selector cursor does not
+ cannotVectorize();
+
+ final String dim = "mvd";
+ final DateTime timestamp = DateTimes.nowUtc();
+ final RowSignature signature = RowSignature.builder()
+ .add(dim, ColumnType.STRING)
+ .build();
+ List<String> dims = Collections.singletonList(dim);
+ IndexBuilder bob =
+ IndexBuilder.create()
+ .schema(
+ IncrementalIndexSchema.builder()
+ .withRollup(false)
+ .build()
+ )
+ .rows(
+ ImmutableList.of(
+ new ListBasedInputRow(signature, timestamp, dims,
ImmutableList.of(ImmutableList.of("a", "b", "c"))),
+ new ListBasedInputRow(signature, timestamp, dims,
ImmutableList.of()),
+ new ListBasedInputRow(signature, timestamp, dims,
ImmutableList.of(ImmutableList.of())),
+ new ListBasedInputRow(signature, timestamp, dims,
ImmutableList.of(""))
+ )
+ );
+
+ final DataSource unnestDataSource = UnnestDataSource.create(
+ new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
+ new ExpressionVirtualColumn(
+ "v0",
+ "mvd",
+ ColumnType.STRING,
+ TestExprMacroTable.INSTANCE
+ ),
+ null
+ );
+ final DataSource extraUnnested = UnnestDataSource.create(
+ unnestDataSource,
+ new ExpressionVirtualColumn(
+ "v1",
+ "v0",
+ ColumnType.STRING,
+ TestExprMacroTable.INSTANCE
+ ),
+ null
+ );
+
+ GroupByQuery query = makeQueryBuilder()
+ .setDataSource(extraUnnested)
+ .setQuerySegmentSpec(new
MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY)))
+ .setDimensions(new DefaultDimensionSpec("v1", "v1"))
+ .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
+ .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
+ .build();
+
+ List<ResultRow> expectedResults = NullHandling.sqlCompatible() ?
Arrays.asList(
+ makeRow(query, timestamp, "v1", null, "rows", 2L),
+ makeRow(query, timestamp, "v1", "", "rows", 1L),
+ makeRow(query, timestamp, "v1", "a", "rows", 1L),
+ makeRow(query, timestamp, "v1", "b", "rows", 1L),
+ makeRow(query, timestamp, "v1", "c", "rows", 1L)
+ ) : Arrays.asList(
+ makeRow(query, timestamp, "v1", null, "rows", 3L),
+ makeRow(query, timestamp, "v1", "a", "rows", 1L),
+ makeRow(query, timestamp, "v1", "b", "rows", 1L),
+ makeRow(query, timestamp, "v1", "c", "rows", 1L)
+ );
+
+ Iterable<ResultRow> results = runQuery(query, bob.buildIncrementalIndex());
+ TestHelper.assertExpectedObjects(expectedResults, results,
"group-by-unnested-string-nulls-double-unnest");
+
+ results = runQuery(query,
bob.tmpDir(tempFolder.newFolder()).buildMMappedIndex());
+ TestHelper.assertExpectedObjects(expectedResults, results,
"group-by-unnested-string-nulls-double-unnest");
}
@Test
@@ -751,6 +1083,9 @@ public class UnnestGroupByQueryRunnerTest extends
InitializedNullHandlingTest
Iterable<ResultRow> results = runQuery(query,
TestIndex.getIncrementalTestIndex());
TestHelper.assertExpectedObjects(expectedResults, results,
"groupBy-on-unnested-virtual-column");
+
+ results = runQuery(query, TestIndex.getMMappedTestIndex());
+ TestHelper.assertExpectedObjects(expectedResults, results,
"groupBy-on-unnested-virtual-column");
}
@Test
@@ -837,6 +1172,9 @@ public class UnnestGroupByQueryRunnerTest extends
InitializedNullHandlingTest
Iterable<ResultRow> results = runQuery(query,
TestIndex.getIncrementalTestIndex());
TestHelper.assertExpectedObjects(expectedResults, results,
"groupBy-on-unnested-virtual-column");
+
+ results = runQuery(query, TestIndex.getMMappedTestIndex());
+ TestHelper.assertExpectedObjects(expectedResults, results,
"groupBy-on-unnested-virtual-column");
}
@Test
@@ -931,6 +1269,56 @@ public class UnnestGroupByQueryRunnerTest extends
InitializedNullHandlingTest
TestHelper.assertExpectedObjects(expectedResults, results,
"groupBy-on-unnested-virtual-column");
}
+
+ /**
+ * Use this method instead of makeQueryBuilder() to make sure the context is
set properly. Also, avoid
+ * setContext in tests. Only use overrideContext.
+ */
+ private GroupByQuery.Builder makeQueryBuilder()
+ {
+ return GroupByQuery.builder().overrideContext(makeContext());
+ }
+
+ private Iterable<ResultRow> runQuery(final GroupByQuery query, final
IncrementalIndex index)
+ {
+ final QueryRunner<?> queryRunner = factory.mergeRunners(
+ DirectQueryProcessingPool.INSTANCE,
+ Collections.singletonList(
+ QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn(
+ factory,
+ new IncrementalIndexSegment(
+ index,
+ QueryRunnerTestHelper.SEGMENT_ID
+ ),
+ query,
+ "rtIndexvc"
+ )
+ )
+ );
+
+ return GroupByQueryRunnerTestHelper.runQuery(factory, queryRunner, query);
+ }
+
+ private Iterable<ResultRow> runQuery(final GroupByQuery query,
QueryableIndex index)
+ {
+ final QueryRunner<?> queryRunner = factory.mergeRunners(
+ DirectQueryProcessingPool.INSTANCE,
+ Collections.singletonList(
+ QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn(
+ factory,
+ new QueryableIndexSegment(
+ index,
+ QueryRunnerTestHelper.SEGMENT_ID
+ ),
+ query,
+ "mmapIndexvc"
+ )
+ )
+ );
+
+ return GroupByQueryRunnerTestHelper.runQuery(factory, queryRunner, query);
+ }
+
private Map<String, Object> makeContext()
{
return ImmutableMap.<String, Object>builder()
diff --git
a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java
b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java
index 8dd81a94ce9..b467bc6c938 100644
---
a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java
@@ -299,11 +299,12 @@ public class UnnestStorageAdapterTest extends
InitializedNullHandlingTest
}
/*
each row has 8 entries.
- unnest 2 rows -> 16 entries also the value cardinality
+ unnest 2 rows -> 16 entries also the value cardinality, but null is not
present in the dictionary and so is
+ fabricated so cardinality is 17
unnest of unnest -> 16*8 = 128 rows
*/
Assert.assertEquals(count, 128);
- Assert.assertEquals(dimSelector.getValueCardinality(), 16);
+ Assert.assertEquals(dimSelector.getValueCardinality(), 17);
return null;
});
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]