This is an automated email from the ASF dual-hosted git repository.
gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 37e158c2c4a Frames: consider writing singly-valued column when input
column hasMultipleValues is UNKNOWN. (#15300)
37e158c2c4a is described below
commit 37e158c2c4a94cdbccde1add3c864219228f92fa
Author: Gian Merlino <[email protected]>
AuthorDate: Wed Nov 1 22:05:53 2023 -0700
Frames: consider writing singly-valued column when input column
hasMultipleValues is UNKNOWN. (#15300)
* Frames: consider writing singly-valued column when input column
hasMultipleValues is UNKNOWN.
Prior to this patch, columnar frames would always write multi-valued
columns if
the input column had hasMultipleValues = UNKNOWN. This had the effect of
flipping
UNKNOWN to TRUE when copying data into frames, which is problematic because
TRUE
causes expressions to assume that string inputs must be treated as arrays.
We now avoid this by flipping UNKNOWN to FALSE if no multi-valuedness
is encountered, and flipping it to TRUE if multi-valuedness is encountered.
* Add regression test case.
---
.../frame/write/columnar/FrameColumnWriters.java | 2 +-
.../write/columnar/StringFrameColumnWriter.java | 46 ++++--
.../segment/column/ColumnCapabilitiesImpl.java | 7 +-
.../apache/druid/frame/write/FrameWriterTest.java | 168 ++++++++++++++++++++-
.../druid/frame/write/FrameWriterTestData.java | 40 ++++-
.../druid/server/ClientQuerySegmentWalkerTest.java | 86 ++++++++++-
6 files changed, 320 insertions(+), 29 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/frame/write/columnar/FrameColumnWriters.java
b/processing/src/main/java/org/apache/druid/frame/write/columnar/FrameColumnWriters.java
index 596fa77646f..8c5dbe75853 100644
---
a/processing/src/main/java/org/apache/druid/frame/write/columnar/FrameColumnWriters.java
+++
b/processing/src/main/java/org/apache/druid/frame/write/columnar/FrameColumnWriters.java
@@ -130,7 +130,7 @@ public class FrameColumnWriters
return new StringFrameColumnWriterImpl(
selector,
allocator,
- capabilities == null || capabilities.hasMultipleValues().isMaybeTrue()
+ capabilities == null ? ColumnCapabilities.Capable.UNKNOWN :
capabilities.hasMultipleValues()
);
}
diff --git
a/processing/src/main/java/org/apache/druid/frame/write/columnar/StringFrameColumnWriter.java
b/processing/src/main/java/org/apache/druid/frame/write/columnar/StringFrameColumnWriter.java
index 536918ec59c..ec812d9654c 100644
---
a/processing/src/main/java/org/apache/druid/frame/write/columnar/StringFrameColumnWriter.java
+++
b/processing/src/main/java/org/apache/druid/frame/write/columnar/StringFrameColumnWriter.java
@@ -28,6 +28,7 @@ import org.apache.druid.frame.write.FrameWriterUtils;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.column.ColumnCapabilities;
import javax.annotation.Nullable;
import java.nio.ByteBuffer;
@@ -45,7 +46,8 @@ public abstract class StringFrameColumnWriter<T extends
ColumnValueSelector> imp
private final T selector;
private final byte typeCode;
- protected final boolean multiValue;
+ protected final ColumnCapabilities.Capable multiValue;
+ protected boolean encounteredMultiValueRow;
/**
* Row lengths: one int per row with the number of values contained by that
row and all previous rows.
@@ -73,14 +75,14 @@ public abstract class StringFrameColumnWriter<T extends
ColumnValueSelector> imp
final T selector,
final MemoryAllocator allocator,
final byte typeCode,
- final boolean multiValue
+ final ColumnCapabilities.Capable multiValue
)
{
this.selector = selector;
this.typeCode = typeCode;
this.multiValue = multiValue;
- if (multiValue) {
+ if (multiValue.isMaybeTrue()) {
this.cumulativeRowLengths = AppendableMemory.create(allocator,
INITIAL_ALLOCATION_SIZE);
} else {
this.cumulativeRowLengths = null;
@@ -107,7 +109,7 @@ public abstract class StringFrameColumnWriter<T extends
ColumnValueSelector> imp
return false;
}
- if (multiValue && !cumulativeRowLengths.reserveAdditional(Integer.BYTES)) {
+ if (multiValue.isMaybeTrue() &&
!cumulativeRowLengths.reserveAdditional(Integer.BYTES)) {
return false;
}
@@ -119,8 +121,16 @@ public abstract class StringFrameColumnWriter<T extends
ColumnValueSelector> imp
return false;
}
+ if (utf8Count != 1) {
+ encounteredMultiValueRow = true;
+
+ if (multiValue.isFalse()) {
+ throw new ISE("Encountered unexpected multi-value row, size[%d]",
utf8Count);
+ }
+ }
+
// Enough space has been reserved to write what we need to write; let's
start.
- if (multiValue) {
+ if (multiValue.isMaybeTrue()) {
final MemoryRange<WritableMemory> rowLengthsCursor =
cumulativeRowLengths.cursor();
if (utf8Data == null && typeCode ==
FrameColumnWriters.TYPE_STRING_ARRAY) {
@@ -189,7 +199,7 @@ public abstract class StringFrameColumnWriter<T extends
ColumnValueSelector> imp
throw new ISE("Cannot undo");
}
- if (multiValue) {
+ if (multiValue.isMaybeTrue()) {
cumulativeRowLengths.rewindCursor(Integer.BYTES);
cumulativeStringLengths.rewindCursor(Integer.BYTES * lastRowLength);
lastCumulativeRowLength -= lastRowLength;
@@ -207,7 +217,7 @@ public abstract class StringFrameColumnWriter<T extends
ColumnValueSelector> imp
public long size()
{
return DATA_OFFSET
- + (multiValue ? cumulativeRowLengths.size() : 0)
+ + (isWriteMultiValue() ? cumulativeRowLengths.size() : 0)
+ cumulativeStringLengths.size()
+ stringData.size();
}
@@ -215,13 +225,14 @@ public abstract class StringFrameColumnWriter<T extends
ColumnValueSelector> imp
@Override
public long writeTo(final WritableMemory memory, final long startPosition)
{
+ final boolean writeMultiValue = isWriteMultiValue();
long currentPosition = startPosition;
memory.putByte(currentPosition, typeCode);
- memory.putByte(currentPosition + 1, multiValue ? (byte) 1 : (byte) 0);
+ memory.putByte(currentPosition + 1, writeMultiValue ? (byte) 1 : (byte) 0);
currentPosition += 2;
- if (multiValue) {
+ if (writeMultiValue) {
currentPosition += cumulativeRowLengths.writeTo(memory, currentPosition);
}
@@ -234,7 +245,7 @@ public abstract class StringFrameColumnWriter<T extends
ColumnValueSelector> imp
@Override
public void close()
{
- if (multiValue) {
+ if (multiValue.isMaybeTrue()) {
cumulativeRowLengths.close();
}
@@ -249,6 +260,15 @@ public abstract class StringFrameColumnWriter<T extends
ColumnValueSelector> imp
@Nullable
public abstract List<ByteBuffer> getUtf8ByteBuffersFromSelector(T selector);
+ /**
+ * Whether, given the current state of the writer, a call to {@link
#writeTo(WritableMemory, long)} at this point
+ * would generate a multi-value column.
+ */
+ private boolean isWriteMultiValue()
+ {
+ return multiValue.isTrue() || encounteredMultiValueRow;
+ }
+
/**
* Returns the sum of remaining bytes in the provided list of byte buffers.
*/
@@ -277,7 +297,7 @@ class StringFrameColumnWriterImpl extends
StringFrameColumnWriter<DimensionSelec
StringFrameColumnWriterImpl(
DimensionSelector selector,
MemoryAllocator allocator,
- boolean multiValue
+ ColumnCapabilities.Capable multiValue
)
{
super(selector, allocator, FrameColumnWriters.TYPE_STRING, multiValue);
@@ -286,7 +306,7 @@ class StringFrameColumnWriterImpl extends
StringFrameColumnWriter<DimensionSelec
@Override
public List<ByteBuffer> getUtf8ByteBuffersFromSelector(final
DimensionSelector selector)
{
- return FrameWriterUtils.getUtf8ByteBuffersFromStringSelector(selector,
multiValue);
+ return FrameWriterUtils.getUtf8ByteBuffersFromStringSelector(selector,
multiValue.isMaybeTrue());
}
}
@@ -300,7 +320,7 @@ class StringArrayFrameColumnWriterImpl extends
StringFrameColumnWriter<ColumnVal
MemoryAllocator allocator
)
{
- super(selector, allocator, FrameColumnWriters.TYPE_STRING_ARRAY, true);
+ super(selector, allocator, FrameColumnWriters.TYPE_STRING_ARRAY,
ColumnCapabilities.Capable.TRUE);
}
@Override
diff --git
a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java
b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java
index f8464a0cf71..9baff99cfb2 100644
---
a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java
+++
b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java
@@ -290,7 +290,12 @@ public class ColumnCapabilitiesImpl implements
ColumnCapabilities
public ColumnCapabilitiesImpl setHasMultipleValues(boolean hasMultipleValues)
{
- this.hasMultipleValues = Capable.of(hasMultipleValues);
+ return setHasMultipleValues(Capable.of(hasMultipleValues));
+ }
+
+ public ColumnCapabilitiesImpl setHasMultipleValues(Capable hasMultipleValues)
+ {
+ this.hasMultipleValues = hasMultipleValues;
return this;
}
diff --git
a/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTest.java
b/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTest.java
index 31b24825f95..16a1b667556 100644
--- a/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTest.java
+++ b/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTest.java
@@ -45,9 +45,16 @@ import
org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;
import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde;
+import org.apache.druid.query.dimension.DimensionSpec;
+import org.apache.druid.segment.ColumnSelectorFactory;
+import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.RowBasedSegment;
+import org.apache.druid.segment.RowIdSupplier;
import org.apache.druid.segment.Segment;
import org.apache.druid.segment.VirtualColumns;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.column.ValueType;
@@ -55,10 +62,12 @@ import org.apache.druid.segment.serde.ComplexMetrics;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.apache.druid.timeline.SegmentId;
import org.hamcrest.CoreMatchers;
+import org.hamcrest.MatcherAssert;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.BeforeClass;
import org.junit.Test;
+import org.junit.internal.matchers.ThrowableMessageMatcher;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
@@ -70,6 +79,7 @@ import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
+import java.util.function.Consumer;
import java.util.stream.Collectors;
/**
@@ -87,6 +97,9 @@ public class FrameWriterTest extends
InitializedNullHandlingTest
private MemoryAllocator allocator;
+ @Nullable
+ private Consumer<ColumnCapabilitiesImpl> capabilitiesAdjustFn;
+
public FrameWriterTest(
@Nullable final FrameType inputFrameType,
final FrameType outputFrameType,
@@ -130,14 +143,89 @@ public class FrameWriterTest extends
InitializedNullHandlingTest
}
@Test
- public void test_string()
+ public void test_string_multiValueTrue()
{
+ capabilitiesAdjustFn = capabilities ->
capabilities.setHasMultipleValues(ColumnCapabilities.Capable.TRUE);
testWithDataset(FrameWriterTestData.TEST_STRINGS_SINGLE_VALUE);
}
@Test
- public void test_multiValueString()
+ public void test_string_multiValueFalse()
+ {
+ capabilitiesAdjustFn = capabilities ->
capabilities.setHasMultipleValues(ColumnCapabilities.Capable.FALSE);
+ testWithDataset(FrameWriterTestData.TEST_STRINGS_SINGLE_VALUE);
+ }
+
+ @Test
+ public void test_string_multiValueUnknown()
+ {
+ capabilitiesAdjustFn = capabilities ->
capabilities.setHasMultipleValues(ColumnCapabilities.Capable.UNKNOWN);
+ testWithDataset(FrameWriterTestData.TEST_STRINGS_SINGLE_VALUE);
+ }
+
+ @Test
+ public void test_singleValueWithEmpty_multiValueTrue()
+ {
+ capabilitiesAdjustFn = capabilities ->
capabilities.setHasMultipleValues(ColumnCapabilities.Capable.TRUE);
+ testWithDataset(FrameWriterTestData.TEST_STRINGS_MULTI_VALUE);
+ }
+
+ @Test
+ public void test_singleValueWithEmpty_multiValueFalse()
+ {
+ capabilitiesAdjustFn = capabilities ->
capabilities.setHasMultipleValues(ColumnCapabilities.Capable.FALSE);
+
+ // When columnar frames are in multiValue = false mode, and when they see
a dataset that is all single strings and
+ // empty arrays, they write a single-valued column, replacing the empty
arrays with nulls.
+ final FrameWriterTestData.Dataset<?> expectedReadDataset =
+ outputFrameType == FrameType.COLUMNAR
+ ? FrameWriterTestData.TEST_STRINGS_SINGLE_VALUE
+ : FrameWriterTestData.TEST_STRINGS_SINGLE_VALUE_WITH_EMPTY;
+
+ testWithDataset(
+ FrameWriterTestData.TEST_STRINGS_SINGLE_VALUE_WITH_EMPTY,
+ expectedReadDataset
+ );
+ }
+
+ @Test
+ public void test_singleValueWithEmpty_multiValueUnknown()
+ {
+ capabilitiesAdjustFn = capabilities ->
capabilities.setHasMultipleValues(ColumnCapabilities.Capable.UNKNOWN);
+ testWithDataset(FrameWriterTestData.TEST_STRINGS_SINGLE_VALUE_WITH_EMPTY);
+ }
+
+ @Test
+ public void test_multiValueString_multiValueTrue()
+ {
+ capabilitiesAdjustFn = capabilities ->
capabilities.setHasMultipleValues(ColumnCapabilities.Capable.TRUE);
+ testWithDataset(FrameWriterTestData.TEST_STRINGS_MULTI_VALUE);
+ }
+
+ @Test
+ public void test_multiValueString_multiValueFalse()
+ {
+ capabilitiesAdjustFn = capabilities ->
capabilities.setHasMultipleValues(ColumnCapabilities.Capable.FALSE);
+
+ if (outputFrameType == FrameType.COLUMNAR) {
+ final IllegalStateException e = Assert.assertThrows(
+ IllegalStateException.class,
+ () -> testWithDataset(FrameWriterTestData.TEST_STRINGS_MULTI_VALUE)
+ );
+
+ MatcherAssert.assertThat(
+ e,
+
ThrowableMessageMatcher.hasMessage(CoreMatchers.startsWith("Encountered
unexpected multi-value row"))
+ );
+ } else {
+ testWithDataset(FrameWriterTestData.TEST_STRINGS_MULTI_VALUE);
+ }
+ }
+
+ @Test
+ public void test_multiValueString_multiValueUnknown()
{
+ capabilitiesAdjustFn = capabilities ->
capabilities.setHasMultipleValues(ColumnCapabilities.Capable.UNKNOWN);
testWithDataset(FrameWriterTestData.TEST_STRINGS_MULTI_VALUE);
}
@@ -418,6 +506,7 @@ public class FrameWriterTest extends
InitializedNullHandlingTest
inputFrameType,
outputFrameType,
allocator,
+ capabilitiesAdjustFn,
rows,
signature,
computeSortColumns(sortColumns)
@@ -453,6 +542,20 @@ public class FrameWriterTest extends
InitializedNullHandlingTest
verifyFrame(rows(dataset.getData(sortedness)), writeResult.lhs, signature);
}
+ private <T1, T2> void testWithDataset(
+ final FrameWriterTestData.Dataset<T1> writeDataset,
+ final FrameWriterTestData.Dataset<T2> readDataset
+ )
+ {
+ final List<T1> data = writeDataset.getData(KeyOrder.NONE);
+ final RowSignature signature = RowSignature.builder().add("x",
writeDataset.getType()).build();
+ final Sequence<List<Object>> rowSequence = rows(data);
+ final Pair<Frame, Integer> writeResult = writeFrame(rowSequence,
signature, signature.getColumnNames());
+
+ Assert.assertEquals(data.size(), (int) writeResult.rhs);
+ verifyFrame(rows(readDataset.getData(sortedness)), writeResult.lhs,
signature);
+ }
+
/**
* Writes as many rows to a single frame as possible. Returns the number of
rows written.
*/
@@ -460,6 +563,7 @@ public class FrameWriterTest extends
InitializedNullHandlingTest
@Nullable final FrameType inputFrameType,
final FrameType outputFrameType,
final MemoryAllocator allocator,
+ @Nullable final Consumer<ColumnCapabilitiesImpl> capabilitiesAdjustFn,
final Sequence<List<Object>> rows,
final RowSignature signature,
final List<KeyColumn> keyColumns
@@ -483,6 +587,7 @@ public class FrameWriterTest extends
InitializedNullHandlingTest
null,
inputFrameType,
HeapMemoryAllocator.unlimited(),
+ null,
rows,
signature,
Collections.emptyList()
@@ -504,8 +609,17 @@ public class FrameWriterTest extends
InitializedNullHandlingTest
keyColumns
);
+ ColumnSelectorFactory columnSelectorFactory =
cursor.getColumnSelectorFactory();
+
+ if (capabilitiesAdjustFn != null) {
+ columnSelectorFactory = new
OverrideCapabilitiesColumnSelectorFactory(
+ columnSelectorFactory,
+ capabilitiesAdjustFn
+ );
+ }
+
try (final FrameWriter frameWriter =
-
frameWriterFactory.newFrameWriter(cursor.getColumnSelectorFactory())) {
+
frameWriterFactory.newFrameWriter(columnSelectorFactory)) {
while (!cursor.isDone() &&
frameWriter.addSelection()) {
numRows++;
cursor.advance();
@@ -593,4 +707,52 @@ public class FrameWriterTest extends
InitializedNullHandlingTest
return Sequences.simple(retVal);
}
+
+ private static class OverrideCapabilitiesColumnSelectorFactory implements
ColumnSelectorFactory
+ {
+ private final ColumnSelectorFactory delegate;
+ private final Consumer<ColumnCapabilitiesImpl> fn;
+
+ public OverrideCapabilitiesColumnSelectorFactory(
+ final ColumnSelectorFactory delegate,
+ final Consumer<ColumnCapabilitiesImpl> fn
+ )
+ {
+ this.delegate = delegate;
+ this.fn = fn;
+ }
+
+ @Override
+ public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec)
+ {
+ return delegate.makeDimensionSelector(dimensionSpec);
+ }
+
+ @Override
+ public ColumnValueSelector makeColumnValueSelector(String columnName)
+ {
+ return delegate.makeColumnValueSelector(columnName);
+ }
+
+ @Nullable
+ @Override
+ public ColumnCapabilities getColumnCapabilities(String column)
+ {
+ final ColumnCapabilities capabilities =
delegate.getColumnCapabilities(column);
+ if (capabilities == null) {
+ return null;
+ } else {
+ final ColumnCapabilitiesImpl retVal =
ColumnCapabilitiesImpl.copyOf(capabilities);
+ fn.accept(retVal);
+ return retVal;
+ }
+ }
+
+ @Nullable
+ @Override
+ public RowIdSupplier getRowIdSupplier()
+ {
+ return delegate.getRowIdSupplier();
+ }
+ }
}
diff --git
a/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTestData.java
b/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTestData.java
index a52c4d5efdd..72990e02086 100644
---
a/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTestData.java
+++
b/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTestData.java
@@ -48,23 +48,47 @@ public class FrameWriterTestData
{
public static final Dataset<String> TEST_STRINGS_SINGLE_VALUE = new
Dataset<>(
ColumnType.STRING,
- Stream.of(
+ Arrays.asList(
null,
NullHandling.emptyToNullIfNeeded(""), // Empty string in
SQL-compatible mode, null otherwise
+ "brown",
+ "dog",
+ "fox",
+ "jumps",
+ "lazy",
+ "over",
+ "quick",
+ "the", // Repeated string
+ "the",
+ "thee", // To ensure "the" is before "thee"
"\uD83D\uDE42",
- "\uD83E\uDEE5",
"\uD83E\uDD20",
- "thee", // To ensure "the" is before "thee"
- "the",
- "quick",
+ "\uD83E\uDEE5"
+ )
+ );
+
+ /**
+ * Single-value strings, mostly, but with an empty list thrown in.
+ */
+ public static final Dataset<Object> TEST_STRINGS_SINGLE_VALUE_WITH_EMPTY =
new Dataset<>(
+ ColumnType.STRING,
+ Arrays.asList(
+ Collections.emptyList(),
+ NullHandling.emptyToNullIfNeeded(""), // Empty string in
SQL-compatible mode, null otherwise
"brown",
+ "dog",
"fox",
"jumps",
+ "lazy",
"over",
+ "quick",
"the", // Repeated string
- "lazy",
- "dog"
- ).sorted(Comparators.naturalNullsFirst()).collect(Collectors.toList())
+ "the",
+ "thee", // To ensure "the" is before "thee"
+ "\uD83D\uDE42",
+ "\uD83E\uDD20",
+ "\uD83E\uDEE5"
+ )
);
/**
diff --git
a/server/src/test/java/org/apache/druid/server/ClientQuerySegmentWalkerTest.java
b/server/src/test/java/org/apache/druid/server/ClientQuerySegmentWalkerTest.java
index fcdc894f61f..112275860e4 100644
---
a/server/src/test/java/org/apache/druid/server/ClientQuerySegmentWalkerTest.java
+++
b/server/src/test/java/org/apache/druid/server/ClientQuerySegmentWalkerTest.java
@@ -59,6 +59,7 @@ import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.query.groupby.GroupingEngine;
import org.apache.druid.query.scan.ScanQuery;
+import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.timeseries.TimeseriesQuery;
import org.apache.druid.query.topn.TopNQuery;
import org.apache.druid.query.topn.TopNQueryBuilder;
@@ -81,6 +82,7 @@ import org.apache.druid.segment.join.Joinable;
import org.apache.druid.segment.join.JoinableFactory;
import org.apache.druid.segment.join.JoinableFactoryWrapper;
import org.apache.druid.segment.join.MapJoinableFactory;
+import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
import org.apache.druid.server.initialization.ServerConfig;
import org.apache.druid.server.scheduling.ManualQueryPrioritizationStrategy;
import org.apache.druid.server.scheduling.NoQueryLaningStrategy;
@@ -215,7 +217,7 @@ public class ClientQuerySegmentWalkerTest
private QueryRunnerFactoryConglomerate conglomerate;
// Queries that are issued; checked by "testQuery" against its
"expectedQueries" parameter.
- private List<ExpectedQuery> issuedQueries = new ArrayList<>();
+ private final List<ExpectedQuery> issuedQueries = new ArrayList<>();
// A ClientQuerySegmentWalker that has two segments: one for FOO and one for
BAR; each with interval INTERVAL,
// version VERSION, and shard spec SHARD_SPEC.
@@ -717,7 +719,6 @@ public class ClientQuerySegmentWalkerTest
testQuery(
query,
- // GroupBy handles its own subqueries; only the inner one will go to
the cluster.
ImmutableList.of(
ExpectedQuery.cluster(subquery.withId(DUMMY_QUERY_ID).withSubQueryId("1.1")),
ExpectedQuery.local(
@@ -805,6 +806,73 @@ public class ClientQuerySegmentWalkerTest
testQuery(query, ImmutableList.of(), ImmutableList.of());
}
+ @Test // Regression test for bug fixed in
https://github.com/apache/druid/pull/15300
+ public void testScanOnScanWithStringExpression()
+ {
+ initWalker(
+ ImmutableMap.of(QueryContexts.MAX_SUBQUERY_ROWS_KEY, "1",
QueryContexts.MAX_SUBQUERY_BYTES_KEY, "1000"),
+ scheduler
+ );
+
+ final Query<?> subquery =
+ Druids.newScanQueryBuilder()
+ .dataSource(FOO)
+ .intervals(new
MultipleIntervalSegmentSpec(Intervals.ONLY_ETERNITY))
+ .columns("s")
+ .legacy(false)
+
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .build()
+ .withId(DUMMY_QUERY_ID);
+
+ final Query<?> query =
+ Druids.newScanQueryBuilder()
+ .dataSource(new QueryDataSource(subquery))
+ .intervals(new
MultipleIntervalSegmentSpec(Intervals.ONLY_ETERNITY))
+ .virtualColumns(
+ new ExpressionVirtualColumn(
+ "v",
+ "case_searched(s == 'x',2,3)",
+ ColumnType.LONG,
+ ExprMacroTable.nil()
+ )
+ )
+ .columns("v")
+ .legacy(false)
+
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .build()
+ .withId(DUMMY_QUERY_ID);
+
+ testQuery(
+ query,
+ ImmutableList.of(
+
ExpectedQuery.cluster(subquery.withId(DUMMY_QUERY_ID).withSubQueryId("1.1")),
+ ExpectedQuery.local(
+ query.withDataSource(
+ InlineDataSource.fromIterable(
+ ImmutableList.of(
+ new Object[]{"x"},
+ new Object[]{"x"},
+ new Object[]{"y"},
+ new Object[]{"z"}
+ ),
+ RowSignature.builder().add("s", null).build()
+ )
+ )
+ )
+ ),
+ ImmutableList.of(
+ new Object[]{2L},
+ new Object[]{2L},
+ new Object[]{3L},
+ new Object[]{3L}
+ )
+ );
+
+ Assert.assertEquals(2, scheduler.getTotalRun().get());
+ Assert.assertEquals(1, scheduler.getTotalPrioritizedAndLaned().get());
+ Assert.assertEquals(2, scheduler.getTotalAcquired().get());
+ Assert.assertEquals(2, scheduler.getTotalReleased().get());
+ }
@Test
public void testTimeseriesOnGroupByOnTableErrorTooLarge()
@@ -1500,7 +1568,19 @@ public class ClientQuerySegmentWalkerTest
);
if (modifiedQuery.getDataSource() instanceof FrameBasedInlineDataSource)
{
- // Do this recursively for if the query's datasource is a query
datasource
+ // Do round-trip serialization in order to replace
FrameBasedInlineDataSource with InlineDataSource, so
+ // comparisons work independently of whether we are using frames or
regular inline datasets.
+ try {
+ modifiedQuery = modifiedQuery.withDataSource(
+ TestHelper.JSON_MAPPER.readValue(
+
TestHelper.JSON_MAPPER.writeValueAsBytes(modifiedQuery.getDataSource()),
+ DataSource.class
+ )
+ );
+ }
+ catch (IOException e) {
+ throw new RuntimeException(e);
+ }
}
this.query = modifiedQuery;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]