This is an automated email from the ASF dual-hosted git repository.

gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 37e158c2c4a Frames: consider writing singly-valued column when input 
column hasMultipleValues is UNKNOWN. (#15300)
37e158c2c4a is described below

commit 37e158c2c4a94cdbccde1add3c864219228f92fa
Author: Gian Merlino <[email protected]>
AuthorDate: Wed Nov 1 22:05:53 2023 -0700

    Frames: consider writing singly-valued column when input column 
hasMultipleValues is UNKNOWN. (#15300)
    
    * Frames: consider writing singly-valued column when input column 
hasMultipleValues is UNKNOWN.
    
    Prior to this patch, columnar frames would always write multi-valued 
columns if
    the input column had hasMultipleValues = UNKNOWN. This had the effect of 
flipping
    UNKNOWN to TRUE when copying data into frames, which is problematic because 
TRUE
    causes expressions to assume that string inputs must be treated as arrays.
    
    We now avoid this by flipping UNKNOWN to FALSE if no multi-valuedness
    is encountered, and flipping it to TRUE if multi-valuedness is encountered.
    
    * Add regression test case.
---
 .../frame/write/columnar/FrameColumnWriters.java   |   2 +-
 .../write/columnar/StringFrameColumnWriter.java    |  46 ++++--
 .../segment/column/ColumnCapabilitiesImpl.java     |   7 +-
 .../apache/druid/frame/write/FrameWriterTest.java  | 168 ++++++++++++++++++++-
 .../druid/frame/write/FrameWriterTestData.java     |  40 ++++-
 .../druid/server/ClientQuerySegmentWalkerTest.java |  86 ++++++++++-
 6 files changed, 320 insertions(+), 29 deletions(-)

diff --git 
a/processing/src/main/java/org/apache/druid/frame/write/columnar/FrameColumnWriters.java
 
b/processing/src/main/java/org/apache/druid/frame/write/columnar/FrameColumnWriters.java
index 596fa77646f..8c5dbe75853 100644
--- 
a/processing/src/main/java/org/apache/druid/frame/write/columnar/FrameColumnWriters.java
+++ 
b/processing/src/main/java/org/apache/druid/frame/write/columnar/FrameColumnWriters.java
@@ -130,7 +130,7 @@ public class FrameColumnWriters
     return new StringFrameColumnWriterImpl(
         selector,
         allocator,
-        capabilities == null || capabilities.hasMultipleValues().isMaybeTrue()
+        capabilities == null ? ColumnCapabilities.Capable.UNKNOWN : 
capabilities.hasMultipleValues()
     );
   }
 
diff --git 
a/processing/src/main/java/org/apache/druid/frame/write/columnar/StringFrameColumnWriter.java
 
b/processing/src/main/java/org/apache/druid/frame/write/columnar/StringFrameColumnWriter.java
index 536918ec59c..ec812d9654c 100644
--- 
a/processing/src/main/java/org/apache/druid/frame/write/columnar/StringFrameColumnWriter.java
+++ 
b/processing/src/main/java/org/apache/druid/frame/write/columnar/StringFrameColumnWriter.java
@@ -28,6 +28,7 @@ import org.apache.druid.frame.write.FrameWriterUtils;
 import org.apache.druid.java.util.common.ISE;
 import org.apache.druid.segment.ColumnValueSelector;
 import org.apache.druid.segment.DimensionSelector;
+import org.apache.druid.segment.column.ColumnCapabilities;
 
 import javax.annotation.Nullable;
 import java.nio.ByteBuffer;
@@ -45,7 +46,8 @@ public abstract class StringFrameColumnWriter<T extends 
ColumnValueSelector> imp
 
   private final T selector;
   private final byte typeCode;
-  protected final boolean multiValue;
+  protected final ColumnCapabilities.Capable multiValue;
+  protected boolean encounteredMultiValueRow;
 
   /**
    * Row lengths: one int per row with the number of values contained by that 
row and all previous rows.
@@ -73,14 +75,14 @@ public abstract class StringFrameColumnWriter<T extends 
ColumnValueSelector> imp
       final T selector,
       final MemoryAllocator allocator,
       final byte typeCode,
-      final boolean multiValue
+      final ColumnCapabilities.Capable multiValue
   )
   {
     this.selector = selector;
     this.typeCode = typeCode;
     this.multiValue = multiValue;
 
-    if (multiValue) {
+    if (multiValue.isMaybeTrue()) {
       this.cumulativeRowLengths = AppendableMemory.create(allocator, 
INITIAL_ALLOCATION_SIZE);
     } else {
       this.cumulativeRowLengths = null;
@@ -107,7 +109,7 @@ public abstract class StringFrameColumnWriter<T extends 
ColumnValueSelector> imp
       return false;
     }
 
-    if (multiValue && !cumulativeRowLengths.reserveAdditional(Integer.BYTES)) {
+    if (multiValue.isMaybeTrue() && 
!cumulativeRowLengths.reserveAdditional(Integer.BYTES)) {
       return false;
     }
 
@@ -119,8 +121,16 @@ public abstract class StringFrameColumnWriter<T extends 
ColumnValueSelector> imp
       return false;
     }
 
+    if (utf8Count != 1) {
+      encounteredMultiValueRow = true;
+
+      if (multiValue.isFalse()) {
+        throw new ISE("Encountered unexpected multi-value row, size[%d]", 
utf8Count);
+      }
+    }
+
     // Enough space has been reserved to write what we need to write; let's 
start.
-    if (multiValue) {
+    if (multiValue.isMaybeTrue()) {
       final MemoryRange<WritableMemory> rowLengthsCursor = 
cumulativeRowLengths.cursor();
 
       if (utf8Data == null && typeCode == 
FrameColumnWriters.TYPE_STRING_ARRAY) {
@@ -189,7 +199,7 @@ public abstract class StringFrameColumnWriter<T extends 
ColumnValueSelector> imp
       throw new ISE("Cannot undo");
     }
 
-    if (multiValue) {
+    if (multiValue.isMaybeTrue()) {
       cumulativeRowLengths.rewindCursor(Integer.BYTES);
       cumulativeStringLengths.rewindCursor(Integer.BYTES * lastRowLength);
       lastCumulativeRowLength -= lastRowLength;
@@ -207,7 +217,7 @@ public abstract class StringFrameColumnWriter<T extends 
ColumnValueSelector> imp
   public long size()
   {
     return DATA_OFFSET
-           + (multiValue ? cumulativeRowLengths.size() : 0)
+           + (isWriteMultiValue() ? cumulativeRowLengths.size() : 0)
            + cumulativeStringLengths.size()
            + stringData.size();
   }
@@ -215,13 +225,14 @@ public abstract class StringFrameColumnWriter<T extends 
ColumnValueSelector> imp
   @Override
   public long writeTo(final WritableMemory memory, final long startPosition)
   {
+    final boolean writeMultiValue = isWriteMultiValue();
     long currentPosition = startPosition;
 
     memory.putByte(currentPosition, typeCode);
-    memory.putByte(currentPosition + 1, multiValue ? (byte) 1 : (byte) 0);
+    memory.putByte(currentPosition + 1, writeMultiValue ? (byte) 1 : (byte) 0);
     currentPosition += 2;
 
-    if (multiValue) {
+    if (writeMultiValue) {
       currentPosition += cumulativeRowLengths.writeTo(memory, currentPosition);
     }
 
@@ -234,7 +245,7 @@ public abstract class StringFrameColumnWriter<T extends 
ColumnValueSelector> imp
   @Override
   public void close()
   {
-    if (multiValue) {
+    if (multiValue.isMaybeTrue()) {
       cumulativeRowLengths.close();
     }
 
@@ -249,6 +260,15 @@ public abstract class StringFrameColumnWriter<T extends 
ColumnValueSelector> imp
   @Nullable
   public abstract List<ByteBuffer> getUtf8ByteBuffersFromSelector(T selector);
 
+  /**
+   * Whether, given the current state of the writer, a call to {@link 
#writeTo(WritableMemory, long)} at this point
+   * would generate a multi-value column.
+   */
+  private boolean isWriteMultiValue()
+  {
+    return multiValue.isTrue() || encounteredMultiValueRow;
+  }
+
   /**
    * Returns the sum of remaining bytes in the provided list of byte buffers.
    */
@@ -277,7 +297,7 @@ class StringFrameColumnWriterImpl extends 
StringFrameColumnWriter<DimensionSelec
   StringFrameColumnWriterImpl(
       DimensionSelector selector,
       MemoryAllocator allocator,
-      boolean multiValue
+      ColumnCapabilities.Capable multiValue
   )
   {
     super(selector, allocator, FrameColumnWriters.TYPE_STRING, multiValue);
@@ -286,7 +306,7 @@ class StringFrameColumnWriterImpl extends 
StringFrameColumnWriter<DimensionSelec
   @Override
   public List<ByteBuffer> getUtf8ByteBuffersFromSelector(final 
DimensionSelector selector)
   {
-    return FrameWriterUtils.getUtf8ByteBuffersFromStringSelector(selector, 
multiValue);
+    return FrameWriterUtils.getUtf8ByteBuffersFromStringSelector(selector, 
multiValue.isMaybeTrue());
   }
 }
 
@@ -300,7 +320,7 @@ class StringArrayFrameColumnWriterImpl extends 
StringFrameColumnWriter<ColumnVal
       MemoryAllocator allocator
   )
   {
-    super(selector, allocator, FrameColumnWriters.TYPE_STRING_ARRAY, true);
+    super(selector, allocator, FrameColumnWriters.TYPE_STRING_ARRAY, 
ColumnCapabilities.Capable.TRUE);
   }
 
   @Override
diff --git 
a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java
 
b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java
index f8464a0cf71..9baff99cfb2 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/column/ColumnCapabilitiesImpl.java
@@ -290,7 +290,12 @@ public class ColumnCapabilitiesImpl implements 
ColumnCapabilities
 
   public ColumnCapabilitiesImpl setHasMultipleValues(boolean hasMultipleValues)
   {
-    this.hasMultipleValues = Capable.of(hasMultipleValues);
+    return setHasMultipleValues(Capable.of(hasMultipleValues));
+  }
+
+  public ColumnCapabilitiesImpl setHasMultipleValues(Capable hasMultipleValues)
+  {
+    this.hasMultipleValues = hasMultipleValues;
     return this;
   }
 
diff --git 
a/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTest.java 
b/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTest.java
index 31b24825f95..16a1b667556 100644
--- a/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTest.java
+++ b/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTest.java
@@ -45,9 +45,16 @@ import 
org.apache.druid.java.util.common.granularity.Granularities;
 import org.apache.druid.java.util.common.guava.Sequence;
 import org.apache.druid.java.util.common.guava.Sequences;
 import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde;
+import org.apache.druid.query.dimension.DimensionSpec;
+import org.apache.druid.segment.ColumnSelectorFactory;
+import org.apache.druid.segment.ColumnValueSelector;
+import org.apache.druid.segment.DimensionSelector;
 import org.apache.druid.segment.RowBasedSegment;
+import org.apache.druid.segment.RowIdSupplier;
 import org.apache.druid.segment.Segment;
 import org.apache.druid.segment.VirtualColumns;
+import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
 import org.apache.druid.segment.column.ColumnType;
 import org.apache.druid.segment.column.RowSignature;
 import org.apache.druid.segment.column.ValueType;
@@ -55,10 +62,12 @@ import org.apache.druid.segment.serde.ComplexMetrics;
 import org.apache.druid.testing.InitializedNullHandlingTest;
 import org.apache.druid.timeline.SegmentId;
 import org.hamcrest.CoreMatchers;
+import org.hamcrest.MatcherAssert;
 import org.junit.Assert;
 import org.junit.Assume;
 import org.junit.BeforeClass;
 import org.junit.Test;
+import org.junit.internal.matchers.ThrowableMessageMatcher;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
@@ -70,6 +79,7 @@ import java.util.Collections;
 import java.util.Comparator;
 import java.util.Iterator;
 import java.util.List;
+import java.util.function.Consumer;
 import java.util.stream.Collectors;
 
 /**
@@ -87,6 +97,9 @@ public class FrameWriterTest extends 
InitializedNullHandlingTest
 
   private MemoryAllocator allocator;
 
+  @Nullable
+  private Consumer<ColumnCapabilitiesImpl> capabilitiesAdjustFn;
+
   public FrameWriterTest(
       @Nullable final FrameType inputFrameType,
       final FrameType outputFrameType,
@@ -130,14 +143,89 @@ public class FrameWriterTest extends 
InitializedNullHandlingTest
   }
 
   @Test
-  public void test_string()
+  public void test_string_multiValueTrue()
   {
+    capabilitiesAdjustFn = capabilities -> 
capabilities.setHasMultipleValues(ColumnCapabilities.Capable.TRUE);
     testWithDataset(FrameWriterTestData.TEST_STRINGS_SINGLE_VALUE);
   }
 
   @Test
-  public void test_multiValueString()
+  public void test_string_multiValueFalse()
+  {
+    capabilitiesAdjustFn = capabilities -> 
capabilities.setHasMultipleValues(ColumnCapabilities.Capable.FALSE);
+    testWithDataset(FrameWriterTestData.TEST_STRINGS_SINGLE_VALUE);
+  }
+
+  @Test
+  public void test_string_multiValueUnknown()
+  {
+    capabilitiesAdjustFn = capabilities -> 
capabilities.setHasMultipleValues(ColumnCapabilities.Capable.UNKNOWN);
+    testWithDataset(FrameWriterTestData.TEST_STRINGS_SINGLE_VALUE);
+  }
+
+  @Test
+  public void test_singleValueWithEmpty_multiValueTrue()
+  {
+    capabilitiesAdjustFn = capabilities -> 
capabilities.setHasMultipleValues(ColumnCapabilities.Capable.TRUE);
+    testWithDataset(FrameWriterTestData.TEST_STRINGS_MULTI_VALUE);
+  }
+
+  @Test
+  public void test_singleValueWithEmpty_multiValueFalse()
+  {
+    capabilitiesAdjustFn = capabilities -> 
capabilities.setHasMultipleValues(ColumnCapabilities.Capable.FALSE);
+
+    // When columnar frames are in multiValue = false mode, and when they see 
a dataset that is all single strings and
+    // empty arrays, they write a single-valued column, replacing the empty 
arrays with nulls.
+    final FrameWriterTestData.Dataset<?> expectedReadDataset =
+        outputFrameType == FrameType.COLUMNAR
+        ? FrameWriterTestData.TEST_STRINGS_SINGLE_VALUE
+        : FrameWriterTestData.TEST_STRINGS_SINGLE_VALUE_WITH_EMPTY;
+
+    testWithDataset(
+        FrameWriterTestData.TEST_STRINGS_SINGLE_VALUE_WITH_EMPTY,
+        expectedReadDataset
+    );
+  }
+
+  @Test
+  public void test_singleValueWithEmpty_multiValueUnknown()
+  {
+    capabilitiesAdjustFn = capabilities -> 
capabilities.setHasMultipleValues(ColumnCapabilities.Capable.UNKNOWN);
+    testWithDataset(FrameWriterTestData.TEST_STRINGS_SINGLE_VALUE_WITH_EMPTY);
+  }
+
+  @Test
+  public void test_multiValueString_multiValueTrue()
+  {
+    capabilitiesAdjustFn = capabilities -> 
capabilities.setHasMultipleValues(ColumnCapabilities.Capable.TRUE);
+    testWithDataset(FrameWriterTestData.TEST_STRINGS_MULTI_VALUE);
+  }
+
+  @Test
+  public void test_multiValueString_multiValueFalse()
+  {
+    capabilitiesAdjustFn = capabilities -> 
capabilities.setHasMultipleValues(ColumnCapabilities.Capable.FALSE);
+
+    if (outputFrameType == FrameType.COLUMNAR) {
+      final IllegalStateException e = Assert.assertThrows(
+          IllegalStateException.class,
+          () -> testWithDataset(FrameWriterTestData.TEST_STRINGS_MULTI_VALUE)
+      );
+
+      MatcherAssert.assertThat(
+          e,
+          
ThrowableMessageMatcher.hasMessage(CoreMatchers.startsWith("Encountered 
unexpected multi-value row"))
+      );
+    } else {
+      testWithDataset(FrameWriterTestData.TEST_STRINGS_MULTI_VALUE);
+    }
+  }
+
+  @Test
+  public void test_multiValueString_multiValueUnknown()
   {
+    capabilitiesAdjustFn = capabilities -> 
capabilities.setHasMultipleValues(ColumnCapabilities.Capable.UNKNOWN);
     testWithDataset(FrameWriterTestData.TEST_STRINGS_MULTI_VALUE);
   }
 
@@ -418,6 +506,7 @@ public class FrameWriterTest extends 
InitializedNullHandlingTest
         inputFrameType,
         outputFrameType,
         allocator,
+        capabilitiesAdjustFn,
         rows,
         signature,
         computeSortColumns(sortColumns)
@@ -453,6 +542,20 @@ public class FrameWriterTest extends 
InitializedNullHandlingTest
     verifyFrame(rows(dataset.getData(sortedness)), writeResult.lhs, signature);
   }
 
+  private <T1, T2> void testWithDataset(
+      final FrameWriterTestData.Dataset<T1> writeDataset,
+      final FrameWriterTestData.Dataset<T2> readDataset
+  )
+  {
+    final List<T1> data = writeDataset.getData(KeyOrder.NONE);
+    final RowSignature signature = RowSignature.builder().add("x", 
writeDataset.getType()).build();
+    final Sequence<List<Object>> rowSequence = rows(data);
+    final Pair<Frame, Integer> writeResult = writeFrame(rowSequence, 
signature, signature.getColumnNames());
+
+    Assert.assertEquals(data.size(), (int) writeResult.rhs);
+    verifyFrame(rows(readDataset.getData(sortedness)), writeResult.lhs, 
signature);
+  }
+
   /**
    * Writes as many rows to a single frame as possible. Returns the number of 
rows written.
    */
@@ -460,6 +563,7 @@ public class FrameWriterTest extends 
InitializedNullHandlingTest
       @Nullable final FrameType inputFrameType,
       final FrameType outputFrameType,
       final MemoryAllocator allocator,
+      @Nullable final Consumer<ColumnCapabilitiesImpl> capabilitiesAdjustFn,
       final Sequence<List<Object>> rows,
       final RowSignature signature,
       final List<KeyColumn> keyColumns
@@ -483,6 +587,7 @@ public class FrameWriterTest extends 
InitializedNullHandlingTest
           null,
           inputFrameType,
           HeapMemoryAllocator.unlimited(),
+          null,
           rows,
           signature,
           Collections.emptyList()
@@ -504,8 +609,17 @@ public class FrameWriterTest extends 
InitializedNullHandlingTest
                                  keyColumns
                              );
 
+                             ColumnSelectorFactory columnSelectorFactory = 
cursor.getColumnSelectorFactory();
+
+                             if (capabilitiesAdjustFn != null) {
+                               columnSelectorFactory = new 
OverrideCapabilitiesColumnSelectorFactory(
+                                   columnSelectorFactory,
+                                   capabilitiesAdjustFn
+                               );
+                             }
+
                              try (final FrameWriter frameWriter =
-                                      
frameWriterFactory.newFrameWriter(cursor.getColumnSelectorFactory())) {
+                                      
frameWriterFactory.newFrameWriter(columnSelectorFactory)) {
                                while (!cursor.isDone() && 
frameWriter.addSelection()) {
                                  numRows++;
                                  cursor.advance();
@@ -593,4 +707,52 @@ public class FrameWriterTest extends 
InitializedNullHandlingTest
 
     return Sequences.simple(retVal);
   }
+
+  private static class OverrideCapabilitiesColumnSelectorFactory implements 
ColumnSelectorFactory
+  {
+    private final ColumnSelectorFactory delegate;
+    private final Consumer<ColumnCapabilitiesImpl> fn;
+
+    public OverrideCapabilitiesColumnSelectorFactory(
+        final ColumnSelectorFactory delegate,
+        final Consumer<ColumnCapabilitiesImpl> fn
+    )
+    {
+      this.delegate = delegate;
+      this.fn = fn;
+    }
+
+    @Override
+    public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec)
+    {
+      return delegate.makeDimensionSelector(dimensionSpec);
+    }
+
+    @Override
+    public ColumnValueSelector makeColumnValueSelector(String columnName)
+    {
+      return delegate.makeColumnValueSelector(columnName);
+    }
+
+    @Nullable
+    @Override
+    public ColumnCapabilities getColumnCapabilities(String column)
+    {
+      final ColumnCapabilities capabilities = 
delegate.getColumnCapabilities(column);
+      if (capabilities == null) {
+        return null;
+      } else {
+        final ColumnCapabilitiesImpl retVal = 
ColumnCapabilitiesImpl.copyOf(capabilities);
+        fn.accept(retVal);
+        return retVal;
+      }
+    }
+
+    @Nullable
+    @Override
+    public RowIdSupplier getRowIdSupplier()
+    {
+      return delegate.getRowIdSupplier();
+    }
+  }
 }
diff --git 
a/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTestData.java
 
b/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTestData.java
index a52c4d5efdd..72990e02086 100644
--- 
a/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTestData.java
+++ 
b/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTestData.java
@@ -48,23 +48,47 @@ public class FrameWriterTestData
 {
   public static final Dataset<String> TEST_STRINGS_SINGLE_VALUE = new 
Dataset<>(
       ColumnType.STRING,
-      Stream.of(
+      Arrays.asList(
           null,
           NullHandling.emptyToNullIfNeeded(""), // Empty string in 
SQL-compatible mode, null otherwise
+          "brown",
+          "dog",
+          "fox",
+          "jumps",
+          "lazy",
+          "over",
+          "quick",
+          "the", // Repeated string
+          "the",
+          "thee", // To ensure "the" is before "thee"
           "\uD83D\uDE42",
-          "\uD83E\uDEE5",
           "\uD83E\uDD20",
-          "thee", // To ensure "the" is before "thee"
-          "the",
-          "quick",
+          "\uD83E\uDEE5"
+      )
+  );
+
+  /**
+   * Single-value strings, mostly, but with an empty list thrown in.
+   */
+  public static final Dataset<Object> TEST_STRINGS_SINGLE_VALUE_WITH_EMPTY = 
new Dataset<>(
+      ColumnType.STRING,
+      Arrays.asList(
+          Collections.emptyList(),
+          NullHandling.emptyToNullIfNeeded(""), // Empty string in 
SQL-compatible mode, null otherwise
           "brown",
+          "dog",
           "fox",
           "jumps",
+          "lazy",
           "over",
+          "quick",
           "the", // Repeated string
-          "lazy",
-          "dog"
-      ).sorted(Comparators.naturalNullsFirst()).collect(Collectors.toList())
+          "the",
+          "thee", // To ensure "the" is before "thee"
+          "\uD83D\uDE42",
+          "\uD83E\uDD20",
+          "\uD83E\uDEE5"
+      )
   );
 
   /**
diff --git 
a/server/src/test/java/org/apache/druid/server/ClientQuerySegmentWalkerTest.java
 
b/server/src/test/java/org/apache/druid/server/ClientQuerySegmentWalkerTest.java
index fcdc894f61f..112275860e4 100644
--- 
a/server/src/test/java/org/apache/druid/server/ClientQuerySegmentWalkerTest.java
+++ 
b/server/src/test/java/org/apache/druid/server/ClientQuerySegmentWalkerTest.java
@@ -59,6 +59,7 @@ import org.apache.druid.query.groupby.GroupByQuery;
 import org.apache.druid.query.groupby.GroupByQueryConfig;
 import org.apache.druid.query.groupby.GroupingEngine;
 import org.apache.druid.query.scan.ScanQuery;
+import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
 import org.apache.druid.query.timeseries.TimeseriesQuery;
 import org.apache.druid.query.topn.TopNQuery;
 import org.apache.druid.query.topn.TopNQueryBuilder;
@@ -81,6 +82,7 @@ import org.apache.druid.segment.join.Joinable;
 import org.apache.druid.segment.join.JoinableFactory;
 import org.apache.druid.segment.join.JoinableFactoryWrapper;
 import org.apache.druid.segment.join.MapJoinableFactory;
+import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
 import org.apache.druid.server.initialization.ServerConfig;
 import org.apache.druid.server.scheduling.ManualQueryPrioritizationStrategy;
 import org.apache.druid.server.scheduling.NoQueryLaningStrategy;
@@ -215,7 +217,7 @@ public class ClientQuerySegmentWalkerTest
   private QueryRunnerFactoryConglomerate conglomerate;
 
   // Queries that are issued; checked by "testQuery" against its 
"expectedQueries" parameter.
-  private List<ExpectedQuery> issuedQueries = new ArrayList<>();
+  private final List<ExpectedQuery> issuedQueries = new ArrayList<>();
 
   // A ClientQuerySegmentWalker that has two segments: one for FOO and one for 
BAR; each with interval INTERVAL,
   // version VERSION, and shard spec SHARD_SPEC.
@@ -717,7 +719,6 @@ public class ClientQuerySegmentWalkerTest
 
     testQuery(
         query,
-        // GroupBy handles its own subqueries; only the inner one will go to 
the cluster.
         ImmutableList.of(
             
ExpectedQuery.cluster(subquery.withId(DUMMY_QUERY_ID).withSubQueryId("1.1")),
             ExpectedQuery.local(
@@ -805,6 +806,73 @@ public class ClientQuerySegmentWalkerTest
     testQuery(query, ImmutableList.of(), ImmutableList.of());
   }
 
+  @Test // Regression test for bug fixed in 
https://github.com/apache/druid/pull/15300
+  public void testScanOnScanWithStringExpression()
+  {
+    initWalker(
+        ImmutableMap.of(QueryContexts.MAX_SUBQUERY_ROWS_KEY, "1", 
QueryContexts.MAX_SUBQUERY_BYTES_KEY, "1000"),
+        scheduler
+    );
+
+    final Query<?> subquery =
+        Druids.newScanQueryBuilder()
+              .dataSource(FOO)
+              .intervals(new 
MultipleIntervalSegmentSpec(Intervals.ONLY_ETERNITY))
+              .columns("s")
+              .legacy(false)
+              
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+              .build()
+              .withId(DUMMY_QUERY_ID);
+
+    final Query<?> query =
+        Druids.newScanQueryBuilder()
+              .dataSource(new QueryDataSource(subquery))
+              .intervals(new 
MultipleIntervalSegmentSpec(Intervals.ONLY_ETERNITY))
+              .virtualColumns(
+                  new ExpressionVirtualColumn(
+                      "v",
+                      "case_searched(s == 'x',2,3)",
+                      ColumnType.LONG,
+                      ExprMacroTable.nil()
+                  )
+              )
+              .columns("v")
+              .legacy(false)
+              
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+              .build()
+              .withId(DUMMY_QUERY_ID);
+
+    testQuery(
+        query,
+        ImmutableList.of(
+            
ExpectedQuery.cluster(subquery.withId(DUMMY_QUERY_ID).withSubQueryId("1.1")),
+            ExpectedQuery.local(
+                query.withDataSource(
+                    InlineDataSource.fromIterable(
+                        ImmutableList.of(
+                            new Object[]{"x"},
+                            new Object[]{"x"},
+                            new Object[]{"y"},
+                            new Object[]{"z"}
+                        ),
+                        RowSignature.builder().add("s", null).build()
+                    )
+                )
+            )
+        ),
+        ImmutableList.of(
+            new Object[]{2L},
+            new Object[]{2L},
+            new Object[]{3L},
+            new Object[]{3L}
+        )
+    );
+
+    Assert.assertEquals(2, scheduler.getTotalRun().get());
+    Assert.assertEquals(1, scheduler.getTotalPrioritizedAndLaned().get());
+    Assert.assertEquals(2, scheduler.getTotalAcquired().get());
+    Assert.assertEquals(2, scheduler.getTotalReleased().get());
+  }
 
   @Test
   public void testTimeseriesOnGroupByOnTableErrorTooLarge()
@@ -1500,7 +1568,19 @@ public class ClientQuerySegmentWalkerTest
       );
 
       if (modifiedQuery.getDataSource() instanceof FrameBasedInlineDataSource) 
{
-        // Do this recursively for if the query's datasource is a query 
datasource
+        // Do round-trip serialization in order to replace 
FrameBasedInlineDataSource with InlineDataSource, so
+        // comparisons work independently of whether we are using frames or 
regular inline datasets.
+        try {
+          modifiedQuery = modifiedQuery.withDataSource(
+              TestHelper.JSON_MAPPER.readValue(
+                  
TestHelper.JSON_MAPPER.writeValueAsBytes(modifiedQuery.getDataSource()),
+                  DataSource.class
+              )
+          );
+        }
+        catch (IOException e) {
+          throw new RuntimeException(e);
+        }
       }
 
       this.query = modifiedQuery;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to