This is an automated email from the ASF dual-hosted git repository.

cwylie pushed a commit to branch revert-18760-more-chill-msq-compaction
in repository https://gitbox.apache.org/repos/asf/druid.git

commit 30eda7bb901791cafd9073c3a4999c2752211322
Author: Clint Wylie <[email protected]>
AuthorDate: Tue Dec 2 15:25:26 2025 -0800

    Revert "compaction now only identifies multi-value dimensions for dimension 
s…"
    
    This reverts commit 9b6d18fe511cb7e53493b0f64c7c54759b82c364.
---
 .../druid/indexing/common/task/CompactionTask.java | 36 ++++++++++------------
 .../indexing/common/task/CompactionTaskTest.java   | 28 -----------------
 .../druid/data/input/impl/DimensionSchema.java     | 11 -------
 .../data/input/impl/NewSpatialDimensionSchema.java |  6 ----
 .../data/input/impl/StringDimensionSchema.java     |  6 ----
 5 files changed, 17 insertions(+), 70 deletions(-)

diff --git 
a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
index 7e26a82d1b6..ff5a97518be 100644
--- 
a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
+++ 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
@@ -84,6 +84,7 @@ import org.apache.druid.segment.QueryableIndex;
 import org.apache.druid.segment.Segment;
 import org.apache.druid.segment.column.ColumnCapabilities;
 import org.apache.druid.segment.column.ColumnHolder;
+import org.apache.druid.segment.column.ColumnType;
 import org.apache.druid.segment.incremental.AppendableIndexSpec;
 import org.apache.druid.segment.indexing.CombinedDataSchema;
 import org.apache.druid.segment.indexing.DataSchema;
@@ -465,13 +466,13 @@ public class CompactionTask extends 
AbstractBatchIndexTask implements PendingSeg
    * Checks if multi-valued string dimensions need to be analyzed by 
downloading the segments.
    * This method returns true only for MSQ engine when either of the following 
holds true:
    * <ul>
-   * <li> Range partitioning is done on a possibly multi-valued string 
dimension or an unknown dimension
+   * <li> Range partitioning is done on a string dimension or an unknown 
dimension
    * (since MSQ does not support partitioning on a multi-valued string 
dimension) </li>
-   * <li> Rollup is done on a multi-valued string dimension or an unknown 
dimension
+   * <li> Rollup is done on a string dimension or an unknown dimension
    * (since MSQ requires multi-valued string dimensions to be converted to 
arrays for rollup) </li>
    * </ul>
-   * @return false for native engine, true for MSQ engine only when 
partitioning or rollup is done on a multi-valued
-   * string or unknown dimension.
+   * @return false for native engine, true for MSQ engine only when 
partitioning or rollup is done on a string
+   * or unknown dimension.
    */
   boolean identifyMultiValuedDimensions()
   {
@@ -480,31 +481,28 @@ public class CompactionTask extends 
AbstractBatchIndexTask implements PendingSeg
     }
     // Rollup can be true even when granularitySpec is not known since rollup 
is then decided based on segment analysis
     final boolean isPossiblyRollup = granularitySpec == null || 
!Boolean.FALSE.equals(granularitySpec.isRollup());
-    final DimensionRangePartitionsSpec rangeSpec;
-    if (tuningConfig != null && tuningConfig.getPartitionsSpec() instanceof 
DimensionRangePartitionsSpec) {
-      rangeSpec = (DimensionRangePartitionsSpec) 
tuningConfig.getPartitionsSpec();
-    } else {
-      rangeSpec = null;
-    }
-    final boolean isRangePartitioned = rangeSpec != null;
+    boolean isRangePartitioned = tuningConfig != null
+                                 && tuningConfig.getPartitionsSpec() 
instanceof DimensionRangePartitionsSpec;
 
     if (dimensionsSpec == null || dimensionsSpec.getDimensions().isEmpty()) {
       return isPossiblyRollup || isRangePartitioned;
     } else {
-      boolean isRollupOnMultiValueStringDimension = isPossiblyRollup &&
-                                                    
dimensionsSpec.getDimensions()
-                                                                  .stream()
-                                                                  
.anyMatch(DimensionSchema::canBeMultiValued);
+      boolean isRollupOnStringDimension = isPossiblyRollup &&
+                                          dimensionsSpec.getDimensions()
+                                                        .stream()
+                                                        .anyMatch(dim -> 
ColumnType.STRING.equals(dim.getColumnType()));
 
-      boolean isPartitionedOnMultiValueStringDimension =
+      boolean isPartitionedOnStringDimension =
           isRangePartitioned &&
           dimensionsSpec.getDimensions()
                         .stream()
                         .anyMatch(
-                            dim -> dim.canBeMultiValued()
-                                   && 
rangeSpec.getPartitionDimensions().contains(dim.getName())
+                            dim -> 
ColumnType.STRING.equals(dim.getColumnType())
+                                   && ((DimensionRangePartitionsSpec) 
tuningConfig.getPartitionsSpec())
+                                       .getPartitionDimensions()
+                                       .contains(dim.getName())
                         );
-      return isRollupOnMultiValueStringDimension || 
isPartitionedOnMultiValueStringDimension;
+      return isRollupOnStringDimension || isPartitionedOnStringDimension;
     }
   }
 
diff --git 
a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java
 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java
index a0992c24d0b..5d319c1013f 100644
--- 
a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java
+++ 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java
@@ -97,7 +97,6 @@ import 
org.apache.druid.query.aggregation.firstlast.first.FloatFirstAggregatorFa
 import 
org.apache.druid.query.aggregation.firstlast.last.DoubleLastAggregatorFactory;
 import org.apache.druid.query.expression.TestExprMacroTable;
 import org.apache.druid.query.filter.SelectorDimFilter;
-import org.apache.druid.segment.AutoTypeColumnSchema;
 import org.apache.druid.segment.IndexIO;
 import org.apache.druid.segment.IndexMergerV9;
 import org.apache.druid.segment.IndexSpec;
@@ -1675,33 +1674,6 @@ public class CompactionTaskTest
     Assert.assertTrue(compactionTask.identifyMultiValuedDimensions());
   }
 
-  @Test
-  public void testMSQRangePartitionOnAutoStringDoesNotNeedMVDInfo()
-  {
-    final Builder builder = new Builder(
-        DATA_SOURCE,
-        segmentCacheManagerFactory
-    );
-    builder.inputSpec(new CompactionIntervalSpec(COMPACTION_INTERVAL, 
SegmentUtils.hashIds(SEGMENTS)));
-    builder.compactionRunner(new TestMSQCompactionRunner());
-
-    DimensionSchema stringDim = new AutoTypeColumnSchema("string_dim_1", 
ColumnType.STRING, null);
-    builder.tuningConfig(TuningConfigBuilder.forCompactionTask()
-                                            .withForceGuaranteedRollup(true)
-                                            .withPartitionsSpec(
-                                                new 
DimensionRangePartitionsSpec(
-                                                    3,
-                                                    null,
-                                                    
List.of(stringDim.getName()),
-                                                    false
-                                                )
-                                            )
-                                            .build());
-    builder.dimensionsSpec(new DimensionsSpec(ImmutableList.of(stringDim)));
-    CompactionTask compactionTask = builder.build();
-    Assert.assertFalse(compactionTask.identifyMultiValuedDimensions());
-  }
-
   @Test
   public void testChooseFinestGranularityWithNulls()
   {
diff --git 
a/processing/src/main/java/org/apache/druid/data/input/impl/DimensionSchema.java
 
b/processing/src/main/java/org/apache/druid/data/input/impl/DimensionSchema.java
index fc8e9e654bc..afc2a5feea1 100644
--- 
a/processing/src/main/java/org/apache/druid/data/input/impl/DimensionSchema.java
+++ 
b/processing/src/main/java/org/apache/druid/data/input/impl/DimensionSchema.java
@@ -176,17 +176,6 @@ public abstract class DimensionSchema
   @JsonIgnore
   public abstract ColumnType getColumnType();
 
-  /**
-   * Returns true if the {@link DimensionHandler#makeIndexer()} of this schema 
can produce multi-valued
-   * {@link ColumnType#STRING} columns. This method is used by MSQ compaction 
to determine if it needs to download the
-   * segments to check if any are actually multi-valued.
-   */
-  @JsonIgnore
-  public boolean canBeMultiValued()
-  {
-    return false;
-  }
-
   @JsonIgnore
   public DimensionHandler getDimensionHandler()
   {
diff --git 
a/processing/src/main/java/org/apache/druid/data/input/impl/NewSpatialDimensionSchema.java
 
b/processing/src/main/java/org/apache/druid/data/input/impl/NewSpatialDimensionSchema.java
index 0de2f77bbc5..bd1fd8292ae 100644
--- 
a/processing/src/main/java/org/apache/druid/data/input/impl/NewSpatialDimensionSchema.java
+++ 
b/processing/src/main/java/org/apache/druid/data/input/impl/NewSpatialDimensionSchema.java
@@ -69,12 +69,6 @@ public class NewSpatialDimensionSchema extends 
DimensionSchema
     return ColumnType.STRING;
   }
 
-  @Override
-  public boolean canBeMultiValued()
-  {
-    return true;
-  }
-
   @Override
   public DimensionHandler getDimensionHandler()
   {
diff --git 
a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java
 
b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java
index bd5314c636b..2af2fbbaac2 100644
--- 
a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java
+++ 
b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java
@@ -64,12 +64,6 @@ public class StringDimensionSchema extends DimensionSchema
     return ColumnType.STRING;
   }
 
-  @Override
-  public boolean canBeMultiValued()
-  {
-    return true;
-  }
-
   @Override
   public DimensionHandler getDimensionHandler()
   {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to