This is an automated email from the ASF dual-hosted git repository. cwylie pushed a commit to branch revert-18760-more-chill-msq-compaction in repository https://gitbox.apache.org/repos/asf/druid.git
commit 30eda7bb901791cafd9073c3a4999c2752211322 Author: Clint Wylie <[email protected]> AuthorDate: Tue Dec 2 15:25:26 2025 -0800 Revert "compaction now only identifies multi-value dimensions for dimension s…" This reverts commit 9b6d18fe511cb7e53493b0f64c7c54759b82c364. --- .../druid/indexing/common/task/CompactionTask.java | 36 ++++++++++------------ .../indexing/common/task/CompactionTaskTest.java | 28 ----------------- .../druid/data/input/impl/DimensionSchema.java | 11 ------- .../data/input/impl/NewSpatialDimensionSchema.java | 6 ---- .../data/input/impl/StringDimensionSchema.java | 6 ---- 5 files changed, 17 insertions(+), 70 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java index 7e26a82d1b6..ff5a97518be 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java @@ -84,6 +84,7 @@ import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.Segment; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.incremental.AppendableIndexSpec; import org.apache.druid.segment.indexing.CombinedDataSchema; import org.apache.druid.segment.indexing.DataSchema; @@ -465,13 +466,13 @@ public class CompactionTask extends AbstractBatchIndexTask implements PendingSeg * Checks if multi-valued string dimensions need to be analyzed by downloading the segments. * This method returns true only for MSQ engine when either of the following holds true: * <ul> - * <li> Range partitioning is done on a possibly multi-valued string dimension or an unknown dimension + * <li> Range partitioning is done on a string dimension or an unknown dimension * (since MSQ does not support partitioning on a multi-valued string dimension) </li> - * <li> Rollup is done on a multi-valued string dimension or an unknown dimension + * <li> Rollup is done on a string dimension or an unknown dimension * (since MSQ requires multi-valued string dimensions to be converted to arrays for rollup) </li> * </ul> - * @return false for native engine, true for MSQ engine only when partitioning or rollup is done on a multi-valued - * string or unknown dimension. + * @return false for native engine, true for MSQ engine only when partitioning or rollup is done on a string + * or unknown dimension. */ boolean identifyMultiValuedDimensions() { @@ -480,31 +481,28 @@ public class CompactionTask extends AbstractBatchIndexTask implements PendingSeg } // Rollup can be true even when granularitySpec is not known since rollup is then decided based on segment analysis final boolean isPossiblyRollup = granularitySpec == null || !Boolean.FALSE.equals(granularitySpec.isRollup()); - final DimensionRangePartitionsSpec rangeSpec; - if (tuningConfig != null && tuningConfig.getPartitionsSpec() instanceof DimensionRangePartitionsSpec) { - rangeSpec = (DimensionRangePartitionsSpec) tuningConfig.getPartitionsSpec(); - } else { - rangeSpec = null; - } - final boolean isRangePartitioned = rangeSpec != null; + boolean isRangePartitioned = tuningConfig != null + && tuningConfig.getPartitionsSpec() instanceof DimensionRangePartitionsSpec; if (dimensionsSpec == null || dimensionsSpec.getDimensions().isEmpty()) { return isPossiblyRollup || isRangePartitioned; } else { - boolean isRollupOnMultiValueStringDimension = isPossiblyRollup && - dimensionsSpec.getDimensions() - .stream() - .anyMatch(DimensionSchema::canBeMultiValued); + boolean isRollupOnStringDimension = isPossiblyRollup && + dimensionsSpec.getDimensions() + .stream() + .anyMatch(dim -> ColumnType.STRING.equals(dim.getColumnType())); - boolean isPartitionedOnMultiValueStringDimension = + boolean isPartitionedOnStringDimension = isRangePartitioned && dimensionsSpec.getDimensions() .stream() .anyMatch( - dim -> dim.canBeMultiValued() - && rangeSpec.getPartitionDimensions().contains(dim.getName()) + dim -> ColumnType.STRING.equals(dim.getColumnType()) + && ((DimensionRangePartitionsSpec) tuningConfig.getPartitionsSpec()) + .getPartitionDimensions() + .contains(dim.getName()) ); - return isRollupOnMultiValueStringDimension || isPartitionedOnMultiValueStringDimension; + return isRollupOnStringDimension || isPartitionedOnStringDimension; } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java index a0992c24d0b..5d319c1013f 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java @@ -97,7 +97,6 @@ import org.apache.druid.query.aggregation.firstlast.first.FloatFirstAggregatorFa import org.apache.druid.query.aggregation.firstlast.last.DoubleLastAggregatorFactory; import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.query.filter.SelectorDimFilter; -import org.apache.druid.segment.AutoTypeColumnSchema; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMergerV9; import org.apache.druid.segment.IndexSpec; @@ -1675,33 +1674,6 @@ public class CompactionTaskTest Assert.assertTrue(compactionTask.identifyMultiValuedDimensions()); } - @Test - public void testMSQRangePartitionOnAutoStringDoesNotNeedMVDInfo() - { - final Builder builder = new Builder( - DATA_SOURCE, - segmentCacheManagerFactory - ); - builder.inputSpec(new CompactionIntervalSpec(COMPACTION_INTERVAL, SegmentUtils.hashIds(SEGMENTS))); - builder.compactionRunner(new TestMSQCompactionRunner()); - - DimensionSchema stringDim = new AutoTypeColumnSchema("string_dim_1", ColumnType.STRING, null); - builder.tuningConfig(TuningConfigBuilder.forCompactionTask() - .withForceGuaranteedRollup(true) - .withPartitionsSpec( - new DimensionRangePartitionsSpec( - 3, - null, - List.of(stringDim.getName()), - false - ) - ) - .build()); - builder.dimensionsSpec(new DimensionsSpec(ImmutableList.of(stringDim))); - CompactionTask compactionTask = builder.build(); - Assert.assertFalse(compactionTask.identifyMultiValuedDimensions()); - } - @Test public void testChooseFinestGranularityWithNulls() { diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/DimensionSchema.java b/processing/src/main/java/org/apache/druid/data/input/impl/DimensionSchema.java index fc8e9e654bc..afc2a5feea1 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/DimensionSchema.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/DimensionSchema.java @@ -176,17 +176,6 @@ public abstract class DimensionSchema @JsonIgnore public abstract ColumnType getColumnType(); - /** - * Returns true if the {@link DimensionHandler#makeIndexer()} of this schema can produce multi-valued - * {@link ColumnType#STRING} columns. This method is used by MSQ compaction to determine if it needs to download the - * segments to check if any are actually multi-valued. - */ - @JsonIgnore - public boolean canBeMultiValued() - { - return false; - } - @JsonIgnore public DimensionHandler getDimensionHandler() { diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/NewSpatialDimensionSchema.java b/processing/src/main/java/org/apache/druid/data/input/impl/NewSpatialDimensionSchema.java index 0de2f77bbc5..bd1fd8292ae 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/NewSpatialDimensionSchema.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/NewSpatialDimensionSchema.java @@ -69,12 +69,6 @@ public class NewSpatialDimensionSchema extends DimensionSchema return ColumnType.STRING; } - @Override - public boolean canBeMultiValued() - { - return true; - } - @Override public DimensionHandler getDimensionHandler() { diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java index bd5314c636b..2af2fbbaac2 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java @@ -64,12 +64,6 @@ public class StringDimensionSchema extends DimensionSchema return ColumnType.STRING; } - @Override - public boolean canBeMultiValued() - { - return true; - } - @Override public DimensionHandler getDimensionHandler() { --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
