This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 933db53658b fix bug with compacting segments when ordering contains columns not present in dimensions list on any segments (#19096)
933db53658b is described below
commit 933db53658b33551fc2bee3c593eefce5867b639
Author: Clint Wylie <[email protected]>
AuthorDate: Thu Mar 5 11:55:07 2026 -0800
fix bug with compacting segments when ordering contains columns not present in dimensions list on any segments (#19096)
---
.../embedded/compact/CompactionSupervisorTest.java | 52 ++++++++++++++++++++++
.../druid/indexing/common/task/CompactionTask.java | 6 +--
2 files changed, 53 insertions(+), 5 deletions(-)
diff --git
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionSupervisorTest.java
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionSupervisorTest.java
index 35273779997..fdfe1e1430b 100644
---
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionSupervisorTest.java
+++
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionSupervisorTest.java
@@ -23,6 +23,7 @@ import com.fasterxml.jackson.core.type.TypeReference;
import org.apache.druid.catalog.guice.CatalogClientModule;
import org.apache.druid.catalog.guice.CatalogCoordinatorModule;
import org.apache.druid.common.utils.IdUtils;
+import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.indexer.CompactionEngine;
import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec;
import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
@@ -75,6 +76,7 @@ import org.apache.druid.testing.embedded.EmbeddedOverlord;
import org.apache.druid.testing.embedded.EmbeddedRouter;
import org.apache.druid.testing.embedded.indexing.MoreResources;
import org.apache.druid.testing.embedded.junit5.EmbeddedClusterTestBase;
+import org.apache.druid.timeline.DataSegment;
import org.hamcrest.Matcher;
import org.hamcrest.Matchers;
import org.joda.time.DateTime;
@@ -553,6 +555,56 @@ public class CompactionSupervisorTest extends EmbeddedClusterTestBase
);
}
+ @MethodSource("getEngine")
+ @ParameterizedTest(name = "compactionEngine={0}")
+ public void test_compaction_legacy_string_discovery_sparse_column(
+ CompactionEngine compactionEngine
+ )
+ {
+    // test for a bug encountered where ordering contained columns not in dimensions list
+ configureCompaction(compactionEngine);
+ String jsonallnull =
+ """
+        {"timestamp": "2026-03-04T00:00:00", "string":[], "another_string": "a"}
+        {"timestamp": "2026-03-04T00:00:00", "string":[], "another_string": "b"}
+ """;
+
+ final TaskBuilder.Index task = TaskBuilder
+ .ofTypeIndex()
+ .dataSource(dataSource)
+ .jsonInputFormat()
+ .inlineInputSourceWithData(jsonallnull)
+ .isoTimestampColumn("timestamp")
+        .dataSchema(builder -> builder.withDimensions(DimensionsSpec.builder().build()))
+ .segmentGranularity("DAY");
+
+ cluster.callApi().runTask(task.withId(IdUtils.getRandomId()), overlord);
+    cluster.callApi().waitForAllSegmentsToBeAvailable(dataSource, coordinator, broker);
+
+    List<DataSegment> segments = cluster.callApi().getVisibleUsedSegments(dataSource, overlord).stream().toList();
+ Assertions.assertEquals(1, segments.size());
+ Assertions.assertEquals(1, segments.get(0).getDimensions().size());
+
+ // switch to year granularity to trigger compaction
+ InlineSchemaDataSourceCompactionConfig config =
+ InlineSchemaDataSourceCompactionConfig
+ .builder()
+ .forDataSource(dataSource)
+ .withSkipOffsetFromLatest(Period.seconds(0))
+ .withGranularitySpec(
+            new UserCompactionTaskGranularityConfig(Granularities.YEAR, null, null)
+ )
+        .withTuningConfig(createTuningConfigWithPartitionsSpec(new DynamicPartitionsSpec(null, null)))
+ .build();
+
+ runCompactionWithSpec(config);
+ waitForAllCompactionTasksToFinish();
+
+    segments = cluster.callApi().getVisibleUsedSegments(dataSource, overlord).stream().toList();
+ Assertions.assertEquals(1, segments.size());
+ Assertions.assertEquals(1, segments.get(0).getDimensions().size());
+ }
+
private int getTotalRowCount()
{
    String sql = StringUtils.format("SELECT COUNT(*) as cnt FROM \"%s\"", dataSource);
diff --git
a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
index 65bdee9a04a..5c431648169 100644
---
a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
+++
b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
@@ -956,11 +956,7 @@ public class CompactionTask extends AbstractBatchIndexTask implements PendingSeg
           if (ColumnHolder.TIME_COLUMN_NAME.equals(dimName) && !includeTimeAsDimension) {
return null;
} else {
- return Preconditions.checkNotNull(
- dimensionSchemaMap.get(dimName),
- "Cannot find dimension[%s] from dimensionSchemaMap",
- dimName
- );
+ return dimensionSchemaMap.get(dimName);
}
})
.filter(Objects::nonNull)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]