This is an automated email from the ASF dual-hosted git repository.

cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 933db53658b fix bug with compacting segments when ordering contains columns not present in dimensions list on any segments (#19096)
933db53658b is described below

commit 933db53658b33551fc2bee3c593eefce5867b639
Author: Clint Wylie <[email protected]>
AuthorDate: Thu Mar 5 11:55:07 2026 -0800

    fix bug with compacting segments when ordering contains columns not present in dimensions list on any segments (#19096)
---
 .../embedded/compact/CompactionSupervisorTest.java | 52 ++++++++++++++++++++++
 .../druid/indexing/common/task/CompactionTask.java |  6 +--
 2 files changed, 53 insertions(+), 5 deletions(-)

diff --git 
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionSupervisorTest.java
 
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionSupervisorTest.java
index 35273779997..fdfe1e1430b 100644
--- 
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionSupervisorTest.java
+++ 
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/compact/CompactionSupervisorTest.java
@@ -23,6 +23,7 @@ import com.fasterxml.jackson.core.type.TypeReference;
 import org.apache.druid.catalog.guice.CatalogClientModule;
 import org.apache.druid.catalog.guice.CatalogCoordinatorModule;
 import org.apache.druid.common.utils.IdUtils;
+import org.apache.druid.data.input.impl.DimensionsSpec;
 import org.apache.druid.indexer.CompactionEngine;
 import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec;
 import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
@@ -75,6 +76,7 @@ import org.apache.druid.testing.embedded.EmbeddedOverlord;
 import org.apache.druid.testing.embedded.EmbeddedRouter;
 import org.apache.druid.testing.embedded.indexing.MoreResources;
 import org.apache.druid.testing.embedded.junit5.EmbeddedClusterTestBase;
+import org.apache.druid.timeline.DataSegment;
 import org.hamcrest.Matcher;
 import org.hamcrest.Matchers;
 import org.joda.time.DateTime;
@@ -553,6 +555,56 @@ public class CompactionSupervisorTest extends 
EmbeddedClusterTestBase
     );
   }
 
+  @MethodSource("getEngine")
+  @ParameterizedTest(name = "compactionEngine={0}")
+  public void test_compaction_legacy_string_discovery_sparse_column(
+      CompactionEngine compactionEngine
+  )
+  {
+    // test for a bug encountered where ordering contained columns not in 
dimensions list
+    configureCompaction(compactionEngine);
+    String jsonallnull =
+        """
+            {"timestamp": "2026-03-04T00:00:00", "string":[], 
"another_string": "a"}
+            {"timestamp": "2026-03-04T00:00:00", "string":[], 
"another_string": "b"}
+            """;
+
+    final TaskBuilder.Index task = TaskBuilder
+        .ofTypeIndex()
+        .dataSource(dataSource)
+        .jsonInputFormat()
+        .inlineInputSourceWithData(jsonallnull)
+        .isoTimestampColumn("timestamp")
+        .dataSchema(builder -> 
builder.withDimensions(DimensionsSpec.builder().build()))
+        .segmentGranularity("DAY");
+
+    cluster.callApi().runTask(task.withId(IdUtils.getRandomId()), overlord);
+    cluster.callApi().waitForAllSegmentsToBeAvailable(dataSource, coordinator, 
broker);
+
+    List<DataSegment> segments = 
cluster.callApi().getVisibleUsedSegments(dataSource, 
overlord).stream().toList();
+    Assertions.assertEquals(1, segments.size());
+    Assertions.assertEquals(1, segments.get(0).getDimensions().size());
+
+    // switch to year granularity to trigger compaction
+    InlineSchemaDataSourceCompactionConfig config =
+        InlineSchemaDataSourceCompactionConfig
+            .builder()
+            .forDataSource(dataSource)
+            .withSkipOffsetFromLatest(Period.seconds(0))
+            .withGranularitySpec(
+                new UserCompactionTaskGranularityConfig(Granularities.YEAR, 
null, null)
+            )
+            .withTuningConfig(createTuningConfigWithPartitionsSpec(new 
DynamicPartitionsSpec(null, null)))
+            .build();
+
+    runCompactionWithSpec(config);
+    waitForAllCompactionTasksToFinish();
+
+    segments = cluster.callApi().getVisibleUsedSegments(dataSource, 
overlord).stream().toList();
+    Assertions.assertEquals(1, segments.size());
+    Assertions.assertEquals(1, segments.get(0).getDimensions().size());
+  }
+
   private int getTotalRowCount()
   {
     String sql = StringUtils.format("SELECT COUNT(*) as cnt FROM \"%s\"", 
dataSource);
diff --git 
a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
index 65bdee9a04a..5c431648169 100644
--- 
a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
+++ 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java
@@ -956,11 +956,7 @@ public class CompactionTask extends AbstractBatchIndexTask 
implements PendingSeg
                      if (ColumnHolder.TIME_COLUMN_NAME.equals(dimName) && 
!includeTimeAsDimension) {
                        return null;
                      } else {
-                       return Preconditions.checkNotNull(
-                           dimensionSchemaMap.get(dimName),
-                           "Cannot find dimension[%s] from dimensionSchemaMap",
-                           dimName
-                       );
+                       return dimensionSchemaMap.get(dimName);
                      }
                    })
                    .filter(Objects::nonNull)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to