This is an automated email from the ASF dual-hosted git repository.

abhishek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new a315387e0af fix issue with projections when base table has strings 
with empty columns (#17906)
a315387e0af is described below

commit a315387e0afbf11b499bfc2a9da49a0f36abd652
Author: Clint Wylie <[email protected]>
AuthorDate: Mon Apr 14 05:05:15 2025 -0700

    fix issue with projections when base table has strings with empty columns 
(#17906)
    
    changes:
    * projections now use NullColumnPartSerde to store a null column if 
`merger.hasOnlyNulls()` reports true
    * add projection test to ensure missing columns work correctly no matter 
how `storeEmptyColumns` is set
---
 .../org/apache/druid/segment/IndexMergerV9.java    |  23 ++-
 .../druid/segment/CursorFactoryProjectionTest.java | 175 ++++++++++++++-------
 2 files changed, 138 insertions(+), 60 deletions(-)

diff --git 
a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java 
b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java
index e0666b18a97..658242a6ed7 100644
--- a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java
+++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java
@@ -533,12 +533,27 @@ public class IndexMergerV9 implements IndexMerger
 
       for (int i = 0; i < dimensions.size(); i++) {
         final String dimension = dimensions.get(i);
-        DimensionMergerV9 merger = mergers.get(i);
+        final DimensionMergerV9 merger = mergers.get(i);
         merger.writeIndexes(rowNumConversions);
-        if (!merger.hasOnlyNulls()) {
-          ColumnDescriptor columnDesc = merger.makeColumnDescriptor();
-          makeColumn(smoosher, Projections.getProjectionSmooshV9FileName(spec, 
dimension), columnDesc);
+        final ColumnDescriptor columnDesc;
+        if (merger.hasOnlyNulls()) {
+          // synthetic null column descriptor if merger participates in 
generic null column stuff
+          // always write a null column if hasOnlyNulls is true. This is 
correct regardless of how storeEmptyColumns is
+          // set because:
+          // - if storeEmptyColumns is true, the base table also does this,
+          // - if storeEmptyColumns is false, the base table omits the column 
from the dimensions list as if it does not
+          //   exist, however for projections the dimensions list is always 
populated by the projection schema, so a
+          //   column always needs to exist to not run into null pointer 
exceptions.
+          columnDesc = ColumnDescriptor
+              .builder()
+              
.setValueType(columnFormats.get(dimension).getLogicalType().getType())
+              .addSerde(new NullColumnPartSerde(rowCount, 
indexSpec.getBitmapSerdeFactory()))
+              .build();
+        } else {
+          // use merger descriptor, merger either has values or handles it own 
null column storage details
+          columnDesc = merger.makeColumnDescriptor();
         }
+        makeColumn(smoosher, Projections.getProjectionSmooshV9FileName(spec, 
dimension), columnDesc);
       }
 
       progress.stopSection(section2);
diff --git 
a/processing/src/test/java/org/apache/druid/segment/CursorFactoryProjectionTest.java
 
b/processing/src/test/java/org/apache/druid/segment/CursorFactoryProjectionTest.java
index 7bc570a6ed6..d9002fbfc93 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/CursorFactoryProjectionTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/CursorFactoryProjectionTest.java
@@ -251,6 +251,14 @@ public class CursorFactoryProjectionTest extends 
InitializedNullHandlingTest
           new AggregatorFactory[]{
               new LongSumAggregatorFactory("_c_sum", "c")
           }
+      ),
+      new AggregateProjectionSpec(
+          "missing_column",
+          VirtualColumns.EMPTY,
+          List.of(new StringDimensionSchema("missing")),
+          new AggregatorFactory[]{
+              new LongSumAggregatorFactory("csum", "c")
+          }
       )
   );
 
@@ -314,7 +322,7 @@ public class CursorFactoryProjectionTest extends 
InitializedNullHandlingTest
                         ))
                         .collect(Collectors.toList());
 
-  @Parameterized.Parameters(name = "name: {0}, sortByDim: {3}, autoSchema: 
{4}")
+  @Parameterized.Parameters(name = "name: {0}, sortByDim: {5}, autoSchema: 
{6}")
   public static Collection<?> constructorFeeder()
   {
     final List<Object[]> constructors = new ArrayList<>();
@@ -326,7 +334,8 @@ public class CursorFactoryProjectionTest extends 
InitializedNullHandlingTest
                               new StringDimensionSchema("b"),
                               new LongDimensionSchema("c"),
                               new DoubleDimensionSchema("d"),
-                              new FloatDimensionSchema("e")
+                              new FloatDimensionSchema("e"),
+                              new StringDimensionSchema("missing")
                           )
                       );
 
@@ -362,56 +371,59 @@ public class CursorFactoryProjectionTest extends 
InitializedNullHandlingTest
                                                             
.collect(Collectors.toList());
 
     for (boolean incremental : new boolean[]{true, false}) {
-      for (boolean sortByDim : new boolean[]{/*true,*/ false}) {
-        for (boolean autoSchema : new boolean[]{/*true,*/ false}) {
-          final DimensionsSpec dims;
-          final DimensionsSpec rollupDims;
-          if (sortByDim) {
-            if (autoSchema) {
-              dims = dimsOrdered.withDimensions(autoDims);
-              rollupDims = rollupDimsOrdered.withDimensions(rollupAutoDims);
+      for (boolean sortByDim : new boolean[]{true, false}) {
+        for (boolean autoSchema : new boolean[]{false, true}) {
+          for (boolean writeNullColumns : new boolean[]{true, false}) {
+
+            final DimensionsSpec dims;
+            final DimensionsSpec rollupDims;
+            if (sortByDim) {
+              if (autoSchema) {
+                dims = dimsOrdered.withDimensions(autoDims);
+                rollupDims = rollupDimsOrdered.withDimensions(rollupAutoDims);
+              } else {
+                dims = dimsOrdered;
+                rollupDims = rollupDimsOrdered;
+              }
             } else {
-              dims = dimsOrdered;
-              rollupDims = rollupDimsOrdered;
+              if (autoSchema) {
+                dims = dimsTimeOrdered.withDimensions(autoDims);
+                rollupDims = rollupDimsTimeOrdered.withDimensions(autoDims);
+              } else {
+                dims = dimsTimeOrdered;
+                rollupDims = rollupDimsTimeOrdered;
+              }
             }
-          } else {
-            if (autoSchema) {
-              dims = dimsTimeOrdered.withDimensions(autoDims);
-              rollupDims = rollupDimsTimeOrdered.withDimensions(autoDims);
+            if (incremental) {
+              IncrementalIndex index = CLOSER.register(makeBuilder(dims, 
autoSchema, writeNullColumns).buildIncrementalIndex());
+              IncrementalIndex rollupIndex = CLOSER.register(
+                  makeRollupBuilder(rollupDims, rollupAggs, 
autoSchema).buildIncrementalIndex()
+              );
+              constructors.add(new Object[]{
+                  "incrementalIndex",
+                  new IncrementalIndexCursorFactory(index),
+                  new IncrementalIndexTimeBoundaryInspector(index),
+                  new IncrementalIndexCursorFactory(rollupIndex),
+                  new IncrementalIndexTimeBoundaryInspector(rollupIndex),
+                  sortByDim,
+                  autoSchema
+              });
             } else {
-              dims = dimsTimeOrdered;
-              rollupDims = rollupDimsTimeOrdered;
+              QueryableIndex index = CLOSER.register(makeBuilder(dims, 
autoSchema, writeNullColumns).buildMMappedIndex());
+              QueryableIndex rollupIndex = CLOSER.register(
+                  makeRollupBuilder(rollupDims, rollupAggs, 
autoSchema).buildMMappedIndex()
+              );
+              constructors.add(new Object[]{
+                  "queryableIndex",
+                  new QueryableIndexCursorFactory(index),
+                  QueryableIndexTimeBoundaryInspector.create(index),
+                  new QueryableIndexCursorFactory(rollupIndex),
+                  QueryableIndexTimeBoundaryInspector.create(rollupIndex),
+                  sortByDim,
+                  autoSchema
+              });
             }
           }
-          if (incremental) {
-            IncrementalIndex index = CLOSER.register(makeBuilder(dims, 
autoSchema).buildIncrementalIndex());
-            IncrementalIndex rollupIndex = CLOSER.register(
-                makeRollupBuilder(rollupDims, rollupAggs, 
autoSchema).buildIncrementalIndex()
-            );
-            constructors.add(new Object[]{
-                "incrementalIndex",
-                new IncrementalIndexCursorFactory(index),
-                new IncrementalIndexTimeBoundaryInspector(index),
-                new IncrementalIndexCursorFactory(rollupIndex),
-                new IncrementalIndexTimeBoundaryInspector(rollupIndex),
-                sortByDim,
-                autoSchema
-            });
-          } else {
-            QueryableIndex index = CLOSER.register(makeBuilder(dims, 
autoSchema).buildMMappedIndex());
-            QueryableIndex rollupIndex = CLOSER.register(
-                makeRollupBuilder(rollupDims, rollupAggs, 
autoSchema).buildMMappedIndex()
-            );
-            constructors.add(new Object[]{
-                "queryableIndex",
-                new QueryableIndexCursorFactory(index),
-                QueryableIndexTimeBoundaryInspector.create(index),
-                new QueryableIndexCursorFactory(rollupIndex),
-                QueryableIndexTimeBoundaryInspector.create(rollupIndex),
-                sortByDim,
-                autoSchema
-            });
-          }
         }
       }
     }
@@ -796,6 +808,43 @@ public class CursorFactoryProjectionTest extends 
InitializedNullHandlingTest
     );
   }
 
+  @Test
+  public void testProjectionSingleDimMissing()
+  {
+    // test can use the single dimension projection
+    final GroupByQuery query =
+        GroupByQuery.builder()
+                    .setDataSource("test")
+                    .setGranularity(Granularities.ALL)
+                    .setInterval(Intervals.ETERNITY)
+                    .addDimension("missing")
+                    .addAggregator(new LongSumAggregatorFactory("c_sum", "c"))
+                    .build();
+    final CursorBuildSpec buildSpec = 
GroupingEngine.makeCursorBuildSpec(query, null);
+    try (final CursorHolder cursorHolder = 
projectionsCursorFactory.makeCursorHolder(buildSpec)) {
+      final Cursor cursor = cursorHolder.asCursor();
+      int rowCount = 0;
+      while (!cursor.isDone()) {
+        rowCount++;
+        cursor.advance();
+      }
+      Assert.assertEquals(1, rowCount);
+    }
+    final Sequence<ResultRow> resultRows = groupingEngine.process(
+        query,
+        projectionsCursorFactory,
+        projectionsTimeBoundaryInspector,
+        nonBlockingPool,
+        null
+    );
+    final List<ResultRow> results = resultRows.toList();
+    Assert.assertEquals(1, results.size());
+    Assert.assertArrayEquals(
+        new Object[]{null, 19L},
+        results.get(0).getArray()
+    );
+  }
+
   @Test
   public void testProjectionSingleDimFilter()
   {
@@ -1390,17 +1439,29 @@ public class CursorFactoryProjectionTest extends 
InitializedNullHandlingTest
     );
     final List<ResultRow> results = resultRows.toList();
     Assert.assertEquals(2, results.size());
-    Assert.assertArrayEquals(
-        new Object[]{"afoo", 7L},
-        results.get(0).getArray()
-    );
-    Assert.assertArrayEquals(
-        new Object[]{"bfoo", 12L},
-        results.get(1).getArray()
-    );
+    if (!(projectionsCursorFactory instanceof QueryableIndexCursorFactory) || 
!autoSchema) {
+      Assert.assertArrayEquals(
+          new Object[]{"afoo", 7L},
+          results.get(0).getArray()
+      );
+      Assert.assertArrayEquals(
+          new Object[]{"bfoo", 12L},
+          results.get(1).getArray()
+      );
+    } else {
+      // inconsistent ordering since query isn't ordering
+      Assert.assertArrayEquals(
+          new Object[]{"bfoo", 12L},
+          results.get(0).getArray()
+      );
+      Assert.assertArrayEquals(
+          new Object[]{"afoo", 7L},
+          results.get(1).getArray()
+      );
+    }
   }
 
-  private static IndexBuilder makeBuilder(DimensionsSpec dimensionsSpec, 
boolean autoSchema)
+  private static IndexBuilder makeBuilder(DimensionsSpec dimensionsSpec, 
boolean autoSchema, boolean writeNullColumns)
   {
     File tmp = FileUtils.createTempDir();
     CLOSER.register(tmp::delete);
@@ -1414,6 +1475,7 @@ public class CursorFactoryProjectionTest extends 
InitializedNullHandlingTest
                                                  .withProjections(autoSchema ? 
AUTO_PROJECTIONS : PROJECTIONS)
                                                  .build()
                        )
+                       .writeNullColumns(writeNullColumns)
                        .rows(ROWS);
   }
 
@@ -1432,6 +1494,7 @@ public class CursorFactoryProjectionTest extends 
InitializedNullHandlingTest
                                                  .withProjections(autoSchema ? 
AUTO_ROLLUP_PROJECTIONS : ROLLUP_PROJECTIONS)
                                                  .build()
                        )
+                       .writeNullColumns(true)
                        .rows(ROLLUP_ROWS);
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to