This is an automated email from the ASF dual-hosted git repository.
abhishek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new a315387e0af fix issue with projections when base table has strings
with empty columns (#17906)
a315387e0af is described below
commit a315387e0afbf11b499bfc2a9da49a0f36abd652
Author: Clint Wylie <[email protected]>
AuthorDate: Mon Apr 14 05:05:15 2025 -0700
fix issue with projections when base table has strings with empty columns
(#17906)
changes:
* projections now use NullColumnPartSerde to store a null column if
`merger.hasOnlyNulls()` reports true
* add projection test to ensure missing columns work correctly no matter
how `storeEmptyColumns` is set
---
.../org/apache/druid/segment/IndexMergerV9.java | 23 ++-
.../druid/segment/CursorFactoryProjectionTest.java | 175 ++++++++++++++-------
2 files changed, 138 insertions(+), 60 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java
b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java
index e0666b18a97..658242a6ed7 100644
--- a/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java
+++ b/processing/src/main/java/org/apache/druid/segment/IndexMergerV9.java
@@ -533,12 +533,27 @@ public class IndexMergerV9 implements IndexMerger
for (int i = 0; i < dimensions.size(); i++) {
final String dimension = dimensions.get(i);
- DimensionMergerV9 merger = mergers.get(i);
+ final DimensionMergerV9 merger = mergers.get(i);
merger.writeIndexes(rowNumConversions);
- if (!merger.hasOnlyNulls()) {
- ColumnDescriptor columnDesc = merger.makeColumnDescriptor();
- makeColumn(smoosher, Projections.getProjectionSmooshV9FileName(spec,
dimension), columnDesc);
+ final ColumnDescriptor columnDesc;
+ if (merger.hasOnlyNulls()) {
+ // synthetic null column descriptor if merger participates in
generic null column stuff
+ // always write a null column if hasOnlyNulls is true. This is
correct regardless of how storeEmptyColumns is
+ // set because:
+ // - if storeEmptyColumns is true, the base table also does this,
+ // - if storeEmptyColumns is false, the base table omits the column
from the dimensions list as if it does not
+ // exist, however for projections the dimensions list is always
populated by the projection schema, so a
+ // column always needs to exist to not run into null pointer
exceptions.
+ columnDesc = ColumnDescriptor
+ .builder()
+
.setValueType(columnFormats.get(dimension).getLogicalType().getType())
+ .addSerde(new NullColumnPartSerde(rowCount,
indexSpec.getBitmapSerdeFactory()))
+ .build();
+ } else {
+ // use merger descriptor, merger either has values or handles it own
null column storage details
+ columnDesc = merger.makeColumnDescriptor();
}
+ makeColumn(smoosher, Projections.getProjectionSmooshV9FileName(spec,
dimension), columnDesc);
}
progress.stopSection(section2);
diff --git
a/processing/src/test/java/org/apache/druid/segment/CursorFactoryProjectionTest.java
b/processing/src/test/java/org/apache/druid/segment/CursorFactoryProjectionTest.java
index 7bc570a6ed6..d9002fbfc93 100644
---
a/processing/src/test/java/org/apache/druid/segment/CursorFactoryProjectionTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/CursorFactoryProjectionTest.java
@@ -251,6 +251,14 @@ public class CursorFactoryProjectionTest extends
InitializedNullHandlingTest
new AggregatorFactory[]{
new LongSumAggregatorFactory("_c_sum", "c")
}
+ ),
+ new AggregateProjectionSpec(
+ "missing_column",
+ VirtualColumns.EMPTY,
+ List.of(new StringDimensionSchema("missing")),
+ new AggregatorFactory[]{
+ new LongSumAggregatorFactory("csum", "c")
+ }
)
);
@@ -314,7 +322,7 @@ public class CursorFactoryProjectionTest extends
InitializedNullHandlingTest
))
.collect(Collectors.toList());
- @Parameterized.Parameters(name = "name: {0}, sortByDim: {3}, autoSchema:
{4}")
+ @Parameterized.Parameters(name = "name: {0}, sortByDim: {5}, autoSchema:
{6}")
public static Collection<?> constructorFeeder()
{
final List<Object[]> constructors = new ArrayList<>();
@@ -326,7 +334,8 @@ public class CursorFactoryProjectionTest extends
InitializedNullHandlingTest
new StringDimensionSchema("b"),
new LongDimensionSchema("c"),
new DoubleDimensionSchema("d"),
- new FloatDimensionSchema("e")
+ new FloatDimensionSchema("e"),
+ new StringDimensionSchema("missing")
)
);
@@ -362,56 +371,59 @@ public class CursorFactoryProjectionTest extends
InitializedNullHandlingTest
.collect(Collectors.toList());
for (boolean incremental : new boolean[]{true, false}) {
- for (boolean sortByDim : new boolean[]{/*true,*/ false}) {
- for (boolean autoSchema : new boolean[]{/*true,*/ false}) {
- final DimensionsSpec dims;
- final DimensionsSpec rollupDims;
- if (sortByDim) {
- if (autoSchema) {
- dims = dimsOrdered.withDimensions(autoDims);
- rollupDims = rollupDimsOrdered.withDimensions(rollupAutoDims);
+ for (boolean sortByDim : new boolean[]{true, false}) {
+ for (boolean autoSchema : new boolean[]{false, true}) {
+ for (boolean writeNullColumns : new boolean[]{true, false}) {
+
+ final DimensionsSpec dims;
+ final DimensionsSpec rollupDims;
+ if (sortByDim) {
+ if (autoSchema) {
+ dims = dimsOrdered.withDimensions(autoDims);
+ rollupDims = rollupDimsOrdered.withDimensions(rollupAutoDims);
+ } else {
+ dims = dimsOrdered;
+ rollupDims = rollupDimsOrdered;
+ }
} else {
- dims = dimsOrdered;
- rollupDims = rollupDimsOrdered;
+ if (autoSchema) {
+ dims = dimsTimeOrdered.withDimensions(autoDims);
+ rollupDims = rollupDimsTimeOrdered.withDimensions(autoDims);
+ } else {
+ dims = dimsTimeOrdered;
+ rollupDims = rollupDimsTimeOrdered;
+ }
}
- } else {
- if (autoSchema) {
- dims = dimsTimeOrdered.withDimensions(autoDims);
- rollupDims = rollupDimsTimeOrdered.withDimensions(autoDims);
+ if (incremental) {
+ IncrementalIndex index = CLOSER.register(makeBuilder(dims,
autoSchema, writeNullColumns).buildIncrementalIndex());
+ IncrementalIndex rollupIndex = CLOSER.register(
+ makeRollupBuilder(rollupDims, rollupAggs,
autoSchema).buildIncrementalIndex()
+ );
+ constructors.add(new Object[]{
+ "incrementalIndex",
+ new IncrementalIndexCursorFactory(index),
+ new IncrementalIndexTimeBoundaryInspector(index),
+ new IncrementalIndexCursorFactory(rollupIndex),
+ new IncrementalIndexTimeBoundaryInspector(rollupIndex),
+ sortByDim,
+ autoSchema
+ });
} else {
- dims = dimsTimeOrdered;
- rollupDims = rollupDimsTimeOrdered;
+ QueryableIndex index = CLOSER.register(makeBuilder(dims,
autoSchema, writeNullColumns).buildMMappedIndex());
+ QueryableIndex rollupIndex = CLOSER.register(
+ makeRollupBuilder(rollupDims, rollupAggs,
autoSchema).buildMMappedIndex()
+ );
+ constructors.add(new Object[]{
+ "queryableIndex",
+ new QueryableIndexCursorFactory(index),
+ QueryableIndexTimeBoundaryInspector.create(index),
+ new QueryableIndexCursorFactory(rollupIndex),
+ QueryableIndexTimeBoundaryInspector.create(rollupIndex),
+ sortByDim,
+ autoSchema
+ });
}
}
- if (incremental) {
- IncrementalIndex index = CLOSER.register(makeBuilder(dims,
autoSchema).buildIncrementalIndex());
- IncrementalIndex rollupIndex = CLOSER.register(
- makeRollupBuilder(rollupDims, rollupAggs,
autoSchema).buildIncrementalIndex()
- );
- constructors.add(new Object[]{
- "incrementalIndex",
- new IncrementalIndexCursorFactory(index),
- new IncrementalIndexTimeBoundaryInspector(index),
- new IncrementalIndexCursorFactory(rollupIndex),
- new IncrementalIndexTimeBoundaryInspector(rollupIndex),
- sortByDim,
- autoSchema
- });
- } else {
- QueryableIndex index = CLOSER.register(makeBuilder(dims,
autoSchema).buildMMappedIndex());
- QueryableIndex rollupIndex = CLOSER.register(
- makeRollupBuilder(rollupDims, rollupAggs,
autoSchema).buildMMappedIndex()
- );
- constructors.add(new Object[]{
- "queryableIndex",
- new QueryableIndexCursorFactory(index),
- QueryableIndexTimeBoundaryInspector.create(index),
- new QueryableIndexCursorFactory(rollupIndex),
- QueryableIndexTimeBoundaryInspector.create(rollupIndex),
- sortByDim,
- autoSchema
- });
- }
}
}
}
@@ -796,6 +808,43 @@ public class CursorFactoryProjectionTest extends
InitializedNullHandlingTest
);
}
+ @Test
+ public void testProjectionSingleDimMissing()
+ {
+ // test can use the single dimension projection
+ final GroupByQuery query =
+ GroupByQuery.builder()
+ .setDataSource("test")
+ .setGranularity(Granularities.ALL)
+ .setInterval(Intervals.ETERNITY)
+ .addDimension("missing")
+ .addAggregator(new LongSumAggregatorFactory("c_sum", "c"))
+ .build();
+ final CursorBuildSpec buildSpec =
GroupingEngine.makeCursorBuildSpec(query, null);
+ try (final CursorHolder cursorHolder =
projectionsCursorFactory.makeCursorHolder(buildSpec)) {
+ final Cursor cursor = cursorHolder.asCursor();
+ int rowCount = 0;
+ while (!cursor.isDone()) {
+ rowCount++;
+ cursor.advance();
+ }
+ Assert.assertEquals(1, rowCount);
+ }
+ final Sequence<ResultRow> resultRows = groupingEngine.process(
+ query,
+ projectionsCursorFactory,
+ projectionsTimeBoundaryInspector,
+ nonBlockingPool,
+ null
+ );
+ final List<ResultRow> results = resultRows.toList();
+ Assert.assertEquals(1, results.size());
+ Assert.assertArrayEquals(
+ new Object[]{null, 19L},
+ results.get(0).getArray()
+ );
+ }
+
@Test
public void testProjectionSingleDimFilter()
{
@@ -1390,17 +1439,29 @@ public class CursorFactoryProjectionTest extends
InitializedNullHandlingTest
);
final List<ResultRow> results = resultRows.toList();
Assert.assertEquals(2, results.size());
- Assert.assertArrayEquals(
- new Object[]{"afoo", 7L},
- results.get(0).getArray()
- );
- Assert.assertArrayEquals(
- new Object[]{"bfoo", 12L},
- results.get(1).getArray()
- );
+ if (!(projectionsCursorFactory instanceof QueryableIndexCursorFactory) ||
!autoSchema) {
+ Assert.assertArrayEquals(
+ new Object[]{"afoo", 7L},
+ results.get(0).getArray()
+ );
+ Assert.assertArrayEquals(
+ new Object[]{"bfoo", 12L},
+ results.get(1).getArray()
+ );
+ } else {
+ // inconsistent ordering since query isn't ordering
+ Assert.assertArrayEquals(
+ new Object[]{"bfoo", 12L},
+ results.get(0).getArray()
+ );
+ Assert.assertArrayEquals(
+ new Object[]{"afoo", 7L},
+ results.get(1).getArray()
+ );
+ }
}
- private static IndexBuilder makeBuilder(DimensionsSpec dimensionsSpec,
boolean autoSchema)
+ private static IndexBuilder makeBuilder(DimensionsSpec dimensionsSpec,
boolean autoSchema, boolean writeNullColumns)
{
File tmp = FileUtils.createTempDir();
CLOSER.register(tmp::delete);
@@ -1414,6 +1475,7 @@ public class CursorFactoryProjectionTest extends
InitializedNullHandlingTest
.withProjections(autoSchema ?
AUTO_PROJECTIONS : PROJECTIONS)
.build()
)
+ .writeNullColumns(writeNullColumns)
.rows(ROWS);
}
@@ -1432,6 +1494,7 @@ public class CursorFactoryProjectionTest extends
InitializedNullHandlingTest
.withProjections(autoSchema ?
AUTO_ROLLUP_PROJECTIONS : ROLLUP_PROJECTIONS)
.build()
)
+ .writeNullColumns(true)
.rows(ROLLUP_ROWS);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]