This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 6ab65f8cf7 Handle the case when enableNullHandling is true and an
aggregation function is used w/ a column that has an empty null bitmap (#9566)
6ab65f8cf7 is described below
commit 6ab65f8cf734a40bc56c26534c2fb82a2c3c8849
Author: nizarhejazi <[email protected]>
AuthorDate: Tue Oct 11 11:31:11 2022 -0700
Handle the case when enableNullHandling is true and an aggregation function
is used w/ a column that has an empty null bitmap (#9566)
---
.../function/MaxAggregationFunction.java | 31 +++--
.../function/MinAggregationFunction.java | 30 +++--
.../function/SumAggregationFunction.java | 42 +++---
.../pinot/queries/NullEnabledQueriesTest.java | 141 ++++++++++++++++-----
4 files changed, 167 insertions(+), 77 deletions(-)
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MaxAggregationFunction.java
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MaxAggregationFunction.java
index 2d0bac2803..79c08bd2be 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MaxAggregationFunction.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MaxAggregationFunction.java
@@ -72,11 +72,13 @@ public class MaxAggregationFunction extends
BaseSingleInputAggregationFunction<D
Map<ExpressionContext, BlockValSet> blockValSetMap) {
BlockValSet blockValSet = blockValSetMap.get(_expression);
if (_nullHandlingEnabled) {
+ // TODO: avoid the null bitmap check when it is null or empty for better
performance.
RoaringBitmap nullBitmap = blockValSet.getNullBitmap();
- if (nullBitmap != null && !nullBitmap.isEmpty()) {
- aggregateNullHandlingEnabled(length, aggregationResultHolder,
blockValSet, nullBitmap);
- return;
+ if (nullBitmap == null) {
+ nullBitmap = new RoaringBitmap();
}
+ aggregateNullHandlingEnabled(length, aggregationResultHolder,
blockValSet, nullBitmap);
+ return;
}
switch (blockValSet.getValueType().getStoredType()) {
@@ -219,20 +221,21 @@ public class MaxAggregationFunction extends
BaseSingleInputAggregationFunction<D
BlockValSet blockValSet = blockValSetMap.get(_expression);
if (_nullHandlingEnabled) {
RoaringBitmap nullBitmap = blockValSet.getNullBitmap();
- if (nullBitmap != null && !nullBitmap.isEmpty()) {
- if (nullBitmap.getCardinality() < length) {
- double[] valueArray = blockValSet.getDoubleValuesSV();
- for (int i = 0; i < length; i++) {
- double value = valueArray[i];
- int groupKey = groupKeyArray[i];
- Double result = groupByResultHolder.getResult(groupKey);
- if (!nullBitmap.contains(i) && (result == null || value > result))
{
- groupByResultHolder.setValueForKey(groupKey, value);
- }
+ if (nullBitmap == null) {
+ nullBitmap = new RoaringBitmap();
+ }
+ if (nullBitmap.getCardinality() < length) {
+ double[] valueArray = blockValSet.getDoubleValuesSV();
+ for (int i = 0; i < length; i++) {
+ double value = valueArray[i];
+ int groupKey = groupKeyArray[i];
+ Double result = groupByResultHolder.getResult(groupKey);
+ if (!nullBitmap.contains(i) && (result == null || value > result)) {
+ groupByResultHolder.setValueForKey(groupKey, value);
}
}
- return;
}
+ return;
}
double[] valueArray = blockValSet.getDoubleValuesSV();
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MinAggregationFunction.java
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MinAggregationFunction.java
index 2e80387cd2..b2ed9390d3 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MinAggregationFunction.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MinAggregationFunction.java
@@ -73,10 +73,11 @@ public class MinAggregationFunction extends
BaseSingleInputAggregationFunction<D
BlockValSet blockValSet = blockValSetMap.get(_expression);
if (_nullHandlingEnabled) {
RoaringBitmap nullBitmap = blockValSet.getNullBitmap();
- if (nullBitmap != null && !nullBitmap.isEmpty()) {
- aggregateNullHandlingEnabled(length, aggregationResultHolder,
blockValSet, nullBitmap);
- return;
+ if (nullBitmap == null) {
+ nullBitmap = new RoaringBitmap();
}
+ aggregateNullHandlingEnabled(length, aggregationResultHolder,
blockValSet, nullBitmap);
+ return;
}
switch (blockValSet.getValueType().getStoredType()) {
@@ -219,20 +220,21 @@ public class MinAggregationFunction extends
BaseSingleInputAggregationFunction<D
BlockValSet blockValSet = blockValSetMap.get(_expression);
if (_nullHandlingEnabled) {
RoaringBitmap nullBitmap = blockValSet.getNullBitmap();
- if (nullBitmap != null && !nullBitmap.isEmpty()) {
- if (nullBitmap.getCardinality() < length) {
- double[] valueArray = blockValSet.getDoubleValuesSV();
- for (int i = 0; i < length; i++) {
- double value = valueArray[i];
- int groupKey = groupKeyArray[i];
- Double result = groupByResultHolder.getResult(groupKey);
- if (!nullBitmap.contains(i) && (result == null || value < result))
{
- groupByResultHolder.setValueForKey(groupKey, value);
- }
+ if (nullBitmap == null) {
+ nullBitmap = new RoaringBitmap();
+ }
+ if (nullBitmap.getCardinality() < length) {
+ double[] valueArray = blockValSet.getDoubleValuesSV();
+ for (int i = 0; i < length; i++) {
+ double value = valueArray[i];
+ int groupKey = groupKeyArray[i];
+ Double result = groupByResultHolder.getResult(groupKey);
+ if (!nullBitmap.contains(i) && (result == null || value < result)) {
+ groupByResultHolder.setValueForKey(groupKey, value);
}
}
- return;
}
+ return;
}
double[] valueArray = blockValSet.getDoubleValuesSV();
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/SumAggregationFunction.java
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/SumAggregationFunction.java
index 8456eda436..04f63bd4e2 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/SumAggregationFunction.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/SumAggregationFunction.java
@@ -73,10 +73,11 @@ public class SumAggregationFunction extends
BaseSingleInputAggregationFunction<D
BlockValSet blockValSet = blockValSetMap.get(_expression);
if (_nullHandlingEnabled) {
RoaringBitmap nullBitmap = blockValSet.getNullBitmap();
- if (nullBitmap != null && !nullBitmap.isEmpty()) {
- aggregateNullHandlingEnabled(length, aggregationResultHolder,
blockValSet, nullBitmap);
- return;
+ if (nullBitmap == null) {
+ nullBitmap = new RoaringBitmap();
}
+ aggregateNullHandlingEnabled(length, aggregationResultHolder,
blockValSet, nullBitmap);
+ return;
}
double sum = aggregationResultHolder.getDoubleResult();
@@ -207,26 +208,27 @@ public class SumAggregationFunction extends
BaseSingleInputAggregationFunction<D
BlockValSet blockValSet = blockValSetMap.get(_expression);
if (_nullHandlingEnabled) {
RoaringBitmap nullBitmap = blockValSet.getNullBitmap();
- if (nullBitmap != null && !nullBitmap.isEmpty()) {
- if (nullBitmap.getCardinality() < length) {
- double[] valueArray = blockValSet.getDoubleValuesSV();
- for (int i = 0; i < length; i++) {
- if (!nullBitmap.contains(i)) {
- int groupKey = groupKeyArray[i];
- Double result = groupByResultHolder.getResult(groupKey);
- groupByResultHolder.setValueForKey(groupKey, result == null ?
valueArray[i] : result + valueArray[i]);
- // In presto:
- // SELECT sum (cast(id AS DOUBLE)) as sum, min(id) as min,
max(id) as max, key FROM (VALUES (null, 1),
- // (null, 2)) AS t(id, key) GROUP BY key ORDER BY max DESC;
- // sum | min | max | key
- //------+------+------+-----
- // NULL | NULL | NULL | 2
- // NULL | NULL | NULL | 1
- }
+ if (nullBitmap == null) {
+ nullBitmap = new RoaringBitmap();
+ }
+ if (nullBitmap.getCardinality() < length) {
+ double[] valueArray = blockValSet.getDoubleValuesSV();
+ for (int i = 0; i < length; i++) {
+ if (!nullBitmap.contains(i)) {
+ int groupKey = groupKeyArray[i];
+ Double result = groupByResultHolder.getResult(groupKey);
+ groupByResultHolder.setValueForKey(groupKey, result == null ?
valueArray[i] : result + valueArray[i]);
+ // In presto:
+ // SELECT sum (cast(id AS DOUBLE)) as sum, min(id) as min,
max(id) as max, key FROM (VALUES (null, 1),
+ // (null, 2)) AS t(id, key) GROUP BY key ORDER BY max DESC;
+ // sum | min | max | key
+ //------+------+------+-----
+ // NULL | NULL | NULL | 2
+ // NULL | NULL | NULL | 1
}
}
- return;
}
+ return;
}
double[] valueArray = blockValSet.getDoubleValuesSV();
diff --git
a/pinot-core/src/test/java/org/apache/pinot/queries/NullEnabledQueriesTest.java
b/pinot-core/src/test/java/org/apache/pinot/queries/NullEnabledQueriesTest.java
index e4ba37268c..a9ba3aa654 100644
---
a/pinot-core/src/test/java/org/apache/pinot/queries/NullEnabledQueriesTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/queries/NullEnabledQueriesTest.java
@@ -92,7 +92,7 @@ public class NullEnabledQueriesTest extends BaseQueriesTest {
return _indexSegments;
}
- public void createRecords(Number baseValue)
+ public void createRecords(Number baseValue, boolean generateNulls)
throws Exception {
FileUtils.deleteDirectory(INDEX_DIR);
@@ -115,11 +115,12 @@ public class NullEnabledQueriesTest extends
BaseQueriesTest {
record.putValue(KEY_COLUMN, 2);
_sumKey2 += value;
}
- } else {
+ _records.add(record);
+ } else if (generateNulls) {
// Key column value here is null.
record.putValue(COLUMN_NAME, null);
+ _records.add(record);
}
- _records.add(record);
}
}
@@ -160,12 +161,13 @@ public class NullEnabledQueriesTest extends
BaseQueriesTest {
throws Exception {
ColumnDataType columnDataType = ColumnDataType.FLOAT;
float baseValue = RANDOM.nextFloat();
- createRecords(baseValue);
+ boolean generateNulls = true;
+ createRecords(baseValue, generateNulls);
TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE)
.setTableName(RAW_TABLE_NAME)
.build();
setUp(tableConfig, columnDataType.toDataType());
- testQueries(baseValue, columnDataType);
+ testQueries(baseValue, columnDataType, generateNulls);
}
@Test(priority = 1)
@@ -173,7 +175,8 @@ public class NullEnabledQueriesTest extends BaseQueriesTest
{
throws Exception {
ColumnDataType columnDataType = ColumnDataType.FLOAT;
float baseValue = RANDOM.nextFloat();
- createRecords(baseValue);
+ boolean generateNulls = true;
+ createRecords(baseValue, generateNulls);
List<String> noDictionaryColumns = new ArrayList<String>();
noDictionaryColumns.add(COLUMN_NAME);
TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE)
@@ -181,7 +184,7 @@ public class NullEnabledQueriesTest extends BaseQueriesTest
{
.setNoDictionaryColumns(noDictionaryColumns)
.build();
setUp(tableConfig, columnDataType.toDataType());
- testQueries(baseValue, columnDataType);
+ testQueries(baseValue, columnDataType, generateNulls);
}
@Test(priority = 2)
@@ -189,12 +192,13 @@ public class NullEnabledQueriesTest extends
BaseQueriesTest {
throws Exception {
ColumnDataType columnDataType = ColumnDataType.DOUBLE;
double baseValue = RANDOM.nextDouble();
- createRecords(baseValue);
+ boolean generateNulls = true;
+ createRecords(baseValue, generateNulls);
TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE)
.setTableName(RAW_TABLE_NAME)
.build();
setUp(tableConfig, columnDataType.toDataType());
- testQueries(baseValue, columnDataType);
+ testQueries(baseValue, columnDataType, generateNulls);
}
@Test(priority = 3)
@@ -202,7 +206,8 @@ public class NullEnabledQueriesTest extends BaseQueriesTest
{
throws Exception {
ColumnDataType columnDataType = ColumnDataType.DOUBLE;
double baseValue = RANDOM.nextDouble();
- createRecords(baseValue);
+ boolean generateNulls = true;
+ createRecords(baseValue, generateNulls);
List<String> noDictionaryColumns = new ArrayList<String>();
noDictionaryColumns.add(COLUMN_NAME);
TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE)
@@ -210,10 +215,72 @@ public class NullEnabledQueriesTest extends
BaseQueriesTest {
.setNoDictionaryColumns(noDictionaryColumns)
.build();
setUp(tableConfig, columnDataType.toDataType());
- testQueries(baseValue, columnDataType);
+ testQueries(baseValue, columnDataType, generateNulls);
}
- public void testQueries(Number baseValue, ColumnDataType dataType) {
+ @Test(priority = 4)
+ public void testQueriesWithDictFloatColumnNoNullValues()
+ throws Exception {
+ ColumnDataType columnDataType = ColumnDataType.FLOAT;
+ float baseValue = RANDOM.nextFloat();
+ boolean generateNulls = false;
+ createRecords(baseValue, generateNulls);
+ TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE)
+ .setTableName(RAW_TABLE_NAME)
+ .build();
+ setUp(tableConfig, columnDataType.toDataType());
+ testQueries(baseValue, columnDataType, generateNulls);
+ }
+
+ @Test(priority = 5)
+ public void testQueriesWithNoDictFloatColumnNoNullValues()
+ throws Exception {
+ ColumnDataType columnDataType = ColumnDataType.FLOAT;
+ float baseValue = RANDOM.nextFloat();
+ boolean generateNulls = false;
+ createRecords(baseValue, generateNulls);
+ List<String> noDictionaryColumns = new ArrayList<String>();
+ noDictionaryColumns.add(COLUMN_NAME);
+ TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE)
+ .setTableName(RAW_TABLE_NAME)
+ .setNoDictionaryColumns(noDictionaryColumns)
+ .build();
+ setUp(tableConfig, columnDataType.toDataType());
+ testQueries(baseValue, columnDataType, generateNulls);
+ }
+
+ @Test(priority = 6)
+ public void testQueriesWithDictDoubleColumnNoNullValues()
+ throws Exception {
+ ColumnDataType columnDataType = ColumnDataType.DOUBLE;
+ double baseValue = RANDOM.nextDouble();
+ boolean generateNulls = false;
+ createRecords(baseValue, generateNulls);
+ TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE)
+ .setTableName(RAW_TABLE_NAME)
+ .build();
+ setUp(tableConfig, columnDataType.toDataType());
+ testQueries(baseValue, columnDataType, generateNulls);
+ }
+
+ @Test(priority = 7)
+ public void testQueriesWithNoDictDoubleColumnNoNullValues()
+ throws Exception {
+ ColumnDataType columnDataType = ColumnDataType.DOUBLE;
+ double baseValue = RANDOM.nextDouble();
+ boolean generateNulls = false;
+ createRecords(baseValue, generateNulls);
+ List<String> noDictionaryColumns = new ArrayList<String>();
+ noDictionaryColumns.add(COLUMN_NAME);
+ TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE)
+ .setTableName(RAW_TABLE_NAME)
+ .setNoDictionaryColumns(noDictionaryColumns)
+ .build();
+ setUp(tableConfig, columnDataType.toDataType());
+ testQueries(baseValue, columnDataType, generateNulls);
+ }
+
+ public void testQueries(Number baseValue, ColumnDataType dataType, boolean
nullValuesExist) {
DataTableBuilderFactory.setDataTableVersion(DataTableFactory.VERSION_4);
Map<String, String> queryOptions = new HashMap<>();
queryOptions.put("enableNullHandling", "true");
@@ -228,8 +295,9 @@ public class NullEnabledQueriesTest extends BaseQueriesTest
{
ColumnDataType.DOUBLE, ColumnDataType.DOUBLE, ColumnDataType.DOUBLE,
ColumnDataType.LONG, ColumnDataType.INT
}));
List<Object[]> rows = resultTable.getRows();
- assertEquals(rows.size(), 3);
- for (int index = 0; index < 3; index++) {
+ int resultCount = nullValuesExist ? 3 : 2;
+ assertEquals(rows.size(), resultCount);
+ for (int index = 0; index < resultCount; index++) {
Object[] row = rows.get(index);
assertEquals(row.length, 5);
int keyColumnIdx = 4;
@@ -277,7 +345,8 @@ public class NullEnabledQueriesTest extends BaseQueriesTest
{
Object[] row = rows.get(0);
assertEquals(row.length, 4);
// Note: count(*) returns total number of docs (nullable and
non-nullable).
- assertEquals((long) row[0], 1000 * 4);
+ int totalDocs = nullValuesExist ? 1000 : 500;
+ assertEquals((long) row[0], totalDocs * 4);
// count(col) returns the count of non-nullable docs.
assertEquals((long) row[1], 500 * 4);
assertEquals(row[2], baseValue.doubleValue());
@@ -296,7 +365,8 @@ public class NullEnabledQueriesTest extends BaseQueriesTest
{
Object[] row = rows.get(i);
assertEquals(row.length, 2);
if (row[0] != null) {
- assertTrue(Math.abs(((Number) row[0]).doubleValue() -
(baseValue.doubleValue() + i)) < 1e-1);
+ int incValue = nullValuesExist ? i : i * 2;
+ assertTrue(Math.abs(((Number) row[0]).doubleValue() -
(baseValue.doubleValue() + incValue)) < 1e-1);
assertEquals(row[1], 1);
} else {
assertNull(row[1]);
@@ -313,7 +383,8 @@ public class NullEnabledQueriesTest extends BaseQueriesTest
{
assertEquals(dataSchema,
new DataSchema(new String[]{COLUMN_NAME, KEY_COLUMN}, new
ColumnDataType[]{dataType, ColumnDataType.INT}));
List<Object[]> rows = resultTable.getRows();
- assertEquals(rows.size(), 4000);
+ int rowsCount = nullValuesExist ? 4000 : 2000;
+ assertEquals(rows.size(), rowsCount);
int k = 0;
for (int i = 0; i < 2000; i += 4) {
// Null values are inserted at indices where: index % 2 equals 1. Skip
null values.
@@ -331,7 +402,7 @@ public class NullEnabledQueriesTest extends BaseQueriesTest
{
// Note 1: we inserted 500 nulls in _records, and since we query 4
identical index segments, the number of null
// values is: 500 * 4 = 2000.
// Note 2: The default null ordering is 'NULLS LAST', regardless of the
ordering direction.
- for (int i = 2000; i < 4000; i++) {
+ for (int i = 2000; i < rowsCount; i++) {
Object[] values = rows.get(i);
assertEquals(values.length, 2);
assertNull(values[0]);
@@ -361,7 +432,9 @@ public class NullEnabledQueriesTest extends BaseQueriesTest
{
index++;
}
// The default null ordering is 'NULLS LAST'. Therefore, null will
appear as the last record.
- assertNull(rows.get(rows.size() - 1)[0]);
+ if (nullValuesExist) {
+ assertNull(rows.get(rows.size() - 1)[0]);
+ }
}
{
int limit = 40;
@@ -388,7 +461,9 @@ public class NullEnabledQueriesTest extends BaseQueriesTest
{
index++;
}
// The default null ordering is 'NULLS LAST'. Therefore, null will
appear as the last record.
- assertNull(rows.get(rows.size() - 1)[0]);
+ if (nullValuesExist) {
+ assertNull(rows.get(rows.size() - 1)[0]);
+ }
}
{
// This test case was added to validate path-code for distinct w/o order
by.
@@ -434,8 +509,10 @@ public class NullEnabledQueriesTest extends
BaseQueriesTest {
List<Object[]> rows = resultTable.getRows();
assertEquals(rows.size(), 10);
// The default null ordering is 'NULLS LAST'. Therefore, null will
appear as the last record.
- assertNull(rows.get(0)[0]);
- int index = 1;
+ if (nullValuesExist) {
+ assertNull(rows.get(0)[0]);
+ }
+ int index = nullValuesExist ? 1 : 0;
int i = 0;
while (index < rows.size()) {
if ((NUM_RECORDS - i - 1) % 2 == 1) {
@@ -459,7 +536,8 @@ public class NullEnabledQueriesTest extends BaseQueriesTest
{
assertEquals(dataSchema, new DataSchema(new String[]{"count",
COLUMN_NAME},
new ColumnDataType[]{ColumnDataType.LONG, dataType}));
List<Object[]> rows = resultTable.getRows();
- assertEquals(rows.size(), 501);
+ int rowsCount = nullValuesExist ? 501 : 500;
+ assertEquals(rows.size(), rowsCount);
int i = 0;
for (int index = 0; index < 500; index++) {
Object[] row = rows.get(index);
@@ -474,9 +552,11 @@ public class NullEnabledQueriesTest extends
BaseQueriesTest {
i++;
}
// The default null ordering is 'NULLS LAST'.
- Object[] row = rows.get(500);
- assertEquals(row[0], 2000L);
- assertNull(row[1]);
+ if (nullValuesExist) {
+ Object[] row = rows.get(500);
+ assertEquals(row[0], 2000L);
+ assertNull(row[1]);
+ }
}
{
String query = String.format("SELECT SUMPRECISION(%s) AS sum FROM
testTable", COLUMN_NAME);
@@ -638,9 +718,10 @@ public class NullEnabledQueriesTest extends
BaseQueriesTest {
assertEquals(dataSchema, new DataSchema(new String[]{"max", COLUMN_NAME},
new ColumnDataType[]{ColumnDataType.DOUBLE, dataType}));
List<Object[]> rows = resultTable.getRows();
- assertEquals(rows.size(), 501);
+ int rowsCount = 500;
+ assertEquals(rows.size(), rowsCount + (nullValuesExist ? 1 : 0));
int i = 0;
- for (int index = 0; index < 500; index++) {
+ for (int index = 0; index < rowsCount; index++) {
if (i % 2 == 1) {
// Null values are inserted at: index % 2 == 1.
i++;
@@ -651,7 +732,9 @@ public class NullEnabledQueriesTest extends BaseQueriesTest
{
assertTrue(Math.abs(((Number) row[1]).doubleValue() -
(baseValue.doubleValue() + i)) < 1e-1);
i++;
}
- assertNull(rows.get(rows.size() - 1)[0]);
+ if (nullValuesExist) {
+ assertNull(rows.get(rows.size() - 1)[0]);
+ }
}
DataTableBuilderFactory.setDataTableVersion(DataTableBuilderFactory.DEFAULT_VERSION);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]