This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 79b12ed Fix empty data table for distinct query (#6363)
79b12ed is described below
commit 79b12ed36fccd28cd68937ad53d9df76cd29c9b3
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Thu Dec 17 13:39:07 2020 -0800
Fix empty data table for distinct query (#6363)
Support building an empty data table for distinct queries. Currently, an
exception is thrown for distinct queries when all the segments are pruned.
---
.../core/common/datatable/DataTableUtils.java | 95 ++++++++++++++++------
.../core/common/datatable/DataTableUtilsTest.java | 28 +++++--
2 files changed, 94 insertions(+), 29 deletions(-)
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/common/datatable/DataTableUtils.java
b/pinot-core/src/main/java/org/apache/pinot/core/common/datatable/DataTableUtils.java
index b3dea8a..859a65c 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/common/datatable/DataTableUtils.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/common/datatable/DataTableUtils.java
@@ -23,10 +23,14 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
import org.apache.pinot.common.utils.DataTable;
import org.apache.pinot.core.query.aggregation.function.AggregationFunction;
+import
org.apache.pinot.core.query.aggregation.function.DistinctAggregationFunction;
+import org.apache.pinot.core.query.distinct.DistinctTable;
import org.apache.pinot.core.query.request.context.ExpressionContext;
import org.apache.pinot.core.query.request.context.QueryContext;
+import org.apache.pinot.core.query.request.context.utils.QueryContextUtils;
import org.apache.pinot.core.util.QueryOptions;
@@ -86,40 +90,56 @@ public class DataTableUtils {
*/
public static DataTable buildEmptyDataTable(QueryContext queryContext)
throws IOException {
- AggregationFunction[] aggregationFunctions =
queryContext.getAggregationFunctions();
+ if (QueryContextUtils.isSelectionQuery(queryContext)) {
+ return buildEmptyDataTableForSelectionQuery(queryContext);
+ } else if (QueryContextUtils.isAggregationQuery(queryContext)) {
+ return buildEmptyDataTableForAggregationQuery(queryContext);
+ } else {
+ assert QueryContextUtils.isDistinctQuery(queryContext);
+ return buildEmptyDataTableForDistinctQuery(queryContext);
+ }
+ }
- // Selection query.
- if (aggregationFunctions == null) {
- List<ExpressionContext> selectExpressions =
queryContext.getSelectExpressions();
- int numSelectExpressions = selectExpressions.size();
- String[] columnNames = new String[numSelectExpressions];
- for (int i = 0; i < numSelectExpressions; i++) {
- columnNames[i] = selectExpressions.get(i).toString();
- }
- DataSchema.ColumnDataType[] columnDataTypes = new
DataSchema.ColumnDataType[numSelectExpressions];
- // NOTE: Use STRING column data type as default for selection query.
- Arrays.fill(columnDataTypes, DataSchema.ColumnDataType.STRING);
- DataSchema dataSchema = new DataSchema(columnNames, columnDataTypes);
- return new DataTableBuilder(dataSchema).build();
+ /**
+ * Helper method to build an empty data table for selection query.
+ */
+ private static DataTable buildEmptyDataTableForSelectionQuery(QueryContext
queryContext) {
+ List<ExpressionContext> selectExpressions =
queryContext.getSelectExpressions();
+ int numSelectExpressions = selectExpressions.size();
+ String[] columnNames = new String[numSelectExpressions];
+ for (int i = 0; i < numSelectExpressions; i++) {
+ columnNames[i] = selectExpressions.get(i).toString();
}
+ ColumnDataType[] columnDataTypes = new
ColumnDataType[numSelectExpressions];
+ // NOTE: Use STRING column data type as default for selection query
+ Arrays.fill(columnDataTypes, ColumnDataType.STRING);
+ DataSchema dataSchema = new DataSchema(columnNames, columnDataTypes);
+ return new DataTableBuilder(dataSchema).build();
+ }
- // Aggregation query.
+ /**
+ * Helper method to build an empty data table for aggregation query.
+ */
+ private static DataTable buildEmptyDataTableForAggregationQuery(QueryContext
queryContext)
+ throws IOException {
+ AggregationFunction[] aggregationFunctions =
queryContext.getAggregationFunctions();
+ assert aggregationFunctions != null;
int numAggregations = aggregationFunctions.length;
List<ExpressionContext> groupByExpressions =
queryContext.getGroupByExpressions();
if (groupByExpressions != null) {
- // Aggregation group-by query.
+ // Aggregation group-by query
if (new QueryOptions(queryContext.getQueryOptions()).isGroupByModeSQL())
{
// SQL format
int numColumns = groupByExpressions.size() + numAggregations;
String[] columnNames = new String[numColumns];
- DataSchema.ColumnDataType[] columnDataTypes = new
DataSchema.ColumnDataType[numColumns];
+ ColumnDataType[] columnDataTypes = new ColumnDataType[numColumns];
int index = 0;
for (ExpressionContext groupByExpression : groupByExpressions) {
columnNames[index] = groupByExpression.toString();
// Use STRING column data type as default for group-by expressions
- columnDataTypes[index] = DataSchema.ColumnDataType.STRING;
+ columnDataTypes[index] = ColumnDataType.STRING;
index++;
}
for (AggregationFunction aggregationFunction : aggregationFunctions) {
@@ -133,10 +153,9 @@ public class DataTableUtils {
// PQL format
String[] columnNames = new String[]{"functionName",
"GroupByResultMap"};
- DataSchema.ColumnDataType[] columnDataTypes =
- new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.STRING,
DataSchema.ColumnDataType.OBJECT};
+ ColumnDataType[] columnDataTypes = new
ColumnDataType[]{ColumnDataType.STRING, ColumnDataType.OBJECT};
- // Build the data table.
+ // Build the data table
DataTableBuilder dataTableBuilder = new DataTableBuilder(new
DataSchema(columnNames, columnDataTypes));
for (AggregationFunction aggregationFunction : aggregationFunctions) {
dataTableBuilder.startRow();
@@ -148,10 +167,10 @@ public class DataTableUtils {
return dataTableBuilder.build();
}
} else {
- // Aggregation only query.
+ // Aggregation only query
String[] aggregationColumnNames = new String[numAggregations];
- DataSchema.ColumnDataType[] columnDataTypes = new
DataSchema.ColumnDataType[numAggregations];
+ ColumnDataType[] columnDataTypes = new ColumnDataType[numAggregations];
Object[] aggregationResults = new Object[numAggregations];
for (int i = 0; i < numAggregations; i++) {
AggregationFunction aggregationFunction = aggregationFunctions[i];
@@ -162,7 +181,7 @@ public class DataTableUtils {
aggregationFunction.extractAggregationResult(aggregationFunction.createAggregationResultHolder());
}
- // Build the data table.
+ // Build the data table
DataTableBuilder dataTableBuilder = new DataTableBuilder(new
DataSchema(aggregationColumnNames, columnDataTypes));
dataTableBuilder.startRow();
for (int i = 0; i < numAggregations; i++) {
@@ -186,4 +205,32 @@ public class DataTableUtils {
return dataTableBuilder.build();
}
}
+
+ /**
+ * Helper method to build an empty data table for distinct query.
+ */
+ private static DataTable buildEmptyDataTableForDistinctQuery(QueryContext
queryContext)
+ throws IOException {
+ AggregationFunction[] aggregationFunctions =
queryContext.getAggregationFunctions();
+ assert aggregationFunctions != null && aggregationFunctions.length == 1
+ && aggregationFunctions[0] instanceof DistinctAggregationFunction;
+ DistinctAggregationFunction distinctAggregationFunction =
(DistinctAggregationFunction) aggregationFunctions[0];
+
+ // Create the distinct table
+ String[] columnNames = distinctAggregationFunction.getColumns();
+ ColumnDataType[] columnDataTypes = new ColumnDataType[columnNames.length];
+ // NOTE: Use STRING column data type as default for distinct query
+ Arrays.fill(columnDataTypes, ColumnDataType.STRING);
+ DistinctTable distinctTable =
+ new DistinctTable(new DataSchema(columnNames, columnDataTypes),
Collections.emptySet());
+
+ // Build the data table
+ DataTableBuilder dataTableBuilder = new DataTableBuilder(
+ new DataSchema(new
String[]{distinctAggregationFunction.getColumnName()},
+ new ColumnDataType[]{ColumnDataType.OBJECT}));
+ dataTableBuilder.startRow();
+ dataTableBuilder.setColumn(0, distinctTable);
+ dataTableBuilder.finishRow();
+ return dataTableBuilder.build();
+ }
}
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/common/datatable/DataTableUtilsTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/common/datatable/DataTableUtilsTest.java
index 2b2a2a3..f2095ea 100644
---
a/pinot-core/src/test/java/org/apache/pinot/core/common/datatable/DataTableUtilsTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/core/common/datatable/DataTableUtilsTest.java
@@ -21,12 +21,15 @@ package org.apache.pinot.core.common.datatable;
import java.io.IOException;
import java.util.Collections;
import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
import org.apache.pinot.common.utils.DataTable;
+import org.apache.pinot.core.query.distinct.DistinctTable;
import org.apache.pinot.core.query.request.context.QueryContext;
import
org.apache.pinot.core.query.request.context.utils.QueryContextConverterUtils;
import org.testng.annotations.Test;
import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
public class DataTableUtilsTest {
@@ -40,7 +43,7 @@ public class DataTableUtilsTest {
DataTable dataTable = DataTableUtils.buildEmptyDataTable(queryContext);
DataSchema dataSchema = dataTable.getDataSchema();
assertEquals(dataSchema.getColumnNames(), new String[]{"*"});
- assertEquals(dataSchema.getColumnDataTypes(), new
DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.STRING});
+ assertEquals(dataSchema.getColumnDataTypes(), new
ColumnDataType[]{ColumnDataType.STRING});
assertEquals(dataTable.getNumberOfRows(), 0);
// Aggregation
@@ -50,7 +53,7 @@ public class DataTableUtilsTest {
dataSchema = dataTable.getDataSchema();
assertEquals(dataSchema.getColumnNames(), new String[]{"count_star",
"sum_a", "max_b"});
assertEquals(dataSchema.getColumnDataTypes(),
- new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.LONG,
DataSchema.ColumnDataType.DOUBLE, DataSchema.ColumnDataType.DOUBLE});
+ new ColumnDataType[]{ColumnDataType.LONG, ColumnDataType.DOUBLE,
ColumnDataType.DOUBLE});
assertEquals(dataTable.getNumberOfRows(), 1);
assertEquals(dataTable.getLong(0, 0), 0L);
assertEquals(dataTable.getDouble(0, 1), 0.0);
@@ -62,8 +65,7 @@ public class DataTableUtilsTest {
dataTable = DataTableUtils.buildEmptyDataTable(queryContext);
dataSchema = dataTable.getDataSchema();
assertEquals(dataSchema.getColumnNames(), new String[]{"functionName",
"GroupByResultMap"});
- assertEquals(dataSchema.getColumnDataTypes(),
- new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.STRING,
DataSchema.ColumnDataType.OBJECT});
+ assertEquals(dataSchema.getColumnDataTypes(), new
ColumnDataType[]{ColumnDataType.STRING, ColumnDataType.OBJECT});
assertEquals(dataTable.getNumberOfRows(), 3);
assertEquals(dataTable.getString(0, 0), "count_star");
assertEquals(dataTable.getObject(0, 1), Collections.emptyMap());
@@ -79,7 +81,23 @@ public class DataTableUtilsTest {
dataSchema = dataTable.getDataSchema();
assertEquals(dataSchema.getColumnNames(), new String[]{"c", "d",
"count(*)", "sum(a)", "max(b)"});
assertEquals(dataSchema.getColumnDataTypes(),
- new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.STRING,
DataSchema.ColumnDataType.STRING, DataSchema.ColumnDataType.LONG,
DataSchema.ColumnDataType.DOUBLE, DataSchema.ColumnDataType.DOUBLE});
+ new ColumnDataType[]{ColumnDataType.STRING, ColumnDataType.STRING,
ColumnDataType.LONG, ColumnDataType.DOUBLE, ColumnDataType.DOUBLE});
assertEquals(dataTable.getNumberOfRows(), 0);
+
+ // Distinct
+ queryContext =
+ QueryContextConverterUtils.getQueryContextFromPQL("SELECT DISTINCT(a,
b) FROM table WHERE foo = 'bar'");
+ dataTable = DataTableUtils.buildEmptyDataTable(queryContext);
+ dataSchema = dataTable.getDataSchema();
+ assertEquals(dataSchema.getColumnNames(), new String[]{"distinct_a:b"});
+ assertEquals(dataSchema.getColumnDataTypes(), new
ColumnDataType[]{ColumnDataType.OBJECT});
+ assertEquals(dataTable.getNumberOfRows(), 1);
+ Object firstObject = dataTable.getObject(0, 0);
+ assertTrue(firstObject instanceof DistinctTable);
+ DistinctTable distinctTable = (DistinctTable) firstObject;
+ assertEquals(distinctTable.size(), 0);
+ assertEquals(distinctTable.getDataSchema().getColumnNames(), new
String[]{"a", "b"});
+ assertEquals(distinctTable.getDataSchema().getColumnDataTypes(),
+ new ColumnDataType[]{ColumnDataType.STRING, ColumnDataType.STRING});
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]