npawar commented on a change in pull request #5019: Make output schema to match 
selection list for aggregation groupbys
URL: https://github.com/apache/incubator-pinot/pull/5019#discussion_r371438504
 
 

 ##########
 File path: 
pinot-core/src/main/java/org/apache/pinot/core/query/reduce/GroupByDataTableReducer.java
 ##########
 @@ -194,14 +210,104 @@ private void 
setSQLGroupByInResultTable(BrokerResponseNative brokerResponseNativ
         values[index] = 
_aggregationFunctions[aggNum++].extractFinalResult(values[index]);
         index++;
       }
-      rows.add(values);
+      if (_sqlSelectionList != null) {
+        Object[] finalValues = new Object[_sqlSelectionList.size()];
+        for (int i = 0; i < finalSchemaMapIdx.length; i++) {
+          if (finalSchemaMapIdx[i] == -1) {
+            finalValues[i] = null;
+          } else {
+            finalValues[i] = values[finalSchemaMapIdx[i]];
+          }
+        }
+        rows.add(finalValues);
+      } else {
+        rows.add(values);
+      }
       numRows++;
     }
 
     DataSchema finalDataSchema = getSQLResultTableSchema(dataSchema);
+    if (_sqlSelectionList != null) {
+      int columnSize = _sqlSelectionList.size();
+      String[] columns = new String[columnSize];
+      DataSchema.ColumnDataType[] columnDataTypes = new 
DataSchema.ColumnDataType[columnSize];
+      for (int i = 0; i < columnSize; i++) {
+        if (finalSchemaMapIdx[i] == -1) {
+          columns[i] = RequestUtils.prettyPrint(_sqlSelectionList.get(i));
+          columnDataTypes[i] = DataSchema.ColumnDataType.STRING;
+        } else {
+          columns[i] = finalDataSchema.getColumnName(finalSchemaMapIdx[i]);
+          columnDataTypes[i] = 
finalDataSchema.getColumnDataType(finalSchemaMapIdx[i]);
+        }
+      }
+      finalDataSchema = new DataSchema(columns, columnDataTypes);
+    }
     brokerResponseNative.setResultTable(new ResultTable(finalDataSchema, 
rows));
   }
 
+  /**
+   * Generate index mapping based on selection expression to DataTable schema, 
which is groupBy columns,
+   * then aggregation functions.
+   * @param dataSchema
+   *
+   * @return a mapping from final schema idx to corresponding idx in data 
table schema.
+   */
+  private int[] getFinalSchemaMapIdx(DataSchema dataSchema) {
+    int[] finalSchemaMapIdx = new int[_sqlSelectionList.size()];
+    int nextAggregationIdx = _numGroupBy;
+    for (int i = 0; i < _sqlSelectionList.size(); i++) {
+      finalSchemaMapIdx[i] = getExpressionMapIdx(dataSchema, 
_sqlSelectionList.get(i), nextAggregationIdx);
+      if (finalSchemaMapIdx[i] == nextAggregationIdx) {
+        nextAggregationIdx++;
+      }
+    }
+    return finalSchemaMapIdx;
+  }
+
+  private int getExpressionMapIdx(DataSchema dataSchema, Expression 
expression, int nextAggregationIdx) {
+    // Check if expression matches groupBy list.
+    int idxFromGroupByList = getGroupByIdx(_groupByList, expression);
+    if (idxFromGroupByList != -1) {
+      return idxFromGroupByList;
+    }
+    // Handle all functions
+    if (expression.getFunctionCall() != null) {
+      // handle AS
+      if (expression.getFunctionCall().getOperator().equalsIgnoreCase("AS")) {
+        return getExpressionMapIdx(dataSchema, 
expression.getFunctionCall().getOperands().get(0), nextAggregationIdx);
+      }
+      // Return next aggregation idx.
+      return nextAggregationIdx;
+    }
+    // Handle identifier, which is a column.
+    if (expression.getIdentifier() != null) {
+      String columnName = expression.getIdentifier().getName();
+      for (int i = 0; i < dataSchema.size(); i++) {
+        if (columnName.equalsIgnoreCase(dataSchema.getColumnName(i))) {
+          return i;
+        }
+      }
+    }
+    return -1;
+  }
+
+  /**
+   * Trying to match an expression based on given groupByList.
+   *
+   * @param groupByList
+   * @param expression
+   * @return matched idx from groupByList
+   */
+  private int getGroupByIdx(List<Expression> groupByList, Expression 
expression) {
+    for (int i = 0; i < groupByList.size(); i++) {
 
 Review comment:
   this group by list will be scanned for every single expression in the 
selection. Do you think it's worth constructing a map upfront groupByExpr -> 
index , within getFinalSchemaIdx method?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to