Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2396#discussion_r197501384 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/model/QueryModelBuilder.java --- @@ -54,18 +58,134 @@ public QueryModelBuilder projectColumns(String[] projectionColumns) { } else { CarbonMeasure measure = table.getMeasureByName(factTableName, projectionColumnName); if (measure == null) { - throw new RuntimeException(projectionColumnName + - " column not found in the table " + factTableName); + throw new RuntimeException( + projectionColumnName + " column not found in the table " + factTableName); } projection.addMeasure(measure, i); i++; } } - + optimizeProjectionForComplexColumns(projection); this.projection = projection; return this; } + private void optimizeProjectionForComplexColumns(QueryProjection projection) { + // Get the List of Complex Column Projection. + // The optimization techniques which can be applied are + // A. Merging in Driver Side + // B. Merging in the result Collector side. + // Merging is driver side cases are + // Driver merging will eliminate one of the CarbonDimension. + // Executor merging will merge the column output in Result Collector. + // In this routine we are going to do driver merging and leave executor merging. + Map<Integer, List<Integer>> complexColumnMap = new HashMap<>(); + List<ProjectionDimension> carbonDimensions = projection.getDimensions(); + for (ProjectionDimension cols : carbonDimensions) { + // get all the Projections with Parent Ordinal Set. 
+ if (cols.getDimension().getParentOrdinal() != -1) { + if (complexColumnMap.get(cols.getDimension().getParentOrdinal()) != null) { + List<Integer> childColumns = complexColumnMap.get(cols.getDimension().getParentOrdinal()); + childColumns.add(cols.getDimension().getOrdinal()); + complexColumnMap.put(cols.getDimension().getParentOrdinal(), childColumns); + } else { + List<Integer> childColumns = new ArrayList<>(); + childColumns.add(cols.getDimension().getOrdinal()); + complexColumnMap.put(cols.getDimension().getParentOrdinal(), childColumns); + } + } + } + + // Traverse the Map to Find any columns are parent. + for (Map.Entry<Integer, List<Integer>> entry : complexColumnMap.entrySet()) { + List<Integer> childOrdinals = entry.getValue(); + if (childOrdinals.size() > 1) { + // In case of more that one child, have to check if the child columns are in the same path + // and have a common parent. + Collections.sort(childOrdinals); + List<Integer> mergedOrdinals = mergeChildColumns(childOrdinals, entry.getKey()); + if (mergedOrdinals.size() > 0) { + projection = removeDimension(projection, mergedOrdinals); + } + } + } + } + + private QueryProjection removeDimension(QueryProjection projection, + List<Integer> mergedOrdinals) { + List<ProjectionDimension> carbonDimensions = projection.getDimensions(); + QueryProjection outputProjection = new QueryProjection(); + int i = 0; + for (ProjectionDimension cols : carbonDimensions) { + if (!mergedOrdinals.contains(cols.getDimension().getOrdinal())) { + outputProjection.addDimension(cols.getDimension(), i++); + } + } + List<ProjectionMeasure> carbonMeasures = projection.getMeasures(); + for (ProjectionMeasure cols : carbonMeasures) { + outputProjection.addMeasure(cols.getMeasure(), i++); + } + return outputProjection; + } + + private List<Integer> mergeChildColumns(List<Integer> childOrdinals, Integer key) { + // Check If children if they are in the path of not. 
+ List<Integer> mergedChild = new ArrayList<>(); + + for (int i = 0; i < childOrdinals.size(); i++) { + for (int j = i; j < childOrdinals.size(); j++) { + if (!mergedChild.contains(childOrdinals.get(j)) && checkChildsInSamePath( + childOrdinals.get(i), childOrdinals.get(j))) { + mergedChild.add(j); + } + } + } + return mergedChild; + } + + private boolean checkChildsInSamePath(Integer parentOrdinal, Integer childOrdinal) { + List<CarbonDimension> dimList = table.getDimensions(); + CarbonDimension parentDimension = getDimensionBasedOnOrdinal(dimList, parentOrdinal); + CarbonDimension childDimension = getDimensionBasedOnOrdinal(dimList, childOrdinal); + if (checkForChildColumns(parentDimension, childDimension)) { + return true; + } else { + return false; + } + } + + private boolean checkForChildColumns(CarbonDimension parentDimension, --- End diff -- Better to use the dimension ordinal range to decide which children to merge.
---