liukun4515 commented on code in PR #2089: URL: https://github.com/apache/kylin/pull/2089#discussion_r1107948753
########## src/spark-project/engine-spark/src/main/scala/org/apache/kylin/engine/spark/model/planner/CuboIdToLayoutUtils.java: ########## @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.engine.spark.model.planner; + +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.kylin.metadata.cube.cuboid.NAggregationGroup; +import org.apache.kylin.metadata.cube.model.LayoutEntity; +import org.apache.kylin.metadata.cube.model.RuleBasedIndex; + +import com.google.common.collect.Lists; + +public class CuboIdToLayoutUtils { + /** + * convert the cuboids to layout entity + * + * @param cuboids + * @param ruleBasedIndex + * @return + */ + public static Set<LayoutEntity> convertCuboIdsToLayoutEntity(Map<BigInteger, Long> cuboids, + RuleBasedIndex ruleBasedIndex) { + // convert the cuboid to each agg group + Set<LayoutEntity> result = new HashSet<>(); + List<NAggregationGroup> aggregationGroups = ruleBasedIndex.getAggregationGroups(); + for (NAggregationGroup group : aggregationGroups) { + // dimension 
order in this agg group + List<Integer> dimensionOrder = Lists.newArrayList(group.getIncludes()); + // measure order in this agg group + List<Integer> measuresIds = Lists.newArrayList(group.getMeasures()); + Set<List<Integer>> colOrders = convertCuboIdsToColOrders(cuboids, ruleBasedIndex.countOfIncludeDimension(), + measuresIds, ruleBasedIndex.getRowKeyIdToColumnId(), dimensionOrder); + for (List<Integer> colOrder : colOrders) { + result.add(createRecommendAggIndexLayout(colOrder)); + } + } + + // base agg layout for each agg group + for (NAggregationGroup group : aggregationGroups) { + List<Integer> colOrders = Lists.newArrayList(); + // all dimension in the agg + colOrders.addAll(Lists.newArrayList(group.getIncludes())); + // all measure in this agg + colOrders.addAll(Lists.newArrayList(group.getMeasures())); + result.add(createRecommendAggIndexLayout(colOrders)); + } + return result; + } + + /** + * create recommend agg layout base on the colOrder. + * + * @param colOrder + * @return LayoutEntity or null + * null: if the measures in the colOrder can't match the measures in this Index Plan + */ + private static LayoutEntity createRecommendAggIndexLayout(List<Integer> colOrder) { + LayoutEntity newAddIndexLayout = new LayoutEntity(); + // The layout is not the manual + newAddIndexLayout.setManual(false); Review Comment: > 这个地方不是很合理。如果需要强调是 cubeplanner 的结果,可以加个字段表示一下。 cubeplanner,推荐出来的结果会和 `RuleBasedIndex` 产生的结果进行对比,选择公共的部分。 相当于对`RuleBasedIndex` 的结果进行 剪枝。 `RuleBasedIndex` 本身的结果集 是 `2^n-1`是非常大的,这里只是为了获得较小的结果。 所以这里的LayoutEntity并不会被保存到 kylin的index系统中。只是用来表示推荐出来的layout的内容,最终保存的值就是 一个 ID,这个ID是RuleBasedIndex本身就存储有的。 > 还有就是需要考虑 shardBy 什么的吗? 创建agg-group的时候,并没有shardby属性需要配置。那么`RuleBasedIndex`推荐出来的结果会包含有shardby属性吗? 所以目前不考虑shardby. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
