nichunen commented on a change in pull request #1005: KYLIN-4185: optimize 
CuboidSizeMap by using historical segments
URL: https://github.com/apache/kylin/pull/1005#discussion_r367896842
 
 

 ##########
 File path: engine-mr/src/main/java/org/apache/kylin/engine/mr/CubingJob.java
 ##########
 @@ -362,4 +369,63 @@ public long findCubeSizeBytes() {
         return Long.parseLong(findExtraInfoBackward(CUBE_SIZE_BYTES, "0"));
     }
 
+    public List<Double> findEstimateRatio(CubeSegment seg, KylinConfig config) 
{
+        CubeInstance cubeInstance = seg.getCubeInstance();
+        CuboidScheduler cuboidScheduler = cubeInstance.getCuboidScheduler();
+        List<List<Long>> layeredCuboids = cuboidScheduler.getCuboidsByLayer();
+        int totalLevels = cuboidScheduler.getBuildLevel();
+
+        List<Double> result = Lists.newArrayList();
+
+        Map<Long, Double> estimatedSizeMap;
+
+        String cuboidRootPath = getCuboidRootPath(seg, config);
+
+        try {
+            estimatedSizeMap = new CubeStatsReader(seg, 
config).getCuboidSizeMap(true);
+        } catch (IOException e) {
+            logger.warn("Cannot get segment {} estimated size map", 
seg.getName());
+
+            return null;
+        }
+
+        for (int level = 0; level <= totalLevels; level++) {
+            double levelEstimatedSize = 0;
+            for (Long cuboidId : layeredCuboids.get(level)) {
+                levelEstimatedSize += estimatedSizeMap.get(cuboidId) == null ? 
0.0 : estimatedSizeMap.get(cuboidId);
+            }
+
+            double levelRealSize = getRealSizeByLevel(cuboidRootPath, level);
+
+            if (levelEstimatedSize == 0.0 || levelRealSize == 0.0){
+                result.add(level, -1.0);
+            } else {
+                result.add(level, levelRealSize / levelEstimatedSize);
+            }
+        }
+
+        return result;
+    }
+
+
+    private double getRealSizeByLevel(String rootPath, int level) {
+        try {
+            String levelPath = 
JobBuilderSupport.getCuboidOutputPathsByLevel(rootPath, level);
+            FileSystem fs = HadoopUtil.getFileSystem(levelPath);
+            return fs.getContentSummary(new Path(levelPath)).getLength() / 
(1024L * 1024L);
 
 Review comment:
   {long} / {long} can not return double

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to