http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/cube/src/main/java/org/apache/kylin/cube/cuboid/Cuboid.java ---------------------------------------------------------------------- diff --git a/cube/src/main/java/org/apache/kylin/cube/cuboid/Cuboid.java b/cube/src/main/java/org/apache/kylin/cube/cuboid/Cuboid.java deleted file mode 100644 index 17d62e7..0000000 --- a/cube/src/main/java/org/apache/kylin/cube/cuboid/Cuboid.java +++ /dev/null @@ -1,352 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.kylin.cube.cuboid; - -import org.apache.kylin.common.util.Bytes; -import org.apache.kylin.cube.model.*; -import org.apache.kylin.cube.model.RowKeyDesc.AggrGroupMask; -import org.apache.kylin.cube.model.RowKeyDesc.HierarchyMask; -import org.apache.kylin.metadata.model.TblColRef; - -import java.util.*; -import java.util.concurrent.ConcurrentHashMap; - -/** - * @author George Song (ysong1) - */ -public class Cuboid implements Comparable<Cuboid> { - - private final static Map<String, Map<Long, Cuboid>> CUBOID_CACHE = new ConcurrentHashMap<String, Map<Long, Cuboid>>(); - - public static Cuboid findById(CubeDesc cube, byte[] cuboidID) { - return findById(cube, Bytes.toLong(cuboidID)); - } - - public static Cuboid findById(CubeDesc cube, long cuboidID) { - Map<Long, Cuboid> cubeCache = CUBOID_CACHE.get(cube.getName()); - if (cubeCache == null) { - cubeCache = new ConcurrentHashMap<Long, Cuboid>(); - CUBOID_CACHE.put(cube.getName(), cubeCache); - } - Cuboid cuboid = cubeCache.get(cuboidID); - if (cuboid == null) { - long validCuboidID = translateToValidCuboid(cube, cuboidID); - if (Cuboid.isValid(cube, validCuboidID) == false) { - throw new RuntimeException("Didn't find a valid cuboid: " + validCuboidID); - } - - cuboid = new Cuboid(cube, cuboidID, validCuboidID); - cubeCache.put(cuboidID, cuboid); - } - return cuboid; - - } - - public static boolean isValid(CubeDesc cube, long cuboidID) { - RowKeyDesc rowkey = cube.getRowkey(); - - if (cuboidID < 0) { - throw new IllegalArgumentException("Cuboid " + cuboidID + " should be greater than 0"); - } - - if (checkBaseCuboid(rowkey, cuboidID)) { - return true; - } - - if (checkMandatoryColumns(rowkey, cuboidID) == false) { - return false; - } - - if (checkAggregationGroup(rowkey, cuboidID) == false) { - return false; - } - - if (checkHierarchy(rowkey, cuboidID) == false) { - return false; - } - - return true; - } - - public static long getBaseCuboidId(CubeDesc cube) { - return 
cube.getRowkey().getFullMask(); - } - - public static Cuboid getBaseCuboid(CubeDesc cube) { - return findById(cube, getBaseCuboidId(cube)); - } - - private static long translateToValidCuboid(CubeDesc cubeDesc, long cuboidID) { - // add mandantory - RowKeyDesc rowkey = cubeDesc.getRowkey(); - long mandatoryColumnMask = rowkey.getMandatoryColumnMask(); - if (cuboidID < mandatoryColumnMask) { - cuboidID = cuboidID | mandatoryColumnMask; - } - - // add hierarchy - for (DimensionDesc dimension : cubeDesc.getDimensions()) { - HierarchyDesc[] hierarchies = dimension.getHierarchy(); - boolean found = false; - long result = 0; - if (hierarchies != null && hierarchies.length > 0) { - for (int i = hierarchies.length - 1; i >= 0; i--) { - TblColRef hColumn = hierarchies[i].getColumnRef(); - Integer index = rowkey.getColumnBitIndex(hColumn); - long bit = 1L << index; - - if ((rowkey.getTailMask() & bit) > 0) - continue; // ignore levels in tail, they don't participate - - if ((bit & cuboidID) > 0) { - found = true; - } - - if (found == true) { - result = result | bit; - } - } - cuboidID = cuboidID | result; - } - } - - // find the left-most aggregation group - long cuboidWithoutMandatory = cuboidID & ~rowkey.getMandatoryColumnMask(); - long leftover; - for (AggrGroupMask mask : rowkey.getAggrGroupMasks()) { - if ((cuboidWithoutMandatory & mask.uniqueMask) > 0) { - leftover = cuboidWithoutMandatory & ~mask.groupMask; - - if (leftover == 0) { - return cuboidID; - } - - if (leftover != 0) { - cuboidID = cuboidID | mask.leftoverMask; - return cuboidID; - } - } - } - - // doesn't have column in aggregation groups - leftover = cuboidWithoutMandatory & rowkey.getTailMask(); - if (leftover == 0) { - // doesn't have column in tail group - if (cuboidWithoutMandatory != 0) { - return cuboidID; - } else { - // no column except mandatory, add one column - cuboidID = cuboidID | Long.lowestOneBit(rowkey.getAggrGroupFullMask()); - return translateToValidCuboid(cubeDesc, cuboidID); - } - } - - 
// has column in tail group - cuboidID = cuboidID | rowkey.getTailMask(); - return cuboidID; - - } - - private static boolean checkBaseCuboid(RowKeyDesc rowkey, long cuboidID) { - long baseCuboidId = rowkey.getFullMask(); - if (cuboidID > baseCuboidId) { - throw new IllegalArgumentException("Cubiod " + cuboidID + " is out of scope 0-" + baseCuboidId); - } - return baseCuboidId == cuboidID; - } - - private static boolean checkMandatoryColumns(RowKeyDesc rowkey, long cuboidID) { - long mandatoryColumnMask = rowkey.getMandatoryColumnMask(); - - // note the all-zero cuboid (except for mandatory) is not valid - if (cuboidID <= mandatoryColumnMask) - return false; - - return (cuboidID & mandatoryColumnMask) == mandatoryColumnMask; - } - - private static boolean checkHierarchy(RowKeyDesc rowkey, long cuboidID) { - List<HierarchyMask> hierarchyMaskList = rowkey.getHierarchyMasks(); - // if no hierarchy defined in metadata - if (hierarchyMaskList == null || hierarchyMaskList.size() == 0) { - return true; - } - - hier: for (HierarchyMask hierarchyMasks : hierarchyMaskList) { - long result = cuboidID & hierarchyMasks.fullMask; - if (result > 0) { - // if match one of the hierarchy constrains, return true; - for (long mask : hierarchyMasks.allMasks) { - if (result == mask) { - continue hier; - } - } - return false; - } - } - return true; - } - - private static boolean checkAggregationGroup(RowKeyDesc rowkey, long cuboidID) { - long cuboidWithoutMandatory = cuboidID & ~rowkey.getMandatoryColumnMask(); - long leftover; - for (AggrGroupMask mask : rowkey.getAggrGroupMasks()) { - if ((cuboidWithoutMandatory & mask.uniqueMask) != 0) { - leftover = cuboidWithoutMandatory & ~mask.groupMask; - return leftover == 0 || leftover == mask.leftoverMask; - } - } - - leftover = cuboidWithoutMandatory & rowkey.getTailMask(); - return leftover == 0 || leftover == rowkey.getTailMask(); - } - - private CubeDesc cube; - private final long inputID; - private final long id; - private final byte[] 
idBytes; - private final boolean requirePostAggregation; - private List<TblColRef> dimensionColumns; - - // will translate the cuboidID if it is not valid - private Cuboid(CubeDesc cube, long originalID, long validID) { - this.cube = cube; - this.inputID = originalID; - this.id = validID; - this.idBytes = Bytes.toBytes(id); - this.dimensionColumns = translateIdToColumns(this.id); - this.requirePostAggregation = calcExtraAggregation(this.inputID, this.id) != 0; - } - - private List<TblColRef> translateIdToColumns(long cuboidID) { - List<TblColRef> dimesnions = new ArrayList<TblColRef>(); - RowKeyColDesc[] allColumns = cube.getRowkey().getRowKeyColumns(); - for (int i = 0; i < allColumns.length; i++) { - // NOTE: the order of column in list!!! - long bitmask = 1L << allColumns[i].getBitIndex(); - if ((cuboidID & bitmask) != 0) { - TblColRef colRef = allColumns[i].getColRef(); - dimesnions.add(colRef); - } - } - return dimesnions; - } - - private long calcExtraAggregation(long inputID, long id) { - long diff = id ^ inputID; - return eliminateHierarchyAggregation(diff); - } - - // higher level in hierarchy can be ignored when counting aggregation columns - private long eliminateHierarchyAggregation(long id) { - List<HierarchyMask> hierarchyMaskList = cube.getRowkey().getHierarchyMasks(); - if (hierarchyMaskList != null && hierarchyMaskList.size() > 0) { - for (HierarchyMask hierMask : hierarchyMaskList) { - long[] allMasks = hierMask.allMasks; - for (int i = allMasks.length - 1; i > 0; i--) { - long bit = allMasks[i] ^ allMasks[i - 1]; - if ((inputID & bit) != 0) { - id &= ~allMasks[i - 1]; - } - } - } - } - return id; - } - - public CubeDesc getCube() { - return cube; - } - - public List<TblColRef> getColumns() { - return dimensionColumns; - } - - public List<TblColRef> getAggregationColumns() { - long aggrColsID = eliminateHierarchyAggregation(id); - return translateIdToColumns(aggrColsID); - } - - public long getId() { - return id; - } - - public byte[] getBytes() { 
- return idBytes; - } - - public long getInputID() { - return inputID; - } - - public boolean useAncestor() { - return inputID != id; - } - - public boolean requirePostAggregation() { - return requirePostAggregation; - } - - public static void clearCache() { - CUBOID_CACHE.clear(); - } - - public static void reloadCache(String cubeDescName) { - CUBOID_CACHE.remove(cubeDescName); - } - - @Override - public String toString() { - return "Cuboid [id=" + id + "]"; - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + (int) (id ^ (id >>> 32)); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - Cuboid other = (Cuboid) obj; - if (id != other.id) - return false; - return true; - } - - @Override - public int compareTo(Cuboid o) { - if (this.id < o.id) { - return -1; - } else if (this.id > o.id) { - return 1; - } else { - return 0; - } - } - -}
http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/cube/src/main/java/org/apache/kylin/cube/cuboid/CuboidCLI.java ---------------------------------------------------------------------- diff --git a/cube/src/main/java/org/apache/kylin/cube/cuboid/CuboidCLI.java b/cube/src/main/java/org/apache/kylin/cube/cuboid/CuboidCLI.java deleted file mode 100644 index 7bb40f6..0000000 --- a/cube/src/main/java/org/apache/kylin/cube/cuboid/CuboidCLI.java +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.kylin.cube.cuboid; - -import java.io.IOException; -import java.util.Collection; -import java.util.LinkedList; -import java.util.TreeSet; - -import org.apache.kylin.common.KylinConfig; -import org.apache.kylin.cube.CubeDescManager; -import org.apache.kylin.cube.model.CubeDesc; -import org.apache.kylin.cube.model.RowKeyDesc; -import org.apache.kylin.cube.model.RowKeyDesc.AggrGroupMask; -import org.apache.kylin.cube.model.RowKeyDesc.HierarchyMask; - -/** - * @author yangli9 - * - */ -public class CuboidCLI { - - public static void main(String[] args) throws IOException { - CubeDescManager cubeDescMgr = CubeDescManager.getInstance(KylinConfig.getInstanceFromEnv()); - - if ("test".equals(args[0])) { - CubeDesc cubeDesc = cubeDescMgr.getCubeDesc(args[1]); - simulateCuboidGeneration(cubeDesc); - } - } - - public static int simulateCuboidGeneration(CubeDesc cube) { - CuboidScheduler scheduler = new CuboidScheduler(cube); - - long baseCuboid = Cuboid.getBaseCuboidId(cube); - Collection<Long> cuboidSet = new TreeSet<Long>(); - cuboidSet.add(baseCuboid); - LinkedList<Long> cuboidQueue = new LinkedList<Long>(); - cuboidQueue.push(baseCuboid); - while (!cuboidQueue.isEmpty()) { - long cuboid = cuboidQueue.pop(); - Collection<Long> spnanningCuboids = scheduler.getSpanningCuboid(cuboid); - for (Long sc : spnanningCuboids) { - boolean notfound = cuboidSet.add(sc); - if (!notfound) { - throw new IllegalStateException("Find duplicate spanning cuboid " + sc + " from cuboid " + cuboid); - } - cuboidQueue.push(sc); - } - } - - /** disable this due to poor performance when dimension number is big - TreeSet<Long> enumCuboids = enumCalcCuboidCount(cube); - if (enumCuboids.equals(cuboidSet) == false) { - throw new IllegalStateException("Expected cuboid set " + enumCuboids + "; but actual cuboid set " + cuboidSet); - } - **/ - - int mathCount = mathCalcCuboidCount(cube); - if (mathCount != cuboidSet.size()) { - throw new IllegalStateException("Math cuboid count " 
+ mathCount + ", but actual cuboid count " + cuboidSet.size() + ", make sure aggregation groups has no duplication."); - } - - return mathCount; - - } - - public static TreeSet<Long> enumCalcCuboidCount(CubeDesc cube) { - long baseCuboid = Cuboid.getBaseCuboidId(cube); - TreeSet<Long> expectedCuboids = new TreeSet<Long>(); - for (long cuboid = 0; cuboid <= baseCuboid; cuboid++) { - if (Cuboid.isValid(cube, cuboid)) { - expectedCuboids.add(cuboid); - } - } - return expectedCuboids; - } - - public static int[] calculateAllLevelCount(CubeDesc cube) { - int levels = cube.getRowkey().getNCuboidBuildLevels(); - int[] allLevelCounts = new int[levels + 1]; - - CuboidScheduler scheduler = new CuboidScheduler(cube); - LinkedList<Long> nextQueue = new LinkedList<Long>(); - LinkedList<Long> currentQueue = new LinkedList<Long>(); - long baseCuboid = Cuboid.getBaseCuboidId(cube); - currentQueue.push(baseCuboid); - - for (int i = 0; i <= levels; i++) { - allLevelCounts[i] = currentQueue.size(); - while (!currentQueue.isEmpty()) { - long cuboid = currentQueue.pop(); - Collection<Long> spnanningCuboids = scheduler.getSpanningCuboid(cuboid); - nextQueue.addAll(spnanningCuboids); - } - currentQueue = nextQueue; - nextQueue = new LinkedList<Long>(); - } - - return allLevelCounts; - } - - public static int mathCalcCuboidCount(CubeDesc cube) { - int result = 1; // 1 for base cuboid - - RowKeyDesc rowkey = cube.getRowkey(); - AggrGroupMask[] aggrGroupMasks = rowkey.getAggrGroupMasks(); - for (int i = 0; i < aggrGroupMasks.length; i++) { - boolean hasTail = i < aggrGroupMasks.length - 1 || rowkey.getTailMask() > 0; - result += mathCalcCuboidCount_aggrGroup(rowkey, aggrGroupMasks[i], hasTail); - } - - return result; - } - - private static int mathCalcCuboidCount_aggrGroup(RowKeyDesc rowkey, AggrGroupMask aggrGroupMask, boolean hasTail) { - long groupMask = aggrGroupMask.groupMask; - int n = mathCalcCuboidCount_combination(rowkey, groupMask); - n -= 2; // exclude group all 1 and all 0 - - 
long nonUniqueMask = groupMask & (~aggrGroupMask.uniqueMask); - if (nonUniqueMask > 0) { - // exclude duplicates caused by non-unique columns - // FIXME this assumes non-unique masks consolidates in ONE following group which maybe not be true - n -= mathCalcCuboidCount_combination(rowkey, nonUniqueMask) - 1; // exclude all 0 - } - - if (hasTail) { - n *= 2; // tail being 1 and 0 - n += 2; // +1 for group all 1 and tail 0; +1 for group all 0 and tail 1 - } - - return n; - } - - private static int mathCalcCuboidCount_combination(RowKeyDesc rowkey, long colMask) { - if (colMask == 0) // no column selected - return 0; - - int count = 1; - - for (HierarchyMask hierMask : rowkey.getHierarchyMasks()) { - long hierBits = colMask & hierMask.fullMask; - if (hierBits != 0) { - count *= Long.bitCount(hierBits) + 1; // +1 is for all-zero case - colMask &= ~hierBits; - } - } - - count *= Math.pow(2, Long.bitCount(colMask)); - - return count; - } -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/cube/src/main/java/org/apache/kylin/cube/cuboid/CuboidScheduler.java ---------------------------------------------------------------------- diff --git a/cube/src/main/java/org/apache/kylin/cube/cuboid/CuboidScheduler.java b/cube/src/main/java/org/apache/kylin/cube/cuboid/CuboidScheduler.java deleted file mode 100644 index 026d898..0000000 --- a/cube/src/main/java/org/apache/kylin/cube/cuboid/CuboidScheduler.java +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kylin.cube.cuboid; - -/** - * @author George Song (ysong1) - * - */ - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashSet; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; - -import org.apache.kylin.cube.model.CubeDesc; -import org.apache.kylin.cube.model.RowKeyDesc; -import org.apache.kylin.cube.model.RowKeyDesc.AggrGroupMask; - -public class CuboidScheduler { - - private final CubeDesc cubeDef; - private final int size; - private final long max; - private final Map<Long, Collection<Long>> cache; - - public CuboidScheduler(CubeDesc cube) { - this.cubeDef = cube; - this.size = cube.getRowkey().getRowKeyColumns().length; - this.max = (long) Math.pow(2, size) - 1; - this.cache = new ConcurrentHashMap<Long, Collection<Long>>(); - } - - public Collection<Long> getSpanningCuboid(long cuboid) { - if (cuboid > max || cuboid < 0) { - throw new IllegalArgumentException("Cuboid " + cuboid + " is out of scope 0-" + max); - } - - Collection<Long> result = cache.get(cuboid); - if (result != null) { - return result; - } - - // smaller sibling's children - Collection<Long> allPrevOffspring = new HashSet<Long>(); - for (Long sibling : findSmallerSibling(cuboid)) { - Collection<Long> prevOffsprings = generateChildren(sibling); - allPrevOffspring.addAll(prevOffsprings); - } - - // my children is my generation excluding smaller sibling's generation - result = new ArrayList<Long>(); - for (Long offspring : generateChildren(cuboid)) { - if 
(!allPrevOffspring.contains(offspring)) { - result.add(offspring); - } - } - - cache.put(cuboid, result); - return result; - } - - private Collection<Long> generateChildren(long cuboid) { - Collection<Long> result = new HashSet<Long>(); - - // generate zero tail cuboid -- the one with all 1 in the first - // aggregation group and all 0 for the rest bits - generateZeroTailBase(cuboid, result); - - RowKeyDesc rowkey = cubeDef.getRowkey(); - long cuboidWithoutMandatory = cuboid & ~rowkey.getMandatoryColumnMask(); - for (AggrGroupMask mask : rowkey.getAggrGroupMasks()) { - if (belongTo(cuboidWithoutMandatory, mask) == false) - continue; - - long[] groupOneBitMasks = mask.groupOneBitMasks; - for (int i = 0; i < groupOneBitMasks.length; i++) { - long oneBit = groupOneBitMasks[i]; - if ((cuboid & oneBit) == 0) - continue; - - long child = cuboid ^ oneBit; - if (Cuboid.isValid(cubeDef, child)) { - result.add(child); - } - } - - if ((cuboidWithoutMandatory & mask.uniqueMask) > 0) - break; - } - - return result; - } - - private void generateZeroTailBase(long cuboid, Collection<Long> result) { - RowKeyDesc rowkey = cubeDef.getRowkey(); - - long cuboidWithoutMandatory = cuboid & ~rowkey.getMandatoryColumnMask(); - - for (AggrGroupMask mask : rowkey.getAggrGroupMasks()) { - if ((cuboidWithoutMandatory & mask.groupMask) == mask.groupMask && (cuboidWithoutMandatory & mask.leftoverMask) == mask.leftoverMask) { - long zeroTail = rowkey.getMandatoryColumnMask() | mask.groupMask; - if (zeroTail > 0 && zeroTail != cuboid) { - result.add(zeroTail); - } - } - if ((cuboidWithoutMandatory & mask.uniqueMask) > 0) - break; - } - } - - public Collection<Long> findSmallerSibling(long cuboid) { - if (!Cuboid.isValid(cubeDef, cuboid)) { - return Collections.emptyList(); - } - - RowKeyDesc rowkey = cubeDef.getRowkey(); - - // do combination in all related groups - long groupAllBitMask = 0; - for (AggrGroupMask mask : rowkey.getAggrGroupMasks()) { - if ((mask.groupMask & cuboid) > 0) { - 
groupAllBitMask |= mask.groupMask; - } - } - - long groupBitValue = cuboid & groupAllBitMask; - long leftBitValue = cuboid & ~groupAllBitMask; - long[] groupOneBits = bits(groupAllBitMask); - - Collection<Long> siblings = new HashSet<Long>(); - combination(cuboid, siblings, groupOneBits, 0, leftBitValue, Long.bitCount(groupBitValue)); - return siblings; - } - - private long[] bits(long groupAllBitMask) { - int size = Long.bitCount(groupAllBitMask); - long[] r = new long[size]; - long l = groupAllBitMask; - int i = 0; - while (l != 0) { - long bit = Long.highestOneBit(l); - r[i++] = bit; - l ^= bit; - } - return r; - } - - private void combination(long cuboid, Collection<Long> siblings, long[] bitMasks, int offset, long bitValue, int k) { - if (k == 0) { - if (Cuboid.isValid(cubeDef, bitValue)) { - siblings.add(bitValue); - } - } else { - for (int i = offset; i < bitMasks.length; i++) { - long newBitValue = bitValue | bitMasks[i]; - if (newBitValue < cuboid) { - combination(cuboid, siblings, bitMasks, i + 1, newBitValue, k - 1); - } - } - } - } - - private boolean belongTo(long cuboidWithoutMandatory, AggrGroupMask mask) { - long groupBits = cuboidWithoutMandatory & mask.groupMask; - long leftoverBits = cuboidWithoutMandatory & mask.leftoverMask; - return groupBits > 0 && (leftoverBits == 0 || leftoverBits == mask.leftoverMask); - } - - public int getCardinality(long cuboid) { - if (cuboid > max || cuboid < 0) { - throw new IllegalArgumentException("Cubiod " + cuboid + " is out of scope 0-" + max); - } - - return Long.bitCount(cuboid); - } -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/cube/src/main/java/org/apache/kylin/cube/estimation/CubeSizeEstimationCLI.java ---------------------------------------------------------------------- diff --git a/cube/src/main/java/org/apache/kylin/cube/estimation/CubeSizeEstimationCLI.java b/cube/src/main/java/org/apache/kylin/cube/estimation/CubeSizeEstimationCLI.java deleted file mode 100644 index 188b157..0000000 
--- a/cube/src/main/java/org/apache/kylin/cube/estimation/CubeSizeEstimationCLI.java +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kylin.cube.estimation; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.List; - -import org.apache.kylin.common.KylinConfig; -import org.apache.kylin.cube.CubeInstance; -import org.apache.kylin.cube.CubeManager; -import org.apache.kylin.cube.cuboid.Cuboid; -import org.apache.kylin.cube.cuboid.CuboidScheduler; -import org.apache.kylin.cube.model.CubeDesc; -import org.apache.kylin.cube.model.DimensionDesc; -import org.apache.kylin.cube.model.HierarchyDesc; -import org.apache.kylin.cube.model.RowKeyColDesc; -import org.apache.kylin.cube.model.RowKeyDesc; -import org.apache.kylin.metadata.datatype.DataType; -import org.apache.kylin.metadata.model.MeasureDesc; - -/** - * Created by honma on 9/1/14. 
- */ -public class CubeSizeEstimationCLI { - - public static class RowKeyColInfo { - public List<List<Integer>> hierachyColBitIndice; - public List<Integer> nonHierachyColBitIndice; - } - - public static long estimatedCubeSize(String cubeName, long[] cardinality) { - KylinConfig config = KylinConfig.getInstanceFromEnv(); - CubeManager cubeManager = CubeManager.getInstance(config); - CubeInstance cubeInstance = cubeManager.getCube(cubeName); - CubeDesc cubeDesc = cubeInstance.getDescriptor(); - - CuboidScheduler scheduler = new CuboidScheduler(cubeDesc); - long baseCuboid = Cuboid.getBaseCuboidId(cubeDesc); - LinkedList<Long> cuboidQueue = new LinkedList<Long>(); - cuboidQueue.push(baseCuboid); - - long totalSpace = 0; - - while (!cuboidQueue.isEmpty()) { - long cuboidID = cuboidQueue.pop(); - Collection<Long> spanningCuboid = scheduler.getSpanningCuboid(cuboidID); - for (Long sc : spanningCuboid) { - cuboidQueue.push(sc); - } - - totalSpace += estimateCuboidSpace(cuboidID, cardinality, cubeDesc); - } - return totalSpace; - } - - public static long estimateCuboidSpace(long cuboidID, long[] cardinality, CubeDesc cubeDesc) { - - RowKeyColInfo rowKeyColInfo = extractRowKeyInfo(cubeDesc); - RowKeyDesc rowKeyDesc = cubeDesc.getRowkey(); - - long rowCount = 1; - int[] rowKeySpaces = estimateRowKeyColSpace(rowKeyDesc, cardinality); - int dimensionSpace = 0; - int measureSpace = getMeasureSpace(cubeDesc); - - for (List<Integer> hlist : rowKeyColInfo.hierachyColBitIndice) { - // for hierachy columns, the cardinality of the most detailed column - // nominates. 
- int i; - for (i = 0; i < hlist.size() && rowKeyColExists(hlist.get(i), cuboidID); ++i) { - dimensionSpace += rowKeySpaces[hlist.get(i)]; - } - - if (i != 0) - rowCount *= cardinality[hlist.get(i - 1)]; - } - - for (int index : rowKeyColInfo.nonHierachyColBitIndice) { - if (rowKeyColExists(index, cuboidID)) { - rowCount *= cardinality[index]; - dimensionSpace += rowKeySpaces[index]; - } - } - return rowCount * (dimensionSpace + measureSpace); - } - - private static int[] estimateRowKeyColSpace(RowKeyDesc rowKeyDesc, long[] cardinality) { - RowKeyColDesc[] rowKeyColDescs = rowKeyDesc.getRowKeyColumns(); - int[] ret = new int[rowKeyColDescs.length]; - for (int i = 0; i < rowKeyColDescs.length; ++i) { - RowKeyColDesc rowKeyColDesc = rowKeyColDescs[rowKeyColDescs.length - 1 - i]; - if (rowKeyColDesc.getDictionary() == null) { - if (rowKeyColDesc.getLength() == 0) - throw new IllegalStateException("The non-dictionary col " + rowKeyColDesc.getColumn() + " has length of 0"); - ret[i] = rowKeyColDesc.getLength(); - } else { - ret[i] = estimateDictionaryColSpace(cardinality[i]); - } - } - return ret; - } - - // TODO what if it's date dictionary? 
- private static int estimateDictionaryColSpace(long cardinality) { - long mask = 1L; - int i; - for (i = Long.SIZE - 1; i >= 0; i--) { - if ((cardinality & (mask << i)) != 0) { - break; - } - } - - if (i < 0) - throw new IllegalStateException("the cardinality is 0"); - - return ((i + 1) + 7) / 8;// the bytes required to save at most - // cardinality numbers - } - - private static int getMeasureSpace(CubeDesc cubeDesc) { - int space = 0; - for (MeasureDesc measureDesc : cubeDesc.getMeasures()) { - DataType returnType = measureDesc.getFunction().getReturnDataType(); - space += returnType.getStorageBytesEstimate(); - } - return space; - } - - private static boolean rowKeyColExists(int bitIndex, long cuboidID) { - long mask = 1L << bitIndex; - return (cuboidID & mask) != 0; - } - - private static RowKeyColInfo extractRowKeyInfo(CubeDesc cubeDesc) { - RowKeyDesc rowKeyDesc = cubeDesc.getRowkey(); - RowKeyColInfo info = new RowKeyColInfo(); - info.hierachyColBitIndice = new ArrayList<List<Integer>>(); - info.nonHierachyColBitIndice = new ArrayList<Integer>(); - HashSet<Integer> heirachyIndexSet = new HashSet<Integer>(); - - for (DimensionDesc dim : cubeDesc.getDimensions()) { - if (dim.getHierarchy() != null) { - LinkedList<Integer> hlist = new LinkedList<Integer>(); - for (HierarchyDesc hierarchyDesc : dim.getHierarchy()) { - int index = rowKeyDesc.getColumnBitIndex(hierarchyDesc.getColumnRef()); - hlist.add(index); - heirachyIndexSet.add(index); - } - info.hierachyColBitIndice.add(hlist); - } - } - - for (int i = 0; i < rowKeyDesc.getRowKeyColumns().length; ++i) { - if (!heirachyIndexSet.contains(i)) { - info.nonHierachyColBitIndice.add(i); - } - } - - return info; - } - -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java ---------------------------------------------------------------------- diff --git a/cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java 
b/cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java deleted file mode 100644 index 532950b..0000000 --- a/cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.kylin.cube.kv; - -import java.util.Map; - -import org.apache.kylin.cube.CubeSegment; -import org.apache.kylin.cube.cuboid.Cuboid; -import org.apache.kylin.common.util.Dictionary; -import org.apache.kylin.metadata.model.TblColRef; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * - * @author xjiang - * - */ -public abstract class AbstractRowKeyEncoder { - - public static final byte DEFAULT_BLANK_BYTE = Dictionary.NULL; - - protected static final Logger logger = LoggerFactory.getLogger(AbstractRowKeyEncoder.class); - - public static AbstractRowKeyEncoder createInstance(CubeSegment cubeSeg, Cuboid cuboid) { - return new RowKeyEncoder(cubeSeg, cuboid); - } - - protected final Cuboid cuboid; - protected byte blankByte = DEFAULT_BLANK_BYTE; - - protected AbstractRowKeyEncoder(Cuboid cuboid) { - this.cuboid = cuboid; - } - - public void setBlankByte(byte blankByte) { - this.blankByte = blankByte; - } - - abstract public byte[] encode(Map<TblColRef, String> valueMap); - - abstract public byte[] encode(byte[][] values); -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/cube/src/main/java/org/apache/kylin/cube/kv/FuzzyKeyEncoder.java ---------------------------------------------------------------------- diff --git a/cube/src/main/java/org/apache/kylin/cube/kv/FuzzyKeyEncoder.java b/cube/src/main/java/org/apache/kylin/cube/kv/FuzzyKeyEncoder.java deleted file mode 100644 index f44affd..0000000 --- a/cube/src/main/java/org/apache/kylin/cube/kv/FuzzyKeyEncoder.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kylin.cube.kv; - -import java.util.Arrays; - -import org.apache.kylin.cube.CubeSegment; -import org.apache.kylin.cube.cuboid.Cuboid; - -/** - * - * @author xjiang - * - */ -public class FuzzyKeyEncoder extends RowKeyEncoder { - - public FuzzyKeyEncoder(CubeSegment seg, Cuboid cuboid) { - super(seg, cuboid); - } - - @Override - protected byte[] defaultValue(int length) { - byte[] keyBytes = new byte[length]; - Arrays.fill(keyBytes, RowConstants.FUZZY_MASK_ZERO); - return keyBytes; - } -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/cube/src/main/java/org/apache/kylin/cube/kv/FuzzyMaskEncoder.java ---------------------------------------------------------------------- diff --git a/cube/src/main/java/org/apache/kylin/cube/kv/FuzzyMaskEncoder.java b/cube/src/main/java/org/apache/kylin/cube/kv/FuzzyMaskEncoder.java deleted file mode 100644 index 6aae631..0000000 --- a/cube/src/main/java/org/apache/kylin/cube/kv/FuzzyMaskEncoder.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kylin.cube.kv; - -import java.util.Arrays; - -import org.apache.kylin.cube.CubeSegment; -import org.apache.kylin.cube.cuboid.Cuboid; -import org.apache.kylin.metadata.model.TblColRef; - -/** - * - * @author xjiang - * - */ -public class FuzzyMaskEncoder extends RowKeyEncoder { - - public FuzzyMaskEncoder(CubeSegment seg, Cuboid cuboid) { - super(seg, cuboid); - } - - @Override - protected int fillHeader(byte[] bytes, byte[][] values) { - // always fuzzy match cuboid ID to lock on the selected cuboid - int cuboidStart = this.headerLength - RowConstants.ROWKEY_CUBOIDID_LEN; - Arrays.fill(bytes, 0, cuboidStart, RowConstants.FUZZY_MASK_ONE); - Arrays.fill(bytes, cuboidStart, this.headerLength, RowConstants.FUZZY_MASK_ZERO); - return this.headerLength; - } - - @Override - protected void fillColumnValue(TblColRef column, int columnLen, byte[] value, int valueLen, byte[] outputValue, int outputValueOffset) { - if (value == null) { - Arrays.fill(outputValue, outputValueOffset, outputValueOffset + columnLen, RowConstants.FUZZY_MASK_ONE); - } else { - Arrays.fill(outputValue, outputValueOffset, outputValueOffset + columnLen, RowConstants.FUZZY_MASK_ZERO); - } - } -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/cube/src/main/java/org/apache/kylin/cube/kv/RowConstants.java ---------------------------------------------------------------------- diff --git a/cube/src/main/java/org/apache/kylin/cube/kv/RowConstants.java b/cube/src/main/java/org/apache/kylin/cube/kv/RowConstants.java deleted file mode 100644 index 
/**
 * Byte-level constants shared by the row key and row value encoders.
 *
 * @author xjiang
 */
public class RowConstants {

    /** Filler appended to fixed-length row key columns that are shorter than their slot. */
    public static final byte ROWKEY_PLACE_HOLDER_BYTE = 9;

    /** Lower bound byte for row keys. */
    public static final byte ROWKEY_LOWER_BYTE = 0;

    /** Upper bound byte for row keys. */
    public static final byte ROWKEY_UPPER_BYTE = (byte) 0xFF;

    /** Number of bytes the cuboid id occupies in a row key. */
    public static final int ROWKEY_CUBOIDID_LEN = 8;

    /** Fuzzy mask value zero. */
    public static final byte FUZZY_MASK_ZERO = 0;

    /** Fuzzy mask value one. */
    public static final byte FUZZY_MASK_ONE = 1;

    /** Row value delimiter as a single byte. */
    public static final byte ROWVALUE_DELIMITER_BYTE = 7;

    /** Row value delimiter as a one-character string. */
    public static final String ROWVALUE_DELIMITER_STRING = String.valueOf((char) ROWVALUE_DELIMITER_BYTE);

    /** Row value delimiter as a one-element byte array. */
    public static final byte[] ROWVALUE_DELIMITER_BYTES = { ROWVALUE_DELIMITER_BYTE };

    /** Row value buffer size: 1 MB. */
    public static final int ROWVALUE_BUFFER_SIZE = 1 << 20;

    /** Zero-length marker array, usable as the type token for {@code toArray}. */
    public static final byte[][] BYTE_ARR_MARKER = new byte[0][];

}
http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java ---------------------------------------------------------------------- diff --git a/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java b/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java deleted file mode 100644 index 00ecd46..0000000 --- a/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.kylin.cube.kv; - -import java.util.Arrays; - -import org.apache.kylin.common.util.Bytes; -import org.apache.kylin.common.util.BytesUtil; -import org.apache.kylin.common.util.Dictionary; -import org.apache.kylin.dict.ISegment; -import org.apache.kylin.metadata.model.TblColRef; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Read/Write column values from/into bytes - * - * @author yangli9 - */ -@SuppressWarnings("unchecked") -public class RowKeyColumnIO { - - private static final Logger logger = LoggerFactory.getLogger(RowKeyColumnIO.class); - - private ISegment ISegment; - - public RowKeyColumnIO(ISegment ISegment) { - this.ISegment = ISegment; - } - - public int getColumnLength(TblColRef col) { - return ISegment.getColumnLength(col); - } - - //TODO is type cast really necessary here? - public Dictionary<String> getDictionary(TblColRef col) { - return (Dictionary<String>) ISegment.getDictionary(col); - } - - public void writeColumn(TblColRef column, byte[] value, int valueLen, byte dft, byte[] output, int outputOffset) { - writeColumn(column, value, valueLen, 0, dft, output, outputOffset); - } - - public void writeColumn(TblColRef column, byte[] value, int valueLen, int roundingFlag, byte dft, byte[] output, int outputOffset) { - - Dictionary<String> dict = getDictionary(column); - int columnLen = getColumnLength(column); - - // non-dict value - if (dict == null) { - byte[] valueBytes = padFixLen(columnLen, value); - System.arraycopy(valueBytes, 0, output, outputOffset, columnLen); - return; - } - - // dict value - try { - int id = dict.getIdFromValueBytes(value, 0, valueLen, roundingFlag); - BytesUtil.writeUnsigned(id, output, outputOffset, dict.getSizeOfId()); - } catch (IllegalArgumentException ex) { - for (int i = outputOffset; i < outputOffset + columnLen; i++) - output[i] = dft; - logger.error("Can't translate value " + Bytes.toString(value, 0, valueLen) + " to dictionary ID, roundingFlag " + roundingFlag + ". 
Using default value " + String.format("\\x%02X", dft)); - } - } - - private byte[] padFixLen(int length, byte[] valueBytes) { - int valLen = valueBytes.length; - if (valLen == length) { - return valueBytes; - } else if (valLen < length) { - byte[] newValueBytes = new byte[length]; - System.arraycopy(valueBytes, 0, newValueBytes, 0, valLen); - Arrays.fill(newValueBytes, valLen, length, RowConstants.ROWKEY_PLACE_HOLDER_BYTE); - return newValueBytes; - } else { - return Arrays.copyOf(valueBytes, length); - } - } - - public String readColumnString(TblColRef col, byte[] bytes, int bytesLen) { - Dictionary<String> dict = getDictionary(col); - if (dict == null) { - bytes = Bytes.head(bytes, bytesLen); - if (isNull(bytes)) { - return null; - } - bytes = removeFixLenPad(bytes, 0); - return Bytes.toString(bytes); - } else { - int id = BytesUtil.readUnsigned(bytes, 0, bytesLen); - try { - String value = dict.getValueFromId(id); - return value; - } catch (IllegalArgumentException e) { - logger.error("Can't get dictionary value for column " + col.getName() + " (id = " + id + ")"); - return ""; - } - } - } - - private boolean isNull(byte[] bytes) { - // all 0xFF is NULL - if (bytes.length == 0) - return false; - for (int i = 0; i < bytes.length; i++) { - if (bytes[i] != AbstractRowKeyEncoder.DEFAULT_BLANK_BYTE) - return false; - } - return true; - } - - private byte[] removeFixLenPad(byte[] bytes, int offset) { - int padCount = 0; - for (int i = offset; i < bytes.length; i++) { - byte vb = bytes[i]; - if (vb == RowConstants.ROWKEY_PLACE_HOLDER_BYTE) { - padCount++; - } - } - - int size = bytes.length - offset - padCount; - byte[] stripBytes = new byte[size]; - int index = 0; - for (int i = offset; i < bytes.length; i++) { - byte vb = bytes[i]; - if (vb != RowConstants.ROWKEY_PLACE_HOLDER_BYTE) { - stripBytes[index++] = vb; - } - } - return stripBytes; - } - -} 
http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnOrder.java ---------------------------------------------------------------------- diff --git a/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnOrder.java b/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnOrder.java deleted file mode 100644 index 64bc813..0000000 --- a/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnOrder.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.kylin.cube.kv; - -import java.util.Collection; -import java.util.Comparator; - -import org.apache.kylin.metadata.datatype.DataType; - -/** - * @author yangli9 - */ -abstract public class RowKeyColumnOrder implements Comparator<String> { - - public static final NumberOrder NUMBER_ORDER = new NumberOrder(); - public static final StringOrder STRING_ORDER = new StringOrder(); - - public static RowKeyColumnOrder getInstance(DataType type) { - if (type.isNumberFamily()) - return NUMBER_ORDER; - else - return STRING_ORDER; - } - - public String max(Collection<String> values) { - String max = null; - for (String v : values) { - if (max == null || compare(max, v) < 0) - max = v; - } - return max; - } - - public String min(Collection<String> values) { - String min = null; - for (String v : values) { - if (min == null || compare(min, v) > 0) - min = v; - } - return min; - } - - public String min(String v1, String v2) { - if (v1 == null) - return v2; - else if (v2 == null) - return v1; - else - return compare(v1, v2) <= 0 ? v1 : v2; - } - - public String max(String v1, String v2) { - if (v1 == null) - return v2; - else if (v2 == null) - return v1; - else - return compare(v1, v2) >= 0 ? 
v1 : v2; - } - - @Override - public int compare(String o1, String o2) { - // consider null - if (o1 == o2) - return 0; - if (o1 == null) - return -1; - if (o2 == null) - return 1; - - return compareNonNull(o1, o2); - } - - abstract int compareNonNull(String o1, String o2); - - private static class StringOrder extends RowKeyColumnOrder { - @Override - public int compareNonNull(String o1, String o2) { - return o1.compareTo(o2); - } - } - - private static class NumberOrder extends RowKeyColumnOrder { - @Override - public int compareNonNull(String o1, String o2) { - double d1 = Double.parseDouble(o1); - double d2 = Double.parseDouble(o2); - return Double.compare(d1, d2); - } - } - -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyDecoder.java ---------------------------------------------------------------------- diff --git a/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyDecoder.java b/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyDecoder.java deleted file mode 100644 index d599abc..0000000 --- a/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyDecoder.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.kylin.cube.kv; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import org.apache.kylin.common.util.SplittedBytes; -import org.apache.kylin.cube.CubeSegment; -import org.apache.kylin.cube.common.RowKeySplitter; -import org.apache.kylin.cube.cuboid.Cuboid; -import org.apache.kylin.cube.model.CubeDesc; -import org.apache.kylin.metadata.model.TblColRef; - -/** - * - * @author xjiang - * - */ -public class RowKeyDecoder { - - private final CubeDesc cubeDesc; - private final RowKeyColumnIO colIO; - private final RowKeySplitter rowKeySplitter; - - private Cuboid cuboid; - private List<String> names; - private List<String> values; - - public RowKeyDecoder(CubeSegment cubeSegment) { - this.cubeDesc = cubeSegment.getCubeDesc(); - this.rowKeySplitter = new RowKeySplitter(cubeSegment, 65, 255); - this.colIO = new RowKeyColumnIO(cubeSegment); - this.values = new ArrayList<String>(); - } - - public long decode(byte[] bytes) throws IOException { - this.values.clear(); - - long cuboidId = rowKeySplitter.split(bytes, bytes.length); - initCuboid(cuboidId); - - SplittedBytes[] splits = rowKeySplitter.getSplitBuffers(); - - int offset = 1; // skip cuboid id part - - for (int i = 0; i < this.cuboid.getColumns().size(); i++) { - TblColRef col = this.cuboid.getColumns().get(i); - collectValue(col, splits[offset].value, splits[offset].length); - offset++; - } - - return cuboidId; - } - - private void initCuboid(long cuboidID) { - if (this.cuboid != null && this.cuboid.getId() == cuboidID) { - return; - } - this.cuboid = Cuboid.findById(cubeDesc, cuboidID); - } - - private void collectValue(TblColRef col, byte[] valueBytes, int length) throws IOException { - String strValue = colIO.readColumnString(col, valueBytes, length); - values.add(strValue); - } - - public RowKeySplitter getRowKeySplitter() { - return rowKeySplitter; - } - - public void setCuboid(Cuboid cuboid) { - this.cuboid = cuboid; - 
this.names = null; - } - - public List<String> getNames(Map<TblColRef, String> aliasMap) { - if (names == null) { - names = buildNameList(aliasMap); - } - return names; - } - - private List<String> buildNameList(Map<TblColRef, String> aliasMap) { - List<TblColRef> columnList = getColumns(); - List<String> result = new ArrayList<String>(columnList.size()); - for (TblColRef col : columnList) - result.add(findName(col, aliasMap)); - return result; - } - - private String findName(TblColRef column, Map<TblColRef, String> aliasMap) { - String name = null; - if (aliasMap != null) { - name = aliasMap.get(column); - } - if (name == null) { - name = column.getName(); - } - return name; - } - - public List<TblColRef> getColumns() { - return cuboid.getColumns(); - } - - public List<String> getValues() { - return values; - } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder(); - buf.append(cuboid.getId()); - for (Object value : values) { - buf.append(","); - buf.append(value); - } - return buf.toString(); - } - -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java ---------------------------------------------------------------------- diff --git a/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java b/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java deleted file mode 100644 index 90676ba..0000000 --- a/cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kylin.cube.kv; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; - -import org.apache.kylin.common.util.Bytes; -import org.apache.kylin.cube.CubeSegment; -import org.apache.kylin.cube.cuboid.Cuboid; -import org.apache.kylin.metadata.model.TblColRef; - -/** - * @author George Song (ysong1) - */ -public class RowKeyEncoder extends AbstractRowKeyEncoder { - - private int bytesLength; - protected int headerLength; - private RowKeyColumnIO colIO; - - protected RowKeyEncoder(CubeSegment cubeSeg, Cuboid cuboid) { - super(cuboid); - colIO = new RowKeyColumnIO(cubeSeg); - bytesLength = headerLength = RowConstants.ROWKEY_CUBOIDID_LEN; // header - for (TblColRef column : cuboid.getColumns()) { - bytesLength += colIO.getColumnLength(column); - } - } - - public RowKeyColumnIO getColumnIO() { - return colIO; - } - - public int getColumnOffset(TblColRef col) { - int offset = RowConstants.ROWKEY_CUBOIDID_LEN; - - for (TblColRef dimCol : cuboid.getColumns()) { - if (col.equals(dimCol)) - return offset; - offset += colIO.getColumnLength(dimCol); - } - - throw new IllegalArgumentException("Column " + col + " not found on cuboid " + cuboid); - } - - public int getColumnLength(TblColRef col) { - return colIO.getColumnLength(col); - } - - public int getRowKeyLength() { - return bytesLength; - } - - public int getHeaderLength() { - return headerLength; - } - - @Override - public byte[] encode(Map<TblColRef, String> valueMap) { - List<byte[]> valueList = new ArrayList<byte[]>(); - for (TblColRef 
bdCol : cuboid.getColumns()) { - String value = valueMap.get(bdCol); - valueList.add(valueStringToBytes(value)); - } - byte[][] values = valueList.toArray(RowConstants.BYTE_ARR_MARKER); - return encode(values); - } - - public byte[] valueStringToBytes(String value) { - if (value == null) - return null; - else - return Bytes.toBytes(value); - } - - @Override - public byte[] encode(byte[][] values) { - byte[] bytes = new byte[this.bytesLength]; - int offset = fillHeader(bytes, values); - - for (int i = 0; i < cuboid.getColumns().size(); i++) { - TblColRef column = cuboid.getColumns().get(i); - int colLength = colIO.getColumnLength(column); - byte[] value = values[i]; - if (value == null) { - fillColumnValue(column, colLength, null, 0, bytes, offset); - } else { - fillColumnValue(column, colLength, value, value.length, bytes, offset); - } - offset += colLength; - - } - return bytes; - } - - protected int fillHeader(byte[] bytes, byte[][] values) { - int offset = 0; - System.arraycopy(cuboid.getBytes(), 0, bytes, offset, RowConstants.ROWKEY_CUBOIDID_LEN); - offset += RowConstants.ROWKEY_CUBOIDID_LEN; - if (this.headerLength != offset) { - throw new IllegalStateException("Expected header length is " + headerLength + ". 
But the offset is " + offset); - } - return offset; - } - - protected void fillColumnValue(TblColRef column, int columnLen, byte[] value, int valueLen, byte[] outputValue, int outputValueOffset) { - // special null value case - if (value == null) { - byte[] valueBytes = defaultValue(columnLen); - System.arraycopy(valueBytes, 0, outputValue, outputValueOffset, columnLen); - return; - } - - colIO.writeColumn(column, value, valueLen, this.blankByte, outputValue, outputValueOffset); - } - - protected byte[] defaultValue(int length) { - byte[] values = new byte[length]; - Arrays.fill(values, this.blankByte); - return values; - } - -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/cube/src/main/java/org/apache/kylin/cube/kv/RowValueDecoder.java ---------------------------------------------------------------------- diff --git a/cube/src/main/java/org/apache/kylin/cube/kv/RowValueDecoder.java b/cube/src/main/java/org/apache/kylin/cube/kv/RowValueDecoder.java deleted file mode 100644 index 5fe4e2e..0000000 --- a/cube/src/main/java/org/apache/kylin/cube/kv/RowValueDecoder.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.kylin.cube.kv; - -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.BitSet; -import java.util.Collection; -import java.util.List; - -import org.apache.hadoop.io.DoubleWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.kylin.cube.model.HBaseColumnDesc; -import org.apache.kylin.measure.MeasureCodec; -import org.apache.kylin.metadata.model.FunctionDesc; -import org.apache.kylin.metadata.model.MeasureDesc; - -/** - * - * @author xjiang - * - */ -public class RowValueDecoder implements Cloneable { - - private final HBaseColumnDesc hbaseColumn; - private final MeasureCodec codec; - private final BitSet projectionIndex; - private final MeasureDesc[] measures; - private final List<String> names; - private Object[] values; - - public RowValueDecoder(RowValueDecoder rowValueDecoder) { - this.hbaseColumn = rowValueDecoder.getHBaseColumn(); - this.projectionIndex = rowValueDecoder.getProjectionIndex(); - this.names = new ArrayList<String>(); - this.measures = hbaseColumn.getMeasures(); - for (MeasureDesc measure : measures) { - this.names.add(measure.getFunction().getRewriteFieldName()); - } - this.codec = new MeasureCodec(measures); - this.values = new Object[measures.length]; - } - - public RowValueDecoder(HBaseColumnDesc hbaseColumn) { - this.hbaseColumn = hbaseColumn; - this.projectionIndex = new BitSet(); - this.names = new ArrayList<String>(); - this.measures = hbaseColumn.getMeasures(); - for (MeasureDesc measure : measures) { - this.names.add(measure.getFunction().getRewriteFieldName()); - } - this.codec = new MeasureCodec(measures); - this.values = new Object[measures.length]; - } - - public void decode(byte[] bytes) { - codec.decode(ByteBuffer.wrap(bytes), values); - convertToJavaObjects(values, values); - } - - private void convertToJavaObjects(Object[] mapredObjs, Object[] results) { - for (int i = 0; i < mapredObjs.length; i++) { - Object o = mapredObjs[i]; - - // if (o instanceof 
LongWritable) - // o = ((LongWritable) o).get(); - // else if (o instanceof IntWritable) - // o = ((IntWritable) o).get(); - // else if (o instanceof DoubleWritable) - // o = ((DoubleWritable) o).get(); - // else if (o instanceof FloatWritable) - // o = ((FloatWritable) o).get(); - - if (o instanceof LongWritable) - o = ((LongWritable) o).get(); - else if (o instanceof DoubleWritable) - o = ((DoubleWritable) o).get(); - - results[i] = o; - } - } - - public void setIndex(int bitIndex) { - projectionIndex.set(bitIndex); - } - - public HBaseColumnDesc getHBaseColumn() { - return hbaseColumn; - } - - public BitSet getProjectionIndex() { - return projectionIndex; - } - - public Object[] getValues() { - return values; - } - - public List<String> getNames() { - return names; - } - - public MeasureDesc[] getMeasures() { - return measures; - } - - public boolean hasMemHungryMeasures() { - for (int i = projectionIndex.nextSetBit(0); i >= 0; i = projectionIndex.nextSetBit(i + 1)) { - FunctionDesc func = measures[i].getFunction(); - if (func.getMeasureType().isMemoryHungry()) - return true; - } - return false; - } - - public static boolean hasMemHungryMeasures(Collection<RowValueDecoder> rowValueDecoders) { - for (RowValueDecoder decoder : rowValueDecoders) { - if (decoder.hasMemHungryMeasures()) - return true; - } - return false; - } -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/cube/src/main/java/org/apache/kylin/cube/model/CubeBuildTypeEnum.java ---------------------------------------------------------------------- diff --git a/cube/src/main/java/org/apache/kylin/cube/model/CubeBuildTypeEnum.java b/cube/src/main/java/org/apache/kylin/cube/model/CubeBuildTypeEnum.java deleted file mode 100644 index 53c7135..0000000 --- a/cube/src/main/java/org/apache/kylin/cube/model/CubeBuildTypeEnum.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
/**
 * The kinds of build that can be requested for a cube.
 *
 * @author xduo
 */
public enum CubeBuildTypeEnum {

    /** Rebuild a segment, or build incrementally. */
    BUILD,

    /** Merge segments. */
    MERGE,

    /** Refresh segments. */
    REFRESH
}
