Repository: kylin Updated Branches: refs/heads/master 2850586c8 -> 4bee8d9ef
KYLIN-1492 Make custom dimension encoding extension point Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/4bee8d9e Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/4bee8d9e Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/4bee8d9e Branch: refs/heads/master Commit: 4bee8d9efe489ab874e14a23a78724f3f8e6d596 Parents: 2850586 Author: Li Yang <[email protected]> Authored: Tue Mar 22 10:26:27 2016 +0800 Committer: Li Yang <[email protected]> Committed: Tue Mar 22 10:26:40 2016 +0800 ---------------------------------------------------------------------- .../apache/kylin/common/KylinConfigBase.java | 6 +- .../apache/kylin/common/util/StringUtil.java | 8 + .../cube/gridtable/TrimmedCubeCodeSystem.java | 13 + .../org/apache/kylin/cube/kv/CubeDimEncMap.java | 7 +- .../apache/kylin/cube/model/RowKeyColDesc.java | 244 +++++++++---------- .../org/apache/kylin/cube/model/RowKeyDesc.java | 13 - .../kylin/dimension/DictionaryDimEnc.java | 26 +- .../kylin/dimension/DimensionEncoding.java | 4 +- .../apache/kylin/dimension/FixedLenDimEnc.java | 36 ++- 9 files changed, 204 insertions(+), 153 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/4bee8d9e/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java ---------------------------------------------------------------------- diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index 82f5434..1603d2a 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -368,13 +368,17 @@ public class KylinConfigBase implements Serializable { } public double getCubeAlgorithmAutoThreshold() { - return Double.parseDouble(getOptional("kylin.cube.algorithm.auto.threshold", "8")); + return Double.parseDouble(getOptional("kylin.cube.algorithm.autoThreshold", "8")); } public int getCubeAggrGroupMaxSize() { return Integer.parseInt(getOptional("kylin.cube.aggrgroup.max.size", "12")); } + public String[] getCubeDimensionCustomEncodingFactories() { + return getOptionalStringArray("kylin.cube.dimension.customEncodingFactories", new String[0]); + } + public int getDictionaryMaxCardinality() { return Integer.parseInt(getOptional("kylin.dictionary.max.cardinality", "5000000")); } http://git-wip-us.apache.org/repos/asf/kylin/blob/4bee8d9e/core-common/src/main/java/org/apache/kylin/common/util/StringUtil.java ---------------------------------------------------------------------- diff --git a/core-common/src/main/java/org/apache/kylin/common/util/StringUtil.java b/core-common/src/main/java/org/apache/kylin/common/util/StringUtil.java index 842f656..43a469f 100644 --- a/core-common/src/main/java/org/apache/kylin/common/util/StringUtil.java +++ b/core-common/src/main/java/org/apache/kylin/common/util/StringUtil.java @@ -116,4 +116,12 @@ public class StringUtil { return s1.compareTo(s2) > 0 ? s1 : s2; } + public static String[] subArray(String[] array, int start, int endExclusive) { + if (start < 0 || start > endExclusive || endExclusive > array.length) + throw new IllegalArgumentException(); + String[] result = new String[endExclusive - start]; + System.arraycopy(array, start, result, 0, endExclusive - start); + return result; + } + } http://git-wip-us.apache.org/repos/asf/kylin/blob/4bee8d9e/core-cube/src/main/java/org/apache/kylin/cube/gridtable/TrimmedCubeCodeSystem.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/TrimmedCubeCodeSystem.java b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/TrimmedCubeCodeSystem.java index e5169d2..b892520 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/TrimmedCubeCodeSystem.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/TrimmedCubeCodeSystem.java @@ -20,6 +20,9 @@ package org.apache.kylin.cube.gridtable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; import java.nio.ByteBuffer; import java.util.Map; @@ -200,6 +203,16 @@ public class TrimmedCubeCodeSystem implements IGTCodeSystem { public DataTypeSerializer<Object> asDataTypeSerializer() { throw new UnsupportedOperationException(); } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + throw new UnsupportedOperationException(); + } } static class TrimmedDimensionSerializer extends DataTypeSerializer<Object> { http://git-wip-us.apache.org/repos/asf/kylin/blob/4bee8d9e/core-cube/src/main/java/org/apache/kylin/cube/kv/CubeDimEncMap.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/kv/CubeDimEncMap.java b/core-cube/src/main/java/org/apache/kylin/cube/kv/CubeDimEncMap.java index e8c5266..75ea117 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/kv/CubeDimEncMap.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/kv/CubeDimEncMap.java @@ -26,6 +26,7 @@ import org.apache.kylin.cube.model.RowKeyColDesc; import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.dimension.DictionaryDimEnc; import org.apache.kylin.dimension.DimensionEncoding; +import org.apache.kylin.dimension.DimensionEncodingFactory; import org.apache.kylin.dimension.FixedLenDimEnc; import org.apache.kylin.dimension.IDimensionEncodingMap; import org.apache.kylin.metadata.model.TblColRef; @@ -61,7 +62,7 @@ public class CubeDimEncMap implements IDimensionEncodingMap { if (result == null) { RowKeyColDesc colDesc = cubeDesc.getRowkey().getColDesc(col); if (colDesc.isUsingDictionary()) { - // dictionary encoding + // special dictionary encoding Dictionary<String> dict = getDictionary(col); if (dict == null) { logger.warn("No dictionary found for dict-encoding column " + col + ", segment " + seg); @@ -70,8 +71,8 @@ public class CubeDimEncMap implements IDimensionEncodingMap { result = new DictionaryDimEnc(dict); } } else { - // fixed length encoding - result = new FixedLenDimEnc(colDesc.getLength()); + // normal case + DimensionEncodingFactory.create(colDesc.getEncodingName(), colDesc.getEncodingArgs()); } encMap.put(col, result); } http://git-wip-us.apache.org/repos/asf/kylin/blob/4bee8d9e/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyColDesc.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyColDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyColDesc.java index 7c26e8f..452d689 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyColDesc.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyColDesc.java @@ -1,129 +1,115 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ - -package org.apache.kylin.cube.model; - -import org.apache.commons.lang.StringUtils; -import org.apache.kylin.cube.kv.RowConstants; -import org.apache.kylin.metadata.model.TblColRef; - -import com.fasterxml.jackson.annotation.JsonAutoDetect; -import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Objects; -import com.google.common.base.Preconditions; - -/** - * @author yangli9 - * - */ -@JsonAutoDetect(fieldVisibility = Visibility.NONE, getterVisibility = Visibility.NONE, isGetterVisibility = Visibility.NONE, setterVisibility = Visibility.NONE) -public class RowKeyColDesc { - - public enum ColEncodingType { - DICT, FIXED_LEN - } - - public class ColEncoding { - public ColEncodingType type; - public Object param; - - public ColEncoding(ColEncodingType type, Object param) { - this.type = type; - this.param = param; - } - } - - @JsonProperty("column") - private String column; - @JsonProperty("encoding") - private String encoding; - - // computed - private ColEncoding colEncoding; - private int bitIndex; - private TblColRef colRef; - - public void init() { - - //dict or fix length? - Preconditions.checkState(StringUtils.isNotEmpty(this.encoding)); - if (this.encoding.equalsIgnoreCase("dict")) { - this.colEncoding = new ColEncoding(ColEncodingType.DICT, null); - } else if (this.encoding.startsWith("fixed_length")) { - int length = RowConstants.ROWKEY_COL_DEFAULT_LENGTH; - if (this.encoding.indexOf(":") > 0) { - length = Integer.parseInt(this.encoding.substring(this.encoding.indexOf(":") + 1)); - } - this.colEncoding = new ColEncoding(ColEncodingType.FIXED_LEN, length); - } else { - throw new IllegalArgumentException("Not supported row key col encoding:" + this.encoding); - } - } - - public String getEncoding() { - return encoding; - } - - public void setEncoding(String encoding) { - this.encoding = encoding; - } - - public String getColumn() { - return column; - } - - public void setColumn(String column) { - this.column = column; - } - - public boolean isUsingDictionary() { - return this.colEncoding.type == ColEncodingType.DICT; - - } - - public int getLength() { - if (this.colEncoding.type == ColEncodingType.FIXED_LEN) { - return (Integer) this.colEncoding.param; - } else { - return 0; - } - } - - public int getBitIndex() { - return bitIndex; - } - - void setBitIndex(int index) { - this.bitIndex = index; - } - - public TblColRef getColRef() { - return colRef; - } - - void setColRef(TblColRef colRef) { - this.colRef = colRef; - } - - @Override - public String toString() { - return Objects.toStringHelper(this).add("column", column).add("encoding", encoding).toString(); - } - -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.cube.model; + +import org.apache.commons.lang.StringUtils; +import org.apache.kylin.common.util.StringUtil; +import org.apache.kylin.dimension.DictionaryDimEnc; +import org.apache.kylin.dimension.DimensionEncodingFactory; +import org.apache.kylin.metadata.model.TblColRef; + +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Objects; +import com.google.common.base.Preconditions; + +/** + * @author yangli9 + * + */ +@JsonAutoDetect(fieldVisibility = Visibility.NONE, getterVisibility = Visibility.NONE, isGetterVisibility = Visibility.NONE, setterVisibility = Visibility.NONE) +public class RowKeyColDesc { + + @JsonProperty("column") + private String column; + @JsonProperty("encoding") + private String encoding; + + // computed + private String encodingName; + private String[] encodingArgs; + private int bitIndex; + private TblColRef colRef; + + public void init() { + Preconditions.checkState(StringUtils.isNotEmpty(this.encoding)); + + String[] parts = this.encoding.split("\\s*[(),:]\\s*"); + if (parts == null || parts.length == 0 || parts[0].isEmpty()) + throw new IllegalArgumentException("Not supported row key col encoding: '" + this.encoding + "'"); + + this.encodingName = parts[0]; + this.encodingArgs = parts[parts.length - 1].isEmpty() // + ? StringUtil.subArray(parts, 1, parts.length - 1) : StringUtil.subArray(parts, 1, parts.length); + + if (!DimensionEncodingFactory.isVaildEncoding(this.encodingName)) + throw new IllegalArgumentException("Not supported row key col encoding: '" + this.encoding + "'"); + } + + public String getEncoding() { + return encoding; + } + + public void setEncoding(String encoding) { + this.encoding = encoding; + } + + public String getColumn() { + return column; + } + + public void setColumn(String column) { + this.column = column; + } + + public String getEncodingName() { + return encodingName; + } + + public String[] getEncodingArgs() { + return encodingArgs; + } + + public boolean isUsingDictionary() { + return DictionaryDimEnc.ENCODING_NAME.equals(encodingName); + } + + public int getBitIndex() { + return bitIndex; + } + + void setBitIndex(int index) { + this.bitIndex = index; + } + + public TblColRef getColRef() { + return colRef; + } + + void setColRef(TblColRef colRef) { + this.colRef = colRef; + } + + @Override + public String toString() { + return Objects.toStringHelper(this).add("column", column).add("encoding", encoding).toString(); + } + +} http://git-wip-us.apache.org/repos/asf/kylin/blob/4bee8d9e/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java index 3f33505..6052c08 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java @@ -46,19 +46,6 @@ public class RowKeyDesc { return rowkeyColumns; } - // public int getNCuboidBuildLevels() { - // // N aggregation columns requires N levels of cuboid build - // // - N columns requires N-1 levels build - // // - zero tail cuboid needs one more additional level - // Set<String> aggDims = new HashSet<String>(); - // for (String[] aggrGroup : aggregationGroups) { - // for (String dim : aggrGroup) { - // aggDims.add(dim); - // } - // } - // return aggDims.size(); - // } - public void setCubeDesc(CubeDesc cubeRef) { this.cubeDesc = cubeRef; } http://git-wip-us.apache.org/repos/asf/kylin/blob/4bee8d9e/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java b/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java index bfb0e28..7f80a7f 100644 --- a/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java +++ b/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java @@ -18,6 +18,9 @@ package org.apache.kylin.dimension; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; import java.nio.ByteBuffer; import org.apache.kylin.common.util.Bytes; @@ -30,6 +33,10 @@ public class DictionaryDimEnc extends DimensionEncoding { private static final Logger logger = LoggerFactory.getLogger(DictionaryDimEnc.class); + public static final String ENCODING_NAME = "dict"; + + // ============================================================================ + // could use a lazy loading trick here, to prevent loading all dictionaries of a segment at once private final Dictionary<String> dict; private final int fixedLen; @@ -52,21 +59,21 @@ public class DictionaryDimEnc extends DimensionEncoding { public int getRoundingFlag() { return roundingFlag; } - + public DictionaryDimEnc copy(int roundingFlag) { if (this.roundingFlag == roundingFlag) return this; else return new DictionaryDimEnc(dict, roundingFlag, defaultByte); } - + public DictionaryDimEnc copy(int roundingFlag, byte defaultByte) { if (this.roundingFlag == roundingFlag && this.defaultByte == defaultByte) return this; else return new DictionaryDimEnc(dict, roundingFlag, defaultByte); } - + public Dictionary<String> getDictionary() { return dict; } @@ -133,5 +140,16 @@ public class DictionaryDimEnc extends DimensionEncoding { public int getStorageBytesEstimate() { return dict.getSizeOfId(); } - }; + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + throw new UnsupportedOperationException(); + } + } http://git-wip-us.apache.org/repos/asf/kylin/blob/4bee8d9e/core-metadata/src/main/java/org/apache/kylin/dimension/DimensionEncoding.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/DimensionEncoding.java b/core-metadata/src/main/java/org/apache/kylin/dimension/DimensionEncoding.java index 74c0555..5c32114 100644 --- a/core-metadata/src/main/java/org/apache/kylin/dimension/DimensionEncoding.java +++ b/core-metadata/src/main/java/org/apache/kylin/dimension/DimensionEncoding.java @@ -18,6 +18,8 @@ package org.apache.kylin.dimension; +import java.io.Externalizable; + import org.apache.kylin.metadata.datatype.DataTypeSerializer; /** @@ -29,7 +31,7 @@ import org.apache.kylin.metadata.datatype.DataTypeSerializer; * hard to enumerate all values of a encoding, thus TupleFilterDictionaryTranslater * cannot work on DimensionEncoding. */ -public abstract class DimensionEncoding { +public abstract class DimensionEncoding implements Externalizable { // it's convention that all 0xff means NULL public static final byte NULL = (byte) 0xff; http://git-wip-us.apache.org/repos/asf/kylin/blob/4bee8d9e/core-metadata/src/main/java/org/apache/kylin/dimension/FixedLenDimEnc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/FixedLenDimEnc.java b/core-metadata/src/main/java/org/apache/kylin/dimension/FixedLenDimEnc.java index 40e9f6d..195bdb9 100644 --- a/core-metadata/src/main/java/org/apache/kylin/dimension/FixedLenDimEnc.java +++ b/core-metadata/src/main/java/org/apache/kylin/dimension/FixedLenDimEnc.java @@ -18,6 +18,9 @@ package org.apache.kylin.dimension; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; import java.nio.ByteBuffer; import java.util.Arrays; @@ -33,7 +36,26 @@ public class FixedLenDimEnc extends DimensionEncoding { // row key fixed length place holder public static final byte ROWKEY_PLACE_HOLDER_BYTE = 9; - private final int fixedLen; + public static final String ENCODING_NAME = "fixed_length"; + + public static DimensionEncodingFactory getFactory() { + return new DimensionEncodingFactory() { + + @Override + public String getSupportedEncodingName() { + return ENCODING_NAME; + } + + @Override + public DimensionEncoding createDimensionEncoding(String encodingName, String[] args) { + return new FixedLenDimEnc(Integer.parseInt(args[0])); + } + }; + }; + + // ============================================================================ + + private int fixedLen; transient private int avoidVerbose = 0; @@ -131,6 +153,16 @@ public class FixedLenDimEnc extends DimensionEncoding { public Object valueOf(String str) { return str; } - }; + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + out.writeShort(fixedLen); + } + + @Override + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + fixedLen = in.readShort(); + } }
