Repository: kylin
Updated Branches:
  refs/heads/master 3b2ebd243 -> 09da1e190
KYLIN-1486 Fix error of 'Too many digits for NumberDictionary' Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/09da1e19 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/09da1e19 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/09da1e19 Branch: refs/heads/master Commit: 09da1e19027cffcdd8d676b5caa5d444389c11e3 Parents: 3b2ebd2 Author: Yang Li <[email protected]> Authored: Sat Mar 26 18:41:33 2016 +0800 Committer: Yang Li <[email protected]> Committed: Sat Mar 26 18:41:33 2016 +0800 ---------------------------------------------------------------------- .../org/apache/kylin/dict/NumberDictionary.java | 461 ++++++++++--------- .../apache/kylin/dict/NumberDictionary2.java | 48 ++ .../kylin/dict/NumberDictionaryBuilder.java | 96 ++-- .../apache/kylin/dict/NumberDictionaryTest.java | 3 +- 4 files changed, 333 insertions(+), 275 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/09da1e19/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary.java index 828a928..f15f28f 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary.java @@ -1,226 +1,235 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ - -package org.apache.kylin.dict; - -import org.apache.kylin.common.util.Bytes; - -/** - * @author yangli9 - * - */ -public class NumberDictionary<T> extends TrieDictionary<T> { - - public static final int MAX_DIGITS_BEFORE_DECIMAL_POINT = 19; - - // encode a number into an order preserving byte sequence - // for positives -- padding '0' - // for negatives -- '-' sign, padding '9', invert digits, and terminate by ';' - static class NumberBytesCodec { - - byte[] buf = new byte[MAX_DIGITS_BEFORE_DECIMAL_POINT * 2]; - int bufOffset = 0; - int bufLen = 0; - - void encodeNumber(byte[] value, int offset, int len) { - if (len == 0) { - bufOffset = 0; - bufLen = 0; - return; - } - - if (len > buf.length) { - throw new IllegalArgumentException("Too many digits for NumberDictionary: " + Bytes.toString(value, offset, len) + ". Internal buffer is only " + buf.length + " bytes"); - } - - boolean negative = value[offset] == '-'; - - // terminate negative ';' - int start = buf.length - len; - int end = buf.length; - if (negative) { - start--; - end--; - buf[end] = ';'; - } - - // copy & find decimal point - int decimalPoint = end; - for (int i = start, j = offset; i < end; i++, j++) { - buf[i] = value[j]; - if (buf[i] == '.' 
&& i < decimalPoint) { - decimalPoint = i; - } - } - // remove '-' sign - if (negative) { - start++; - } - - // prepend '0' - int nZeroPadding = MAX_DIGITS_BEFORE_DECIMAL_POINT - (decimalPoint - start); - if (nZeroPadding < 0 || nZeroPadding + 1 > start) - throw new IllegalArgumentException("Too many digits for NumberDictionary: " + Bytes.toString(value, offset, len) + ". Expect " + MAX_DIGITS_BEFORE_DECIMAL_POINT + " digits before decimal point at max."); - for (int i = 0; i < nZeroPadding; i++) { - buf[--start] = '0'; - } - - // consider negative - if (negative) { - buf[--start] = '-'; - for (int i = start + 1; i < buf.length; i++) { - int c = buf[i]; - if (c >= '0' && c <= '9') { - buf[i] = (byte) ('9' - (c - '0')); - } - } - } else { - buf[--start] = '0'; - } - - bufOffset = start; - bufLen = buf.length - start; - } - - int decodeNumber(byte[] returnValue, int offset) { - if (bufLen == 0) { - return 0; - } - - int in = bufOffset; - int end = bufOffset + bufLen; - int out = offset; - - // sign - boolean negative = buf[in] == '-'; - if (negative) { - returnValue[out++] = '-'; - in++; - end--; - } - - // remove padding - byte padding = (byte) (negative ? 
'9' : '0'); - for (; in < end; in++) { - if (buf[in] != padding) - break; - } - - // all paddings before '.', special case for '0' - if (in == end || !(buf[in] >= '0' && buf[in] <= '9')) { - returnValue[out++] = '0'; - } - - // copy the rest - if (negative) { - for (; in < end; in++, out++) { - int c = buf[in]; - if (c >= '0' && c <= '9') { - c = '9' - (c - '0'); - } - returnValue[out] = (byte) c; - } - } else { - System.arraycopy(buf, in, returnValue, out, end - in); - out += end - in; - } - - return out - offset; - } - } - - static ThreadLocal<NumberBytesCodec> localCodec = new ThreadLocal<NumberBytesCodec>(); - - // ============================================================================ - - public NumberDictionary() { // default constructor for Writable interface - super(); - } - - public NumberDictionary(byte[] trieBytes) { - super(trieBytes); - } - - private NumberBytesCodec getCodec() { - NumberBytesCodec codec = localCodec.get(); - if (codec == null) { - codec = new NumberBytesCodec(); - localCodec.set(codec); - } - return codec; - } - - @Override - protected int getIdFromValueBytesImpl(byte[] value, int offset, int len, int roundingFlag) { - NumberBytesCodec codec = getCodec(); - codec.encodeNumber(value, offset, len); - return super.getIdFromValueBytesImpl(codec.buf, codec.bufOffset, codec.bufLen, roundingFlag); - } - - @Override - protected boolean isNullObjectForm(T value) { - return value == null || value.equals(""); - } - - @Override - protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int offset) { - NumberBytesCodec codec = getCodec(); - codec.bufOffset = 0; - codec.bufLen = super.getValueBytesFromIdImpl(id, codec.buf, 0); - return codec.decodeNumber(returnValue, offset); - } - - @Override - public void enableIdToValueBytesCache() { - enableIdToValueBytesCache(new EnableIdToValueBytesCacheVisitor() { - NumberBytesCodec codec = getCodec(); - byte[] tmp = new byte[getSizeOfValue()]; - - @Override - public byte[] getBuffer() { - 
return codec.buf; - } - - @Override - public byte[] makeValueBytes(byte[] buf, int length) { - // the given buf is the codec buf, which we returned in getBuffer() - codec.bufOffset = 0; - codec.bufLen = length; - int numLen = codec.decodeNumber(tmp, 0); - - byte[] result = new byte[numLen]; - System.arraycopy(tmp, 0, result, 0, numLen); - return result; - } - }); - } - - public static void main(String[] args) throws Exception { - NumberDictionaryBuilder<String> b = new NumberDictionaryBuilder<String>(new StringBytesConverter()); - b.addValue("10"); - b.addValue("100"); - b.addValue("40"); - b.addValue("7"); - TrieDictionary<String> dict = b.build(0); - - dict.enableIdToValueBytesCache(); - for (int i = 0; i <= dict.getMaxId(); i++) { - System.out.println(Bytes.toString(dict.getValueBytesFromId(i))); - } - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +package org.apache.kylin.dict; + +import org.apache.kylin.common.util.Bytes; + +/** + * @author yangli9 + * + */ +@SuppressWarnings("serial") +public class NumberDictionary<T> extends TrieDictionary<T> { + + public static final int MAX_DIGITS_BEFORE_DECIMAL_POINT_LEGACY = 16; + public static final int MAX_DIGITS_BEFORE_DECIMAL_POINT = 19; + + // encode a number into an order preserving byte sequence + // for positives -- padding '0' + // for negatives -- '-' sign, padding '9', invert digits, and terminate by ';' + static class NumberBytesCodec { + int maxDigitsBeforeDecimalPoint; + byte[] buf; + int bufOffset; + int bufLen; + + NumberBytesCodec(int maxDigitsBeforeDecimalPoint) { + this.maxDigitsBeforeDecimalPoint = maxDigitsBeforeDecimalPoint; + this.buf = new byte[maxDigitsBeforeDecimalPoint * 2]; + this.bufOffset = 0; + this.bufLen = 0; + } + + void encodeNumber(byte[] value, int offset, int len) { + if (len == 0) { + bufOffset = 0; + bufLen = 0; + return; + } + + if (len > buf.length) { + throw new IllegalArgumentException("Too many digits for NumberDictionary: " + Bytes.toString(value, offset, len) + ". Internal buffer is only " + buf.length + " bytes"); + } + + boolean negative = value[offset] == '-'; + + // terminate negative ';' + int start = buf.length - len; + int end = buf.length; + if (negative) { + start--; + end--; + buf[end] = ';'; + } + + // copy & find decimal point + int decimalPoint = end; + for (int i = start, j = offset; i < end; i++, j++) { + buf[i] = value[j]; + if (buf[i] == '.' && i < decimalPoint) { + decimalPoint = i; + } + } + // remove '-' sign + if (negative) { + start++; + } + + // prepend '0' + int nZeroPadding = maxDigitsBeforeDecimalPoint - (decimalPoint - start); + if (nZeroPadding < 0 || nZeroPadding + 1 > start) + throw new IllegalArgumentException("Too many digits for NumberDictionary: " + Bytes.toString(value, offset, len) + ". 
Expect " + maxDigitsBeforeDecimalPoint + " digits before decimal point at max."); + for (int i = 0; i < nZeroPadding; i++) { + buf[--start] = '0'; + } + + // consider negative + if (negative) { + buf[--start] = '-'; + for (int i = start + 1; i < buf.length; i++) { + int c = buf[i]; + if (c >= '0' && c <= '9') { + buf[i] = (byte) ('9' - (c - '0')); + } + } + } else { + buf[--start] = '0'; + } + + bufOffset = start; + bufLen = buf.length - start; + } + + int decodeNumber(byte[] returnValue, int offset) { + if (bufLen == 0) { + return 0; + } + + int in = bufOffset; + int end = bufOffset + bufLen; + int out = offset; + + // sign + boolean negative = buf[in] == '-'; + if (negative) { + returnValue[out++] = '-'; + in++; + end--; + } + + // remove padding + byte padding = (byte) (negative ? '9' : '0'); + for (; in < end; in++) { + if (buf[in] != padding) + break; + } + + // all paddings before '.', special case for '0' + if (in == end || !(buf[in] >= '0' && buf[in] <= '9')) { + returnValue[out++] = '0'; + } + + // copy the rest + if (negative) { + for (; in < end; in++, out++) { + int c = buf[in]; + if (c >= '0' && c <= '9') { + c = '9' - (c - '0'); + } + returnValue[out] = (byte) c; + } + } else { + System.arraycopy(buf, in, returnValue, out, end - in); + out += end - in; + } + + return out - offset; + } + } + + static ThreadLocal<NumberBytesCodec> localCodec = new ThreadLocal<NumberBytesCodec>(); + + // ============================================================================ + + public NumberDictionary() { // default constructor for Writable interface + super(); + } + + public NumberDictionary(byte[] trieBytes) { + super(trieBytes); + } + + protected NumberBytesCodec getCodec() { + NumberBytesCodec codec = localCodec.get(); + if (codec == null) { + codec = new NumberBytesCodec(MAX_DIGITS_BEFORE_DECIMAL_POINT_LEGACY); + localCodec.set(codec); + } + return codec; + } + + @Override + protected int getIdFromValueBytesImpl(byte[] value, int offset, int len, int 
roundingFlag) { + NumberBytesCodec codec = getCodec(); + codec.encodeNumber(value, offset, len); + return super.getIdFromValueBytesImpl(codec.buf, codec.bufOffset, codec.bufLen, roundingFlag); + } + + @Override + protected boolean isNullObjectForm(T value) { + return value == null || value.equals(""); + } + + @Override + protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int offset) { + NumberBytesCodec codec = getCodec(); + codec.bufOffset = 0; + codec.bufLen = super.getValueBytesFromIdImpl(id, codec.buf, 0); + return codec.decodeNumber(returnValue, offset); + } + + @Override + public void enableIdToValueBytesCache() { + enableIdToValueBytesCache(new EnableIdToValueBytesCacheVisitor() { + NumberBytesCodec codec = getCodec(); + byte[] tmp = new byte[getSizeOfValue()]; + + @Override + public byte[] getBuffer() { + return codec.buf; + } + + @Override + public byte[] makeValueBytes(byte[] buf, int length) { + // the given buf is the codec buf, which we returned in getBuffer() + codec.bufOffset = 0; + codec.bufLen = length; + int numLen = codec.decodeNumber(tmp, 0); + + byte[] result = new byte[numLen]; + System.arraycopy(tmp, 0, result, 0, numLen); + return result; + } + }); + } + + public static void main(String[] args) throws Exception { + NumberDictionaryBuilder<String> b = new NumberDictionaryBuilder<String>(new StringBytesConverter()); + b.addValue("10"); + b.addValue("100"); + b.addValue("40"); + b.addValue("7"); + TrieDictionary<String> dict = b.build(0); + + dict.enableIdToValueBytesCache(); + for (int i = 0; i <= dict.getMaxId(); i++) { + System.out.println(Bytes.toString(dict.getValueBytesFromId(i))); + } + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/09da1e19/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary2.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary2.java 
b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary2.java new file mode 100644 index 0000000..b5ce1ff --- /dev/null +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary2.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.dict; + +/** + * This class uses MAX_DIGITS_BEFORE_DECIMAL_POINT (=19) instead of legacy (=16). 
+ */ +@SuppressWarnings("serial") +public class NumberDictionary2<T> extends NumberDictionary<T> { + + static ThreadLocal<NumberBytesCodec> localCodec = new ThreadLocal<NumberBytesCodec>(); + + // ============================================================================ + + public NumberDictionary2() { // default constructor for Writable interface + super(); + } + + public NumberDictionary2(byte[] trieBytes) { + super(trieBytes); + } + + protected NumberBytesCodec getCodec() { + NumberBytesCodec codec = localCodec.get(); + if (codec == null) { + codec = new NumberBytesCodec(MAX_DIGITS_BEFORE_DECIMAL_POINT); + localCodec.set(codec); + } + return codec; + } + +} http://git-wip-us.apache.org/repos/asf/kylin/blob/09da1e19/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryBuilder.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryBuilder.java b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryBuilder.java index d8e5476..6d7d0db 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryBuilder.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryBuilder.java @@ -1,48 +1,48 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. -*/ - -package org.apache.kylin.dict; - -import org.apache.kylin.common.util.Bytes; - -/** - * @author yangli9 - * - */ -public class NumberDictionaryBuilder<T> extends TrieDictionaryBuilder<T> { - - NumberDictionary.NumberBytesCodec codec = new NumberDictionary.NumberBytesCodec(); - - public NumberDictionaryBuilder(BytesConverter<T> bytesConverter) { - super(bytesConverter); - } - - @Override - public void addValue(byte[] value) { - codec.encodeNumber(value, 0, value.length); - byte[] copy = Bytes.copy(codec.buf, codec.bufOffset, codec.bufLen); - super.addValue(copy); - } - - public NumberDictionary<T> build(int baseId) { - byte[] trieBytes = buildTrieBytes(baseId); - NumberDictionary<T> r = new NumberDictionary<T>(trieBytes); - return r; - } - -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +package org.apache.kylin.dict; + +import org.apache.kylin.common.util.Bytes; + +/** + * @author yangli9 + * + */ +public class NumberDictionaryBuilder<T> extends TrieDictionaryBuilder<T> { + + NumberDictionary.NumberBytesCodec codec = new NumberDictionary.NumberBytesCodec(NumberDictionary.MAX_DIGITS_BEFORE_DECIMAL_POINT); + + public NumberDictionaryBuilder(BytesConverter<T> bytesConverter) { + super(bytesConverter); + } + + @Override + public void addValue(byte[] value) { + codec.encodeNumber(value, 0, value.length); + byte[] copy = Bytes.copy(codec.buf, codec.bufOffset, codec.bufLen); + super.addValue(copy); + } + + public NumberDictionary<T> build(int baseId) { + byte[] trieBytes = buildTrieBytes(baseId); + NumberDictionary2<T> r = new NumberDictionary2<T>(trieBytes); + return r; + } + +} http://git-wip-us.apache.org/repos/asf/kylin/blob/09da1e19/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java index ed082e4..2511f48 100644 --- a/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java +++ b/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java @@ -42,7 +42,7 @@ import com.google.common.collect.Sets; */ public class NumberDictionaryTest { - NumberDictionary.NumberBytesCodec codec = new NumberDictionary.NumberBytesCodec(); + NumberDictionary.NumberBytesCodec codec = new NumberDictionary.NumberBytesCodec(NumberDictionary.MAX_DIGITS_BEFORE_DECIMAL_POINT); Random rand = new Random(); @Test @@ -58,6 +58,7 @@ public class NumberDictionaryTest { assertEquals(1, maxId); } + @SuppressWarnings("unchecked") @Test public void testEmptyInput() throws IOException{ String[] ints = new String[] { "", "0", "5", "100", "13" };
