Repository: kylin Updated Branches: refs/heads/2.x-staging 50b3f429d -> 294476854
KYLIN-993 Support functions in where clause Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/4b54ca87 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/4b54ca87 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/4b54ca87 Branch: refs/heads/2.x-staging Commit: 4b54ca87cb7df055879a3df938b43ddaf3142eb6 Parents: 50b3f42 Author: lidongsjtu <[email protected]> Authored: Thu Jan 7 16:37:08 2016 +0800 Committer: lidongsjtu <[email protected]> Committed: Fri Jan 8 11:12:47 2016 +0800 ---------------------------------------------------------------------- .../apache/kylin/cube/kv/RowKeyColumnIO.java | 4 + .../dict/TupleFilterDictionaryTranslater.java | 165 ++++++++++ .../metadata/filter/CompareTupleFilter.java | 14 +- .../metadata/filter/FunctionTupleFilter.java | 181 +++++++++++ .../metadata/filter/ITupleFilterTranslator.java | 26 ++ .../kylin/metadata/filter/TupleFilter.java | 2 +- .../metadata/filter/TupleFilterSerializer.java | 7 +- .../metadata/filter/function/BuiltInMethod.java | 120 +++++++ .../kylin/metadata/filter/function/Like.java | 312 +++++++++++++++++++ .../kylin/query/relnode/OLAPFilterRel.java | 10 +- query/src/test/resources/query/sql/query94.sql | 22 ++ .../common/coprocessor/FilterDecorator.java | 5 + .../storage/hbase/cube/v1/CubeStorageQuery.java | 3 + .../hbase/cube/v2/CubeSegmentScanner.java | 6 + .../storage/hbase/cube/v2/CubeStorageQuery.java | 3 + 15 files changed, 874 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/4b54ca87/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java index ba15b48..1d57cf9 100644 --- 
a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java @@ -44,6 +44,10 @@ public class RowKeyColumnIO { this.IDictionaryAwareness = IDictionaryAwareness; } + public IDictionaryAware getIDictionaryAware() { + return IDictionaryAwareness; + } + public int getColumnLength(TblColRef col) { return IDictionaryAwareness.getColumnLength(col); } http://git-wip-us.apache.org/repos/asf/kylin/blob/4b54ca87/core-dictionary/src/main/java/org/apache/kylin/dict/TupleFilterDictionaryTranslater.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TupleFilterDictionaryTranslater.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TupleFilterDictionaryTranslater.java new file mode 100644 index 0000000..1826cd6 --- /dev/null +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TupleFilterDictionaryTranslater.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.kylin.dict; + +import com.google.common.primitives.Primitives; +import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.metadata.filter.ColumnTupleFilter; +import org.apache.kylin.metadata.filter.CompareTupleFilter; +import org.apache.kylin.metadata.filter.ConstantTupleFilter; +import org.apache.kylin.metadata.filter.FunctionTupleFilter; +import org.apache.kylin.metadata.filter.ITupleFilterTranslator; +import org.apache.kylin.metadata.filter.LogicalTupleFilter; +import org.apache.kylin.metadata.filter.TupleFilter; +import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum; +import org.apache.kylin.metadata.model.TblColRef; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ListIterator; + +/** + * Created by dongli on 1/7/16. + */ +public class TupleFilterDictionaryTranslater implements ITupleFilterTranslator { + public static final Logger logger = LoggerFactory.getLogger(TupleFilterDictionaryTranslater.class); + + private IDictionaryAware dictionaryAware; + + public TupleFilterDictionaryTranslater(IDictionaryAware dictionaryAware) { + this.dictionaryAware = dictionaryAware; + } + + @Override + public TupleFilter translate(TupleFilter tupleFilter) { + TupleFilter translated = tupleFilter; + if (tupleFilter instanceof CompareTupleFilter) { + logger.info("Translation to IN clause: " + tupleFilter); + translated = translateCompareTupleFilter((CompareTupleFilter) tupleFilter); + logger.info(translated == null ? "Failed, will use Calcite to handle computed comparison." : "Succeed: " + translated); + } else if (tupleFilter instanceof FunctionTupleFilter) { + logger.info("Translation to IN clause: " + tupleFilter); + translated = translateFunctionTupleFilter((FunctionTupleFilter) tupleFilter); + logger.info(translated == null ? "Failed, will use Calcite to handle computed column." 
: "Succeed: " + translated); + } else if (tupleFilter instanceof LogicalTupleFilter) { + logger.info("Translation to IN clause: " + tupleFilter); + ListIterator<TupleFilter> childIterator = (ListIterator<TupleFilter>) tupleFilter.getChildren().listIterator(); + while (childIterator.hasNext()) { + TupleFilter tempTranslated = translate(childIterator.next()); + if (tempTranslated != null) + childIterator.set(tempTranslated); + } + logger.info(translated == null ? "Failed, will use Calcite to handle computed column." : "Succeed: " + translated); + } + return translated == null ? tupleFilter : translated; + } + + private TupleFilter translateFunctionTupleFilter(FunctionTupleFilter functionTupleFilter) { + if (!functionTupleFilter.isValid()) + return null; + + TblColRef columnRef = functionTupleFilter.getColumn(); + Dictionary<?> dict = dictionaryAware.getDictionary(columnRef); + if (dict == null) + return null; + + CompareTupleFilter translated = new CompareTupleFilter(FilterOperatorEnum.IN); + translated.addChild(new ColumnTupleFilter(columnRef)); + + try { + for (int i = dict.getMinId(); i <= dict.getMaxId(); i++) { + Object dictVal = dict.getValueFromId(i); + if ((Boolean) functionTupleFilter.invokeFunction(dictVal)) { + translated.addChild(new ConstantTupleFilter(dictVal)); + } + } + } catch (Exception e) { + logger.debug(e.getMessage()); + return null; + } + return translated; + } + + @SuppressWarnings("unchecked") + private TupleFilter translateCompareTupleFilter(CompareTupleFilter compTupleFilter) { + if (compTupleFilter.getFunction() == null) + return null; + + FunctionTupleFilter functionTupleFilter = compTupleFilter.getFunction(); + if (!functionTupleFilter.isValid()) + return null; + + TblColRef columnRef = functionTupleFilter.getColumn(); + Dictionary<?> dict = dictionaryAware.getDictionary(columnRef); + if (dict == null) + return null; + + CompareTupleFilter translated = new CompareTupleFilter(FilterOperatorEnum.IN); + translated.addChild(new 
ColumnTupleFilter(columnRef)); + + try { + for (int i = dict.getMinId(); i <= dict.getMaxId(); i++) { + Object dictVal = dict.getValueFromId(i); + Object computedVal = functionTupleFilter.invokeFunction(dictVal); + Class clazz = Primitives.wrap(computedVal.getClass()); + Object targetVal = compTupleFilter.getFirstValue(); + if (Primitives.isWrapperType(clazz)) + targetVal = clazz.cast(clazz.getDeclaredMethod("valueOf", String.class).invoke(null, compTupleFilter.getFirstValue())); + + int comp = ((Comparable) computedVal).compareTo(targetVal); + boolean compResult = false; + switch (compTupleFilter.getOperator()) { + case EQ: + compResult = comp == 0; + break; + case NEQ: + compResult = comp != 0; + break; + case LT: + compResult = comp < 0; + break; + case LTE: + compResult = comp <= 0; + break; + case GT: + compResult = comp > 0; + break; + case GTE: + compResult = comp >= 0; + break; + case IN: + compResult = compTupleFilter.getValues().contains(computedVal.toString()); + break; + case NOTIN: + compResult = !compTupleFilter.getValues().contains(computedVal.toString()); + break; + default: + break; + } + if (compResult) { + translated.addChild(new ConstantTupleFilter(dictVal)); + } + } + } catch (Exception e) { + logger.debug(e.getMessage()); + return null; + } + return translated; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/kylin/blob/4b54ca87/core-metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java index f3f7f34..ef16de2 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/CompareTupleFilter.java @@ -34,7 +34,11 @@ import 
org.apache.kylin.metadata.tuple.IEvaluatableTuple; */ public class CompareTupleFilter extends TupleFilter { + // operand 1 is either a column or a function private TblColRef column; + private FunctionTupleFilter function; + + // operand 2 is constants private Set<Object> conditionValues; private Object firstCondValue; private Map<String, Object> dynamicVariables; @@ -84,6 +88,8 @@ public class CompareTupleFilter extends TupleFilter { if (!this.dynamicVariables.containsKey(dynamicFilter.getVariableName())) { this.dynamicVariables.put(dynamicFilter.getVariableName(), null); } + } else if (child instanceof FunctionTupleFilter) { + this.function = (FunctionTupleFilter) child; } } @@ -104,6 +110,10 @@ public class CompareTupleFilter extends TupleFilter { return column; } + public FunctionTupleFilter getFunction() { + return function; + } + public Map<String, Object> getVariables() { return dynamicVariables; } @@ -128,7 +138,7 @@ public class CompareTupleFilter extends TupleFilter { @Override public String toString() { - return column + " " + operator + " " + conditionValues; + return (function == null ? 
column : function) + " " + operator + " " + conditionValues; } // TODO requires generalize, currently only evaluates COLUMN {op} CONST @@ -197,7 +207,7 @@ public class CompareTupleFilter extends TupleFilter { @Override public boolean isEvaluable() { - return column != null && !conditionValues.isEmpty(); + return ((function != null && function.isEvaluable()) || column != null) && !conditionValues.isEmpty(); } @SuppressWarnings({ "unchecked", "rawtypes" }) http://git-wip-us.apache.org/repos/asf/kylin/blob/4b54ca87/core-metadata/src/main/java/org/apache/kylin/metadata/filter/FunctionTupleFilter.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/FunctionTupleFilter.java b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/FunctionTupleFilter.java new file mode 100644 index 0000000..15fcb72 --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/FunctionTupleFilter.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.kylin.metadata.filter; + +import java.io.Serializable; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.List; + +import org.apache.kylin.common.util.BytesUtil; +import org.apache.kylin.metadata.filter.function.BuiltInMethod; +import org.apache.kylin.metadata.model.TblColRef; +import org.apache.kylin.metadata.tuple.IEvaluatableTuple; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.collect.Lists; +import com.google.common.primitives.Primitives; + +/** + * Created by dongli on 11/11/15. + */ +public class FunctionTupleFilter extends TupleFilter { + public static final Logger logger = LoggerFactory.getLogger(FunctionTupleFilter.class); + + private String name; + // FIXME Only supports single parameter functions currently + private TupleFilter columnContainerFilter; + private int colPosition; + private Method method; + private List<Serializable> methodParams; + private boolean isValid = false; + + public FunctionTupleFilter(String name) { + super(Lists.<TupleFilter> newArrayList(), FilterOperatorEnum.FUNCTION); + this.methodParams = Lists.newArrayList(); + + if (name != null) { + this.name = name.toUpperCase(); + initMethod(); + } + } + + public String getName() { + return name; + } + + public TblColRef getColumn() { + if (columnContainerFilter == null) + return null; + + if (columnContainerFilter instanceof ColumnTupleFilter) + return ((ColumnTupleFilter) columnContainerFilter).getColumn(); + else if (columnContainerFilter instanceof FunctionTupleFilter) + return ((FunctionTupleFilter) columnContainerFilter).getColumn(); + + throw new UnsupportedOperationException("Wrong type TupleFilter in FunctionTupleFilter."); + } + + public Object invokeFunction(Object input) throws InvocationTargetException, IllegalAccessException { + if (columnContainerFilter instanceof ColumnTupleFilter) + 
methodParams.set(colPosition, (Serializable) input); + else if (columnContainerFilter instanceof FunctionTupleFilter) + methodParams.set(colPosition, (Serializable) ((FunctionTupleFilter) columnContainerFilter).invokeFunction((Serializable) input)); + return method.invoke(null, (Object[]) (methodParams.toArray())); + } + + public boolean isValid() { + return isValid && method != null && methodParams.size() == children.size(); + } + + @Override + @SuppressWarnings("unchecked") + public void addChild(TupleFilter child) { + if (child instanceof ColumnTupleFilter || child instanceof FunctionTupleFilter) { + columnContainerFilter = child; + colPosition = methodParams.size(); + methodParams.add(null); + } else if (child instanceof ConstantTupleFilter) { + Serializable constVal = (Serializable) child.getValues().iterator().next(); + try { + Class<?> clazz = Primitives.wrap(method.getParameterTypes()[methodParams.size()]); + if (!Primitives.isWrapperType(clazz)) + methodParams.add(constVal); + else + methodParams.add((Serializable) clazz.cast(clazz.getDeclaredMethod("valueOf", String.class).invoke(null, constVal))); + } catch (Exception e) { + logger.warn(e.getMessage()); + isValid = false; + } + } + super.addChild(child); + } + + @Override + public boolean isEvaluable() { + return false; + } + + @Override + public boolean evaluate(IEvaluatableTuple tuple, IFilterCodeSystem<?> cs) { + throw new UnsupportedOperationException("Function filter cannot be evaluated immediately"); + } + + @Override + public Collection<String> getValues() { + throw new UnsupportedOperationException("Function filter cannot be evaluated immediately"); + } + + @Override + byte[] serialize(IFilterCodeSystem<?> cs) { + ByteBuffer buffer = ByteBuffer.allocate(BUFFER_SIZE); + BytesUtil.writeUTFString(name, buffer); + BytesUtil.writeVInt(colPosition, buffer); + BytesUtil.writeVInt(isValid ? 
1 : 0, buffer); + BytesUtil.writeByteArray(TupleFilterSerializer.serialize(columnContainerFilter, cs), buffer); + + byte[] result = new byte[buffer.position()]; + System.arraycopy(buffer.array(), 0, result, 0, buffer.position()); + return result; + } + + @Override + void deserialize(byte[] bytes, IFilterCodeSystem<?> cs) { + ByteBuffer buffer = ByteBuffer.wrap(bytes); + + this.name = BytesUtil.readUTFString(buffer); + this.initMethod(); + + this.colPosition = BytesUtil.readVInt(buffer); + this.isValid = BytesUtil.readVInt(buffer) == 1; + + byte[] columnFilter = BytesUtil.readByteArray(buffer); + if (columnFilter != null) { + this.columnContainerFilter = TupleFilterSerializer.deserialize(columnFilter, cs); + } + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(name); + sb.append("("); + for (int i = 0; i < methodParams.size(); i++) { + if (colPosition == i) { + sb.append(columnContainerFilter); + } else { + sb.append(methodParams.get(i)); + } + if (i < methodParams.size() - 1) + sb.append(","); + } + sb.append(")"); + return sb.toString(); + } + + private void initMethod() { + if (BuiltInMethod.MAP.containsKey(name)) { + this.method = BuiltInMethod.MAP.get(name).method; + isValid = true; + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/kylin/blob/4b54ca87/core-metadata/src/main/java/org/apache/kylin/metadata/filter/ITupleFilterTranslator.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/ITupleFilterTranslator.java b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/ITupleFilterTranslator.java new file mode 100644 index 0000000..aed284c --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/ITupleFilterTranslator.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.metadata.filter; + +/** + * Created by dongli on 1/7/16. + */ +public interface ITupleFilterTranslator { + TupleFilter translate(TupleFilter tupleFilter); +} http://git-wip-us.apache.org/repos/asf/kylin/blob/4b54ca87/core-metadata/src/main/java/org/apache/kylin/metadata/filter/TupleFilter.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/TupleFilter.java b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/TupleFilter.java index e456ac1..944ddd0 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/TupleFilter.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/TupleFilter.java @@ -37,7 +37,7 @@ import com.google.common.collect.Maps; public abstract class TupleFilter { public enum FilterOperatorEnum { - EQ(1), NEQ(2), GT(3), LT(4), GTE(5), LTE(6), ISNULL(7), ISNOTNULL(8), IN(9), NOTIN(10), AND(20), OR(21), NOT(22), COLUMN(30), CONSTANT(31), DYNAMIC(32), EXTRACT(33), CASE(34); + EQ(1), NEQ(2), GT(3), LT(4), GTE(5), LTE(6), ISNULL(7), ISNOTNULL(8), IN(9), NOTIN(10), AND(20), OR(21), NOT(22), COLUMN(30), CONSTANT(31), DYNAMIC(32), EXTRACT(33), CASE(34), FUNCTION(35); private final int value; 
http://git-wip-us.apache.org/repos/asf/kylin/blob/4b54ca87/core-metadata/src/main/java/org/apache/kylin/metadata/filter/TupleFilterSerializer.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/TupleFilterSerializer.java b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/TupleFilterSerializer.java index 7404136..a394a51 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/TupleFilterSerializer.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/TupleFilterSerializer.java @@ -107,8 +107,8 @@ public class TupleFilterSerializer { // deserialize filter TupleFilter filter = createTupleFilter(opVal); - byte[] filetrBytes = BytesUtil.readByteArray(buffer); - filter.deserialize(filetrBytes, cs); + byte[] filterBytes = BytesUtil.readByteArray(buffer); + filter.deserialize(filterBytes, cs); if (rootFilter == null) { // push root to stack @@ -171,6 +171,9 @@ public class TupleFilterSerializer { case DYNAMIC: filter = new DynamicTupleFilter(null); break; + case FUNCTION: + filter = new FunctionTupleFilter(null); + break; default: throw new IllegalStateException("Error FilterOperatorEnum: " + op.getValue()); } http://git-wip-us.apache.org/repos/asf/kylin/blob/4b54ca87/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/BuiltInMethod.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/BuiltInMethod.java b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/BuiltInMethod.java new file mode 100644 index 0000000..b927d8d --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/BuiltInMethod.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.metadata.filter.function; + +import com.google.common.collect.ImmutableMap; +import org.apache.commons.lang3.reflect.MethodUtils; + +import java.lang.reflect.Method; +import java.util.regex.Pattern; + +/** + * Created by dongli on 11/13/15. + */ +public enum BuiltInMethod { + UPPER(BuiltInMethod.class, "upper", String.class), + LOWER(BuiltInMethod.class, "lower", String.class), + SUBSTRING(BuiltInMethod.class, "substring", String.class, int.class, int.class), + CHAR_LENGTH(BuiltInMethod.class, "charLength", String.class), + LIKE(BuiltInMethod.class, "like", String.class, String.class), + INITCAP(BuiltInMethod.class, "initcap", String.class); + public final Method method; + + public static final ImmutableMap<String, BuiltInMethod> MAP; + + static { + final ImmutableMap.Builder<String, BuiltInMethod> builder = + ImmutableMap.builder(); + for (BuiltInMethod value : BuiltInMethod.values()) { + if (value.method != null) { + builder.put(value.name(), value); + } + } + MAP = builder.build(); + } + + BuiltInMethod(Class<?> clazz, String methodName, Class<?>... argumentTypes) { + this.method = MethodUtils.getMatchingAccessibleMethod(clazz, methodName, argumentTypes); + } + + /** SQL {@code LIKE} function. 
*/ + public static boolean like(String s, String pattern) { + final String regex = Like.sqlToRegexLike(pattern, null); + return Pattern.matches(regex, s); + } + + /** SQL INITCAP(string) function. */ + public static String initcap(String s) { + // Assumes Alpha as [A-Za-z0-9] + // white space is treated as everything else. + final int len = s.length(); + boolean start = true; + final StringBuilder newS = new StringBuilder(); + + for (int i = 0; i < len; i++) { + char curCh = s.charAt(i); + final int c = (int) curCh; + if (start) { // curCh is whitespace or first character of word. + if (c > 47 && c < 58) { // 0-9 + start = false; + } else if (c > 64 && c < 91) { // A-Z + start = false; + } else if (c > 96 && c < 123) { // a-z + start = false; + curCh = (char) (c - 32); // Uppercase this character + } + // else {} whitespace + } else { // Inside of a word or white space after end of word. + if (c > 47 && c < 58) { // 0-9 + // noop + } else if (c > 64 && c < 91) { // A-Z + curCh = (char) (c + 32); // Lowercase this character + } else if (c > 96 && c < 123) { // a-z + // noop + } else { // whitespace + start = true; + } + } + newS.append(curCh); + } // for each character in s + return newS.toString(); + } + + /** SQL CHARACTER_LENGTH(string) function. */ + public static int charLength(String s) { + return s.length(); + } + + /** SQL SUBSTRING(string FROM ... FOR ...) function. */ + public static String substring(String s, int from, int for_) { + return s.substring(from - 1, Math.min(from - 1 + for_, s.length())); + } + + /** SQL UPPER(string) function. */ + public static String upper(String s) { + return s.toUpperCase(); + } + + /** SQL LOWER(string) function. 
*/ + public static String lower(String s) { + return s.toLowerCase(); + } + + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/kylin/blob/4b54ca87/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/Like.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/Like.java b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/Like.java new file mode 100644 index 0000000..5908d3d --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/Like.java @@ -0,0 +1,312 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.metadata.filter.function; + +/** + * From Calcite + * + * Utilities for converting SQL {@code LIKE} and {@code SIMILAR} operators + * to regular expressions. 
+ */ +public class Like { + private static final String JAVA_REGEX_SPECIALS = "[]()|^-+*?{}$\\"; + private static final String SQL_SIMILAR_SPECIALS = "[]()|^-+*_%?{}"; + private static final String [] REG_CHAR_CLASSES = { + "[:ALPHA:]", "\\p{Alpha}", + "[:alpha:]", "\\p{Alpha}", + "[:UPPER:]", "\\p{Upper}", + "[:upper:]", "\\p{Upper}", + "[:LOWER:]", "\\p{Lower}", + "[:lower:]", "\\p{Lower}", + "[:DIGIT:]", "\\d", + "[:digit:]", "\\d", + "[:SPACE:]", " ", + "[:space:]", " ", + "[:WHITESPACE:]", "\\s", + "[:whitespace:]", "\\s", + "[:ALNUM:]", "\\p{Alnum}", + "[:alnum:]", "\\p{Alnum}" + }; + + private Like() { + } + + /** + * Translates a SQL LIKE pattern to Java regex pattern, with optional + * escape string. + */ + static String sqlToRegexLike( + String sqlPattern, + CharSequence escapeStr) { + final char escapeChar; + if (escapeStr != null) { + if (escapeStr.length() != 1) { + throw invalidEscapeCharacter(escapeStr.toString()); + } + escapeChar = escapeStr.charAt(0); + } else { + escapeChar = 0; + } + return sqlToRegexLike(sqlPattern, escapeChar); + } + + /** + * Translates a SQL LIKE pattern to Java regex pattern. 
+ */ + static String sqlToRegexLike( + String sqlPattern, + char escapeChar) { + int i; + final int len = sqlPattern.length(); + final StringBuilder javaPattern = new StringBuilder(len + len); + for (i = 0; i < len; i++) { + char c = sqlPattern.charAt(i); + if (JAVA_REGEX_SPECIALS.indexOf(c) >= 0) { + javaPattern.append('\\'); + } + if (c == escapeChar) { + if (i == (sqlPattern.length() - 1)) { + throw invalidEscapeSequence(sqlPattern, i); + } + char nextChar = sqlPattern.charAt(i + 1); + if ((nextChar == '_') + || (nextChar == '%') + || (nextChar == escapeChar)) { + javaPattern.append(nextChar); + i++; + } else { + throw invalidEscapeSequence(sqlPattern, i); + } + } else if (c == '_') { + javaPattern.append('.'); + } else if (c == '%') { + javaPattern.append("."); + javaPattern.append('*'); + } else { + javaPattern.append(c); + } + } + return javaPattern.toString(); + } + + private static RuntimeException invalidEscapeCharacter(String s) { + return new RuntimeException( + "Invalid escape character '" + s + "'"); + } + + private static RuntimeException invalidEscapeSequence(String s, int i) { + return new RuntimeException( + "Invalid escape sequence '" + s + "', " + i); + } + + private static void similarEscapeRuleChecking( + String sqlPattern, + char escapeChar) { + if (escapeChar == 0) { + return; + } + if (SQL_SIMILAR_SPECIALS.indexOf(escapeChar) >= 0) { + // The escape character is a special character + // SQL 2003 Part 2 Section 8.6 General Rule 3.b + for (int i = 0; i < sqlPattern.length(); i++) { + if (sqlPattern.charAt(i) == escapeChar) { + if (i == (sqlPattern.length() - 1)) { + throw invalidEscapeSequence(sqlPattern, i); + } + char c = sqlPattern.charAt(i + 1); + if ((SQL_SIMILAR_SPECIALS.indexOf(c) < 0) + && (c != escapeChar)) { + throw invalidEscapeSequence(sqlPattern, i); + } + } + } + } + + // SQL 2003 Part 2 Section 8.6 General Rule 3.c + if (escapeChar == ':') { + int position; + position = sqlPattern.indexOf("[:"); + if (position >= 0) { + 
position = sqlPattern.indexOf(":]"); + } + if (position < 0) { + throw invalidEscapeSequence(sqlPattern, position); + } + } + } + + private static RuntimeException invalidRegularExpression( + String pattern, int i) { + return new RuntimeException( + "Invalid regular expression '" + pattern + "'"); + } + + private static int sqlSimilarRewriteCharEnumeration( + String sqlPattern, + StringBuilder javaPattern, + int pos, + char escapeChar) { + int i; + for (i = pos + 1; i < sqlPattern.length(); i++) { + char c = sqlPattern.charAt(i); + if (c == ']') { + return i - 1; + } else if (c == escapeChar) { + i++; + char nextChar = sqlPattern.charAt(i); + if (SQL_SIMILAR_SPECIALS.indexOf(nextChar) >= 0) { + if (JAVA_REGEX_SPECIALS.indexOf(nextChar) >= 0) { + javaPattern.append('\\'); + } + javaPattern.append(nextChar); + } else if (escapeChar == nextChar) { + javaPattern.append(nextChar); + } else { + throw invalidRegularExpression(sqlPattern, i); + } + } else if (c == '-') { + javaPattern.append('-'); + } else if (c == '^') { + javaPattern.append('^'); + } else if (sqlPattern.startsWith("[:", i)) { + int numOfRegCharSets = REG_CHAR_CLASSES.length / 2; + boolean found = false; + for (int j = 0; j < numOfRegCharSets; j++) { + if (sqlPattern.startsWith(REG_CHAR_CLASSES[j + j], i)) { + javaPattern.append(REG_CHAR_CLASSES[j + j + 1]); + + i += REG_CHAR_CLASSES[j + j].length() - 1; + found = true; + break; + } + } + if (!found) { + throw invalidRegularExpression(sqlPattern, i); + } + } else if (SQL_SIMILAR_SPECIALS.indexOf(c) >= 0) { + throw invalidRegularExpression(sqlPattern, i); + } else { + javaPattern.append(c); + } + } + return i - 1; + } + + /** + * Translates a SQL SIMILAR pattern to Java regex pattern, with optional + * escape string. 
+ */ + static String sqlToRegexSimilar( + String sqlPattern, + CharSequence escapeStr) { + final char escapeChar; + if (escapeStr != null) { + if (escapeStr.length() != 1) { + throw invalidEscapeCharacter(escapeStr.toString()); + } + escapeChar = escapeStr.charAt(0); + } else { + escapeChar = 0; + } + return sqlToRegexSimilar(sqlPattern, escapeChar); + } + + /** + * Translates SQL SIMILAR pattern to Java regex pattern. + */ + static String sqlToRegexSimilar( + String sqlPattern, + char escapeChar) { + similarEscapeRuleChecking(sqlPattern, escapeChar); + + boolean insideCharacterEnumeration = false; + final StringBuilder javaPattern = + new StringBuilder(sqlPattern.length() * 2); + final int len = sqlPattern.length(); + for (int i = 0; i < len; i++) { + char c = sqlPattern.charAt(i); + if (c == escapeChar) { + if (i == (len - 1)) { + // It should never reach here after the escape rule + // checking. + throw invalidEscapeSequence(sqlPattern, i); + } + char nextChar = sqlPattern.charAt(i + 1); + if (SQL_SIMILAR_SPECIALS.indexOf(nextChar) >= 0) { + // special character, use \ to replace the escape char. + if (JAVA_REGEX_SPECIALS.indexOf(nextChar) >= 0) { + javaPattern.append('\\'); + } + javaPattern.append(nextChar); + } else if (nextChar == escapeChar) { + javaPattern.append(nextChar); + } else { + // It should never reach here after the escape rule + // checking. + throw invalidEscapeSequence(sqlPattern, i); + } + i++; // we have already processed the next char. 
+ } else { + switch (c) { + case '_': + javaPattern.append('.'); + break; + case '%': + javaPattern.append('.'); + javaPattern.append('*'); + break; + case '[': + javaPattern.append('['); + insideCharacterEnumeration = true; + i = sqlSimilarRewriteCharEnumeration( + sqlPattern, + javaPattern, + i, + escapeChar); + break; + case ']': + if (!insideCharacterEnumeration) { + throw invalidRegularExpression(sqlPattern, i); + } + insideCharacterEnumeration = false; + javaPattern.append(']'); + break; + case '\\': + javaPattern.append("\\\\"); + break; + case '$': + + // $ is a special character in Java regex, but regular in + // SQL regex. + javaPattern.append("\\$"); + break; + default: + javaPattern.append(c); + } + } + } + if (insideCharacterEnumeration) { + throw invalidRegularExpression(sqlPattern, len); + } + + return javaPattern.toString(); + } +} + +// End Like.java http://git-wip-us.apache.org/repos/asf/kylin/blob/4b54ca87/query/src/main/java/org/apache/kylin/query/relnode/OLAPFilterRel.java ---------------------------------------------------------------------- diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPFilterRel.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPFilterRel.java index b4e33ec..8b3e1e5 100644 --- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPFilterRel.java +++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPFilterRel.java @@ -54,6 +54,7 @@ import org.apache.kylin.metadata.filter.CompareTupleFilter; import org.apache.kylin.metadata.filter.ConstantTupleFilter; import org.apache.kylin.metadata.filter.DynamicTupleFilter; import org.apache.kylin.metadata.filter.ExtractTupleFilter; +import org.apache.kylin.metadata.filter.FunctionTupleFilter; import org.apache.kylin.metadata.filter.LogicalTupleFilter; import org.apache.kylin.metadata.filter.TupleFilter; import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum; @@ -126,9 +127,13 @@ public class OLAPFilterRel extends Filter implements 
OLAPRel { if (op.getName().equalsIgnoreCase("extract_date")) { filter = new ExtractTupleFilter(FilterOperatorEnum.EXTRACT); } else { - throw new UnsupportedOperationException(op.getName()); + filter = new FunctionTupleFilter(op.getName()); } break; + case LIKE: + case OTHER_FUNCTION: + filter = new FunctionTupleFilter(op.getName()); + break; default: throw new UnsupportedOperationException(op.getName()); } @@ -288,6 +293,9 @@ public class OLAPFilterRel extends Filter implements OLAPRel { } private void collectColumnsRecursively(TupleFilter filter, Set<TblColRef> collector) { + if (filter == null) + return; + if (filter instanceof ColumnTupleFilter) { collector.add(((ColumnTupleFilter) filter).getColumn()); } http://git-wip-us.apache.org/repos/asf/kylin/blob/4b54ca87/query/src/test/resources/query/sql/query94.sql ---------------------------------------------------------------------- diff --git a/query/src/test/resources/query/sql/query94.sql b/query/src/test/resources/query/sql/query94.sql new file mode 100644 index 0000000..4e80d59 --- /dev/null +++ b/query/src/test/resources/query/sql/query94.sql @@ -0,0 +1,22 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+-- + +select upper(lstg_format_name) as lstg_format_name, count(*) as cnt from test_kylin_fact +where lower(lstg_format_name)='abin' and substring(lstg_format_name,1,3) in ('ABI') and upper(lstg_format_name) > 'AAAA' and +lower(lstg_format_name) like '%b%' and char_length(lstg_format_name) < 10 and char_length(lstg_format_name) > 3 and lstg_format_name||'a'='ABINa' +group by lstg_format_name \ No newline at end of file http://git-wip-us.apache.org/repos/asf/kylin/blob/4b54ca87/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/common/coprocessor/FilterDecorator.java ---------------------------------------------------------------------- diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/common/coprocessor/FilterDecorator.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/common/coprocessor/FilterDecorator.java index c833781..294f399 100644 --- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/common/coprocessor/FilterDecorator.java +++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/common/coprocessor/FilterDecorator.java @@ -8,9 +8,11 @@ import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.cube.kv.RowKeyColumnIO; import org.apache.kylin.dict.DictCodeSystem; import org.apache.kylin.dict.IDictionaryAware; +import org.apache.kylin.dict.TupleFilterDictionaryTranslater; import org.apache.kylin.metadata.filter.ColumnTupleFilter; import org.apache.kylin.metadata.filter.CompareTupleFilter; import org.apache.kylin.metadata.filter.ConstantTupleFilter; +import org.apache.kylin.metadata.filter.ITupleFilterTranslator; import org.apache.kylin.metadata.filter.TupleFilter; import org.apache.kylin.metadata.filter.TupleFilterSerializer; import org.apache.kylin.metadata.model.TblColRef; @@ -129,6 +131,9 @@ public class FilterDecorator implements TupleFilterSerializer.Decorator { if (filter == null) return null; + ITupleFilterTranslator translator = new 
TupleFilterDictionaryTranslater(columnIO.getIDictionaryAware()); + filter = translator.translate(filter); + // un-evaluatable filter is replaced with TRUE if (!filter.isEvaluable()) { TupleFilter.collectColumns(filter, inevaluableColumns); http://git-wip-us.apache.org/repos/asf/kylin/blob/4b54ca87/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeStorageQuery.java ---------------------------------------------------------------------- diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeStorageQuery.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeStorageQuery.java index 32dfc78..74d57c7 100644 --- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeStorageQuery.java +++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeStorageQuery.java @@ -299,6 +299,9 @@ public class CubeStorageQuery implements ICachableStorageQuery { } private void collectColumnsRecursively(TupleFilter filter, Set<TblColRef> collector) { + if (filter == null) + return; + if (filter instanceof ColumnTupleFilter) { collectColumns(((ColumnTupleFilter) filter).getColumn(), collector); } http://git-wip-us.apache.org/repos/asf/kylin/blob/4b54ca87/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeSegmentScanner.java ---------------------------------------------------------------------- diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeSegmentScanner.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeSegmentScanner.java index 6e3e0d8..cdb665b 100644 --- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeSegmentScanner.java +++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeSegmentScanner.java @@ -24,6 +24,7 @@ import org.apache.kylin.cube.gridtable.CubeGridTable; import org.apache.kylin.cube.gridtable.CuboidToGridTableMapping; import 
org.apache.kylin.cube.gridtable.NotEnoughGTInfoException; import org.apache.kylin.cube.model.CubeDesc; +import org.apache.kylin.dict.TupleFilterDictionaryTranslater; import org.apache.kylin.gridtable.GTInfo; import org.apache.kylin.gridtable.GTRecord; import org.apache.kylin.gridtable.GTScanRange; @@ -32,6 +33,7 @@ import org.apache.kylin.gridtable.GTScanRequest; import org.apache.kylin.gridtable.GTUtil; import org.apache.kylin.gridtable.IGTScanner; import org.apache.kylin.metadata.datatype.DataType; +import org.apache.kylin.metadata.filter.ITupleFilterTranslator; import org.apache.kylin.metadata.filter.TupleFilter; import org.apache.kylin.metadata.model.FunctionDesc; import org.apache.kylin.metadata.model.TblColRef; @@ -57,6 +59,10 @@ public class CubeSegmentScanner implements IGTScanner { CuboidToGridTableMapping mapping = cuboid.getCuboidToGridTableMapping(); + // translate FunctionTupleFilter to IN clause + ITupleFilterTranslator translator = new TupleFilterDictionaryTranslater(this.cubeSeg); + filter = translator.translate(filter); + //replace the constant values in filter to dictionary codes TupleFilter gtFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, mapping.getCuboidDimensionsInGTOrder(), groups); http://git-wip-us.apache.org/repos/asf/kylin/blob/4b54ca87/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeStorageQuery.java ---------------------------------------------------------------------- diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeStorageQuery.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeStorageQuery.java index e988058..78bd25b 100644 --- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeStorageQuery.java +++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeStorageQuery.java @@ -290,6 +290,9 @@ public class CubeStorageQuery implements ICachableStorageQuery { } private void 
collectColumnsRecursively(TupleFilter filter, Set<TblColRef> collector) { + if (filter == null) + return; + if (filter instanceof ColumnTupleFilter) { collectColumns(((ColumnTupleFilter) filter).getColumn(), collector); }
