[ https://issues.apache.org/jira/browse/DRILL-8526?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17986973#comment-17986973 ]
ASF GitHub Bot commented on DRILL-8526: --------------------------------------- cgivre commented on code in PR #2995: URL: https://github.com/apache/drill/pull/2995#discussion_r2175210945 ########## contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/filter/HiveCompareFunctionsProcessor.java: ########## @@ -0,0 +1,264 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.hive.readers.filter; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import org.apache.drill.common.FunctionNames; +import org.apache.drill.common.expression.CastExpression; +import org.apache.drill.common.expression.FunctionCall; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.ValueExpressions; +import org.apache.drill.common.expression.ValueExpressions.BooleanExpression; +import org.apache.drill.common.expression.ValueExpressions.DateExpression; +import org.apache.drill.common.expression.ValueExpressions.DoubleExpression; +import org.apache.drill.common.expression.ValueExpressions.FloatExpression; +import org.apache.drill.common.expression.ValueExpressions.IntExpression; +import org.apache.drill.common.expression.ValueExpressions.LongExpression; +import org.apache.drill.common.expression.ValueExpressions.QuotedString; +import org.apache.drill.common.expression.ValueExpressions.TimeExpression; +import org.apache.drill.common.expression.ValueExpressions.TimeStampExpression; +import org.apache.drill.common.expression.visitors.AbstractExprVisitor; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import java.sql.Timestamp; + +public class HiveCompareFunctionsProcessor extends AbstractExprVisitor<Boolean, LogicalExpression, RuntimeException> { + + private static final ImmutableSet<String> IS_FUNCTIONS_SET; + static { + ImmutableSet.Builder<String> builder = ImmutableSet.builder(); + IS_FUNCTIONS_SET = builder + .add(FunctionNames.IS_NOT_NULL) + .add("isNotNull") + .add("is not null") + .add(FunctionNames.IS_NULL) + .add("isNull") + .add("is null") + .add(FunctionNames.IS_TRUE) + .add(FunctionNames.IS_NOT_TRUE) + .add(FunctionNames.IS_FALSE) + .add(FunctionNames.IS_NOT_FALSE) + .build(); + + } + + private static final ImmutableMap<String, String> COMPARE_FUNCTIONS_TRANSPOSE_MAP; + static { + ImmutableMap.Builder<String, String> builder = ImmutableMap.builder(); + COMPARE_FUNCTIONS_TRANSPOSE_MAP = builder + // binary functions + .put(FunctionNames.LIKE, FunctionNames.LIKE) + .put(FunctionNames.EQ, FunctionNames.EQ) + .put(FunctionNames.NE, FunctionNames.NE) + .put(FunctionNames.GE, FunctionNames.LE) + .put(FunctionNames.GT, FunctionNames.LT) + .put(FunctionNames.LE, FunctionNames.GE) + .put(FunctionNames.LT, FunctionNames.GT) + .build(); + } + + private static final ImmutableSet<Class<? extends LogicalExpression>> VALUE_EXPRESSION_CLASSES; + static { + ImmutableSet.Builder<Class<? extends LogicalExpression>> builder = ImmutableSet.builder(); + VALUE_EXPRESSION_CLASSES = builder + .add(BooleanExpression.class) + .add(DateExpression.class) + .add(DoubleExpression.class) + .add(FloatExpression.class) + .add(IntExpression.class) + .add(LongExpression.class) + .add(QuotedString.class) + .add(TimeExpression.class) + .build(); + } + + private Object value; + private PredicateLeaf.Type valueType; + private boolean success; + private SchemaPath path; + private String functionName; + Review Comment: Nit: Move to the top of the class file. Please make any variables `final` which can be made `final`. ########## contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/filter/HiveCompareFunctionsProcessor.java: ########## @@ -0,0 +1,264 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.hive.readers.filter; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import org.apache.drill.common.FunctionNames; +import org.apache.drill.common.expression.CastExpression; +import org.apache.drill.common.expression.FunctionCall; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.ValueExpressions; +import org.apache.drill.common.expression.ValueExpressions.BooleanExpression; +import org.apache.drill.common.expression.ValueExpressions.DateExpression; +import org.apache.drill.common.expression.ValueExpressions.DoubleExpression; +import org.apache.drill.common.expression.ValueExpressions.FloatExpression; +import org.apache.drill.common.expression.ValueExpressions.IntExpression; +import org.apache.drill.common.expression.ValueExpressions.LongExpression; +import org.apache.drill.common.expression.ValueExpressions.QuotedString; +import org.apache.drill.common.expression.ValueExpressions.TimeExpression; +import org.apache.drill.common.expression.ValueExpressions.TimeStampExpression; +import org.apache.drill.common.expression.visitors.AbstractExprVisitor; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import java.sql.Timestamp; + +public class HiveCompareFunctionsProcessor extends AbstractExprVisitor<Boolean, LogicalExpression, RuntimeException> { + + private static final ImmutableSet<String> IS_FUNCTIONS_SET; + static { + ImmutableSet.Builder<String> builder = ImmutableSet.builder(); + IS_FUNCTIONS_SET = builder + .add(FunctionNames.IS_NOT_NULL) + .add("isNotNull") + .add("is not null") + .add(FunctionNames.IS_NULL) + .add("isNull") + .add("is null") + .add(FunctionNames.IS_TRUE) + .add(FunctionNames.IS_NOT_TRUE) + .add(FunctionNames.IS_FALSE) + .add(FunctionNames.IS_NOT_FALSE) + .build(); + + } + + private static final ImmutableMap<String, String> COMPARE_FUNCTIONS_TRANSPOSE_MAP; + static { + ImmutableMap.Builder<String, String> builder = ImmutableMap.builder(); + COMPARE_FUNCTIONS_TRANSPOSE_MAP = builder + // binary functions + .put(FunctionNames.LIKE, FunctionNames.LIKE) + .put(FunctionNames.EQ, FunctionNames.EQ) + .put(FunctionNames.NE, FunctionNames.NE) + .put(FunctionNames.GE, FunctionNames.LE) + .put(FunctionNames.GT, FunctionNames.LT) + .put(FunctionNames.LE, FunctionNames.GE) + .put(FunctionNames.LT, FunctionNames.GT) + .build(); + } + + private static final ImmutableSet<Class<? extends LogicalExpression>> VALUE_EXPRESSION_CLASSES; + static { + ImmutableSet.Builder<Class<? extends LogicalExpression>> builder = ImmutableSet.builder(); + VALUE_EXPRESSION_CLASSES = builder + .add(BooleanExpression.class) + .add(DateExpression.class) + .add(DoubleExpression.class) + .add(FloatExpression.class) + .add(IntExpression.class) + .add(LongExpression.class) + .add(QuotedString.class) + .add(TimeExpression.class) + .build(); + } + + private Object value; + private PredicateLeaf.Type valueType; + private boolean success; + private SchemaPath path; + private String functionName; + + public static boolean isCompareFunction(String functionName) { + return COMPARE_FUNCTIONS_TRANSPOSE_MAP.keySet().contains(functionName); + } + + public static boolean isIsFunction(String funcName) { + return IS_FUNCTIONS_SET.contains(funcName); + } + + // shows whether function is simplified IS FALSE + public static boolean isNot(FunctionCall call, String funcName) { + return !call.args().isEmpty() + && FunctionNames.NOT.equals(funcName); + } + + public static HiveCompareFunctionsProcessor createFunctionsProcessorInstance(FunctionCall call) { + String functionName = call.getName(); + HiveCompareFunctionsProcessor evaluator = new HiveCompareFunctionsProcessor(functionName); + + return createFunctionsProcessorInstanceInternal(call, evaluator); + } + + protected static <T extends HiveCompareFunctionsProcessor> T createFunctionsProcessorInstanceInternal(FunctionCall call, T evaluator) { + LogicalExpression nameArg = call.arg(0); + LogicalExpression valueArg = call.argCount() >= 2 ? call.arg(1) : null; + if (valueArg != null) { // binary function + if (VALUE_EXPRESSION_CLASSES.contains(nameArg.getClass())) { + LogicalExpression swapArg = valueArg; + valueArg = nameArg; + nameArg = swapArg; + evaluator.setFunctionName(COMPARE_FUNCTIONS_TRANSPOSE_MAP.get(evaluator.getFunctionName())); + } + evaluator.setSuccess(nameArg.accept(evaluator, valueArg)); + } else if (call.arg(0) instanceof SchemaPath) { + evaluator.setPath((SchemaPath) nameArg); + } + evaluator.setSuccess(true); + return evaluator; + } + + public HiveCompareFunctionsProcessor(String functionName) { Review Comment: Nit: Move constructor to top of class. ########## contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/filter/HiveFilter.java: ########## @@ -0,0 +1,55 @@ +package org.apache.drill.exec.store.hive.readers.filter; +/* Review Comment: Nit: License header should go before the `package` declaration. ########## contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/filter/HiveFilterBuilder.java: ########## @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.hive.readers.filter; + +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.drill.common.FunctionNames; +import org.apache.drill.common.expression.BooleanOperator; +import org.apache.drill.common.expression.FieldReference; +import org.apache.drill.common.expression.FunctionCall; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.visitors.AbstractExprVisitor; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashMap; +import java.util.List; + +public class HiveFilterBuilder extends AbstractExprVisitor<SearchArgument.Builder, Void, + RuntimeException> { + + private static final Logger logger = LoggerFactory.getLogger(HiveFilterBuilder.class); + + private final LogicalExpression le; + + private boolean allExpressionsConverted = false; + private SearchArgument.Builder builder; + private HashMap<String, SqlTypeName> dataTypeMap; + + HiveFilterBuilder(LogicalExpression le, HashMap<String, SqlTypeName> dataTypeMap) { + this.le = le; + this.dataTypeMap = dataTypeMap; + this.builder = SearchArgumentFactory.newBuilder(); + } + + public HiveFilter parseTree() { + SearchArgument.Builder accept = le.accept(this, null); + SearchArgument searchArgument = builder.build(); + if (accept != null) { + searchArgument = accept.build(); + } + return new HiveFilter(searchArgument); + } + + public boolean isAllExpressionsConverted() { + return allExpressionsConverted; + } + + @Override + public SearchArgument.Builder visitUnknown(LogicalExpression e, Void value) throws RuntimeException { + allExpressionsConverted = false; + return null; + } + + @Override + public SearchArgument.Builder visitSchemaPath(SchemaPath path, Void value) throws RuntimeException { + if (path instanceof FieldReference) { + String fieldName = path.getAsNamePart().getName(); + SqlTypeName sqlTypeName = dataTypeMap.get(fieldName); + switch (sqlTypeName) { + case BOOLEAN: + PredicateLeaf.Type valueType = convertLeafType(sqlTypeName); + builder.startNot().equals(fieldName, valueType, false).end(); + break; + default: + // otherwise, we don't know what to do so make it a maybe or do nothing + builder.literal(SearchArgument.TruthValue.YES_NO_NULL); + } + } + return builder; + } + + @Override + public SearchArgument.Builder visitBooleanOperator(BooleanOperator op, Void value) throws RuntimeException { + return visitFunctionCall(op, value); + } + + @Override + public SearchArgument.Builder visitFunctionCall(FunctionCall call, Void value) throws RuntimeException { + String functionName = call.getName(); + List<LogicalExpression> args = call.args(); + if (HiveCompareFunctionsProcessor.isCompareFunction(functionName) || HiveCompareFunctionsProcessor.isIsFunction(functionName) || HiveCompareFunctionsProcessor.isNot(call, functionName)) { + HiveCompareFunctionsProcessor processor = HiveCompareFunctionsProcessor.createFunctionsProcessorInstance(call); + if (processor.isSuccess()) { + return buildSearchArgument(processor); + } + } else { + switch (functionName) { + case FunctionNames.AND: + builder.startAnd(); + break; + case FunctionNames.OR: + builder.startOr(); + break; + default: + logger.warn("Unsupported logical operator:{} for push down", functionName); + return builder; + } + for (int i = 0; i < args.size(); ++i) { Review Comment: Can we replace with enhanced for-loop? ########## contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/filter/HiveFilterBuilder.java: ########## @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.hive.readers.filter; + +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.drill.common.FunctionNames; +import org.apache.drill.common.expression.BooleanOperator; +import org.apache.drill.common.expression.FieldReference; +import org.apache.drill.common.expression.FunctionCall; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.visitors.AbstractExprVisitor; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashMap; +import java.util.List; + +public class HiveFilterBuilder extends AbstractExprVisitor<SearchArgument.Builder, Void, + RuntimeException> { + + private static final Logger logger = LoggerFactory.getLogger(HiveFilterBuilder.class); + + private final LogicalExpression le; + + private boolean allExpressionsConverted = false; + private SearchArgument.Builder builder; + private HashMap<String, SqlTypeName> dataTypeMap; Review Comment: Please make any variables `final` which can be made final. ########## contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/filter/HiveCompareFunctionsProcessor.java: ########## @@ -0,0 +1,264 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.hive.readers.filter; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import org.apache.drill.common.FunctionNames; +import org.apache.drill.common.expression.CastExpression; +import org.apache.drill.common.expression.FunctionCall; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.ValueExpressions; +import org.apache.drill.common.expression.ValueExpressions.BooleanExpression; +import org.apache.drill.common.expression.ValueExpressions.DateExpression; +import org.apache.drill.common.expression.ValueExpressions.DoubleExpression; +import org.apache.drill.common.expression.ValueExpressions.FloatExpression; +import org.apache.drill.common.expression.ValueExpressions.IntExpression; +import org.apache.drill.common.expression.ValueExpressions.LongExpression; +import org.apache.drill.common.expression.ValueExpressions.QuotedString; +import org.apache.drill.common.expression.ValueExpressions.TimeExpression; +import org.apache.drill.common.expression.ValueExpressions.TimeStampExpression; +import org.apache.drill.common.expression.visitors.AbstractExprVisitor; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import java.sql.Timestamp; + +public class HiveCompareFunctionsProcessor extends AbstractExprVisitor<Boolean, LogicalExpression, RuntimeException> { + + private static final ImmutableSet<String> IS_FUNCTIONS_SET; + static { + ImmutableSet.Builder<String> builder = ImmutableSet.builder(); + IS_FUNCTIONS_SET = builder + .add(FunctionNames.IS_NOT_NULL) + .add("isNotNull") + .add("is not null") + .add(FunctionNames.IS_NULL) + .add("isNull") + .add("is null") + .add(FunctionNames.IS_TRUE) + .add(FunctionNames.IS_NOT_TRUE) + .add(FunctionNames.IS_FALSE) + .add(FunctionNames.IS_NOT_FALSE) + .build(); + + } + + private static final ImmutableMap<String, String> COMPARE_FUNCTIONS_TRANSPOSE_MAP; + static { + ImmutableMap.Builder<String, String> builder = ImmutableMap.builder(); + COMPARE_FUNCTIONS_TRANSPOSE_MAP = builder + // binary functions + .put(FunctionNames.LIKE, FunctionNames.LIKE) + .put(FunctionNames.EQ, FunctionNames.EQ) + .put(FunctionNames.NE, FunctionNames.NE) + .put(FunctionNames.GE, FunctionNames.LE) + .put(FunctionNames.GT, FunctionNames.LT) + .put(FunctionNames.LE, FunctionNames.GE) + .put(FunctionNames.LT, FunctionNames.GT) + .build(); + } + + private static final ImmutableSet<Class<? extends LogicalExpression>> VALUE_EXPRESSION_CLASSES; + static { + ImmutableSet.Builder<Class<? extends LogicalExpression>> builder = ImmutableSet.builder(); + VALUE_EXPRESSION_CLASSES = builder + .add(BooleanExpression.class) + .add(DateExpression.class) + .add(DoubleExpression.class) + .add(FloatExpression.class) + .add(IntExpression.class) + .add(LongExpression.class) + .add(QuotedString.class) + .add(TimeExpression.class) + .build(); + } + + private Object value; + private PredicateLeaf.Type valueType; + private boolean success; + private SchemaPath path; + private String functionName; + + public static boolean isCompareFunction(String functionName) { + return COMPARE_FUNCTIONS_TRANSPOSE_MAP.keySet().contains(functionName); + } + + public static boolean isIsFunction(String funcName) { + return IS_FUNCTIONS_SET.contains(funcName); + } + + // shows whether function is simplified IS FALSE + public static boolean isNot(FunctionCall call, String funcName) { + return !call.args().isEmpty() + && FunctionNames.NOT.equals(funcName); + } + + public static HiveCompareFunctionsProcessor createFunctionsProcessorInstance(FunctionCall call) { + String functionName = call.getName(); + HiveCompareFunctionsProcessor evaluator = new HiveCompareFunctionsProcessor(functionName); + + return createFunctionsProcessorInstanceInternal(call, evaluator); + } + + protected static <T extends HiveCompareFunctionsProcessor> T createFunctionsProcessorInstanceInternal(FunctionCall call, T evaluator) { + LogicalExpression nameArg = call.arg(0); + LogicalExpression valueArg = call.argCount() >= 2 ? call.arg(1) : null; + if (valueArg != null) { // binary function + if (VALUE_EXPRESSION_CLASSES.contains(nameArg.getClass())) { + LogicalExpression swapArg = valueArg; + valueArg = nameArg; + nameArg = swapArg; + evaluator.setFunctionName(COMPARE_FUNCTIONS_TRANSPOSE_MAP.get(evaluator.getFunctionName())); + } + evaluator.setSuccess(nameArg.accept(evaluator, valueArg)); + } else if (call.arg(0) instanceof SchemaPath) { + evaluator.setPath((SchemaPath) nameArg); + } + evaluator.setSuccess(true); + return evaluator; + } + + public HiveCompareFunctionsProcessor(String functionName) { + this.success = false; + this.functionName = functionName; + } + + public boolean isSuccess() { + return success; + } + + protected void setSuccess(boolean success) { + this.success = success; + } + + public SchemaPath getPath() { + return path; + } + + protected void setPath(SchemaPath path) { + this.path = path; + } + + public String getFunctionName() { + return functionName; + } + + protected void setFunctionName(String functionName) { + this.functionName = functionName; + } + + public Object getValue() { + return value; + } + + public void setValue(Object value) { + this.value = value; + } + + public PredicateLeaf.Type getValueType() { + return valueType; + } + + public void setValueType(PredicateLeaf.Type valueType) { + this.valueType = valueType; + } + + @Override + public Boolean visitCastExpression(CastExpression e, LogicalExpression valueArg) throws RuntimeException { + if (e.getInput() instanceof CastExpression || e.getInput() instanceof SchemaPath) { + return e.getInput().accept(this, valueArg); + } + return false; + } + @Override + public Boolean visitUnknown(LogicalExpression e, LogicalExpression valueArg) throws RuntimeException { + return false; + } + + @Override + public Boolean visitSchemaPath(SchemaPath path, LogicalExpression valueArg) throws RuntimeException { + if (valueArg instanceof QuotedString) { + this.value = ((QuotedString) valueArg).getString(); + this.path = path; + this.valueType = PredicateLeaf.Type.STRING; + return true; + } + + if (valueArg instanceof IntExpression) { + int expValue = ((IntExpression) valueArg).getInt(); + this.value = ((Integer) expValue).longValue(); + this.path = path; + this.valueType = PredicateLeaf.Type.LONG; + return true; + } + + if (valueArg instanceof LongExpression) { + this.value = ((LongExpression) valueArg).getLong(); + this.path = path; + this.valueType = PredicateLeaf.Type.LONG; + return true; + } + + if (valueArg instanceof FloatExpression) { + this.value = ((FloatExpression) valueArg).getFloat(); + this.path = path; + this.valueType = PredicateLeaf.Type.FLOAT; + return true; + } + + if (valueArg instanceof DoubleExpression) { + this.value = ((DoubleExpression) valueArg).getDouble(); + this.path = path; + this.valueType = PredicateLeaf.Type.FLOAT; + return true; + } + + if (valueArg instanceof BooleanExpression) { + this.value = ((BooleanExpression) valueArg).getBoolean(); + this.path = path; + this.valueType = PredicateLeaf.Type.BOOLEAN; + return true; + } + + if (valueArg instanceof DateExpression) { + this.value = ((DateExpression) valueArg).getDate(); + this.path = path; + this.valueType = PredicateLeaf.Type.LONG; + return true; + } + + if (valueArg instanceof TimeStampExpression) { + long timeStamp = ((TimeStampExpression) valueArg).getTimeStamp(); + this.value = new Timestamp(timeStamp); + this.path = path; + this.valueType = PredicateLeaf.Type.TIMESTAMP; + return true; + } + + if (valueArg instanceof ValueExpressions.VarDecimalExpression) { + double v = ((ValueExpressions.VarDecimalExpression) valueArg).getBigDecimal().doubleValue(); + this.value = new HiveDecimalWritable(String.valueOf(v)); + this.path = path; + this.valueType = PredicateLeaf.Type.DECIMAL; + return true; + } + return false; + } +} Review Comment: Nit: Please add a blank line at the end of all classes. Here and elsewhere. ########## contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/filter/HiveFilterBuilder.java: ########## @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.hive.readers.filter; + +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.drill.common.FunctionNames; +import org.apache.drill.common.expression.BooleanOperator; +import org.apache.drill.common.expression.FieldReference; +import org.apache.drill.common.expression.FunctionCall; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.visitors.AbstractExprVisitor; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashMap; +import java.util.List; + +public class HiveFilterBuilder extends AbstractExprVisitor<SearchArgument.Builder, Void, + RuntimeException> { + + private static final Logger logger = LoggerFactory.getLogger(HiveFilterBuilder.class); + + private final LogicalExpression le; + + private boolean allExpressionsConverted = false; + private SearchArgument.Builder builder; + private HashMap<String, SqlTypeName> dataTypeMap; + + HiveFilterBuilder(LogicalExpression le, HashMap<String, SqlTypeName> dataTypeMap) { Review Comment: Should this be public? ########## contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/filter/HiveFilterBuilder.java: ########## @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.hive.readers.filter; + +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.drill.common.FunctionNames; +import org.apache.drill.common.expression.BooleanOperator; +import org.apache.drill.common.expression.FieldReference; +import org.apache.drill.common.expression.FunctionCall; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.visitors.AbstractExprVisitor; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashMap; +import java.util.List; + +public class HiveFilterBuilder extends AbstractExprVisitor<SearchArgument.Builder, Void, + RuntimeException> { + + private static final Logger logger = LoggerFactory.getLogger(HiveFilterBuilder.class); + + private final LogicalExpression le; + + private boolean allExpressionsConverted = false; + private SearchArgument.Builder builder; + private HashMap<String, SqlTypeName> dataTypeMap; + + HiveFilterBuilder(LogicalExpression le, HashMap<String, SqlTypeName> dataTypeMap) { + this.le = le; + this.dataTypeMap = dataTypeMap; + this.builder = SearchArgumentFactory.newBuilder(); + } + + public HiveFilter parseTree() { + SearchArgument.Builder accept = le.accept(this, null); + SearchArgument searchArgument = builder.build(); + if (accept != null) { + searchArgument = accept.build(); + } + return new HiveFilter(searchArgument); + } + + public boolean isAllExpressionsConverted() { + return allExpressionsConverted; + } + + @Override + public SearchArgument.Builder visitUnknown(LogicalExpression e, Void value) throws RuntimeException { + allExpressionsConverted = false; + return null; + } + + @Override + public SearchArgument.Builder visitSchemaPath(SchemaPath path, Void value) throws RuntimeException { + if (path instanceof FieldReference) { + String fieldName = path.getAsNamePart().getName(); + SqlTypeName sqlTypeName = dataTypeMap.get(fieldName); + switch (sqlTypeName) { + case BOOLEAN: + PredicateLeaf.Type valueType = convertLeafType(sqlTypeName); + builder.startNot().equals(fieldName, valueType, false).end(); + break; + default: + // otherwise, we don't know what to do so make it a maybe or do nothing + builder.literal(SearchArgument.TruthValue.YES_NO_NULL); + } + } + return builder; + } + + @Override + public SearchArgument.Builder visitBooleanOperator(BooleanOperator op, Void value) throws RuntimeException { + return visitFunctionCall(op, value); + } + + @Override + public SearchArgument.Builder visitFunctionCall(FunctionCall call, Void value) throws RuntimeException { + String functionName = call.getName(); + List<LogicalExpression> args = call.args(); + if (HiveCompareFunctionsProcessor.isCompareFunction(functionName) || HiveCompareFunctionsProcessor.isIsFunction(functionName) || HiveCompareFunctionsProcessor.isNot(call, functionName)) { + HiveCompareFunctionsProcessor processor = HiveCompareFunctionsProcessor.createFunctionsProcessorInstance(call); + if (processor.isSuccess()) { + return buildSearchArgument(processor); + } + } else { + switch (functionName) { + case FunctionNames.AND: + builder.startAnd(); + break; + case FunctionNames.OR: + builder.startOr(); + break; + default: + logger.warn("Unsupported logical operator:{} for push down", functionName); + return builder; + } + for (int i = 0; i < args.size(); ++i) { + args.get(i).accept(this, null); + } + builder.end(); + } + return builder; + } + + private SearchArgument.Builder buildSearchArgument(HiveCompareFunctionsProcessor processor) { + String functionName = processor.getFunctionName(); + SchemaPath field = processor.getPath(); + Object fieldValue = processor.getValue(); + PredicateLeaf.Type valueType = processor.getValueType(); + SqlTypeName sqlTypeName = dataTypeMap.get(field.getAsNamePart().getName()); + if (fieldValue == null) { + valueType = convertLeafType(sqlTypeName); + } + if (valueType == null) { + return builder; + } + switch (functionName) { + case FunctionNames.EQ: + builder.startAnd().equals(field.getAsNamePart().getName(), valueType, fieldValue).end(); + break; + case FunctionNames.NE: + builder.startNot().equals(field.getAsNamePart().getName(), valueType, fieldValue).end(); + break; + case FunctionNames.GE: + builder.startNot().lessThan(field.getAsNamePart().getName(), valueType, fieldValue).end(); + break; + case FunctionNames.GT: + builder.startNot().lessThanEquals(field.getAsNamePart().getName(), valueType, fieldValue).end(); + break; + case FunctionNames.LE: + builder.startAnd().lessThanEquals(field.getAsNamePart().getName(), valueType, fieldValue).end(); + break; + case FunctionNames.LT: + builder.startAnd().lessThan(field.getAsNamePart().getName(), valueType, fieldValue).end(); + break; + case FunctionNames.IS_NULL: + case "isNull": + case "is null": + builder.startAnd().isNull(field.getAsNamePart().getName(), valueType).end(); + break; + case FunctionNames.IS_NOT_NULL: + case "isNotNull": + case "is not null": + builder.startNot().isNull(field.getAsNamePart().getName(), valueType).end(); + break; + case FunctionNames.IS_NOT_TRUE: + case FunctionNames.IS_FALSE: + case FunctionNames.NOT: + builder.startNot().equals(field.getAsNamePart().getName(), valueType, true).end(); + break; + case FunctionNames.IS_TRUE: + case FunctionNames.IS_NOT_FALSE: + builder.startNot().equals(field.getAsNamePart().getName(), valueType, false).end(); + break; + } + return builder; + } + + private PredicateLeaf.Type convertLeafType(SqlTypeName sqlTypeName) { + PredicateLeaf.Type type = null; + switch (sqlTypeName) { + case BOOLEAN: Review Comment: Nit: Please indent switch statement. > Hive Predicate Push Down for ORC and Parquet > -------------------------------------------- > > Key: DRILL-8526 > URL: https://issues.apache.org/jira/browse/DRILL-8526 > Project: Apache Drill > Issue Type: Improvement > Components: Storage - Hive > Affects Versions: 1.22.0 > Reporter: shihuafeng > Priority: Major > Fix For: 1.23.0 > > Attachments: image-2025-06-24-18-08-34-427.png, > image-2025-06-24-18-08-54-768.png > > > Drill do not support filter push down for orc format. i do it and test. > When a large amount of data is filtered out, Predicate PushDown can > significantly improve the query performance of ORC format > Through comparative testing of the following TPCH SQL queries, ORC format > with filter pushdown achieves nearly a 5-20x performance improvement over > execution without pushdown. > sql : select * from hive.lineitem_o where L_ORDERKEY=1; > the data of table lineitem_o: 6001215 > with out push down > !image-2025-06-24-18-08-34-427.png! > push down > !image-2025-06-24-18-08-54-768.png! -- This message was sent by Atlassian Jira (v8.20.10#820010)