HAWQ-964. Support for OR and NOT Logical Operators

Signed-off-by: Leslie Chang <[email protected]>
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/726be6cc Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/726be6cc Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/726be6cc Branch: refs/heads/HAWQ-964 Commit: 726be6cced0edd67fd0a0308d0483ee54b8ac116 Parents: e6c7fda Author: Kavinder Dhaliwal <[email protected]> Authored: Thu Sep 15 10:56:20 2016 -0700 Committer: Kavinder Dhaliwal <[email protected]> Committed: Tue Sep 20 09:45:44 2016 -0700 ---------------------------------------------------------------------- .../org/apache/hawq/pxf/api/FilterParser.java | 71 ++++++------ .../apache/hawq/pxf/api/FilterParserTest.java | 115 +++++++++++++++++++ .../pxf/plugins/hbase/HBaseFilterBuilder.java | 52 +++++---- .../hawq/pxf/plugins/hive/HiveAccessor.java | 5 +- .../pxf/plugins/hive/HiveDataFragmenter.java | 3 +- .../pxf/plugins/hive/HiveFilterBuilder.java | 80 ++++++------- .../hawq/pxf/plugins/hive/HiveORCAccessor.java | 39 +++++-- .../pxf/plugins/hive/HiveFilterBuilderTest.java | 46 +++++--- .../hive/HiveORCSearchArgumentExample.java | 84 ++++++++++++++ 9 files changed, 370 insertions(+), 125 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/726be6cc/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/FilterParser.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/FilterParser.java b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/FilterParser.java index 00fbf2b..22c76a6 100644 --- a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/FilterParser.java +++ b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/FilterParser.java @@ -52,6 +52,7 @@ public class FilterParser { private FilterBuilder filterBuilder; private static Map<Integer, Operation> operatorTranslationMap = 
initOperatorTransMap(); + private static Map<Integer, LogicalOperation> logicalOperationTranslationMap = initLogicalOperatorTransMap(); /** Supported operations by the parser. */ public enum Operation { @@ -65,6 +66,12 @@ public class FilterParser { HDOP_LIKE } + public enum LogicalOperation { + HDOP_AND, + HDOP_OR, + HDOP_NOT + } + /** * Interface a user of FilterParser should implement. * This is used to let the user build filter expressions in the manner she sees fit. @@ -81,6 +88,8 @@ public class FilterParser { * @throws Exception if building the filter failed */ public Object build(Operation operation, Object left, Object right) throws Exception; + public Object build(LogicalOperation operation, Object left, Object right) throws Exception; + public Object build(LogicalOperation operation, Object filter) throws Exception; } /** Represents a column index. */ @@ -110,42 +119,6 @@ public class FilterParser { } /** - * Basic filter provided for cases where the target storage system does not provide it own filter - * For example: Hbase storage provides its own filter but for a Writable based record in a - * SequenceFile there is no filter provided and so we need to have a default - */ - static public class BasicFilter { - private Operation oper; - private ColumnIndex column; - private Constant constant; - - /** - * Constructs a BasicFilter. - * - * @param oper the parse operation to perform - * @param column the column index - * @param constant the constant object - */ - public BasicFilter(Operation oper, ColumnIndex column, Constant constant) { - this.oper = oper; - this.column = column; - this.constant = constant; - } - - public Operation getOperation() { - return oper; - } - - public ColumnIndex getColumn() { - return column; - } - - public Constant getConstant() { - return constant; - } - } - - /** * Thrown when a filter's parsing exception occurs. 
*/ @SuppressWarnings("serial") @@ -220,6 +193,24 @@ public class FilterParser { // Store result on stack operandsStack.push(result); break; + case 'l': + LogicalOperation logicalOperation = logicalOperationTranslationMap.get(safeToInt(parseNumber())); + + if (logicalOperation == null) { + throw new FilterStringSyntaxException("unknown op ending at " + index); + } + + if (logicalOperation == LogicalOperation.HDOP_NOT) { + Object exp = operandsStack.pop(); + result = filterBuilder.build(logicalOperation, exp); + } else { + rightOperand = operandsStack.pop(); + leftOperand = operandsStack.pop(); + + result = filterBuilder.build(logicalOperation, leftOperand, rightOperand); + } + operandsStack.push(result); + break; default: index--; // move index back to operand location throw new FilterStringSyntaxException("unknown opcode " + op + @@ -396,4 +387,12 @@ public class FilterParser { operatorTranslationMap.put(8, Operation.HDOP_LIKE); return operatorTranslationMap; } + + static private Map<Integer, LogicalOperation> initLogicalOperatorTransMap() { + Map<Integer, LogicalOperation> integerLogicalOperationMap = new HashMap<>(); + integerLogicalOperationMap.put(0, LogicalOperation.HDOP_AND); + integerLogicalOperationMap.put(1, LogicalOperation.HDOP_OR); + integerLogicalOperationMap.put(2, LogicalOperation.HDOP_NOT); + return integerLogicalOperationMap; + } } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/726be6cc/pxf/pxf-api/src/test/java/org/apache/hawq/pxf/api/FilterParserTest.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-api/src/test/java/org/apache/hawq/pxf/api/FilterParserTest.java b/pxf/pxf-api/src/test/java/org/apache/hawq/pxf/api/FilterParserTest.java index 1ded4a3..83bb2dc 100644 --- a/pxf/pxf-api/src/test/java/org/apache/hawq/pxf/api/FilterParserTest.java +++ b/pxf/pxf-api/src/test/java/org/apache/hawq/pxf/api/FilterParserTest.java @@ -22,13 +22,17 @@ package org.apache.hawq.pxf.api; import 
org.apache.hawq.pxf.api.FilterParser.FilterBuilder; import org.apache.hawq.pxf.api.FilterParser.Operation; +import org.apache.hawq.pxf.api.FilterParser.LogicalOperation; import org.junit.Before; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; import org.junit.runner.RunWith; import org.powermock.core.classloader.annotations.PrepareForTest; import org.powermock.modules.junit4.PowerMockRunner; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; import static org.junit.Assert.fail; import static org.mockito.Matchers.any; import static org.mockito.Matchers.eq; @@ -282,6 +286,117 @@ public class FilterParserTest { assertEquals(lastOp, result); } + @Test + public void parseLogicalAndOperator() throws Exception { + filter = "l0"; + Object op = "filter with 1 AND operator"; + + when(filterBuilder.build(eq(LogicalOperation.HDOP_AND), + any(), + any())).thenReturn(op); + + Object result = filterParser.parse(filter); + + assertEquals(op, result); + } + + @Test + public void parseLogicalOrOperator() throws Exception { + filter = "l1"; + + Object op = "filter with 1 OR operator"; + + when(filterBuilder.build(eq(LogicalOperation.HDOP_OR), + any(), + any())).thenReturn(op); + + Object result = filterParser.parse(filter); + assertEquals(op, result); + } + + @Test + public void parseLogicalNotOperator() throws Exception { + filter = "l2"; + + Object op = "filter with NOT operator"; + + when(filterBuilder.build(eq(LogicalOperation.HDOP_NOT), + any(), + any())).thenReturn(op); + + Object result = filterParser.parse(filter); + assertEquals(op, result); + } + + @Rule + public ExpectedException thrown = ExpectedException.none(); + @Test + public void parseLogicalUnknownCodeError() throws Exception { + thrown.expect(FilterParser.FilterStringSyntaxException.class); + thrown.expectMessage("unknown op ending at 2"); + + filter = "l7"; + when(filterBuilder.build(eq(LogicalOperation.HDOP_AND), + any(), + 
any())).thenReturn(null); + + Object result = filterParser.parse(filter); + } + + @Test + public void parseLogicalOperatorWithExpressions() throws Exception { + filter = "a1c\"first\"o5a2c2o2l0"; + Object firstOp = "first operation HDOP_EQ"; + Object secondOp = "second operation HDOP_GT"; + Object lastOp = "filter with 2 operations connected by AND"; + + when(filterBuilder.build(eq(Operation.HDOP_EQ), + any(), + any())).thenReturn(firstOp); + + + when(filterBuilder.build(eq(Operation.HDOP_GT), + any(), + any())).thenReturn(secondOp); + + when(filterBuilder.build(eq(LogicalOperation.HDOP_AND), + any(), + any())).thenReturn(lastOp); + + + Object result = filterParser.parse(filter); + assertEquals(lastOp, result); + } + + @Test + public void parseLogicalOperatorNotExpression() throws Exception { + filter = "a1c\"first\"o5a2c2o2l0l2"; + Object firstOp = "first operation HDOP_EQ"; + Object secondOp = "second operation HDOP_GT"; + Object thirdOp = "filter with 2 operations connected by AND"; + Object lastOp = "filter with 1 NOT operation"; + + when(filterBuilder.build(eq(Operation.HDOP_EQ), + any(), + any())).thenReturn(firstOp); + + + when(filterBuilder.build(eq(Operation.HDOP_GT), + any(), + any())).thenReturn(secondOp); + + when(filterBuilder.build(eq(LogicalOperation.HDOP_AND), + any(), + any())).thenReturn(thirdOp); + + when(filterBuilder.build(eq(LogicalOperation.HDOP_NOT), + any())).thenReturn(lastOp); + + Object result = filterParser.parse(filter); + assertEquals(lastOp, result); + } + + /* * Helper functions */ http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/726be6cc/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilder.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilder.java b/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilder.java index 8eadc88..29c8686 100644 --- 
a/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilder.java +++ b/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilder.java @@ -52,12 +52,15 @@ import static org.apache.hawq.pxf.api.io.DataType.TEXT; */ public class HBaseFilterBuilder implements FilterParser.FilterBuilder { private Map<FilterParser.Operation, CompareFilter.CompareOp> operatorsMap; + private Map<FilterParser.LogicalOperation, FilterList.Operator> logicalOperatorsMap; private byte[] startKey; private byte[] endKey; private HBaseTupleDescription tupleDescription; + private static final String NOT_OP = "l2"; public HBaseFilterBuilder(HBaseTupleDescription tupleDescription) { initOperatorsMap(); + initLogicalOperatorsMap(); startKey = HConstants.EMPTY_START_ROW; endKey = HConstants.EMPTY_END_ROW; this.tupleDescription = tupleDescription; @@ -71,14 +74,19 @@ public class HBaseFilterBuilder implements FilterParser.FilterBuilder { * @throws Exception if parsing failed */ public Filter getFilterObject(String filterString) throws Exception { - FilterParser parser = new FilterParser(this); - Object result = parser.parse(filterString); + // First check for NOT, HBase does not support this + if (filterString.contains(NOT_OP)) { + return null; + } else { + FilterParser parser = new FilterParser(this); + Object result = parser.parse(filterString); + + if (!(result instanceof Filter)) { + throw new Exception("String " + filterString + " resolved to no filter"); + } - if (!(result instanceof Filter)) { - throw new Exception("String " + filterString + " resolved to no filter"); + return (Filter) result; } - - return (Filter) result; } /** @@ -122,18 +130,6 @@ public class HBaseFilterBuilder implements FilterParser.FilterBuilder { public Object build(FilterParser.Operation opId, Object leftOperand, Object rightOperand) throws Exception { - if (leftOperand instanceof Filter) { - if (opId != FilterParser.Operation.HDOP_AND || - !(rightOperand instanceof Filter)) { 
- throw new Exception("Only AND is allowed between compound expressions"); - } - - return handleCompoundOperations((Filter) leftOperand, (Filter) rightOperand); - } - - if (!(rightOperand instanceof FilterParser.Constant)) { - throw new Exception("expressions of column-op-column are not supported"); - } // Assume column is on the left return handleSimpleOperations(opId, @@ -141,6 +137,16 @@ public class HBaseFilterBuilder implements FilterParser.FilterBuilder { (FilterParser.Constant) rightOperand); } + @Override + public Object build(FilterParser.LogicalOperation opId, Object leftOperand, Object rightOperand) { + return handleCompoundOperations(opId, (Filter) leftOperand, (Filter) rightOperand); + } + + @Override + public Object build(FilterParser.LogicalOperation opId, Object leftOperand) { + return null; + } + /** * Initializes the {@link #operatorsMap} with appropriate values. */ @@ -154,6 +160,12 @@ public class HBaseFilterBuilder implements FilterParser.FilterBuilder { operatorsMap.put(FilterParser.Operation.HDOP_NE, CompareFilter.CompareOp.NOT_EQUAL); // "!=" } + private void initLogicalOperatorsMap() { + logicalOperatorsMap = new HashMap<>(); + logicalOperatorsMap.put(FilterParser.LogicalOperation.HDOP_AND, FilterList.Operator.MUST_PASS_ALL); + logicalOperatorsMap.put(FilterParser.LogicalOperation.HDOP_OR, FilterList.Operator.MUST_PASS_ONE); + } + /** * Handles simple column-operator-constant expressions. * Creates a special filter in the case the column is the row key column. @@ -227,7 +239,7 @@ public class HBaseFilterBuilder implements FilterParser.FilterBuilder { * <p> * Currently, 1, 2 can occur, since no parenthesis are used. 
*/ - private Filter handleCompoundOperations(Filter left, Filter right) { + private Filter handleCompoundOperations(FilterParser.LogicalOperation opId, Filter left, Filter right) { FilterList result; if (left instanceof FilterList) { @@ -237,7 +249,7 @@ public class HBaseFilterBuilder implements FilterParser.FilterBuilder { return result; } - result = new FilterList(FilterList.Operator.MUST_PASS_ALL, new Filter[] {left, right}); + result = new FilterList(logicalOperatorsMap.get(opId), new Filter[] {left, right}); return result; } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/726be6cc/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveAccessor.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveAccessor.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveAccessor.java index ab40b3c..20a1b9f 100644 --- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveAccessor.java +++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveAccessor.java @@ -19,6 +19,7 @@ package org.apache.hawq.pxf.plugins.hive; * under the License. 
*/ +import org.apache.hawq.pxf.api.BasicFilter; import org.apache.hawq.pxf.api.FilterParser; import org.apache.hawq.pxf.api.utilities.ColumnDescriptor; import org.apache.hawq.pxf.api.utilities.InputData; @@ -219,7 +220,7 @@ public class HiveAccessor extends HdfsSplittableDataAccessor { private boolean testOneFilter(List<HivePartition> partitionFields, Object filter, InputData input) { // Let's look first at the filter - FilterParser.BasicFilter bFilter = (FilterParser.BasicFilter) filter; + BasicFilter bFilter = (BasicFilter) filter; boolean isFilterOperationEqual = (bFilter.getOperation() == FilterParser.Operation.HDOP_EQ); if (!isFilterOperationEqual) /* @@ -253,7 +254,7 @@ public class HiveAccessor extends HdfsSplittableDataAccessor { } private void printOneBasicFilter(Object filter) { - FilterParser.BasicFilter bFilter = (FilterParser.BasicFilter) filter; + BasicFilter bFilter = (BasicFilter) filter; boolean isOperationEqual = (bFilter.getOperation() == FilterParser.Operation.HDOP_EQ); int columnIndex = bFilter.getColumn().index(); String value = bFilter.getConstant().constant().toString(); http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/726be6cc/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveDataFragmenter.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveDataFragmenter.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveDataFragmenter.java index 2fe31cd..8446905 100644 --- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveDataFragmenter.java +++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveDataFragmenter.java @@ -46,6 +46,7 @@ import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.TextInputFormat; +import org.apache.hawq.pxf.api.BasicFilter; import 
org.apache.hawq.pxf.api.FilterParser; import org.apache.hawq.pxf.api.Fragment; import org.apache.hawq.pxf.api.Fragmenter; @@ -433,7 +434,7 @@ public class HiveDataFragmenter extends Fragmenter { throws Exception { // Let's look first at the filter - FilterParser.BasicFilter bFilter = (FilterParser.BasicFilter) filter; + BasicFilter bFilter = (BasicFilter) filter; // In case this is not an "equality filter", we ignore this filter (no // add to filter list) http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/726be6cc/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilder.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilder.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilder.java index da20f74..bd82a3b 100644 --- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilder.java +++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilder.java @@ -20,17 +20,20 @@ package org.apache.hawq.pxf.plugins.hive; */ +import org.apache.hawq.pxf.api.BasicFilter; import org.apache.hawq.pxf.api.FilterParser; +import org.apache.hawq.pxf.api.LogicalFilter; import org.apache.hawq.pxf.api.utilities.InputData; +import java.util.Arrays; import java.util.LinkedList; import java.util.List; /** * Uses the filter parser code to build a filter object, either simple - a - * single {@link org.apache.hawq.pxf.api.FilterParser.BasicFilter} object or a + * single {@link BasicFilter} object or a * compound - a {@link java.util.List} of - * {@link org.apache.hawq.pxf.api.FilterParser.BasicFilter} objects. + * {@link BasicFilter} objects. * {@link org.apache.hawq.pxf.plugins.hive.HiveAccessor} will use the filter for * partition filtering. 
*/ @@ -47,13 +50,13 @@ public class HiveFilterBuilder implements FilterParser.FilterBuilder { } /** - * Translates a filterString into a {@link org.apache.hawq.pxf.api.FilterParser.BasicFilter} or a + * Translates a filterString into a {@link BasicFilter} or a * list of such filters. * * @param filterString the string representation of the filter - * @return a single {@link org.apache.hawq.pxf.api.FilterParser.BasicFilter} + * @return a single {@link BasicFilter} * object or a {@link java.util.List} of - * {@link org.apache.hawq.pxf.api.FilterParser.BasicFilter} objects. + * {@link BasicFilter} objects. * @throws Exception if parsing the filter failed or filter is not a basic * filter or list of basic filters */ @@ -61,7 +64,7 @@ public class HiveFilterBuilder implements FilterParser.FilterBuilder { FilterParser parser = new FilterParser(this); Object result = parser.parse(filterString); - if (!(result instanceof FilterParser.BasicFilter) + if (!(result instanceof LogicalFilter) && !(result instanceof BasicFilter) && !(result instanceof List)) { throw new Exception("String " + filterString + " resolved to no filter"); @@ -71,33 +74,19 @@ public class HiveFilterBuilder implements FilterParser.FilterBuilder { } @Override + public Object build(FilterParser.LogicalOperation op, Object leftOperand, Object rightOperand) { + return handleLogicalOperation(op, leftOperand, rightOperand); + } + + @Override + public Object build(FilterParser.LogicalOperation op, Object filter) { + return handleLogicalOperation(op, filter); + } + + @Override @SuppressWarnings("unchecked") public Object build(FilterParser.Operation opId, Object leftOperand, Object rightOperand) throws Exception { - if (leftOperand instanceof FilterParser.BasicFilter - || leftOperand instanceof List) { - if (opId != FilterParser.Operation.HDOP_AND - || !(rightOperand instanceof FilterParser.BasicFilter)) { - throw new Exception( - "Only AND is allowed between compound expressions"); - } - - if (leftOperand 
instanceof List) { - return handleCompoundOperations( - (List<FilterParser.BasicFilter>) leftOperand, - (FilterParser.BasicFilter) rightOperand); - } else { - return handleCompoundOperations( - (FilterParser.BasicFilter) leftOperand, - (FilterParser.BasicFilter) rightOperand); - } - } - - if (!(rightOperand instanceof FilterParser.Constant)) { - throw new Exception( - "expressions of column-op-column are not supported"); - } - // Assume column is on the left return handleSimpleOperations(opId, (FilterParser.ColumnIndex) leftOperand, @@ -108,10 +97,10 @@ public class HiveFilterBuilder implements FilterParser.FilterBuilder { * Handles simple column-operator-constant expressions Creates a special * filter in the case the column is the row key column */ - private FilterParser.BasicFilter handleSimpleOperations(FilterParser.Operation opId, - FilterParser.ColumnIndex column, - FilterParser.Constant constant) { - return new FilterParser.BasicFilter(opId, column, constant); + private BasicFilter handleSimpleOperations(FilterParser.Operation opId, + FilterParser.ColumnIndex column, + FilterParser.Constant constant) { + return new BasicFilter(opId, column, constant); } /** @@ -131,19 +120,32 @@ public class HiveFilterBuilder implements FilterParser.FilterBuilder { * @param right right hand filter * @return list of filters constructing the filter tree */ - private List<FilterParser.BasicFilter> handleCompoundOperations(List<FilterParser.BasicFilter> left, - FilterParser.BasicFilter right) { + private List<BasicFilter> handleCompoundOperations(List<BasicFilter> left, + BasicFilter right) { left.add(right); return left; } - private List<FilterParser.BasicFilter> handleCompoundOperations(FilterParser.BasicFilter left, - FilterParser.BasicFilter right) { - List<FilterParser.BasicFilter> result = new LinkedList<FilterParser.BasicFilter>(); + private List<BasicFilter> handleCompoundOperations(BasicFilter left, + BasicFilter right) { + List<BasicFilter> result = new 
LinkedList<BasicFilter>(); result.add(left); result.add(right); return result; } + + private Object handleLogicalOperation(FilterParser.LogicalOperation operator, Object leftOperand, Object rightOperand) { + + List<Object> result = new LinkedList<>(); + + result.add(leftOperand); + result.add(rightOperand); + return new LogicalFilter(operator, result); + } + + private Object handleLogicalOperation(FilterParser.LogicalOperation operator, Object filter) { + return new LogicalFilter(operator, Arrays.asList(filter)); + } } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/726be6cc/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessor.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessor.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessor.java index 23fc66e..195382a 100644 --- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessor.java +++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessor.java @@ -23,11 +23,13 @@ package org.apache.hawq.pxf.plugins.hive; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; -import org.apache.hawq.pxf.api.FilterParser; +import org.apache.hawq.pxf.api.BasicFilter; +import org.apache.hawq.pxf.api.LogicalFilter; import org.apache.hawq.pxf.api.utilities.ColumnDescriptor; import org.apache.hawq.pxf.api.utilities.InputData; import org.apache.commons.lang.StringUtils; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import static org.apache.hawq.pxf.plugins.hive.HiveInputFormatFragmenter.PXF_HIVE_SERDES; @@ -97,24 +99,37 @@ public class HiveORCAccessor extends HiveAccessor { String filterStr = inputData.getFilterString(); HiveFilterBuilder eval = new 
HiveFilterBuilder(inputData); Object filter = eval.getFilterObject(filterStr); - SearchArgument.Builder filterBuilder = SearchArgumentFactory.newBuilder(); - filterBuilder.startAnd(); - if (filter instanceof List) { - for (Object f : (List<?>) filter) { - buildArgument(filterBuilder, f); - } - } else { - buildArgument(filterBuilder, filter); - } - filterBuilder.end(); + buildExpression(filterBuilder, Arrays.asList(filter)); SearchArgument sarg = filterBuilder.build(); jobConf.set(SARG_PUSHDOWN, sarg.toKryo()); } + private void buildExpression(SearchArgument.Builder builder, List<Object> filterList) { + for (Object f : filterList) { + if (f instanceof LogicalFilter) { + switch(((LogicalFilter) f).getOperator()) { + case HDOP_OR: + builder.startOr(); + break; + case HDOP_AND: + builder.startAnd(); + break; + case HDOP_NOT: + builder.startNot(); + break; + } + buildExpression(builder, ((LogicalFilter) f).getFilterList()); + builder.end(); + } else { + buildArgument(builder, f); + } + } + } + private void buildArgument(SearchArgument.Builder builder, Object filterObj) { /* The below functions will not be compatible and requires update with Hive 2.0 APIs */ - FilterParser.BasicFilter filter = (FilterParser.BasicFilter) filterObj; + BasicFilter filter = (BasicFilter) filterObj; int filterColumnIndex = filter.getColumn().index(); Object filterValue = filter.getConstant().constant(); ColumnDescriptor filterColumn = inputData.getColumn(filterColumnIndex); http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/726be6cc/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilderTest.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilderTest.java b/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilderTest.java index bfbfaa4..e0e6536 100755 --- 
a/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilderTest.java +++ b/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilderTest.java @@ -20,11 +20,12 @@ package org.apache.hawq.pxf.plugins.hive; */ +import org.apache.hawq.pxf.api.FilterParser.LogicalOperation; +import org.apache.hawq.pxf.api.LogicalFilter; import org.junit.Test; -import java.util.List; +import org.apache.hawq.pxf.api.BasicFilter; -import static org.apache.hawq.pxf.api.FilterParser.BasicFilter; import static org.apache.hawq.pxf.api.FilterParser.Operation; import static org.apache.hawq.pxf.api.FilterParser.Operation.*; import static org.junit.Assert.assertEquals; @@ -33,18 +34,33 @@ public class HiveFilterBuilderTest { @Test public void parseFilterWithThreeOperations() throws Exception { HiveFilterBuilder builder = new HiveFilterBuilder(null); - String[] consts = new String[] {"first", "2", "3"}; - Operation[] ops = new Operation[] {HDOP_EQ, HDOP_GT, HDOP_LT}; - int[] idx = new int[] {1, 2, 3}; - - @SuppressWarnings("unchecked") - List<BasicFilter> filterList = (List) builder.getFilterObject("a1c\"first\"o5a2c2o2o7a3c3o1o7"); - assertEquals(consts.length, filterList.size()); - for (int i = 0; i < filterList.size(); i++) { - BasicFilter filter = filterList.get(i); - assertEquals(filter.getConstant().constant().toString(), consts[i]); - assertEquals(filter.getOperation(), ops[i]); - assertEquals(filter.getColumn().index(), idx[i]); - } + String[] consts = new String[] {"first", "2"}; + Operation[] ops = new Operation[] {HDOP_EQ, HDOP_GT}; + int[] idx = new int[] {1, 2}; + + LogicalFilter filterList = (LogicalFilter) builder.getFilterObject("a1c\"first\"o5a2c2o2l0"); + assertEquals(LogicalOperation.HDOP_AND, filterList.getOperator()); + BasicFilter leftOperand = (BasicFilter) filterList.getFilterList().get(0); + assertEquals(consts[0], leftOperand.getConstant().constant()); + assertEquals(idx[0], leftOperand.getColumn().index()); + assertEquals(ops[0], 
leftOperand.getOperation()); + } + + @Test + public void parseFilterWithLogicalOperation() throws Exception { + HiveFilterBuilder builder = new HiveFilterBuilder(null); + LogicalFilter filter = (LogicalFilter) builder.getFilterObject("a1c\"first\"o5a2c2o2l0"); + assertEquals(LogicalOperation.HDOP_AND, filter.getOperator()); + assertEquals(2, filter.getFilterList().size()); } + + @Test + public void parseNestedExpressionWithLogicalOperation() throws Exception { + HiveFilterBuilder builder = new HiveFilterBuilder(null); + LogicalFilter filter = (LogicalFilter) builder.getFilterObject("a1c\"first\"o5a2c2o2l0a1c1o1l1"); + assertEquals(LogicalOperation.HDOP_OR, filter.getOperator()); + assertEquals(LogicalOperation.HDOP_AND, ((LogicalFilter) filter.getFilterList().get(0)).getOperator()); + assertEquals(HDOP_LT, ((BasicFilter) filter.getFilterList().get(1)).getOperation()); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/726be6cc/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveORCSearchArgumentExample.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveORCSearchArgumentExample.java b/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveORCSearchArgumentExample.java new file mode 100644 index 0000000..a520b94 --- /dev/null +++ b/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveORCSearchArgumentExample.java @@ -0,0 +1,84 @@ +package org.apache.hawq.pxf.plugins.hive; + +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; +import org.apache.hawq.pxf.api.BasicFilter; +import org.apache.hawq.pxf.api.LogicalFilter; +import org.apache.hawq.pxf.api.utilities.ColumnDescriptor; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; + +public class HiveORCSearchArgumentExample { + + @Test + public void 
buildLogicalOperationTree() throws Exception { + + /* Predicate pushdown configuration */ + String filterStr = "a2c1o2a3c3o3l0a4c5o1l1"; + HiveFilterBuilder eval = new HiveFilterBuilder(null); + Object filter = eval.getFilterObject(filterStr); + + Object current = filter; + SearchArgument.Builder filterBuilder = SearchArgumentFactory.newBuilder(); + buildExpression(filterBuilder, Arrays.asList(filter)); + SearchArgument sarg = filterBuilder.build(); + Assert.assertEquals("and(or(lt(col1, 5), not(lteq(col1, 1))), or(lt(col1, 5), lteq(col1, 3)))", sarg.toFilterPredicate().toString()); + } + + private void buildExpression(SearchArgument.Builder builder, List<Object> filterList) { + for (Object f : filterList) { + if (f instanceof LogicalFilter) { + switch(((LogicalFilter) f).getOperator()) { + case HDOP_OR: + builder.startOr(); + break; + case HDOP_AND: + builder.startAnd(); + break; + case HDOP_NOT: + builder.startNot(); + break; + } + buildExpression(builder, ((LogicalFilter) f).getFilterList()); + builder.end(); + } else { + buildArgument(builder, f); + } + } + } + + private void buildArgument(SearchArgument.Builder builder, Object filterObj) { + /* The below functions will not be compatible and requires update with Hive 2.0 APIs */ + BasicFilter filter = (BasicFilter) filterObj; + int filterColumnIndex = filter.getColumn().index(); + Object filterValue = filter.getConstant().constant(); + Integer[] arr = {}; + ColumnDescriptor filterColumn = new ColumnDescriptor("col1", 1,1, "Integer", arr); + String filterColumnName = filterColumn.columnName(); + + switch(filter.getOperation()) { + case HDOP_LT: + builder.lessThan(filterColumnName, filterValue); + break; + case HDOP_GT: + builder.startNot().lessThanEquals(filterColumnName, filterValue).end(); + break; + case HDOP_LE: + builder.lessThanEquals(filterColumnName, filterValue); + break; + case HDOP_GE: + builder.startNot().lessThanEquals(filterColumnName, filterValue).end(); + break; + case HDOP_EQ: + 
builder.equals(filterColumnName, filterValue); + break; + case HDOP_NE: + builder.startNot().equals(filterColumnName, filterValue).end(); + break; + } + return; + } +}
