Repository: incubator-hawq Updated Branches: refs/heads/master c7518832f -> fd350101a
HAWQ-963. PXF support for IS_NULL and IS_NOT_NULL filters (close #974) Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/fd350101 Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/fd350101 Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/fd350101 Branch: refs/heads/master Commit: fd350101a612a9c38c83573fff9cc5d7843a0cf6 Parents: c751883 Author: Alexander Denissov <[email protected]> Authored: Tue Oct 25 11:53:44 2016 -0700 Committer: Alexander Denissov <[email protected]> Committed: Tue Oct 25 14:16:04 2016 -0700 ---------------------------------------------------------------------- .../org/apache/hawq/pxf/api/FilterParser.java | 59 ++++++---- .../apache/hawq/pxf/api/FilterParserTest.java | 17 +++ .../pxf/plugins/hbase/HBaseFilterBuilder.java | 28 +++++ .../plugins/hbase/HBaseFilterBuilderTest.java | 55 ++++++++++ .../pxf/plugins/hive/HiveFilterBuilder.java | 10 ++ .../hawq/pxf/plugins/hive/HiveORCAccessor.java | 9 +- .../pxf/plugins/hive/HiveFilterBuilderTest.java | 18 ++++ .../pxf/plugins/hive/HiveORCAccessorTest.java | 108 +++++++++++++++++++ 8 files changed, 282 insertions(+), 22 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/fd350101/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/FilterParser.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/FilterParser.java b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/FilterParser.java index ec3eddc..e362eed 100644 --- a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/FilterParser.java +++ b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/FilterParser.java @@ -33,7 +33,7 @@ import java.util.Stack; * interface with two pop-ed operands. * <br> * The filter string is of the pattern: - * <attcode><attnum><constcode><constval><constsizecode><constsize><constdata><constvalue><opercode><opernum> + * [attcode][attnum][constcode][constval][constsizecode][constsize][constdata][constvalue][opercode][opernum] * <br> * A sample string of filters looks like this: * <code>a2c23s1d5o1a1c25s3dabco2o7</code> @@ -81,7 +81,9 @@ public class FilterParser { HDOP_GE, HDOP_EQ, HDOP_NE, - HDOP_LIKE + HDOP_LIKE, + HDOP_IS_NULL, + HDOP_IS_NOT_NULL } /** @@ -101,7 +103,7 @@ public class FilterParser { */ public interface FilterBuilder { /** - * Builds the filter for an operation + * Builds the filter for an operation with 2 operands * * @param operation the parsed operation to perform * @param left the left operand @@ -112,6 +114,16 @@ public class FilterParser { public Object build(Operation operation, Object left, Object right) throws Exception; /** + * Builds the filter for an operation with one operand + * + * @param operation the parsed operation to perform + * @param operand the single operand + * @return the built filter + * @throws Exception if building the filter failed + */ + public Object build(Operation operation, Object operand) throws Exception; + + /** * Builds the filter for a logical operation and two operands * * @param operation the parsed logical operation to perform @@ -217,24 +229,29 @@ public class FilterParser { } Object rightOperand = operandsStack.pop(); - // Pop left operand - if (operandsStack.empty()) { - throw new FilterStringSyntaxException("missing operands for op " + operation + " at " + index); - } - Object leftOperand = operandsStack.pop(); - - if (leftOperand instanceof BasicFilter || rightOperand instanceof BasicFilter) { - throw new FilterStringSyntaxException("missing logical operator before op " + operation + " at " + index); + // all operations other than null checks require 2 operands + Object result; + if (operation == Operation.HDOP_IS_NULL || operation == Operation.HDOP_IS_NOT_NULL) { + result = filterBuilder.build(operation, rightOperand); + } else { + // Pop left operand + if (operandsStack.empty()) { + throw new FilterStringSyntaxException("missing operands for op " + operation + " at " + index); + } + Object leftOperand = operandsStack.pop(); + + if (leftOperand instanceof BasicFilter || rightOperand instanceof BasicFilter) { + throw new FilterStringSyntaxException("missing logical operator before op " + operation + " at " + index); + } + + // Normalize order, evaluate + // Column should be on the left + result = (leftOperand instanceof Constant) + // column on the right, reverse expression + ? filterBuilder.build(reverseOp(operation), rightOperand, leftOperand) + // no swap, column on the left + : filterBuilder.build(operation, leftOperand, rightOperand); } - - // Normalize order, evaluate - // Column should be on the left - Object result = (leftOperand instanceof Constant) - // column on the right, reverse expression - ? filterBuilder.build(reverseOp(operation), rightOperand, leftOperand) - // no swap, column on the left - : filterBuilder.build(operation, leftOperand, rightOperand); - // Store result on stack operandsStack.push(result); break; @@ -252,7 +269,7 @@ public class FilterParser { result = filterBuilder.build(logicalOperation, exp); } else if (logicalOperation == LogicalOperation.HDOP_AND || logicalOperation == LogicalOperation.HDOP_OR){ rightOperand = operandsStack.pop(); - leftOperand = operandsStack.pop(); + Object leftOperand = operandsStack.pop(); result = filterBuilder.build(logicalOperation, leftOperand, rightOperand); } else { http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/fd350101/pxf/pxf-api/src/test/java/org/apache/hawq/pxf/api/FilterParserTest.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-api/src/test/java/org/apache/hawq/pxf/api/FilterParserTest.java b/pxf/pxf-api/src/test/java/org/apache/hawq/pxf/api/FilterParserTest.java index 9cbe7a8..46f60f1 100644 --- a/pxf/pxf-api/src/test/java/org/apache/hawq/pxf/api/FilterParserTest.java +++ b/pxf/pxf-api/src/test/java/org/apache/hawq/pxf/api/FilterParserTest.java @@ -263,6 +263,15 @@ public class FilterParserTest { filter = "a1c20s1d1o7"; op = Operation.HDOP_LIKE; runParseOneOperation("this filter was built from HDOP_LIKE", filter, op); + + filter = "a1o8"; + op = Operation.HDOP_IS_NULL; + runParseOneUnaryOperation("this filter was build from HDOP_IS_NULL", filter, op); + + filter = "a1o9"; + op = Operation.HDOP_IS_NOT_NULL; + runParseOneUnaryOperation("this filter was build from HDOP_IS_NULL", filter, op); + } @Test @@ -456,6 +465,14 @@ public class FilterParserTest { assertEquals(description, result); } + private void runParseOneUnaryOperation(String description, String filter, Operation op) throws Exception { + when(filterBuilder.build(eq(op), any())).thenReturn(description); + + Object result = filterParser.parse(filter.getBytes()); + + assertEquals(description, result); + } + private String filterStringMsg(String filter) { return " (filter string: '" + filter + "')"; } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/fd350101/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilder.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilder.java b/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilder.java index 07e4488..3d61c4a 100644 --- a/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilder.java +++ b/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilder.java @@ -157,6 +157,11 @@ public class HBaseFilterBuilder implements FilterParser.FilterBuilder { } @Override + public Object build(FilterParser.Operation operation, Object operand) throws Exception { + return handleSimpleOperations(operation, (FilterParser.ColumnIndex) operand); + } + + @Override public Object build(FilterParser.LogicalOperation opId, Object leftOperand, Object rightOperand) { return handleCompoundOperations(opId, (Filter) leftOperand, (Filter) rightOperand); } @@ -185,6 +190,29 @@ public class HBaseFilterBuilder implements FilterParser.FilterBuilder { logicalOperatorsMap.put(FilterParser.LogicalOperation.HDOP_OR, FilterList.Operator.MUST_PASS_ONE); } + private Object handleSimpleOperations(FilterParser.Operation opId, + FilterParser.ColumnIndex column) throws Exception { + HBaseColumnDescriptor hbaseColumn = tupleDescription.getColumn(column.index()); + CompareFilter.CompareOp compareOperation; + ByteArrayComparable comparator; + switch (opId) { + case HDOP_IS_NULL: + compareOperation = CompareFilter.CompareOp.EQUAL; + comparator = new NullComparator(); + break; + case HDOP_IS_NOT_NULL: + compareOperation = CompareFilter.CompareOp.NOT_EQUAL; + comparator = new NullComparator(); + break; + default: + throw new Exception("unsupported unary operation for filtering " + opId); + } + return new SingleColumnValueFilter(hbaseColumn.columnFamilyBytes(), + hbaseColumn.qualifierBytes(), + compareOperation, + comparator); + } + /** * Handles simple column-operator-constant expressions. * Creates a special filter in the case the column is the row key column. http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/fd350101/pxf/pxf-hbase/src/test/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilderTest.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hbase/src/test/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilderTest.java b/pxf/pxf-hbase/src/test/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilderTest.java index 93d8cbc..f8867a8 100644 --- a/pxf/pxf-hbase/src/test/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilderTest.java +++ b/pxf/pxf-hbase/src/test/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilderTest.java @@ -19,11 +19,18 @@ package org.apache.hawq.pxf.plugins.hbase; * under the License. */ +import org.apache.hadoop.hbase.filter.CompareFilter; +import org.apache.hadoop.hbase.filter.NullComparator; +import org.apache.hadoop.hbase.filter.SingleColumnValueFilter; +import org.apache.hawq.pxf.plugins.hbase.utilities.HBaseColumnDescriptor; +import org.apache.hawq.pxf.plugins.hbase.utilities.HBaseTupleDescription; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; import static org.junit.Assert.*; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; public class HBaseFilterBuilderTest { @Rule @@ -56,4 +63,52 @@ public class HBaseFilterBuilderTest { assertNull(builder.getFilterObject(null)); } + @Test + public void parseISNULLExpression() throws Exception { + String filter = "a1o8"; + HBaseTupleDescription desc = mock(HBaseTupleDescription.class); + HBaseColumnDescriptor column = mock(HBaseColumnDescriptor.class); + when(desc.getColumn(1)).thenReturn(column); + + byte[] family = new byte[]{}; + byte[] qualifier = new byte[]{}; + + when(column.columnFamilyBytes()).thenReturn(family); + when(column.qualifierBytes()).thenReturn(qualifier); + + HBaseFilterBuilder builder = new HBaseFilterBuilder(desc); + SingleColumnValueFilter result = (SingleColumnValueFilter) builder.getFilterObject(filter); + + assertNotNull(result); + assertSame(family, result.getFamily()); + assertSame(qualifier, result.getQualifier()); + assertEquals(CompareFilter.CompareOp.EQUAL, result.getOperator()); + assertTrue(result.getComparator() instanceof NullComparator); + + } + + @Test + public void parseISNOTNULLExpression() throws Exception { + String filter = "a1o9"; + HBaseTupleDescription desc = mock(HBaseTupleDescription.class); + HBaseColumnDescriptor column = mock(HBaseColumnDescriptor.class); + when(desc.getColumn(1)).thenReturn(column); + + byte[] family = new byte[]{}; + byte[] qualifier = new byte[]{}; + + when(column.columnFamilyBytes()).thenReturn(family); + when(column.qualifierBytes()).thenReturn(qualifier); + + HBaseFilterBuilder builder = new HBaseFilterBuilder(desc); + SingleColumnValueFilter result = (SingleColumnValueFilter) builder.getFilterObject(filter); + + assertNotNull(result); + assertSame(family, result.getFamily()); + assertSame(qualifier, result.getQualifier()); + assertEquals(CompareFilter.CompareOp.NOT_EQUAL, result.getOperator()); + assertTrue(result.getComparator() instanceof NullComparator); + + } + } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/fd350101/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilder.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilder.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilder.java index 938be09..32de276 100644 --- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilder.java +++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilder.java @@ -96,6 +96,16 @@ public class HiveFilterBuilder implements FilterParser.FilterBuilder { (FilterParser.Constant) rightOperand); } + @Override + public Object build(FilterParser.Operation operation, Object operand) throws Exception { + if (operation == FilterParser.Operation.HDOP_IS_NULL || operation == FilterParser.Operation.HDOP_IS_NOT_NULL) { + // use null for the constant value of null comparison + return handleSimpleOperations(operation, (FilterParser.ColumnIndex) operand, null); + } else { + throw new Exception("Unsupported unary operation " + operation); + } + } + /* * Handles simple column-operator-constant expressions Creates a special * filter in the case the column is the row key column http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/fd350101/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessor.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessor.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessor.java index 9f588b4..9d79f97 100644 --- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessor.java +++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessor.java @@ -161,7 +161,8 @@ public class HiveORCAccessor extends HiveAccessor { /* The below functions will not be compatible and requires update with Hive 2.0 APIs */ BasicFilter filter = (BasicFilter) filterObj; int filterColumnIndex = filter.getColumn().index(); - Object filterValue = filter.getConstant().constant(); + // filter value might be null for unary operations + Object filterValue = filter.getConstant() == null ? null : filter.getConstant().constant(); ColumnDescriptor filterColumn = inputData.getColumn(filterColumnIndex); String filterColumnName = filterColumn.columnName(); @@ -188,6 +189,12 @@ public class HiveORCAccessor extends HiveAccessor { case HDOP_NE: builder.startNot().equals(filterColumnName, filterValue).end(); break; + case HDOP_IS_NULL: + builder.isNull(filterColumnName); + break; + case HDOP_IS_NOT_NULL: + builder.startNot().isNull(filterColumnName).end(); + break; default: { LOG.debug("Filter push-down is not supported for " + filter.getOperation() + "operation."); return false; http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/fd350101/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilderTest.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilderTest.java b/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilderTest.java index d3e5402..b75d88e 100755 --- a/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilderTest.java +++ b/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilderTest.java @@ -71,4 +71,22 @@ public class HiveFilterBuilderTest { assertEquals(HDOP_LT, ((BasicFilter) filter.getFilterList().get(1)).getOperation()); } + @Test + public void parseISNULLExpression() throws Exception { + HiveFilterBuilder builder = new HiveFilterBuilder(null); + BasicFilter filter = (BasicFilter) builder.getFilterObject("a1o8"); + assertEquals(Operation.HDOP_IS_NULL, filter.getOperation()); + assertEquals(1, filter.getColumn().index()); + assertNull(filter.getConstant()); + } + + @Test + public void parseISNOTNULLExpression() throws Exception { + HiveFilterBuilder builder = new HiveFilterBuilder(null); + BasicFilter filter = (BasicFilter) builder.getFilterObject("a1o9"); + assertEquals(Operation.HDOP_IS_NOT_NULL, filter.getOperation()); + assertEquals(1, filter.getColumn().index()); + assertNull(filter.getConstant()); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/fd350101/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessorTest.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessorTest.java b/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessorTest.java new file mode 100644 index 0000000..2b2d969 --- /dev/null +++ b/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessorTest.java @@ -0,0 +1,108 @@ +package org.apache.hawq.pxf.plugins.hive; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; +import org.apache.hadoop.mapred.InputFormat; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hawq.pxf.api.utilities.ColumnDescriptor; +import org.apache.hawq.pxf.api.utilities.InputData; +import org.apache.hawq.pxf.plugins.hdfs.utilities.HdfsUtilities; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.powermock.api.mockito.PowerMockito; +import org.powermock.core.classloader.annotations.PrepareForTest; +import org.powermock.core.classloader.annotations.SuppressStaticInitializationFor; +import org.powermock.modules.junit4.PowerMockRunner; + +import static org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory.SARG_PUSHDOWN; +import static org.junit.Assert.assertEquals; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + + +@RunWith(PowerMockRunner.class) +@PrepareForTest({HiveORCAccessor.class, HiveInputFormatFragmenter.class, HdfsUtilities.class, HiveDataFragmenter.class}) +@SuppressStaticInitializationFor({"org.apache.hadoop.mapred.JobConf", + "org.apache.hadoop.hive.metastore.api.MetaException", + "org.apache.hawq.pxf.plugins.hive.utilities.HiveUtilities"}) // Prevents static inits +public class HiveORCAccessorTest { + + @Mock InputData inputData; + @Mock OrcInputFormat orcInputFormat; + @Mock InputFormat inputFormat; + @Mock ColumnDescriptor columnDesc; + JobConf jobConf; + HiveORCAccessor accessor; + + @Before + public void setup() throws Exception { + jobConf = new JobConf(); + PowerMockito.whenNew(JobConf.class).withAnyArguments().thenReturn(jobConf); + + PowerMockito.mockStatic(HiveInputFormatFragmenter.class); + PowerMockito.when(HiveInputFormatFragmenter.parseToks(any(), any())).thenReturn(new String[]{"", HiveDataFragmenter.HIVE_NO_PART_TBL, "true"}); + PowerMockito.mockStatic(HdfsUtilities.class); + + PowerMockito.mockStatic(HiveDataFragmenter.class); + PowerMockito.when(HiveDataFragmenter.makeInputFormat(any(), any())).thenReturn(inputFormat); + + PowerMockito.whenNew(OrcInputFormat.class).withNoArguments().thenReturn(orcInputFormat); + RecordReader recordReader = mock(RecordReader.class); + PowerMockito.when(orcInputFormat.getRecordReader(any(), any(), any())).thenReturn(recordReader); + + accessor = new HiveORCAccessor(inputData); + } + + @Test + public void parseFilterWithISNULL() throws Exception { + + when(inputData.hasFilter()).thenReturn(true); + when(inputData.getFilterString()).thenReturn("a1o8"); + when(columnDesc.columnName()).thenReturn("FOO"); + when(inputData.getColumn(1)).thenReturn(columnDesc); + + accessor.openForRead(); + + SearchArgument sarg = SearchArgumentFactory.newBuilder().startAnd().isNull("FOO").end().build(); + assertEquals(sarg.toKryo(), jobConf.get(SARG_PUSHDOWN)); + } + + @Test + public void parseFilterWithISNOTNULL() throws Exception { + + when(inputData.hasFilter()).thenReturn(true); + when(inputData.getFilterString()).thenReturn("a1o9"); + when(columnDesc.columnName()).thenReturn("FOO"); + when(inputData.getColumn(1)).thenReturn(columnDesc); + + accessor.openForRead(); + + SearchArgument sarg = SearchArgumentFactory.newBuilder().startAnd().startNot().isNull("FOO").end().end().build(); + assertEquals(sarg.toKryo(), jobConf.get(SARG_PUSHDOWN)); + } + +} \ No newline at end of file
