Repository: incubator-hawq Updated Branches: refs/heads/master 3b51c2084 -> 402e08da6
HAWQ-1100. Support for new filter string - constant data type is now of the form "c<DATATYPE>s<LENGTH>d<DATA>" Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/402e08da Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/402e08da Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/402e08da Branch: refs/heads/master Commit: 402e08da670cf5d00b5c91ce14a1e16ec66eca62 Parents: 3b51c20 Author: Kavinder Dhaliwal <[email protected]> Authored: Thu Oct 13 15:53:39 2016 -0700 Committer: Kavinder Dhaliwal <[email protected]> Committed: Mon Oct 24 12:15:55 2016 -0700 ---------------------------------------------------------------------- .../org/apache/hawq/pxf/api/FilterParser.java | 197 +++++++++++++------ .../apache/hawq/pxf/api/FilterParserTest.java | 144 +++++++++----- .../pxf/plugins/hbase/HBaseFilterBuilder.java | 29 ++- .../hbase/utilities/HBaseDoubleComparator.java | 66 +++++++ .../hbase/utilities/HBaseFloatComparator.java | 65 ++++++ .../plugins/hbase/HBaseFilterBuilderTest.java | 8 +- .../hawq/pxf/plugins/hive/HiveAccessor.java | 16 +- .../pxf/plugins/hive/HiveFilterBuilder.java | 5 +- .../hawq/pxf/plugins/hive/HiveORCAccessor.java | 7 + .../pxf/plugins/hive/HiveFilterBuilderTest.java | 14 +- .../plugins/hive/HiveORCSearchArgumentTest.java | 84 ++++++++ 11 files changed, 513 insertions(+), 122 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/402e08da/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/FilterParser.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/FilterParser.java b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/FilterParser.java index 80ab198..ec3eddc 100644 --- a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/FilterParser.java +++ 
b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/FilterParser.java @@ -20,8 +20,11 @@ package org.apache.hawq.pxf.api; */ -import java.util.HashMap; -import java.util.Map; +import org.apache.hawq.pxf.api.io.DataType; + +import java.sql.Date; +import java.sql.Time; +import java.sql.Timestamp; import java.util.Stack; /** @@ -29,27 +32,45 @@ import java.util.Stack; * Once an operation is read, the evaluate function is called for the {@link FilterBuilder} * interface with two pop-ed operands. * <br> - * A string of filters looks like this: - * <code>a2c5o1a1c"abc"o2o7</code> + * The filter string is of the pattern: + * <attcode><attnum><constcode><constval><constsizecode><constsize><constdata><constvalue><opercode><opernum> + * <br> + * A sample string of filters looks like this: + * <code>a2c23s1d5o1a1c25s3dabco2o7</code> * which means {@code column#2 < 5 AND column#1 > "abc"} * <br> * It is a RPN serialized representation of a filters tree in GPDB where * <ul> * <li> a means an attribute (column)</li> - * <li>c means a constant (either string or numeric)</li> + * <li>c means a constant followed by the datatype oid</li> + * <li>s means the length of the data in bytes</li> + * <li>d denotes the start of the constant data</li> * <li>o means operator</li> * </ul> - * - * Assuming all operators are binary, RPN representation allows it to be read left to right easily. + * <br> + * For constants all three portions are required in order to parse the data type, the length of the data in bytes + * and the data itself + * <br> + * The parsing operation parses each element of the filter (constants, columns, operations) and adds them to a stack. + * When the parser sees an op code 'o' or 'l' it pops off two elements from the stack, assigns them as children of the op + * and pushes itself onto the stack. 
After parsing is complete there should only be one element in the stack, the root + * node of the filter's tree representation which is returned from this method * <br> * FilterParser only knows about columns and constants. The rest is up to the {@link FilterBuilder} implementer. * FilterParser makes sure column objects are always on the left of the expression (when relevant). */ public class FilterParser { private int index; - private String filterString; + private byte[] filterByteArr; private Stack<Object> operandsStack; private FilterBuilder filterBuilder; + public static final char COL_OP = 'a'; + public static final char CONST_OP = 'c'; + public static final char CONST_LEN = 's'; + public static final char CONST_DATA = 'd'; + public static final char COMP_OP = 'o'; + public static final char LOG_OP = 'l'; + public static final String DEFAULT_CHARSET = "UTF-8"; /** Supported operations by the parser. */ public enum Operation { @@ -144,7 +165,7 @@ public class FilterParser { @SuppressWarnings("serial") class FilterStringSyntaxException extends Exception { FilterStringSyntaxException(String desc) { - super(desc + " (filter string: '" + filterString + "')"); + super(desc + " (filter string: '" + new String(filterByteArr) + "')"); } } @@ -165,26 +186,25 @@ public class FilterParser { * @return the parsed filter * @throws Exception if the filter string had wrong syntax */ - public Object parse(String filter) throws Exception { + public Object parse(byte[] filter) throws Exception { index = 0; - filterString = filter; + filterByteArr = filter; int opNumber; if (filter == null) { throw new FilterStringSyntaxException("filter parsing ended with no result"); } - while (index < filterString.length()) { - char op = filterString.charAt(index); - ++index; // skip op character + while (index < filterByteArr.length) { + char op = (char) filterByteArr[index++]; switch (op) { - case 'a': + case COL_OP: operandsStack.push(new ColumnIndex(safeToInt(parseNumber()))); break; - 
case 'c': + case CONST_OP: operandsStack.push(new Constant(parseParameter())); break; - case 'o': + case COMP_OP: opNumber = safeToInt(parseNumber()); Operation operation = opNumber < Operation.values().length ? Operation.values()[opNumber] : null; if (operation == null) { @@ -219,7 +239,7 @@ public class FilterParser { operandsStack.push(result); break; // Handle parsing logical operator (HAWQ-964) - case 'l': + case LOG_OP: opNumber = safeToInt(parseNumber()); LogicalOperation logicalOperation = opNumber < LogicalOperation.values().length ? LogicalOperation.values()[opNumber] : null; @@ -279,25 +299,110 @@ public class FilterParser { return value.intValue(); } + private int parseConstDataType() throws Exception { + if (!Character.isDigit((char) filterByteArr[index])) { + throw new FilterStringSyntaxException("datatype OID should follow at " + index); + } + + String digits = parseDigits(); + + try { + return Integer.parseInt(digits); + } catch (NumberFormatException e) { + throw new FilterStringSyntaxException("invalid numeric argument at " + digits); + } + } + + private int parseDataLength() throws Exception { + if (((char) filterByteArr[index]) != CONST_LEN) { + throw new FilterStringSyntaxException("data length delimiter 's' expected at " + index); + } + + index++; + return parseInt(); + } + + private int parseInt() throws Exception { + if (index == filterByteArr.length) { + throw new FilterStringSyntaxException("numeric argument expected at " + index); + } + + String digits = parseDigits(); + + try { + return Integer.parseInt(digits); + } catch (NumberFormatException e) { + throw new FilterStringSyntaxException("invalid numeric argument " + digits); + } + } + + private Object convertDataType(byte[] byteData, int start, int end, DataType dataType) throws Exception { + String data = new String(byteData, start, end-start, DEFAULT_CHARSET); + try { + switch (dataType) { + case BIGINT: + return Long.parseLong(data); + case INTEGER: + case SMALLINT: + return 
Integer.parseInt(data); + case REAL: + return Float.parseFloat(data); + case NUMERIC: + case FLOAT8: + return Double.parseDouble(data); + case TEXT: + case VARCHAR: + case BPCHAR: + return data; + case BOOLEAN: + return Boolean.parseBoolean(data); + case DATE: + return Date.valueOf(data); + case TIMESTAMP: + return Timestamp.valueOf(data); + case TIME: + return Time.valueOf(data); + case BYTEA: + return data.getBytes(); + default: + throw new FilterStringSyntaxException("DataType " + dataType.toString() + " unsupported"); + } + } catch (NumberFormatException nfe) { + throw new FilterStringSyntaxException("failed to parse number data type starting at " + index); + } + } /** * Parses either a number or a string. */ private Object parseParameter() throws Exception { - if (index == filterString.length()) { + if (index == filterByteArr.length) { throw new FilterStringSyntaxException("argument should follow at " + index); } - return senseString() - ? parseString() - : parseNumber(); - } + DataType dataType = DataType.get(parseConstDataType()); + if (dataType == DataType.UNSUPPORTED_TYPE) { + throw new FilterStringSyntaxException("invalid DataType OID at " + (index - 1)); + } + + int dataLength = parseDataLength(); + + if (index + dataLength > filterByteArr.length) { + throw new FilterStringSyntaxException("data size larger than filter string starting at " + index); + } + + if (((char) filterByteArr[index]) != CONST_DATA) { + throw new FilterStringSyntaxException("data delimiter 'd' expected at " + index); + } - private boolean senseString() { - return filterString.charAt(index) == '"'; + index++; + + Object data = convertDataType(filterByteArr, index, index+dataLength, dataType); + index += dataLength; + return data; } private Long parseNumber() throws Exception { - if (index == filterString.length()) { + if (index == filterByteArr.length) { throw new FilterStringSyntaxException("numeric argument expected at " + index); } @@ -318,17 +423,17 @@ public class FilterParser { 
private String parseDigits() throws Exception { String result; int i = index; - int filterLength = filterString.length(); + int filterLength = filterByteArr.length; // allow sign if (filterLength > 0) { - int chr = filterString.charAt(i); + char chr = (char) filterByteArr[i]; if (chr == '-' || chr == '+') { ++i; } } for (; i < filterLength; ++i) { - int chr = filterString.charAt(i); + char chr = (char) filterByteArr[i]; if (chr < '0' || chr > '9') { break; } @@ -338,42 +443,12 @@ public class FilterParser { throw new FilterStringSyntaxException("numeric argument expected at " + index); } - result = filterString.substring(index, i); + result = new String(filterByteArr, index, i - index); index = i; return result; } /* - * Parses a string after its beginning '"' until its ending '"' - * advances the index accordingly - * - * Currently the string cannot contain '"' itself - * TODO add support for '"' inside the string - */ - private String parseString() throws Exception { - StringBuilder result = new StringBuilder(); - boolean ended = false; - int i; - - // starting from index + 1 to skip leading " - for (i = index + 1; i < filterString.length(); ++i) { - char chr = filterString.charAt(i); - if (chr == '"') { - ended = true; - break; - } - result.append(chr); - } - - if (!ended) { - throw new FilterStringSyntaxException("string started at " + index + " not ended with \""); - } - - index = i + 1; // +1 to skip ending " - return result.toString(); - } - - /* * The function takes an operator and reverses it * e.g. 
> turns into < */ @@ -397,4 +472,4 @@ public class FilterParser { return operation; } -} +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/402e08da/pxf/pxf-api/src/test/java/org/apache/hawq/pxf/api/FilterParserTest.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-api/src/test/java/org/apache/hawq/pxf/api/FilterParserTest.java b/pxf/pxf-api/src/test/java/org/apache/hawq/pxf/api/FilterParserTest.java index 53f37a0..9cbe7a8 100644 --- a/pxf/pxf-api/src/test/java/org/apache/hawq/pxf/api/FilterParserTest.java +++ b/pxf/pxf-api/src/test/java/org/apache/hawq/pxf/api/FilterParserTest.java @@ -32,7 +32,6 @@ import org.powermock.core.classloader.annotations.PrepareForTest; import org.powermock.modules.junit4.PowerMockRunner; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertThat; import static org.junit.Assert.fail; import static org.mockito.Matchers.any; import static org.mockito.Matchers.eq; @@ -54,12 +53,6 @@ public class FilterParserTest { } @Test - public void parseNegativeNull() { - filter = null; - runParseNegative("null string", null, "filter parsing ended with no result"); - } - - @Test public void parseNegativeEmpty() { filter = ""; runParseNegative("empty string", filter, "filter parsing ended with no result"); @@ -112,18 +105,44 @@ public class FilterParserTest { @Test public void parseNegativeBadConst() { - filter = "c"; + filter = "cs"; int index = 1; - exception = "argument should follow at " + index; - runParseNegative("const operand with no value", filter, exception); + exception = "datatype OID should follow at " + index; + runParseNegative("const operand with no datatype", filter, exception); - filter = "cyan"; - exception = "numeric argument expected at " + index; - runParseNegative("const operand with illegal value", filter, exception); + filter = "c5"; + exception = "invalid DataType OID at " + index; + runParseNegative("const operand 
with invalid datatype oid", filter, exception); - filter = "c\"and that's it"; - exception = "string started at " + index + " not ended with \""; - runParseNegative("string without closing \"", filter, exception); + filter = "c20x"; + index = 3; + exception = "data length delimiter 's' expected at " + index; + runParseNegative("const operand with missing 's' delimiter", filter, exception); + + filter = "c20sd"; + index = 4; + exception = "numeric argument expected at " + index; + runParseNegative("const operand with missing numeric data length", filter, exception); + + filter = "c20s1500"; + index = 8; + exception = "data size larger than filter string starting at " + index; + runParseNegative("const operand with missing 'd' delimiter", filter, exception); + + filter = "c20s1x"; + index = 5; + exception = "data delimiter 'd' expected at " + index; + runParseNegative("const operand with missing 'd' delimiter", filter, exception); + + filter = "c20s5d"; + index = 5; + exception = "data size larger than filter string starting at " + index; + runParseNegative("const operand with missing data", filter, exception); + + filter = "c20s3ds9r"; + index = 6; + exception = "failed to parse number data type starting at " + index; + runParseNegative("const operand with an invalid value", filter, exception); } @Test @@ -143,32 +162,57 @@ public class FilterParserTest { runParseNegative("operation with out of bounds number", filter, exception); } - @Test public void parseNegativeNoOperator() { filter = "a1234567890"; runParseNegative("filter with only column", filter, "filter parsing failed, missing operators?"); - filter = "c1"; + filter = "c20s1d1"; runParseNegative("filter with only numeric const", filter, "filter parsing failed, missing operators?"); + } - filter = "c\"something in the way\""; - runParseNegative("filter with only string const", filter, "filter parsing failed, missing operators?"); + @Test + public void parseEmptyString() { + filter = "c25s0d"; + exception = 
"filter parsing failed, missing operators?"; + runParseNegative("const operand with empty string", filter, exception); + } + + @Test + public void parseDecimalValues() { + filter = "c700s3d9.0"; + exception = "filter parsing failed, missing operators?"; + runParseNegative("const operand with decimal value", filter, exception); + + filter = "c701s7d10.0001"; + exception = "filter parsing failed, missing operators?"; + runParseNegative("const operand with decimal value", filter, exception); + } + + @Test + public void parseNegativeValues() { + filter = "c700s3d-90"; + exception = "filter parsing failed, missing operators?"; + runParseNegative("const operand with decimal value", filter, exception); + + filter = "c701s8d-10.0001"; + exception = "filter parsing failed, missing operators?"; + runParseNegative("const operand with decimal value", filter, exception); } @Test public void parseNegativeTwoParams() { - filter = "c1c2"; + filter = "c20s1d1c20s1d1"; exception = "Stack not empty, missing operators?"; runParseNegative("filter with two consts in a row", filter, exception); - filter = "c1a1"; + filter = "c20s1d1a1"; exception = "Stack not empty, missing operators?"; runParseNegative("filter with const and attribute", filter, exception); - filter = "a1c80"; + filter = "a1c700s1d1"; exception = "Stack not empty, missing operators?"; runParseNegative("filter with attribute and const", filter, exception); } @@ -191,32 +235,32 @@ public class FilterParserTest { @Test public void parseColumnOnLeft() throws Exception { - filter = "a1c2o1"; + filter = "a1c20s1d1o1"; Operation op = Operation.HDOP_LT; runParseOneOperation("this filter was build from HDOP_LT", filter, op); - filter = "a1c2o2"; + filter = "a1c20s1d1o2"; op = Operation.HDOP_GT; runParseOneOperation("this filter was build from HDOP_GT", filter, op); - filter = "a1c2o3"; + filter = "a1c20s1d1o3"; op = Operation.HDOP_LE; runParseOneOperation("this filter was build from HDOP_LE", filter, op); - filter = "a1c2o4"; + 
filter = "a1c20s1d1o4"; op = Operation.HDOP_GE; runParseOneOperation("this filter was build from HDOP_GE", filter, op); - filter = "a1c2o5"; + filter = "a1c20s1d1o5"; op = Operation.HDOP_EQ; runParseOneOperation("this filter was build from HDOP_EQ", filter, op); - filter = "a1c2o6"; + filter = "a1c20s1d1o6"; op = Operation.HDOP_NE; runParseOneOperation("this filter was build from HDOP_NE", filter, op); - filter = "a1c2o7"; + filter = "a1c20s1d1o7"; op = Operation.HDOP_LIKE; runParseOneOperation("this filter was built from HDOP_LIKE", filter, op); } @@ -224,38 +268,38 @@ public class FilterParserTest { @Test public void parseColumnOnRight() throws Exception { - filter = "c2a1o1"; + filter = "c20s1d1a1o1"; Operation op = Operation.HDOP_GT; runParseOneOperation("this filter was build from HDOP_LT -> HDOP_GT using reverse!", filter, op); - filter = "c2a1o2"; + filter = "c20s1d1a1o2"; op = Operation.HDOP_LT; runParseOneOperation("this filter was build from HDOP_GT -> HDOP_LT using reverse!", filter, op); - filter = "c2a1o3"; + filter = "c20s1d1a1o3"; op = Operation.HDOP_GE; runParseOneOperation("this filter was build from HDOP_LE -> HDOP_GE using reverse!", filter, op); - filter = "c2a1o4"; + filter = "c20s1d1a1o4"; op = Operation.HDOP_LE; runParseOneOperation("this filter was build from HDOP_GE -> HDOP_LE using reverse!", filter, op); - filter = "c2a1o5"; + filter = "c20s1d1a1o5"; op = Operation.HDOP_EQ; runParseOneOperation("this filter was build from HDOP_EQ using reverse!", filter, op); - filter = "c2a1o6"; + filter = "c20s1d1a1o6"; op = Operation.HDOP_NE; runParseOneOperation("this filter was build from HDOP_NE using reverse!", filter, op); - filter = "c2a1o7"; + filter = "c20s1d1a1o7"; op = Operation.HDOP_LIKE; runParseOneOperation("this filter was build from HDOP_LIKE using reverse!", filter, op); } @Test public void parseFilterWith2Operations() throws Exception { - filter = "a1c\"first\"o5a2c2o2l0"; + filter = "a1c25s5dfirsto5a2c20s1d1o2l0"; Object firstOp = 
"first operation HDOP_EQ"; Object secondOp = "second operation HDOP_GT"; @@ -273,14 +317,14 @@ public class FilterParserTest { eq(firstOp), eq(secondOp))).thenReturn(lastOp); - Object result = filterParser.parse(filter); + Object result = filterParser.parse(filter.getBytes()); assertEquals(lastOp, result); } @Test public void parseLogicalAndOperator() throws Exception { - filter = "a1c0o5a2c3o2l0"; + filter = "a1c20s1d0o5a2c20s1d3o2l0"; Object firstOp = "first operation HDOP_EQ"; Object secondOp = "second operation HDOP_GT"; @@ -298,14 +342,14 @@ public class FilterParserTest { any(), any())).thenReturn(lastOp); - Object result = filterParser.parse(filter); + Object result = filterParser.parse(filter.getBytes()); assertEquals(lastOp, result); } @Test public void parseLogicalOrOperator() throws Exception { - filter = "a1c0o5a2c3o2l1"; + filter = "a1c20s1d0o5a2c20s1d3o2l1"; Object firstOp = "first operation HDOP_EQ"; Object secondOp = "second operation HDOP_GT"; @@ -323,13 +367,13 @@ public class FilterParserTest { any(), any())).thenReturn(lastOp); - Object result = filterParser.parse(filter); + Object result = filterParser.parse(filter.getBytes()); assertEquals(lastOp, result); } @Test public void parseLogicalNotOperator() throws Exception { - filter = "a1c0o5l2"; + filter = "a1c20s1d0o5l2"; Object firstOp = "first operation HDOP_EQ"; Object op = "filter with NOT operator"; @@ -341,7 +385,7 @@ public class FilterParserTest { when(filterBuilder.build(eq(LogicalOperation.HDOP_NOT), any())).thenReturn(op); - Object result = filterParser.parse(filter); + Object result = filterParser.parse(filter.getBytes()); assertEquals(op, result); } @@ -357,12 +401,12 @@ public class FilterParserTest { any(), any())).thenReturn(null); - Object result = filterParser.parse(filter); + Object result = filterParser.parse(filter.getBytes()); } @Test public void parseLogicalOperatorNotExpression() throws Exception { - filter = "a1c\"first\"o5a2c2o2l0l2"; + filter = 
"a1c25s5dfirsto5a2c20s1d2o2l0l2"; Object firstOp = "first operation HDOP_EQ"; Object secondOp = "second operation HDOP_GT"; Object thirdOp = "filter with 2 operations connected by AND"; @@ -384,7 +428,7 @@ public class FilterParserTest { when(filterBuilder.build(eq(LogicalOperation.HDOP_NOT), any())).thenReturn(lastOp); - Object result = filterParser.parse(filter); + Object result = filterParser.parse(filter.getBytes()); assertEquals(lastOp, result); } @@ -393,7 +437,7 @@ public class FilterParserTest { */ private void runParseNegative(String description, String filter, String exception) { try { - filterParser.parse(filter); + filterParser.parse(filter.getBytes()); fail(description + ": should have failed with FilterStringSyntaxException"); } catch (FilterParser.FilterStringSyntaxException e) { assertEquals(description, exception + filterStringMsg(filter), e.getMessage()); @@ -407,7 +451,7 @@ public class FilterParserTest { any(), any())).thenReturn(description); - Object result = filterParser.parse(filter); + Object result = filterParser.parse(filter.getBytes()); assertEquals(description, result); } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/402e08da/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilder.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilder.java b/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilder.java index 1b06f81..07e4488 100644 --- a/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilder.java +++ b/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilder.java @@ -23,6 +23,8 @@ package org.apache.hawq.pxf.plugins.hbase; import org.apache.hawq.pxf.api.FilterParser; import org.apache.hawq.pxf.api.io.DataType; import org.apache.hawq.pxf.plugins.hbase.utilities.HBaseColumnDescriptor; +import 
org.apache.hawq.pxf.plugins.hbase.utilities.HBaseDoubleComparator; +import org.apache.hawq.pxf.plugins.hbase.utilities.HBaseFloatComparator; import org.apache.hawq.pxf.plugins.hbase.utilities.HBaseIntegerComparator; import org.apache.hawq.pxf.plugins.hbase.utilities.HBaseTupleDescription; import org.apache.hadoop.hbase.HConstants; @@ -89,12 +91,15 @@ public class HBaseFilterBuilder implements FilterParser.FilterBuilder { * @throws Exception if parsing failed */ public Filter getFilterObject(String filterString) throws Exception { + if (filterString == null) + return null; + // First check for NOT, HBase does not support this if (filterNotOpPresent(filterString)) return null; FilterParser parser = new FilterParser(this); - Object result = parser.parse(filterString); + Object result = parser.parse(filterString.getBytes(FilterParser.DEFAULT_CHARSET)); if (!(result instanceof Filter)) { throw new Exception("String " + filterString + " couldn't be resolved to any supported filter"); @@ -229,8 +234,28 @@ public class HBaseFilterBuilder implements FilterParser.FilterBuilder { break; case SMALLINT: case INTEGER: + result = new HBaseIntegerComparator(((Integer) data).longValue()); + break; case BIGINT: - result = new HBaseIntegerComparator((Long) data); + if (data instanceof Long) { + result = new HBaseIntegerComparator((Long) data); + } else if (data instanceof Integer) { + result = new HBaseIntegerComparator(((Integer) data).longValue()); + } else { + result = null; + } + break; + case FLOAT8: + result = new HBaseDoubleComparator((double) data); + break; + case REAL: + if (data instanceof Double) { + result = new HBaseDoubleComparator((double) data); + } else if (data instanceof Float) { + result = new HBaseFloatComparator((float) data); + } else { + result = null; + } break; default: throw new Exception("unsupported column type for filtering " + type); 
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/402e08da/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/utilities/HBaseDoubleComparator.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/utilities/HBaseDoubleComparator.java b/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/utilities/HBaseDoubleComparator.java new file mode 100644 index 0000000..ec86c39 --- /dev/null +++ b/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/utilities/HBaseDoubleComparator.java @@ -0,0 +1,66 @@ +package org.apache.hawq.pxf.plugins.hbase.utilities; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import com.google.protobuf.ByteString; +import com.google.protobuf.InvalidProtocolBufferException; +import org.apache.hadoop.hbase.exceptions.DeserializationException; +import org.apache.hadoop.hbase.filter.ByteArrayComparable; +import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos; +import org.apache.hadoop.hbase.util.Bytes; + +public class HBaseDoubleComparator extends ByteArrayComparable{ + + private Double val; + + public HBaseDoubleComparator(Double inVal) { + super(Bytes.toBytes(inVal)); + this.val = inVal; + } + + @Override + public byte[] toByteArray() { + ComparatorProtos.ByteArrayComparable.Builder builder = ComparatorProtos.ByteArrayComparable.newBuilder(); + builder.setValue(ByteString.copyFrom(getValue())); + return builder.build().toByteArray(); + } + + @Override + public int compareTo(byte[] value, int offset, int length) { + if (length == 0) + return 1; + + String valueAsString = new String(value, offset, length); + Double valueAsDouble = Double.parseDouble(valueAsString); + return val.compareTo(valueAsDouble); + } + + public static ByteArrayComparable parseFrom(final byte[] pbBytes) throws DeserializationException { + ComparatorProtos.ByteArrayComparable proto; + try { + proto = ComparatorProtos.ByteArrayComparable.parseFrom(pbBytes); + } catch (InvalidProtocolBufferException e) { + throw new DeserializationException(e); + } + + return new HBaseDoubleComparator(Bytes.toDouble(proto.getValue().toByteArray())); + } +} + http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/402e08da/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/utilities/HBaseFloatComparator.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/utilities/HBaseFloatComparator.java b/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/utilities/HBaseFloatComparator.java new file mode 100644 index 0000000..22e7f75 --- /dev/null +++ 
b/pxf/pxf-hbase/src/main/java/org/apache/hawq/pxf/plugins/hbase/utilities/HBaseFloatComparator.java @@ -0,0 +1,65 @@ +package org.apache.hawq.pxf.plugins.hbase.utilities; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import com.google.protobuf.ByteString; +import com.google.protobuf.InvalidProtocolBufferException; +import org.apache.hadoop.hbase.exceptions.DeserializationException; +import org.apache.hadoop.hbase.filter.ByteArrayComparable; +import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos; +import org.apache.hadoop.hbase.util.Bytes; + +public class HBaseFloatComparator extends ByteArrayComparable{ + + private Float val; + + public HBaseFloatComparator(float inVal) { + super(Bytes.toBytes(inVal)); + this.val = inVal; + } + + @Override + public byte[] toByteArray() { + ComparatorProtos.ByteArrayComparable.Builder builder = ComparatorProtos.ByteArrayComparable.newBuilder(); + builder.setValue(ByteString.copyFrom(getValue())); + return builder.build().toByteArray(); + } + + @Override + public int compareTo(byte[] value, int offset, int length) { + if (length == 0) + return 1; + + String valueAsString = new String(value, offset, length); + Float valueAsFloat = 
Float.parseFloat(valueAsString); + return val.compareTo(valueAsFloat); + } + + public static ByteArrayComparable parseFrom(final byte[] pbBytes) throws DeserializationException { + ComparatorProtos.ByteArrayComparable proto; + try { + proto = ComparatorProtos.ByteArrayComparable.parseFrom(pbBytes); + } catch (InvalidProtocolBufferException e) { + throw new DeserializationException(e); + } + + return new HBaseFloatComparator(Bytes.toFloat(proto.getValue().toByteArray())); + } +} http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/402e08da/pxf/pxf-hbase/src/test/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilderTest.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hbase/src/test/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilderTest.java b/pxf/pxf-hbase/src/test/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilderTest.java index 6860563..93d8cbc 100644 --- a/pxf/pxf-hbase/src/test/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilderTest.java +++ b/pxf/pxf-hbase/src/test/java/org/apache/hawq/pxf/plugins/hbase/HBaseFilterBuilderTest.java @@ -42,7 +42,7 @@ public class HBaseFilterBuilderTest { @Test public void parseNOTOpCodeInConstant() throws Exception { - String filter = "a1c\"l2\"o1a1c2o2l0"; + String filter = "a1c25s2dl2o1a1c20s1d2o2l0"; HBaseFilterBuilder builder = new HBaseFilterBuilder(null); //Testing that we get past the parsing stage //Very crude but it avoids instantiating all the necessary dependencies @@ -50,4 +50,10 @@ public class HBaseFilterBuilderTest { builder.getFilterObject(filter); } + @Test + public void parseNullFilter() throws Exception { + HBaseFilterBuilder builder = new HBaseFilterBuilder(null); + assertNull(builder.getFilterObject(null)); + } + } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/402e08da/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveAccessor.java 
---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveAccessor.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveAccessor.java index 1722093..b757371 100644 --- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveAccessor.java +++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveAccessor.java @@ -186,10 +186,8 @@ public class HiveAccessor extends HdfsSplittableDataAccessor { LOG.debug("segmentId: " + inputData.getSegmentId() + " " + inputData.getDataSource() + "--" + filterStr + " returnData: " + returnData); - if (filter instanceof List) { - for (Object f : (List<?>) filter) { - printOneBasicFilter(f); - } + if (filter instanceof LogicalFilter) { + printLogicalFilter((LogicalFilter) filter); } else { printOneBasicFilter(filter); } @@ -349,6 +347,16 @@ public class HiveAccessor extends HdfsSplittableDataAccessor { return testForPartitionEquality(partitionFields, Arrays.asList(filter), input); } + private void printLogicalFilter(LogicalFilter filter) { + for (Object f : filter.getFilterList()) { + if (f instanceof LogicalFilter) { + printLogicalFilter((LogicalFilter) f); + } else { + printOneBasicFilter(f); + } + } + } + private void printOneBasicFilter(Object filter) { BasicFilter bFilter = (BasicFilter) filter; boolean isOperationEqual = (bFilter.getOperation() == FilterParser.Operation.HDOP_EQ); http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/402e08da/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilder.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilder.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilder.java index bd82a3b..938be09 100644 --- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilder.java +++ 
b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilder.java @@ -61,8 +61,11 @@ public class HiveFilterBuilder implements FilterParser.FilterBuilder { * filter or list of basic filters */ public Object getFilterObject(String filterString) throws Exception { + if (filterString == null) + return null; + FilterParser parser = new FilterParser(this); - Object result = parser.parse(filterString); + Object result = parser.parse(filterString.getBytes(FilterParser.DEFAULT_CHARSET)); if (!(result instanceof LogicalFilter) && !(result instanceof BasicFilter) && !(result instanceof List)) { http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/402e08da/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessor.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessor.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessor.java index 1cea9c7..9f588b4 100644 --- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessor.java +++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessor.java @@ -25,11 +25,14 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; +import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hawq.pxf.api.BasicFilter; import org.apache.hawq.pxf.api.LogicalFilter; import org.apache.hawq.pxf.api.utilities.ColumnDescriptor; import org.apache.hawq.pxf.api.utilities.InputData; import org.apache.commons.lang.StringUtils; + +import java.sql.Date; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -162,6 +165,10 @@ public class HiveORCAccessor extends HiveAccessor { ColumnDescriptor filterColumn = 
inputData.getColumn(filterColumnIndex); String filterColumnName = filterColumn.columnName(); + /* Need to convert java.sql.Date to Hive's DateWritable Format */ + if (filterValue instanceof Date) + filterValue= new DateWritable((Date) filterValue); + switch(filter.getOperation()) { case HDOP_LT: builder.lessThan(filterColumnName, filterValue); http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/402e08da/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilderTest.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilderTest.java b/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilderTest.java index e0e6536..d3e5402 100755 --- a/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilderTest.java +++ b/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/HiveFilterBuilderTest.java @@ -29,6 +29,7 @@ import org.apache.hawq.pxf.api.BasicFilter; import static org.apache.hawq.pxf.api.FilterParser.Operation; import static org.apache.hawq.pxf.api.FilterParser.Operation.*; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; public class HiveFilterBuilderTest { @Test @@ -38,7 +39,7 @@ public class HiveFilterBuilderTest { Operation[] ops = new Operation[] {HDOP_EQ, HDOP_GT}; int[] idx = new int[] {1, 2}; - LogicalFilter filterList = (LogicalFilter) builder.getFilterObject("a1c\"first\"o5a2c2o2l0"); + LogicalFilter filterList = (LogicalFilter) builder.getFilterObject("a1c25s5dfirsto5a2c20s1d2o2l0"); assertEquals(LogicalOperation.HDOP_AND, filterList.getOperator()); BasicFilter leftOperand = (BasicFilter) filterList.getFilterList().get(0); assertEquals(consts[0], leftOperand.getConstant().constant()); @@ -47,9 +48,16 @@ public class HiveFilterBuilderTest { } @Test + public void parseNullFilter() throws Exception { + HiveFilterBuilder builder = new 
package org.apache.hawq.pxf.plugins.hive;

import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.hawq.pxf.api.BasicFilter;
import org.apache.hawq.pxf.api.LogicalFilter;
import org.apache.hawq.pxf.api.utilities.ColumnDescriptor;
import org.junit.Assert;
import org.junit.Test;

import java.util.Arrays;
import java.util.List;

/**
 * Verifies that a PXF filter tree parsed from the new
 * "c&lt;DATATYPE&gt;s&lt;LENGTH&gt;d&lt;DATA&gt;" filter-string format can be
 * translated into the expected ORC {@link SearchArgument} predicate.
 */
public class HiveORCSearchArgumentTest {

    @Test
    public void buildLogicalOperationTree() throws Exception {

        /* Predicate pushdown configuration:
         * (col > 1 AND col <= 3) OR col < 5 — every column is mapped to
         * "col1" by buildArgument, so only the predicate shape is tested. */
        String filterStr = "a2c23s1d1o2a3c23s1d3o3l0a4c23s1d5o1l1";
        HiveFilterBuilder eval = new HiveFilterBuilder(null);
        Object filter = eval.getFilterObject(filterStr);

        SearchArgument.Builder filterBuilder = SearchArgumentFactory.newBuilder();
        buildExpression(filterBuilder, Arrays.asList(filter));
        SearchArgument sarg = filterBuilder.build();
        // ORC normalizes the expression to CNF, hence the and(or(...), or(...)) shape.
        Assert.assertEquals(
                "and(or(lt(col1, 5), not(lteq(col1, 1))), or(lt(col1, 5), lteq(col1, 3)))",
                sarg.toFilterPredicate().toString());
    }

    /**
     * Recursively walks the filter tree, opening an and/or/not group for each
     * {@link LogicalFilter} node and delegating leaves to
     * {@link #buildArgument(SearchArgument.Builder, Object)}.
     */
    private void buildExpression(SearchArgument.Builder builder, List<Object> filterList) {
        for (Object f : filterList) {
            if (f instanceof LogicalFilter) {
                switch (((LogicalFilter) f).getOperator()) {
                    case HDOP_OR:
                        builder.startOr();
                        break;
                    case HDOP_AND:
                        builder.startAnd();
                        break;
                    case HDOP_NOT:
                        builder.startNot();
                        break;
                }
                buildExpression(builder, ((LogicalFilter) f).getFilterList());
                builder.end();
            } else {
                buildArgument(builder, f);
            }
        }
    }

    /**
     * Translates one {@link BasicFilter} leaf into a SearchArgument predicate.
     * GT/GE/NE have no direct builder method and are expressed by negating
     * their complements.
     */
    private void buildArgument(SearchArgument.Builder builder, Object filterObj) {
        /* The below functions will not be compatible and requires update with Hive 2.0 APIs */
        BasicFilter filter = (BasicFilter) filterObj;
        Object filterValue = filter.getConstant().constant();
        Integer[] arr = {};
        // Every leaf uses the same synthetic column — the test only checks
        // the logical structure of the generated predicate.
        ColumnDescriptor filterColumn = new ColumnDescriptor("col1", 1, 1, "Integer", arr);
        String filterColumnName = filterColumn.columnName();

        switch (filter.getOperation()) {
            case HDOP_LT:
                builder.lessThan(filterColumnName, filterValue);
                break;
            case HDOP_GT:
                // x > c  ==  NOT (x <= c)
                builder.startNot().lessThanEquals(filterColumnName, filterValue).end();
                break;
            case HDOP_LE:
                builder.lessThanEquals(filterColumnName, filterValue);
                break;
            case HDOP_GE:
                // FIX: x >= c == NOT (x < c). The previous code negated
                // lessThanEquals, which expresses strict '>' rather than '>='.
                builder.startNot().lessThan(filterColumnName, filterValue).end();
                break;
            case HDOP_EQ:
                builder.equals(filterColumnName, filterValue);
                break;
            case HDOP_NE:
                // x != c  ==  NOT (x == c)
                builder.startNot().equals(filterColumnName, filterValue).end();
                break;
        }
    }
}