Repository: kudu Updated Branches: refs/heads/master eb4b46b80 -> 835cf6c1e
[java-client] KuduPredicate simplification for inequalities on boundary values Adds special casing for inequality predicates on minimum and maximum values. This tightens partition pruning in boundary cases. New tests are added to cover the specific transformations. We already have good test coverage of boundary values in TestScanPredicate.java. Change-Id: Id3779029d95532e91b235b4ed33a77e2a32ad072 Reviewed-on: http://gerrit.cloudera.org:8080/5272 Reviewed-by: Adar Dembo <[email protected]> Tested-by: Kudu Jenkins Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/d74888c5 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/d74888c5 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/d74888c5 Branch: refs/heads/master Commit: d74888c56d175b37f2b3c1ac90d28607653bc023 Parents: eb4b46b Author: Dan Burkert <[email protected]> Authored: Tue Nov 29 14:57:57 2016 -0800 Committer: Dan Burkert <[email protected]> Committed: Wed Nov 30 17:25:54 2016 +0000 ---------------------------------------------------------------------- .../org/apache/kudu/client/KuduPredicate.java | 89 ++++++++++++++------ .../org/apache/kudu/client/TestKuduClient.java | 6 +- .../apache/kudu/client/TestKuduPredicate.java | 81 +++++++++++++++--- .../apache/kudu/client/TestPartitionPruner.java | 5 +- 4 files changed, 137 insertions(+), 44 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/d74888c5/java/kudu-client/src/main/java/org/apache/kudu/client/KuduPredicate.java ---------------------------------------------------------------------- diff --git a/java/kudu-client/src/main/java/org/apache/kudu/client/KuduPredicate.java b/java/kudu-client/src/main/java/org/apache/kudu/client/KuduPredicate.java index eea1d8c..ae404b4 100644 --- a/java/kudu-client/src/main/java/org/apache/kudu/client/KuduPredicate.java +++ b/java/kudu-client/src/main/java/org/apache/kudu/client/KuduPredicate.java @@ -120,7 +120,7 @@ public class KuduPredicate { if (value) { return new KuduPredicate(PredicateType.EQUALITY, column, Bytes.fromBoolean(true), null); } else { - return newIsNotNullPredicate(column); + return isNotNull(column); } } case EQUAL: return new KuduPredicate(PredicateType.EQUALITY, column, @@ -138,7 +138,7 @@ public class KuduPredicate { // b <= true -> b IS NOT NULL // b <= false -> b = false if (value) { - return newIsNotNullPredicate(column); + return isNotNull(column); } else { return new KuduPredicate(PredicateType.EQUALITY, column, Bytes.fromBoolean(false), null); } @@ -157,26 +157,29 @@ public class KuduPredicate { ComparisonOp op, long value) { checkColumn(column, Type.INT8, Type.INT16, Type.INT32, Type.INT64, Type.UNIXTIME_MICROS); - Preconditions.checkArgument(value <= maxIntValue(column.getType()) && - value >= minIntValue(column.getType()), + long minValue = minIntValue(column.getType()); + long maxValue = maxIntValue(column.getType()); + Preconditions.checkArgument(value <= maxValue && value >= minValue, "integer value out of range for %s column: %s", column.getType(), value); if (op == ComparisonOp.LESS_EQUAL) { - if (value == maxIntValue(column.getType())) { + if (value == maxValue) { // If the value can't be incremented because it is at the top end of the // range, then substitute the predicate with an IS NOT NULL predicate. // This has the same effect as an inclusive upper bound on the maximum // value. If the column is not nullable then the IS NOT NULL predicate // is ignored. - return newIsNotNullPredicate(column); + return isNotNull(column); } value += 1; + op = ComparisonOp.LESS; } else if (op == ComparisonOp.GREATER) { - if (value == maxIntValue(column.getType())) { + if (value == maxValue) { return none(column); } value += 1; + op = ComparisonOp.GREATER_EQUAL; } byte[] bytes; @@ -202,13 +205,19 @@ public class KuduPredicate { throw new RuntimeException("already checked"); } switch (op) { - case GREATER: case GREATER_EQUAL: + if (value == minValue) { + return isNotNull(column); + } else if (value == maxValue) { + return new KuduPredicate(PredicateType.EQUALITY, column, bytes, null); + } return new KuduPredicate(PredicateType.RANGE, column, bytes, null); case EQUAL: return new KuduPredicate(PredicateType.EQUALITY, column, bytes, null); case LESS: - case LESS_EQUAL: + if (value == minValue) { + return none(column); + } return new KuduPredicate(PredicateType.RANGE, column, null, bytes); default: throw new RuntimeException("unknown comparison op"); @@ -227,25 +236,33 @@ public class KuduPredicate { checkColumn(column, Type.FLOAT); if (op == ComparisonOp.LESS_EQUAL) { if (value == Float.POSITIVE_INFINITY) { - return newIsNotNullPredicate(column); + return isNotNull(column); } value = Math.nextAfter(value, Float.POSITIVE_INFINITY); + op = ComparisonOp.LESS; } else if (op == ComparisonOp.GREATER) { if (value == Float.POSITIVE_INFINITY) { return none(column); } value = Math.nextAfter(value, Float.POSITIVE_INFINITY); + op = ComparisonOp.GREATER_EQUAL; } byte[] bytes = Bytes.fromFloat(value); switch (op) { - case GREATER: case GREATER_EQUAL: + if (value == Float.NEGATIVE_INFINITY) { + return isNotNull(column); + } else if (value == Float.POSITIVE_INFINITY) { + return new KuduPredicate(PredicateType.EQUALITY, column, bytes, null); + } return new KuduPredicate(PredicateType.RANGE, column, bytes, null); case EQUAL: return new KuduPredicate(PredicateType.EQUALITY, column, bytes, null); case LESS: - case LESS_EQUAL: + if (value == Float.NEGATIVE_INFINITY) { + return none(column); + } return new KuduPredicate(PredicateType.RANGE, column, null, bytes); default: throw new RuntimeException("unknown comparison op"); @@ -264,25 +281,33 @@ public class KuduPredicate { checkColumn(column, Type.DOUBLE); if (op == ComparisonOp.LESS_EQUAL) { if (value == Double.POSITIVE_INFINITY) { - return newIsNotNullPredicate(column); + return isNotNull(column); } value = Math.nextAfter(value, Double.POSITIVE_INFINITY); + op = ComparisonOp.LESS; } else if (op == ComparisonOp.GREATER) { if (value == Double.POSITIVE_INFINITY) { return none(column); } value = Math.nextAfter(value, Double.POSITIVE_INFINITY); + op = ComparisonOp.GREATER_EQUAL; } byte[] bytes = Bytes.fromDouble(value); switch (op) { - case GREATER: case GREATER_EQUAL: + if (value == Double.NEGATIVE_INFINITY) { + return isNotNull(column); + } else if (value == Double.POSITIVE_INFINITY) { + return new KuduPredicate(PredicateType.EQUALITY, column, bytes, null); + } return new KuduPredicate(PredicateType.RANGE, column, bytes, null); case EQUAL: return new KuduPredicate(PredicateType.EQUALITY, column, bytes, null); case LESS: - case LESS_EQUAL: + if (value == Double.NEGATIVE_INFINITY) { + return none(column); + } return new KuduPredicate(PredicateType.RANGE, column, null, bytes); default: throw new RuntimeException("unknown comparison op"); @@ -301,18 +326,26 @@ public class KuduPredicate { checkColumn(column, Type.STRING); byte[] bytes = Bytes.fromString(value); - if (op == ComparisonOp.LESS_EQUAL || op == ComparisonOp.GREATER) { + if (op == ComparisonOp.LESS_EQUAL) { bytes = Arrays.copyOf(bytes, bytes.length + 1); + op = ComparisonOp.LESS; + } else if (op == ComparisonOp.GREATER) { + bytes = Arrays.copyOf(bytes, bytes.length + 1); + op = ComparisonOp.GREATER_EQUAL; } switch (op) { - case GREATER: case GREATER_EQUAL: + if (bytes.length == 0) { + return isNotNull(column); + } return new KuduPredicate(PredicateType.RANGE, column, bytes, null); case EQUAL: return new KuduPredicate(PredicateType.EQUALITY, column, bytes, null); case LESS: - case LESS_EQUAL: + if (bytes.length == 0) { + return none(column); + } return new KuduPredicate(PredicateType.RANGE, column, null, bytes); default: throw new RuntimeException("unknown comparison op"); @@ -330,18 +363,26 @@ public class KuduPredicate { byte[] value) { checkColumn(column, Type.BINARY); - if (op == ComparisonOp.LESS_EQUAL || op == ComparisonOp.GREATER) { + if (op == ComparisonOp.LESS_EQUAL) { value = Arrays.copyOf(value, value.length + 1); + op = ComparisonOp.LESS; + } else if (op == ComparisonOp.GREATER) { + value = Arrays.copyOf(value, value.length + 1); + op = ComparisonOp.GREATER_EQUAL; } switch (op) { - case GREATER: case GREATER_EQUAL: + if (value.length == 0) { + return isNotNull(column); + } return new KuduPredicate(PredicateType.RANGE, column, value, null); case EQUAL: return new KuduPredicate(PredicateType.EQUALITY, column, value, null); case LESS: - case LESS_EQUAL: + if (value.length == 0) { + return none(column); + } return new KuduPredicate(PredicateType.RANGE, column, null, value); default: throw new RuntimeException("unknown comparison op"); @@ -469,7 +510,7 @@ public class KuduPredicate { * @return a {@code IS NOT NULL} predicate */ @VisibleForTesting - static KuduPredicate newIsNotNullPredicate(ColumnSchema column) { + static KuduPredicate isNotNull(ColumnSchema column) { return new KuduPredicate(PredicateType.IS_NOT_NULL, column, null, null); } @@ -585,7 +626,7 @@ public class KuduPredicate { private static KuduPredicate buildInList(ColumnSchema column, Collection<byte[]> values) { // IN (true, false) predicates can be simplified to IS NOT NULL. if (column.getType().getDataType() == Common.DataType.BOOL && values.size() > 1) { - return KuduPredicate.newIsNotNullPredicate(column); + return isNotNull(column); } switch (values.size()) { @@ -687,7 +728,7 @@ public class KuduPredicate { range.hasUpper() ? range.getUpper().toByteArray() : null); } case IS_NOT_NULL: - return newIsNotNullPredicate(column); + return isNotNull(column); case IN_LIST: { Common.ColumnPredicatePB.InList inList = pb.getInList(); http://git-wip-us.apache.org/repos/asf/kudu/blob/d74888c5/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java ---------------------------------------------------------------------- diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java b/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java index 06f3a07..aeba4bb 100644 --- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java +++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java @@ -417,8 +417,8 @@ public class TestKuduClient extends BaseKuduTest { // IS NOT NULL assertEquals(100, scanTableToStrings(table, - KuduPredicate.newIsNotNullPredicate(schema.getColumn("c2")), - KuduPredicate.newIsNotNullPredicate(schema.getColumn("key")) + KuduPredicate.isNotNull(schema.getColumn("c2")), + KuduPredicate.isNotNull(schema.getColumn("key")) ).size()); // IN list @@ -433,7 +433,7 @@ public class TestKuduClient extends BaseKuduTest { assertEquals(2, scanTableToStrings(table, KuduPredicate.newInListPredicate(schema.getColumn("c2"), ImmutableList.of("c2_30", "c2_1", "invalid", "c2_99")), - KuduPredicate.newIsNotNullPredicate(schema.getColumn("c2")), + KuduPredicate.isNotNull(schema.getColumn("c2")), KuduPredicate.newInListPredicate(schema.getColumn("key"), ImmutableList.of("key_30", "key_45", "invalid", "key_99")) ).size()); http://git-wip-us.apache.org/repos/asf/kudu/blob/d74888c5/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduPredicate.java ---------------------------------------------------------------------- diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduPredicate.java b/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduPredicate.java index ca48ee8..5afdd11 100644 --- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduPredicate.java +++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduPredicate.java @@ -383,11 +383,11 @@ public class TestKuduPredicate { // IN list + NOT NULL testMerge(intInList(10), - KuduPredicate.newIsNotNullPredicate(intCol), + KuduPredicate.isNotNull(intCol), KuduPredicate.newComparisonPredicate(intCol, EQUAL, 10)); testMerge(intInList(10, -100), - KuduPredicate.newIsNotNullPredicate(intCol), + KuduPredicate.isNotNull(intCol), intInList(-100, 10)); // IN list + Equality @@ -567,7 +567,7 @@ public class TestKuduPredicate { // | | | | // = // [--) - testMerge(KuduPredicate.newIsNotNullPredicate(stringCol), + testMerge(KuduPredicate.isNotNull(stringCol), stringInList("a", "c", "b", ""), stringInList("", "a", "b", "c")); } @@ -576,7 +576,7 @@ public class TestKuduPredicate { public void testBoolean() { // b >= false - Assert.assertEquals(KuduPredicate.newIsNotNullPredicate(boolCol), + Assert.assertEquals(KuduPredicate.isNotNull(boolCol), KuduPredicate.newComparisonPredicate(boolCol, GREATER_EQUAL, false)); // b > false Assert.assertEquals(KuduPredicate.newComparisonPredicate(boolCol, EQUAL, true), @@ -604,7 +604,7 @@ public class TestKuduPredicate { Assert.assertEquals(KuduPredicate.newComparisonPredicate(boolCol, EQUAL, false), KuduPredicate.newComparisonPredicate(boolCol, LESS, true)); // b <= true - Assert.assertEquals(KuduPredicate.newIsNotNullPredicate(boolCol), + Assert.assertEquals(KuduPredicate.isNotNull(boolCol), KuduPredicate.newComparisonPredicate(boolCol, LESS_EQUAL, true)); // b IN () @@ -619,7 +619,7 @@ public class TestKuduPredicate { boolInList(false)); // b IN (false, true) - Assert.assertEquals(KuduPredicate.newIsNotNullPredicate(boolCol), + Assert.assertEquals(KuduPredicate.isNotNull(boolCol), boolInList(false, true, false, true)); } @@ -638,7 +638,7 @@ public class TestKuduPredicate { testMerge(KuduPredicate.newComparisonPredicate(boolCol, GREATER_EQUAL, false), KuduPredicate.newComparisonPredicate(boolCol, LESS_EQUAL, true), - KuduPredicate.newIsNotNullPredicate(boolCol)); + KuduPredicate.isNotNull(boolCol)); testMerge(KuduPredicate.newComparisonPredicate(byteCol, GREATER_EQUAL, 0), KuduPredicate.newComparisonPredicate(byteCol, LESS, 10), @@ -728,21 +728,21 @@ public class TestKuduPredicate { KuduPredicate.newComparisonPredicate(binaryCol, LESS, new byte[] { (byte) 10, (byte) 0 })); Assert.assertEquals(KuduPredicate.newComparisonPredicate(byteCol, LESS_EQUAL, Byte.MAX_VALUE), - KuduPredicate.newIsNotNullPredicate(byteCol)); + KuduPredicate.isNotNull(byteCol)); Assert.assertEquals(KuduPredicate.newComparisonPredicate(shortCol, LESS_EQUAL, Short.MAX_VALUE), - KuduPredicate.newIsNotNullPredicate(shortCol)); + KuduPredicate.isNotNull(shortCol)); Assert.assertEquals(KuduPredicate.newComparisonPredicate(intCol, LESS_EQUAL, Integer.MAX_VALUE), - KuduPredicate.newIsNotNullPredicate(intCol)); + KuduPredicate.isNotNull(intCol)); Assert.assertEquals(KuduPredicate.newComparisonPredicate(longCol, LESS_EQUAL, Long.MAX_VALUE), - KuduPredicate.newIsNotNullPredicate(longCol)); + KuduPredicate.isNotNull(longCol)); Assert.assertEquals(KuduPredicate.newComparisonPredicate(floatCol, LESS_EQUAL, Float.MAX_VALUE), KuduPredicate.newComparisonPredicate(floatCol, LESS, Float.POSITIVE_INFINITY)); Assert.assertEquals(KuduPredicate.newComparisonPredicate(floatCol, LESS_EQUAL, Float.POSITIVE_INFINITY), - KuduPredicate.newIsNotNullPredicate(floatCol)); + KuduPredicate.isNotNull(floatCol)); Assert.assertEquals(KuduPredicate.newComparisonPredicate(doubleCol, LESS_EQUAL, Double.MAX_VALUE), KuduPredicate.newComparisonPredicate(doubleCol, LESS, Double.POSITIVE_INFINITY)); Assert.assertEquals(KuduPredicate.newComparisonPredicate(doubleCol, LESS_EQUAL, Double.POSITIVE_INFINITY), - KuduPredicate.newIsNotNullPredicate(doubleCol)); + KuduPredicate.isNotNull(doubleCol)); } @Test @@ -783,6 +783,59 @@ public class TestKuduPredicate { } @Test + public void testLess() { + Assert.assertEquals(KuduPredicate.newComparisonPredicate(byteCol, LESS, Byte.MIN_VALUE), + KuduPredicate.none(byteCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(shortCol, LESS, Short.MIN_VALUE), + KuduPredicate.none(shortCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(intCol, LESS, Integer.MIN_VALUE), + KuduPredicate.none(intCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(longCol, LESS, Long.MIN_VALUE), + KuduPredicate.none(longCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(floatCol, LESS, Float.NEGATIVE_INFINITY), + KuduPredicate.none(floatCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(doubleCol, LESS, Double.NEGATIVE_INFINITY), + KuduPredicate.none(doubleCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(stringCol, LESS, ""), + KuduPredicate.none(stringCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(binaryCol, LESS, new byte[] {}), + KuduPredicate.none(binaryCol)); + } + + @Test + public void testGreaterEqual() { + Assert.assertEquals(KuduPredicate.newComparisonPredicate(byteCol, GREATER_EQUAL, Byte.MIN_VALUE), + KuduPredicate.isNotNull(byteCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(shortCol, GREATER_EQUAL, Short.MIN_VALUE), + KuduPredicate.isNotNull(shortCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, Integer.MIN_VALUE), + KuduPredicate.isNotNull(intCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(longCol, GREATER_EQUAL, Long.MIN_VALUE), + KuduPredicate.isNotNull(longCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(floatCol, GREATER_EQUAL, Float.NEGATIVE_INFINITY), + KuduPredicate.isNotNull(floatCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(doubleCol, GREATER_EQUAL, Double.NEGATIVE_INFINITY), + KuduPredicate.isNotNull(doubleCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(stringCol, GREATER_EQUAL, ""), + KuduPredicate.isNotNull(stringCol)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(binaryCol, GREATER_EQUAL, new byte[] {}), + KuduPredicate.isNotNull(binaryCol)); + + Assert.assertEquals(KuduPredicate.newComparisonPredicate(byteCol, GREATER_EQUAL, Byte.MAX_VALUE), + KuduPredicate.newComparisonPredicate(byteCol, EQUAL, Byte.MAX_VALUE)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(shortCol, GREATER_EQUAL, Short.MAX_VALUE), + KuduPredicate.newComparisonPredicate(shortCol, EQUAL, Short.MAX_VALUE)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(intCol, GREATER_EQUAL, Integer.MAX_VALUE), + KuduPredicate.newComparisonPredicate(intCol, EQUAL, Integer.MAX_VALUE)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(longCol, GREATER_EQUAL, Long.MAX_VALUE), + KuduPredicate.newComparisonPredicate(longCol, EQUAL, Long.MAX_VALUE)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(floatCol, GREATER_EQUAL, Float.POSITIVE_INFINITY), + KuduPredicate.newComparisonPredicate(floatCol, EQUAL, Float.POSITIVE_INFINITY)); + Assert.assertEquals(KuduPredicate.newComparisonPredicate(doubleCol, GREATER_EQUAL, Double.POSITIVE_INFINITY), + KuduPredicate.newComparisonPredicate(doubleCol, EQUAL, Double.POSITIVE_INFINITY)); + } + + @Test public void testToString() { Assert.assertEquals("`bool` = true", KuduPredicate.newComparisonPredicate(boolCol, EQUAL, true).toString()); @@ -805,7 +858,7 @@ public class TestKuduPredicate { Assert.assertEquals("`int` IN (-10, 0, 10)", intInList(10, 0, -10).toString()); Assert.assertEquals("`string` IS NOT NULL", - KuduPredicate.newIsNotNullPredicate(stringCol).toString()); + KuduPredicate.isNotNull(stringCol).toString()); Assert.assertEquals("`bool` = true", KuduPredicate.newInListPredicate( boolCol, ImmutableList.of(true)).toString()); http://git-wip-us.apache.org/repos/asf/kudu/blob/d74888c5/java/kudu-client/src/test/java/org/apache/kudu/client/TestPartitionPruner.java ---------------------------------------------------------------------- diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/TestPartitionPruner.java b/java/kudu-client/src/test/java/org/apache/kudu/client/TestPartitionPruner.java index 457457e..2408adb 100644 --- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestPartitionPruner.java +++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestPartitionPruner.java @@ -292,8 +292,7 @@ public class TestPartitionPruner extends BaseKuduTest { KuduPredicate.newComparisonPredicate(c, ComparisonOp.LESS, 100))); // c < MIN - // TODO(dan): this should prune all partitions. - assertEquals(1, countPartitions(table, partitions, + assertEquals(0, countPartitions(table, partitions, KuduPredicate.newComparisonPredicate(c, ComparisonOp.LESS, Byte.MIN_VALUE))); // c < MAX assertEquals(3, countPartitions(table, partitions, @@ -630,6 +629,6 @@ public class TestPartitionPruner extends BaseKuduTest { // timestamp IS NOT NULL assertEquals(4, countPartitions(table, partitions, - KuduPredicate.newIsNotNullPredicate(timestamp))); + KuduPredicate.isNotNull(timestamp))); } }
