[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r317373703 ## File path: api/src/test/java/org/apache/iceberg/expressions/TestEvaluator.java ## @@ -362,4 +368,197 @@ public void testCharSeqValue() { Assert.assertFalse("string(abc) == utf8(abcd) => false", evaluator.eval(TestHelpers.Row.of(new Utf8("abcd"; } + + @Test + public void testIn() { +Assert.assertEquals(3, in("s", 7, 8, 9).literals().size()); +Assert.assertEquals(3, in("s", 7, 8.1, Long.MAX_VALUE).literals().size()); +Assert.assertEquals(2, in("s", "abc", "abd", "abc").literals().size()); +Assert.assertEquals(0, in("s").literals().size()); +Assert.assertEquals(1, in("s", 5).literals().size()); +Assert.assertEquals(1, in("s", 5, 5).literals().size()); +Assert.assertEquals(1, in("s", Arrays.asList(5, 5)).literals().size()); +Assert.assertEquals(0, in("s", Collections.emptyList()).literals().size()); + +Evaluator evaluator = new Evaluator(STRUCT, in("x", 7, 8, Long.MAX_VALUE)); +Assert.assertTrue("7 in [7, 8] => true", evaluator.eval(TestHelpers.Row.of(7, 8, null))); +Assert.assertFalse("9 in [7, 8] => false", evaluator.eval(TestHelpers.Row.of(9, 8, null))); + +Evaluator longEvaluator = new Evaluator(STRUCT, +in("x", Long.MAX_VALUE, Integer.MAX_VALUE, Long.MIN_VALUE)); +Assert.assertTrue("Integer.MAX_VALUE in [Integer.MAX_VALUE] => true", +longEvaluator.eval(TestHelpers.Row.of(Integer.MAX_VALUE, 7.0, null))); +Assert.assertFalse("6 in [Integer.MAX_VALUE] => false", +longEvaluator.eval(TestHelpers.Row.of(6, 6.8, null))); + +Evaluator integerEvaluator = new Evaluator(STRUCT, in("y", 7, 8, 9.1)); +Assert.assertTrue("7.0 in [7, 8, 9.1] => true", integerEvaluator.eval(TestHelpers.Row.of(7, 7.0, null))); +Assert.assertTrue("9.1 in [7, 8, 9.1] => true", integerEvaluator.eval(TestHelpers.Row.of(7, 9.1, null))); +Assert.assertFalse("6.8 in [7, 8, 9] => false", integerEvaluator.eval(TestHelpers.Row.of(6, 6.8, null))); + +Evaluator structEvaluator = new Evaluator(STRUCT, in("s1.s2.s3.s4.i", 7, 8, 9)); +Assert.assertTrue("7 in [7, 8, 9] => true", +structEvaluator.eval(TestHelpers.Row.of(7, 8, null, +TestHelpers.Row.of( +TestHelpers.Row.of( +TestHelpers.Row.of( +TestHelpers.Row.of(7))); +Assert.assertFalse("6 in [7, 8, 9] => false", +structEvaluator.eval(TestHelpers.Row.of(6, 8, null, +TestHelpers.Row.of( +TestHelpers.Row.of( +TestHelpers.Row.of( +TestHelpers.Row.of(6))); + +StructType charSeqStruct = StructType.of(required(34, "s", Types.StringType.get())); +Evaluator charSeqEvaluator = new Evaluator(charSeqStruct, in("s", "abc", "abd", "abc")); +Assert.assertTrue("utf8(abc) in [string(abc), string(abd)] => true", +charSeqEvaluator.eval(TestHelpers.Row.of(new Utf8("abc"; +Assert.assertFalse("utf8(abcd) in [string(abc), string(abd)] => false", +charSeqEvaluator.eval(TestHelpers.Row.of(new Utf8("abcd"; + } + + @Test + public void testInExceptions() { +TestHelpers.assertThrows( +"Throw exception if value is null", +NullPointerException.class, +"Cannot create expression literal from null", +() -> in("x", (Literal) null)); + +TestHelpers.assertThrows( +"Throw exception if value is null", +NullPointerException.class, +"Values cannot be null for IN predicate", +() -> in("x", (Collection) null)); + +TestHelpers.assertThrows( +"Throw exception if calling literals() for EQ predicate", +IllegalArgumentException.class, +"EQ predicate cannot return a list of literals", +() -> equal("x", 5).literals()); Review comment: I don't think this is necessary. This should return `ImmutableList.of(5)`. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r317373662 ## File path: api/src/test/java/org/apache/iceberg/expressions/TestEvaluator.java ## @@ -362,4 +368,197 @@ public void testCharSeqValue() { Assert.assertFalse("string(abc) == utf8(abcd) => false", evaluator.eval(TestHelpers.Row.of(new Utf8("abcd"; } + + @Test + public void testIn() { +Assert.assertEquals(3, in("s", 7, 8, 9).literals().size()); +Assert.assertEquals(3, in("s", 7, 8.1, Long.MAX_VALUE).literals().size()); +Assert.assertEquals(2, in("s", "abc", "abd", "abc").literals().size()); +Assert.assertEquals(0, in("s").literals().size()); +Assert.assertEquals(1, in("s", 5).literals().size()); +Assert.assertEquals(1, in("s", 5, 5).literals().size()); +Assert.assertEquals(1, in("s", Arrays.asList(5, 5)).literals().size()); +Assert.assertEquals(0, in("s", Collections.emptyList()).literals().size()); + +Evaluator evaluator = new Evaluator(STRUCT, in("x", 7, 8, Long.MAX_VALUE)); +Assert.assertTrue("7 in [7, 8] => true", evaluator.eval(TestHelpers.Row.of(7, 8, null))); +Assert.assertFalse("9 in [7, 8] => false", evaluator.eval(TestHelpers.Row.of(9, 8, null))); + +Evaluator longEvaluator = new Evaluator(STRUCT, +in("x", Long.MAX_VALUE, Integer.MAX_VALUE, Long.MIN_VALUE)); +Assert.assertTrue("Integer.MAX_VALUE in [Integer.MAX_VALUE] => true", +longEvaluator.eval(TestHelpers.Row.of(Integer.MAX_VALUE, 7.0, null))); +Assert.assertFalse("6 in [Integer.MAX_VALUE] => false", +longEvaluator.eval(TestHelpers.Row.of(6, 6.8, null))); + +Evaluator integerEvaluator = new Evaluator(STRUCT, in("y", 7, 8, 9.1)); +Assert.assertTrue("7.0 in [7, 8, 9.1] => true", integerEvaluator.eval(TestHelpers.Row.of(7, 7.0, null))); Review comment: It is confusing to have `x=7` here because 7.0 (`y`) is the value you're testing in the set. Can you change these to `0`? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r317373638 ## File path: api/src/test/java/org/apache/iceberg/expressions/TestEvaluator.java ## @@ -362,4 +368,197 @@ public void testCharSeqValue() { Assert.assertFalse("string(abc) == utf8(abcd) => false", evaluator.eval(TestHelpers.Row.of(new Utf8("abcd"; } + + @Test + public void testIn() { +Assert.assertEquals(3, in("s", 7, 8, 9).literals().size()); +Assert.assertEquals(3, in("s", 7, 8.1, Long.MAX_VALUE).literals().size()); +Assert.assertEquals(2, in("s", "abc", "abd", "abc").literals().size()); +Assert.assertEquals(0, in("s").literals().size()); +Assert.assertEquals(1, in("s", 5).literals().size()); +Assert.assertEquals(1, in("s", 5, 5).literals().size()); +Assert.assertEquals(1, in("s", Arrays.asList(5, 5)).literals().size()); +Assert.assertEquals(0, in("s", Collections.emptyList()).literals().size()); + +Evaluator evaluator = new Evaluator(STRUCT, in("x", 7, 8, Long.MAX_VALUE)); +Assert.assertTrue("7 in [7, 8] => true", evaluator.eval(TestHelpers.Row.of(7, 8, null))); +Assert.assertFalse("9 in [7, 8] => false", evaluator.eval(TestHelpers.Row.of(9, 8, null))); + +Evaluator longEvaluator = new Evaluator(STRUCT, +in("x", Long.MAX_VALUE, Integer.MAX_VALUE, Long.MIN_VALUE)); +Assert.assertTrue("Integer.MAX_VALUE in [Integer.MAX_VALUE] => true", +longEvaluator.eval(TestHelpers.Row.of(Integer.MAX_VALUE, 7.0, null))); +Assert.assertFalse("6 in [Integer.MAX_VALUE] => false", +longEvaluator.eval(TestHelpers.Row.of(6, 6.8, null))); + +Evaluator integerEvaluator = new Evaluator(STRUCT, in("y", 7, 8, 9.1)); +Assert.assertTrue("7.0 in [7, 8, 9.1] => true", integerEvaluator.eval(TestHelpers.Row.of(7, 7.0, null))); +Assert.assertTrue("9.1 in [7, 8, 9.1] => true", integerEvaluator.eval(TestHelpers.Row.of(7, 9.1, null))); +Assert.assertFalse("6.8 in [7, 8, 9] => false", integerEvaluator.eval(TestHelpers.Row.of(6, 6.8, null))); Review comment: This should also be `[7, 8, 9.1]` because the predicate hasn't changed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r317373623 ## File path: api/src/test/java/org/apache/iceberg/expressions/TestEvaluator.java ## @@ -362,4 +368,197 @@ public void testCharSeqValue() { Assert.assertFalse("string(abc) == utf8(abcd) => false", evaluator.eval(TestHelpers.Row.of(new Utf8("abcd"; } + + @Test + public void testIn() { +Assert.assertEquals(3, in("s", 7, 8, 9).literals().size()); +Assert.assertEquals(3, in("s", 7, 8.1, Long.MAX_VALUE).literals().size()); +Assert.assertEquals(2, in("s", "abc", "abd", "abc").literals().size()); +Assert.assertEquals(0, in("s").literals().size()); +Assert.assertEquals(1, in("s", 5).literals().size()); +Assert.assertEquals(1, in("s", 5, 5).literals().size()); +Assert.assertEquals(1, in("s", Arrays.asList(5, 5)).literals().size()); +Assert.assertEquals(0, in("s", Collections.emptyList()).literals().size()); + +Evaluator evaluator = new Evaluator(STRUCT, in("x", 7, 8, Long.MAX_VALUE)); +Assert.assertTrue("7 in [7, 8] => true", evaluator.eval(TestHelpers.Row.of(7, 8, null))); +Assert.assertFalse("9 in [7, 8] => false", evaluator.eval(TestHelpers.Row.of(9, 8, null))); + +Evaluator longEvaluator = new Evaluator(STRUCT, Review comment: This name is misleading because the predicate is on an integer field in the schema. Can you rename it to `intSetEvaluator` or something more clear? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r317373269 ## File path: parquet/src/main/java/org/apache/iceberg/parquet/ParquetMetricsRowGroupFilter.java ## @@ -19,19 +19,14 @@ package org.apache.iceberg.parquet; -import com.google.common.base.Preconditions; import com.google.common.collect.Maps; import java.util.Map; +import java.util.Set; import java.util.function.Function; import org.apache.iceberg.Schema; -import org.apache.iceberg.expressions.Binder; -import org.apache.iceberg.expressions.BoundReference; -import org.apache.iceberg.expressions.Expression; -import org.apache.iceberg.expressions.ExpressionVisitors; +import org.apache.iceberg.expressions.*; Review comment: Please clean up the imports in this file and remove the wildcard import. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r317373153 ## File path: api/src/main/java/org/apache/iceberg/expressions/UnboundPredicate.java ## @@ -125,13 +149,40 @@ public Expression bind(Types.StructType struct, boolean caseSensitive) { case LT_EQ: case EQ: return Expressions.alwaysFalse(); -//case IN: -// break; -//case NOT_IN: -// break; } } return new BoundPredicate<>(op(), new BoundReference<>(field.fieldId(), -schema.accessorForField(field.fieldId())), lit); +schema.accessorForField(field.fieldId())), lit); + } + + @SuppressWarnings("unchecked") + private Expression bindInOperation(Types.NestedField field, Schema schema) { +final Set> lits = literals().stream().map( +l -> { + Literal lit = l.to(field.type()); + if (lit == null) { +throw new ValidationException(String.format( +"Invalid value for comparison inclusive type %s: %s (%s)", +field.type(), l.value(), l.value().getClass().getName())); + } + return lit; +}) +.filter(l -> l != Literals.aboveMax() && l != Literals.belowMin()) +.collect(Collectors.toSet()); + +if (lits.isEmpty()) { + return Expressions.alwaysFalse(); +} else if (lits.size() == 1) { + return new BoundPredicate<>(Operation.EQ, new BoundReference<>(field.fieldId(), + schema.accessorForField(field.fieldId())), lits.iterator().next()); Review comment: To get the literal, use `Iterables.getOnlyElement` This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r317373107 ## File path: api/src/main/java/org/apache/iceberg/expressions/UnboundPredicate.java ## @@ -125,13 +149,40 @@ public Expression bind(Types.StructType struct, boolean caseSensitive) { case LT_EQ: case EQ: return Expressions.alwaysFalse(); -//case IN: -// break; -//case NOT_IN: -// break; } } return new BoundPredicate<>(op(), new BoundReference<>(field.fieldId(), -schema.accessorForField(field.fieldId())), lit); +schema.accessorForField(field.fieldId())), lit); Review comment: Please revert the indentation change on this line. It is causing this to get picked up in the changes, but I don't think it has actually changed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r317373006 ## File path: api/src/main/java/org/apache/iceberg/expressions/Predicate.java ## @@ -39,37 +37,35 @@ public R ref() { return ref; } - public Literal literal() { -return literal; - } + abstract String literalString(); @Override public String toString() { -switch (op) { +switch (op()) { Review comment: This change is unnecessary, can you revert it? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r317372831 ## File path: api/src/main/java/org/apache/iceberg/expressions/BoundSetPredicate.java ## @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.expressions; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import java.io.Serializable; +import java.util.Collection; +import java.util.Iterator; +import java.util.Set; +import org.apache.iceberg.util.CharSequenceWrapper; + +public class BoundSetPredicate extends Predicate> { + private final LiteralSet literalSet; + + BoundSetPredicate(Operation op, BoundReference ref, Set> lits) { +super(op, ref); +Preconditions.checkArgument(op == Operation.IN || op == Operation.NOT_IN, +"%s predicate does not support a set of literals", op); +this.literalSet = new LiteralSet<>(lits); + } + + BoundSetPredicate(Operation op, BoundReference ref, LiteralSet lits) { +super(op, ref); +Preconditions.checkArgument(op == Operation.IN || op == Operation.NOT_IN, +"%s predicate does not support a literal set", op); +this.literalSet = lits; + } + + @Override + public Expression negate() { +return new BoundSetPredicate<>(op().negate(), ref(), literalSet); + } + + public Set literalSet() { +return literalSet; + } + + @Override + String literalString() { +return literalSet.toString(); + } + + /** + * Represents a set of literal values in an IN or NOT_IN predicate + * @param The Java type of the value, which can be wrapped by a {@link Literal} + */ + private static class LiteralSet implements Set, Serializable { +private final Set values; + +@SuppressWarnings("unchecked") +LiteralSet(Set> lits) { + Preconditions.checkArgument(lits == null || lits.size() > 1, + "The input literal set must include more than 1 element."); + values = ImmutableSet.builder().addAll( + lits.stream().map( + lit -> { +if (lit instanceof Literals.StringLiteral) { + return (T) CharSequenceWrapper.wrap(((Literals.StringLiteral) lit).value()); +} else { + return lit.value(); +} + } + ).iterator()).build(); +} + +@Override +public String toString() { + return Joiner.on(", ").join(values); +} + +@Override +public boolean contains(Object object) { + if (object instanceof CharSequence) { +return values.contains(CharSequenceWrapper.wrap((CharSequence) object)); + } + return values.contains(object); +} + +@Override +public int size() { + return values.size(); +} + +@Override +public boolean isEmpty() { + return values.isEmpty(); +} + +@Override +@SuppressWarnings("unchecked") +public Iterator iterator() { + return values.stream().map( + val -> { +if (val instanceof CharSequenceWrapper) { + return (T) ((CharSequenceWrapper) val).get(); +} else { + return val; +} + }).iterator(); +} + +@Override +public boolean containsAll(Collection c) { + throw new UnsupportedOperationException( + "LiteralSet currently only supports checking if a single item is contained in it."); +} + +@Override +public Object[] toArray() { + throw new UnsupportedOperationException( + "Please use iterator() to visit the elements in the set."); +} + +@Override +public X[] toArray(X[] a) { + throw new UnsupportedOperationException( + "Please use iterator() to visit the elements in the set."); +} + +@Override +public boolean add(T t) { + throw new UnsupportedOperationException( + "The set is immutable and cannot add an element."); +} + +@Override +public boolean remove(Object o) { + throw new UnsupportedOperationException( + "The set is immutable
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r317372784 ## File path: api/src/main/java/org/apache/iceberg/expressions/BoundSetPredicate.java ## @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.expressions; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import java.io.Serializable; +import java.util.Collection; +import java.util.Iterator; +import java.util.Set; +import org.apache.iceberg.util.CharSequenceWrapper; + +public class BoundSetPredicate extends Predicate> { + private final LiteralSet literalSet; + + BoundSetPredicate(Operation op, BoundReference ref, Set> lits) { +super(op, ref); +Preconditions.checkArgument(op == Operation.IN || op == Operation.NOT_IN, +"%s predicate does not support a set of literals", op); +this.literalSet = new LiteralSet<>(lits); + } + + BoundSetPredicate(Operation op, BoundReference ref, LiteralSet lits) { +super(op, ref); +Preconditions.checkArgument(op == Operation.IN || op == Operation.NOT_IN, +"%s predicate does not support a literal set", op); +this.literalSet = lits; + } + + @Override + public Expression negate() { +return new BoundSetPredicate<>(op().negate(), ref(), literalSet); + } + + public Set literalSet() { +return literalSet; + } + + @Override + String literalString() { +return literalSet.toString(); + } + + /** + * Represents a set of literal values in an IN or NOT_IN predicate + * @param The Java type of the value, which can be wrapped by a {@link Literal} + */ + private static class LiteralSet implements Set, Serializable { +private final Set values; + +@SuppressWarnings("unchecked") +LiteralSet(Set> lits) { + Preconditions.checkArgument(lits == null || lits.size() > 1, + "The input literal set must include more than 1 element."); + values = ImmutableSet.builder().addAll( + lits.stream().map( + lit -> { +if (lit instanceof Literals.StringLiteral) { + return (T) CharSequenceWrapper.wrap(((Literals.StringLiteral) lit).value()); +} else { + return lit.value(); +} + } + ).iterator()).build(); +} + +@Override +public String toString() { + return Joiner.on(", ").join(values); +} + +@Override +public boolean contains(Object object) { Review comment: `CharSeqLiteralSet` should override this. If the object isn't a `CharSequence` then it should return false. If it is, then it should use a thread-local wrapper: ```java private static final ThreadLocal wrapper = ThreadLocal.withInitial(() -> CharSequenceWrapper.wrap(null)); @Override public boolean contains(Object object) { if (object instanceof CharSequence) { return values.contains(wrapper.get().set((CharSequence) object)); } return false; } ``` This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r317372657 ## File path: api/src/main/java/org/apache/iceberg/expressions/BoundSetPredicate.java ## @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.expressions; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import java.io.Serializable; +import java.util.Collection; +import java.util.Iterator; +import java.util.Set; +import org.apache.iceberg.util.CharSequenceWrapper; + +public class BoundSetPredicate extends Predicate> { + private final LiteralSet literalSet; + + BoundSetPredicate(Operation op, BoundReference ref, Set> lits) { +super(op, ref); +Preconditions.checkArgument(op == Operation.IN || op == Operation.NOT_IN, +"%s predicate does not support a set of literals", op); +this.literalSet = new LiteralSet<>(lits); + } + + BoundSetPredicate(Operation op, BoundReference ref, LiteralSet lits) { +super(op, ref); +Preconditions.checkArgument(op == Operation.IN || op == Operation.NOT_IN, +"%s predicate does not support a literal set", op); +this.literalSet = lits; + } + + @Override + public Expression negate() { +return new BoundSetPredicate<>(op().negate(), ref(), literalSet); + } + + public Set literalSet() { +return literalSet; + } + + @Override + String literalString() { +return literalSet.toString(); + } + + /** + * Represents a set of literal values in an IN or NOT_IN predicate + * @param The Java type of the value, which can be wrapped by a {@link Literal} + */ + private static class LiteralSet implements Set, Serializable { Review comment: I think it would be cleaner to have two implementations: `LiteralSet` and `CharSeqLiteralSet` that extends `LiteralSet`. There are too many methods here that check for `CharSeq` and change behavior. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r317372625 ## File path: api/src/main/java/org/apache/iceberg/expressions/ExpressionVisitors.java ## @@ -51,6 +53,10 @@ public R or(R leftResult, R rightResult) { return null; } +public R predicate(BoundSetPredicate pred) { + return null; Review comment: This should also throw `UnsupportedOperationException` to fail any implementation that is missing support. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r314875570 ## File path: api/src/main/java/org/apache/iceberg/expressions/UnboundPredicate.java ## @@ -125,13 +154,38 @@ public Expression bind(Types.StructType struct, boolean caseSensitive) { case LT_EQ: case EQ: return Expressions.alwaysFalse(); -//case IN: -// break; -//case NOT_IN: -// break; } } return new BoundPredicate<>(op(), new BoundReference<>(field.fieldId(), -schema.accessorForField(field.fieldId())), lit); +schema.accessorForField(field.fieldId())), lit); + } + + @SuppressWarnings("unchecked") + private Expression bindInOperation(Types.NestedField field, Schema schema) { +final Set> lits = literalSet().stream().map( +val -> { + if (val instanceof LiteralSet.CharSeqWrapper) { +val = (T) ((LiteralSet.CharSeqWrapper) val).unWrap(); + } + Literal lit = Literals.from(val).to(field.type()); + if (lit == null) { +throw new ValidationException(String.format( +"Invalid value for comparison inclusive type %s: %s (%s)", +field.type(), val, val.getClass().getName())); + } + return lit; +}) +.filter(l -> l != Literals.aboveMax() && l != Literals.belowMin()) +.collect(Collectors.toSet()); Review comment: There are several places where these values are deduplicated. I think it would be better if `UnboundPredicate` was always created with a `Collection>` (that is a set when created by `in` and `notIn`) and stored as an immutable list. That would simplify tracking literals because we can use the same storage for null/notNull (empty list), normal predicates (list of one value), and in/notIn (list of values). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r314873793 ## File path: api/src/main/java/org/apache/iceberg/expressions/UnboundPredicate.java ## @@ -125,13 +180,65 @@ public Expression bind(Types.StructType struct, boolean caseSensitive) { case LT_EQ: case EQ: return Expressions.alwaysFalse(); -//case IN: -// break; -//case NOT_IN: -// break; } } return new BoundPredicate<>(op(), new BoundReference<>(field.fieldId(), -schema.accessorForField(field.fieldId())), lit); +schema.accessorForField(field.fieldId())), lit); + } + + @SuppressWarnings("unchecked") + private Expression bindInOperation(Types.NestedField field, Schema schema) { +final Set> lits = literals().stream().map( +l -> { + Literal lit = l.to(field.type()); + if (lit == null) { +throw new ValidationException(String.format( +"Invalid value for comparison inclusive type %s: %s (%s)", +field.type(), l.value(), l.value().getClass().getName())); + } + return lit; +}) +.filter(l -> l != Literals.aboveMax() && l != Literals.belowMin()) +.collect(Collectors.toSet()); + +if (lits.isEmpty()) { + return Expressions.alwaysFalse(); +} else if (lits.size() == 1) { + return new BoundPredicate<>(Operation.EQ, new BoundReference<>(field.fieldId(), + schema.accessorForField(field.fieldId())), lits.iterator().next()); +} else { + return new BoundSetPredicate<>(Operation.IN, new BoundReference<>(field.fieldId(), + schema.accessorForField(field.fieldId())), lits); +} + } + + @Override + public String toString() { Review comment: This could be implemented in `Predicate` by adding an abstract protected method `literalString` that converts a literal or literalSet to a string. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r314869628 ## File path: api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java ## @@ -49,21 +50,22 @@ public boolean equals(Object other) { if (this == other) { return true; } -if (other == null || getClass() != other.getClass()) { +if (other == null) { return false; } -CharSequenceWrapper that = (CharSequenceWrapper) other; -return Comparators.charSequences().compare(wrapped, that.wrapped) == 0; +if (other instanceof CharSequence) { + return Comparators.charSequences().compare(wrapped, (CharSequence) other) == 0; +} else if (other instanceof CharSequenceWrapper) { + CharSequenceWrapper that = (CharSequenceWrapper) other; + return Comparators.charSequences().compare(wrapped, that.wrapped) == 0; +} else { + return false; +} } @Override public int hashCode() { -int result = 177; -for (int i = 0; i < wrapped.length(); i += 1) { - char ch = wrapped.charAt(i); - result = 31 * result + (int) ch; -} -return result; +return wrapped.hashCode(); Review comment: Why did this change? There is no guarantee that implementations of `CharSequence` produce the same `hashCode`, so this breaks the contract of equals/hashCode. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r314868900 ## File path: api/src/main/java/org/apache/iceberg/expressions/LiteralSet.java ## @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.expressions; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import java.io.Serializable; +import java.util.Collection; +import java.util.Iterator; +import java.util.Set; +import org.apache.iceberg.util.CharSequenceWrapper; + +/** + * Represents a set of literal values in an IN or NOT_IN predicate + * @param The Java type of the value, which can be wrapped by a {@link Literal} + */ +class LiteralSet implements Set, Serializable { + private final Set values; + + @SuppressWarnings("unchecked") + LiteralSet(Set> lits) { +Preconditions.checkArgument(lits == null || lits.size() > 1, +"The input literal set must include more than 1 element."); +values = ImmutableSet.builder().addAll( +lits.stream().map( +lit -> { + if (lit instanceof Literals.StringLiteral) { +return (T) CharSequenceWrapper.wrap((CharSequence) lit.value()); Review comment: If you cast `lit` to `StringLiteral` instead of casting the value, this won't require suppressing unchecked warnings because the `StringLiteral` cast is checked. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r314867459 ## File path: api/src/main/java/org/apache/iceberg/expressions/Binder.java ## @@ -140,6 +140,11 @@ public Expression or(Expression leftResult, Expression rightResult) { throw new IllegalStateException("Found already bound predicate: " + pred); } +@Override +public Expression predicate(BoundSetPredicate pred) { Review comment: Let's remove this method. Right now, all of the implementations of it are identical to `predicate(BoundPredicate)`. I'm not sure that we will need this. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r314831880 ## File path: parquet/src/main/java/org/apache/iceberg/parquet/ParquetDictionaryRowGroupFilter.java ## @@ -285,12 +284,12 @@ public Boolean or(Boolean leftResult, Boolean rightResult) { } @Override -public Boolean in(BoundReference ref, Literal lit) { +public Boolean in(BoundReference ref, LiteralSet literalSet) { return ROWS_MIGHT_MATCH; Review comment: Implementing these in a follow-up sounds good to me. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r314831740 ## File path: api/src/main/java/org/apache/iceberg/expressions/Expressions.java ## @@ -109,16 +111,46 @@ public static Expression not(Expression child) { return new UnboundPredicate<>(Expression.Operation.STARTS_WITH, ref(name), value); } + public static UnboundPredicate in(String name, T value, T... values) { +return predicate(Operation.IN, name, +Stream.concat(Stream.of(value), Stream.of(values)) +.map(Literals::from).collect(Collectors.toSet())); + } + + public static UnboundPredicate notIn(String name, T value, T... values) { Review comment: Also, can you add a variant that accepts `Collection`? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r314831603 ## File path: api/src/main/java/org/apache/iceberg/expressions/Expressions.java ## @@ -109,16 +111,46 @@ public static Expression not(Expression child) { return new UnboundPredicate<>(Expression.Operation.STARTS_WITH, ref(name), value); } + public static UnboundPredicate in(String name, T value, T... values) { +return predicate(Operation.IN, name, +Stream.concat(Stream.of(value), Stream.of(values)) +.map(Literals::from).collect(Collectors.toSet())); + } + + public static UnboundPredicate notIn(String name, T value, T... values) { Review comment: I'd rather return `Expression` because there are cases where the caller might not know what is in the set and simply pass through. Forcing the caller to check whether the set is empty instead of returning an equivalent wouldn't be as easy to use. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r314830695 ## File path: api/src/main/java/org/apache/iceberg/expressions/Literals.java ## @@ -109,6 +110,26 @@ public T value() { public String toString() { return String.valueOf(value); } + +@Override +@SuppressWarnings("unchecked") +public boolean equals(Object other) { Review comment: Used for deduplication where? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r314830563 ## File path: api/src/main/java/org/apache/iceberg/expressions/LiteralSet.java ## @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.expressions; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import java.io.Serializable; +import java.util.Collection; +import java.util.Comparator; +import java.util.Iterator; +import java.util.Objects; +import java.util.Set; +import org.apache.iceberg.types.Comparators; + +/** + * Represents a set of literal values in an IN or NOT_IN predicate + * @param The Java type of the value, which can be wrapped by a {@link Literal} + */ +public class LiteralSet implements Set, Serializable { + private final Set literals; + + @SuppressWarnings("unchecked") + LiteralSet(Set> lits) { +Preconditions.checkArgument(lits == null || lits.size() > 1, +"The input literal set must include more than 1 element."); +literals = ImmutableSet.builder().addAll( +lits.stream().map( +lit -> { + if (lit instanceof Literals.StringLiteral) { +return (T) new CharSeqWrapper((CharSequence) lit.value()); + } else { +return lit.value(); + } +} +).iterator()).build(); + } + + @Override + public String toString() { +Iterator it = literals.iterator(); +if (!it.hasNext()) { + return "{}"; +} + +StringBuilder sb = new StringBuilder(); +sb.append('{'); +while (true) { + sb.append(it.next()); + if (!it.hasNext()) { +return sb.append('}').toString(); + } + sb.append(',').append(' '); +} + } + + @Override + @SuppressWarnings("unchecked") + public boolean equals(Object other) { Review comment: That makes sense, so I don't mind leaving this in the PR. Let's just make sure we don't add `equals` to the predicate methods. `TestExpressionSerialization` implements its own equality check to avoid adding `equals` to `Expression` because it isn't clear what equality means for a predicate and my be misinterpreted to be that two predicates accept the same set of values. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313978735 ## File path: api/src/main/java/org/apache/iceberg/expressions/Expressions.java ## @@ -109,16 +111,46 @@ public static Expression not(Expression child) { return new UnboundPredicate<>(Expression.Operation.STARTS_WITH, ref(name), value); } + public static UnboundPredicate in(String name, T value, T... values) { +return predicate(Operation.IN, name, +Stream.concat(Stream.of(value), Stream.of(values)) +.map(Literals::from).collect(Collectors.toSet())); + } + + public static UnboundPredicate notIn(String name, T value, T... values) { Review comment: I'd rather have the signature of these factory methods use just `T...`. I see why you'd do this to ensure that there is at least one value, but this is the primary way to create predicates and this is difficult to use if you're converting from something else because when you have to separate the first element from a set of values. So to make it simpler for callers, this should accept any `T... values`. If values is empty, then it should return `alwaysTrue` (or `alwaysFalse` for `in`), and return an UnboundPredicate for anything else. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313978864 ## File path: api/src/main/java/org/apache/iceberg/expressions/InclusiveMetricsEvaluator.java ## @@ -246,12 +246,12 @@ public Boolean or(Boolean leftResult, Boolean rightResult) { } @Override -public Boolean in(BoundReference ref, Literal lit) { +public Boolean in(BoundReference ref, LiteralSet literalSet) { return ROWS_MIGHT_MATCH; Review comment: This should be implemented as well. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313970266 ## File path: parquet/src/main/java/org/apache/iceberg/parquet/ParquetMetricsRowGroupFilter.java ## @@ -330,12 +324,12 @@ public Boolean or(Boolean leftResult, Boolean rightResult) { } @Override -public Boolean in(BoundReference ref, Literal lit) { +public Boolean in(BoundReference ref, LiteralSet literalSet) { return ROWS_MIGHT_MATCH; Review comment: Same here This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313970180 ## File path: parquet/src/main/java/org/apache/iceberg/parquet/ParquetDictionaryRowGroupFilter.java ## @@ -285,12 +284,12 @@ public Boolean or(Boolean leftResult, Boolean rightResult) { } @Override -public Boolean in(BoundReference ref, Literal lit) { +public Boolean in(BoundReference ref, LiteralSet literalSet) { return ROWS_MIGHT_MATCH; Review comment: I think this should be implemented as well. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313966518 ## File path: api/src/main/java/org/apache/iceberg/expressions/UnboundPredicate.java ## @@ -125,13 +154,38 @@ public Expression bind(Types.StructType struct, boolean caseSensitive) { case LT_EQ: case EQ: return Expressions.alwaysFalse(); -//case IN: -// break; -//case NOT_IN: -// break; } } return new BoundPredicate<>(op(), new BoundReference<>(field.fieldId(), -schema.accessorForField(field.fieldId())), lit); +schema.accessorForField(field.fieldId())), lit); + } + + @SuppressWarnings("unchecked") + private Expression bindInOperation(Types.NestedField field, Schema schema) { +final Set> lits = literalSet().stream().map( +val -> { + if (val instanceof LiteralSet.CharSeqWrapper) { +val = (T) ((LiteralSet.CharSeqWrapper) val).unWrap(); + } + Literal lit = Literals.from(val).to(field.type()); Review comment: I don't think this should reconstruct literals. Instead, I think `UnboundPredicate` should have a `List> literals` that gets transformed. That would work for all operations. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313964026 ## File path: api/src/main/java/org/apache/iceberg/expressions/Predicate.java ## @@ -19,15 +19,49 @@ package org.apache.iceberg.expressions; +import com.google.common.base.Preconditions; +import java.util.Set; + public abstract class Predicate implements Expression { private final Operation op; private final R ref; private final Literal literal; + private final LiteralSet literalSet; Review comment: I think the only place where these are used is in `toString`, right? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313963715 ## File path: api/src/main/java/org/apache/iceberg/expressions/Predicate.java ## @@ -19,15 +19,49 @@ package org.apache.iceberg.expressions; +import com.google.common.base.Preconditions; +import java.util.Set; + public abstract class Predicate implements Expression { private final Operation op; private final R ref; private final Literal literal; + private final LiteralSet literalSet; Review comment: Why are these stored in the superclass? It is strange to have both `literal` and `literalSet` in all implementations. Can these be fields in the concrete implementations? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313963009 ## File path: api/src/main/java/org/apache/iceberg/expressions/ManifestEvaluator.java ## @@ -245,12 +245,12 @@ public Boolean or(Boolean leftResult, Boolean rightResult) { } @Override -public Boolean in(BoundReference ref, Literal lit) { +public Boolean in(BoundReference ref, LiteralSet literalSet) { return ROWS_MIGHT_MATCH; Review comment: Since we can translate an `in` predicate to equality predicates or'ed together, this should call `eq` and or the results together. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313961855 ## File path: api/src/main/java/org/apache/iceberg/expressions/LiteralSet.java ## @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.expressions; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import java.io.Serializable; +import java.util.Collection; +import java.util.Comparator; +import java.util.Iterator; +import java.util.Objects; +import java.util.Set; +import org.apache.iceberg.types.Comparators; + +/** + * Represents a set of literal values in an IN or NOT_IN predicate + * @param The Java type of the value, which can be wrapped by a {@link Literal} + */ +public class LiteralSet implements Set, Serializable { + private final Set literals; + + @SuppressWarnings("unchecked") + LiteralSet(Set> lits) { +Preconditions.checkArgument(lits == null || lits.size() > 1, +"The input literal set must include more than 1 element."); +literals = ImmutableSet.builder().addAll( +lits.stream().map( +lit -> { + if (lit instanceof Literals.StringLiteral) { +return (T) new CharSeqWrapper((CharSequence) lit.value()); + } else { +return lit.value(); + } +} +).iterator()).build(); + } + + @Override + public String toString() { +Iterator it = literals.iterator(); +if (!it.hasNext()) { + return "{}"; +} + +StringBuilder sb = new StringBuilder(); +sb.append('{'); +while (true) { + sb.append(it.next()); + if (!it.hasNext()) { +return sb.append('}').toString(); + } + sb.append(',').append(' '); +} + } + + @Override + @SuppressWarnings("unchecked") + public boolean equals(Object other) { +if (this == other) { + return true; +} +if (other == null || getClass() != other.getClass()) { + return false; +} +LiteralSet that = (LiteralSet) other; +return literals.equals(that.literals); + } + + @Override + public int hashCode() { +return Objects.hashCode(literals); + } + + @Override + public boolean contains(Object object) { +return literals.contains(object); + } + + @Override + public int size() { +return literals.size(); + } + + @Override + public boolean isEmpty() { +return literals.isEmpty(); + } + + @Override + public Iterator iterator() { +return literals.iterator(); + } + + @Override + public Object[] toArray() { +return literals.toArray(); + } + + @Override + public X[] toArray(X[] a) { +return literals.toArray(a); Review comment: Should this return the original `CharSequence` if it contains wrappers? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313961395 ## File path: api/src/main/java/org/apache/iceberg/expressions/ExpressionVisitors.java ## @@ -89,12 +93,12 @@ public R or(R leftResult, R rightResult) { return null; } -public R in(BoundReference ref, Literal lit) { - return null; +public R in(BoundReference ref, LiteralSet literalSet) { Review comment: I don't think this needs to leak the `LiteralSet` implementation class through the public API. That should be a package-private class and this can use `Set` instead. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313961073 ## File path: api/src/main/java/org/apache/iceberg/expressions/Literals.java ## @@ -109,6 +110,26 @@ public T value() { public String toString() { return String.valueOf(value); } + +@Override +@SuppressWarnings("unchecked") +public boolean equals(Object other) { Review comment: Is this still needed? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313959744 ## File path: api/src/main/java/org/apache/iceberg/expressions/LiteralSet.java ## @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.expressions; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import java.io.Serializable; +import java.util.Collection; +import java.util.Comparator; +import java.util.Iterator; +import java.util.Objects; +import java.util.Set; +import org.apache.iceberg.types.Comparators; + +/** + * Represents a set of literal values in an IN or NOT_IN predicate + * @param The Java type of the value, which can be wrapped by a {@link Literal} + */ +public class LiteralSet implements Set, Serializable { + private final Set literals; + + @SuppressWarnings("unchecked") + LiteralSet(Set> lits) { +Preconditions.checkArgument(lits == null || lits.size() > 1, +"The input literal set must include more than 1 element."); +literals = ImmutableSet.builder().addAll( +lits.stream().map( +lit -> { + if (lit instanceof Literals.StringLiteral) { +return (T) new CharSeqWrapper((CharSequence) lit.value()); + } else { +return lit.value(); + } +} +).iterator()).build(); + } + + @Override + public String toString() { +Iterator it = literals.iterator(); +if (!it.hasNext()) { + return "{}"; +} + +StringBuilder sb = new StringBuilder(); +sb.append('{'); +while (true) { + sb.append(it.next()); + if (!it.hasNext()) { +return sb.append('}').toString(); + } + sb.append(',').append(' '); +} + } + + @Override + @SuppressWarnings("unchecked") + public boolean equals(Object other) { +if (this == other) { + return true; +} +if (other == null || getClass() != other.getClass()) { + return false; +} +LiteralSet that = (LiteralSet) other; +return literals.equals(that.literals); + } + + @Override + public int hashCode() { +return Objects.hashCode(literals); + } + + @Override + public boolean contains(Object object) { +return literals.contains(object); + } + + @Override + public int size() { +return literals.size(); + } + + @Override + public boolean isEmpty() { +return literals.isEmpty(); + } + + @Override + public Iterator iterator() { +return literals.iterator(); + } + + @Override + public Object[] toArray() { +return literals.toArray(); + } + + @Override + public X[] toArray(X[] a) { +return literals.toArray(a); + } + + @Override + public boolean containsAll(Collection c) { +return literals.containsAll(c); + } + + @Override + public boolean add(T t) { +throw new UnsupportedOperationException(); + } + + @Override + public boolean remove(Object o) { +throw new UnsupportedOperationException(); + } + + + @Override + public boolean addAll(Collection c) { +throw new UnsupportedOperationException(); + } + + @Override + public boolean retainAll(Collection c) { +throw new UnsupportedOperationException(); + } + + @Override + public boolean removeAll(Collection c) { +throw new UnsupportedOperationException(); + } + + @Override + public void clear() { +throw new UnsupportedOperationException(); + } + + static class CharSeqWrapper implements CharSequence, Serializable { +private static final Comparator CMP = + Comparators.nullsFirst().thenComparing(Comparators.charSequences()); + +private final CharSequence chars; + +CharSeqWrapper(CharSequence charSequence) { + Preconditions.checkNotNull(charSequence, "String literal values cannot be null"); + this.chars = charSequence; +} + +@Override +public int length() { + return chars.length(); +} + +@Override +public char charAt(int index) { + return chars.charAt(index); +} + +@Override +public CharSequence subSequence(int start, int end) { + return
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313959518 ## File path: api/src/main/java/org/apache/iceberg/expressions/LiteralSet.java ## @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.expressions; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import java.io.Serializable; +import java.util.Collection; +import java.util.Comparator; +import java.util.Iterator; +import java.util.Objects; +import java.util.Set; +import org.apache.iceberg.types.Comparators; + +/** + * Represents a set of literal values in an IN or NOT_IN predicate + * @param The Java type of the value, which can be wrapped by a {@link Literal} + */ +public class LiteralSet implements Set, Serializable { + private final Set literals; + + @SuppressWarnings("unchecked") + LiteralSet(Set> lits) { +Preconditions.checkArgument(lits == null || lits.size() > 1, +"The input literal set must include more than 1 element."); +literals = ImmutableSet.builder().addAll( +lits.stream().map( +lit -> { + if (lit instanceof Literals.StringLiteral) { +return (T) new CharSeqWrapper((CharSequence) lit.value()); + } else { +return lit.value(); + } +} +).iterator()).build(); + } + + @Override + public String toString() { +Iterator it = literals.iterator(); +if (!it.hasNext()) { + return "{}"; +} + +StringBuilder sb = new StringBuilder(); +sb.append('{'); +while (true) { + sb.append(it.next()); + if (!it.hasNext()) { +return sb.append('}').toString(); + } + sb.append(',').append(' '); +} + } + + @Override + @SuppressWarnings("unchecked") + public boolean equals(Object other) { +if (this == other) { + return true; +} +if (other == null || getClass() != other.getClass()) { + return false; +} +LiteralSet that = (LiteralSet) other; +return literals.equals(that.literals); + } + + @Override + public int hashCode() { +return Objects.hashCode(literals); + } + + @Override + public boolean contains(Object object) { +return literals.contains(object); + } + + @Override + public int size() { +return literals.size(); + } + + @Override + public boolean isEmpty() { +return literals.isEmpty(); + } + + @Override + public Iterator iterator() { +return literals.iterator(); + } + + @Override + public Object[] toArray() { +return literals.toArray(); + } + + @Override + public X[] toArray(X[] a) { +return literals.toArray(a); + } + + @Override + public boolean containsAll(Collection c) { +return literals.containsAll(c); + } + + @Override + public boolean add(T t) { +throw new UnsupportedOperationException(); + } + + @Override + public boolean remove(Object o) { +throw new UnsupportedOperationException(); + } + + + @Override + public boolean addAll(Collection c) { +throw new UnsupportedOperationException(); + } + + @Override + public boolean retainAll(Collection c) { +throw new UnsupportedOperationException(); + } + + @Override + public boolean removeAll(Collection c) { +throw new UnsupportedOperationException(); + } + + @Override + public void clear() { +throw new UnsupportedOperationException(); + } + + static class CharSeqWrapper implements CharSequence, Serializable { Review comment: You can add the `CharSequence` methods to the other wrapper. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail:
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313958393 ## File path: api/src/main/java/org/apache/iceberg/expressions/LiteralSet.java ## @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.expressions; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import java.io.Serializable; +import java.util.Collection; +import java.util.Comparator; +import java.util.Iterator; +import java.util.Objects; +import java.util.Set; +import org.apache.iceberg.types.Comparators; + +/** + * Represents a set of literal values in an IN or NOT_IN predicate + * @param The Java type of the value, which can be wrapped by a {@link Literal} + */ +public class LiteralSet implements Set, Serializable { + private final Set literals; + + @SuppressWarnings("unchecked") + LiteralSet(Set> lits) { +Preconditions.checkArgument(lits == null || lits.size() > 1, +"The input literal set must include more than 1 element."); +literals = ImmutableSet.builder().addAll( +lits.stream().map( +lit -> { + if (lit instanceof Literals.StringLiteral) { +return (T) new CharSeqWrapper((CharSequence) lit.value()); + } else { +return lit.value(); + } +} +).iterator()).build(); + } + + @Override + public String toString() { +Iterator it = literals.iterator(); +if (!it.hasNext()) { + return "{}"; +} + +StringBuilder sb = new StringBuilder(); +sb.append('{'); +while (true) { + sb.append(it.next()); + if (!it.hasNext()) { +return sb.append('}').toString(); + } + sb.append(',').append(' '); +} + } + + @Override + @SuppressWarnings("unchecked") + public boolean equals(Object other) { +if (this == other) { + return true; +} +if (other == null || getClass() != other.getClass()) { + return false; +} +LiteralSet that = (LiteralSet) other; +return literals.equals(that.literals); + } + + @Override + public int hashCode() { +return Objects.hashCode(literals); + } + + @Override + public boolean contains(Object object) { +return literals.contains(object); + } + + @Override + public int size() { +return literals.size(); + } + + @Override + public boolean isEmpty() { +return literals.isEmpty(); + } + + @Override + public Iterator iterator() { +return literals.iterator(); + } + + @Override + public Object[] toArray() { +return literals.toArray(); + } + + @Override + public X[] toArray(X[] a) { +return literals.toArray(a); + } + + @Override + public boolean containsAll(Collection c) { +return literals.containsAll(c); + } + + @Override + public boolean add(T t) { +throw new UnsupportedOperationException(); + } + + @Override + public boolean remove(Object o) { +throw new UnsupportedOperationException(); + } + + + @Override + public boolean addAll(Collection c) { +throw new UnsupportedOperationException(); + } + + @Override + public boolean retainAll(Collection c) { +throw new UnsupportedOperationException(); + } + + @Override + public boolean removeAll(Collection c) { +throw new UnsupportedOperationException(); + } + + @Override + public void clear() { +throw new UnsupportedOperationException(); + } + + static class CharSeqWrapper implements CharSequence, Serializable { Review comment: I think this should use the existing `CharSequenceWrapper`. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail:
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313957904 ## File path: api/src/main/java/org/apache/iceberg/expressions/LiteralSet.java ## @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.expressions; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import java.io.Serializable; +import java.util.Collection; +import java.util.Comparator; +import java.util.Iterator; +import java.util.Objects; +import java.util.Set; +import org.apache.iceberg.types.Comparators; + +/** + * Represents a set of literal values in an IN or NOT_IN predicate + * @param The Java type of the value, which can be wrapped by a {@link Literal} + */ +public class LiteralSet implements Set, Serializable { + private final Set literals; + + @SuppressWarnings("unchecked") + LiteralSet(Set> lits) { +Preconditions.checkArgument(lits == null || lits.size() > 1, +"The input literal set must include more than 1 element."); +literals = ImmutableSet.builder().addAll( +lits.stream().map( +lit -> { + if (lit instanceof Literals.StringLiteral) { +return (T) new CharSeqWrapper((CharSequence) lit.value()); + } else { +return lit.value(); + } +} +).iterator()).build(); + } + + @Override + public String toString() { +Iterator it = literals.iterator(); +if (!it.hasNext()) { + return "{}"; +} + +StringBuilder sb = new StringBuilder(); +sb.append('{'); +while (true) { + sb.append(it.next()); + if (!it.hasNext()) { +return sb.append('}').toString(); + } + sb.append(',').append(' '); +} + } + + @Override + @SuppressWarnings("unchecked") + public boolean equals(Object other) { Review comment: Is it necessary to implement `equals` and `hashCode`? Why are these sets compared? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313957552 ## File path: api/src/main/java/org/apache/iceberg/expressions/LiteralSet.java ## @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.expressions; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import java.io.Serializable; +import java.util.Collection; +import java.util.Comparator; +import java.util.Iterator; +import java.util.Objects; +import java.util.Set; +import org.apache.iceberg.types.Comparators; + +/** + * Represents a set of literal values in an IN or NOT_IN predicate + * @param The Java type of the value, which can be wrapped by a {@link Literal} + */ +public class LiteralSet implements Set, Serializable { + private final Set literals; + + @SuppressWarnings("unchecked") + LiteralSet(Set> lits) { +Preconditions.checkArgument(lits == null || lits.size() > 1, +"The input literal set must include more than 1 element."); +literals = ImmutableSet.builder().addAll( +lits.stream().map( +lit -> { + if (lit instanceof Literals.StringLiteral) { +return (T) new CharSeqWrapper((CharSequence) lit.value()); + } else { +return lit.value(); + } +} +).iterator()).build(); + } + + @Override + public String toString() { +Iterator it = literals.iterator(); Review comment: I recommend using a Guava `Joiner` here to make this simpler: ``` return "{ " + Joiner.on(", ").join(literals) + " }"; ``` This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313633929 ## File path: api/src/main/java/org/apache/iceberg/expressions/Evaluator.java ## @@ -134,18 +134,18 @@ public Boolean or(Boolean leftResult, Boolean rightResult) { } @Override -public Boolean in(BoundReference ref, Literal lit) { - throw new UnsupportedOperationException("In is not supported yet"); +public Boolean in(BoundReference ref, LiteralSet literalSet) { + return literalSet.contains(ref.get(struct)); } @Override -public Boolean notIn(BoundReference ref, Literal lit) { - return !in(ref, lit); +public Boolean notIn(BoundReference ref, LiteralSet literalSet) { + return !in(ref, literalSet); } @Override public Boolean startsWith(BoundReference ref, Literal lit) { - return ((String) ref.get(struct)).startsWith((String) lit.value()); + return ((String) ref.get(struct)).startsWith(lit.value().toString()); Review comment: Why did the type used by the literal change? I don't think it needs to. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313499455 ## File path: api/src/main/java/org/apache/iceberg/expressions/Evaluator.java ## @@ -134,18 +134,18 @@ public Boolean or(Boolean leftResult, Boolean rightResult) { } @Override -public Boolean in(BoundReference ref, Literal lit) { - throw new UnsupportedOperationException("In is not supported yet"); +public Boolean in(BoundReference ref, LiteralSet literalSet) { + return literalSet.contains(ref.get(struct)); } @Override -public Boolean notIn(BoundReference ref, Literal lit) { - return !in(ref, lit); +public Boolean notIn(BoundReference ref, LiteralSet literalSet) { + return !in(ref, literalSet); } @Override public Boolean startsWith(BoundReference ref, Literal lit) { - return ((String) ref.get(struct)).startsWith((String) lit.value()); + return ((String) ref.get(struct)).startsWith(lit.value().toString()); Review comment: This isn't a related change, and the literal is guaranteed to be a string at this point. I don't think this should be included in this PR. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313498614 ## File path: api/src/main/java/org/apache/iceberg/expressions/Predicate.java ## @@ -19,15 +19,44 @@ package org.apache.iceberg.expressions; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import java.util.Collection; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + public abstract class Predicate implements Expression { private final Operation op; private final R ref; private final Literal literal; + private final Set> literalSet; Review comment: > UnboundPredicate for IN predicate with a single item should return an EQ predicate. The separation will complicate it. I disagree because that over-complicates the predicate classes. The factory methods in `Expressions` are allowed to change the predicate that is returned; for example, `and(true, p)` returns just `p`. That's what we should use to return an equality predicate. The predicate class itself should not include logic for changing itself. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313093437 ## File path: api/src/main/java/org/apache/iceberg/expressions/ExpressionVisitors.java ## @@ -89,11 +91,11 @@ public R or(R leftResult, R rightResult) { return null; } -public R in(BoundReference ref, Literal lit) { +public R in(BoundReference ref, Set> lits) { return null; Review comment: Let's make that change separately, not in this commit. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [incubator-iceberg] rdblue commented on a change in pull request #357: Add in and not in predicates
rdblue commented on a change in pull request #357: Add in and not in predicates URL: https://github.com/apache/incubator-iceberg/pull/357#discussion_r313093251 ## File path: api/src/main/java/org/apache/iceberg/expressions/Evaluator.java ## @@ -134,13 +135,13 @@ public Boolean or(Boolean leftResult, Boolean rightResult) { } @Override -public Boolean in(BoundReference ref, Literal lit) { - throw new UnsupportedOperationException("In is not supported yet"); +public Boolean in(BoundReference ref, Set> lits) { + return lits.contains(Literals.from(ref.get(struct))); Review comment: Be careful with strings. You'll probably need to use `Set` so that all `CharSequence` instances are compared correctly. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org