rdblue commented on code in PR #7886:
URL: https://github.com/apache/iceberg/pull/7886#discussion_r1257536110
##########
spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/SparkV2Filters.java:
##########
@@ -360,10 +399,73 @@ private static boolean hasNoInFilter(Predicate predicate)
{
}
private static boolean isSupportedInPredicate(Predicate predicate) {
- if (!isRef(childAtIndex(predicate, 0))) {
+ if (!couldConvert(childAtIndex(predicate, 0))) {
return false;
} else {
return
Arrays.stream(predicate.children()).skip(1).allMatch(SparkV2Filters::isLiteral);
}
}
+
+ /** Should be called after {@link #couldConvert} passed */
+ private static <T> UnboundTerm<Object> toTerm(T input) {
+ if (input instanceof NamedReference) {
+ return Expressions.ref(SparkUtil.toColumnName((NamedReference) input));
+ } else {
+ return udfToTerm((UserDefinedScalarFunc) input);
+ }
+ }
+
+ private static UnboundTerm<Object> udfToTerm(UserDefinedScalarFunc udf) {
+ switch (udf.name().toLowerCase(Locale.ROOT)) {
+ case "years":
+ Preconditions.checkArgument(
+ udf.children().length == 1, "years function should have only one
children (column)");
+ Preconditions.checkArgument(
+ isRef(udf.children()[0]),
+ "The child of years function should be type of NamedReference");
+ return year(SparkUtil.toColumnName((NamedReference)
udf.children()[0]));
+ case "months":
+ Preconditions.checkArgument(
+ udf.children().length == 1, "months function should have only one
children (column)");
+ Preconditions.checkArgument(
+ isRef(udf.children()[0]),
+ "The child of months function should be type of NamedReference");
+ return month(SparkUtil.toColumnName((NamedReference)
udf.children()[0]));
+ case "days":
+ Preconditions.checkArgument(
+ udf.children().length == 1, "days function should have only one
children (column)");
+ Preconditions.checkArgument(
+ isRef(udf.children()[0]),
+ "The child of days function should be type of NamedReference");
+ return day(SparkUtil.toColumnName((NamedReference) udf.children()[0]));
+ case "hours":
+ Preconditions.checkArgument(
+ udf.children().length == 1, "hours function should have only one
children (colum)");
+ Preconditions.checkArgument(
+ isRef(udf.children()[0]),
+ "The child of hours function should be type of NamedReference");
+ return hour(SparkUtil.toColumnName((NamedReference)
udf.children()[0]));
+ case "bucket":
+ Preconditions.checkArgument(
+ udf.children().length == 2,
+ "bucket function should have two children (numBuckets and
column)");
+ Preconditions.checkArgument(
+ isLiteral(udf.children()[0]) && isRef(udf.children()[1]),
+ "The children's type of bucket function should be Literal and
NamedReference");
+ int numBuckets = (Integer) convertLiteral((Literal<?>)
udf.children()[0]);
+ return bucket(SparkUtil.toColumnName((NamedReference)
udf.children()[1]), numBuckets);
+ case "truncate":
+ Preconditions.checkArgument(
+ udf.children().length == 2,
+ "truncate function should have two children (width and column)");
+ Preconditions.checkArgument(
+ isLiteral(udf.children()[0]) && isRef(udf.children()[1]),
+ "The children's type of truncate function should be Literal and
NamedReference");
+ int width = (Integer) convertLiteral((Literal<?>) udf.children()[0]);
+ return truncate(SparkUtil.toColumnName((NamedReference)
udf.children()[1]), width);
+ default:
+ // Should not reach here
+ throw new RuntimeException("Unsupported system function: " +
udf.canonicalName());
Review Comment:
This should return `null` and be handled like other places where conversion
fails. Conversion failure should prevent pushdown and should never cause the
query to fail with an exception.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]