This is an automated email from the ASF dual-hosted git repository.

JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new b9eaf05115 [core] Support IsNaN predicate pushdown with Parquet (#7875)
b9eaf05115 is described below

commit b9eaf051153357257e3634b50e4a7976bc023f5d
Author: Arnav Balyan <[email protected]>
AuthorDate: Mon May 18 11:42:19 2026 +0530

    [core] Support IsNaN predicate pushdown with Parquet (#7875)
    
    - Paimon has no IsNaN predicate today; add one, with support for both
    Paimon-side evaluation and Parquet pushdown.
    - Add IsNaN as a new LeafUnaryFunction, which can evaluate rows and
    be pushed down to Parquet for double and float columns.
    - ParquetFilters pushes the predicate down to Parquet via new
    UserDefinedPredicate implementations (one for double, one for float).
---
 .../apache/paimon/predicate/FunctionVisitor.java   |  4 ++
 .../java/org/apache/paimon/predicate/IsNaN.java    | 68 ++++++++++++++++++++++
 .../apache/paimon/predicate/PredicateBuilder.java  |  8 +++
 .../org/apache/paimon/predicate/PredicateTest.java | 28 +++++++++
 .../parquet/filter2/predicate/ParquetFilters.java  | 57 ++++++++++++++++++
 .../paimon/format/parquet/ParquetFiltersTest.java  | 32 ++++++++++
 6 files changed, 197 insertions(+)

diff --git 
a/paimon-common/src/main/java/org/apache/paimon/predicate/FunctionVisitor.java 
b/paimon-common/src/main/java/org/apache/paimon/predicate/FunctionVisitor.java
index 5aa4ca1373..f7040dae06 100644
--- 
a/paimon-common/src/main/java/org/apache/paimon/predicate/FunctionVisitor.java
+++ 
b/paimon-common/src/main/java/org/apache/paimon/predicate/FunctionVisitor.java
@@ -54,6 +54,10 @@ public interface FunctionVisitor<T> extends 
PredicateVisitor<T> {
 
     T visitIsNull(FieldRef fieldRef);
 
+    default T visitIsNaN(FieldRef fieldRef) {
+        throw new UnsupportedOperationException();
+    }
+
     // ----------------- Binary functions ------------------------
 
     T visitStartsWith(FieldRef fieldRef, Object literal);
diff --git a/paimon-common/src/main/java/org/apache/paimon/predicate/IsNaN.java 
b/paimon-common/src/main/java/org/apache/paimon/predicate/IsNaN.java
new file mode 100644
index 0000000000..42d3a40832
--- /dev/null
+++ b/paimon-common/src/main/java/org/apache/paimon/predicate/IsNaN.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.predicate;
+
+import org.apache.paimon.types.DataType;
+
+import 
org.apache.paimon.shade.jackson2.com.fasterxml.jackson.annotation.JsonCreator;
+
+import java.util.List;
+import java.util.Optional;
+
+/** A LeafUnaryFunction to evaluate field is nan for float and double columns. 
*/
+public class IsNaN extends LeafUnaryFunction {
+
+    public static final String NAME = "IS_NAN";
+
+    public static final IsNaN INSTANCE = new IsNaN();
+
+    @JsonCreator
+    private IsNaN() {}
+
+    @Override
+    public boolean test(DataType type, Object field) {
+        if (field instanceof Float) {
+            return Float.isNaN((Float) field);
+        }
+        if (field instanceof Double) {
+            return Double.isNaN((Double) field);
+        }
+        return false;
+    }
+
+    @Override
+    public boolean test(DataType type, long rowCount, Object min, Object max, 
Long nullCount) {
+        return true;
+    }
+
+    @Override
+    public Optional<LeafFunction> negate() {
+        return Optional.empty();
+    }
+
+    @Override
+    public <T> T visit(FunctionVisitor<T> visitor, FieldRef fieldRef, 
List<Object> literals) {
+        return visitor.visitIsNaN(fieldRef);
+    }
+
+    @Override
+    public String toJson() {
+        return NAME;
+    }
+}
diff --git 
a/paimon-common/src/main/java/org/apache/paimon/predicate/PredicateBuilder.java 
b/paimon-common/src/main/java/org/apache/paimon/predicate/PredicateBuilder.java
index c4343475b4..05acce1729 100644
--- 
a/paimon-common/src/main/java/org/apache/paimon/predicate/PredicateBuilder.java
+++ 
b/paimon-common/src/main/java/org/apache/paimon/predicate/PredicateBuilder.java
@@ -138,6 +138,14 @@ public class PredicateBuilder {
         return leaf(IsNotNull.INSTANCE, transform);
     }
 
+    public Predicate isNaN(int idx) {
+        return leaf(IsNaN.INSTANCE, idx);
+    }
+
+    public Predicate isNaN(Transform transform) {
+        return leaf(IsNaN.INSTANCE, transform);
+    }
+
     public Predicate startsWith(int idx, Object patternLiteral) {
         return leaf(StartsWith.INSTANCE, idx, patternLiteral);
     }
diff --git 
a/paimon-common/src/test/java/org/apache/paimon/predicate/PredicateTest.java 
b/paimon-common/src/test/java/org/apache/paimon/predicate/PredicateTest.java
index 5bece36654..0e67372d72 100644
--- a/paimon-common/src/test/java/org/apache/paimon/predicate/PredicateTest.java
+++ b/paimon-common/src/test/java/org/apache/paimon/predicate/PredicateTest.java
@@ -23,6 +23,8 @@ import org.apache.paimon.data.GenericRow;
 import org.apache.paimon.format.SimpleColStats;
 import org.apache.paimon.types.CharType;
 import org.apache.paimon.types.DataTypes;
+import org.apache.paimon.types.DoubleType;
+import org.apache.paimon.types.FloatType;
 import org.apache.paimon.types.IntType;
 import org.apache.paimon.types.RowType;
 import org.apache.paimon.types.VarCharType;
@@ -295,6 +297,32 @@ public class PredicateTest {
         
assertThat(predicate.negate().orElse(null)).isEqualTo(builder.isNull(0));
     }
 
+    @Test
+    public void testIsNaNDouble() {
+        PredicateBuilder builder = new PredicateBuilder(RowType.of(new 
DoubleType()));
+        Predicate predicate = builder.isNaN(0);
+
+        assertThat(predicate.test(GenericRow.of(Double.NaN))).isEqualTo(true);
+        assertThat(predicate.test(GenericRow.of(1.5))).isEqualTo(false);
+        
assertThat(predicate.test(GenericRow.of(Double.POSITIVE_INFINITY))).isEqualTo(false);
+        assertThat(predicate.test(GenericRow.of((Object) 
null))).isEqualTo(false);
+
+        assertThat(test(predicate, 3, new SimpleColStats[] {new 
SimpleColStats(0.0, 1.0, 0L)}))
+                .isEqualTo(true);
+
+        assertThat(predicate.negate()).isEmpty();
+    }
+
+    @Test
+    public void testIsNaNFloat() {
+        PredicateBuilder builder = new PredicateBuilder(RowType.of(new 
FloatType()));
+        Predicate predicate = builder.isNaN(0);
+
+        assertThat(predicate.test(GenericRow.of(Float.NaN))).isEqualTo(true);
+        assertThat(predicate.test(GenericRow.of(1.5f))).isEqualTo(false);
+        assertThat(predicate.test(GenericRow.of((Object) 
null))).isEqualTo(false);
+    }
+
     @Test
     public void testIn() {
         PredicateBuilder builder = new PredicateBuilder(RowType.of(new 
IntType()));
diff --git 
a/paimon-format/src/main/java/org/apache/parquet/filter2/predicate/ParquetFilters.java
 
b/paimon-format/src/main/java/org/apache/parquet/filter2/predicate/ParquetFilters.java
index dacd12f492..29feeb1b5e 100644
--- 
a/paimon-format/src/main/java/org/apache/parquet/filter2/predicate/ParquetFilters.java
+++ 
b/paimon-format/src/main/java/org/apache/parquet/filter2/predicate/ParquetFilters.java
@@ -52,8 +52,11 @@ import org.apache.paimon.types.VariantType;
 import org.apache.paimon.types.VectorType;
 
 import org.apache.parquet.filter2.compat.FilterCompat;
+import org.apache.parquet.filter2.predicate.Operators.DoubleColumn;
+import org.apache.parquet.filter2.predicate.Operators.FloatColumn;
 import org.apache.parquet.io.api.Binary;
 
+import java.io.Serializable;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
@@ -97,6 +100,18 @@ public class ParquetFilters {
             return new Operators.Eq<>(toParquetColumn(fieldRef), null);
         }
 
+        @Override
+        public FilterPredicate visitIsNaN(FieldRef fieldRef) {
+            Operators.Column<?> column = toParquetColumn(fieldRef);
+            if (column instanceof DoubleColumn) {
+                return FilterApi.userDefined((DoubleColumn) column, new 
IsNaNDoublePredicate());
+            }
+            if (column instanceof FloatColumn) {
+                return FilterApi.userDefined((FloatColumn) column, new 
IsNaNFloatPredicate());
+            }
+            throw new UnsupportedOperationException();
+        }
+
         @Override
         public FilterPredicate visitLessThan(FieldRef fieldRef, Object 
literal) {
             return new Operators.Lt(
@@ -441,4 +456,46 @@ public class ParquetFilters {
             throw new UnsupportedOperationException();
         }
     }
+
+    /** user defined predicate that keeps double rows where the value is nan. 
*/
+    public static class IsNaNDoublePredicate extends 
UserDefinedPredicate<Double>
+            implements Serializable {
+        private static final long serialVersionUID = 1L;
+
+        @Override
+        public boolean keep(Double value) {
+            return value != null && Double.isNaN(value);
+        }
+
+        @Override
+        public boolean canDrop(Statistics<Double> statistics) {
+            return false;
+        }
+
+        @Override
+        public boolean inverseCanDrop(Statistics<Double> statistics) {
+            return false;
+        }
+    }
+
+    /** user defined predicate that keeps float rows where the value is nan. */
+    public static class IsNaNFloatPredicate extends UserDefinedPredicate<Float>
+            implements Serializable {
+        private static final long serialVersionUID = 1L;
+
+        @Override
+        public boolean keep(Float value) {
+            return value != null && Float.isNaN(value);
+        }
+
+        @Override
+        public boolean canDrop(Statistics<Float> statistics) {
+            return false;
+        }
+
+        @Override
+        public boolean inverseCanDrop(Statistics<Float> statistics) {
+            return false;
+        }
+    }
 }
diff --git 
a/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetFiltersTest.java
 
b/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetFiltersTest.java
index 18fe1ef28c..4fdd1e3927 100644
--- 
a/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetFiltersTest.java
+++ 
b/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetFiltersTest.java
@@ -198,6 +198,38 @@ class ParquetFiltersTest {
                 true);
     }
 
+    @Test
+    public void testIsNaNDouble() {
+        PredicateBuilder builder =
+                new PredicateBuilder(
+                        new RowType(
+                                Collections.singletonList(
+                                        new DataField(0, "d1", new 
DoubleType()))));
+
+        FilterCompat.Filter filter =
+                
ParquetFilters.convert(Collections.singletonList(builder.isNaN(0)));
+        FilterPredicateCompat compat = (FilterPredicateCompat) filter;
+        assertThat(compat.getFilterPredicate().toString())
+                .contains(
+                        "userdefinedbyinstance(d1, 
org.apache.parquet.filter2.predicate.ParquetFilters$IsNaNDoublePredicate");
+    }
+
+    @Test
+    public void testIsNaNFloat() {
+        PredicateBuilder builder =
+                new PredicateBuilder(
+                        new RowType(
+                                Collections.singletonList(
+                                        new DataField(0, "f1", new 
FloatType()))));
+
+        FilterCompat.Filter filter =
+                
ParquetFilters.convert(Collections.singletonList(builder.isNaN(0)));
+        FilterPredicateCompat compat = (FilterPredicateCompat) filter;
+        assertThat(compat.getFilterPredicate().toString())
+                .contains(
+                        "userdefinedbyinstance(f1, 
org.apache.parquet.filter2.predicate.ParquetFilters$IsNaNFloatPredicate");
+    }
+
     @Test
     public void testInFilterFloat() {
         PredicateBuilder builder =

Reply via email to