This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 9caf2e115 [spark] Support push down StringContains (#3918)
9caf2e115 is described below

commit 9caf2e115c68a290b56a15c9eb3ca470ebcd4075
Author: Xiduo You <[email protected]>
AuthorDate: Thu Aug 8 19:09:03 2024 +0800

    [spark] Support push down StringContains (#3918)
---
 .../apache/paimon/fileindex/FileIndexReader.java   |  5 ++
 .../fileindex/empty/EmptyFileIndexReader.java      |  5 ++
 .../java/org/apache/paimon/predicate/Contains.java | 60 ++++++++++++++++++++++
 .../apache/paimon/predicate/FunctionVisitor.java   |  2 +
 .../predicate/OnlyPartitionKeyEqualVisitor.java    |  5 ++
 .../apache/paimon/predicate/PredicateBuilder.java  |  4 ++
 .../orc/filter/OrcPredicateFunctionVisitor.java    |  5 ++
 .../parquet/filter2/predicate/ParquetFilters.java  |  5 ++
 .../apache/paimon/spark/SparkFilterConverter.java  |  9 +++-
 .../paimon/spark/SparkFilterConverterTest.java     |  8 +++
 10 files changed, 107 insertions(+), 1 deletion(-)

diff --git a/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexReader.java b/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexReader.java
index e6aab8da5..d715e6465 100644
--- a/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexReader.java
+++ b/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexReader.java
@@ -51,6 +51,11 @@ public abstract class FileIndexReader implements FunctionVisitor<FileIndexResult
         return REMAIN;
     }
 
+    @Override
+    public FileIndexResult visitContains(FieldRef fieldRef, Object literal) {
+        return REMAIN;
+    }
+
     @Override
     public FileIndexResult visitLessThan(FieldRef fieldRef, Object literal) {
         return REMAIN;
diff --git a/paimon-common/src/main/java/org/apache/paimon/fileindex/empty/EmptyFileIndexReader.java b/paimon-common/src/main/java/org/apache/paimon/fileindex/empty/EmptyFileIndexReader.java
index b417b9764..7d1f03520 100644
--- a/paimon-common/src/main/java/org/apache/paimon/fileindex/empty/EmptyFileIndexReader.java
+++ b/paimon-common/src/main/java/org/apache/paimon/fileindex/empty/EmptyFileIndexReader.java
@@ -52,6 +52,11 @@ public class EmptyFileIndexReader extends FileIndexReader {
         return SKIP;
     }
 
+    @Override
+    public FileIndexResult visitContains(FieldRef fieldRef, Object literal) {
+        return SKIP;
+    }
+
     @Override
     public FileIndexResult visitLessThan(FieldRef fieldRef, Object literal) {
         return SKIP;
diff --git a/paimon-common/src/main/java/org/apache/paimon/predicate/Contains.java b/paimon-common/src/main/java/org/apache/paimon/predicate/Contains.java
new file mode 100644
index 000000000..bf97e2447
--- /dev/null
+++ b/paimon-common/src/main/java/org/apache/paimon/predicate/Contains.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.predicate;
+
+import org.apache.paimon.data.BinaryString;
+import org.apache.paimon.types.DataType;
+
+import java.util.List;
+import java.util.Optional;
+
+/** A {@link NullFalseLeafBinaryFunction} to evaluate {@code filter like '%abc%'}. */
+public class Contains extends NullFalseLeafBinaryFunction {
+
+    public static final Contains INSTANCE = new Contains();
+
+    private Contains() {}
+
+    @Override
+    public boolean test(DataType type, Object field, Object patternLiteral) {
+        BinaryString fieldString = (BinaryString) field;
+        return fieldString.contains((BinaryString) patternLiteral);
+    }
+
+    @Override
+    public boolean test(
+            DataType type,
+            long rowCount,
+            Object min,
+            Object max,
+            Long nullCount,
+            Object patternLiteral) {
+        return true;
+    }
+
+    @Override
+    public Optional<LeafFunction> negate() {
+        return Optional.empty();
+    }
+
+    @Override
+    public <T> T visit(FunctionVisitor<T> visitor, FieldRef fieldRef, List<Object> literals) {
+        return visitor.visitContains(fieldRef, literals.get(0));
+    }
+}
diff --git a/paimon-common/src/main/java/org/apache/paimon/predicate/FunctionVisitor.java b/paimon-common/src/main/java/org/apache/paimon/predicate/FunctionVisitor.java
index c29b001f0..49207cce5 100644
--- a/paimon-common/src/main/java/org/apache/paimon/predicate/FunctionVisitor.java
+++ b/paimon-common/src/main/java/org/apache/paimon/predicate/FunctionVisitor.java
@@ -52,6 +52,8 @@ public interface FunctionVisitor<T> extends PredicateVisitor<T> {
 
     T visitEndsWith(FieldRef fieldRef, Object literal);
 
+    T visitContains(FieldRef fieldRef, Object literal);
+
     T visitLessThan(FieldRef fieldRef, Object literal);
 
     T visitGreaterOrEqual(FieldRef fieldRef, Object literal);
diff --git a/paimon-common/src/main/java/org/apache/paimon/predicate/OnlyPartitionKeyEqualVisitor.java b/paimon-common/src/main/java/org/apache/paimon/predicate/OnlyPartitionKeyEqualVisitor.java
index 83670c99a..1eda670db 100644
--- a/paimon-common/src/main/java/org/apache/paimon/predicate/OnlyPartitionKeyEqualVisitor.java
+++ b/paimon-common/src/main/java/org/apache/paimon/predicate/OnlyPartitionKeyEqualVisitor.java
@@ -62,6 +62,11 @@ public class OnlyPartitionKeyEqualVisitor implements FunctionVisitor<Boolean> {
         return false;
     }
 
+    @Override
+    public Boolean visitContains(FieldRef fieldRef, Object literal) {
+        return false;
+    }
+
     @Override
     public Boolean visitLessThan(FieldRef fieldRef, Object literal) {
         return false;
diff --git a/paimon-common/src/main/java/org/apache/paimon/predicate/PredicateBuilder.java b/paimon-common/src/main/java/org/apache/paimon/predicate/PredicateBuilder.java
index c54fc31b1..f3d0b42bd 100644
--- a/paimon-common/src/main/java/org/apache/paimon/predicate/PredicateBuilder.java
+++ b/paimon-common/src/main/java/org/apache/paimon/predicate/PredicateBuilder.java
@@ -112,6 +112,10 @@ public class PredicateBuilder {
         return leaf(EndsWith.INSTANCE, idx, patternLiteral);
     }
 
+    public Predicate contains(int idx, Object patternLiteral) {
+        return leaf(Contains.INSTANCE, idx, patternLiteral);
+    }
+
     public Predicate leaf(NullFalseLeafBinaryFunction function, int idx, Object literal) {
         DataField field = rowType.getFields().get(idx);
         return new LeafPredicate(function, field.type(), idx, field.name(), singletonList(literal));
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcPredicateFunctionVisitor.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcPredicateFunctionVisitor.java
index 81226dad5..935b48210 100644
--- a/paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcPredicateFunctionVisitor.java
+++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcPredicateFunctionVisitor.java
@@ -69,6 +69,11 @@ public class OrcPredicateFunctionVisitor
         return Optional.empty();
     }
 
+    @Override
+    public Optional<OrcFilters.Predicate> visitContains(FieldRef fieldRef, Object literal) {
+        return Optional.empty();
+    }
+
     @Override
     public Optional<OrcFilters.Predicate> visitLessThan(FieldRef fieldRef, Object literal) {
         return convertBinary(fieldRef, literal, OrcFilters.LessThan::new);
diff --git a/paimon-format/src/main/java/org/apache/parquet/filter2/predicate/ParquetFilters.java b/paimon-format/src/main/java/org/apache/parquet/filter2/predicate/ParquetFilters.java
index 27850ae9a..cacc241fd 100644
--- a/paimon-format/src/main/java/org/apache/parquet/filter2/predicate/ParquetFilters.java
+++ b/paimon-format/src/main/java/org/apache/parquet/filter2/predicate/ParquetFilters.java
@@ -146,6 +146,11 @@ public class ParquetFilters {
             throw new UnsupportedOperationException();
         }
 
+        @Override
+        public FilterPredicate visitContains(FieldRef fieldRef, Object literal) {
+            throw new UnsupportedOperationException();
+        }
+
         @Override
         public FilterPredicate visitIn(FieldRef fieldRef, List<Object> literals) {
             throw new UnsupportedOperationException();
diff --git a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/SparkFilterConverter.java b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/SparkFilterConverter.java
index 7d0499489..ea61f6f7d 100644
--- a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/SparkFilterConverter.java
+++ b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/SparkFilterConverter.java
@@ -36,6 +36,7 @@ import org.apache.spark.sql.sources.LessThan;
 import org.apache.spark.sql.sources.LessThanOrEqual;
 import org.apache.spark.sql.sources.Not;
 import org.apache.spark.sql.sources.Or;
+import org.apache.spark.sql.sources.StringContains;
 import org.apache.spark.sql.sources.StringEndsWith;
 import org.apache.spark.sql.sources.StringStartsWith;
 
@@ -63,7 +64,8 @@ public class SparkFilterConverter {
                     "Or",
                     "Not",
                     "StringStartsWith",
-                    "StringEndsWith");
+                    "StringEndsWith",
+                    "StringContains");
 
     private final RowType rowType;
     private final PredicateBuilder builder;
@@ -148,6 +150,11 @@ public class SparkFilterConverter {
             int index = fieldIndex(endsWith.attribute());
             Object literal = convertLiteral(index, endsWith.value());
             return builder.endsWith(index, literal);
+        } else if (filter instanceof StringContains) {
+            StringContains contains = (StringContains) filter;
+            int index = fieldIndex(contains.attribute());
+            Object literal = convertLiteral(index, contains.value());
+            return builder.contains(index, literal);
         }
 
         // TODO: AlwaysTrue, AlwaysFalse
diff --git a/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkFilterConverterTest.java b/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkFilterConverterTest.java
index 3242c696c..8ddb96569 100644
--- a/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkFilterConverterTest.java
+++ b/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkFilterConverterTest.java
@@ -40,6 +40,7 @@ import org.apache.spark.sql.sources.IsNull;
 import org.apache.spark.sql.sources.LessThan;
 import org.apache.spark.sql.sources.LessThanOrEqual;
 import org.apache.spark.sql.sources.Not;
+import org.apache.spark.sql.sources.StringContains;
 import org.apache.spark.sql.sources.StringEndsWith;
 import org.apache.spark.sql.sources.StringStartsWith;
 import org.junit.jupiter.api.Test;
@@ -167,6 +168,13 @@ public class SparkFilterConverterTest {
         boolean test1 = endsWithPre.test(10, min, max, new GenericArray(nullCount));
         assertThat(test).isEqualTo(true);
         assertThat(test1).isEqualTo(true);
+
+        // StringContains
+        StringContains stringContains = StringContains.apply("id", "aa");
+        Predicate contains = converter01.convert(stringContains);
+        assertThat(contains.test(row)).isEqualTo(true);
+        assertThat(contains.test(max)).isEqualTo(false);
+        assertThat(contains.test(min)).isEqualTo(true);
     }
 
     @Test

Reply via email to