This is an automated email from the ASF dual-hosted git repository.
leaves12138 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/paimon-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new 7efab4f feat: add predicate leaf functions (#33)
7efab4f is described below
commit 7efab4fbfcea6fe372ff6b0ba3aa74ed830c4bfb
Author: lxy <[email protected]>
AuthorDate: Mon Jun 1 17:59:33 2026 +0800
feat: add predicate leaf functions (#33)
* feat: add predicate leaf functions
* fix review for like
* fix clang tidy
---
src/paimon/common/predicate/and.cpp | 29 ++++
src/paimon/common/predicate/and.h | 116 ++++++++++++++++
src/paimon/common/predicate/contains.cpp | 26 ++++
src/paimon/common/predicate/contains.h | 48 +++++++
src/paimon/common/predicate/ends_with.cpp | 28 ++++
src/paimon/common/predicate/ends_with.h | 48 +++++++
src/paimon/common/predicate/equal.cpp | 30 ++++
src/paimon/common/predicate/equal.h | 66 +++++++++
src/paimon/common/predicate/greater_or_equal.cpp | 30 ++++
src/paimon/common/predicate/greater_or_equal.h | 62 +++++++++
src/paimon/common/predicate/greater_than.cpp | 34 +++++
src/paimon/common/predicate/greater_than.h | 60 ++++++++
src/paimon/common/predicate/in.cpp | 30 ++++
src/paimon/common/predicate/in.h | 82 +++++++++++
src/paimon/common/predicate/is_not_null.cpp | 30 ++++
src/paimon/common/predicate/is_not_null.h | 60 ++++++++
src/paimon/common/predicate/is_null.cpp | 30 ++++
src/paimon/common/predicate/is_null.h | 60 ++++++++
src/paimon/common/predicate/less_or_equal.cpp | 30 ++++
src/paimon/common/predicate/less_or_equal.h | 62 +++++++++
src/paimon/common/predicate/less_than.cpp | 34 +++++
src/paimon/common/predicate/less_than.h | 58 ++++++++
src/paimon/common/predicate/like.cpp | 169 +++++++++++++++++++++++
src/paimon/common/predicate/like.h | 48 +++++++
src/paimon/common/predicate/not_equal.cpp | 30 ++++
src/paimon/common/predicate/not_equal.h | 64 +++++++++
src/paimon/common/predicate/not_in.cpp | 30 ++++
src/paimon/common/predicate/not_in.h | 82 +++++++++++
src/paimon/common/predicate/or.cpp | 33 +++++
src/paimon/common/predicate/or.h | 111 +++++++++++++++
src/paimon/common/predicate/starts_with.cpp | 40 ++++++
src/paimon/common/predicate/starts_with.h | 53 +++++++
32 files changed, 1713 insertions(+)
diff --git a/src/paimon/common/predicate/and.cpp
b/src/paimon/common/predicate/and.cpp
new file mode 100644
index 0000000..d3d38e8
--- /dev/null
+++ b/src/paimon/common/predicate/and.cpp
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/common/predicate/and.h"
+
+#include "paimon/common/predicate/or.h"
+
+namespace paimon {
+
+/// Returns the compound function used as the negated form of `And`,
+/// which is the shared `Or` instance.
+const CompoundFunction& And::Negate() const {
+    const CompoundFunction& negated = Or::Instance();
+    return negated;
+}
+
+} // namespace paimon
diff --git a/src/paimon/common/predicate/and.h
b/src/paimon/common/predicate/and.h
new file mode 100644
index 0000000..b3663ea
--- /dev/null
+++ b/src/paimon/common/predicate/and.h
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/array/array_base.h"
+#include "fmt/format.h"
+#include "paimon/common/predicate/compound_function.h"
+#include "paimon/common/predicate/predicate_filter.h"
+#include "paimon/predicate/predicate.h"
+#include "paimon/result.h"
+#include "paimon/status.h"
+
+namespace arrow {
+class Schema;
+} // namespace arrow
+
+namespace paimon {
+class InternalArray;
+class InternalRow;
+
+/// A `CompoundFunction` that evaluates the logical AND of its child predicates.
+///
+/// Every child handed to a `Test` overload must be a `PredicateFilter`;
+/// otherwise `Status::Invalid` is returned. The class holds no state and is
+/// reached through `Instance()`.
+class And : public CompoundFunction {
+ public:
+    /// Returns the shared `And` instance (function-local static).
+    static const And& Instance() {
+        static const And instance = And();
+        return instance;
+    }
+
+    /// Evaluates every child against `array` and ANDs the per-row results.
+    ///
+    /// @param array    values to test; the result holds `array.length()` entries.
+    /// @param children child predicates, each expected to be a `PredicateFilter`.
+    /// @return one flag per row (non-zero when every child accepted the row), or
+    ///         `Status::Invalid` when a child is null, is not a `PredicateFilter`,
+    ///         or produces a result whose size differs from `array.length()`.
+    ///         Errors reported by a child are propagated unchanged.
+    Result<std::vector<char>> Test(
+        const arrow::Array& array,
+        const std::vector<std::shared_ptr<Predicate>>& children) const override {
+        std::vector<char> is_valid(array.length(), true);
+        for (const auto& child : children) {
+            PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<PredicateFilter> child_filter,
+                                   AsFilter(child));
+            PAIMON_ASSIGN_OR_RAISE(std::vector<char> child_valid, child_filter->Test(array));
+            // The merge below indexes `child_valid` with the indices of `is_valid`;
+            // reject a mismatched result instead of reading out of bounds.
+            if (child_valid.size() != is_valid.size()) {
+                return Status::Invalid(
+                    fmt::format("child filter {} returned {} results, expected {}",
+                                child->ToString(), child_valid.size(), is_valid.size()));
+            }
+            for (size_t i = 0; i < is_valid.size(); i++) {
+                is_valid[i] = (is_valid[i] & child_valid[i]);
+            }
+        }
+        return is_valid;
+    }
+
+    /// Evaluates the children against a single row.
+    ///
+    /// @return false as soon as one child returns false (later children are not
+    ///         evaluated), true when all children return true (including when
+    ///         `children` is empty), or the first error encountered.
+    Result<bool> Test(const std::shared_ptr<arrow::Schema>& schema, const InternalRow& row,
+                      const std::vector<std::shared_ptr<Predicate>>& children) const override {
+        for (const auto& child : children) {
+            PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<PredicateFilter> child_filter,
+                                   AsFilter(child));
+            PAIMON_ASSIGN_OR_RAISE(bool is_valid, child_filter->Test(schema, row));
+            if (!is_valid) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /// Evaluates the children against statistics (row count, min/max values and
+    /// null counts), forwarding all arguments to each child.
+    ///
+    /// @return false as soon as one child returns false, true when all children
+    ///         return true, or the first error encountered.
+    Result<bool> Test(const std::shared_ptr<arrow::Schema>& schema, int64_t row_count,
+                      const InternalRow& min_values, const InternalRow& max_values,
+                      const InternalArray& null_counts,
+                      const std::vector<std::shared_ptr<Predicate>>& children) const override {
+        for (const auto& child : children) {
+            PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<PredicateFilter> child_filter,
+                                   AsFilter(child));
+            PAIMON_ASSIGN_OR_RAISE(
+                bool is_valid,
+                child_filter->Test(schema, row_count, min_values, max_values, null_counts));
+            if (!is_valid) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /// Identifies this function as `Type::AND`.
+    Type GetType() const override {
+        return Type::AND;
+    }
+
+    /// Returns the compound function used as the negated form of `And`.
+    const CompoundFunction& Negate() const override;
+
+    /// Returns the display name "And".
+    std::string ToString() const override {
+        return "And";
+    }
+
+ private:
+    And() = default;
+
+    /// Downcasts `child` to a `PredicateFilter`.
+    ///
+    /// @return the downcast pointer, or `Status::Invalid` when `child` is null or
+    ///         is not a `PredicateFilter`. A null child is reported as "<null>"
+    ///         rather than being dereferenced for its name.
+    static Result<std::shared_ptr<PredicateFilter>> AsFilter(
+        const std::shared_ptr<Predicate>& child) {
+        auto child_filter = std::dynamic_pointer_cast<PredicateFilter>(child);
+        if (!child_filter) {
+            return Status::Invalid(
+                fmt::format("child filter {} does not support Test",
+                            child ? child->ToString() : std::string("<null>")));
+        }
+        return child_filter;
+    }
+};
+} // namespace paimon
diff --git a/src/paimon/common/predicate/contains.cpp
b/src/paimon/common/predicate/contains.cpp
new file mode 100644
index 0000000..333d9a7
--- /dev/null
+++ b/src/paimon/common/predicate/contains.cpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/common/predicate/contains.h"
+
+namespace paimon {
+
+/// Reports whether `pattern` occurs anywhere inside `field`.
+Result<bool> Contains::TestString(const std::string& field, const std::string& pattern) const {
+    const std::string::size_type match_pos = field.find(pattern);
+    return match_pos != std::string::npos;
+}
+} // namespace paimon
diff --git a/src/paimon/common/predicate/contains.h
b/src/paimon/common/predicate/contains.h
new file mode 100644
index 0000000..d620829
--- /dev/null
+++ b/src/paimon/common/predicate/contains.h
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <string>
+
+#include "paimon/common/predicate/string_leaf_binary_function.h"
+#include "paimon/result.h"
+
+namespace paimon {
+/// A `StringLeafBinaryFunction` that evaluates filters of the form `like '%abc%'`.
+class Contains : public StringLeafBinaryFunction {
+ public:
+    /// Accessor for the single shared `Contains` object.
+    static const Contains& Instance() {
+        static const Contains kSingleton{};
+        return kSingleton;
+    }
+
+    /// Tests `field` against `pattern`; the definition lives in the .cpp file.
+    Result<bool> TestString(const std::string& field, const std::string& pattern) const override;
+
+    /// Returns the display name "Contains".
+    std::string ToString() const override {
+        return std::string("Contains");
+    }
+
+    /// Identifies this function as `Type::CONTAINS`.
+    Type GetType() const override {
+        return Type::CONTAINS;
+    }
+
+ private:
+    Contains() = default;
+};
+} // namespace paimon
diff --git a/src/paimon/common/predicate/ends_with.cpp
b/src/paimon/common/predicate/ends_with.cpp
new file mode 100644
index 0000000..9020558
--- /dev/null
+++ b/src/paimon/common/predicate/ends_with.cpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/common/predicate/ends_with.h"
+
+#include "paimon/common/utils/string_utils.h"
+
+namespace paimon {
+
+/// Delegates to `StringUtils::EndsWith(field, pattern)` and returns its result.
+Result<bool> EndsWith::TestString(const std::string& field, const std::string& pattern) const {
+    auto has_suffix = StringUtils::EndsWith(field, pattern);
+    return has_suffix;
+}
+} // namespace paimon
diff --git a/src/paimon/common/predicate/ends_with.h
b/src/paimon/common/predicate/ends_with.h
new file mode 100644
index 0000000..2d13a0f
--- /dev/null
+++ b/src/paimon/common/predicate/ends_with.h
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <string>
+
+#include "paimon/common/predicate/string_leaf_binary_function.h"
+#include "paimon/result.h"
+
+namespace paimon {
+/// A `StringLeafBinaryFunction` that evaluates filters of the form `like '%abc'`
+/// or `like '_abc'`.
+class EndsWith : public StringLeafBinaryFunction {
+ public:
+    /// Accessor for the single shared `EndsWith` object.
+    static const EndsWith& Instance() {
+        static const EndsWith kSingleton{};
+        return kSingleton;
+    }
+
+    /// Tests `field` against `pattern`; the definition lives in the .cpp file.
+    Result<bool> TestString(const std::string& field, const std::string& pattern) const override;
+
+    /// Returns the display name "EndsWith".
+    std::string ToString() const override {
+        return std::string("EndsWith");
+    }
+
+    /// Identifies this function as `Type::ENDS_WITH`.
+    Type GetType() const override {
+        return Type::ENDS_WITH;
+    }
+
+ private:
+    EndsWith() = default;
+};
+} // namespace paimon
diff --git a/src/paimon/common/predicate/equal.cpp
b/src/paimon/common/predicate/equal.cpp
new file mode 100644
index 0000000..4260808
--- /dev/null
+++ b/src/paimon/common/predicate/equal.cpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/common/predicate/equal.h"
+
+#include "paimon/common/predicate/not_equal.h"
+
+namespace paimon {
+class LeafFunction;
+
+/// Returns the shared `NotEqual` instance as the negated counterpart of `Equal`.
+const LeafFunction* Equal::Negate() const {
+    const LeafFunction* negated = &NotEqual::Instance();
+    return negated;
+}
+
+} // namespace paimon
diff --git a/src/paimon/common/predicate/equal.h
b/src/paimon/common/predicate/equal.h
new file mode 100644
index 0000000..2c9b12c
--- /dev/null
+++ b/src/paimon/common/predicate/equal.h
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "paimon/common/predicate/null_false_leaf_binary_function.h"
+#include "paimon/predicate/literal.h"
+#include "paimon/result.h"
+
+namespace paimon {
+class LeafFunction;
+
+/// A `NullFalseLeafBinaryFunction` that evaluates equality against a literal.
+class Equal : public NullFalseLeafBinaryFunction {
+ public:
+    /// Accessor for the single shared `Equal` object.
+    static const Equal& Instance() {
+        static const Equal kSingleton{};
+        return kSingleton;
+    }
+
+    /// Value test: true when `field.CompareTo(literal)` yields 0.
+    /// A comparison error is propagated to the caller.
+    Result<bool> Test(const Literal& field, const Literal& literal) const override {
+        PAIMON_ASSIGN_OR_RAISE(int32_t ordering, field.CompareTo(literal));
+        return ordering == 0;
+    }
+
+    /// Statistics test: true when `literal` is neither below `min_value` nor above
+    /// `max_value`. `row_count` and `null_count` are not consulted. Both
+    /// comparisons are performed before the result is formed.
+    Result<bool> Test(int64_t row_count, const Literal& min_value, const Literal& max_value,
+                      const std::optional<int64_t>& null_count,
+                      const Literal& literal) const override {
+        PAIMON_ASSIGN_OR_RAISE(int32_t versus_min, literal.CompareTo(min_value));
+        PAIMON_ASSIGN_OR_RAISE(int32_t versus_max, literal.CompareTo(max_value));
+        const bool below_min = versus_min < 0;
+        const bool above_max = versus_max > 0;
+        return !(below_min || above_max);
+    }
+
+    /// Returns the leaf function used as the negated form of `Equal`.
+    const LeafFunction* Negate() const override;
+
+    /// Returns the display name "Equal".
+    std::string ToString() const override {
+        return std::string("Equal");
+    }
+
+    /// Identifies this function as `Type::EQUAL`.
+    Type GetType() const override {
+        return Type::EQUAL;
+    }
+
+ private:
+    Equal() = default;
+};
+
+} // namespace paimon
diff --git a/src/paimon/common/predicate/greater_or_equal.cpp
b/src/paimon/common/predicate/greater_or_equal.cpp
new file mode 100644
index 0000000..4b200e6
--- /dev/null
+++ b/src/paimon/common/predicate/greater_or_equal.cpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/common/predicate/greater_or_equal.h"
+
+#include "paimon/common/predicate/less_than.h"
+
+namespace paimon {
+class LeafFunction;
+
+/// Returns the shared `LessThan` instance as the negated counterpart of
+/// `GreaterOrEqual`.
+const LeafFunction* GreaterOrEqual::Negate() const {
+    const LeafFunction* negated = &LessThan::Instance();
+    return negated;
+}
+
+} // namespace paimon
diff --git a/src/paimon/common/predicate/greater_or_equal.h
b/src/paimon/common/predicate/greater_or_equal.h
new file mode 100644
index 0000000..a0a205e
--- /dev/null
+++ b/src/paimon/common/predicate/greater_or_equal.h
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "paimon/common/predicate/null_false_leaf_binary_function.h"
+#include "paimon/predicate/literal.h"
+#include "paimon/result.h"
+
+namespace paimon {
+class LeafFunction;
+
+/// A `NullFalseLeafBinaryFunction` that evaluates "greater than or equal to" a literal.
+class GreaterOrEqual : public NullFalseLeafBinaryFunction {
+ public:
+    /// Accessor for the single shared `GreaterOrEqual` object.
+    static const GreaterOrEqual& Instance() {
+        static const GreaterOrEqual kSingleton{};
+        return kSingleton;
+    }
+
+    /// Value test: true when `field.CompareTo(literal)` is not negative.
+    /// A comparison error is propagated to the caller.
+    Result<bool> Test(const Literal& field, const Literal& literal) const override {
+        PAIMON_ASSIGN_OR_RAISE(int32_t ordering, field.CompareTo(literal));
+        return !(ordering < 0);
+    }
+
+    /// Statistics test: true when `literal.CompareTo(max_value)` is not positive.
+    /// Only `max_value` and `literal` are consulted.
+    Result<bool> Test(int64_t row_count, const Literal& min_value, const Literal& max_value,
+                      const std::optional<int64_t>& null_count,
+                      const Literal& literal) const override {
+        PAIMON_ASSIGN_OR_RAISE(int32_t versus_max, literal.CompareTo(max_value));
+        return !(versus_max > 0);
+    }
+
+    /// Returns the leaf function used as the negated form of `GreaterOrEqual`.
+    const LeafFunction* Negate() const override;
+
+    /// Returns the display name "GreaterOrEqual".
+    std::string ToString() const override {
+        return std::string("GreaterOrEqual");
+    }
+
+    /// Identifies this function as `Type::GREATER_OR_EQUAL`.
+    Type GetType() const override {
+        return Type::GREATER_OR_EQUAL;
+    }
+
+ private:
+    GreaterOrEqual() = default;
+};
+} // namespace paimon
diff --git a/src/paimon/common/predicate/greater_than.cpp
b/src/paimon/common/predicate/greater_than.cpp
new file mode 100644
index 0000000..7c678c1
--- /dev/null
+++ b/src/paimon/common/predicate/greater_than.cpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/common/predicate/greater_than.h"
+
+#include "paimon/common/predicate/less_or_equal.h"
+
+namespace paimon {
+class LeafFunction;
+
+/// Returns the shared `GreaterThan` instance (function-local static).
+const GreaterThan& GreaterThan::Instance() {
+    static const GreaterThan singleton = GreaterThan();
+    return singleton;
+}
+
+/// Returns the shared `LessOrEqual` instance as the negated counterpart of
+/// `GreaterThan`.
+const LeafFunction* GreaterThan::Negate() const {
+    const LeafFunction* negated = &LessOrEqual::Instance();
+    return negated;
+}
+} // namespace paimon
diff --git a/src/paimon/common/predicate/greater_than.h
b/src/paimon/common/predicate/greater_than.h
new file mode 100644
index 0000000..00db4f6
--- /dev/null
+++ b/src/paimon/common/predicate/greater_than.h
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "paimon/common/predicate/null_false_leaf_binary_function.h"
+#include "paimon/predicate/literal.h"
+#include "paimon/result.h"
+
+namespace paimon {
+class LeafFunction;
+
+/// A `NullFalseLeafBinaryFunction` that evaluates "strictly greater than" a literal.
+class GreaterThan : public NullFalseLeafBinaryFunction {
+ public:
+    /// Accessor for the single shared `GreaterThan` object; defined out of line.
+    static const GreaterThan& Instance();
+
+    /// Value test: true when `field.CompareTo(literal)` is positive.
+    /// A comparison error is propagated to the caller.
+    Result<bool> Test(const Literal& field, const Literal& literal) const override {
+        PAIMON_ASSIGN_OR_RAISE(int32_t ordering, field.CompareTo(literal));
+        return 0 < ordering;
+    }
+
+    /// Statistics test: true when `literal.CompareTo(max_value)` is negative.
+    /// Only `max_value` and `literal` are consulted.
+    Result<bool> Test(int64_t row_count, const Literal& min_value, const Literal& max_value,
+                      const std::optional<int64_t>& null_count,
+                      const Literal& literal) const override {
+        PAIMON_ASSIGN_OR_RAISE(int32_t versus_max, literal.CompareTo(max_value));
+        return 0 > versus_max;
+    }
+
+    /// Returns the leaf function used as the negated form of `GreaterThan`.
+    const LeafFunction* Negate() const override;
+
+    /// Returns the display name "GreaterThan".
+    std::string ToString() const override {
+        return std::string("GreaterThan");
+    }
+
+    /// Identifies this function as `Type::GREATER_THAN`.
+    Type GetType() const override {
+        return Type::GREATER_THAN;
+    }
+
+ private:
+    GreaterThan() = default;
+};
+} // namespace paimon
diff --git a/src/paimon/common/predicate/in.cpp
b/src/paimon/common/predicate/in.cpp
new file mode 100644
index 0000000..e6ef547
--- /dev/null
+++ b/src/paimon/common/predicate/in.cpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/common/predicate/in.h"
+
+#include "paimon/common/predicate/not_in.h"
+
+namespace paimon {
+class LeafFunction;
+
+/// Returns the shared `NotIn` instance as the negated counterpart of `In`.
+const LeafFunction* In::Negate() const {
+    const LeafFunction* negated = &NotIn::Instance();
+    return negated;
+}
+
+} // namespace paimon
diff --git a/src/paimon/common/predicate/in.h b/src/paimon/common/predicate/in.h
new file mode 100644
index 0000000..c2301ad
--- /dev/null
+++ b/src/paimon/common/predicate/in.h
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "paimon/common/predicate/multi_literals_leaf_function.h"
+#include "paimon/predicate/literal.h"
+#include "paimon/result.h"
+
+namespace paimon {
+class LeafFunction;
+
+/// A `MultiLiteralsLeafFunction` that evaluates membership in a list of literals.
+class In : public MultiLiteralsLeafFunction {
+ public:
+    /// Accessor for the single shared `In` object.
+    static const In& Instance() {
+        static const In kSingleton{};
+        return kSingleton;
+    }
+
+    /// Value test: true when `field` compares equal to at least one non-null
+    /// entry of `literals`. Null entries are skipped; a comparison error is
+    /// propagated to the caller.
+    Result<bool> InnerTest(const Literal& field,
+                           const std::vector<Literal>& literals) const override {
+        for (const Literal& candidate : literals) {
+            if (candidate.IsNull()) {
+                continue;
+            }
+            PAIMON_ASSIGN_OR_RAISE(int32_t ordering, field.CompareTo(candidate));
+            if (ordering == 0) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /// Statistics test: true when at least one non-null entry of `literals` is
+    /// neither below `min_value` nor above `max_value`. `row_count` and
+    /// `null_count` are not consulted.
+    Result<bool> InnerTest(int64_t row_count, const Literal& min_value, const Literal& max_value,
+                           const std::optional<int64_t>& null_count,
+                           const std::vector<Literal>& literals) const override {
+        for (const Literal& candidate : literals) {
+            if (candidate.IsNull()) {
+                continue;
+            }
+            PAIMON_ASSIGN_OR_RAISE(int32_t versus_min, candidate.CompareTo(min_value));
+            PAIMON_ASSIGN_OR_RAISE(int32_t versus_max, candidate.CompareTo(max_value));
+            const bool outside_range = (versus_min < 0) || (versus_max > 0);
+            if (!outside_range) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /// Returns the leaf function used as the negated form of `In`.
+    const LeafFunction* Negate() const override;
+
+    /// Returns the display name "In".
+    std::string ToString() const override {
+        return std::string("In");
+    }
+
+    /// Identifies this function as `Type::IN`.
+    Type GetType() const override {
+        return Type::IN;
+    }
+
+ private:
+    In() = default;
+};
+} // namespace paimon
diff --git a/src/paimon/common/predicate/is_not_null.cpp
b/src/paimon/common/predicate/is_not_null.cpp
new file mode 100644
index 0000000..7cb7def
--- /dev/null
+++ b/src/paimon/common/predicate/is_not_null.cpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/common/predicate/is_not_null.h"
+
+#include "paimon/common/predicate/is_null.h"
+
+namespace paimon {
+class LeafFunction;
+
+/// Returns the shared `IsNull` instance as the negated counterpart of `IsNotNull`.
+const LeafFunction* IsNotNull::Negate() const {
+    const LeafFunction* negated = &IsNull::Instance();
+    return negated;
+}
+
+} // namespace paimon
diff --git a/src/paimon/common/predicate/is_not_null.h
b/src/paimon/common/predicate/is_not_null.h
new file mode 100644
index 0000000..5340e00
--- /dev/null
+++ b/src/paimon/common/predicate/is_not_null.h
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "paimon/common/predicate/leaf_unary_function.h"
+#include "paimon/predicate/literal.h"
+#include "paimon/result.h"
+
+namespace paimon {
+class LeafFunction;
+
+/// A `LeafUnaryFunction` that evaluates "is not null".
+class IsNotNull : public LeafUnaryFunction {
+ public:
+    /// Accessor for the single shared `IsNotNull` object.
+    static const IsNotNull& Instance() {
+        static const IsNotNull kSingleton{};
+        return kSingleton;
+    }
+
+    /// Value test: true when `field` is not null.
+    Result<bool> Test(const Literal& field) const override {
+        const bool field_is_null = field.IsNull();
+        return !field_is_null;
+    }
+
+    /// Statistics test: true when no null count is available, or when the null
+    /// count is smaller than `row_count`. `min_value` and `max_value` are not
+    /// consulted.
+    Result<bool> Test(int64_t row_count, const Literal& min_value, const Literal& max_value,
+                      const std::optional<int64_t>& null_count) const override {
+        if (!null_count.has_value()) {
+            return true;
+        }
+        return *null_count < row_count;
+    }
+
+    /// Returns the leaf function used as the negated form of `IsNotNull`.
+    const LeafFunction* Negate() const override;
+
+    /// Returns the display name "IsNotNull".
+    std::string ToString() const override {
+        return std::string("IsNotNull");
+    }
+
+    /// Identifies this function as `Type::IS_NOT_NULL`.
+    Type GetType() const override {
+        return Type::IS_NOT_NULL;
+    }
+
+ private:
+    IsNotNull() = default;
+};
+} // namespace paimon
diff --git a/src/paimon/common/predicate/is_null.cpp
b/src/paimon/common/predicate/is_null.cpp
new file mode 100644
index 0000000..79b296a
--- /dev/null
+++ b/src/paimon/common/predicate/is_null.cpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/common/predicate/is_null.h"
+
+#include "paimon/common/predicate/is_not_null.h"
+
+namespace paimon {
+class LeafFunction;
+
+const LeafFunction* IsNull::Negate() const {  // logical complement of IS NULL
+ return &IsNotNull::Instance();  // address of the shared IsNotNull object; nothing is allocated here
+}
+
+} // namespace paimon
diff --git a/src/paimon/common/predicate/is_null.h
b/src/paimon/common/predicate/is_null.h
new file mode 100644
index 0000000..07b5c66
--- /dev/null
+++ b/src/paimon/common/predicate/is_null.h
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "paimon/common/predicate/leaf_unary_function.h"
+#include "paimon/predicate/literal.h"
+#include "paimon/result.h"
+
+namespace paimon {
+class LeafFunction;
+
+/// A `LeafUnaryFunction` that evaluates IS NULL; use the shared object returned by `Instance()`.
+class IsNull : public LeafUnaryFunction {
+ public:
+ static const IsNull& Instance() {  // accessor for the shared instance
+ static const IsNull instance = IsNull();  // function-local static
+ return instance;
+ }
+
+ Result<bool> Test(const Literal& field) const override {  // per-value check
+ return field.IsNull();  // passes when the value is null
+ }
+ Result<bool> Test(int64_t row_count, const Literal& min_value, const
Literal& max_value,
+ const std::optional<int64_t>& null_count) const override
{
+ return null_count == std::nullopt || null_count.value() > 0;  // null count unknown, or at least one null; row_count and min/max are not consulted
+ }
+
+ Type GetType() const override {
+ return Type::IS_NULL;
+ }
+ const LeafFunction* Negate() const override;  // defined in is_null.cpp, returns the IsNotNull instance
+ std::string ToString() const override {
+ return "IsNull";
+ }
+
+ private:
+ IsNull() = default;  // private default constructor; Instance() is the intended access path
+};
+} // namespace paimon
diff --git a/src/paimon/common/predicate/less_or_equal.cpp
b/src/paimon/common/predicate/less_or_equal.cpp
new file mode 100644
index 0000000..d50ff33
--- /dev/null
+++ b/src/paimon/common/predicate/less_or_equal.cpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/common/predicate/less_or_equal.h"
+
+#include "paimon/common/predicate/greater_than.h"
+
+namespace paimon {
+class LeafFunction;
+
+const LeafFunction* LessOrEqual::Negate() const {  // NOT (a <= b) is (a > b)
+ return &GreaterThan::Instance();  // address of the object returned by GreaterThan::Instance()
+}
+
+} // namespace paimon
diff --git a/src/paimon/common/predicate/less_or_equal.h
b/src/paimon/common/predicate/less_or_equal.h
new file mode 100644
index 0000000..d783076
--- /dev/null
+++ b/src/paimon/common/predicate/less_or_equal.h
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#include <cstdint>
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "paimon/common/predicate/null_false_leaf_binary_function.h"
+#include "paimon/predicate/literal.h"
+#include "paimon/result.h"
+
+namespace paimon {
+class LeafFunction;
+
+/// A `NullFalseLeafBinaryFunction` that evaluates `field <= literal`; use the shared object returned by `Instance()`.
+class LessOrEqual : public NullFalseLeafBinaryFunction {
+ public:
+ static const LessOrEqual& Instance() {  // accessor for the shared instance
+ static const LessOrEqual instance = LessOrEqual();  // function-local static
+ return instance;
+ }
+
+ Result<bool> Test(const Literal& field, const Literal& literal) const
override {
+ PAIMON_ASSIGN_OR_RAISE(int32_t compare_res, field.CompareTo(literal));  // NOTE(review): macro defined elsewhere; presumably returns the error if CompareTo fails
+ return compare_res <= 0;  // field compares at or below the literal
+ }
+ Result<bool> Test(int64_t row_count, const Literal& min_value, const
Literal& max_value,
+ const std::optional<int64_t>& null_count,
+ const Literal& literal) const override {
+ PAIMON_ASSIGN_OR_RAISE(int32_t res, literal.CompareTo(min_value));  // only min_value is consulted
+ return res >= 0;  // literal compares at or above min_value, so some value may be <= literal
+ }
+ Type GetType() const override {
+ return Type::LESS_OR_EQUAL;
+ }
+ const LeafFunction* Negate() const override;  // defined in less_or_equal.cpp, returns the GreaterThan instance
+
+ std::string ToString() const override {
+ return "LessOrEqual";
+ }
+
+ private:
+ LessOrEqual() = default;  // private default constructor; Instance() is the intended access path
+};
+} // namespace paimon
diff --git a/src/paimon/common/predicate/less_than.cpp
b/src/paimon/common/predicate/less_than.cpp
new file mode 100644
index 0000000..c3c4a42
--- /dev/null
+++ b/src/paimon/common/predicate/less_than.cpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/common/predicate/less_than.h"
+
+#include "paimon/common/predicate/greater_or_equal.h"
+
+namespace paimon {
+class LeafFunction;
+
+const LessThan& LessThan::Instance() {  // accessor for the shared LessThan object
+ static const LessThan kInstance{};  // function-local static, value-initialized
+ return kInstance;
+}
+const LeafFunction* LessThan::Negate() const {  // NOT (a < b) is (a >= b)
+ return &GreaterOrEqual::Instance();  // address of the object returned by GreaterOrEqual::Instance()
+}
+
+} // namespace paimon
diff --git a/src/paimon/common/predicate/less_than.h
b/src/paimon/common/predicate/less_than.h
new file mode 100644
index 0000000..c3dc24f
--- /dev/null
+++ b/src/paimon/common/predicate/less_than.h
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#include <cstdint>
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "paimon/common/predicate/null_false_leaf_binary_function.h"
+#include "paimon/predicate/literal.h"
+#include "paimon/result.h"
+
+namespace paimon {
+class LeafFunction;
+
+/// A `NullFalseLeafBinaryFunction` to eval less than.
+class LessThan : public NullFalseLeafBinaryFunction {  // evaluates field < literal
+ public:
+ static const LessThan& Instance();  // defined in less_than.cpp; returns a function-local static
+
+ Result<bool> Test(const Literal& field, const Literal& literal) const
override {
+ PAIMON_ASSIGN_OR_RAISE(int32_t compare_res, field.CompareTo(literal));  // NOTE(review): macro defined elsewhere; presumably returns the error if CompareTo fails
+ return compare_res < 0;  // field compares strictly below the literal
+ }
+ Result<bool> Test(int64_t row_count, const Literal& min_value, const
Literal& max_value,
+ const std::optional<int64_t>& null_count,
+ const Literal& literal) const override {
+ PAIMON_ASSIGN_OR_RAISE(int32_t ret, literal.CompareTo(min_value));  // only min_value is consulted
+ return ret > 0;  // literal compares strictly above min_value, so some value may be < literal
+ }
+ Type GetType() const override {
+ return Type::LESS_THAN;
+ }
+ const LeafFunction* Negate() const override;  // defined in less_than.cpp, returns the GreaterOrEqual instance
+ std::string ToString() const override {
+ return "LessThan";
+ }
+
+ private:
+ LessThan() = default;  // private default constructor; Instance() is the intended access path
+};
+} // namespace paimon
diff --git a/src/paimon/common/predicate/like.cpp
b/src/paimon/common/predicate/like.cpp
new file mode 100644
index 0000000..3e6d6ef
--- /dev/null
+++ b/src/paimon/common/predicate/like.cpp
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/common/predicate/like.h"
+
+#include <string>
+#include <vector>
+
+#include "fmt/format.h"
+namespace paimon {
+
+namespace {
+
+/// Returns the byte length of the UTF-8 code point introduced by `leading_byte` (only this byte is inspected).
+/// Returns 1 for ASCII, 2-4 for multi-byte lead bytes, and 1 for any byte that is not a valid lead byte.
+inline size_t Utf8CodePointLength(unsigned char leading_byte) {
+ if (leading_byte < 0x80) {  // 0xxxxxxx
+ return 1;
+ }
+ if ((leading_byte & 0xE0) == 0xC0) {  // 110xxxxx
+ return 2;
+ }
+ if ((leading_byte & 0xF0) == 0xE0) {  // 1110xxxx
+ return 3;
+ }
+ if ((leading_byte & 0xF8) == 0xF0) {  // 11110xxx
+ return 4;
+ }
+ return 1; // stray continuation byte (10xxxxxx) or invalid lead byte (11111xxx): treat as a single byte
+}
+
+inline bool IsJavaRegexLineTerminator(const std::string& code_point) {  // true if code_point is exactly one of the five sequences below
+ return code_point == "\n" || code_point == "\r" || code_point ==
"\xC2\x85" ||  // LF, CR, then U+0085 (NEL) encoded as UTF-8
+ code_point == "\xE2\x80\xA8" || code_point == "\xE2\x80\xA9";  // U+2028 and U+2029 encoded as UTF-8
+}
+
+} // namespace
+
+Result<bool> Like::TestString(const std::string& field, const std::string&
pattern) const {  // LIKE match of field against pattern: % = any run of characters, _ = one character, backslash escapes
+ if (pattern.empty()) {
+ return field.empty();  // an empty pattern matches only an empty field
+ }
+
+ // Phase 1: Parse pattern with escape handling (Java-compatible).
+ // Only \_, \%, \\ are valid escape sequences.
+ std::vector<std::string> pat_chars; // each element is a literal string
segment or wildcard
+ std::vector<bool> is_wild;  // parallel to pat_chars: true for an unescaped _ or %
+
+ for (size_t i = 0; i < pattern.size();) {
+ if (pattern[i] == '\\') {
+ if (i + 1 >= pattern.size()) {  // backslash is the last byte of the pattern
+ return Status::Invalid(fmt::format("Invalid escape sequence
'{}', index={}",
+ pattern,
std::to_string(i)));
+ }
+ char next_char = pattern[i + 1];
+ if (next_char != '_' && next_char != '%' && next_char != '\\') {  // anything else after a backslash is rejected
+ return Status::Invalid(fmt::format("Invalid escape sequence
'{}', index={}",
+ pattern,
std::to_string(i)));
+ }
+ pat_chars.emplace_back(1, next_char);  // the escaped character is stored as a literal
+ is_wild.push_back(false);
+ i += 2;  // skip the backslash and the escaped character
+ } else if (pattern[i] == '_' || pattern[i] == '%') {
+ pat_chars.emplace_back(1, pattern[i]);
+ is_wild.push_back(true);
+ ++i;
+ } else {
+ // Read one UTF-8 code point from pattern as a literal element.
+ size_t cp_len = Utf8CodePointLength(static_cast<unsigned
char>(pattern[i]));
+ if (i + cp_len > pattern.size()) {
+ cp_len = 1;  // sequence runs past the end of the pattern, take the single byte
+ }
+ pat_chars.push_back(pattern.substr(i, cp_len));
+ is_wild.push_back(false);
+ i += cp_len;
+ }
+ }
+
+ // Phase 2: Merge consecutive '%' wildcards.
+ std::vector<std::string> simp_pat;
+ std::vector<bool> simp_wild;  // parallel to simp_pat
+ for (size_t i = 0; i < pat_chars.size(); ++i) {
+ if (is_wild[i] && pat_chars[i] == "%" && !simp_pat.empty() &&
simp_wild.back() &&
+ simp_pat.back() == "%") {
+ continue;  // previous kept element is already a % wildcard
+ }
+ simp_pat.push_back(pat_chars[i]);
+ simp_wild.push_back(is_wild[i]);
+ }
+
+ // Phase 3: Decompose field into UTF-8 code points for character-level
matching.
+ std::vector<std::string> field_chars;
+ for (size_t i = 0; i < field.size();) {
+ size_t cp_len = Utf8CodePointLength(static_cast<unsigned
char>(field[i]));
+ if (i + cp_len > field.size()) {
+ cp_len = 1; // truncated sequence, treat byte as single char
+ }
+ field_chars.push_back(field.substr(i, cp_len));
+ i += cp_len;
+ }
+
+ const size_t m = field_chars.size();  // number of field elements
+ const size_t n = simp_pat.size();  // number of pattern elements after merging
+
+ if (m == 0) {
+ return n == 1 && simp_wild[0] && simp_pat[0] == "%";  // an empty field matches only a pattern reduced to a single %
+ }
+
+ // Quick reject: count minimum required characters (non-wildcard pattern
elements).
+ size_t min_len = 0;
+ for (size_t i = 0; i < n; ++i) {
+ if (!simp_wild[i]) {
+ min_len++;
+ } else if (simp_pat[i] == "_") {  // _ also needs exactly one field character
+ min_len++;
+ }
+ }
+ if (min_len > m) {
+ return false;
+ }
+
+ // Phase 4: DP matching at character (code point) level.
+ std::vector<bool> dp(n + 1, false);  // dp[j]: first j pattern elements match the field prefix consumed so far
+ dp[0] = true;  // empty pattern prefix matches the empty field prefix
+ for (size_t j = 1; j <= n && simp_wild[j - 1] && simp_pat[j - 1] == "%";
++j) {
+ dp[j] = true;  // a leading % also matches the empty field prefix
+ }
+
+ for (size_t i = 0; i < m; ++i) {
+ const std::string& field_char = field_chars[i];
+ bool prev = dp[0];  // previous-row value at column j - 1
+ dp[0] = false;  // a non-empty field prefix cannot match the empty pattern prefix
+ bool has_match = false;
+ for (size_t j = 1; j <= n; ++j) {
+ const bool temp = dp[j];  // previous-row value, becomes prev for the next column
+ const std::string& pc = simp_pat[j - 1];
+ const bool wild = simp_wild[j - 1];
+ if (wild && pc == "%") {
+ dp[j] = dp[j - 1] || dp[j];  // % consumes nothing (dp[j - 1]) or also this character (old dp[j])
+ } else if (wild && pc == "_") {
+ dp[j] = prev && !IsJavaRegexLineTerminator(field_char);  // _ takes one character, but not a line terminator
+ } else {
+ dp[j] = (pc == field_char) ? prev : false;  // a literal must equal the field element exactly
+ }
+ has_match |= dp[j];
+ prev = temp;
+ }
+ if (!has_match) {  // every state is false, so no later character can produce a match
+ return false;
+ }
+ }
+ return static_cast<bool>(dp[n]);  // whole field against whole pattern
+}
+} // namespace paimon
diff --git a/src/paimon/common/predicate/like.h
b/src/paimon/common/predicate/like.h
new file mode 100644
index 0000000..1434ccf
--- /dev/null
+++ b/src/paimon/common/predicate/like.h
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <string>
+
+#include "paimon/common/predicate/string_leaf_binary_function.h"
+#include "paimon/result.h"
+
+namespace paimon {
+/// A `StringLeafBinaryFunction` that evaluates LIKE pattern matching; use the shared object returned by `Instance()`.
+class Like : public StringLeafBinaryFunction {
+ public:
+ static const Like& Instance() {  // accessor for the shared instance
+ static const Like instance = Like();  // function-local static
+ return instance;
+ }
+
+ Type GetType() const override {
+ return Type::LIKE;
+ }
+
+ std::string ToString() const override {
+ return "Like";
+ }
+
+ Result<bool> TestString(const std::string& field, const std::string&
pattern) const override;  // defined in like.cpp; returns an Invalid status for a malformed escape sequence
+
+ private:
+ Like() = default;  // private default constructor; Instance() is the intended access path
+};
+} // namespace paimon
diff --git a/src/paimon/common/predicate/not_equal.cpp
b/src/paimon/common/predicate/not_equal.cpp
new file mode 100644
index 0000000..53024d2
--- /dev/null
+++ b/src/paimon/common/predicate/not_equal.cpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/common/predicate/not_equal.h"
+
+#include "paimon/common/predicate/equal.h"
+
+namespace paimon {
+class LeafFunction;
+
+const LeafFunction* NotEqual::Negate() const {  // NOT (a != b) is (a == b)
+ return &Equal::Instance();  // address of the object returned by Equal::Instance()
+}
+
+} // namespace paimon
diff --git a/src/paimon/common/predicate/not_equal.h
b/src/paimon/common/predicate/not_equal.h
new file mode 100644
index 0000000..d60aed3
--- /dev/null
+++ b/src/paimon/common/predicate/not_equal.h
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "paimon/common/predicate/null_false_leaf_binary_function.h"
+#include "paimon/predicate/literal.h"
+#include "paimon/result.h"
+
+namespace paimon {
+class LeafFunction;
+
+/// A `NullFalseLeafBinaryFunction` that evaluates `field != literal`; use the shared object returned by `Instance()`.
+class NotEqual : public NullFalseLeafBinaryFunction {
+ public:
+ static const NotEqual& Instance() {  // accessor for the shared instance
+ static const NotEqual instance = NotEqual();  // function-local static
+ return instance;
+ }
+
+ Result<bool> Test(const Literal& field, const Literal& literal) const
override {
+ PAIMON_ASSIGN_OR_RAISE(int32_t compare_res, field.CompareTo(literal));  // NOTE(review): macro defined elsewhere; presumably returns the error if CompareTo fails
+ return compare_res != 0;  // passes when the two values do not compare equal
+ }
+ Result<bool> Test(int64_t row_count, const Literal& min_value, const
Literal& max_value,
+ const std::optional<int64_t>& null_count,
+ const Literal& literal) const override {
+ PAIMON_ASSIGN_OR_RAISE(int32_t min_res, literal.CompareTo(min_value));
+ PAIMON_ASSIGN_OR_RAISE(int32_t max_res, literal.CompareTo(max_value));
+ return min_res != 0 || max_res != 0;  // false only when the literal compares equal to both min and max
+ }
+ Type GetType() const override {
+ return Type::NOT_EQUAL;
+ }
+ const LeafFunction* Negate() const override;  // defined in not_equal.cpp, returns the Equal instance
+ std::string ToString() const override {
+ return "NotEqual";
+ }
+
+ private:
+ NotEqual() = default;  // private default constructor; Instance() is the intended access path
+};
+
+} // namespace paimon
diff --git a/src/paimon/common/predicate/not_in.cpp
b/src/paimon/common/predicate/not_in.cpp
new file mode 100644
index 0000000..f8e199e
--- /dev/null
+++ b/src/paimon/common/predicate/not_in.cpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/common/predicate/not_in.h"
+
+#include "paimon/common/predicate/in.h"
+
+namespace paimon {
+class LeafFunction;
+
+const LeafFunction* NotIn::Negate() const {  // the complement of NOT IN is IN
+ return &In::Instance();  // address of the object returned by In::Instance()
+}
+
+} // namespace paimon
diff --git a/src/paimon/common/predicate/not_in.h
b/src/paimon/common/predicate/not_in.h
new file mode 100644
index 0000000..5de8962
--- /dev/null
+++ b/src/paimon/common/predicate/not_in.h
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#include <cstdint>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "paimon/common/predicate/multi_literals_leaf_function.h"
+#include "paimon/predicate/literal.h"
+#include "paimon/result.h"
+
+namespace paimon {
+class LeafFunction;
+
+/// A `LeafFunction` that evaluates NOT IN over a list of literals; use the shared object returned by `Instance()`.
+class NotIn : public MultiLiteralsLeafFunction {
+ public:
+ static const NotIn& Instance() {  // accessor for the shared instance
+ static const NotIn instance = NotIn();  // function-local static
+ return instance;
+ }
+
+ Result<bool> InnerTest(const Literal& field,
+ const std::vector<Literal>& literals) const
override {
+ for (const auto& literal : literals) {
+ if (literal.IsNull()) {
+ return false;  // a null literal reached in the list fails the test
+ }
+ PAIMON_ASSIGN_OR_RAISE(int32_t compare_res,
field.CompareTo(literal));
+ if (compare_res == 0) {
+ return false;  // the field equals one of the literals
+ }
+ }
+ return true;  // no literal was null or equal to the field (also the result for an empty list)
+ }
+
+ Result<bool> InnerTest(int64_t row_count, const Literal& min_value, const
Literal& max_value,
+ const std::optional<int64_t>& null_count,
+ const std::vector<Literal>& literals) const
override {
+ for (const auto& literal : literals) {
+ if (literal.IsNull()) {
+ return false;  // a null literal reached in the list fails the test
+ }
+ PAIMON_ASSIGN_OR_RAISE(int32_t min_ret,
literal.CompareTo(min_value));
+ PAIMON_ASSIGN_OR_RAISE(int32_t max_ret,
literal.CompareTo(max_value));
+ if (min_ret == 0 && max_ret == 0) {
+ return false;  // min and max both compare equal to this literal
+ }
+ }
+ return true;  // row_count and null_count are not consulted
+ }
+
+ Type GetType() const override {
+ return Type::NOT_IN;
+ }
+ const LeafFunction* Negate() const override;  // defined in not_in.cpp, returns the In instance
+ std::string ToString() const override {
+ return "NotIn";
+ }
+
+ private:
+ NotIn() = default;  // private default constructor; Instance() is the intended access path
+};
+} // namespace paimon
diff --git a/src/paimon/common/predicate/or.cpp
b/src/paimon/common/predicate/or.cpp
new file mode 100644
index 0000000..57129eb
--- /dev/null
+++ b/src/paimon/common/predicate/or.cpp
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/common/predicate/or.h"
+
+#include "paimon/common/predicate/and.h"
+namespace paimon {
+
+const Or& Or::Instance() {  // accessor for the shared Or object
+ static const Or kInstance{};  // function-local static, value-initialized
+ return kInstance;
+}
+
+const CompoundFunction& Or::Negate() const {  // negating OR yields the AND function; negating the children is not done here
+ return And::Instance();
+}
+
+} // namespace paimon
diff --git a/src/paimon/common/predicate/or.h b/src/paimon/common/predicate/or.h
new file mode 100644
index 0000000..fb18091
--- /dev/null
+++ b/src/paimon/common/predicate/or.h
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/array/array_base.h"
+#include "fmt/format.h"
+#include "paimon/common/predicate/compound_function.h"
+#include "paimon/common/predicate/predicate_filter.h"
+#include "paimon/predicate/predicate.h"
+#include "paimon/result.h"
+#include "paimon/status.h"
+
+namespace arrow {
+class Schema;
+} // namespace arrow
+
+namespace paimon {
+class InternalArray;
+class InternalRow;
+
+/// A `CompoundFunction` that evaluates the logical OR of its child predicates; obtained through `Instance()`.
+class Or : public CompoundFunction {
+ public:
+ static const Or& Instance();  // defined in or.cpp; returns a function-local static
+
+ Result<std::vector<char>> Test(
+ const arrow::Array& array,
+ const std::vector<std::shared_ptr<Predicate>>& children) const
override {
+ std::vector<char> is_valid(array.length(), false);  // one flag per array element, initially false
+ for (const auto& child : children) {
+ auto child_filter =
std::dynamic_pointer_cast<PredicateFilter>(child);
+ if (!child_filter) {  // the child is not a PredicateFilter
+ return Status::Invalid(
+ fmt::format("child filter {} does not support Test",
child->ToString()));
+ }
+ PAIMON_ASSIGN_OR_RAISE(std::vector<char> child_valid,
child_filter->Test(array));
+ for (size_t i = 0; i < is_valid.size(); i++) {
+ is_valid[i] = (is_valid[i] | child_valid[i]);  // NOTE(review): assumes child_valid has at least is_valid.size() entries; not checked here
+ }
+ }
+ return is_valid;  // all false when there are no children
+ }
+
+ Result<bool> Test(const std::shared_ptr<arrow::Schema>& schema, const
InternalRow& row,
+ const std::vector<std::shared_ptr<Predicate>>& children)
const override {
+ for (const auto& child : children) {
+ auto child_filter =
std::dynamic_pointer_cast<PredicateFilter>(child);
+ if (!child_filter) {  // the child is not a PredicateFilter
+ return Status::Invalid(
+ fmt::format("child filter {} does not support Test",
child->ToString()));
+ }
+ PAIMON_ASSIGN_OR_RAISE(bool is_valid, child_filter->Test(schema,
row));
+ if (is_valid) {
+ return true;  // first accepting child decides; later children are not evaluated
+ }
+ }
+ return false;  // no child accepted the row (also the result for no children)
+ }
+
+ Result<bool> Test(const std::shared_ptr<arrow::Schema>& schema, int64_t
row_count,
+ const InternalRow& min_values, const InternalRow&
max_values,
+ const InternalArray& null_counts,
+ const std::vector<std::shared_ptr<Predicate>>& children)
const override {
+ for (const auto& child : children) {
+ auto child_filter =
std::dynamic_pointer_cast<PredicateFilter>(child);
+ if (!child_filter) {  // the child is not a PredicateFilter
+ return Status::Invalid(
+ fmt::format("child filter {} does not support Test",
child->ToString()));
+ }
+ PAIMON_ASSIGN_OR_RAISE(bool is_valid, child_filter->Test(schema,
row_count, min_values,
+
max_values, null_counts));
+ if (is_valid) {
+ return true;  // first accepting child decides; later children are not evaluated
+ }
+ }
+ return false;  // no child accepted these statistics (also the result for no children)
+ }
+ Type GetType() const override {
+ return Type::OR;
+ }
+ const CompoundFunction& Negate() const override;  // defined in or.cpp, returns the And instance
+ std::string ToString() const override {
+ return "Or";
+ }
+
+ private:
+ Or() = default;  // private default constructor; Instance() is the intended access path
+};
+} // namespace paimon
diff --git a/src/paimon/common/predicate/starts_with.cpp
b/src/paimon/common/predicate/starts_with.cpp
new file mode 100644
index 0000000..0796932
--- /dev/null
+++ b/src/paimon/common/predicate/starts_with.cpp
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/common/predicate/starts_with.h"
+
+#include "paimon/common/utils/string_utils.h"
+
+namespace paimon {
+
+Result<bool> StartsWith::TestString(const std::string& field, const
std::string& pattern) const {  // per-value check, delegated entirely to StringUtils
+ return StringUtils::StartsWith(field, pattern);  // NOTE(review): presumably true when field begins with pattern; helper is defined elsewhere
+}
+
+Result<bool> StartsWith::Test(int64_t row_count, const Literal& min_value,
const Literal& max_value,
+ const std::optional<int64_t>& null_count,
+ const Literal& pattern_literal) const {  // statistics check; row_count and null_count are not consulted
+ const auto min_str = min_value.GetValue<std::string>();  // NOTE(review): assumes all three literals hold strings; not checked here
+ const auto max_str = max_value.GetValue<std::string>();
+ const auto pattern_str = pattern_literal.GetValue<std::string>();
+ PAIMON_ASSIGN_OR_RAISE(const auto min_test, TestString(min_str,
pattern_str));
+ PAIMON_ASSIGN_OR_RAISE(const auto max_test, TestString(max_str,
pattern_str));
+ return (min_test || min_str.compare(pattern_str) <= 0) &&  // min has the prefix or orders at or before the pattern
+ (max_test || max_str.compare(pattern_str) >= 0);  // max has the prefix or orders at or after the pattern
+}
+} // namespace paimon
diff --git a/src/paimon/common/predicate/starts_with.h
b/src/paimon/common/predicate/starts_with.h
new file mode 100644
index 0000000..caba8ef
--- /dev/null
+++ b/src/paimon/common/predicate/starts_with.h
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <string>
+
+#include "paimon/common/predicate/string_leaf_binary_function.h"
+#include "paimon/common/utils/string_utils.h"
+#include "paimon/result.h"
+
+namespace paimon {
+/// A `StringLeafBinaryFunction` to eval filter like 'abc%' or filter like
'abc_'.
+class StartsWith : public StringLeafBinaryFunction {
+ public:
+ static const StartsWith& Instance() {  // accessor for the shared instance
+ static const StartsWith instance = StartsWith();  // function-local static
+ return instance;
+ }
+
+ Type GetType() const override {
+ return Type::STARTS_WITH;
+ }
+
+ std::string ToString() const override {
+ return "StartsWith";
+ }
+
+ Result<bool> TestString(const std::string& field, const std::string&
pattern) const override;  // defined in starts_with.cpp; forwards to StringUtils::StartsWith
+
+ Result<bool> Test(int64_t row_count, const Literal& min_value, const
Literal& max_value,
+ const std::optional<int64_t>& null_count,
+ const Literal& pattern_literal) const override;  // statistics check, defined in starts_with.cpp; uses only min_value, max_value and the pattern
+
+ private:
+ StartsWith() = default;  // private default constructor; Instance() is the intended access path
+};
+} // namespace paimon