This is an automated email from the ASF dual-hosted git repository.

hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new a4571dd78 [GLUTEN-6834][CORE] feat: add other join types from the official Substrait (#6835)
a4571dd78 is described below

commit a4571dd78befdfde4fdba61adf49ad733995bb60
Author: David Sisson <[email protected]>
AuthorDate: Sun Sep 8 18:49:08 2024 -0700

    [GLUTEN-6834][CORE] feat: add other join types from the official Substrait (#6835)
---
 .../gluten/execution/CHHashJoinExecTransformer.scala   |  2 +-
 .../execution/ShuffledHashJoinExecTransformer.scala    |  4 ++--
 cpp-ch/local-engine/Common/CHUtil.cpp                  |  2 +-
 cpp-ch/local-engine/Parser/JoinRelParser.cpp           |  2 +-
 cpp/velox/substrait/SubstraitToVeloxPlan.cc            |  2 +-
 cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc   |  4 ++--
 .../resources/substrait/proto/substrait/algebra.proto  | 18 +++++++++++++-----
 .../execution/SortMergeJoinExecTransformer.scala       |  2 +-
 .../scala/org/apache/gluten/utils/SubstraitUtil.scala  |  2 +-
 9 files changed, 23 insertions(+), 15 deletions(-)

diff --git 
a/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHHashJoinExecTransformer.scala
 
b/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHHashJoinExecTransformer.scala
index 41def4d42..43f19c30e 100644
--- 
a/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHHashJoinExecTransformer.scala
+++ 
b/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHHashJoinExecTransformer.scala
@@ -67,7 +67,7 @@ object JoinTypeTransform {
         if (!buildRight) {
           throw new IllegalArgumentException("LeftAnti join should not switch 
children")
         }
-        JoinRel.JoinType.JOIN_TYPE_ANTI
+        JoinRel.JoinType.JOIN_TYPE_LEFT_ANTI
       case _ =>
         // TODO: Support cross join with Cross Rel
         JoinRel.JoinType.UNRECOGNIZED
diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/execution/ShuffledHashJoinExecTransformer.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/execution/ShuffledHashJoinExecTransformer.scala
index d78e6c5b3..e3c93848d 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/execution/ShuffledHashJoinExecTransformer.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/execution/ShuffledHashJoinExecTransformer.scala
@@ -69,7 +69,7 @@ case class ShuffledHashJoinExecTransformer(
         JoinRel.JoinType.JOIN_TYPE_LEFT_SEMI
       }
     case LeftAnti =>
-      JoinRel.JoinType.JOIN_TYPE_ANTI
+      JoinRel.JoinType.JOIN_TYPE_LEFT_ANTI
     case _ =>
       JoinRel.JoinType.UNRECOGNIZED
   }
@@ -112,7 +112,7 @@ case class BroadcastHashJoinExecTransformer(
     case LeftSemi | ExistenceJoin(_) =>
       JoinRel.JoinType.JOIN_TYPE_LEFT_SEMI
     case LeftAnti =>
-      JoinRel.JoinType.JOIN_TYPE_ANTI
+      JoinRel.JoinType.JOIN_TYPE_LEFT_ANTI
     case _ =>
       // TODO: Support cross join with Cross Rel
       JoinRel.JoinType.UNRECOGNIZED
diff --git a/cpp-ch/local-engine/Common/CHUtil.cpp 
b/cpp-ch/local-engine/Common/CHUtil.cpp
index e29584d4c..b787888f5 100644
--- a/cpp-ch/local-engine/Common/CHUtil.cpp
+++ b/cpp-ch/local-engine/Common/CHUtil.cpp
@@ -1105,7 +1105,7 @@ 
JoinUtil::getJoinKindAndStrictness(substrait::JoinRel_JoinType join_type, bool i
                 return {DB::JoinKind::Left, DB::JoinStrictness::Any};
             return {DB::JoinKind::Left, DB::JoinStrictness::Semi};
         }
-        case substrait::JoinRel_JoinType_JOIN_TYPE_ANTI:
+        case substrait::JoinRel_JoinType_JOIN_TYPE_LEFT_ANTI:
             return {DB::JoinKind::Left, DB::JoinStrictness::Anti};
         case substrait::JoinRel_JoinType_JOIN_TYPE_LEFT:
             return {DB::JoinKind::Left, DB::JoinStrictness::All};
diff --git a/cpp-ch/local-engine/Parser/JoinRelParser.cpp 
b/cpp-ch/local-engine/Parser/JoinRelParser.cpp
index 6f8877523..46b68a4d3 100644
--- a/cpp-ch/local-engine/Parser/JoinRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/JoinRelParser.cpp
@@ -269,7 +269,7 @@ DB::QueryPlanPtr JoinRelParser::parseJoin(const 
substrait::JoinRel & join, DB::Q
 
     if (storage_join)
     {
-        if (join_opt_info.is_null_aware_anti_join && join.type() == 
substrait::JoinRel_JoinType_JOIN_TYPE_ANTI)
+        if (join_opt_info.is_null_aware_anti_join && join.type() == 
substrait::JoinRel_JoinType_JOIN_TYPE_LEFT_ANTI)
         {
             if (storage_join->has_null_key_value)
             {
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc 
b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
index 67676b035..0dab6b280 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
@@ -309,7 +309,7 @@ core::PlanNodePtr 
SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
         joinType = core::JoinType::kRightSemiFilter;
       }
       break;
-    case ::substrait::JoinRel_JoinType::JoinRel_JoinType_JOIN_TYPE_ANTI: {
+    case ::substrait::JoinRel_JoinType::JoinRel_JoinType_JOIN_TYPE_LEFT_ANTI: {
       // Determine the anti join type based on extracted information.
       if (sJoin.has_advanced_extension() &&
           SubstraitParser::configSetInOptimization(sJoin.advanced_extension(), 
"isNullAwareAntiJoin=")) {
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc 
b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index 60a8d38d1..42d91bd48 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -899,7 +899,7 @@ bool SubstraitToVeloxPlanValidator::validate(const 
::substrait::JoinRel& joinRel
       case ::substrait::JoinRel_JoinType_JOIN_TYPE_RIGHT:
       case ::substrait::JoinRel_JoinType_JOIN_TYPE_LEFT_SEMI:
       case ::substrait::JoinRel_JoinType_JOIN_TYPE_RIGHT_SEMI:
-      case ::substrait::JoinRel_JoinType_JOIN_TYPE_ANTI:
+      case ::substrait::JoinRel_JoinType_JOIN_TYPE_LEFT_ANTI:
         break;
       default:
         LOG_VALIDATION_MSG("Sort merge join type is not supported: " + 
std::to_string(joinRel.type()));
@@ -913,7 +913,7 @@ bool SubstraitToVeloxPlanValidator::validate(const 
::substrait::JoinRel& joinRel
     case ::substrait::JoinRel_JoinType_JOIN_TYPE_RIGHT:
     case ::substrait::JoinRel_JoinType_JOIN_TYPE_LEFT_SEMI:
     case ::substrait::JoinRel_JoinType_JOIN_TYPE_RIGHT_SEMI:
-    case ::substrait::JoinRel_JoinType_JOIN_TYPE_ANTI:
+    case ::substrait::JoinRel_JoinType_JOIN_TYPE_LEFT_ANTI:
       break;
     default:
       LOG_VALIDATION_MSG("Join type is not supported: " + 
std::to_string(joinRel.type()));
diff --git 
a/gluten-substrait/src/main/resources/substrait/proto/substrait/algebra.proto 
b/gluten-substrait/src/main/resources/substrait/proto/substrait/algebra.proto
index 3813de868..0abb50b32 100644
--- 
a/gluten-substrait/src/main/resources/substrait/proto/substrait/algebra.proto
+++ 
b/gluten-substrait/src/main/resources/substrait/proto/substrait/algebra.proto
@@ -234,11 +234,11 @@ message JoinRel {
     JOIN_TYPE_LEFT = 3;
     JOIN_TYPE_RIGHT = 4;
     JOIN_TYPE_LEFT_SEMI = 5;
-    JOIN_TYPE_RIGHT_SEMI = 6;
-    JOIN_TYPE_ANTI = 7;
-    // This join is useful for nested sub-queries where we need exactly one 
tuple in output (or throw exception)
-    // See Section 3.2 of 
https://15721.courses.cs.cmu.edu/spring2018/papers/16-optimizer2/hyperjoins-btw2017.pdf
-    JOIN_TYPE_SINGLE = 8;
+    JOIN_TYPE_LEFT_ANTI = 6;
+    JOIN_TYPE_LEFT_SINGLE = 7;
+    JOIN_TYPE_RIGHT_SEMI = 8;
+    JOIN_TYPE_RIGHT_ANTI = 9;
+    JOIN_TYPE_RIGHT_SINGLE = 10;
   }
 
   substrait.extensions.AdvancedExtension advanced_extension = 10;
@@ -253,6 +253,7 @@ message CrossRel {
 
   JoinType type = 5;
 
+  // TODO -- Remove this unnecessary type.
   enum JoinType {
     JOIN_TYPE_UNSPECIFIED = 0;
     JOIN_TYPE_INNER = 1;
@@ -260,6 +261,11 @@ message CrossRel {
     JOIN_TYPE_LEFT = 3;
     JOIN_TYPE_RIGHT = 4;
     JOIN_TYPE_LEFT_SEMI = 5;
+    JOIN_TYPE_LEFT_ANTI = 6;
+    JOIN_TYPE_LEFT_SINGLE = 7;
+    JOIN_TYPE_RIGHT_SEMI = 8;
+    JOIN_TYPE_RIGHT_ANTI = 9;
+    JOIN_TYPE_RIGHT_SINGLE = 10;
   }
 
   substrait.extensions.AdvancedExtension advanced_extension = 10;
@@ -649,6 +655,8 @@ message HashJoinRel {
     JOIN_TYPE_RIGHT_SEMI = 6;
     JOIN_TYPE_LEFT_ANTI = 7;
     JOIN_TYPE_RIGHT_ANTI = 8;
+    JOIN_TYPE_LEFT_SINGLE = 9;
+    JOIN_TYPE_RIGHT_SINGLE = 10;
   }
 
   substrait.extensions.AdvancedExtension advanced_extension = 10;
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/SortMergeJoinExecTransformer.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/SortMergeJoinExecTransformer.scala
index c96789569..5c57e5b62 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/SortMergeJoinExecTransformer.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/SortMergeJoinExecTransformer.scala
@@ -152,7 +152,7 @@ abstract class SortMergeJoinExecTransformerBase(
     case LeftSemi =>
       JoinRel.JoinType.JOIN_TYPE_LEFT_SEMI
     case LeftAnti =>
-      JoinRel.JoinType.JOIN_TYPE_ANTI
+      JoinRel.JoinType.JOIN_TYPE_LEFT_ANTI
     case _ =>
       // TODO: Support cross join with Cross Rel
       // TODO: Support existence join
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/utils/SubstraitUtil.scala 
b/gluten-substrait/src/main/scala/org/apache/gluten/utils/SubstraitUtil.scala
index c641cb448..c15039143 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/utils/SubstraitUtil.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/utils/SubstraitUtil.scala
@@ -43,7 +43,7 @@ object SubstraitUtil {
     case LeftSemi =>
       JoinRel.JoinType.JOIN_TYPE_LEFT_SEMI
     case LeftAnti =>
-      JoinRel.JoinType.JOIN_TYPE_ANTI
+      JoinRel.JoinType.JOIN_TYPE_LEFT_ANTI
     case _ =>
       // TODO: Support existence join
       JoinRel.JoinType.UNRECOGNIZED


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to