This is an automated email from the ASF dual-hosted git repository.
hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new a4571dd78 [GLUTEN-6834][CORE] feat: add other join types from the official Substrait (#6835)
a4571dd78 is described below
commit a4571dd78befdfde4fdba61adf49ad733995bb60
Author: David Sisson <[email protected]>
AuthorDate: Sun Sep 8 18:49:08 2024 -0700
[GLUTEN-6834][CORE] feat: add other join types from the official Substrait (#6835)
---
.../gluten/execution/CHHashJoinExecTransformer.scala | 2 +-
.../execution/ShuffledHashJoinExecTransformer.scala | 4 ++--
cpp-ch/local-engine/Common/CHUtil.cpp | 2 +-
cpp-ch/local-engine/Parser/JoinRelParser.cpp | 2 +-
cpp/velox/substrait/SubstraitToVeloxPlan.cc | 2 +-
cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc | 4 ++--
.../resources/substrait/proto/substrait/algebra.proto | 18 +++++++++++++-----
.../execution/SortMergeJoinExecTransformer.scala | 2 +-
.../scala/org/apache/gluten/utils/SubstraitUtil.scala | 2 +-
9 files changed, 23 insertions(+), 15 deletions(-)
diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHHashJoinExecTransformer.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHHashJoinExecTransformer.scala
index 41def4d42..43f19c30e 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHHashJoinExecTransformer.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHHashJoinExecTransformer.scala
@@ -67,7 +67,7 @@ object JoinTypeTransform {
if (!buildRight) {
throw new IllegalArgumentException("LeftAnti join should not switch children")
}
- JoinRel.JoinType.JOIN_TYPE_ANTI
+ JoinRel.JoinType.JOIN_TYPE_LEFT_ANTI
case _ =>
// TODO: Support cross join with Cross Rel
JoinRel.JoinType.UNRECOGNIZED
diff --git a/backends-velox/src/main/scala/org/apache/gluten/execution/ShuffledHashJoinExecTransformer.scala b/backends-velox/src/main/scala/org/apache/gluten/execution/ShuffledHashJoinExecTransformer.scala
index d78e6c5b3..e3c93848d 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/execution/ShuffledHashJoinExecTransformer.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/execution/ShuffledHashJoinExecTransformer.scala
@@ -69,7 +69,7 @@ case class ShuffledHashJoinExecTransformer(
JoinRel.JoinType.JOIN_TYPE_LEFT_SEMI
}
case LeftAnti =>
- JoinRel.JoinType.JOIN_TYPE_ANTI
+ JoinRel.JoinType.JOIN_TYPE_LEFT_ANTI
case _ =>
JoinRel.JoinType.UNRECOGNIZED
}
@@ -112,7 +112,7 @@ case class BroadcastHashJoinExecTransformer(
case LeftSemi | ExistenceJoin(_) =>
JoinRel.JoinType.JOIN_TYPE_LEFT_SEMI
case LeftAnti =>
- JoinRel.JoinType.JOIN_TYPE_ANTI
+ JoinRel.JoinType.JOIN_TYPE_LEFT_ANTI
case _ =>
// TODO: Support cross join with Cross Rel
JoinRel.JoinType.UNRECOGNIZED
diff --git a/cpp-ch/local-engine/Common/CHUtil.cpp b/cpp-ch/local-engine/Common/CHUtil.cpp
index e29584d4c..b787888f5 100644
--- a/cpp-ch/local-engine/Common/CHUtil.cpp
+++ b/cpp-ch/local-engine/Common/CHUtil.cpp
@@ -1105,7 +1105,7 @@ JoinUtil::getJoinKindAndStrictness(substrait::JoinRel_JoinType join_type, bool i
return {DB::JoinKind::Left, DB::JoinStrictness::Any};
return {DB::JoinKind::Left, DB::JoinStrictness::Semi};
}
- case substrait::JoinRel_JoinType_JOIN_TYPE_ANTI:
+ case substrait::JoinRel_JoinType_JOIN_TYPE_LEFT_ANTI:
return {DB::JoinKind::Left, DB::JoinStrictness::Anti};
case substrait::JoinRel_JoinType_JOIN_TYPE_LEFT:
return {DB::JoinKind::Left, DB::JoinStrictness::All};
diff --git a/cpp-ch/local-engine/Parser/JoinRelParser.cpp b/cpp-ch/local-engine/Parser/JoinRelParser.cpp
index 6f8877523..46b68a4d3 100644
--- a/cpp-ch/local-engine/Parser/JoinRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/JoinRelParser.cpp
@@ -269,7 +269,7 @@ DB::QueryPlanPtr JoinRelParser::parseJoin(const substrait::JoinRel & join, DB::Q
if (storage_join)
{
- if (join_opt_info.is_null_aware_anti_join && join.type() == substrait::JoinRel_JoinType_JOIN_TYPE_ANTI)
+ if (join_opt_info.is_null_aware_anti_join && join.type() == substrait::JoinRel_JoinType_JOIN_TYPE_LEFT_ANTI)
{
if (storage_join->has_null_key_value)
{
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
index 67676b035..0dab6b280 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
@@ -309,7 +309,7 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
joinType = core::JoinType::kRightSemiFilter;
}
break;
- case ::substrait::JoinRel_JoinType::JoinRel_JoinType_JOIN_TYPE_ANTI: {
+ case ::substrait::JoinRel_JoinType::JoinRel_JoinType_JOIN_TYPE_LEFT_ANTI: {
// Determine the anti join type based on extracted information.
if (sJoin.has_advanced_extension() &&
SubstraitParser::configSetInOptimization(sJoin.advanced_extension(),
"isNullAwareAntiJoin=")) {
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index 60a8d38d1..42d91bd48 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -899,7 +899,7 @@ bool SubstraitToVeloxPlanValidator::validate(const ::substrait::JoinRel& joinRel
case ::substrait::JoinRel_JoinType_JOIN_TYPE_RIGHT:
case ::substrait::JoinRel_JoinType_JOIN_TYPE_LEFT_SEMI:
case ::substrait::JoinRel_JoinType_JOIN_TYPE_RIGHT_SEMI:
- case ::substrait::JoinRel_JoinType_JOIN_TYPE_ANTI:
+ case ::substrait::JoinRel_JoinType_JOIN_TYPE_LEFT_ANTI:
break;
default:
LOG_VALIDATION_MSG("Sort merge join type is not supported: " + std::to_string(joinRel.type()));
@@ -913,7 +913,7 @@ bool SubstraitToVeloxPlanValidator::validate(const ::substrait::JoinRel& joinRel
case ::substrait::JoinRel_JoinType_JOIN_TYPE_RIGHT:
case ::substrait::JoinRel_JoinType_JOIN_TYPE_LEFT_SEMI:
case ::substrait::JoinRel_JoinType_JOIN_TYPE_RIGHT_SEMI:
- case ::substrait::JoinRel_JoinType_JOIN_TYPE_ANTI:
+ case ::substrait::JoinRel_JoinType_JOIN_TYPE_LEFT_ANTI:
break;
default:
LOG_VALIDATION_MSG("Join type is not supported: " + std::to_string(joinRel.type()));
diff --git a/gluten-substrait/src/main/resources/substrait/proto/substrait/algebra.proto b/gluten-substrait/src/main/resources/substrait/proto/substrait/algebra.proto
index 3813de868..0abb50b32 100644
--- a/gluten-substrait/src/main/resources/substrait/proto/substrait/algebra.proto
+++ b/gluten-substrait/src/main/resources/substrait/proto/substrait/algebra.proto
@@ -234,11 +234,11 @@ message JoinRel {
JOIN_TYPE_LEFT = 3;
JOIN_TYPE_RIGHT = 4;
JOIN_TYPE_LEFT_SEMI = 5;
- JOIN_TYPE_RIGHT_SEMI = 6;
- JOIN_TYPE_ANTI = 7;
- // This join is useful for nested sub-queries where we need exactly one tuple in output (or throw exception)
- // See Section 3.2 of https://15721.courses.cs.cmu.edu/spring2018/papers/16-optimizer2/hyperjoins-btw2017.pdf
- JOIN_TYPE_SINGLE = 8;
+ JOIN_TYPE_LEFT_ANTI = 6;
+ JOIN_TYPE_LEFT_SINGLE = 7;
+ JOIN_TYPE_RIGHT_SEMI = 8;
+ JOIN_TYPE_RIGHT_ANTI = 9;
+ JOIN_TYPE_RIGHT_SINGLE = 10;
}
substrait.extensions.AdvancedExtension advanced_extension = 10;
@@ -253,6 +253,7 @@ message CrossRel {
JoinType type = 5;
+ // TODO -- Remove this unnecessary type.
enum JoinType {
JOIN_TYPE_UNSPECIFIED = 0;
JOIN_TYPE_INNER = 1;
@@ -260,6 +261,11 @@ message CrossRel {
JOIN_TYPE_LEFT = 3;
JOIN_TYPE_RIGHT = 4;
JOIN_TYPE_LEFT_SEMI = 5;
+ JOIN_TYPE_LEFT_ANTI = 6;
+ JOIN_TYPE_LEFT_SINGLE = 7;
+ JOIN_TYPE_RIGHT_SEMI = 8;
+ JOIN_TYPE_RIGHT_ANTI = 9;
+ JOIN_TYPE_RIGHT_SINGLE = 10;
}
substrait.extensions.AdvancedExtension advanced_extension = 10;
@@ -649,6 +655,8 @@ message HashJoinRel {
JOIN_TYPE_RIGHT_SEMI = 6;
JOIN_TYPE_LEFT_ANTI = 7;
JOIN_TYPE_RIGHT_ANTI = 8;
+ JOIN_TYPE_LEFT_SINGLE = 9;
+ JOIN_TYPE_RIGHT_SINGLE = 10;
}
substrait.extensions.AdvancedExtension advanced_extension = 10;
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/execution/SortMergeJoinExecTransformer.scala b/gluten-substrait/src/main/scala/org/apache/gluten/execution/SortMergeJoinExecTransformer.scala
index c96789569..5c57e5b62 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/execution/SortMergeJoinExecTransformer.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/execution/SortMergeJoinExecTransformer.scala
@@ -152,7 +152,7 @@ abstract class SortMergeJoinExecTransformerBase(
case LeftSemi =>
JoinRel.JoinType.JOIN_TYPE_LEFT_SEMI
case LeftAnti =>
- JoinRel.JoinType.JOIN_TYPE_ANTI
+ JoinRel.JoinType.JOIN_TYPE_LEFT_ANTI
case _ =>
// TODO: Support cross join with Cross Rel
// TODO: Support existence join
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/utils/SubstraitUtil.scala b/gluten-substrait/src/main/scala/org/apache/gluten/utils/SubstraitUtil.scala
index c641cb448..c15039143 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/utils/SubstraitUtil.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/utils/SubstraitUtil.scala
@@ -43,7 +43,7 @@ object SubstraitUtil {
case LeftSemi =>
JoinRel.JoinType.JOIN_TYPE_LEFT_SEMI
case LeftAnti =>
- JoinRel.JoinType.JOIN_TYPE_ANTI
+ JoinRel.JoinType.JOIN_TYPE_LEFT_ANTI
case _ =>
// TODO: Support existence join
JoinRel.JoinType.UNRECOGNIZED
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]