westonpace commented on code in PR #13078:
URL: https://github.com/apache/arrow/pull/13078#discussion_r877199514
##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -225,6 +225,76 @@ Result<compute::Declaration> FromProto(const
substrait::Rel& rel,
});
}
+ case substrait::Rel::RelTypeCase::kJoin: {
+ const auto& join = rel.join();
+ RETURN_NOT_OK(CheckRelCommon(join));
+
+ if (!join.has_left()) {
+ return Status::Invalid("substrait::JoinRel with no left relation");
+ }
+
+ if (!join.has_right()) {
+ return Status::Invalid("substrait::JoinRel with no right relation");
+ }
+
+ compute::JoinType join_type;
+ switch (join.type()) {
+ case 0:
+ return Status::NotImplemented("Unspecified join type is not
supported");
+ case 1:
+ join_type = compute::JoinType::INNER;
+ break;
+ case 2:
+ return Status::NotImplemented("Outer join type is not supported");
+ case 3:
+ return Status::NotImplemented("Left join type is not supported");
+ case 4:
+ return Status::NotImplemented("Right join type is not supported");
+ case 5:
+ return Status::NotImplemented("Semi join type is not supported");
+ case 6:
+ return Status::NotImplemented("Anti join type is not supported");
+ default:
+ return Status::Invalid("Unsupported join type");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto left, FromProto(join.left(), ext_set));
+ ARROW_ASSIGN_OR_RAISE(auto right, FromProto(join.right(), ext_set));
+
+ if (!join.has_expression()) {
+ return Status::Invalid("substrait::JoinRel with no expression");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto expression, FromProto(join.expression(),
ext_set));
+
+ const auto& callptr = expression.call();
+ if (!callptr) {
+ return Status::Invalid(
+ "Only support call expressions as the join key comparison.");
Review Comment:
```suggestion
"A join rel's expression must be a simple equality between keys
but got ", expression.ToString());
```
##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -225,6 +225,76 @@ Result<compute::Declaration> FromProto(const
substrait::Rel& rel,
});
}
+ case substrait::Rel::RelTypeCase::kJoin: {
+ const auto& join = rel.join();
+ RETURN_NOT_OK(CheckRelCommon(join));
+
+ if (!join.has_left()) {
+ return Status::Invalid("substrait::JoinRel with no left relation");
+ }
+
+ if (!join.has_right()) {
+ return Status::Invalid("substrait::JoinRel with no right relation");
+ }
+
+ compute::JoinType join_type;
+ switch (join.type()) {
+ case 0:
+ return Status::NotImplemented("Unspecified join type is not
supported");
+ case 1:
+ join_type = compute::JoinType::INNER;
+ break;
+ case 2:
+ return Status::NotImplemented("Outer join type is not supported");
+ case 3:
+ return Status::NotImplemented("Left join type is not supported");
+ case 4:
+ return Status::NotImplemented("Right join type is not supported");
+ case 5:
+ return Status::NotImplemented("Semi join type is not supported");
+ case 6:
+ return Status::NotImplemented("Anti join type is not supported");
Review Comment:
What is stopping us from supporting these joins? Is it just a matter of
mapping to the appropriate Arrow equivalent enum? Or a matter of testing? Is
there a follow-up PR?
##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -225,6 +225,76 @@ Result<compute::Declaration> FromProto(const
substrait::Rel& rel,
});
}
+ case substrait::Rel::RelTypeCase::kJoin: {
+ const auto& join = rel.join();
+ RETURN_NOT_OK(CheckRelCommon(join));
+
+ if (!join.has_left()) {
+ return Status::Invalid("substrait::JoinRel with no left relation");
+ }
+
+ if (!join.has_right()) {
+ return Status::Invalid("substrait::JoinRel with no right relation");
+ }
+
+ compute::JoinType join_type;
+ switch (join.type()) {
+ case 0:
+ return Status::NotImplemented("Unspecified join type is not
supported");
+ case 1:
+ join_type = compute::JoinType::INNER;
+ break;
+ case 2:
+ return Status::NotImplemented("Outer join type is not supported");
+ case 3:
+ return Status::NotImplemented("Left join type is not supported");
+ case 4:
+ return Status::NotImplemented("Right join type is not supported");
+ case 5:
+ return Status::NotImplemented("Semi join type is not supported");
+ case 6:
+ return Status::NotImplemented("Anti join type is not supported");
+ default:
+ return Status::Invalid("Unsupported join type");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto left, FromProto(join.left(), ext_set));
+ ARROW_ASSIGN_OR_RAISE(auto right, FromProto(join.right(), ext_set));
+
+ if (!join.has_expression()) {
+ return Status::Invalid("substrait::JoinRel with no expression");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto expression, FromProto(join.expression(),
ext_set));
+
+ const auto& callptr = expression.call();
+ if (!callptr) {
+ return Status::Invalid(
+ "Only support call expressions as the join key comparison.");
+ }
+
+ compute::JoinKeyCmp join_key_cmp;
+ if (callptr->function_name == "equal") {
+ join_key_cmp = compute::JoinKeyCmp::EQ;
+ } else if (callptr->function_name == "is_not_distinct_from") {
+ join_key_cmp = compute::JoinKeyCmp::IS;
+ } else {
+ return Status::Invalid(
+ "Only Support `equal` or `is_not_distinct_from` for join key
comparison");
+ }
+
+ // TODO: Add Suffix support for Substrait
+ compute::HashJoinNodeOptions join_options{
+ {std::move(*callptr->arguments[0].field_ref())},
+ {std::move(*callptr->arguments[1].field_ref())}};
Review Comment:
Should we do validation to ensure that both arguments are indeed
`field_ref`? Otherwise I think we risk dereferncing a null pointer here.
##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -225,6 +225,76 @@ Result<compute::Declaration> FromProto(const
substrait::Rel& rel,
});
}
+ case substrait::Rel::RelTypeCase::kJoin: {
+ const auto& join = rel.join();
+ RETURN_NOT_OK(CheckRelCommon(join));
+
+ if (!join.has_left()) {
+ return Status::Invalid("substrait::JoinRel with no left relation");
+ }
+
+ if (!join.has_right()) {
+ return Status::Invalid("substrait::JoinRel with no right relation");
+ }
+
+ compute::JoinType join_type;
+ switch (join.type()) {
+ case 0:
+ return Status::NotImplemented("Unspecified join type is not
supported");
+ case 1:
+ join_type = compute::JoinType::INNER;
+ break;
+ case 2:
+ return Status::NotImplemented("Outer join type is not supported");
+ case 3:
+ return Status::NotImplemented("Left join type is not supported");
+ case 4:
+ return Status::NotImplemented("Right join type is not supported");
+ case 5:
+ return Status::NotImplemented("Semi join type is not supported");
+ case 6:
+ return Status::NotImplemented("Anti join type is not supported");
+ default:
+ return Status::Invalid("Unsupported join type");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto left, FromProto(join.left(), ext_set));
+ ARROW_ASSIGN_OR_RAISE(auto right, FromProto(join.right(), ext_set));
+
+ if (!join.has_expression()) {
+ return Status::Invalid("substrait::JoinRel with no expression");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto expression, FromProto(join.expression(),
ext_set));
+
+ const auto& callptr = expression.call();
+ if (!callptr) {
+ return Status::Invalid(
+ "Only support call expressions as the join key comparison.");
+ }
+
+ compute::JoinKeyCmp join_key_cmp;
+ if (callptr->function_name == "equal") {
+ join_key_cmp = compute::JoinKeyCmp::EQ;
+ } else if (callptr->function_name == "is_not_distinct_from") {
+ join_key_cmp = compute::JoinKeyCmp::IS;
+ } else {
+ return Status::Invalid(
+ "Only Support `equal` or `is_not_distinct_from` for join key
comparison");
+ }
+
+ // TODO: Add Suffix support for Substrait
+ compute::HashJoinNodeOptions join_options{
+ {std::move(*callptr->arguments[0].field_ref())},
+ {std::move(*callptr->arguments[1].field_ref())}};
+ join_options.join_type = join_type;
+ join_options.key_cmp = {join_key_cmp};
+ compute::Declaration join_dec{"hashjoin", std::move(join_options)};
+ join_dec.inputs.emplace_back(std::move(left));
+ join_dec.inputs.emplace_back(std::move(right));
+ return compute::Declaration::Sequence({std::move(join_dec)});
Review Comment:
I'm not sure we need to call `compute::Declaration::Sequence` here. We can
just return `join_dec`.
##########
cpp/src/arrow/engine/substrait/serde_test.cc:
##########
@@ -750,5 +750,309 @@ TEST(Substrait, ExtensionSetFromPlanMissingFunc) {
&ext_set));
}
+TEST(Substrait, JoinPlanBasic) {
Review Comment:
I talked with @cpcloud and @saulpw about this today and I think we should
also investigate using Ibis (for python tests) and Saul is going to try and
make a C++ lib for [jdot](https://github.com/saulpw/jdot). We can worry about
it later though. This is fine for this PR.
##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -225,6 +225,76 @@ Result<compute::Declaration> FromProto(const
substrait::Rel& rel,
});
}
+ case substrait::Rel::RelTypeCase::kJoin: {
+ const auto& join = rel.join();
+ RETURN_NOT_OK(CheckRelCommon(join));
+
+ if (!join.has_left()) {
+ return Status::Invalid("substrait::JoinRel with no left relation");
+ }
+
+ if (!join.has_right()) {
+ return Status::Invalid("substrait::JoinRel with no right relation");
+ }
+
+ compute::JoinType join_type;
+ switch (join.type()) {
+ case 0:
+ return Status::NotImplemented("Unspecified join type is not
supported");
+ case 1:
+ join_type = compute::JoinType::INNER;
+ break;
+ case 2:
+ return Status::NotImplemented("Outer join type is not supported");
+ case 3:
+ return Status::NotImplemented("Left join type is not supported");
+ case 4:
+ return Status::NotImplemented("Right join type is not supported");
+ case 5:
+ return Status::NotImplemented("Semi join type is not supported");
+ case 6:
+ return Status::NotImplemented("Anti join type is not supported");
+ default:
+ return Status::Invalid("Unsupported join type");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto left, FromProto(join.left(), ext_set));
+ ARROW_ASSIGN_OR_RAISE(auto right, FromProto(join.right(), ext_set));
+
+ if (!join.has_expression()) {
+ return Status::Invalid("substrait::JoinRel with no expression");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto expression, FromProto(join.expression(),
ext_set));
+
+ const auto& callptr = expression.call();
+ if (!callptr) {
+ return Status::Invalid(
+ "Only support call expressions as the join key comparison.");
+ }
+
+ compute::JoinKeyCmp join_key_cmp;
+ if (callptr->function_name == "equal") {
+ join_key_cmp = compute::JoinKeyCmp::EQ;
+ } else if (callptr->function_name == "is_not_distinct_from") {
+ join_key_cmp = compute::JoinKeyCmp::IS;
+ } else {
+ return Status::Invalid(
+ "Only Support `equal` or `is_not_distinct_from` for join key
comparison");
Review Comment:
```suggestion
"Only `equal` or `is_not_distinct_from` are supported for join
key comparison but got ", callptr->function_name);
```
##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -225,6 +225,76 @@ Result<compute::Declaration> FromProto(const
substrait::Rel& rel,
});
}
+ case substrait::Rel::RelTypeCase::kJoin: {
+ const auto& join = rel.join();
+ RETURN_NOT_OK(CheckRelCommon(join));
+
+ if (!join.has_left()) {
+ return Status::Invalid("substrait::JoinRel with no left relation");
+ }
+
+ if (!join.has_right()) {
+ return Status::Invalid("substrait::JoinRel with no right relation");
+ }
+
+ compute::JoinType join_type;
+ switch (join.type()) {
+ case 0:
+ return Status::NotImplemented("Unspecified join type is not
supported");
+ case 1:
+ join_type = compute::JoinType::INNER;
+ break;
+ case 2:
+ return Status::NotImplemented("Outer join type is not supported");
+ case 3:
+ return Status::NotImplemented("Left join type is not supported");
+ case 4:
+ return Status::NotImplemented("Right join type is not supported");
+ case 5:
+ return Status::NotImplemented("Semi join type is not supported");
+ case 6:
+ return Status::NotImplemented("Anti join type is not supported");
+ default:
+ return Status::Invalid("Unsupported join type");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto left, FromProto(join.left(), ext_set));
+ ARROW_ASSIGN_OR_RAISE(auto right, FromProto(join.right(), ext_set));
+
+ if (!join.has_expression()) {
+ return Status::Invalid("substrait::JoinRel with no expression");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto expression, FromProto(join.expression(),
ext_set));
+
+ const auto& callptr = expression.call();
+ if (!callptr) {
+ return Status::Invalid(
+ "Only support call expressions as the join key comparison.");
+ }
+
+ compute::JoinKeyCmp join_key_cmp;
+ if (callptr->function_name == "equal") {
+ join_key_cmp = compute::JoinKeyCmp::EQ;
+ } else if (callptr->function_name == "is_not_distinct_from") {
+ join_key_cmp = compute::JoinKeyCmp::IS;
+ } else {
+ return Status::Invalid(
+ "Only Support `equal` or `is_not_distinct_from` for join key
comparison");
+ }
+
+ // TODO: Add Suffix support for Substrait
Review Comment:
Create a follow-up JIRA or link to a Substrait issue.
##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -188,6 +188,79 @@ Result<compute::Declaration> FromProto(const
substrait::Rel& rel,
});
}
+ case substrait::Rel::RelTypeCase::kJoin: {
+ const auto& join = rel.join();
+ RETURN_NOT_OK(CheckRelCommon(join));
+
+ if (!join.has_left()) {
+ return Status::Invalid("substrait::JoinRel with no left relation");
+ }
+
+ if (!join.has_right()) {
+ return Status::Invalid("substrait::JoinRel with no right relation");
+ }
+
+ compute::JoinType join_type;
+ switch (join.type()) {
+ case 0:
+ return Status::NotImplemented("Unspecified join type is not
supported");
+ case 1:
+ join_type = compute::JoinType::INNER;
+ break;
+ case 2:
+ return Status::NotImplemented("Outer join type is not supported");
+ case 3:
+ return Status::NotImplemented("Left join type is not supported");
+ case 4:
+ return Status::NotImplemented("Right join type is not supported");
+ case 5:
+ return Status::NotImplemented("Semi join type is not supported");
+ case 6:
+ return Status::NotImplemented("Anti join type is not supported");
+ default:
+ return Status::Invalid("Unsupported join type");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto left, FromProto(join.left(), ext_set));
+ ARROW_ASSIGN_OR_RAISE(auto right, FromProto(join.right(), ext_set));
+
+ if (!join.has_expression()) {
+ return Status::Invalid("substrait::JoinRel with no expression");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto expression, FromProto(join.expression(),
ext_set));
+
+ const auto& callptr = expression.call();
Review Comment:
`auto` is very odd with pointers:
```
int x = 7;
auto auto_only = &x; // auto_only will have type: int*
auto* auto_ptr = &x; // auto_ptr will have type: int*
const autu& auto_cref = &x; // auto_cref will have type: int * const &
```
General rule of thumb is that `auto` is ok but `auto*` is more explicit
(`auto* auto_ptr = x` will fail to compile) and also helpful to the reader so
they know what is being returned. We don't really follow any strict pattern
within the Arrow code base today so I think both `auto` and `auto*` would be
acceptable in this case. However, `const auto&` is doing something different
and should not be used here.
[Example](https://godbolt.org/z/b4KnKfTM6)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]