westonpace commented on code in PR #13078:
URL: https://github.com/apache/arrow/pull/13078#discussion_r877199514


##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -225,6 +225,76 @@ Result<compute::Declaration> FromProto(const 
substrait::Rel& rel,
       });
     }
 
+    case substrait::Rel::RelTypeCase::kJoin: {
+      const auto& join = rel.join();
+      RETURN_NOT_OK(CheckRelCommon(join));
+
+      if (!join.has_left()) {
+        return Status::Invalid("substrait::JoinRel with no left relation");
+      }
+
+      if (!join.has_right()) {
+        return Status::Invalid("substrait::JoinRel with no right relation");
+      }
+
+      compute::JoinType join_type;
+      switch (join.type()) {
+        case 0:
+          return Status::NotImplemented("Unspecified join type is not 
supported");
+        case 1:
+          join_type = compute::JoinType::INNER;
+          break;
+        case 2:
+          return Status::NotImplemented("Outer join type is not supported");
+        case 3:
+          return Status::NotImplemented("Left join type is not supported");
+        case 4:
+          return Status::NotImplemented("Right join type is not supported");
+        case 5:
+          return Status::NotImplemented("Semi join type is not supported");
+        case 6:
+          return Status::NotImplemented("Anti join type is not supported");
+        default:
+          return Status::Invalid("Unsupported join type");
+      }
+
+      ARROW_ASSIGN_OR_RAISE(auto left, FromProto(join.left(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(auto right, FromProto(join.right(), ext_set));
+
+      if (!join.has_expression()) {
+        return Status::Invalid("substrait::JoinRel with no expression");
+      }
+
+      ARROW_ASSIGN_OR_RAISE(auto expression, FromProto(join.expression(), 
ext_set));
+
+      const auto& callptr = expression.call();
+      if (!callptr) {
+        return Status::Invalid(
+            "Only support call expressions as the join key comparison.");

Review Comment:
   ```suggestion
               "A join rel's expression must be a simple equality between keys 
but got ", expression.ToString());
   ```



##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -225,6 +225,76 @@ Result<compute::Declaration> FromProto(const 
substrait::Rel& rel,
       });
     }
 
+    case substrait::Rel::RelTypeCase::kJoin: {
+      const auto& join = rel.join();
+      RETURN_NOT_OK(CheckRelCommon(join));
+
+      if (!join.has_left()) {
+        return Status::Invalid("substrait::JoinRel with no left relation");
+      }
+
+      if (!join.has_right()) {
+        return Status::Invalid("substrait::JoinRel with no right relation");
+      }
+
+      compute::JoinType join_type;
+      switch (join.type()) {
+        case 0:
+          return Status::NotImplemented("Unspecified join type is not 
supported");
+        case 1:
+          join_type = compute::JoinType::INNER;
+          break;
+        case 2:
+          return Status::NotImplemented("Outer join type is not supported");
+        case 3:
+          return Status::NotImplemented("Left join type is not supported");
+        case 4:
+          return Status::NotImplemented("Right join type is not supported");
+        case 5:
+          return Status::NotImplemented("Semi join type is not supported");
+        case 6:
+          return Status::NotImplemented("Anti join type is not supported");

Review Comment:
   What is stopping us from supporting these joins?  Is it just a matter of 
mapping to the appropriate Arrow equivalent enum?  Or a matter of testing?  Is 
there a follow-up PR?



##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -225,6 +225,76 @@ Result<compute::Declaration> FromProto(const 
substrait::Rel& rel,
       });
     }
 
+    case substrait::Rel::RelTypeCase::kJoin: {
+      const auto& join = rel.join();
+      RETURN_NOT_OK(CheckRelCommon(join));
+
+      if (!join.has_left()) {
+        return Status::Invalid("substrait::JoinRel with no left relation");
+      }
+
+      if (!join.has_right()) {
+        return Status::Invalid("substrait::JoinRel with no right relation");
+      }
+
+      compute::JoinType join_type;
+      switch (join.type()) {
+        case 0:
+          return Status::NotImplemented("Unspecified join type is not 
supported");
+        case 1:
+          join_type = compute::JoinType::INNER;
+          break;
+        case 2:
+          return Status::NotImplemented("Outer join type is not supported");
+        case 3:
+          return Status::NotImplemented("Left join type is not supported");
+        case 4:
+          return Status::NotImplemented("Right join type is not supported");
+        case 5:
+          return Status::NotImplemented("Semi join type is not supported");
+        case 6:
+          return Status::NotImplemented("Anti join type is not supported");
+        default:
+          return Status::Invalid("Unsupported join type");
+      }
+
+      ARROW_ASSIGN_OR_RAISE(auto left, FromProto(join.left(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(auto right, FromProto(join.right(), ext_set));
+
+      if (!join.has_expression()) {
+        return Status::Invalid("substrait::JoinRel with no expression");
+      }
+
+      ARROW_ASSIGN_OR_RAISE(auto expression, FromProto(join.expression(), 
ext_set));
+
+      const auto& callptr = expression.call();
+      if (!callptr) {
+        return Status::Invalid(
+            "Only support call expressions as the join key comparison.");
+      }
+
+      compute::JoinKeyCmp join_key_cmp;
+      if (callptr->function_name == "equal") {
+        join_key_cmp = compute::JoinKeyCmp::EQ;
+      } else if (callptr->function_name == "is_not_distinct_from") {
+        join_key_cmp = compute::JoinKeyCmp::IS;
+      } else {
+        return Status::Invalid(
+            "Only Support `equal` or `is_not_distinct_from` for join key 
comparison");
+      }
+
+      // TODO: Add Suffix support for Substrait
+      compute::HashJoinNodeOptions join_options{
+          {std::move(*callptr->arguments[0].field_ref())},
+          {std::move(*callptr->arguments[1].field_ref())}};

Review Comment:
   Should we do validation to ensure that both arguments are indeed 
`field_ref`?  Otherwise I think we risk dereferncing a null pointer here.



##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -225,6 +225,76 @@ Result<compute::Declaration> FromProto(const 
substrait::Rel& rel,
       });
     }
 
+    case substrait::Rel::RelTypeCase::kJoin: {
+      const auto& join = rel.join();
+      RETURN_NOT_OK(CheckRelCommon(join));
+
+      if (!join.has_left()) {
+        return Status::Invalid("substrait::JoinRel with no left relation");
+      }
+
+      if (!join.has_right()) {
+        return Status::Invalid("substrait::JoinRel with no right relation");
+      }
+
+      compute::JoinType join_type;
+      switch (join.type()) {
+        case 0:
+          return Status::NotImplemented("Unspecified join type is not 
supported");
+        case 1:
+          join_type = compute::JoinType::INNER;
+          break;
+        case 2:
+          return Status::NotImplemented("Outer join type is not supported");
+        case 3:
+          return Status::NotImplemented("Left join type is not supported");
+        case 4:
+          return Status::NotImplemented("Right join type is not supported");
+        case 5:
+          return Status::NotImplemented("Semi join type is not supported");
+        case 6:
+          return Status::NotImplemented("Anti join type is not supported");
+        default:
+          return Status::Invalid("Unsupported join type");
+      }
+
+      ARROW_ASSIGN_OR_RAISE(auto left, FromProto(join.left(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(auto right, FromProto(join.right(), ext_set));
+
+      if (!join.has_expression()) {
+        return Status::Invalid("substrait::JoinRel with no expression");
+      }
+
+      ARROW_ASSIGN_OR_RAISE(auto expression, FromProto(join.expression(), 
ext_set));
+
+      const auto& callptr = expression.call();
+      if (!callptr) {
+        return Status::Invalid(
+            "Only support call expressions as the join key comparison.");
+      }
+
+      compute::JoinKeyCmp join_key_cmp;
+      if (callptr->function_name == "equal") {
+        join_key_cmp = compute::JoinKeyCmp::EQ;
+      } else if (callptr->function_name == "is_not_distinct_from") {
+        join_key_cmp = compute::JoinKeyCmp::IS;
+      } else {
+        return Status::Invalid(
+            "Only Support `equal` or `is_not_distinct_from` for join key 
comparison");
+      }
+
+      // TODO: Add Suffix support for Substrait
+      compute::HashJoinNodeOptions join_options{
+          {std::move(*callptr->arguments[0].field_ref())},
+          {std::move(*callptr->arguments[1].field_ref())}};
+      join_options.join_type = join_type;
+      join_options.key_cmp = {join_key_cmp};
+      compute::Declaration join_dec{"hashjoin", std::move(join_options)};
+      join_dec.inputs.emplace_back(std::move(left));
+      join_dec.inputs.emplace_back(std::move(right));
+      return compute::Declaration::Sequence({std::move(join_dec)});

Review Comment:
   I'm not sure we need to call `compute::Declaration::Sequence` here.  We can 
just return `join_dec`.



##########
cpp/src/arrow/engine/substrait/serde_test.cc:
##########
@@ -750,5 +750,309 @@ TEST(Substrait, ExtensionSetFromPlanMissingFunc) {
           &ext_set));
 }
 
+TEST(Substrait, JoinPlanBasic) {

Review Comment:
   I talked with @cpcloud and @saulpw about this today and I think we should 
also investigate using Ibis (for python tests) and Saul is going to try and 
make a C++ lib for [jdot](https://github.com/saulpw/jdot).  We can worry about 
it later though.  This is fine for this PR.



##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -225,6 +225,76 @@ Result<compute::Declaration> FromProto(const 
substrait::Rel& rel,
       });
     }
 
+    case substrait::Rel::RelTypeCase::kJoin: {
+      const auto& join = rel.join();
+      RETURN_NOT_OK(CheckRelCommon(join));
+
+      if (!join.has_left()) {
+        return Status::Invalid("substrait::JoinRel with no left relation");
+      }
+
+      if (!join.has_right()) {
+        return Status::Invalid("substrait::JoinRel with no right relation");
+      }
+
+      compute::JoinType join_type;
+      switch (join.type()) {
+        case 0:
+          return Status::NotImplemented("Unspecified join type is not 
supported");
+        case 1:
+          join_type = compute::JoinType::INNER;
+          break;
+        case 2:
+          return Status::NotImplemented("Outer join type is not supported");
+        case 3:
+          return Status::NotImplemented("Left join type is not supported");
+        case 4:
+          return Status::NotImplemented("Right join type is not supported");
+        case 5:
+          return Status::NotImplemented("Semi join type is not supported");
+        case 6:
+          return Status::NotImplemented("Anti join type is not supported");
+        default:
+          return Status::Invalid("Unsupported join type");
+      }
+
+      ARROW_ASSIGN_OR_RAISE(auto left, FromProto(join.left(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(auto right, FromProto(join.right(), ext_set));
+
+      if (!join.has_expression()) {
+        return Status::Invalid("substrait::JoinRel with no expression");
+      }
+
+      ARROW_ASSIGN_OR_RAISE(auto expression, FromProto(join.expression(), 
ext_set));
+
+      const auto& callptr = expression.call();
+      if (!callptr) {
+        return Status::Invalid(
+            "Only support call expressions as the join key comparison.");
+      }
+
+      compute::JoinKeyCmp join_key_cmp;
+      if (callptr->function_name == "equal") {
+        join_key_cmp = compute::JoinKeyCmp::EQ;
+      } else if (callptr->function_name == "is_not_distinct_from") {
+        join_key_cmp = compute::JoinKeyCmp::IS;
+      } else {
+        return Status::Invalid(
+            "Only Support `equal` or `is_not_distinct_from` for join key 
comparison");

Review Comment:
   ```suggestion
               "Only `equal` or `is_not_distinct_from` are supported for join 
key comparison but got ", callptr->function_name);
   ```



##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -225,6 +225,76 @@ Result<compute::Declaration> FromProto(const 
substrait::Rel& rel,
       });
     }
 
+    case substrait::Rel::RelTypeCase::kJoin: {
+      const auto& join = rel.join();
+      RETURN_NOT_OK(CheckRelCommon(join));
+
+      if (!join.has_left()) {
+        return Status::Invalid("substrait::JoinRel with no left relation");
+      }
+
+      if (!join.has_right()) {
+        return Status::Invalid("substrait::JoinRel with no right relation");
+      }
+
+      compute::JoinType join_type;
+      switch (join.type()) {
+        case 0:
+          return Status::NotImplemented("Unspecified join type is not 
supported");
+        case 1:
+          join_type = compute::JoinType::INNER;
+          break;
+        case 2:
+          return Status::NotImplemented("Outer join type is not supported");
+        case 3:
+          return Status::NotImplemented("Left join type is not supported");
+        case 4:
+          return Status::NotImplemented("Right join type is not supported");
+        case 5:
+          return Status::NotImplemented("Semi join type is not supported");
+        case 6:
+          return Status::NotImplemented("Anti join type is not supported");
+        default:
+          return Status::Invalid("Unsupported join type");
+      }
+
+      ARROW_ASSIGN_OR_RAISE(auto left, FromProto(join.left(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(auto right, FromProto(join.right(), ext_set));
+
+      if (!join.has_expression()) {
+        return Status::Invalid("substrait::JoinRel with no expression");
+      }
+
+      ARROW_ASSIGN_OR_RAISE(auto expression, FromProto(join.expression(), 
ext_set));
+
+      const auto& callptr = expression.call();
+      if (!callptr) {
+        return Status::Invalid(
+            "Only support call expressions as the join key comparison.");
+      }
+
+      compute::JoinKeyCmp join_key_cmp;
+      if (callptr->function_name == "equal") {
+        join_key_cmp = compute::JoinKeyCmp::EQ;
+      } else if (callptr->function_name == "is_not_distinct_from") {
+        join_key_cmp = compute::JoinKeyCmp::IS;
+      } else {
+        return Status::Invalid(
+            "Only Support `equal` or `is_not_distinct_from` for join key 
comparison");
+      }
+
+      // TODO: Add Suffix support for Substrait

Review Comment:
   Create a follow-up JIRA or link to a Substrait issue.



##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -188,6 +188,79 @@ Result<compute::Declaration> FromProto(const 
substrait::Rel& rel,
       });
     }
 
+    case substrait::Rel::RelTypeCase::kJoin: {
+      const auto& join = rel.join();
+      RETURN_NOT_OK(CheckRelCommon(join));
+
+      if (!join.has_left()) {
+        return Status::Invalid("substrait::JoinRel with no left relation");
+      }
+
+      if (!join.has_right()) {
+        return Status::Invalid("substrait::JoinRel with no right relation");
+      }
+
+      compute::JoinType join_type;
+      switch (join.type()) {
+        case 0:
+          return Status::NotImplemented("Unspecified join type is not 
supported");
+        case 1:
+          join_type = compute::JoinType::INNER;
+          break;
+        case 2:
+          return Status::NotImplemented("Outer join type is not supported");
+        case 3:
+          return Status::NotImplemented("Left join type is not supported");
+        case 4:
+          return Status::NotImplemented("Right join type is not supported");
+        case 5:
+          return Status::NotImplemented("Semi join type is not supported");
+        case 6:
+          return Status::NotImplemented("Anti join type is not supported");
+        default:
+          return Status::Invalid("Unsupported join type");
+      }
+
+      ARROW_ASSIGN_OR_RAISE(auto left, FromProto(join.left(), ext_set));
+      ARROW_ASSIGN_OR_RAISE(auto right, FromProto(join.right(), ext_set));
+
+      if (!join.has_expression()) {
+        return Status::Invalid("substrait::JoinRel with no expression");
+      }
+
+      ARROW_ASSIGN_OR_RAISE(auto expression, FromProto(join.expression(), 
ext_set));
+
+      const auto& callptr = expression.call();

Review Comment:
   `auto` is very odd with pointers:
   
   ```
   int x = 7;
   auto auto_only = &x; // auto_only will have type: int*
   auto* auto_ptr = &x; // auto_ptr will have type: int*
   const autu& auto_cref = &x; // auto_cref will have type: int * const &
   ```
   
   General rule of thumb is that `auto` is ok but `auto*` is more explicit 
(`auto* auto_ptr = x` will fail to compile) and also helpful to the reader so 
they know what is being returned.  We don't really follow any strict pattern 
within the Arrow code base today so I think both `auto` and `auto*` would be 
acceptable in this case.  However, `const auto&` is doing something different 
and should not be used here.
   
   [Example](https://godbolt.org/z/b4KnKfTM6)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to