vbarua commented on code in PR #12830:
URL: https://github.com/apache/datafusion/pull/12830#discussion_r1794316654


##########
datafusion/substrait/tests/testdata/test_plans/intersect.substrait.json:
##########
@@ -0,0 +1,118 @@
+{
+  "relations": [
+    {
+      "root": {
+        "input": {
+          "set": {
+            "inputs": [
+              {
+                "project": {
+                  "common": {
+                    "emit": {
+                      "outputMapping": [
+                        1
+                      ]
+                    }
+                  },
+                  "input": {
+                    "read": {
+                      "common": {
+                        "direct": {}
+                      },
+                      "baseSchema": {
+                        "names": [
+                          "a"
+                        ],
+                        "struct": {
+                          "types": [
+                            {
+                              "i64": {
+                                "nullability": "NULLABILITY_NULLABLE"
+                              }
+                            }
+                          ],
+                          "nullability": "NULLABILITY_NULLABLE"
+                        }
+                      },
+                      "namedTable": {
+                        "names": [
+                          "data"
+                        ]
+                      }
+                    }
+                  },
+                  "expressions": [
+                    {
+                      "selection": {
+                        "directReference": {
+                          "structField": {}
+                        },
+                        "rootReference": {}
+                      }
+                    }
+                  ]
+                }
+              },
+              {
+                "project": {
+                  "common": {
+                    "emit": {
+                      "outputMapping": [
+                        1
+                      ]
+                    }
+                  },
+                  "input": {
+                    "read": {
+                      "common": {
+                        "direct": {}
+                      },
+                      "baseSchema": {
+                        "names": [
+                          "a"
+                        ],
+                        "struct": {
+                          "types": [
+                            {
+                              "i64": {
+                                "nullability": "NULLABILITY_NULLABLE"
+                              }
+                            }
+                          ],
+                          "nullability": "NULLABILITY_NULLABLE"
+                        }
+                      },
+                      "namedTable": {
+                        "names": [
+                          "data2"
+                        ]
+                      }
+                    }
+                  },
+                  "expressions": [
+                    {
+                      "selection": {
+                        "directReference": {
+                          "structField": {}
+                        },
+                        "rootReference": {}
+                      }
+                    }
+                  ]
+                }
+              }
+            ],
+            "op": "SET_OP_INTERSECTION_PRIMARY"

Review Comment:
   If the intent is for this plan to correspond to
   ```sql
   SELECT a FROM data INTERSECT SELECT a FROM data2"
   ```
   this should actually be `SET_OP_INTERSECTION_MULTISET`. See relevant 
[docs].(https://substrait.io/relations/logical_relations/#set-operation-types)
   
   This is tied to a relatively recent spec change: 
https://github.com/substrait-io/substrait/pull/708



##########
datafusion/substrait/src/logical_plan/consumer.rs:
##########
@@ -887,6 +887,17 @@ pub async fn from_substrait_rel(
                         not_impl_err!("Union relation requires at least one 
input")
                     }
                 }
+                set_rel::SetOp::IntersectionPrimary => {

Review Comment:
   See note on Substrait plan around mapping SQL INTERSECT into Substrait.



##########
datafusion/substrait/src/logical_plan/consumer.rs:
##########
@@ -887,6 +887,17 @@ pub async fn from_substrait_rel(
                         not_impl_err!("Union relation requires at least one 
input")
                     }
                 }
+                set_rel::SetOp::IntersectionPrimary => {
+                    if set.inputs.len() == 2 {
+                        LogicalPlanBuilder::intersect(
+                            from_substrait_rel(ctx, &set.inputs[0], 
extensions).await?,
+                            from_substrait_rel(ctx, &set.inputs[1], 
extensions).await?,
+                            false,
+                        )
+                    } else {
+                        not_impl_err!("Primary Intersect relation with more 
than two inputs isn't supported")

Review Comment:
   If we associate this DataFusion operation with `IntersectionMultiset` 
instead of `IntersectionPrimary`, we could potentially handle multiple inputs 
by chaining intersects. For example
   ```
   SetRel(
     op = SET_OP_INTERSECTION_MULTISET,
     inputs = [<A>, <B>, <C>]
   )
   ```
   becomes something like
   ```rust
   LogicalPlanBuilder::intersect(
     from_substrait_rel(ctx, &set.inputs[0], extensions).await?,
     LogicalPlanBuilder::intersect(
       from_substrait_rel(ctx, &set.inputs[1], extensions).await?,
       from_substrait_rel(ctx, &set.inputs[2], extensions).await?,
       false
     )
   false)
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to