This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new e63abe78f5 feat(substrait): add set operations to consumer, update 
substrait to `0.45.0` (#12863)
e63abe78f5 is described below

commit e63abe78f54cdbbba7ed92d65400525eeae59b71
Author: Tornike Gurgenidze <[email protected]>
AuthorDate: Thu Oct 17 21:21:49 2024 +0400

    feat(substrait): add set operations to consumer, update substrait to 
`0.45.0` (#12863)
    
    * feat(substait): add set operations to consumer
    
    * add missing intersect all test, change distinct to is_all
    
    * upgrade substrait crate to 0.45
---
 datafusion/substrait/Cargo.toml                    |   2 +-
 datafusion/substrait/src/logical_plan/consumer.rs  | 120 +++++++++++++--
 datafusion/substrait/src/logical_plan/producer.rs  |  12 +-
 .../tests/cases/roundtrip_logical_plan.rs          |  66 ++++++++
 .../test_plans/intersect_multiset.substrait.json   | 166 +++++++++++++++++++++
 .../intersect_multiset_all.substrait.json          | 166 +++++++++++++++++++++
 .../test_plans/intersect_primary.substrait.json    | 166 +++++++++++++++++++++
 .../test_plans/minus_primary.substrait.json        | 166 +++++++++++++++++++++
 .../test_plans/minus_primary_all.substrait.json    | 166 +++++++++++++++++++++
 .../test_plans/union_distinct.substrait.json       | 118 +++++++++++++++
 datafusion/substrait/tests/utils.rs                |   1 +
 11 files changed, 1136 insertions(+), 13 deletions(-)

diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml
index 4175501828..b0aa6acf3c 100644
--- a/datafusion/substrait/Cargo.toml
+++ b/datafusion/substrait/Cargo.toml
@@ -41,7 +41,7 @@ object_store = { workspace = true }
 pbjson-types = "0.7"
 # TODO use workspace version
 prost = "0.13"
-substrait = { version = "0.42", features = ["serde"] }
+substrait = { version = "0.45", features = ["serde"] }
 url = { workspace = true }
 
 [dev-dependencies]
diff --git a/datafusion/substrait/src/logical_plan/consumer.rs 
b/datafusion/substrait/src/logical_plan/consumer.rs
index c727f784ee..4af02858e6 100644
--- a/datafusion/substrait/src/logical_plan/consumer.rs
+++ b/datafusion/substrait/src/logical_plan/consumer.rs
@@ -196,6 +196,65 @@ fn split_eq_and_noneq_join_predicate_with_nulls_equality(
     (accum_join_keys, nulls_equal_nulls, join_filter)
 }
 
+async fn union_rels(
+    rels: &[Rel],
+    ctx: &SessionContext,
+    extensions: &Extensions,
+    is_all: bool,
+) -> Result<LogicalPlan> {
+    let mut union_builder = Ok(LogicalPlanBuilder::from(
+        from_substrait_rel(ctx, &rels[0], extensions).await?,
+    ));
+    for input in &rels[1..] {
+        let rel_plan = from_substrait_rel(ctx, input, extensions).await?;
+
+        union_builder = if is_all {
+            union_builder?.union(rel_plan)
+        } else {
+            union_builder?.union_distinct(rel_plan)
+        };
+    }
+    union_builder?.build()
+}
+
+async fn intersect_rels(
+    rels: &[Rel],
+    ctx: &SessionContext,
+    extensions: &Extensions,
+    is_all: bool,
+) -> Result<LogicalPlan> {
+    let mut rel = from_substrait_rel(ctx, &rels[0], extensions).await?;
+
+    for input in &rels[1..] {
+        rel = LogicalPlanBuilder::intersect(
+            rel,
+            from_substrait_rel(ctx, input, extensions).await?,
+            is_all,
+        )?
+    }
+
+    Ok(rel)
+}
+
+async fn except_rels(
+    rels: &[Rel],
+    ctx: &SessionContext,
+    extensions: &Extensions,
+    is_all: bool,
+) -> Result<LogicalPlan> {
+    let mut rel = from_substrait_rel(ctx, &rels[0], extensions).await?;
+
+    for input in &rels[1..] {
+        rel = LogicalPlanBuilder::except(
+            rel,
+            from_substrait_rel(ctx, input, extensions).await?,
+            is_all,
+        )?
+    }
+
+    Ok(rel)
+}
+
 /// Convert Substrait Plan to DataFusion LogicalPlan
 pub async fn from_substrait_plan(
     ctx: &SessionContext,
@@ -494,6 +553,7 @@ fn make_renamed_schema(
 }
 
 /// Convert Substrait Rel to DataFusion DataFrame
+#[allow(deprecated)]
 #[async_recursion]
 pub async fn from_substrait_rel(
     ctx: &SessionContext,
@@ -877,27 +937,65 @@ pub async fn from_substrait_rel(
             Ok(set_op) => match set_op {
                 set_rel::SetOp::UnionAll => {
                     if !set.inputs.is_empty() {
-                        let mut union_builder = Ok(LogicalPlanBuilder::from(
-                            from_substrait_rel(ctx, &set.inputs[0], 
extensions).await?,
-                        ));
-                        for input in &set.inputs[1..] {
-                            union_builder = union_builder?
-                                .union(from_substrait_rel(ctx, input, 
extensions).await?);
-                        }
-                        union_builder?.build()
+                        union_rels(&set.inputs, ctx, extensions, true).await
+                    } else {
+                        not_impl_err!("Union relation requires at least one 
input")
+                    }
+                }
+                set_rel::SetOp::UnionDistinct => {
+                    if !set.inputs.is_empty() {
+                        union_rels(&set.inputs, ctx, extensions, false).await
                     } else {
                         not_impl_err!("Union relation requires at least one 
input")
                     }
                 }
                 set_rel::SetOp::IntersectionPrimary => {
-                    if set.inputs.len() == 2 {
+                    if set.inputs.len() >= 2 {
                         LogicalPlanBuilder::intersect(
                             from_substrait_rel(ctx, &set.inputs[0], 
extensions).await?,
-                            from_substrait_rel(ctx, &set.inputs[1], 
extensions).await?,
+                            union_rels(&set.inputs[1..], ctx, extensions, 
true).await?,
                             false,
                         )
                     } else {
-                        not_impl_err!("Primary Intersect relation with more 
than two inputs isn't supported")
+                        not_impl_err!(
+                            "Primary Intersect relation requires at least two 
inputs"
+                        )
+                    }
+                }
+                set_rel::SetOp::IntersectionMultiset => {
+                    if set.inputs.len() >= 2 {
+                        intersect_rels(&set.inputs, ctx, extensions, 
false).await
+                    } else {
+                        not_impl_err!(
+                            "Multiset Intersect relation requires at least two 
inputs"
+                        )
+                    }
+                }
+                set_rel::SetOp::IntersectionMultisetAll => {
+                    if set.inputs.len() >= 2 {
+                        intersect_rels(&set.inputs, ctx, extensions, 
true).await
+                    } else {
+                        not_impl_err!(
+                            "MultisetAll Intersect relation requires at least 
two inputs"
+                        )
+                    }
+                }
+                set_rel::SetOp::MinusPrimary => {
+                    if set.inputs.len() >= 2 {
+                        except_rels(&set.inputs, ctx, extensions, false).await
+                    } else {
+                        not_impl_err!(
+                            "Primary Minus relation requires at least two 
inputs"
+                        )
+                    }
+                }
+                set_rel::SetOp::MinusPrimaryAll => {
+                    if set.inputs.len() >= 2 {
+                        except_rels(&set.inputs, ctx, extensions, true).await
+                    } else {
+                        not_impl_err!(
+                            "PrimaryAll Minus relation requires at least two 
inputs"
+                        )
                     }
                 }
                 _ => not_impl_err!("Unsupported set operator: {set_op:?}"),
diff --git a/datafusion/substrait/src/logical_plan/producer.rs 
b/datafusion/substrait/src/logical_plan/producer.rs
index 1165ce13d2..0e1375a8e0 100644
--- a/datafusion/substrait/src/logical_plan/producer.rs
+++ b/datafusion/substrait/src/logical_plan/producer.rs
@@ -172,6 +172,7 @@ pub fn to_substrait_extended_expr(
 }
 
 /// Convert DataFusion LogicalPlan to Substrait Rel
+#[allow(deprecated)]
 pub fn to_substrait_rel(
     plan: &LogicalPlan,
     ctx: &SessionContext,
@@ -227,6 +228,7 @@ pub fn to_substrait_rel(
                     advanced_extension: None,
                     read_type: Some(ReadType::VirtualTable(VirtualTable {
                         values: vec![],
+                        expressions: vec![],
                     })),
                 }))),
             }))
@@ -263,7 +265,10 @@ pub fn to_substrait_rel(
                     best_effort_filter: None,
                     projection: None,
                     advanced_extension: None,
-                    read_type: Some(ReadType::VirtualTable(VirtualTable { 
values })),
+                    read_type: Some(ReadType::VirtualTable(VirtualTable {
+                        values,
+                        expressions: vec![],
+                    })),
                 }))),
             }))
         }
@@ -359,6 +364,7 @@ pub fn to_substrait_rel(
                 rel_type: Some(RelType::Aggregate(Box::new(AggregateRel {
                     common: None,
                     input: Some(input),
+                    grouping_expressions: vec![],
                     groupings,
                     measures,
                     advanced_extension: None,
@@ -377,8 +383,10 @@ pub fn to_substrait_rel(
                 rel_type: Some(RelType::Aggregate(Box::new(AggregateRel {
                     common: None,
                     input: Some(input),
+                    grouping_expressions: vec![],
                     groupings: vec![Grouping {
                         grouping_expressions: grouping,
+                        expression_references: vec![],
                     }],
                     measures: vec![],
                     advanced_extension: None,
@@ -764,6 +772,7 @@ pub fn operator_to_name(op: Operator) -> &'static str {
     }
 }
 
+#[allow(deprecated)]
 pub fn parse_flat_grouping_exprs(
     ctx: &SessionContext,
     exprs: &[Expr],
@@ -776,6 +785,7 @@ pub fn parse_flat_grouping_exprs(
         .collect::<Result<Vec<_>>>()?;
     Ok(Grouping {
         grouping_expressions,
+        expression_references: vec![],
     })
 }
 
diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs 
b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs
index ae87dad015..23ac601a44 100644
--- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs
@@ -687,6 +687,72 @@ async fn simple_intersect_consume() -> Result<()> {
     .await
 }
 
+#[tokio::test]
+async fn primary_intersect_consume() -> Result<()> {
+    let proto_plan =
+        
read_json("tests/testdata/test_plans/intersect_primary.substrait.json");
+
+    assert_substrait_sql(
+        proto_plan,
+        "SELECT a FROM data INTERSECT (SELECT a FROM data2 UNION ALL SELECT a 
FROM data2)",
+    )
+    .await
+}
+
+#[tokio::test]
+async fn multiset_intersect_consume() -> Result<()> {
+    let proto_plan =
+        
read_json("tests/testdata/test_plans/intersect_multiset.substrait.json");
+
+    assert_substrait_sql(
+        proto_plan,
+        "SELECT a FROM data INTERSECT SELECT a FROM data2 INTERSECT SELECT a 
FROM data2",
+    )
+    .await
+}
+
+#[tokio::test]
+async fn multiset_intersect_all_consume() -> Result<()> {
+    let proto_plan =
+        
read_json("tests/testdata/test_plans/intersect_multiset_all.substrait.json");
+
+    assert_substrait_sql(
+        proto_plan,
+        "SELECT a FROM data INTERSECT ALL SELECT a FROM data2 INTERSECT ALL 
SELECT a FROM data2",
+    )
+    .await
+}
+
+#[tokio::test]
+async fn primary_except_consume() -> Result<()> {
+    let proto_plan = 
read_json("tests/testdata/test_plans/minus_primary.substrait.json");
+
+    assert_substrait_sql(
+        proto_plan,
+        "SELECT a FROM data EXCEPT SELECT a FROM data2 EXCEPT SELECT a FROM 
data2",
+    )
+    .await
+}
+
+#[tokio::test]
+async fn primary_except_all_consume() -> Result<()> {
+    let proto_plan =
+        
read_json("tests/testdata/test_plans/minus_primary_all.substrait.json");
+
+    assert_substrait_sql(
+        proto_plan,
+        "SELECT a FROM data EXCEPT ALL SELECT a FROM data2 EXCEPT ALL SELECT a 
FROM data2",
+    )
+    .await
+}
+
+#[tokio::test]
+async fn union_distinct_consume() -> Result<()> {
+    let proto_plan = 
read_json("tests/testdata/test_plans/union_distinct.substrait.json");
+
+    assert_substrait_sql(proto_plan, "SELECT a FROM data UNION SELECT a FROM 
data2").await
+}
+
 #[tokio::test]
 async fn simple_intersect_table_reuse() -> Result<()> {
     // Substrait does currently NOT maintain the alias of the tables.
diff --git 
a/datafusion/substrait/tests/testdata/test_plans/intersect_multiset.substrait.json
 
b/datafusion/substrait/tests/testdata/test_plans/intersect_multiset.substrait.json
new file mode 100644
index 0000000000..8ff69bd82c
--- /dev/null
+++ 
b/datafusion/substrait/tests/testdata/test_plans/intersect_multiset.substrait.json
@@ -0,0 +1,166 @@
+{
+    "relations": [
+      {
+        "root": {
+          "input": {
+            "set": {
+              "inputs": [
+                {
+                  "project": {
+                    "common": {
+                      "emit": {
+                        "outputMapping": [
+                          1
+                        ]
+                      }
+                    },
+                    "input": {
+                      "read": {
+                        "common": {
+                          "direct": {}
+                        },
+                        "baseSchema": {
+                          "names": [
+                            "a"
+                          ],
+                          "struct": {
+                            "types": [
+                              {
+                                "i64": {
+                                  "nullability": "NULLABILITY_NULLABLE"
+                                }
+                              }
+                            ],
+                            "nullability": "NULLABILITY_NULLABLE"
+                          }
+                        },
+                        "namedTable": {
+                          "names": [
+                            "data"
+                          ]
+                        }
+                      }
+                    },
+                    "expressions": [
+                      {
+                        "selection": {
+                          "directReference": {
+                            "structField": {}
+                          },
+                          "rootReference": {}
+                        }
+                      }
+                    ]
+                  }
+                },
+                {
+                  "project": {
+                    "common": {
+                      "emit": {
+                        "outputMapping": [
+                          1
+                        ]
+                      }
+                    },
+                    "input": {
+                      "read": {
+                        "common": {
+                          "direct": {}
+                        },
+                        "baseSchema": {
+                          "names": [
+                            "a"
+                          ],
+                          "struct": {
+                            "types": [
+                              {
+                                "i64": {
+                                  "nullability": "NULLABILITY_NULLABLE"
+                                }
+                              }
+                            ],
+                            "nullability": "NULLABILITY_NULLABLE"
+                          }
+                        },
+                        "namedTable": {
+                          "names": [
+                            "data2"
+                          ]
+                        }
+                      }
+                    },
+                    "expressions": [
+                      {
+                        "selection": {
+                          "directReference": {
+                            "structField": {}
+                          },
+                          "rootReference": {}
+                        }
+                      }
+                    ]
+                  }
+                },
+                {
+                  "project": {
+                    "common": {
+                      "emit": {
+                        "outputMapping": [
+                          1
+                        ]
+                      }
+                    },
+                    "input": {
+                      "read": {
+                        "common": {
+                          "direct": {}
+                        },
+                        "baseSchema": {
+                          "names": [
+                            "a"
+                          ],
+                          "struct": {
+                            "types": [
+                              {
+                                "i64": {
+                                  "nullability": "NULLABILITY_NULLABLE"
+                                }
+                              }
+                            ],
+                            "nullability": "NULLABILITY_NULLABLE"
+                          }
+                        },
+                        "namedTable": {
+                          "names": [
+                            "data2"
+                          ]
+                        }
+                      }
+                    },
+                    "expressions": [
+                      {
+                        "selection": {
+                          "directReference": {
+                            "structField": {}
+                          },
+                          "rootReference": {}
+                        }
+                      }
+                    ]
+                  }
+                }
+              ],
+              "op": "SET_OP_INTERSECTION_MULTISET"
+            }
+          },
+          "names": [
+            "a"
+          ]
+        }
+      }
+    ],
+    "version": {
+      "minorNumber": 54,
+      "producer": "subframe"
+    }
+  }
\ No newline at end of file
diff --git 
a/datafusion/substrait/tests/testdata/test_plans/intersect_multiset_all.substrait.json
 
b/datafusion/substrait/tests/testdata/test_plans/intersect_multiset_all.substrait.json
new file mode 100644
index 0000000000..56daf6ed46
--- /dev/null
+++ 
b/datafusion/substrait/tests/testdata/test_plans/intersect_multiset_all.substrait.json
@@ -0,0 +1,166 @@
+{
+    "relations": [
+      {
+        "root": {
+          "input": {
+            "set": {
+              "inputs": [
+                {
+                  "project": {
+                    "common": {
+                      "emit": {
+                        "outputMapping": [
+                          1
+                        ]
+                      }
+                    },
+                    "input": {
+                      "read": {
+                        "common": {
+                          "direct": {}
+                        },
+                        "baseSchema": {
+                          "names": [
+                            "a"
+                          ],
+                          "struct": {
+                            "types": [
+                              {
+                                "i64": {
+                                  "nullability": "NULLABILITY_NULLABLE"
+                                }
+                              }
+                            ],
+                            "nullability": "NULLABILITY_NULLABLE"
+                          }
+                        },
+                        "namedTable": {
+                          "names": [
+                            "data"
+                          ]
+                        }
+                      }
+                    },
+                    "expressions": [
+                      {
+                        "selection": {
+                          "directReference": {
+                            "structField": {}
+                          },
+                          "rootReference": {}
+                        }
+                      }
+                    ]
+                  }
+                },
+                {
+                  "project": {
+                    "common": {
+                      "emit": {
+                        "outputMapping": [
+                          1
+                        ]
+                      }
+                    },
+                    "input": {
+                      "read": {
+                        "common": {
+                          "direct": {}
+                        },
+                        "baseSchema": {
+                          "names": [
+                            "a"
+                          ],
+                          "struct": {
+                            "types": [
+                              {
+                                "i64": {
+                                  "nullability": "NULLABILITY_NULLABLE"
+                                }
+                              }
+                            ],
+                            "nullability": "NULLABILITY_NULLABLE"
+                          }
+                        },
+                        "namedTable": {
+                          "names": [
+                            "data2"
+                          ]
+                        }
+                      }
+                    },
+                    "expressions": [
+                      {
+                        "selection": {
+                          "directReference": {
+                            "structField": {}
+                          },
+                          "rootReference": {}
+                        }
+                      }
+                    ]
+                  }
+                },
+                {
+                  "project": {
+                    "common": {
+                      "emit": {
+                        "outputMapping": [
+                          1
+                        ]
+                      }
+                    },
+                    "input": {
+                      "read": {
+                        "common": {
+                          "direct": {}
+                        },
+                        "baseSchema": {
+                          "names": [
+                            "a"
+                          ],
+                          "struct": {
+                            "types": [
+                              {
+                                "i64": {
+                                  "nullability": "NULLABILITY_NULLABLE"
+                                }
+                              }
+                            ],
+                            "nullability": "NULLABILITY_NULLABLE"
+                          }
+                        },
+                        "namedTable": {
+                          "names": [
+                            "data2"
+                          ]
+                        }
+                      }
+                    },
+                    "expressions": [
+                      {
+                        "selection": {
+                          "directReference": {
+                            "structField": {}
+                          },
+                          "rootReference": {}
+                        }
+                      }
+                    ]
+                  }
+                }
+              ],
+              "op": "SET_OP_INTERSECTION_MULTISET_ALL"
+            }
+          },
+          "names": [
+            "a"
+          ]
+        }
+      }
+    ],
+    "version": {
+      "minorNumber": 54,
+      "producer": "subframe"
+    }
+  }
\ No newline at end of file
diff --git 
a/datafusion/substrait/tests/testdata/test_plans/intersect_primary.substrait.json
 
b/datafusion/substrait/tests/testdata/test_plans/intersect_primary.substrait.json
new file mode 100644
index 0000000000..229dd72517
--- /dev/null
+++ 
b/datafusion/substrait/tests/testdata/test_plans/intersect_primary.substrait.json
@@ -0,0 +1,166 @@
+{
+    "relations": [
+      {
+        "root": {
+          "input": {
+            "set": {
+              "inputs": [
+                {
+                  "project": {
+                    "common": {
+                      "emit": {
+                        "outputMapping": [
+                          1
+                        ]
+                      }
+                    },
+                    "input": {
+                      "read": {
+                        "common": {
+                          "direct": {}
+                        },
+                        "baseSchema": {
+                          "names": [
+                            "a"
+                          ],
+                          "struct": {
+                            "types": [
+                              {
+                                "i64": {
+                                  "nullability": "NULLABILITY_NULLABLE"
+                                }
+                              }
+                            ],
+                            "nullability": "NULLABILITY_NULLABLE"
+                          }
+                        },
+                        "namedTable": {
+                          "names": [
+                            "data"
+                          ]
+                        }
+                      }
+                    },
+                    "expressions": [
+                      {
+                        "selection": {
+                          "directReference": {
+                            "structField": {}
+                          },
+                          "rootReference": {}
+                        }
+                      }
+                    ]
+                  }
+                },
+                {
+                  "project": {
+                    "common": {
+                      "emit": {
+                        "outputMapping": [
+                          1
+                        ]
+                      }
+                    },
+                    "input": {
+                      "read": {
+                        "common": {
+                          "direct": {}
+                        },
+                        "baseSchema": {
+                          "names": [
+                            "a"
+                          ],
+                          "struct": {
+                            "types": [
+                              {
+                                "i64": {
+                                  "nullability": "NULLABILITY_NULLABLE"
+                                }
+                              }
+                            ],
+                            "nullability": "NULLABILITY_NULLABLE"
+                          }
+                        },
+                        "namedTable": {
+                          "names": [
+                            "data2"
+                          ]
+                        }
+                      }
+                    },
+                    "expressions": [
+                      {
+                        "selection": {
+                          "directReference": {
+                            "structField": {}
+                          },
+                          "rootReference": {}
+                        }
+                      }
+                    ]
+                  }
+                },
+                {
+                  "project": {
+                    "common": {
+                      "emit": {
+                        "outputMapping": [
+                          1
+                        ]
+                      }
+                    },
+                    "input": {
+                      "read": {
+                        "common": {
+                          "direct": {}
+                        },
+                        "baseSchema": {
+                          "names": [
+                            "a"
+                          ],
+                          "struct": {
+                            "types": [
+                              {
+                                "i64": {
+                                  "nullability": "NULLABILITY_NULLABLE"
+                                }
+                              }
+                            ],
+                            "nullability": "NULLABILITY_NULLABLE"
+                          }
+                        },
+                        "namedTable": {
+                          "names": [
+                            "data2"
+                          ]
+                        }
+                      }
+                    },
+                    "expressions": [
+                      {
+                        "selection": {
+                          "directReference": {
+                            "structField": {}
+                          },
+                          "rootReference": {}
+                        }
+                      }
+                    ]
+                  }
+                }
+              ],
+              "op": "SET_OP_INTERSECTION_PRIMARY"
+            }
+          },
+          "names": [
+            "a"
+          ]
+        }
+      }
+    ],
+    "version": {
+      "minorNumber": 54,
+      "producer": "subframe"
+    }
+  }
\ No newline at end of file
diff --git 
a/datafusion/substrait/tests/testdata/test_plans/minus_primary.substrait.json 
b/datafusion/substrait/tests/testdata/test_plans/minus_primary.substrait.json
new file mode 100644
index 0000000000..33b0e2ab8c
--- /dev/null
+++ 
b/datafusion/substrait/tests/testdata/test_plans/minus_primary.substrait.json
@@ -0,0 +1,166 @@
+{
+    "relations": [
+      {
+        "root": {
+          "input": {
+            "set": {
+              "inputs": [
+                {
+                  "project": {
+                    "common": {
+                      "emit": {
+                        "outputMapping": [
+                          1
+                        ]
+                      }
+                    },
+                    "input": {
+                      "read": {
+                        "common": {
+                          "direct": {}
+                        },
+                        "baseSchema": {
+                          "names": [
+                            "a"
+                          ],
+                          "struct": {
+                            "types": [
+                              {
+                                "i64": {
+                                  "nullability": "NULLABILITY_NULLABLE"
+                                }
+                              }
+                            ],
+                            "nullability": "NULLABILITY_NULLABLE"
+                          }
+                        },
+                        "namedTable": {
+                          "names": [
+                            "data"
+                          ]
+                        }
+                      }
+                    },
+                    "expressions": [
+                      {
+                        "selection": {
+                          "directReference": {
+                            "structField": {}
+                          },
+                          "rootReference": {}
+                        }
+                      }
+                    ]
+                  }
+                },
+                {
+                  "project": {
+                    "common": {
+                      "emit": {
+                        "outputMapping": [
+                          1
+                        ]
+                      }
+                    },
+                    "input": {
+                      "read": {
+                        "common": {
+                          "direct": {}
+                        },
+                        "baseSchema": {
+                          "names": [
+                            "a"
+                          ],
+                          "struct": {
+                            "types": [
+                              {
+                                "i64": {
+                                  "nullability": "NULLABILITY_NULLABLE"
+                                }
+                              }
+                            ],
+                            "nullability": "NULLABILITY_NULLABLE"
+                          }
+                        },
+                        "namedTable": {
+                          "names": [
+                            "data2"
+                          ]
+                        }
+                      }
+                    },
+                    "expressions": [
+                      {
+                        "selection": {
+                          "directReference": {
+                            "structField": {}
+                          },
+                          "rootReference": {}
+                        }
+                      }
+                    ]
+                  }
+                },
+                {
+                  "project": {
+                    "common": {
+                      "emit": {
+                        "outputMapping": [
+                          1
+                        ]
+                      }
+                    },
+                    "input": {
+                      "read": {
+                        "common": {
+                          "direct": {}
+                        },
+                        "baseSchema": {
+                          "names": [
+                            "a"
+                          ],
+                          "struct": {
+                            "types": [
+                              {
+                                "i64": {
+                                  "nullability": "NULLABILITY_NULLABLE"
+                                }
+                              }
+                            ],
+                            "nullability": "NULLABILITY_NULLABLE"
+                          }
+                        },
+                        "namedTable": {
+                          "names": [
+                            "data2"
+                          ]
+                        }
+                      }
+                    },
+                    "expressions": [
+                      {
+                        "selection": {
+                          "directReference": {
+                            "structField": {}
+                          },
+                          "rootReference": {}
+                        }
+                      }
+                    ]
+                  }
+                }
+              ],
+              "op": "SET_OP_MINUS_PRIMARY"
+            }
+          },
+          "names": [
+            "a"
+          ]
+        }
+      }
+    ],
+    "version": {
+      "minorNumber": 54,
+      "producer": "subframe"
+    }
+  }
\ No newline at end of file
diff --git 
a/datafusion/substrait/tests/testdata/test_plans/minus_primary_all.substrait.json
 
b/datafusion/substrait/tests/testdata/test_plans/minus_primary_all.substrait.json
new file mode 100644
index 0000000000..229f78ab5b
--- /dev/null
+++ 
b/datafusion/substrait/tests/testdata/test_plans/minus_primary_all.substrait.json
@@ -0,0 +1,166 @@
+{
+    "relations": [
+      {
+        "root": {
+          "input": {
+            "set": {
+              "inputs": [
+                {
+                  "project": {
+                    "common": {
+                      "emit": {
+                        "outputMapping": [
+                          1
+                        ]
+                      }
+                    },
+                    "input": {
+                      "read": {
+                        "common": {
+                          "direct": {}
+                        },
+                        "baseSchema": {
+                          "names": [
+                            "a"
+                          ],
+                          "struct": {
+                            "types": [
+                              {
+                                "i64": {
+                                  "nullability": "NULLABILITY_NULLABLE"
+                                }
+                              }
+                            ],
+                            "nullability": "NULLABILITY_NULLABLE"
+                          }
+                        },
+                        "namedTable": {
+                          "names": [
+                            "data"
+                          ]
+                        }
+                      }
+                    },
+                    "expressions": [
+                      {
+                        "selection": {
+                          "directReference": {
+                            "structField": {}
+                          },
+                          "rootReference": {}
+                        }
+                      }
+                    ]
+                  }
+                },
+                {
+                  "project": {
+                    "common": {
+                      "emit": {
+                        "outputMapping": [
+                          1
+                        ]
+                      }
+                    },
+                    "input": {
+                      "read": {
+                        "common": {
+                          "direct": {}
+                        },
+                        "baseSchema": {
+                          "names": [
+                            "a"
+                          ],
+                          "struct": {
+                            "types": [
+                              {
+                                "i64": {
+                                  "nullability": "NULLABILITY_NULLABLE"
+                                }
+                              }
+                            ],
+                            "nullability": "NULLABILITY_NULLABLE"
+                          }
+                        },
+                        "namedTable": {
+                          "names": [
+                            "data2"
+                          ]
+                        }
+                      }
+                    },
+                    "expressions": [
+                      {
+                        "selection": {
+                          "directReference": {
+                            "structField": {}
+                          },
+                          "rootReference": {}
+                        }
+                      }
+                    ]
+                  }
+                },
+                {
+                  "project": {
+                    "common": {
+                      "emit": {
+                        "outputMapping": [
+                          1
+                        ]
+                      }
+                    },
+                    "input": {
+                      "read": {
+                        "common": {
+                          "direct": {}
+                        },
+                        "baseSchema": {
+                          "names": [
+                            "a"
+                          ],
+                          "struct": {
+                            "types": [
+                              {
+                                "i64": {
+                                  "nullability": "NULLABILITY_NULLABLE"
+                                }
+                              }
+                            ],
+                            "nullability": "NULLABILITY_NULLABLE"
+                          }
+                        },
+                        "namedTable": {
+                          "names": [
+                            "data2"
+                          ]
+                        }
+                      }
+                    },
+                    "expressions": [
+                      {
+                        "selection": {
+                          "directReference": {
+                            "structField": {}
+                          },
+                          "rootReference": {}
+                        }
+                      }
+                    ]
+                  }
+                }
+              ],
+              "op": "SET_OP_MINUS_PRIMARY_ALL"
+            }
+          },
+          "names": [
+            "a"
+          ]
+        }
+      }
+    ],
+    "version": {
+      "minorNumber": 54,
+      "producer": "subframe"
+    }
+  }
\ No newline at end of file
diff --git 
a/datafusion/substrait/tests/testdata/test_plans/union_distinct.substrait.json 
b/datafusion/substrait/tests/testdata/test_plans/union_distinct.substrait.json
new file mode 100644
index 0000000000..e8b0274966
--- /dev/null
+++ 
b/datafusion/substrait/tests/testdata/test_plans/union_distinct.substrait.json
@@ -0,0 +1,118 @@
+{
+    "relations": [
+        {
+        "root": {
+            "input": {
+            "set": {
+                "inputs": [
+                {
+                    "project": {
+                    "common": {
+                        "emit": {
+                        "outputMapping": [
+                            1
+                        ]
+                        }
+                    },
+                    "input": {
+                        "read": {
+                        "common": {
+                            "direct": {}
+                        },
+                        "baseSchema": {
+                            "names": [
+                            "a"
+                            ],
+                            "struct": {
+                            "types": [
+                                {
+                                "i64": {
+                                    "nullability": "NULLABILITY_NULLABLE"
+                                }
+                                }
+                            ],
+                            "nullability": "NULLABILITY_NULLABLE"
+                            }
+                        },
+                        "namedTable": {
+                            "names": [
+                            "data"
+                            ]
+                        }
+                        }
+                    },
+                    "expressions": [
+                        {
+                        "selection": {
+                            "directReference": {
+                            "structField": {}
+                            },
+                            "rootReference": {}
+                        }
+                        }
+                    ]
+                    }
+                },
+                {
+                    "project": {
+                    "common": {
+                        "emit": {
+                        "outputMapping": [
+                            1
+                        ]
+                        }
+                    },
+                    "input": {
+                        "read": {
+                        "common": {
+                            "direct": {}
+                        },
+                        "baseSchema": {
+                            "names": [
+                            "a"
+                            ],
+                            "struct": {
+                            "types": [
+                                {
+                                "i64": {
+                                    "nullability": "NULLABILITY_NULLABLE"
+                                }
+                                }
+                            ],
+                            "nullability": "NULLABILITY_NULLABLE"
+                            }
+                        },
+                        "namedTable": {
+                            "names": [
+                            "data2"
+                            ]
+                        }
+                        }
+                    },
+                    "expressions": [
+                        {
+                        "selection": {
+                            "directReference": {
+                            "structField": {}
+                            },
+                            "rootReference": {}
+                        }
+                        }
+                    ]
+                    }
+                }
+                ],
+                "op": "SET_OP_UNION_DISTINCT"
+            }
+            },
+            "names": [
+            "a"
+            ]
+        }
+        }
+    ],
+    "version": {
+        "minorNumber": 54,
+        "producer": "subframe"
+    }
+}
\ No newline at end of file
diff --git a/datafusion/substrait/tests/utils.rs 
b/datafusion/substrait/tests/utils.rs
index 9f63b74ef0..00cbfb0c41 100644
--- a/datafusion/substrait/tests/utils.rs
+++ b/datafusion/substrait/tests/utils.rs
@@ -147,6 +147,7 @@ pub mod test {
             Ok(())
         }
 
+        #[allow(deprecated)]
         fn collect_schemas_from_rel(&mut self, rel: &Rel) -> Result<()> {
             let rel_type = rel
                 .rel_type


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to