alamb commented on a change in pull request #1258:
URL: https://github.com/apache/arrow-datafusion/pull/1258#discussion_r744665629
##########
File path: datafusion/src/dataframe.rs
##########
@@ -375,4 +375,19 @@ pub trait DataFrame: Send + Sync {
/// # }
/// ```
fn registry(&self) -> Arc<dyn FunctionRegistry>;
+
+ /// Calculate the intersect two [`DataFrame`]s. The two [`DataFrame`]s
must have exactly the same schema
Review comment:
```suggestion
/// Calculate the intersection of two [`DataFrame`]s. The two
/// [`DataFrame`]s must have exactly the same schema
```
##########
File path: datafusion/src/execution/dataframe_impl.rs
##########
@@ -231,6 +231,29 @@ impl DataFrame for DataFrameImpl {
.build()?,
)))
}
+
+ fn intersect(&self, dataframe: Arc<dyn DataFrame>) -> Result<Arc<dyn
DataFrame>> {
+ let left_plan = self.to_logical_plan();
+ let right_plan = dataframe.to_logical_plan();
+ let join_keys = left_plan
+ .schema()
+ .fields()
+ .iter()
+ .zip(right_plan.schema().fields().iter())
+ .map(|(left_field, right_field)| {
+ (
+ (Column::from_name(left_field.name())),
+ (Column::from_name(right_field.name())),
+ )
+ })
+ .unzip();
+ Ok(Arc::new(DataFrameImpl::new(
+ self.ctx_state.clone(),
+ &LogicalPlanBuilder::from(left_plan)
+ .join_detailed(&right_plan, JoinType::Semi, join_keys, true)?
+ .build()?,
Review comment:
I think that once
https://github.com/apache/arrow-datafusion/pull/1259 is merged, this could
just call `LogicalPlanBuilder::intersect` instead of building the semi join
by hand.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]