Dandandan commented on a change in pull request #1023:
URL: https://github.com/apache/arrow-datafusion/pull/1023#discussion_r712269338
##########
File path: datafusion/src/physical_plan/hash_utils.rs
##########
@@ -80,36 +80,68 @@ fn check_join_set_is_valid(
)));
};
- let remaining = right
- .difference(on_right)
- .cloned()
- .collect::<HashSet<Column>>();
-
- let collisions = left.intersection(&remaining).collect::<HashSet<_>>();
-
- if !collisions.is_empty() {
- return Err(DataFusionError::Plan(format!(
- "The left schema and the right schema have the following
columns with the same name without being on the ON statement: {:?}. Consider
aliasing them.",
- collisions,
- )));
- };
-
Ok(())
}
+/// Information about the index and placement (left or right) of the columns
+#[derive(Debug, Clone)]
+pub struct ColumnIndex {
+ /// Index of the column
+ pub index: usize,
+ /// Whether the column is at the left or right side
+ pub is_left: bool,
+}
+
/// Creates a schema for a join operation.
/// The fields from the left side are first
-pub fn build_join_schema(left: &Schema, right: &Schema, join_type: &JoinType)
-> Schema {
- let fields: Vec<Field> = match join_type {
+pub fn build_join_schema(
+ left: &Schema,
+ right: &Schema,
+ join_type: &JoinType,
+) -> (Schema, Vec<ColumnIndex>) {
+ let (fields, column_indices): (Vec<Field>, Vec<ColumnIndex>) = match
join_type {
JoinType::Inner | JoinType::Left | JoinType::Full | JoinType::Right =>
{
- let left_fields = left.fields().iter();
- let right_fields = right.fields().iter();
+ let left_fields =
left.fields().iter().cloned().enumerate().map(|(i, f)| {
+ (
+ f,
+ ColumnIndex {
+ index: i,
+ is_left: true,
+ },
+ )
+ });
+ let right_fields =
+ right.fields().iter().cloned().enumerate().map(|(i, f)| {
+ (
+ f,
+ ColumnIndex {
+ index: i,
+ is_left: false,
+ },
+ )
+ });
+
// left then right
- left_fields.chain(right_fields).cloned().collect()
+ left_fields.chain(right_fields).unzip()
}
- JoinType::Semi | JoinType::Anti => left.fields().clone(),
+ JoinType::Semi | JoinType::Anti => left
+ .fields()
+ .iter()
+ .cloned()
+ .enumerate()
+ .map(|(i, f)| {
Review comment:
Stylistic suggestion: If you rename `i` to `index` this can be used with
shorthand syntax in record.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]