alamb commented on a change in pull request #1023:
URL: https://github.com/apache/arrow-datafusion/pull/1023#discussion_r712409527
##########
File path: datafusion/src/physical_plan/hash_join.rs
##########
@@ -1808,4 +1777,43 @@ mod tests {
Ok(())
}
+
+ #[tokio::test]
+ async fn join_with_duplicated_column_names() -> Result<()> {
+ let left = build_table(
+ ("a", &vec![1, 2, 3]),
+ ("b", &vec![4, 5, 7]),
+ ("c", &vec![7, 8, 9]),
+ );
+ let right = build_table(
+ ("a", &vec![10, 20, 30]),
+ ("b", &vec![1, 2, 7]),
+ ("c", &vec![70, 80, 90]),
+ );
+ let on = vec![(
+ // join on a=b so there are duplicate column names on unjoined columns
+ Column::new_with_schema("a", &left.schema()).unwrap(),
+ Column::new_with_schema("b", &right.schema()).unwrap(),
+ )];
+
+ let join = join(left, right, on, &JoinType::Inner)?;
+
+ let columns = columns(&join.schema());
+ assert_eq!(columns, vec!["a", "b", "c", "a", "b", "c"]);
+
+ let stream = join.execute(0).await?;
+ let batches = common::collect(stream).await?;
+
+ let expected = vec![
+ "+---+---+---+----+---+----+",
+ "| a | b | c | a | b | c |",
Review comment:
👍
##########
File path: datafusion/src/physical_plan/hash_join.rs
##########
@@ -1375,7 +1344,7 @@ mod tests {
"| 1 | 4 | 7 | 10 | 4 | 70 |",
"| 2 | 5 | 8 | 20 | 5 | 80 |",
"| 2 | 5 | 8 | 20 | 5 | 80 |",
- "| 3 | 7 | 9 | | 7 | |",
+ "| 3 | 7 | 9 | | | |",
Review comment:
I double-checked this, and the answer after this PR appears to be correct
(there is no value `7` in `b1` on the right input) 👍
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]