zhengruifeng commented on code in PR #56398:
URL: https://github.com/apache/spark/pull/56398#discussion_r3400650175
##########
python/pyspark/sql/tests/test_column.py:
##########
@@ -928,6 +928,99 @@ def test_resolve_after_intersect(self):
rows = df1.intersect(df2).select(df1.c).collect()
self.assertEqual([r.c for r in rows], [2])
+ def test_resolve_through_zip(self):
Review Comment:
Renamed all of them to `test_resolve_after_zip*` in ee26643956f.
##########
python/pyspark/sql/tests/test_column.py:
##########
@@ -928,6 +928,99 @@ def test_resolve_after_intersect(self):
rows = df1.intersect(df2).select(df1.c).collect()
self.assertEqual([r.c for r in rows], [2])
+ def test_resolve_through_zip(self):
+ # zip merges two column-projected DataFrames side by side. Classic
+ # resolves the tagged left/right reference by attribute id, which
+ # ResolveZip preserves in the merged Project, so it succeeds. Connect
+ # resolves by plan id, but ResolveZip collapses the two sides into one
+ # plan and drops the per-DataFrame plan-id tags, so the tagged
+ # reference is never found and it raises (overridden in the parity suite).
+ df = self.spark.createDataFrame([(1, 10), (2, 20), (3, 30)], ["a", "b"])
+ left = df.select((df.a + 1).alias("x"))
+ right = df.select((df.b * 2).alias("y"))
+ zipped = left.zip(right)
+ self.assertEqual(zipped.columns, ["x", "y"])
+ self.assertEqual(sorted(r.x for r in zipped.select(left.x).collect()), [2, 3, 4])
+ self.assertEqual(sorted(r.y for r in zipped.select(right.y).collect()), [20, 40, 60])
+ self.assertEqual(
+ sorted((r.x, r.y) for r in zipped.select(left.x, right.y).collect()),
+ [(2, 20), (3, 40), (4, 60)],
+ )
+
+ def test_resolve_through_zip_reordered(self):
+ # The originating DataFrame controls which side each column reads, in
Review Comment:
Fixed in ee26643956f.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]