aa1371 commented on a change in pull request #35083:
URL: https://github.com/apache/spark/pull/35083#discussion_r777194433
##########
File path: python/pyspark/pandas/frame.py
##########
@@ -7637,102 +7706,35 @@ def merge(
As described in #263, joining string columns currently returns None
for missing values
instead of NaN.
"""
-
- def to_list(os: Optional[Union[Name, List[Name]]]) -> List[Label]:
- if os is None:
- return []
- elif is_name_like_tuple(os):
- return [cast(Label, os)]
- elif is_name_like_value(os):
- return [(os,)]
- else:
- return [o if is_name_like_tuple(o) else (o,) for o in os]
-
if isinstance(right, ps.Series):
right = right.to_frame()
- if on:
- if left_on or right_on:
- raise ValueError(
- 'Can only pass argument "on" OR "left_on" and "right_on", '
- "not a combination of both."
- )
- left_key_names = list(map(self._internal.spark_column_name_for,
to_list(on)))
- right_key_names = list(map(right._internal.spark_column_name_for,
to_list(on)))
- else:
- # TODO: need special handling for multi-index.
- if left_index:
- left_key_names = self._internal.index_spark_column_names
- else:
- left_key_names =
list(map(self._internal.spark_column_name_for, to_list(left_on)))
- if right_index:
- right_key_names = right._internal.index_spark_column_names
- else:
- right_key_names = list(
- map(right._internal.spark_column_name_for,
to_list(right_on))
- )
-
- if left_key_names and not right_key_names:
- raise ValueError("Must pass right_on or right_index=True")
- if right_key_names and not left_key_names:
- raise ValueError("Must pass left_on or left_index=True")
- if not left_key_names and not right_key_names:
- common = list(self.columns.intersection(right.columns))
- if len(common) == 0:
- raise ValueError(
- "No common columns to perform merge on. Merge options:
"
- "left_on=None, right_on=None, left_index=False,
right_index=False"
- )
- left_key_names =
list(map(self._internal.spark_column_name_for, to_list(common)))
- right_key_names =
list(map(right._internal.spark_column_name_for, to_list(common)))
- if len(left_key_names) != len(right_key_names):
- raise ValueError("len(left_keys) must equal len(right_keys)")
-
- # We should distinguish the name to avoid ambiguous column name after
merging.
- right_prefix = "__right_"
- right_key_names = [right_prefix + right_key_name for right_key_name in
right_key_names]
Review comment:
The logic from lines 7654-7693 has been moved verbatim to
`self._resolve_merge_key_names`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]