allisonwang-db commented on code in PR #49083:
URL: https://github.com/apache/spark/pull/49083#discussion_r1872830679
##########
python/pyspark/sql/connect/plan.py:
##########
@@ -1028,6 +1028,67 @@ def _repr_html_(self) -> str:
"""
+class LateralJoin(LogicalPlan):
+ def __init__(
+ self,
+ left: Optional[LogicalPlan],
+ right: LogicalPlan,
+ on: Optional[Column],
+ how: Optional[str],
+ ) -> None:
+ super().__init__(left)
+ self.left = cast(LogicalPlan, left)
+ self.right = right
+ self.on = on
+ if how is None:
+ join_type = proto.Join.JoinType.JOIN_TYPE_INNER
+ elif how == "inner":
+ join_type = proto.Join.JoinType.JOIN_TYPE_INNER
+ elif how in ["leftouter", "left"]:
+ join_type = proto.Join.JoinType.JOIN_TYPE_LEFT_OUTER
+ elif how == "cross":
+ join_type = proto.Join.JoinType.JOIN_TYPE_CROSS
+ else:
+ raise AnalysisException(
+ errorClass="UNSUPPORTED_JOIN_TYPE",
+ messageParameters={"join_type": how},
+ )
+ self.how = join_type
+
+ def plan(self, session: "SparkConnectClient") -> proto.Relation:
+ plan = self._create_proto_relation()
+ plan.lateral_join.left.CopyFrom(self.left.plan(session))
+ plan.lateral_join.right.CopyFrom(self.right.plan(session))
+ if self.on is not None:
+ plan.lateral_join.join_condition.CopyFrom(self.on.to_plan(session))
+ plan.lateral_join.join_type = self.how
+ return plan
+
+ @property
+ def observations(self) -> Dict[str, "Observation"]:
+ return dict(**super().observations, **self.right.observations)
+
+ def print(self, indent: int = 0) -> str:
+ i = " " * indent
+ o = " " * (indent + LogicalPlan.INDENT)
+ n = indent + LogicalPlan.INDENT * 2
+ return (
+ f"{i}<LateralJoin on={self.on} how={self.how}>\n{o}"
+ f"left=\n{self.left.print(n)}\n{o}right=\n{self.right.print(n)}"
+ )
+
+ def _repr_html_(self) -> str:
Review Comment:
Interesting! Just curious: what is this used for?
##########
python/pyspark/sql/connect/dataframe.py:
##########
@@ -692,10 +692,14 @@ def lateralJoin(
on: Optional[Column] = None,
how: Optional[str] = None,
) -> ParentDataFrame:
- # TODO(SPARK-50134): Implement this method
- raise PySparkNotImplementedError(
- errorClass="NOT_IMPLEMENTED",
- messageParameters={"feature": "lateralJoin()"},
+ self._check_same_session(other)
+ if how is not None and isinstance(how, str):
Review Comment:
What if `how` is not a string?
##########
python/pyspark/sql/connect/plan.py:
##########
@@ -1028,6 +1028,67 @@ def _repr_html_(self) -> str:
"""
+class LateralJoin(LogicalPlan):
+ def __init__(
+ self,
+ left: Optional[LogicalPlan],
+ right: LogicalPlan,
+ on: Optional[Column],
+ how: Optional[str],
+ ) -> None:
+ super().__init__(left)
+ self.left = cast(LogicalPlan, left)
+ self.right = right
+ self.on = on
+ if how is None:
+ join_type = proto.Join.JoinType.JOIN_TYPE_INNER
+ elif how == "inner":
+ join_type = proto.Join.JoinType.JOIN_TYPE_INNER
+ elif how in ["leftouter", "left"]:
+ join_type = proto.Join.JoinType.JOIN_TYPE_LEFT_OUTER
+ elif how == "cross":
+ join_type = proto.Join.JoinType.JOIN_TYPE_CROSS
+ else:
+ raise AnalysisException(
+ errorClass="UNSUPPORTED_JOIN_TYPE",
Review Comment:
I think we need to refactor this error message:
https://github.com/apache/spark/blob/ede9cfc92f0fe6fa735cd20e6e2da79735fd35ff/python/pyspark/errors/error-conditions.json#L1076-L1080
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]