This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 6cf42fe4e04 [SPARK-41717][CONNECT] Deduplicate print and _repr_html_ 
at LogicalPlan
6cf42fe4e04 is described below

commit 6cf42fe4e0488d3ea2d1de1a693ac05d1ce69bbb
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Tue Dec 27 10:11:37 2022 +0900

    [SPARK-41717][CONNECT] Deduplicate print and _repr_html_ at LogicalPlan
    
    ### What changes were proposed in this pull request?
    
    This PR deduplicates `print` and `_repr_html_` logic by introducing common 
logic that handles them by looking up the signature and type hints (similar to 
what we're doing for Spark SQL's query string, e.g., `TreeNode.stringArgs`).
    
    ### Why are the changes needed?
    
    To make it easier to maintain.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Virtually no.
    
    ### How was this patch tested?
    
    I manually tested almost all cases, including complex ones. In addition, I 
added a unit test.
    
    Closes #39223 from HyukjinKwon/SPARK-41717.
    
    Authored-by: Hyukjin Kwon <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 python/pyspark/sql/connect/plan.py                 | 1002 ++------------------
 .../sql/tests/connect/test_connect_plan_only.py    |   25 +
 2 files changed, 88 insertions(+), 939 deletions(-)

diff --git a/python/pyspark/sql/connect/plan.py 
b/python/pyspark/sql/connect/plan.py
index 468d028cee2..b39e5b5058b 100644
--- a/python/pyspark/sql/connect/plan.py
+++ b/python/pyspark/sql/connect/plan.py
@@ -18,6 +18,7 @@
 from typing import Any, List, Optional, Sequence, Union, cast, TYPE_CHECKING, 
Mapping, Dict
 import functools
 import pyarrow as pa
+from inspect import signature, isclass
 
 from pyspark.sql.types import DataType
 
@@ -95,14 +96,68 @@ class LogicalPlan(object):
 
         return plan
 
-    # TODO(SPARK-41717): Implement the command logic for print and _repr_html_
+    def _parameters_to_print(self, parameters: Mapping[str, Any]) -> 
Mapping[str, Any]:
+        params = {}
+        for name, tpe in parameters.items():
+            is_logical_plan = isclass(tpe.annotation) and 
isinstance(tpe.annotation, LogicalPlan)
+            is_forwardref_logical_plan = getattr(tpe.annotation, 
"__forward_arg__", "").endswith(
+                "LogicalPlan"
+            )
+            is_nested_logical_plan = any(
+                isclass(a) and issubclass(a, LogicalPlan)
+                for a in getattr(tpe.annotation, "__args__", ())
+            )
+            is_nested_forwardref_logical_plan = any(
+                getattr(a, "__forward_arg__", "").endswith("LogicalPlan")
+                for a in getattr(tpe.annotation, "__args__", ())
+            )
+            if (
+                not is_logical_plan
+                and not is_forwardref_logical_plan
+                and not is_nested_logical_plan
+                and not is_nested_forwardref_logical_plan
+            ):
+                # Searches self.name or self._name
+                try:
+                    params[name] = getattr(self, name)
+                except AttributeError:
+                    try:
+                        params[name] = getattr(self, "_" + name)
+                    except AttributeError:
+                        pass  # Simply ignore
+        return params
+
     def print(self, indent: int = 0) -> str:
-        ...
+        params = 
self._parameters_to_print(signature(self.__class__.__init__).parameters)
+        pretty_params = [f"{name}='{param}'" for name, param in params.items()]
+        if len(pretty_params) == 0:
+            pretty_str = ""
+        else:
+            pretty_str = " " + ", ".join(pretty_params)
+        return f"{' ' * 
indent}<{self.__class__.__name__}{pretty_str}>\n{self._child_print(indent)}"
 
     def _repr_html_(self) -> str:
-        ...
+        params = 
self._parameters_to_print(signature(self.__class__.__init__).parameters)
+        pretty_params = [
+            f"\n              {name}: " f"{param} <br/>" for name, param in 
params.items()
+        ]
+        if len(pretty_params) == 0:
+            pretty_str = ""
+        else:
+            pretty_str = "".join(pretty_params)
+        return f"""
+        <ul>
+           <li>
+              <b>{self.__class__.__name__}</b><br/>{pretty_str}
+              {self._child_repr()}
+           </li>
+        </ul>
+        """
 
-    def _child_repr_(self) -> str:
+    def _child_print(self, indent: int) -> str:
+        return self._child.print(indent + LogicalPlan.INDENT) if self._child 
else ""
+
+    def _child_repr(self) -> str:
         return self._child._repr_html_() if self._child is not None else ""
 
 
@@ -133,18 +188,6 @@ class DataSource(LogicalPlan):
                     plan.read.data_source.options[k] = v
         return plan
 
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-            <li>
-                <b>DataSource</b><br />
-                format: {self.format}
-                schema: {self.schema}
-                options: {self.options}
-            </li>
-        </ul>
-        """
-
 
 class Read(LogicalPlan):
     def __init__(self, table_name: str) -> None:
@@ -159,16 +202,6 @@ class Read(LogicalPlan):
     def print(self, indent: int = 0) -> str:
         return f"{' ' * indent}<Read table_name={self.table_name}>\n"
 
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-            <li>
-                <b>Read</b><br />
-                table name: {self.table_name}
-            </li>
-        </ul>
-        """
-
 
 class LocalRelation(LogicalPlan):
     """Creates a LocalRelation plan object based on a PyArrow Table."""
@@ -207,7 +240,7 @@ class LocalRelation(LogicalPlan):
     def _repr_html_(self) -> str:
         return """
         <ul>
-            <li>LocalRelation</li>
+            <li><b>LocalRelation</b></li>
         </ul>
         """
 
@@ -230,27 +263,6 @@ class ShowString(LogicalPlan):
         plan.show_string.vertical = self.vertical
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        return (
-            f"{' ' * indent}"
-            f"<ShowString numRows='{self.num_rows}', "
-            f"truncate='{self.truncate}', "
-            f"vertical='{self.vertical}'>"
-        )
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>ShowString</b><br />
-              NumRows: {self.num_rows} <br />
-              Truncate: {self.truncate} <br />
-              Vertical: {self.vertical} <br />
-              {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class Project(LogicalPlan):
     """Logical plan object for a projection.
@@ -265,13 +277,13 @@ class Project(LogicalPlan):
 
     def __init__(self, child: Optional["LogicalPlan"], *columns: 
"ColumnOrName") -> None:
         super().__init__(child)
-        self._raw_columns = list(columns)
+        self._columns = list(columns)
         self.alias: Optional[str] = None
         self._verify_expressions()
 
     def _verify_expressions(self) -> None:
         """Ensures that all input arguments are instances of Expression or 
String."""
-        for c in self._raw_columns:
+        for c in self._columns:
             if not isinstance(c, (Column, str)):
                 raise InputValidationError(
                     f"Only Column or String can be used for projections: 
'{c}'."
@@ -280,7 +292,7 @@ class Project(LogicalPlan):
     def plan(self, session: "SparkConnectClient") -> proto.Relation:
         assert self._child is not None
         proj_exprs = []
-        for c in self._raw_columns:
+        for c in self._columns:
             if isinstance(c, Column):
                 proj_exprs.append(c.to_plan(session))
             elif c == "*":
@@ -295,21 +307,6 @@ class Project(LogicalPlan):
         plan.project.expressions.extend(proj_exprs)
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child 
else ""
-        return f"{' ' * indent}<Project cols={self._raw_columns}>\n{c_buf}"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-            <li>
-                <b>Project</b><br />
-                Columns: {",".join([str(c) for c in self._raw_columns])}
-                {self._child._repr_html_() if self._child is not None else ""}
-            </li>
-        </uL>
-        """
-
 
 class WithColumns(LogicalPlan):
     """Logical plan object for a withColumns operation."""
@@ -329,21 +326,6 @@ class WithColumns(LogicalPlan):
             plan.with_columns.name_expr_list.append(name_expr)
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child 
else ""
-        return f"{' ' * indent}<WithColumns cols={self._cols_map}>\n{c_buf}"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-            <li>
-                <b>WithColumns</b><br />
-                Column Map: {self._cols_map}
-                {self._child._repr_html_() if self._child is not None else ""}
-            </li>
-        </uL>
-        """
-
 
 class Hint(LogicalPlan):
     """Logical plan object for a Hint operation."""
@@ -369,21 +351,6 @@ class Hint(LogicalPlan):
             
plan.hint.parameters.append(LiteralExpression._from_value(v).to_plan(session).literal)
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        return f"""{" " * indent}<Hint name='{self.name}', 
parameters='{self.params}'>"""
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>Hint</b><br />
-              name: {self.name} <br />
-              parameters: {self.params} <br />
-              {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class Filter(LogicalPlan):
     def __init__(self, child: Optional["LogicalPlan"], filter: Column) -> None:
@@ -397,21 +364,6 @@ class Filter(LogicalPlan):
         plan.filter.condition.CopyFrom(self.filter.to_plan(session))
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child 
else ""
-        return f"{' ' * indent}<Filter filter={self.filter}>\n{c_buf}"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-            <li>
-                <b>Filter</b><br />
-                Condition: {self.filter}
-                {self._child_repr_()}
-            </li>
-        </uL>
-        """
-
 
 class Limit(LogicalPlan):
     def __init__(self, child: Optional["LogicalPlan"], limit: int) -> None:
@@ -425,21 +377,6 @@ class Limit(LogicalPlan):
         plan.limit.limit = self.limit
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child 
else ""
-        return f"{' ' * indent}<Limit limit={self.limit}>\n{c_buf}"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-            <li>
-                <b>Limit</b><br />
-                Limit: {self.limit} <br />
-                {self._child_repr_()}
-            </li>
-        </uL>
-        """
-
 
 class Tail(LogicalPlan):
     def __init__(self, child: Optional["LogicalPlan"], limit: int) -> None:
@@ -453,21 +390,6 @@ class Tail(LogicalPlan):
         plan.tail.limit = self.limit
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child 
else ""
-        return f"{' ' * indent}<Tail limit={self.limit}>\n{c_buf}"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-            <li>
-                <b>Tail</b><br />
-                Limit: {self.limit} <br />
-                {self._child_repr_()}
-            </li>
-        </uL>
-        """
-
 
 class Offset(LogicalPlan):
     def __init__(self, child: Optional["LogicalPlan"], offset: int = 0) -> 
None:
@@ -481,21 +403,6 @@ class Offset(LogicalPlan):
         plan.offset.offset = self.offset
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child 
else ""
-        return f"{' ' * indent}<Offset={self.offset}>\n{c_buf}"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-            <li>
-                <b>Limit</b><br />
-                Offset: {self.offset} <br />
-                {self._child_repr_()}
-            </li>
-        </uL>
-        """
-
 
 class Deduplicate(LogicalPlan):
     def __init__(
@@ -517,25 +424,6 @@ class Deduplicate(LogicalPlan):
             plan.deduplicate.column_names.extend(self.column_names)
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child 
else ""
-        return (
-            f"{' ' * indent}<all_columns_as_keys={self.all_columns_as_keys} "
-            f"column_names={self.column_names}>\n{c_buf}"
-        )
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-            <li>
-                <b></b>Deduplicate<br />
-                all_columns_as_keys: {self.all_columns_as_keys} <br />
-                column_names: {self.column_names} <br />
-                {self._child_repr_()}
-            </li>
-        </uL>
-        """
-
 
 class Sort(LogicalPlan):
     def __init__(
@@ -571,22 +459,6 @@ class Sort(LogicalPlan):
         plan.sort.is_global = self.is_global
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child 
else ""
-        return f"{' ' * indent}<Sort columns={self.columns}, 
global={self.is_global}>\n{c_buf}"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-            <li>
-                <b>Sort</b><br />
-                {", ".join([str(c) for c in self.columns])}
-                global: {self.is_global} <br />
-                {self._child_repr_()}
-            </li>
-        </uL>
-        """
-
 
 class Drop(LogicalPlan):
     def __init__(
@@ -615,21 +487,6 @@ class Drop(LogicalPlan):
         plan.drop.cols.extend([self._convert_to_expr(c, session) for c in 
self.columns])
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child 
else ""
-        return f"{' ' * indent}<Drop columns={self.columns}>\n{c_buf}"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-            <li>
-                <b>Drop</b><br />
-                columns: {self.columns} <br />
-                {self._child_repr_()}
-            </li>
-        </uL>
-        """
-
 
 class Sample(LogicalPlan):
     def __init__(
@@ -660,29 +517,6 @@ class Sample(LogicalPlan):
         plan.sample.force_stable_sort = self.force_stable_sort
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child 
else ""
-        return (
-            f"{' ' * indent}"
-            f"<Sample lowerBound={self.lower_bound}, 
upperBound={self.upper_bound}, "
-            f"withReplacement={self.with_replacement}, seed={self.seed}>"
-            f"\n{c_buf}"
-        )
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-            <li>
-                <b>Sample</b><br />
-                LowerBound: {self.lower_bound} <br />
-                UpperBound: {self.upper_bound} <br />
-                WithReplacement: {self.with_replacement} <br />
-                Seed: {self.seed} <br />
-                {self._child_repr_()}
-            </li>
-        </uL>
-        """
-
 
 class Aggregate(LogicalPlan):
     def __init__(
@@ -748,23 +582,6 @@ class Aggregate(LogicalPlan):
 
         return agg
 
-    def print(self, indent: int = 0) -> str:
-        c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child 
else ""
-        return (
-            f"{' ' * indent}<Groupby={self._grouping_cols}"
-            f"Aggregate={self._aggregate_cols}>\n{c_buf}"
-        )
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-            <li>
-                <b>Aggregation</b><br />
-                {self._child_repr_()}
-            </li>
-        </uL>
-        """
-
 
 class Join(LogicalPlan):
     def __init__(
@@ -930,23 +747,6 @@ class Repartition(LogicalPlan):
         rel.repartition.num_partitions = self._num_partitions
         return rel
 
-    def print(self, indent: int = 0) -> str:
-        plan_name = "repartition" if self._shuffle else "coalesce"
-        c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child 
else ""
-        return f"{' ' * indent}<{plan_name} 
num_partitions={self._num_partitions}>\n{c_buf}"
-
-    def _repr_html_(self) -> str:
-        plan_name = "repartition" if self._shuffle else "coalesce"
-        return f"""
-        <ul>
-           <li>
-              <b>{plan_name}</b><br />
-              Child: {self._child_repr_()}
-              num_partitions: {self._num_partitions}
-           </li>
-        </ul>
-        """
-
 
 class SubqueryAlias(LogicalPlan):
     """Alias for a relation."""
@@ -962,21 +762,6 @@ class SubqueryAlias(LogicalPlan):
         rel.subquery_alias.alias = self._alias
         return rel
 
-    def print(self, indent: int = 0) -> str:
-        c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child 
else ""
-        return f"{' ' * indent}<SubqueryAlias alias={self._alias}>\n{c_buf}"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>SubqueryAlias</b><br />
-              Child: {self._child_repr_()}
-              Alias: {self._alias}
-           </li>
-        </ul>
-        """
-
 
 class SQL(LogicalPlan):
     def __init__(self, query: str) -> None:
@@ -988,21 +773,6 @@ class SQL(LogicalPlan):
         rel.sql.query = self._query
         return rel
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        sub_query = self._query.replace("\n", "")[:50]
-        return f"""{i}<SQL query='{sub_query}...'>"""
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>SQL</b><br />
-              Statement: <pre>{self._query}</pre>
-           </li>
-        </ul>
-        """
-
 
 class Range(LogicalPlan):
     def __init__(
@@ -1027,27 +797,6 @@ class Range(LogicalPlan):
             rel.range.num_partitions = self._num_partitions
         return rel
 
-    def print(self, indent: int = 0) -> str:
-        return (
-            f"{' ' * indent}"
-            f"<Range start={self._start}, end={self._end}, "
-            f"step={self._step}, num_partitions={self._num_partitions}>"
-        )
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-            <li>
-                <b>Range</b><br />
-                Start: {self._start} <br />
-                End: {self._end} <br />
-                Step: {self._step} <br />
-                NumPartitions: {self._num_partitions} <br />
-                {self._child_repr_()}
-            </li>
-        </uL>
-        """
-
 
 class ToSchema(LogicalPlan):
     def __init__(self, child: Optional["LogicalPlan"], schema: DataType) -> 
None:
@@ -1062,21 +811,6 @@ class ToSchema(LogicalPlan):
         
plan.to_schema.schema.CopyFrom(pyspark_types_to_proto_types(self._schema))
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"""{i}<ToSchema schema='{self._schema}'>"""
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>ToSchema</b><br />
-              schema: {self._schema} <br />
-              {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class RenameColumnsNameByName(LogicalPlan):
     def __init__(self, child: Optional["LogicalPlan"], colsMap: Mapping[str, 
str]) -> None:
@@ -1092,21 +826,6 @@ class RenameColumnsNameByName(LogicalPlan):
             plan.rename_columns_by_name_to_name_map.rename_columns_map[k] = v
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"""{i}<RenameColumnsNameByName ColsMap='{self._colsMap}'>"""
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>RenameColumns</b><br />
-              ColsMap: {self._colsMap} <br />
-              {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class Unpivot(LogicalPlan):
     """Logical plan object for a unpivot operation."""
@@ -1142,30 +861,6 @@ class Unpivot(LogicalPlan):
         plan.unpivot.value_column_name = self.value_column_name
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child 
else ""
-        return (
-            f"{' ' * indent}"
-            f"<Unpivot ids={self.ids}, values={self.values}, "
-            f"variable_column_name={self.variable_column_name}, "
-            f"value_column_name={self.value_column_name}>"
-            f"\n{c_buf}"
-        )
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-            <li>
-                <b>Unpivot</b><br />
-                ids: {self.ids}
-                values: {self.values}
-                variable_column_name: {self.variable_column_name}
-                value_column_name: {self.value_column_name}
-                {self._child._repr_html_() if self._child is not None else ""}
-            </li>
-        </uL>
-        """
-
 
 class NAFill(LogicalPlan):
     def __init__(
@@ -1208,21 +903,6 @@ class NAFill(LogicalPlan):
         plan.fill_na.values.extend([self._convert_value(v) for v in 
self.values])
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        return f"""{" " * indent}<NAFill cols='{self.cols}', 
values='{self.values}'>"""
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>NAFill</b><br />
-              Cols: {self.cols} <br />
-              Values: {self.values} <br />
-              {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class NADrop(LogicalPlan):
     def __init__(
@@ -1246,22 +926,6 @@ class NADrop(LogicalPlan):
             plan.drop_na.min_non_nulls = self.min_non_nulls
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"{i}" f"<NADrop cols='{self.cols}' " 
f"min_non_nulls='{self.min_non_nulls}'>"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>NADrop</b><br />
-              Cols: {self.cols} <br />
-              Min_non_nulls: {self.min_non_nulls} <br />
-              {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class NAReplace(LogicalPlan):
     def __init__(
@@ -1310,22 +974,6 @@ class NAReplace(LogicalPlan):
                 plan.replace.replacements.append(replacement)
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"{i}" f"<NAReplace cols='{self.cols}' " 
f"replacements='{self.replacements}'>"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>NADrop</b><br />
-              Cols: {self.cols} <br />
-              Replacements: {self.replacements} <br />
-              {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class StatSummary(LogicalPlan):
     def __init__(self, child: Optional["LogicalPlan"], statistics: List[str]) 
-> None:
@@ -1339,21 +987,6 @@ class StatSummary(LogicalPlan):
         plan.summary.statistics.extend(self.statistics)
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"""{i}<Summary statistics='{self.statistics}'>"""
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>Summary</b><br />
-              Statistics: {self.statistics} <br />
-              {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class StatDescribe(LogicalPlan):
     def __init__(self, child: Optional["LogicalPlan"], cols: List[str]) -> 
None:
@@ -1367,21 +1000,6 @@ class StatDescribe(LogicalPlan):
         plan.describe.cols.extend(self.cols)
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"""{i}<Describe cols='{self.cols}'>"""
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>Describe</b><br />
-              Cols: {self.cols} <br />
-              {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class StatCrosstab(LogicalPlan):
     def __init__(self, child: Optional["LogicalPlan"], col1: str, col2: str) 
-> None:
@@ -1398,22 +1016,6 @@ class StatCrosstab(LogicalPlan):
         plan.crosstab.col2 = self.col2
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"""{i}<Crosstab col1='{self.col1}' col2='{self.col2}'>"""
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>Crosstab</b><br />
-              Col1: {self.col1} <br />
-              Col2: {self.col2} <br />
-              {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class RenameColumns(LogicalPlan):
     def __init__(self, child: Optional["LogicalPlan"], cols: Sequence[str]) -> 
None:
@@ -1428,21 +1030,6 @@ class RenameColumns(LogicalPlan):
         
plan.rename_columns_by_same_length_names.column_names.extend(self._cols)
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"""{i}<RenameColumns cols='{self._cols}'>"""
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>RenameColumns</b><br />
-              cols: {self._cols} <br />
-              {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class CreateView(LogicalPlan):
     def __init__(
@@ -1463,28 +1050,6 @@ class CreateView(LogicalPlan):
         plan.create_dataframe_view.input.CopyFrom(self._child.plan(session))
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return (
-            f"{i}"
-            f"<CreateView name='{self._name}' "
-            f"is_global='{self._is_gloal} "
-            f"replace='{self._replace}'>"
-        )
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>CreateView</b><br />
-              name: {self._name} <br />
-              is_global: {self._is_gloal} <br />
-              replace: {self._replace} <br />
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class WriteOperation(LogicalPlan):
     def __init__(self, child: "LogicalPlan") -> None:
@@ -1579,20 +1144,6 @@ class CurrentDatabase(LogicalPlan):
     def plan(self, session: "SparkConnectClient") -> proto.Relation:
         return proto.Relation(current_database=proto.CurrentDatabase())
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"{i}" f"<{self.__class__.__name__}>"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b>
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class SetCurrentDatabase(LogicalPlan):
     def __init__(self, db_name: str) -> None:
@@ -1604,21 +1155,6 @@ class SetCurrentDatabase(LogicalPlan):
         plan.set_current_database.db_name = self._db_name
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"{i}" f"<{self.__class__.__name__} db_name='{self._db_name}'>"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b><br />
-              db_name: {self._db_name} <br />
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class ListDatabases(LogicalPlan):
     def __init__(self) -> None:
@@ -1627,20 +1163,6 @@ class ListDatabases(LogicalPlan):
     def plan(self, session: "SparkConnectClient") -> proto.Relation:
         return proto.Relation(list_databases=proto.ListDatabases())
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"{i}" f"<{self.__class__.__name__}>"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b>
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class ListTables(LogicalPlan):
     def __init__(self, db_name: Optional[str] = None) -> None:
@@ -1655,21 +1177,6 @@ class ListTables(LogicalPlan):
             plan = proto.Relation(list_tables=proto.ListTables())
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"{i}" f"<{self.__class__.__name__} db_name='{self._db_name}'>"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b><br />
-              db_name: {self._db_name} <br />
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class ListFunctions(LogicalPlan):
     def __init__(self, db_name: Optional[str] = None) -> None:
@@ -1684,21 +1191,6 @@ class ListFunctions(LogicalPlan):
             plan = proto.Relation(list_functions=proto.ListFunctions())
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"{i}" f"<{self.__class__.__name__} db_name='{self._db_name}'>"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b><br />
-              db_name: {self._db_name} <br />
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class ListColumns(LogicalPlan):
     def __init__(self, table_name: str, db_name: Optional[str] = None) -> None:
@@ -1713,27 +1205,6 @@ class ListColumns(LogicalPlan):
             plan.list_columns.db_name = self._db_name
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return (
-            f"{i}"
-            f"<{self.__class__.__name__}"
-            f"table_name='{self._table_name}' "
-            f"db_name='{self._db_name}'>"
-        )
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b><br />
-              table_name: {self._table_name} <br />
-              db_name: {self._db_name} <br />
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class GetDatabase(LogicalPlan):
     def __init__(self, db_name: str) -> None:
@@ -1745,21 +1216,6 @@ class GetDatabase(LogicalPlan):
         plan.get_database.db_name = self._db_name
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"{i}" f"<{self.__class__.__name__} db_name='{self._db_name}'>"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b><br />
-              db_name: {self._db_name} <br />
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class GetTable(LogicalPlan):
     def __init__(self, table_name: str, db_name: Optional[str] = None) -> None:
@@ -1774,27 +1230,6 @@ class GetTable(LogicalPlan):
             plan.get_table.db_name = self._db_name
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return (
-            f"{i}"
-            f"<{self.__class__.__name__}"
-            f"table_name='{self._table_name}' "
-            f"db_name='{self._db_name}'>"
-        )
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b><br />
-              table_name: {self._table_name} <br />
-              db_name: {self._db_name} <br />
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class GetFunction(LogicalPlan):
     def __init__(self, function_name: str, db_name: Optional[str] = None) -> None:
@@ -1809,27 +1244,6 @@ class GetFunction(LogicalPlan):
             plan.get_function.db_name = self._db_name
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return (
-            f"{i}"
-            f"<{self.__class__.__name__}"
-            f"function_name='{self._function_name}' "
-            f"db_name='{self._db_name}'>"
-        )
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b><br />
-              function_name: {self._function_name} <br />
-              db_name: {self._db_name} <br />
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class DatabaseExists(LogicalPlan):
     def __init__(self, db_name: str) -> None:
@@ -1841,21 +1255,6 @@ class DatabaseExists(LogicalPlan):
         plan.database_exists.db_name = self._db_name
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"{i}" f"<{self.__class__.__name__} db_name='{self._db_name}'>"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b><br />
-              db_name: {self._db_name} <br />
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class TableExists(LogicalPlan):
     def __init__(self, table_name: str, db_name: Optional[str] = None) -> None:
@@ -1870,27 +1269,6 @@ class TableExists(LogicalPlan):
             plan.table_exists.db_name = self._db_name
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return (
-            f"{i}"
-            f"<{self.__class__.__name__}"
-            f"table_name='{self._table_name}' "
-            f"db_name='{self._db_name}'>"
-        )
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b><br />
-              table_name: {self._table_name} <br />
-              db_name: {self._db_name} <br />
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class FunctionExists(LogicalPlan):
     def __init__(self, function_name: str, db_name: Optional[str] = None) -> None:
@@ -1905,27 +1283,6 @@ class FunctionExists(LogicalPlan):
             plan.function_exists.db_name = self._db_name
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return (
-            f"{i}"
-            f"<{self.__class__.__name__}"
-            f"function_name='{self._function_name}' "
-            f"db_name='{self._db_name}'>"
-        )
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b><br />
-              function_name: {self._function_name} <br />
-              db_name: {self._db_name} <br />
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class CreateExternalTable(LogicalPlan):
     def __init__(
@@ -1958,33 +1315,6 @@ class CreateExternalTable(LogicalPlan):
                 plan.create_external_table.options[k] = v
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return (
-            f"{i}"
-            f"<{self.__class__.__name__}"
-            f"table_name='{self._table_name}' "
-            f"path='{self._path}' "
-            f"source='{self._source}' "
-            f"schema='{self._schema}' "
-            f"options='{self._options}'>"
-        )
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b><br />
-              table_name: {self._table_name} <br />
-              path: {self._path} <br />
-              source: {self._source} <br />
-              schema: {self._schema} <br />
-              options: {self._options} <br />
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class CreateTable(LogicalPlan):
     def __init__(
@@ -2021,35 +1351,6 @@ class CreateTable(LogicalPlan):
                 plan.create_table.options[k] = v
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return (
-            f"{i}"
-            f"<{self.__class__.__name__}"
-            f"table_name='{self._table_name}' "
-            f"path='{self._path}' "
-            f"source='{self._source}' "
-            f"description='{self._description}' "
-            f"schema='{self._schema}' "
-            f"options='{self._options}'>"
-        )
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b><br />
-              table_name: {self._table_name} <br />
-              path: {self._path} <br />
-              source: {self._source} <br />
-              description: {self._description} <br />
-              schema: {self._schema} <br />
-              options: {self._options} <br />
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class DropTempView(LogicalPlan):
     def __init__(self, view_name: str) -> None:
@@ -2061,21 +1362,6 @@ class DropTempView(LogicalPlan):
         plan.drop_temp_view.view_name = self._view_name
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"{i}" f"<{self.__class__.__name__} view_name='{self._view_name}'>"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b><br />
-              view_name: {self._view_name} <br />
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class DropGlobalTempView(LogicalPlan):
     def __init__(self, view_name: str) -> None:
@@ -2087,21 +1373,6 @@ class DropGlobalTempView(LogicalPlan):
         plan.drop_global_temp_view.view_name = self._view_name
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"{i}" f"<{self.__class__.__name__} view_name='{self._view_name}'>"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b><br />
-              view_name: {self._view_name} <br />
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class RecoverPartitions(LogicalPlan):
     def __init__(self, table_name: str) -> None:
@@ -2113,21 +1384,6 @@ class RecoverPartitions(LogicalPlan):
         plan.recover_partitions.table_name = self._table_name
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"{i}" f"<{self.__class__.__name__} table_name='{self._table_name}'>"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b><br />
-              table_name: {self._table_name} <br />
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 # TODO(SPARK-41612): Support Catalog.isCached
 # class IsCached(LogicalPlan):
@@ -2140,21 +1396,6 @@ class RecoverPartitions(LogicalPlan):
 #         plan.is_cached.table_name = self._table_name
 #         return plan
 #
-#     def print(self, indent: int = 0) -> str:
-#         i = " " * indent
-#         return f"{i}" f"<{self.__class__.__name__} table_name='{self._table_name}'>"
-#
-#     def _repr_html_(self) -> str:
-#         return f"""
-#         <ul>
-#            <li>
-#               <b>{self.__class__.__name__}</b><br />
-#               table_name: {self._table_name} <br />
-#             {self._child_repr_()}
-#            </li>
-#         </ul>
-#         """
-#
 #
 # TODO(SPARK-41600): Support Catalog.cacheTable
 # class CacheTable(LogicalPlan):
@@ -2167,21 +1408,6 @@ class RecoverPartitions(LogicalPlan):
 #         plan.cache_table.table_name = self._table_name
 #         return plan
 #
-#     def print(self, indent: int = 0) -> str:
-#         i = " " * indent
-#         return f"{i}" f"<{self.__class__.__name__} table_name='{self._table_name}'>"
-#
-#     def _repr_html_(self) -> str:
-#         return f"""
-#         <ul>
-#            <li>
-#               <b>{self.__class__.__name__}</b><br />
-#               table_name: {self._table_name} <br />
-#             {self._child_repr_()}
-#            </li>
-#         </ul>
-#         """
-#
 #
 # TODO(SPARK-41623): Support Catalog.uncacheTable
 # class UncacheTable(LogicalPlan):
@@ -2193,21 +1419,6 @@ class RecoverPartitions(LogicalPlan):
 #         plan = proto.Relation()
 #         plan.uncache_table.table_name = self._table_name
 #         return plan
-#
-#     def print(self, indent: int = 0) -> str:
-#         i = " " * indent
-#         return f"{i}" f"<{self.__class__.__name__} table_name='{self._table_name}'>"
-#
-#     def _repr_html_(self) -> str:
-#         return f"""
-#         <ul>
-#            <li>
-#               <b>{self.__class__.__name__}</b><br />
-#               table_name: {self._table_name} <br />
-#             {self._child_repr_()}
-#            </li>
-#         </ul>
-#         """
 
 
 class ClearCache(LogicalPlan):
@@ -2217,20 +1428,6 @@ class ClearCache(LogicalPlan):
     def plan(self, session: "SparkConnectClient") -> proto.Relation:
         return proto.Relation(clear_cache=proto.ClearCache())
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"{i}" f"<{self.__class__.__name__}>"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b>
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class RefreshTable(LogicalPlan):
     def __init__(self, table_name: str) -> None:
@@ -2242,21 +1439,6 @@ class RefreshTable(LogicalPlan):
         plan.refresh_table.table_name = self._table_name
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"{i}" f"<{self.__class__.__name__} table_name='{self._table_name}'>"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b><br />
-              table_name: {self._table_name} <br />
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class RefreshByPath(LogicalPlan):
     def __init__(self, path: str) -> None:
@@ -2268,21 +1450,6 @@ class RefreshByPath(LogicalPlan):
         plan.refresh_by_path.path = self._path
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"{i}" f"<{self.__class__.__name__} path='{self._path}'>"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b><br />
-              db_name: {self._path} <br />
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class CurrentCatalog(LogicalPlan):
     def __init__(self) -> None:
@@ -2291,20 +1458,6 @@ class CurrentCatalog(LogicalPlan):
     def plan(self, session: "SparkConnectClient") -> proto.Relation:
         return proto.Relation(current_catalog=proto.CurrentCatalog())
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"{i}" f"<{self.__class__.__name__}>"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b>
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class SetCurrentCatalog(LogicalPlan):
     def __init__(self, catalog_name: str) -> None:
@@ -2316,21 +1469,6 @@ class SetCurrentCatalog(LogicalPlan):
         plan.set_current_catalog.catalog_name = self._catalog_name
         return plan
 
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"{i}" f"<{self.__class__.__name__} catalog_name='{self._catalog_name}'>"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b><br />
-              catalog_name: {self._catalog_name} <br />
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
-
 
 class ListCatalogs(LogicalPlan):
     def __init__(self) -> None:
@@ -2338,17 +1476,3 @@ class ListCatalogs(LogicalPlan):
 
     def plan(self, session: "SparkConnectClient") -> proto.Relation:
         return proto.Relation(list_catalogs=proto.ListCatalogs())
-
-    def print(self, indent: int = 0) -> str:
-        i = " " * indent
-        return f"{i}" f"<{self.__class__.__name__}>"
-
-    def _repr_html_(self) -> str:
-        return f"""
-        <ul>
-           <li>
-              <b>{self.__class__.__name__}</b>
-            {self._child_repr_()}
-           </li>
-        </ul>
-        """
diff --git a/python/pyspark/sql/tests/connect/test_connect_plan_only.py b/python/pyspark/sql/tests/connect/test_connect_plan_only.py
index ee6ad7b453b..db42c00fa6c 100644
--- a/python/pyspark/sql/tests/connect/test_connect_plan_only.py
+++ b/python/pyspark/sql/tests/connect/test_connect_plan_only.py
@@ -618,6 +618,31 @@ class SparkConnectTestsPlanOnly(PlanOnlyTestFixture):
         self.assertIsNotNone(col_plan)
         self.assertEqual(col_plan.unresolved_regex.col_name, "col_name")
 
+    def test_print(self):
+        # SPARK-41717: test print
+        self.assertEqual(
+            self.connect.sql("SELECT 1")._plan.print().strip(), "<SQL query='SELECT 1'>"
+        )
+        self.assertEqual(
+            self.connect.range(1, 10)._plan.print().strip(),
+            "<Range start='1', end='10', step='1', num_partitions='None'>",
+        )
+
+    def test_repr(self):
+        # SPARK-41717: test __repr_html__
+        self.assertIn("query: SELECT 1", self.connect.sql("SELECT 1")._plan._repr_html_().strip())
+
+        expected = (
+            "<b>Range</b><br/>",
+            "start: 1 <br/>",
+            "end: 10 <br/>",
+            "step: 1 <br/>",
+            "num_partitions: None <br/>",
+        )
+        actual = self.connect.range(1, 10)._plan._repr_html_().strip()
+        for line in expected:
+            self.assertIn(line, actual)
+
 
 if __name__ == "__main__":
     from pyspark.sql.tests.connect.test_connect_plan_only import *  # noqa: F401


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to