This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 6cf42fe4e04 [SPARK-41717][CONNECT] Deduplicate print and _repr_html_
at LogicalPlan
6cf42fe4e04 is described below
commit 6cf42fe4e0488d3ea2d1de1a693ac05d1ce69bbb
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Tue Dec 27 10:11:37 2022 +0900
[SPARK-41717][CONNECT] Deduplicate print and _repr_html_ at LogicalPlan
### What changes were proposed in this pull request?
This PR deduplicates the `print` and `_repr_html_` logic by introducing common
logic that handles both by looking up the signature and type hints (similar to
what we do for Spark SQL's query string, e.g., `TreeNode.stringArgs`).
### Why are the changes needed?
To make it easier to maintain.
### Does this PR introduce _any_ user-facing change?
Virtually no.
### How was this patch tested?
I manually tested almost all cases, including complex ones. In addition, I
added a unit test.
Closes #39223 from HyukjinKwon/SPARK-41717.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/connect/plan.py | 1002 ++------------------
.../sql/tests/connect/test_connect_plan_only.py | 25 +
2 files changed, 88 insertions(+), 939 deletions(-)
diff --git a/python/pyspark/sql/connect/plan.py
b/python/pyspark/sql/connect/plan.py
index 468d028cee2..b39e5b5058b 100644
--- a/python/pyspark/sql/connect/plan.py
+++ b/python/pyspark/sql/connect/plan.py
@@ -18,6 +18,7 @@
from typing import Any, List, Optional, Sequence, Union, cast, TYPE_CHECKING,
Mapping, Dict
import functools
import pyarrow as pa
+from inspect import signature, isclass
from pyspark.sql.types import DataType
@@ -95,14 +96,68 @@ class LogicalPlan(object):
return plan
- # TODO(SPARK-41717): Implement the command logic for print and _repr_html_
+ def _parameters_to_print(self, parameters: Mapping[str, Any]) ->
Mapping[str, Any]:
+ params = {}
+ for name, tpe in parameters.items():
+ is_logical_plan = isclass(tpe.annotation) and
isinstance(tpe.annotation, LogicalPlan)
+ is_forwardref_logical_plan = getattr(tpe.annotation,
"__forward_arg__", "").endswith(
+ "LogicalPlan"
+ )
+ is_nested_logical_plan = any(
+ isclass(a) and issubclass(a, LogicalPlan)
+ for a in getattr(tpe.annotation, "__args__", ())
+ )
+ is_nested_forwardref_logical_plan = any(
+ getattr(a, "__forward_arg__", "").endswith("LogicalPlan")
+ for a in getattr(tpe.annotation, "__args__", ())
+ )
+ if (
+ not is_logical_plan
+ and not is_forwardref_logical_plan
+ and not is_nested_logical_plan
+ and not is_nested_forwardref_logical_plan
+ ):
+ # Searches self.name or self._name
+ try:
+ params[name] = getattr(self, name)
+ except AttributeError:
+ try:
+ params[name] = getattr(self, "_" + name)
+ except AttributeError:
+ pass # Simply ignore
+ return params
+
def print(self, indent: int = 0) -> str:
- ...
+ params =
self._parameters_to_print(signature(self.__class__.__init__).parameters)
+ pretty_params = [f"{name}='{param}'" for name, param in params.items()]
+ if len(pretty_params) == 0:
+ pretty_str = ""
+ else:
+ pretty_str = " " + ", ".join(pretty_params)
+ return f"{' ' *
indent}<{self.__class__.__name__}{pretty_str}>\n{self._child_print(indent)}"
def _repr_html_(self) -> str:
- ...
+ params =
self._parameters_to_print(signature(self.__class__.__init__).parameters)
+ pretty_params = [
+ f"\n {name}: " f"{param} <br/>" for name, param in
params.items()
+ ]
+ if len(pretty_params) == 0:
+ pretty_str = ""
+ else:
+ pretty_str = "".join(pretty_params)
+ return f"""
+ <ul>
+ <li>
+ <b>{self.__class__.__name__}</b><br/>{pretty_str}
+ {self._child_repr()}
+ </li>
+ </ul>
+ """
- def _child_repr_(self) -> str:
+ def _child_print(self, indent: int) -> str:
+ return self._child.print(indent + LogicalPlan.INDENT) if self._child
else ""
+
+ def _child_repr(self) -> str:
return self._child._repr_html_() if self._child is not None else ""
@@ -133,18 +188,6 @@ class DataSource(LogicalPlan):
plan.read.data_source.options[k] = v
return plan
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>DataSource</b><br />
- format: {self.format}
- schema: {self.schema}
- options: {self.options}
- </li>
- </ul>
- """
-
class Read(LogicalPlan):
def __init__(self, table_name: str) -> None:
@@ -159,16 +202,6 @@ class Read(LogicalPlan):
def print(self, indent: int = 0) -> str:
return f"{' ' * indent}<Read table_name={self.table_name}>\n"
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>Read</b><br />
- table name: {self.table_name}
- </li>
- </ul>
- """
-
class LocalRelation(LogicalPlan):
"""Creates a LocalRelation plan object based on a PyArrow Table."""
@@ -207,7 +240,7 @@ class LocalRelation(LogicalPlan):
def _repr_html_(self) -> str:
return """
<ul>
- <li>LocalRelation</li>
+ <li><b>LocalRelation</b></li>
</ul>
"""
@@ -230,27 +263,6 @@ class ShowString(LogicalPlan):
plan.show_string.vertical = self.vertical
return plan
- def print(self, indent: int = 0) -> str:
- return (
- f"{' ' * indent}"
- f"<ShowString numRows='{self.num_rows}', "
- f"truncate='{self.truncate}', "
- f"vertical='{self.vertical}'>"
- )
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>ShowString</b><br />
- NumRows: {self.num_rows} <br />
- Truncate: {self.truncate} <br />
- Vertical: {self.vertical} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class Project(LogicalPlan):
"""Logical plan object for a projection.
@@ -265,13 +277,13 @@ class Project(LogicalPlan):
def __init__(self, child: Optional["LogicalPlan"], *columns:
"ColumnOrName") -> None:
super().__init__(child)
- self._raw_columns = list(columns)
+ self._columns = list(columns)
self.alias: Optional[str] = None
self._verify_expressions()
def _verify_expressions(self) -> None:
"""Ensures that all input arguments are instances of Expression or
String."""
- for c in self._raw_columns:
+ for c in self._columns:
if not isinstance(c, (Column, str)):
raise InputValidationError(
f"Only Column or String can be used for projections:
'{c}'."
@@ -280,7 +292,7 @@ class Project(LogicalPlan):
def plan(self, session: "SparkConnectClient") -> proto.Relation:
assert self._child is not None
proj_exprs = []
- for c in self._raw_columns:
+ for c in self._columns:
if isinstance(c, Column):
proj_exprs.append(c.to_plan(session))
elif c == "*":
@@ -295,21 +307,6 @@ class Project(LogicalPlan):
plan.project.expressions.extend(proj_exprs)
return plan
- def print(self, indent: int = 0) -> str:
- c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child
else ""
- return f"{' ' * indent}<Project cols={self._raw_columns}>\n{c_buf}"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>Project</b><br />
- Columns: {",".join([str(c) for c in self._raw_columns])}
- {self._child._repr_html_() if self._child is not None else ""}
- </li>
- </uL>
- """
-
class WithColumns(LogicalPlan):
"""Logical plan object for a withColumns operation."""
@@ -329,21 +326,6 @@ class WithColumns(LogicalPlan):
plan.with_columns.name_expr_list.append(name_expr)
return plan
- def print(self, indent: int = 0) -> str:
- c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child
else ""
- return f"{' ' * indent}<WithColumns cols={self._cols_map}>\n{c_buf}"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>WithColumns</b><br />
- Column Map: {self._cols_map}
- {self._child._repr_html_() if self._child is not None else ""}
- </li>
- </uL>
- """
-
class Hint(LogicalPlan):
"""Logical plan object for a Hint operation."""
@@ -369,21 +351,6 @@ class Hint(LogicalPlan):
plan.hint.parameters.append(LiteralExpression._from_value(v).to_plan(session).literal)
return plan
- def print(self, indent: int = 0) -> str:
- return f"""{" " * indent}<Hint name='{self.name}',
parameters='{self.params}'>"""
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>Hint</b><br />
- name: {self.name} <br />
- parameters: {self.params} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class Filter(LogicalPlan):
def __init__(self, child: Optional["LogicalPlan"], filter: Column) -> None:
@@ -397,21 +364,6 @@ class Filter(LogicalPlan):
plan.filter.condition.CopyFrom(self.filter.to_plan(session))
return plan
- def print(self, indent: int = 0) -> str:
- c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child
else ""
- return f"{' ' * indent}<Filter filter={self.filter}>\n{c_buf}"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>Filter</b><br />
- Condition: {self.filter}
- {self._child_repr_()}
- </li>
- </uL>
- """
-
class Limit(LogicalPlan):
def __init__(self, child: Optional["LogicalPlan"], limit: int) -> None:
@@ -425,21 +377,6 @@ class Limit(LogicalPlan):
plan.limit.limit = self.limit
return plan
- def print(self, indent: int = 0) -> str:
- c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child
else ""
- return f"{' ' * indent}<Limit limit={self.limit}>\n{c_buf}"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>Limit</b><br />
- Limit: {self.limit} <br />
- {self._child_repr_()}
- </li>
- </uL>
- """
-
class Tail(LogicalPlan):
def __init__(self, child: Optional["LogicalPlan"], limit: int) -> None:
@@ -453,21 +390,6 @@ class Tail(LogicalPlan):
plan.tail.limit = self.limit
return plan
- def print(self, indent: int = 0) -> str:
- c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child
else ""
- return f"{' ' * indent}<Tail limit={self.limit}>\n{c_buf}"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>Tail</b><br />
- Limit: {self.limit} <br />
- {self._child_repr_()}
- </li>
- </uL>
- """
-
class Offset(LogicalPlan):
def __init__(self, child: Optional["LogicalPlan"], offset: int = 0) ->
None:
@@ -481,21 +403,6 @@ class Offset(LogicalPlan):
plan.offset.offset = self.offset
return plan
- def print(self, indent: int = 0) -> str:
- c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child
else ""
- return f"{' ' * indent}<Offset={self.offset}>\n{c_buf}"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>Limit</b><br />
- Offset: {self.offset} <br />
- {self._child_repr_()}
- </li>
- </uL>
- """
-
class Deduplicate(LogicalPlan):
def __init__(
@@ -517,25 +424,6 @@ class Deduplicate(LogicalPlan):
plan.deduplicate.column_names.extend(self.column_names)
return plan
- def print(self, indent: int = 0) -> str:
- c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child
else ""
- return (
- f"{' ' * indent}<all_columns_as_keys={self.all_columns_as_keys} "
- f"column_names={self.column_names}>\n{c_buf}"
- )
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b></b>Deduplicate<br />
- all_columns_as_keys: {self.all_columns_as_keys} <br />
- column_names: {self.column_names} <br />
- {self._child_repr_()}
- </li>
- </uL>
- """
-
class Sort(LogicalPlan):
def __init__(
@@ -571,22 +459,6 @@ class Sort(LogicalPlan):
plan.sort.is_global = self.is_global
return plan
- def print(self, indent: int = 0) -> str:
- c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child
else ""
- return f"{' ' * indent}<Sort columns={self.columns},
global={self.is_global}>\n{c_buf}"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>Sort</b><br />
- {", ".join([str(c) for c in self.columns])}
- global: {self.is_global} <br />
- {self._child_repr_()}
- </li>
- </uL>
- """
-
class Drop(LogicalPlan):
def __init__(
@@ -615,21 +487,6 @@ class Drop(LogicalPlan):
plan.drop.cols.extend([self._convert_to_expr(c, session) for c in
self.columns])
return plan
- def print(self, indent: int = 0) -> str:
- c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child
else ""
- return f"{' ' * indent}<Drop columns={self.columns}>\n{c_buf}"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>Drop</b><br />
- columns: {self.columns} <br />
- {self._child_repr_()}
- </li>
- </uL>
- """
-
class Sample(LogicalPlan):
def __init__(
@@ -660,29 +517,6 @@ class Sample(LogicalPlan):
plan.sample.force_stable_sort = self.force_stable_sort
return plan
- def print(self, indent: int = 0) -> str:
- c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child
else ""
- return (
- f"{' ' * indent}"
- f"<Sample lowerBound={self.lower_bound},
upperBound={self.upper_bound}, "
- f"withReplacement={self.with_replacement}, seed={self.seed}>"
- f"\n{c_buf}"
- )
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>Sample</b><br />
- LowerBound: {self.lower_bound} <br />
- UpperBound: {self.upper_bound} <br />
- WithReplacement: {self.with_replacement} <br />
- Seed: {self.seed} <br />
- {self._child_repr_()}
- </li>
- </uL>
- """
-
class Aggregate(LogicalPlan):
def __init__(
@@ -748,23 +582,6 @@ class Aggregate(LogicalPlan):
return agg
- def print(self, indent: int = 0) -> str:
- c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child
else ""
- return (
- f"{' ' * indent}<Groupby={self._grouping_cols}"
- f"Aggregate={self._aggregate_cols}>\n{c_buf}"
- )
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>Aggregation</b><br />
- {self._child_repr_()}
- </li>
- </uL>
- """
-
class Join(LogicalPlan):
def __init__(
@@ -930,23 +747,6 @@ class Repartition(LogicalPlan):
rel.repartition.num_partitions = self._num_partitions
return rel
- def print(self, indent: int = 0) -> str:
- plan_name = "repartition" if self._shuffle else "coalesce"
- c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child
else ""
- return f"{' ' * indent}<{plan_name}
num_partitions={self._num_partitions}>\n{c_buf}"
-
- def _repr_html_(self) -> str:
- plan_name = "repartition" if self._shuffle else "coalesce"
- return f"""
- <ul>
- <li>
- <b>{plan_name}</b><br />
- Child: {self._child_repr_()}
- num_partitions: {self._num_partitions}
- </li>
- </ul>
- """
-
class SubqueryAlias(LogicalPlan):
"""Alias for a relation."""
@@ -962,21 +762,6 @@ class SubqueryAlias(LogicalPlan):
rel.subquery_alias.alias = self._alias
return rel
- def print(self, indent: int = 0) -> str:
- c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child
else ""
- return f"{' ' * indent}<SubqueryAlias alias={self._alias}>\n{c_buf}"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>SubqueryAlias</b><br />
- Child: {self._child_repr_()}
- Alias: {self._alias}
- </li>
- </ul>
- """
-
class SQL(LogicalPlan):
def __init__(self, query: str) -> None:
@@ -988,21 +773,6 @@ class SQL(LogicalPlan):
rel.sql.query = self._query
return rel
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- sub_query = self._query.replace("\n", "")[:50]
- return f"""{i}<SQL query='{sub_query}...'>"""
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>SQL</b><br />
- Statement: <pre>{self._query}</pre>
- </li>
- </ul>
- """
-
class Range(LogicalPlan):
def __init__(
@@ -1027,27 +797,6 @@ class Range(LogicalPlan):
rel.range.num_partitions = self._num_partitions
return rel
- def print(self, indent: int = 0) -> str:
- return (
- f"{' ' * indent}"
- f"<Range start={self._start}, end={self._end}, "
- f"step={self._step}, num_partitions={self._num_partitions}>"
- )
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>Range</b><br />
- Start: {self._start} <br />
- End: {self._end} <br />
- Step: {self._step} <br />
- NumPartitions: {self._num_partitions} <br />
- {self._child_repr_()}
- </li>
- </uL>
- """
-
class ToSchema(LogicalPlan):
def __init__(self, child: Optional["LogicalPlan"], schema: DataType) ->
None:
@@ -1062,21 +811,6 @@ class ToSchema(LogicalPlan):
plan.to_schema.schema.CopyFrom(pyspark_types_to_proto_types(self._schema))
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"""{i}<ToSchema schema='{self._schema}'>"""
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>ToSchema</b><br />
- schema: {self._schema} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class RenameColumnsNameByName(LogicalPlan):
def __init__(self, child: Optional["LogicalPlan"], colsMap: Mapping[str,
str]) -> None:
@@ -1092,21 +826,6 @@ class RenameColumnsNameByName(LogicalPlan):
plan.rename_columns_by_name_to_name_map.rename_columns_map[k] = v
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"""{i}<RenameColumnsNameByName ColsMap='{self._colsMap}'>"""
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>RenameColumns</b><br />
- ColsMap: {self._colsMap} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class Unpivot(LogicalPlan):
"""Logical plan object for a unpivot operation."""
@@ -1142,30 +861,6 @@ class Unpivot(LogicalPlan):
plan.unpivot.value_column_name = self.value_column_name
return plan
- def print(self, indent: int = 0) -> str:
- c_buf = self._child.print(indent + LogicalPlan.INDENT) if self._child
else ""
- return (
- f"{' ' * indent}"
- f"<Unpivot ids={self.ids}, values={self.values}, "
- f"variable_column_name={self.variable_column_name}, "
- f"value_column_name={self.value_column_name}>"
- f"\n{c_buf}"
- )
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>Unpivot</b><br />
- ids: {self.ids}
- values: {self.values}
- variable_column_name: {self.variable_column_name}
- value_column_name: {self.value_column_name}
- {self._child._repr_html_() if self._child is not None else ""}
- </li>
- </uL>
- """
-
class NAFill(LogicalPlan):
def __init__(
@@ -1208,21 +903,6 @@ class NAFill(LogicalPlan):
plan.fill_na.values.extend([self._convert_value(v) for v in
self.values])
return plan
- def print(self, indent: int = 0) -> str:
- return f"""{" " * indent}<NAFill cols='{self.cols}',
values='{self.values}'>"""
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>NAFill</b><br />
- Cols: {self.cols} <br />
- Values: {self.values} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class NADrop(LogicalPlan):
def __init__(
@@ -1246,22 +926,6 @@ class NADrop(LogicalPlan):
plan.drop_na.min_non_nulls = self.min_non_nulls
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"{i}" f"<NADrop cols='{self.cols}' "
f"min_non_nulls='{self.min_non_nulls}'>"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>NADrop</b><br />
- Cols: {self.cols} <br />
- Min_non_nulls: {self.min_non_nulls} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class NAReplace(LogicalPlan):
def __init__(
@@ -1310,22 +974,6 @@ class NAReplace(LogicalPlan):
plan.replace.replacements.append(replacement)
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"{i}" f"<NAReplace cols='{self.cols}' "
f"replacements='{self.replacements}'>"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>NADrop</b><br />
- Cols: {self.cols} <br />
- Replacements: {self.replacements} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class StatSummary(LogicalPlan):
def __init__(self, child: Optional["LogicalPlan"], statistics: List[str])
-> None:
@@ -1339,21 +987,6 @@ class StatSummary(LogicalPlan):
plan.summary.statistics.extend(self.statistics)
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"""{i}<Summary statistics='{self.statistics}'>"""
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>Summary</b><br />
- Statistics: {self.statistics} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class StatDescribe(LogicalPlan):
def __init__(self, child: Optional["LogicalPlan"], cols: List[str]) ->
None:
@@ -1367,21 +1000,6 @@ class StatDescribe(LogicalPlan):
plan.describe.cols.extend(self.cols)
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"""{i}<Describe cols='{self.cols}'>"""
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>Describe</b><br />
- Cols: {self.cols} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class StatCrosstab(LogicalPlan):
def __init__(self, child: Optional["LogicalPlan"], col1: str, col2: str)
-> None:
@@ -1398,22 +1016,6 @@ class StatCrosstab(LogicalPlan):
plan.crosstab.col2 = self.col2
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"""{i}<Crosstab col1='{self.col1}' col2='{self.col2}'>"""
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>Crosstab</b><br />
- Col1: {self.col1} <br />
- Col2: {self.col2} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class RenameColumns(LogicalPlan):
def __init__(self, child: Optional["LogicalPlan"], cols: Sequence[str]) ->
None:
@@ -1428,21 +1030,6 @@ class RenameColumns(LogicalPlan):
plan.rename_columns_by_same_length_names.column_names.extend(self._cols)
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"""{i}<RenameColumns cols='{self._cols}'>"""
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>RenameColumns</b><br />
- cols: {self._cols} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class CreateView(LogicalPlan):
def __init__(
@@ -1463,28 +1050,6 @@ class CreateView(LogicalPlan):
plan.create_dataframe_view.input.CopyFrom(self._child.plan(session))
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return (
- f"{i}"
- f"<CreateView name='{self._name}' "
- f"is_global='{self._is_gloal} "
- f"replace='{self._replace}'>"
- )
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>CreateView</b><br />
- name: {self._name} <br />
- is_global: {self._is_gloal} <br />
- replace: {self._replace} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class WriteOperation(LogicalPlan):
def __init__(self, child: "LogicalPlan") -> None:
@@ -1579,20 +1144,6 @@ class CurrentDatabase(LogicalPlan):
def plan(self, session: "SparkConnectClient") -> proto.Relation:
return proto.Relation(current_database=proto.CurrentDatabase())
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"{i}" f"<{self.__class__.__name__}>"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b>
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class SetCurrentDatabase(LogicalPlan):
def __init__(self, db_name: str) -> None:
@@ -1604,21 +1155,6 @@ class SetCurrentDatabase(LogicalPlan):
plan.set_current_database.db_name = self._db_name
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"{i}" f"<{self.__class__.__name__} db_name='{self._db_name}'>"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b><br />
- db_name: {self._db_name} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class ListDatabases(LogicalPlan):
def __init__(self) -> None:
@@ -1627,20 +1163,6 @@ class ListDatabases(LogicalPlan):
def plan(self, session: "SparkConnectClient") -> proto.Relation:
return proto.Relation(list_databases=proto.ListDatabases())
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"{i}" f"<{self.__class__.__name__}>"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b>
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class ListTables(LogicalPlan):
def __init__(self, db_name: Optional[str] = None) -> None:
@@ -1655,21 +1177,6 @@ class ListTables(LogicalPlan):
plan = proto.Relation(list_tables=proto.ListTables())
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"{i}" f"<{self.__class__.__name__} db_name='{self._db_name}'>"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b><br />
- db_name: {self._db_name} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class ListFunctions(LogicalPlan):
def __init__(self, db_name: Optional[str] = None) -> None:
@@ -1684,21 +1191,6 @@ class ListFunctions(LogicalPlan):
plan = proto.Relation(list_functions=proto.ListFunctions())
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"{i}" f"<{self.__class__.__name__} db_name='{self._db_name}'>"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b><br />
- db_name: {self._db_name} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class ListColumns(LogicalPlan):
def __init__(self, table_name: str, db_name: Optional[str] = None) -> None:
@@ -1713,27 +1205,6 @@ class ListColumns(LogicalPlan):
plan.list_columns.db_name = self._db_name
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return (
- f"{i}"
- f"<{self.__class__.__name__}"
- f"table_name='{self._table_name}' "
- f"db_name='{self._db_name}'>"
- )
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b><br />
- table_name: {self._table_name} <br />
- db_name: {self._db_name} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class GetDatabase(LogicalPlan):
def __init__(self, db_name: str) -> None:
@@ -1745,21 +1216,6 @@ class GetDatabase(LogicalPlan):
plan.get_database.db_name = self._db_name
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"{i}" f"<{self.__class__.__name__} db_name='{self._db_name}'>"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b><br />
- db_name: {self._db_name} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class GetTable(LogicalPlan):
def __init__(self, table_name: str, db_name: Optional[str] = None) -> None:
@@ -1774,27 +1230,6 @@ class GetTable(LogicalPlan):
plan.get_table.db_name = self._db_name
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return (
- f"{i}"
- f"<{self.__class__.__name__}"
- f"table_name='{self._table_name}' "
- f"db_name='{self._db_name}'>"
- )
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b><br />
- table_name: {self._table_name} <br />
- db_name: {self._db_name} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class GetFunction(LogicalPlan):
def __init__(self, function_name: str, db_name: Optional[str] = None) ->
None:
@@ -1809,27 +1244,6 @@ class GetFunction(LogicalPlan):
plan.get_function.db_name = self._db_name
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return (
- f"{i}"
- f"<{self.__class__.__name__}"
- f"function_name='{self._function_name}' "
- f"db_name='{self._db_name}'>"
- )
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b><br />
- function_name: {self._function_name} <br />
- db_name: {self._db_name} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class DatabaseExists(LogicalPlan):
def __init__(self, db_name: str) -> None:
@@ -1841,21 +1255,6 @@ class DatabaseExists(LogicalPlan):
plan.database_exists.db_name = self._db_name
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"{i}" f"<{self.__class__.__name__} db_name='{self._db_name}'>"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b><br />
- db_name: {self._db_name} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class TableExists(LogicalPlan):
def __init__(self, table_name: str, db_name: Optional[str] = None) -> None:
@@ -1870,27 +1269,6 @@ class TableExists(LogicalPlan):
plan.table_exists.db_name = self._db_name
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return (
- f"{i}"
- f"<{self.__class__.__name__}"
- f"table_name='{self._table_name}' "
- f"db_name='{self._db_name}'>"
- )
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b><br />
- table_name: {self._table_name} <br />
- db_name: {self._db_name} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class FunctionExists(LogicalPlan):
def __init__(self, function_name: str, db_name: Optional[str] = None) ->
None:
@@ -1905,27 +1283,6 @@ class FunctionExists(LogicalPlan):
plan.function_exists.db_name = self._db_name
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return (
- f"{i}"
- f"<{self.__class__.__name__}"
- f"function_name='{self._function_name}' "
- f"db_name='{self._db_name}'>"
- )
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b><br />
- function_name: {self._function_name} <br />
- db_name: {self._db_name} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class CreateExternalTable(LogicalPlan):
def __init__(
@@ -1958,33 +1315,6 @@ class CreateExternalTable(LogicalPlan):
plan.create_external_table.options[k] = v
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return (
- f"{i}"
- f"<{self.__class__.__name__}"
- f"table_name='{self._table_name}' "
- f"path='{self._path}' "
- f"source='{self._source}' "
- f"schema='{self._schema}' "
- f"options='{self._options}'>"
- )
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b><br />
- table_name: {self._table_name} <br />
- path: {self._path} <br />
- source: {self._source} <br />
- schema: {self._schema} <br />
- options: {self._options} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class CreateTable(LogicalPlan):
def __init__(
@@ -2021,35 +1351,6 @@ class CreateTable(LogicalPlan):
plan.create_table.options[k] = v
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return (
- f"{i}"
- f"<{self.__class__.__name__}"
- f"table_name='{self._table_name}' "
- f"path='{self._path}' "
- f"source='{self._source}' "
- f"description='{self._description}' "
- f"schema='{self._schema}' "
- f"options='{self._options}'>"
- )
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b><br />
- table_name: {self._table_name} <br />
- path: {self._path} <br />
- source: {self._source} <br />
- description: {self._description} <br />
- schema: {self._schema} <br />
- options: {self._options} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class DropTempView(LogicalPlan):
def __init__(self, view_name: str) -> None:
@@ -2061,21 +1362,6 @@ class DropTempView(LogicalPlan):
plan.drop_temp_view.view_name = self._view_name
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"{i}" f"<{self.__class__.__name__}
view_name='{self._view_name}'>"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b><br />
- view_name: {self._view_name} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class DropGlobalTempView(LogicalPlan):
def __init__(self, view_name: str) -> None:
@@ -2087,21 +1373,6 @@ class DropGlobalTempView(LogicalPlan):
plan.drop_global_temp_view.view_name = self._view_name
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"{i}" f"<{self.__class__.__name__}
view_name='{self._view_name}'>"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b><br />
- view_name: {self._view_name} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class RecoverPartitions(LogicalPlan):
def __init__(self, table_name: str) -> None:
@@ -2113,21 +1384,6 @@ class RecoverPartitions(LogicalPlan):
plan.recover_partitions.table_name = self._table_name
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"{i}" f"<{self.__class__.__name__}
table_name='{self._table_name}'>"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b><br />
- table_name: {self._table_name} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
# TODO(SPARK-41612): Support Catalog.isCached
# class IsCached(LogicalPlan):
@@ -2140,21 +1396,6 @@ class RecoverPartitions(LogicalPlan):
# plan.is_cached.table_name = self._table_name
# return plan
#
-# def print(self, indent: int = 0) -> str:
-# i = " " * indent
-# return f"{i}" f"<{self.__class__.__name__} table_name='{self._table_name}'>"
-#
-# def _repr_html_(self) -> str:
-# return f"""
-# <ul>
-# <li>
-# <b>{self.__class__.__name__}</b><br />
-# table_name: {self._table_name} <br />
-# {self._child_repr_()}
-# </li>
-# </ul>
-# """
-#
#
# TODO(SPARK-41600): Support Catalog.cacheTable
# class CacheTable(LogicalPlan):
@@ -2167,21 +1408,6 @@ class RecoverPartitions(LogicalPlan):
# plan.cache_table.table_name = self._table_name
# return plan
#
-# def print(self, indent: int = 0) -> str:
-# i = " " * indent
-# return f"{i}" f"<{self.__class__.__name__} table_name='{self._table_name}'>"
-#
-# def _repr_html_(self) -> str:
-# return f"""
-# <ul>
-# <li>
-# <b>{self.__class__.__name__}</b><br />
-# table_name: {self._table_name} <br />
-# {self._child_repr_()}
-# </li>
-# </ul>
-# """
-#
#
# TODO(SPARK-41623): Support Catalog.uncacheTable
# class UncacheTable(LogicalPlan):
@@ -2193,21 +1419,6 @@ class RecoverPartitions(LogicalPlan):
# plan = proto.Relation()
# plan.uncache_table.table_name = self._table_name
# return plan
-#
-# def print(self, indent: int = 0) -> str:
-# i = " " * indent
-# return f"{i}" f"<{self.__class__.__name__} table_name='{self._table_name}'>"
-#
-# def _repr_html_(self) -> str:
-# return f"""
-# <ul>
-# <li>
-# <b>{self.__class__.__name__}</b><br />
-# table_name: {self._table_name} <br />
-# {self._child_repr_()}
-# </li>
-# </ul>
-# """
class ClearCache(LogicalPlan):
@@ -2217,20 +1428,6 @@ class ClearCache(LogicalPlan):
def plan(self, session: "SparkConnectClient") -> proto.Relation:
return proto.Relation(clear_cache=proto.ClearCache())
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"{i}" f"<{self.__class__.__name__}>"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b>
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class RefreshTable(LogicalPlan):
def __init__(self, table_name: str) -> None:
@@ -2242,21 +1439,6 @@ class RefreshTable(LogicalPlan):
plan.refresh_table.table_name = self._table_name
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"{i}" f"<{self.__class__.__name__} table_name='{self._table_name}'>"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b><br />
- table_name: {self._table_name} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class RefreshByPath(LogicalPlan):
def __init__(self, path: str) -> None:
@@ -2268,21 +1450,6 @@ class RefreshByPath(LogicalPlan):
plan.refresh_by_path.path = self._path
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"{i}" f"<{self.__class__.__name__} path='{self._path}'>"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b><br />
- db_name: {self._path} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class CurrentCatalog(LogicalPlan):
def __init__(self) -> None:
@@ -2291,20 +1458,6 @@ class CurrentCatalog(LogicalPlan):
def plan(self, session: "SparkConnectClient") -> proto.Relation:
return proto.Relation(current_catalog=proto.CurrentCatalog())
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"{i}" f"<{self.__class__.__name__}>"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b>
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class SetCurrentCatalog(LogicalPlan):
def __init__(self, catalog_name: str) -> None:
@@ -2316,21 +1469,6 @@ class SetCurrentCatalog(LogicalPlan):
plan.set_current_catalog.catalog_name = self._catalog_name
return plan
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"{i}" f"<{self.__class__.__name__} catalog_name='{self._catalog_name}'>"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b><br />
- catalog_name: {self._catalog_name} <br />
- {self._child_repr_()}
- </li>
- </ul>
- """
-
class ListCatalogs(LogicalPlan):
def __init__(self) -> None:
@@ -2338,17 +1476,3 @@ class ListCatalogs(LogicalPlan):
def plan(self, session: "SparkConnectClient") -> proto.Relation:
return proto.Relation(list_catalogs=proto.ListCatalogs())
-
- def print(self, indent: int = 0) -> str:
- i = " " * indent
- return f"{i}" f"<{self.__class__.__name__}>"
-
- def _repr_html_(self) -> str:
- return f"""
- <ul>
- <li>
- <b>{self.__class__.__name__}</b>
- {self._child_repr_()}
- </li>
- </ul>
- """
diff --git a/python/pyspark/sql/tests/connect/test_connect_plan_only.py b/python/pyspark/sql/tests/connect/test_connect_plan_only.py
index ee6ad7b453b..db42c00fa6c 100644
--- a/python/pyspark/sql/tests/connect/test_connect_plan_only.py
+++ b/python/pyspark/sql/tests/connect/test_connect_plan_only.py
@@ -618,6 +618,31 @@ class SparkConnectTestsPlanOnly(PlanOnlyTestFixture):
self.assertIsNotNone(col_plan)
self.assertEqual(col_plan.unresolved_regex.col_name, "col_name")
+ def test_print(self):
+ # SPARK-41717: test print
+ self.assertEqual(
+ self.connect.sql("SELECT 1")._plan.print().strip(), "<SQL query='SELECT 1'>"
+ )
+ self.assertEqual(
+ self.connect.range(1, 10)._plan.print().strip(),
+ "<Range start='1', end='10', step='1', num_partitions='None'>",
+ )
+
+ def test_repr(self):
+ # SPARK-41717: test _repr_html_
+ self.assertIn("query: SELECT 1", self.connect.sql("SELECT 1")._plan._repr_html_().strip())
+
+ expected = (
+ "<b>Range</b><br/>",
+ "start: 1 <br/>",
+ "end: 10 <br/>",
+ "step: 1 <br/>",
+ "num_partitions: None <br/>",
+ )
+ actual = self.connect.range(1, 10)._plan._repr_html_().strip()
+ for line in expected:
+ self.assertIn(line, actual)
+
if __name__ == "__main__":
from pyspark.sql.tests.connect.test_connect_plan_only import * # noqa: F401
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]