grundprinzip commented on code in PR #38631:
URL: https://github.com/apache/spark/pull/38631#discussion_r1020586736
##########
python/pyspark/sql/connect/column.py:
##########
@@ -82,6 +82,74 @@ def to_plan(self, session: "RemoteSparkSession") ->
"proto.Expression":
def __str__(self) -> str:
...
+ def alias(self, *alias: str, **kwargs: Any) -> "Expression":
+ """
+ Returns this column aliased with a new name or names (in the case of
expressions that
+ return more than one column, such as explode).
+
+ .. versionadded:: 1.3.0
+
+ Parameters
+ ----------
+ alias : str
+ desired column names (collects all positional arguments passed)
+
+ Other Parameters
+ ----------------
+ metadata: dict
+ a dict of information to be stored in ``metadata`` attribute of the
+ corresponding :class:`StructField <pyspark.sql.types.StructField>`
(optional, keyword
+ only argument)
+
+ .. versionchanged:: 2.2.0
+ Added optional ``metadata`` argument.
+
+ Returns
+ -------
+ :class:`Column`
+ Column representing whether each element of Column is aliased with
new name or names.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame(
+ ... [(2, "Alice"), (5, "Bob")], ["age", "name"])
+ >>> df.select(df.age.alias("age2")).collect()
+ [Row(age2=2), Row(age2=5)]
+ >>> df.select(df.age.alias("age3", metadata={'max':
99})).schema['age3'].metadata['max']
+ 99
+ """
+ metadata = kwargs.pop("metadata", None)
+ assert not kwargs, "Unexpected kwargs where passed: %s" % kwargs
+ return ColumnAlias(self, list(alias), metadata)
+
+
+class ColumnAlias(Expression):
+ def __init__(self, parent: Expression, alias: list[str], metadata: Any):
+
+ self._alias = alias
+ self._metadata = metadata
+ self._parent = parent
+
+ def to_plan(self, session: "RemoteSparkSession") -> "proto.Expression":
+ if len(self._alias) == 1:
+ if self._metadata:
+ raise ValueError("Creating aliases with metadata is not
supported.")
+ else:
+ exp = proto.Expression()
+ exp.alias.name.append(self._alias[0])
+ exp.alias.expr.CopyFrom(self._parent.to_plan(session))
+ return exp
+ else:
+ if self._metadata:
+ raise ValueError("metadata can only be provided for a single
column")
Review Comment:
I just pushed the implementation of metadata as well. However, the schema
currently does not return the metadata, but at least the input is consistent
with Spark.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]