amaliujia commented on code in PR #38218:
URL: https://github.com/apache/spark/pull/38218#discussion_r995379653
##########
python/pyspark/sql/connect/plan.py:
##########
@@ -143,23 +143,29 @@ def __init__(self, child: Optional["LogicalPlan"],
*columns: "ExpressionOrString
self._verify_expressions()
def _verify_expressions(self) -> None:
- """Ensures that all input arguments are instances of Expression."""
+ """Ensures that all input arguments are instances of Expression or
String."""
for c in self._raw_columns:
- if not isinstance(c, Expression):
- raise InputValidationError(f"Only Expressions can be used for
projections: '{c}'.")
+ if not isinstance(c, (Expression, str)):
+ raise InputValidationError(
+ f"Only Expressions or String can be used for projections:
'{c}'."
+ )
def withAlias(self, alias: str) -> LogicalPlan:
self.alias = alias
return self
def plan(self, session: Optional["RemoteSparkSession"]) -> proto.Relation:
assert self._child is not None
- proj_exprs = [
- c.to_plan(session)
- if isinstance(c, Expression)
- else self.unresolved_attr(*(c.split(".")))
- for c in self._raw_columns
- ]
+ proj_exprs = []
+ for c in self._raw_columns:
+ if isinstance(c, Expression):
+ proj_exprs.append(c.to_plan(session))
+ elif c == "*":
+ exp = proto.Expression()
+ exp.unresolved_star.SetInParent()
+ proj_exprs.append(exp)
+ else:
+ proj_exprs.append(self.unresolved_attr(*(c.split("."))))
Review Comment:
First of all, this belongs to the initial implementation; I didn't add it.
In fact, your question reminds me of a very tricky problem that I hadn't
thought of: in the past, PySpark did not need to care because it could use
Py4J to reuse the Scala implementation; however, now the world has changed....
For this specific problem, it is not about the separator. What we really
need to do is match the Scala implementation of
`CatalystSqlParser.parseMultipartIdentifier(s)`. The identifier can contain
multiple parts.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]