amaliujia commented on code in PR #38218:
URL: https://github.com/apache/spark/pull/38218#discussion_r996238628


##########
python/pyspark/sql/connect/plan.py:
##########
@@ -143,23 +143,29 @@ def __init__(self, child: Optional["LogicalPlan"], 
*columns: "ExpressionOrString
         self._verify_expressions()
 
     def _verify_expressions(self) -> None:
-        """Ensures that all input arguments are instances of Expression."""
+        """Ensures that all input arguments are instances of Expression or 
String."""
         for c in self._raw_columns:
-            if not isinstance(c, Expression):
-                raise InputValidationError(f"Only Expressions can be used for 
projections: '{c}'.")
+            if not isinstance(c, (Expression, str)):
+                raise InputValidationError(
+                    f"Only Expressions or String can be used for projections: 
'{c}'."
+                )
 
     def withAlias(self, alias: str) -> LogicalPlan:
         self.alias = alias
         return self
 
     def plan(self, session: Optional["RemoteSparkSession"]) -> proto.Relation:
         assert self._child is not None
-        proj_exprs = [
-            c.to_plan(session)
-            if isinstance(c, Expression)
-            else self.unresolved_attr(*(c.split(".")))
-            for c in self._raw_columns
-        ]
+        proj_exprs = []
+        for c in self._raw_columns:
+            if isinstance(c, Expression):
+                proj_exprs.append(c.to_plan(session))
+            elif c == "*":
+                exp = proto.Expression()
+                exp.unresolved_star.SetInParent()
+                proj_exprs.append(exp)
+            else:
+                proj_exprs.append(self.unresolved_attr(*(c.split("."))))

Review Comment:
   After some thought, I think there is another way: do not ask clients to 
parse the identifier, so that clients only need to provide the relation 
name. I put my idea into this draft PR: 
https://github.com/apache/spark/pull/38264



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to