zero323 commented on a change in pull request #34136:
URL: https://github.com/apache/spark/pull/34136#discussion_r718937128
##########
File path: python/pyspark/sql/session.py
##########
@@ -566,7 +629,70 @@ def _create_shell_session():
return SparkSession.builder.getOrCreate()
- def createDataFrame(self, data, schema=None, samplingRatio=None,
verifySchema=True):
+ @overload
+ def createDataFrame(
+ self,
+ data: Union["RDD[RowLike]", Iterable["RowLike"]],
+ samplingRatio: Optional[float] = ...,
+ ) -> DataFrame:
+ ...
+
+ @overload
+ def createDataFrame(
+ self,
+ data: Union["RDD[RowLike]", Iterable["RowLike"]],
+ schema: Union[List[str], Tuple[str, ...]] = ...,
+ verifySchema: bool = ...,
+ ) -> DataFrame:
+ ...
+
+ @overload
+ def createDataFrame(
+ self,
+ data: Union[
+ "RDD[Union[DateTimeLiteral, LiteralType, DecimalLiteral]]",
+ Iterable[Union["DateTimeLiteral", "LiteralType",
"DecimalLiteral"]],
+ ],
+ schema: Union[AtomicType, str],
+ verifySchema: bool = ...,
+ ) -> DataFrame:
+ ...
+
+ @overload
+ def createDataFrame(
+ self,
+ data: Union["RDD[RowLike]", Iterable["RowLike"]],
+ schema: Union[StructType, str],
+ verifySchema: bool = ...,
+ ) -> DataFrame:
+ ...
+
+ @overload
+ def createDataFrame(
+ self, data: "PandasDataFrameLike", samplingRatio: Optional[float] = ...
+ ) -> DataFrame:
+ ...
+
+ @overload
+ def createDataFrame(
+ self,
+ data: "PandasDataFrameLike",
+ schema: Union[StructType, str],
+ verifySchema: bool = ...,
+ ) -> DataFrame:
+ ...
+
+ def createDataFrame( # type: ignore[misc]
+ self,
+ data: Union[
+ "RDD[Union[DateTimeLiteral, LiteralType, DecimalLiteral,
RowLike]]",
+ Iterable[Union["DateTimeLiteral", "LiteralType", "DecimalLiteral",
"RowLike"]],
+ "PandasDataFrameLike",
+ ],
+ schema: Optional[Union[AtomicType, StructType, str]] = None,
+ samplingRatio: Optional[float] = None,
+ verifySchema: bool = True
+ ) -> DataFrame:
Review comment:
Would you mind explaining what the intention is here? Adding `RowLike`
to the supported type params and `StructType` to the supported schemas seems to
miss the point of having this annotation (I assume the `ignore` is there
because of an overlap with the previous annotations).
In general, this annotation
https://github.com/apache/spark/blob/aa9064ad96ff7cefaa4381e912608b0b0d39a09c/python/pyspark/sql/session.pyi#L89-L97
was added to support invocations like:
```python
spark.createDataFrame([1], IntegerType())
```
but to reject invocations like:
```python
spark.createDataFrame([(1, 2)], IntegerType())
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]