This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new cfd66cf [MINOR][PYTHON] Remove unnecessary quotes in pyspark
cfd66cf is described below
commit cfd66cfc1f955e218b3f53fd56ca423578f39638
Author: dch nguyen <[email protected]>
AuthorDate: Mon Feb 28 09:24:59 2022 +0900
[MINOR][PYTHON] Remove unnecessary quotes in pyspark
### What changes were proposed in this pull request?
Remove unnecessary quotes in pyspark
### Why are the changes needed?
To make the code cleaner
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Existing tests
Closes #35664 from dchvn/remove_unused_quote.
Authored-by: dch nguyen <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/context.py | 32 ++++++++++++++----------------
python/pyspark/mllib/stat/KernelDensity.py | 2 +-
python/pyspark/sql/context.py | 2 +-
python/pyspark/sql/dataframe.py | 2 +-
python/pyspark/sql/readwriter.py | 2 +-
python/pyspark/sql/session.py | 12 +++++------
6 files changed, 25 insertions(+), 27 deletions(-)
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 2f1746b..3beebb0 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -575,7 +575,7 @@ class SparkContext:
with SparkContext._lock:
SparkContext._active_spark_context = None # type:
ignore[assignment]
- def emptyRDD(self) -> "RDD[Any]":
+ def emptyRDD(self) -> RDD[Any]:
"""
Create an RDD that has no partitions or elements.
"""
@@ -583,7 +583,7 @@ class SparkContext:
def range(
self, start: int, end: Optional[int] = None, step: int = 1, numSlices:
Optional[int] = None
- ) -> "RDD[int]":
+ ) -> RDD[int]:
"""
Create a new RDD of int containing elements from `start` to `end`
(exclusive), increased by `step` every element. Can be called the same
@@ -621,7 +621,7 @@ class SparkContext:
return self.parallelize(range(start, end, step), numSlices)
- def parallelize(self, c: Iterable[T], numSlices: Optional[int] = None) ->
"RDD[T]":
+ def parallelize(self, c: Iterable[T], numSlices: Optional[int] = None) ->
RDD[T]:
"""
Distribute a local Python collection to form an RDD. Using range
is recommended if the input represents a range for performance.
@@ -725,7 +725,7 @@ class SparkContext:
# we eagerly reads the file so we can delete right after.
os.unlink(tempFile.name)
- def pickleFile(self, name: str, minPartitions: Optional[int] = None) ->
"RDD[Any]":
+ def pickleFile(self, name: str, minPartitions: Optional[int] = None) ->
RDD[Any]:
"""
Load an RDD previously saved using :meth:`RDD.saveAsPickleFile` method.
@@ -742,7 +742,7 @@ class SparkContext:
def textFile(
self, name: str, minPartitions: Optional[int] = None, use_unicode:
bool = True
- ) -> "RDD[str]":
+ ) -> RDD[str]:
"""
Read a text file from HDFS, a local file system (available on all
nodes), or any Hadoop-supported file system URI, and return it as an
@@ -767,7 +767,7 @@ class SparkContext:
def wholeTextFiles(
self, path: str, minPartitions: Optional[int] = None, use_unicode:
bool = True
- ) -> "RDD[Tuple[str, str]]":
+ ) -> RDD[Tuple[str, str]]:
"""
Read a directory of text files from HDFS, a local file system
(available on all nodes), or any Hadoop-supported file system
@@ -822,9 +822,7 @@ class SparkContext:
PairDeserializer(UTF8Deserializer(use_unicode),
UTF8Deserializer(use_unicode)),
)
- def binaryFiles(
- self, path: str, minPartitions: Optional[int] = None
- ) -> "RDD[Tuple[str, bytes]]":
+ def binaryFiles(self, path: str, minPartitions: Optional[int] = None) ->
RDD[Tuple[str, bytes]]:
"""
Read a directory of binary files from HDFS, a local file system
(available on all nodes), or any Hadoop-supported file system URI
@@ -843,7 +841,7 @@ class SparkContext:
PairDeserializer(UTF8Deserializer(), NoOpSerializer()),
)
- def binaryRecords(self, path: str, recordLength: int) -> "RDD[bytes]":
+ def binaryRecords(self, path: str, recordLength: int) -> RDD[bytes]:
"""
Load data from a flat binary file, assuming each record is a set of
numbers
with the specified numerical format (see ByteBuffer), and the number of
@@ -876,7 +874,7 @@ class SparkContext:
valueConverter: Optional[str] = None,
minSplits: Optional[int] = None,
batchSize: int = 0,
- ) -> "RDD[Tuple[T, U]]":
+ ) -> RDD[Tuple[T, U]]:
"""
Read a Hadoop SequenceFile with arbitrary key and value Writable class
from HDFS,
a local file system (available on all nodes), or any Hadoop-supported
file system URI.
@@ -931,7 +929,7 @@ class SparkContext:
valueConverter: Optional[str] = None,
conf: Optional[Dict[str, str]] = None,
batchSize: int = 0,
- ) -> "RDD[Tuple[T, U]]":
+ ) -> RDD[Tuple[T, U]]:
"""
Read a 'new API' Hadoop InputFormat with arbitrary key and value class
from HDFS,
a local file system (available on all nodes), or any Hadoop-supported
file system URI.
@@ -990,7 +988,7 @@ class SparkContext:
valueConverter: Optional[str] = None,
conf: Optional[Dict[str, str]] = None,
batchSize: int = 0,
- ) -> "RDD[Tuple[T, U]]":
+ ) -> RDD[Tuple[T, U]]:
"""
Read a 'new API' Hadoop InputFormat with arbitrary key and value
class, from an arbitrary
Hadoop configuration, which is passed in as a Python dict.
@@ -1043,7 +1041,7 @@ class SparkContext:
valueConverter: Optional[str] = None,
conf: Optional[Dict[str, str]] = None,
batchSize: int = 0,
- ) -> "RDD[Tuple[T, U]]":
+ ) -> RDD[Tuple[T, U]]:
"""
Read an 'old' Hadoop InputFormat with arbitrary key and value class
from HDFS,
a local file system (available on all nodes), or any Hadoop-supported
file system URI.
@@ -1098,7 +1096,7 @@ class SparkContext:
valueConverter: Optional[str] = None,
conf: Optional[Dict[str, str]] = None,
batchSize: int = 0,
- ) -> "RDD[Tuple[T, U]]":
+ ) -> RDD[Tuple[T, U]]:
"""
Read an 'old' Hadoop InputFormat with arbitrary key and value class,
from an arbitrary
Hadoop configuration, which is passed in as a Python dict.
@@ -1145,7 +1143,7 @@ class SparkContext:
jrdd = self._jsc.checkpointFile(name)
return RDD(jrdd, self, input_deserializer)
- def union(self, rdds: List["RDD[T]"]) -> "RDD[T]":
+ def union(self, rdds: List[RDD[T]]) -> RDD[T]:
"""
Build the union of a list of RDDs.
@@ -1464,7 +1462,7 @@ class SparkContext:
def runJob(
self,
- rdd: "RDD[T]",
+ rdd: RDD[T],
partitionFunc: Callable[[Iterable[T]], Iterable[U]],
partitions: Optional[Sequence[int]] = None,
allowLocal: bool = False,
diff --git a/python/pyspark/mllib/stat/KernelDensity.py
b/python/pyspark/mllib/stat/KernelDensity.py
index 103c955..febf4fd 100644
--- a/python/pyspark/mllib/stat/KernelDensity.py
+++ b/python/pyspark/mllib/stat/KernelDensity.py
@@ -46,7 +46,7 @@ class KernelDensity:
"""Set bandwidth of each sample. Defaults to 1.0"""
self._bandwidth = bandwidth
- def setSample(self, sample: "RDD[float]") -> None:
+ def setSample(self, sample: RDD[float]) -> None:
"""Set sample points from the population. Should be a RDD"""
if not isinstance(sample, RDD):
raise TypeError("samples should be a RDD, received %s" %
type(sample))
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index c6eb6c3..18816d3 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -364,7 +364,7 @@ class SQLContext:
def createDataFrame( # type: ignore[misc]
self,
- data: Union["RDD[Any]", Iterable[Any], "PandasDataFrameLike"],
+ data: Union[RDD[Any], Iterable[Any], "PandasDataFrameLike"],
schema: Optional[Union[AtomicType, StructType, str]] = None,
samplingRatio: Optional[float] = None,
verifySchema: bool = True,
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 76c4076..37e184b 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -190,7 +190,7 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
"""Returns a :class:`DataFrameStatFunctions` for statistic
functions."""
return DataFrameStatFunctions(self)
- def toJSON(self, use_unicode: bool = True) -> "RDD[str]":
+ def toJSON(self, use_unicode: bool = True) -> RDD[str]:
"""Converts a :class:`DataFrame` into a :class:`RDD` of string.
Each row is turned into a JSON document as one element in the returned
RDD.
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 8c729c6..8c8756d 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -187,7 +187,7 @@ class DataFrameReader(OptionUtils):
def json(
self,
- path: Union[str, List[str], "RDD[str]"],
+ path: Union[str, List[str], RDD[str]],
schema: Optional[Union[StructType, str]] = None,
primitivesAsString: Optional[Union[bool, str]] = None,
prefersDecimal: Optional[Union[bool, str]] = None,
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index a41ad15..759859a 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -524,7 +524,7 @@ class SparkSession(SparkConversionMixin):
def _inferSchema(
self,
- rdd: "RDD[Any]",
+ rdd: RDD[Any],
samplingRatio: Optional[float] = None,
names: Optional[List[str]] = None,
) -> StructType:
@@ -589,10 +589,10 @@ class SparkSession(SparkConversionMixin):
def _createFromRDD(
self,
- rdd: "RDD[Any]",
+ rdd: RDD[Any],
schema: Optional[Union[DataType, List[str]]],
samplingRatio: Optional[float],
- ) -> Tuple["RDD[Tuple]", StructType]:
+ ) -> Tuple[RDD[Tuple], StructType]:
"""
Create an RDD for DataFrame from an existing RDD, returns the RDD and
schema.
"""
@@ -618,7 +618,7 @@ class SparkSession(SparkConversionMixin):
def _createFromLocal(
self, data: Iterable[Any], schema: Optional[Union[DataType, List[str]]]
- ) -> Tuple["RDD[Tuple]", StructType]:
+ ) -> Tuple[RDD[Tuple], StructType]:
"""
Create an RDD for DataFrame from a list or pandas.DataFrame, returns
the RDD and schema.
@@ -766,7 +766,7 @@ class SparkSession(SparkConversionMixin):
def createDataFrame( # type: ignore[misc]
self,
- data: Union["RDD[Any]", Iterable[Any], "PandasDataFrameLike"],
+ data: Union[RDD[Any], Iterable[Any], "PandasDataFrameLike"],
schema: Optional[Union[AtomicType, StructType, str]] = None,
samplingRatio: Optional[float] = None,
verifySchema: bool = True,
@@ -897,7 +897,7 @@ class SparkSession(SparkConversionMixin):
def _create_dataframe(
self,
- data: Union["RDD[Any]", Iterable[Any]],
+ data: Union[RDD[Any], Iterable[Any]],
schema: Optional[Union[DataType, List[str]]],
samplingRatio: Optional[float],
verifySchema: bool,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]