This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new cfd66cf [MINOR][PYTHON] Remove unnecessary quotes in pyspark
cfd66cf is described below
commit cfd66cfc1f955e218b3f53fd56ca423578f39638
Author: dch nguyen <[email protected]>
AuthorDate: Mon Feb 28 09:24:59 2022 +0900
[MINOR][PYTHON] Remove unnecessary quotes in pyspark
### What changes were proposed in this pull request?
Remove unnecessary quotes in pyspark
### Why are the changes needed?
To make the code cleaner
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Existing tests
Closes #35664 from dchvn/remove_unused_quote.
Authored-by: dch nguyen <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/context.py | 32 ++++++++++++++----------------
python/pyspark/mllib/stat/KernelDensity.py | 2 +-
python/pyspark/sql/context.py | 2 +-
python/pyspark/sql/dataframe.py | 2 +-
python/pyspark/sql/readwriter.py | 2 +-
python/pyspark/sql/session.py | 12 +++++------
6 files changed, 25 insertions(+), 27 deletions(-)
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 2f1746b..3beebb0 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -575,7 +575,7 @@ class SparkContext:
with SparkContext._lock:
SparkContext._active_spark_context = None # type:
ignore[assignment]
- def emptyRDD(self) -> "RDD[Any]":
+ def emptyRDD(self) -> RDD[Any]:
"""
Create an RDD that has no partitions or elements.
"""
@@ -583,7 +583,7 @@ class SparkContext:
def range(
self, start: int, end: Optional[int] = None, step: int = 1, numSlices:
Optional[int] = None
- ) -> "RDD[int]":
+ ) -> RDD[int]:
"""
Create a new RDD of int containing elements from `start` to `end`
(exclusive), increased by `step` every element. Can be called the same
@@ -621,7 +621,7 @@ class SparkContext:
return self.parallelize(range(start, end, step), numSlices)
- def parallelize(self, c: Iterable[T], numSlices: Optional[int] = None) ->
"RDD[T]":
+ def parallelize(self, c: Iterable[T], numSlices: Optional[int] = None) ->
RDD[T]:
"""
Distribute a local Python collection to form an RDD. Using range
is recommended if the input represents a range for performance.
@@ -725,7 +725,7 @@ class SparkContext:
# we eagerly reads the file so we can delete right after.
os.unlink(tempFile.name)
- def pickleFile(self, name: str, minPartitions: Optional[int] = None) ->
"RDD[Any]":
+ def pickleFile(self, name: str, minPartitions: Optional[int] = None) ->
RDD[Any]:
"""
Load an RDD previously saved using :meth:`RDD.saveAsPickleFile` method.
@@ -742,7 +742,7 @@ class SparkContext:
def textFile(
self, name: str, minPartitions: Optional[int] = None, use_unicode:
bool = True
- ) -> "RDD[str]":
+ ) -> RDD[str]:
"""
Read a text file from HDFS, a local file system (available on all
nodes), or any Hadoop-supported file system URI, and return it as an
@@ -767,7 +767,7 @@ class SparkContext:
def wholeTextFiles(
self, path: str, minPartitions: Optional[int] = None, use_unicode:
bool = True
- ) -> "RDD[Tuple[str, str]]":
+ ) -> RDD[Tuple[str, str]]:
"""
Read a directory of text files from HDFS, a local file system
(available on all nodes), or any Hadoop-supported file system
@@ -822,9 +822,7 @@ class SparkContext:
PairDeserializer(UTF8Deserializer(use_unicode),
UTF8Deserializer(use_unicode)),
)
- def binaryFiles(
- self, path: str, minPartitions: Optional[int] = None
- ) -> "RDD[Tuple[str, bytes]]":
+ def binaryFiles(self, path: str, minPartitions: Optional[int] = None) ->
RDD[Tuple[str, bytes]]:
"""
Read a directory of binary files from HDFS, a local file system
(available on all nodes), or any Hadoop-supported file system URI
@@ -843,7 +841,7 @@ class SparkContext:
PairDeserializer(UTF8Deserializer(), NoOpSerializer()),
)
- def binaryRecords(self, path: str, recordLength: int) -> "RDD[bytes]":
+ def binaryRecords(self, path: str, recordLength: int) -> RDD[bytes]:
"""
Load data from a flat binary file, assuming each record is a set of
numbers
with the specified numerical format (see ByteBuffer), and the number of
@@ -876,7 +874,7 @@ class SparkContext:
valueConverter: Optional[str] = None,
minSplits: Optional[int] = None,
batchSize: int = 0,
- ) -> "RDD[Tuple[T, U]]":
+ ) -> RDD[Tuple[T, U]]:
"""
Read a Hadoop SequenceFile with arbitrary key and value Writable class
from HDFS,
a local file system (available on all nodes), or any Hadoop-supported
file system URI.
@@ -931,7 +929,7 @@ class SparkContext:
valueConverter: Optional[str] = None,
conf: Optional[Dict[str, str]] = None,
batchSize: int = 0,
- ) -> "RDD[Tuple[T, U]]":
+ ) -> RDD[Tuple[T, U]]:
"""
Read a 'new API' Hadoop InputFormat with arbitrary key and value class
from HDFS,
a local file system (available on all nodes), or any Hadoop-supported
file system URI.
@@ -990,7 +988,7 @@ class SparkContext:
valueConverter: Optional[str] = None,
conf: Optional[Dict[str, str]] = None,
batchSize: int = 0,
- ) -> "RDD[Tuple[T, U]]":
+ ) -> RDD[Tuple[T, U]]:
"""
Read a 'new API' Hadoop InputFormat with arbitrary key and value
class, from an arbitrary
Hadoop configuration, which is passed in as a Python dict.
@@ -1043,7 +1041,7 @@ class SparkContext:
valueConverter: Optional[str] = None,
conf: Optional[Dict[str, str]] = None,
batchSize: int = 0,
- ) -> "RDD[Tuple[T, U]]":
+ ) -> RDD[Tuple[T, U]]:
"""
Read an 'old' Hadoop InputFormat with arbitrary key and value class
from HDFS,
a local file system (available on all nodes), or any Hadoop-supported
file system URI.
@@ -1098,7 +1096,7 @@ class SparkContext:
valueConverter: Optional[str] = None,
conf: Optional[Dict[str, str]] = None,
batchSize: int = 0,
- ) -> "RDD[Tuple[T, U]]":
+ ) -> RDD[Tuple[T, U]]:
"""
Read an 'old' Hadoop InputFormat with arbitrary key and value class,
from an arbitrary
Hadoop configuration, which is passed in as a Python dict.
@@ -1145,7 +1143,7 @@ class SparkContext:
jrdd = self._jsc.checkpointFile(name)
return RDD(jrdd, self, input_deserializer)
- def union(self, rdds: List["RDD[T]"]) -> "RDD[T]":
+ def union(self, rdds: List[RDD[T]]) -> RDD[T]:
"""
Build the union of a list of RDDs.
@@ -1464,7 +1462,7 @@ class SparkContext:
def runJob(
self,
- rdd: "RDD[T]",
+ rdd: RDD[T],
partitionFunc: Callable[[Iterable[T]], Iterable[U]],
partitions: Optional[Sequence[int]] = None,
allowLocal: bool = False,
diff --git a/python/pyspark/mllib/stat/KernelDensity.py
b/python/pyspark/mllib/stat/KernelDensity.py
index 103c955..febf4fd 100644
--- a/python/pyspark/mllib/stat/KernelDensity.py
+++ b/python/pyspark/mllib/stat/KernelDensity.py
@@ -46,7 +46,7 @@ class KernelDensity:
"""Set bandwidth of each sample. Defaults to 1.0"""
self._bandwidth = bandwidth
- def setSample(self, sample: "RDD[float]") -> None:
+ def setSample(self, sample: RDD[float]) -> None:
"""Set sample points from the population. Should be a RDD"""
if not isinstance(sample, RDD):
raise TypeError("samples should be a RDD, received %s" %
type(sample))
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index c6eb6c3..18816d3 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -364,7 +364,7 @@ class SQLContext:
def createDataFrame( # type: ignore[misc]
self,
- data: Union["RDD[Any]", Iterable[Any], "PandasDataFrameLike"],
+ data: Union[RDD[Any], Iterable[Any], "PandasDataFrameLike"],
schema: Optional[Union[AtomicType, StructType, str]] = None,
samplingRatio: Optional[float] = None,
verifySchema: bool = True,
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 76c4076..37e184b 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -190,7 +190,7 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
"""Returns a :class:`DataFrameStatFunctions` for statistic
functions."""
return DataFrameStatFunctions(self)
- def toJSON(self, use_unicode: bool = True) -> "RDD[str]":
+ def toJSON(self, use_unicode: bool = True) -> RDD[str]:
"""Converts a :class:`DataFrame` into a :class:`RDD` of string.
Each row is turned into a JSON document as one element in the returned
RDD.
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 8c729c6..8c8756d 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -187,7 +187,7 @@ class DataFrameReader(OptionUtils):
def json(
self,
- path: Union[str, List[str], "RDD[str]"],
+ path: Union[str, List[str], RDD[str]],
schema: Optional[Union[StructType, str]] = None,
primitivesAsString: Optional[Union[bool, str]] = None,
prefersDecimal: Optional[Union[bool, str]] = None,
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index a41ad15..759859a 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -524,7 +524,7 @@ class SparkSession(SparkConversionMixin):
def _inferSchema(
self,
- rdd: "RDD[Any]",
+ rdd: RDD[Any],
samplingRatio: Optional[float] = None,
names: Optional[List[str]] = None,
) -> StructType:
@@ -589,10 +589,10 @@ class SparkSession(SparkConversionMixin):
def _createFromRDD(
self,
- rdd: "RDD[Any]",
+ rdd: RDD[Any],
schema: Optional[Union[DataType, List[str]]],
samplingRatio: Optional[float],
- ) -> Tuple["RDD[Tuple]", StructType]:
+ ) -> Tuple[RDD[Tuple], StructType]:
"""
Create an RDD for DataFrame from an existing RDD, returns the RDD and
schema.
"""
@@ -618,7 +618,7 @@ class SparkSession(SparkConversionMixin):
def _createFromLocal(
self, data: Iterable[Any], schema: Optional[Union[DataType, List[str]]]
- ) -> Tuple["RDD[Tuple]", StructType]:
+ ) -> Tuple[RDD[Tuple], StructType]:
"""
Create an RDD for DataFrame from a list or pandas.DataFrame, returns
the RDD and schema.
@@ -766,7 +766,7 @@ class SparkSession(SparkConversionMixin):
def createDataFrame( # type: ignore[misc]
self,
- data: Union["RDD[Any]", Iterable[Any], "PandasDataFrameLike"],
+ data: Union[RDD[Any], Iterable[Any], "PandasDataFrameLike"],
schema: Optional[Union[AtomicType, StructType, str]] = None,
samplingRatio: Optional[float] = None,
verifySchema: bool = True,
@@ -897,7 +897,7 @@ class SparkSession(SparkConversionMixin):
def _create_dataframe(
self,
- data: Union["RDD[Any]", Iterable[Any]],
+ data: Union[RDD[Any], Iterable[Any]],
schema: Optional[Union[DataType, List[str]]],
samplingRatio: Optional[float],
verifySchema: bool,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]