This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 7323bae6df1 [MINOR][DOC] Fix typo under python directory
7323bae6df1 is described below

commit 7323bae6df1def2cfcf9509baf699ca6d0ba20f5
Author: Kazuaki Ishizaki <ishiz...@jp.ibm.com>
AuthorDate: Sun Dec 4 20:29:38 2022 +0900

    [MINOR][DOC] Fix typo under python directory
    
    ### What changes were proposed in this pull request?
    
    Fix typo in pydoc and messages under `python` directory
    
    ### Why are the changes needed?
    
    Better documentation
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    N/A
    
    Closes #38900 from kiszk/typo-pydocs.
    
    Authored-by: Kazuaki Ishizaki <ishiz...@jp.ibm.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/ml/tuning.py                    |  2 +-
 python/pyspark/pandas/frame.py                 |  2 +-
 python/pyspark/pandas/groupby.py               |  4 ++--
 python/pyspark/pandas/spark/accessors.py       |  2 +-
 python/pyspark/pandas/supported_api_gen.py     | 10 +++++-----
 python/pyspark/rdd.py                          |  4 ++--
 python/pyspark/sql/catalog.py                  |  4 ++--
 python/pyspark/sql/connect/function_builder.py |  2 +-
 python/pyspark/sql/dataframe.py                |  6 +++---
 python/pyspark/sql/functions.py                |  4 ++--
 python/pyspark/sql/session.py                  |  2 +-
 python/pyspark/sql/streaming/query.py          |  4 ++--
 python/pyspark/sql/streaming/readwriter.py     |  4 ++--
 13 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py
index 44a8b51ef8e..0dabcdd7f27 100644
--- a/python/pyspark/ml/tuning.py
+++ b/python/pyspark/ml/tuning.py
@@ -405,7 +405,7 @@ class _ValidatorSharedReadWrite:
                 elif isinstance(v, MLWritable):
                     raise RuntimeError(
                         "ValidatorSharedReadWrite.saveImpl does not handle 
parameters of type: "
-                        "MLWritable that are not 
Estimaor/Evaluator/Transformer, and if parameter "
+                        "MLWritable that are not 
Estimator/Evaluator/Transformer, and if parameter "
                         "is estimator, it cannot be meta estimator such as 
Validator or OneVsRest"
                     )
                 else:
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index 835c13d6fdd..f044634da0b 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -382,7 +382,7 @@ class DataFrame(Frame, Generic[T]):
     .. versionchanged:: 3.4.0
         Since 3.4.0, it deals with `data` and `index` in this approach:
        1, when `data` is a distributed dataset (Internal DataFrame/Spark DataFrame/
-        pandas-on-Spark DataFrame/pandas-on-Spark Series), it will first parallize
+        pandas-on-Spark DataFrame/pandas-on-Spark Series), it will first parallelize
        the `index` if necessary, and then try to combine the `data` and `index`;
         Note that if `data` and `index` doesn't have the same anchor, then
         `compute.ops_on_diff_frames` should be turned on;
diff --git a/python/pyspark/pandas/groupby.py b/python/pyspark/pandas/groupby.py
index c5dbcb79710..baa5f0ae146 100644
--- a/python/pyspark/pandas/groupby.py
+++ b/python/pyspark/pandas/groupby.py
@@ -1989,7 +1989,7 @@ class GroupBy(Generic[FrameLike], metaclass=ABCMeta):
         if should_infer_schema:
             # Here we execute with the first 1000 to get the return type.
             log_advice(
-                "If the type hints is not specified for `grouby.apply`, "
+                "If the type hints is not specified for `groupby.apply`, "
                 "it is expensive to infer the data type internally."
             )
             limit = get_option("compute.shortcut_limit")
@@ -3107,7 +3107,7 @@ class GroupBy(Generic[FrameLike], metaclass=ABCMeta):
             # Here we execute with the first 1000 to get the return type.
            # If the records were less than 1000, it uses pandas API directly for a shortcut.
             log_advice(
-                "If the type hints is not specified for `grouby.transform`, "
+                "If the type hints is not specified for `groupby.transform`, "
                 "it is expensive to infer the data type internally."
             )
             limit = get_option("compute.shortcut_limit")
diff --git a/python/pyspark/pandas/spark/accessors.py b/python/pyspark/pandas/spark/accessors.py
index c8e7f507c6f..4e1caa2432b 100644
--- a/python/pyspark/pandas/spark/accessors.py
+++ b/python/pyspark/pandas/spark/accessors.py
@@ -144,7 +144,7 @@ class SparkSeriesMethods(SparkIndexOpsMethods["ps.Series"]):
 
        .. note:: It forces to lose the index and end up with using default index. It is
            preferred to use :meth:`Series.spark.transform` or `:meth:`DataFrame.spark.apply`
-            with specifying the `inedx_col`.
+            with specifying the `index_col`.
 
        .. note:: It does not require to have the same length of the input and output.
            However, it requires to create a new DataFrame internally which will require
diff --git a/python/pyspark/pandas/supported_api_gen.py b/python/pyspark/pandas/supported_api_gen.py
index e60cba204f9..301e6a2f9b7 100644
--- a/python/pyspark/pandas/supported_api_gen.py
+++ b/python/pyspark/pandas/supported_api_gen.py
@@ -171,8 +171,8 @@ def _organize_by_implementation_status(
     """
     Check the implementation status and parameters of both modules.
 
-    Parmeters
-    ---------
+    Parameters
+    ----------
     module_name : str
         Class name that exists in the path of the module.
     pd_funcs: Dict[str, Callable]
@@ -284,11 +284,11 @@ def _update_all_supported_status(
     """
     Updates supported status across multiple module paths.
 
-    Parmeters
-    ---------
+    Parameters
+    ----------
     all_supported_status: Dict[Tuple[str, str], Dict[str, SupportedStatus]]
         Data that stores the supported status across multiple module paths.
-    pd_modles: List[str]
+    pd_modules: List[str]
         Name list of pandas modules.
     pd_module_group : Any
         Specific path of importable pandas module.
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 7f5e4e603f4..829f3d08b63 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -4289,7 +4289,7 @@ class RDD(Generic[T_co]):
         Returns
         -------
         :class:`RDD`
-            a :class:`RDD` containing the keys and cogouped values
+            a :class:`RDD` containing the keys and cogrouped values
 
         See Also
         --------
@@ -4330,7 +4330,7 @@ class RDD(Generic[T_co]):
         Returns
         -------
         :class:`RDD`
-            a :class:`RDD` containing the keys and cogouped values
+            a :class:`RDD` containing the keys and cogrouped values
 
         See Also
         --------
diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py
index 4a49ef1fa04..6b97aa1db48 100644
--- a/python/pyspark/sql/catalog.py
+++ b/python/pyspark/sql/catalog.py
@@ -803,7 +803,7 @@ class Catalog:
         --------
         >>> spark.createDataFrame([(1, 1)]).createTempView("my_table")
 
-        Droppping the temporary view.
+        Dropping the temporary view.
 
         >>> spark.catalog.dropTempView("my_table")
         True
@@ -840,7 +840,7 @@ class Catalog:
         --------
         >>> spark.createDataFrame([(1, 1)]).createGlobalTempView("my_table")
 
-        Droppping the global view.
+        Dropping the global view.
 
         >>> spark.catalog.dropGlobalTempView("my_table")
         True
diff --git a/python/pyspark/sql/connect/function_builder.py b/python/pyspark/sql/connect/function_builder.py
index 1b3450786a3..3c59312888a 100644
--- a/python/pyspark/sql/connect/function_builder.py
+++ b/python/pyspark/sql/connect/function_builder.py
@@ -64,7 +64,7 @@ functions = FunctionBuilder()
 
 
 class UserDefinedFunction(Expression):
-    """A user defied function is an expresison that has a reference to the 
actual
+    """A user defied function is an expression that has a reference to the 
actual
     Python callable attached. During plan generation, the client sends a 
command to
     the server to register the UDF before execution. The expression object can 
be
     reused and is not attached to a specific execution. If the internal name of
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index f3873e3c8cd..bd5df07a50f 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -2375,7 +2375,7 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
         |  2|Alice|
         +---+-----+
 
-        Specify miltiple columns
+        Specify multiple columns
 
         >>> df = spark.createDataFrame([
         ...     (2, "Alice"), (2, "Bob"), (5, "Bob")], schema=["age", "name"])
@@ -2388,7 +2388,7 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
         |  2|  Bob|
         +---+-----+
 
-        Specify miltiple columns for sorting order at `ascending`.
+        Specify multiple columns for sorting order at `ascending`.
 
         >>> df.orderBy(["age", "name"], ascending=[False, False]).show()
         +---+-----+
@@ -3209,7 +3209,7 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
         Parameters
         ----------
         exprs : :class:`Column` or dict of key and value strings
-            Columns or expressions to aggreate DataFrame by.
+            Columns or expressions to aggregate DataFrame by.
 
         Returns
         -------
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 3aeb48adea7..9746196dc94 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -5517,7 +5517,7 @@ def concat_ws(sep: str, *cols: "ColumnOrName") -> Column:
     Parameters
     ----------
     sep : str
-        words seperator.
+        words separator.
     cols : :class:`~pyspark.sql.Column` or str
         list of columns to work on.
 
@@ -7916,7 +7916,7 @@ def arrays_zip(*cols: "ColumnOrName") -> Column:
     """
    Collection function: Returns a merged array of structs in which the N-th struct contains all
    N-th values of input arrays. If one of the arrays is shorter than others then
-    resulting struct type value will be a `null` for missing elemets.
+    resulting struct type value will be a `null` for missing elements.
 
     .. versionadded:: 2.4.0
 
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index eec3246cac3..ebad3224f02 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -1393,7 +1393,7 @@ class SparkSession(SparkConversionMixin):
         >>> spark.readStream
         <pyspark.sql.streaming.readwriter.DataStreamReader object ...>
 
-        The example below uses Rate source that generates rows continously.
+        The example below uses Rate source that generates rows continuously.
        After that, we operate a modulo by 3, and then write the stream out to the console.
         The streaming query stops in 3 seconds.
 
diff --git a/python/pyspark/sql/streaming/query.py b/python/pyspark/sql/streaming/query.py
index 5ee7a4790b0..c1f0e734800 100644
--- a/python/pyspark/sql/streaming/query.py
+++ b/python/pyspark/sql/streaming/query.py
@@ -183,7 +183,7 @@ class StreamingQuery:
         >>> sdf = spark.readStream.format("rate").load()
        >>> sq = sdf.writeStream.format('memory').queryName('query_awaitTermination').start()
 
-        Return wheter the query has terminated or not within 5 seconds
+        Return whether the query has terminated or not within 5 seconds
 
         >>> sq.awaitTermination(5)
         False
@@ -517,7 +517,7 @@ class StreamingQueryManager:
         >>> sdf = spark.readStream.format("rate").load()
        >>> sq = sdf.writeStream.format('memory').queryName('this_query').start()
 
-        Return wheter any of the query on the associated SparkSession
+        Return whether any of the query on the associated SparkSession
         has terminated or not within 5 seconds
 
         >>> spark.streams.awaitAnyTermination(5)
diff --git a/python/pyspark/sql/streaming/readwriter.py b/python/pyspark/sql/streaming/readwriter.py
index ef3b7e525e3..c58848dc508 100644
--- a/python/pyspark/sql/streaming/readwriter.py
+++ b/python/pyspark/sql/streaming/readwriter.py
@@ -52,7 +52,7 @@ class DataStreamReader(OptionUtils):
     >>> spark.readStream
     <pyspark.sql.streaming.readwriter.DataStreamReader object ...>
 
-    The example below uses Rate source that generates rows continously.
+    The example below uses Rate source that generates rows continuously.
    After that, we operate a modulo by 3, and then writes the stream out to the console.
     The streaming query stops in 3 seconds.
 
@@ -721,7 +721,7 @@ class DataStreamWriter:
 
     Examples
     --------
-    The example below uses Rate source that generates rows continously.
+    The example below uses Rate source that generates rows continuously.
    After that, we operate a modulo by 3, and then writes the stream out to the console.
     The streaming query stops in 3 seconds.
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
