This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 784b942196b [SPARK-44380][PYTHON][FOLLOWUP] Set __doc__ for analyze
static method when Arrow is enabled
784b942196b is described below
commit 784b942196bb08a7959222f549722c6db3a3588e
Author: Takuya UESHIN <[email protected]>
AuthorDate: Sat Jul 22 16:49:14 2023 +0900
[SPARK-44380][PYTHON][FOLLOWUP] Set __doc__ for analyze static method when
Arrow is enabled
### What changes were proposed in this pull request?
This is a follow-up of apache/spark#41948.
Set `__doc__` for the `analyze` static method when Arrow is enabled.
### Why are the changes needed?
When Arrow is enabled, the `analyze` static method does not have the `__doc__`
of the original `analyze` method; it should be the same as the original contents.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Updated the related tests.
Closes #42111 from ueshin/issues/SPARK-44380/analyze_doc.
Authored-by: Takuya UESHIN <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/tests/test_udtf.py | 8 +++++++-
python/pyspark/sql/udtf.py | 3 +++
2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/python/pyspark/sql/tests/test_udtf.py
b/python/pyspark/sql/tests/test_udtf.py
index 13ea86ebcb2..e67ec245795 100644
--- a/python/pyspark/sql/tests/test_udtf.py
+++ b/python/pyspark/sql/tests/test_udtf.py
@@ -749,6 +749,11 @@ class BaseUDTFTestsMixin:
"""Initialize the UDTF"""
...
+ @staticmethod
+ def analyze(x: AnalyzeArgument) -> AnalyzeResult:
+ """Analyze the argument."""
+ ...
+
def eval(self, x: int):
"""Evaluate the input row."""
yield x + 1,
@@ -757,9 +762,10 @@ class BaseUDTFTestsMixin:
"""Terminate the UDTF."""
...
- cls = udtf(TestUDTF, returnType="y: int").func
+ cls = udtf(TestUDTF).func
self.assertIn("A UDTF for test", cls.__doc__)
self.assertIn("Initialize the UDTF", cls.__init__.__doc__)
+ self.assertIn("Analyze the argument", cls.analyze.__doc__)
self.assertIn("Evaluate the input row", cls.eval.__doc__)
self.assertIn("Terminate the UDTF", cls.terminate.__doc__)
diff --git a/python/pyspark/sql/udtf.py b/python/pyspark/sql/udtf.py
index a35278deb9b..67d4ef33777 100644
--- a/python/pyspark/sql/udtf.py
+++ b/python/pyspark/sql/udtf.py
@@ -178,6 +178,9 @@ def _vectorize_udtf(cls: Type) -> Type:
if hasattr(cls, "terminate"):
getattr(vectorized_udtf, "terminate").__doc__ = getattr(cls,
"terminate").__doc__
+ if hasattr(vectorized_udtf, "analyze"):
+ getattr(vectorized_udtf, "analyze").__doc__ = getattr(cls,
"analyze").__doc__
+
return vectorized_udtf
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]