This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.5 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push: new 704f956dcbed [MINOR][PYTHON][TESTS] Remove the doc in error message tests to allow other PyArrow versions in tests 704f956dcbed is described below commit 704f956dcbeddc9067e4ec502c4fd07175171cac Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Tue May 7 20:07:25 2024 -0700 [MINOR][PYTHON][TESTS] Remove the doc in error message tests to allow other PyArrow versions in tests This PR is a minor change to support more PyArrow versions in the test. To support more PyArrow versions in the test. Otherwise, it can fail: (https://github.com/HyukjinKwon/spark/actions/runs/8994639538/job/24708397027) ``` Traceback (most recent call last): File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py", line 585, in _test_merge_error self.__test_merge_error( File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py", line 606, in __test_merge_error with self.assertRaisesRegex(error_class, error_message_regex): AssertionError: "Return type of the user-defined function should be pandas.DataFrame, but is int64." does not match " An exception was thrown from the Python worker. Please see the stack trace below. 
Traceback (most recent call last): File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1834, in main process() File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1826, in process serializer.dump_stream(out_iter, outfile) File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 531, in dump_stream return ArrowStreamSerializer.dump_stream(self, init_stream_yield_batches(), stream) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 104, in dump_stream for batch in iterator: File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 524, in init_stream_yield_batches for series in iterator: File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1694, in mapper return f(df1_keys, df1_vals, df2_keys, df2_vals) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 370, in <lambda> return lambda kl, vl, kr, vr: [(wrapped(kl, vl, kr, vr), to_arrow_type(return_type))] ^^^^^^^^^^^^^^^^^^^^^^^ File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 364, in wrapped verify_pandas_result( File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 234, in verify_pandas_result raise PySparkTypeError( pyspark.errors.exceptions.base.PySparkTypeError: [UDF_RETURN_TYPE] Return type of the user-defined function should be pandas.DataFrame, but is int. ``` No, test-only. Ci should validate it. No. Closes #46453 from HyukjinKwon/minor-test. 
Authored-by: Hyukjin Kwon <gurwls...@apache.org> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py | 2 +- python/pyspark/sql/tests/pandas/test_pandas_map.py | 4 ++-- python/pyspark/sql/tests/test_arrow_map.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py b/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py index c3cd0f37b103..948ef4a53f2c 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py @@ -166,7 +166,7 @@ class CogroupedApplyInPandasTestsMixin: fn=lambda lft, rgt: lft.size + rgt.size, error_class=PythonException, error_message_regex="Return type of the user-defined function " - "should be pandas.DataFrame, but is int64.", + "should be pandas.DataFrame, but is int", ) def test_apply_in_pandas_returning_column_names(self): diff --git a/python/pyspark/sql/tests/pandas/test_pandas_map.py b/python/pyspark/sql/tests/pandas/test_pandas_map.py index c3ba7b3e93a0..4b2be2bcf844 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_map.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_map.py @@ -151,14 +151,14 @@ class MapInPandasTestsMixin: with self.assertRaisesRegex( PythonException, "Return type of the user-defined function should be iterator of pandas.DataFrame, " - "but is int.", + "but is int", ): (self.spark.range(10, numPartitions=3).mapInPandas(no_iter, "a int").count()) with self.assertRaisesRegex( PythonException, "Return type of the user-defined function should be iterator of pandas.DataFrame, " - "but is iterator of int.", + "but is iterator of int", ): (self.spark.range(10, numPartitions=3).mapInPandas(bad_iter_elem, "a int").count()) diff --git a/python/pyspark/sql/tests/test_arrow_map.py b/python/pyspark/sql/tests/test_arrow_map.py index 15367743585e..176286a809d4 100644 --- 
a/python/pyspark/sql/tests/test_arrow_map.py +++ b/python/pyspark/sql/tests/test_arrow_map.py @@ -104,14 +104,14 @@ class MapInArrowTestsMixin(object): with self.assertRaisesRegex( PythonException, "Return type of the user-defined function should be iterator " - "of pyarrow.RecordBatch, but is int.", + "of pyarrow.RecordBatch, but is int", ): (self.spark.range(10, numPartitions=3).mapInArrow(not_iter, "a int").count()) with self.assertRaisesRegex( PythonException, "Return type of the user-defined function should be iterator " - "of pyarrow.RecordBatch, but is iterator of int.", + "of pyarrow.RecordBatch, but is iterator of int", ): (self.spark.range(10, numPartitions=3).mapInArrow(bad_iter_elem, "a int").count()) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org