[
https://issues.apache.org/jira/browse/SPARK-44825?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17754895#comment-17754895
]
Yang Jie edited comment on SPARK-44825 at 8/16/23 6:34 AM:
-----------------------------------------------------------
Another failure case:
[https://github.com/LuciferYang/spark/actions/runs/5874437044/job/15929204583]
!image-2023-08-16-14-34-46-477.png!
was (Author: luciferyang):
Another failure case:
https://github.com/LuciferYang/spark/actions/runs/5874437044/job/15929204583
> flaky pyspark tests
> -------------------
>
> Key: SPARK-44825
> URL: https://issues.apache.org/jira/browse/SPARK-44825
> Project: Spark
> Issue Type: Improvement
> Components: PySpark
> Affects Versions: 4.0.0
> Reporter: Yang Jie
> Priority: Major
> Attachments: image-2023-08-16-14-34-46-477.png
>
>
> [https://github.com/apache/spark/actions/runs/5872185188/job/15923301107]
>
> {code:java}
> test_vectorized_udf_exception
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> malloc(): unsorted double linked list corrupted
> ----------------------------------------
> Exception occurred during processing of request from ('127.0.0.1', 56342)
> Traceback (most recent call last):
> File "/usr/lib/python3.9/socketserver.py", line 316, in
> _handle_request_noblock
> self.process_request(request, client_address)
> File "/usr/lib/python3.9/socketserver.py", line 347, in process_request
> self.finish_request(request, client_address)
> File "/usr/lib/python3.9/socketserver.py", line 360, in finish_request
> self.RequestHandlerClass(request, client_address, self)
> File "/usr/lib/python3.9/socketserver.py", line 747, in __init__
> self.handle()
> File "/__w/spark/spark/python/pyspark/accumulators.py", line 295, in handle
> poll(accum_updates)
> File "/__w/spark/spark/python/pyspark/accumulators.py", line 267, in poll
> if self.rfile in r and func():
> File "/__w/spark/spark/python/pyspark/accumulators.py", line 271, in
> accum_updates
> num_updates = read_int(self.rfile)
> File "/__w/spark/spark/python/pyspark/serializers.py", line 596, in read_int
> raise EOFError
> EOFError
> ----------------------------------------
> ERROR:root:Exception while sending command.
> Traceback (most recent call last):
> File "/__w/spark/spark/python/pyspark/errors/exceptions/captured.py", line
> 179, in deco
> return f(*a, **kw)
> File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py",
> line 326, in get_return_value
> raise Py4JJavaError(
> py4j.protocol.Py4JJavaError: <unprintable Py4JJavaError object>
>
> During handling of the above exception, another exception occurred:
>
> Traceback (most recent call last):
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py",
> line 516, in send_command
> raise Py4JNetworkError("Answer from Java side is empty")
> py4j.protocol.Py4JNetworkError: Answer from Java side is empty
>
> During handling of the above exception, another exception occurred:
>
> Traceback (most recent call last):
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py",
> line 1038, in send_command
> response = connection.send_command(command)
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py",
> line 539, in send_command
> raise Py4JNetworkError(
> py4j.protocol.Py4JNetworkError: Error while sending or receiving
> ERROR (0.529s)
> test_vectorized_udf_invalid_length
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.001s)
> test_vectorized_udf_map_type
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.002s)
> test_vectorized_udf_nested_struct
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.001s)
> test_vectorized_udf_null_array
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.001s)
> test_vectorized_udf_null_binary
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.001s)
> test_vectorized_udf_null_boolean
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.001s)
> test_vectorized_udf_null_byte
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.001s)
> test_vectorized_udf_null_decimal
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.001s)
> test_vectorized_udf_null_double
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.001s)
> test_vectorized_udf_null_float
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.001s)
> test_vectorized_udf_null_int
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.001s)
> test_vectorized_udf_null_long
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.001s)
> test_vectorized_udf_null_short
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.001s)
> test_vectorized_udf_null_string
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.001s)
> test_vectorized_udf_return_scalar
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.000s)
> test_vectorized_udf_return_timestamp_tz
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.001s)
> test_vectorized_udf_string_in_udf
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.001s)
> test_vectorized_udf_struct_complex
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.001s)
> test_vectorized_udf_struct_empty
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.001s)
> test_vectorized_udf_struct_type
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.002s)
> test_vectorized_udf_struct_with_empty_partition
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.001s)
> test_vectorized_udf_timestamps
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.003s)
> test_vectorized_udf_timestamps_respect_session_timezone
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.002s)
> test_vectorized_udf_varargs
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.001s)
> test_vectorized_udf_wrong_return_type
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
> ERROR (0.000s)
>
> ======================================================================
> ERROR [0.529s]: test_vectorized_udf_exception
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
> File "/__w/spark/spark/python/pyspark/errors/exceptions/captured.py", line
> 179, in deco
> return f(*a, **kw)
> File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py",
> line 326, in get_return_value
> raise Py4JJavaError(
> py4j.protocol.Py4JJavaError: <unprintable Py4JJavaError object>
>
> During handling of the above exception, another exception occurred:
>
> Traceback (most recent call last):
> File "/__w/spark/spark/python/pyspark/sql/dataframe.py", line 1230, in
> collect
> sock_info = self._jdf.collectToPython()
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py",
> line 1322, in __call__
> return_value = get_return_value(
> File "/__w/spark/spark/python/pyspark/errors/exceptions/captured.py", line
> 181, in deco
> converted = convert_exception(e.java_exception)
> File "/__w/spark/spark/python/pyspark/errors/exceptions/captured.py", line
> 129, in convert_exception
> if is_instance_of(gw, e,
> "org.apache.spark.sql.catalyst.parser.ParseException"):
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py",
> line 464, in is_instance_of
> return gateway.jvm.py4j.reflection.TypeUtil.isInstanceOf(
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py",
> line 1725, in __getattr__
> raise Py4JError(message)
> py4j.protocol.Py4JError: py4j does not exist in the JVM
>
> During handling of the above exception, another exception occurred:
>
> ConnectionRefusedError: [Errno 111] Connection refused
>
> During handling of the above exception, another exception occurred:
>
> Traceback (most recent call last):
> File
> "/__w/spark/spark/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py",
> line 646, in test_vectorized_udf_exception
> self.check_vectorized_udf_exception()
> File
> "/__w/spark/spark/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py",
> line 659, in check_vectorized_udf_exception
> df.select(raise_exception(col("id"))).collect()
> File "/usr/lib/python3.9/unittest/case.py", line 239, in __exit__
> self._raiseFailure('"{}" does not match "{}"'.format(
> File "/usr/lib/python3.9/unittest/case.py", line 163, in _raiseFailure
> raise self.test_case.failureException(msg)
> AssertionError: "division( or modulo)? by zero" does not match "[Errno 111] Connection refused"
> numPartitions = self._sc.defaultParallelism
> File "/__w/spark/spark/python/pyspark/context.py", line 630, in
> defaultParallelism
> return self._jsc.sc().defaultParallelism()
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py",
> line 1321, in __call__
> answer = self.gateway_client.send_command(command)
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py",
> line 1036, in send_command
> connection = self._get_connection()
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py",
> line 284, in _get_connection
> connection = self._create_new_connection()
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py",
> line 291, in _create_new_connection
> connection.connect_to_java_server()
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py",
> line 438, in connect_to_java_server
> self.socket.connect((self.java_address, self.java_port))
> ConnectionRefusedError: [Errno 111] Connection refused
>
> ======================================================================
> ERROR [0.000s]: test_vectorized_udf_wrong_return_type
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
> File
> "/__w/spark/spark/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py",
> line 737, in test_vectorized_udf_wrong_return_type
> with QuietTest(self.sc):
> File "/__w/spark/spark/python/pyspark/testing/utils.py", line 119, in
> __init__
> self.log4j = sc._jvm.org.apache.log4j
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py",
> line 1712, in __getattr__
> answer = self._gateway_client.send_command(
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py",
> line 1036, in send_command
> connection = self._get_connection()
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py",
> line 284, in _get_connection
> connection = self._create_new_connection()
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py",
> line 291, in _create_new_connection
> connection.connect_to_java_server()
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py",
> line 438, in connect_to_java_server
> self.socket.connect((self.java_address, self.java_port))
> ConnectionRefusedError: [Errno 111] Connection refused
>
> ======================================================================
> ERROR [0.000s]: tearDownClass
> (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
> File
> "/__w/spark/spark/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py",
> line 1491, in tearDownClass
> ReusedSQLTestCase.tearDownClass()
> File "/__w/spark/spark/python/pyspark/testing/sqlutils.py", line 269, in
> tearDownClass
> super(ReusedSQLTestCase, cls).tearDownClass()
> File "/__w/spark/spark/python/pyspark/testing/utils.py", line 154, in
> tearDownClass
> cls.sc.stop()
> File "/__w/spark/spark/python/pyspark/context.py", line 654, in stop
> self._jsc.stop()
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py",
> line 1321, in __call__
> answer = self.gateway_client.send_command(command)
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py",
> line 1036, in send_command
> connection = self._get_connection()
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py",
> line 284, in _get_connection
> connection = self._create_new_connection()
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py",
> line 291, in _create_new_connection
> connection.connect_to_java_server()
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py",
> line 438, in connect_to_java_server
> self.socket.connect((self.java_address, self.java_port))
> ConnectionRefusedError: [Errno 111] Connection refused
>
> ----------------------------------------------------------------------
> Ran 56 tests in 65.816s
>
> FAILED (errors=27, skipped=1)
> {code}
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]