Yang Jie created SPARK-44825:
--------------------------------
Summary: flaky pyspark tests
Key: SPARK-44825
URL: https://issues.apache.org/jira/browse/SPARK-44825
Project: Spark
Issue Type: Improvement
Components: PySpark
Affects Versions: 4.0.0
Reporter: Yang Jie
[https://github.com/apache/spark/actions/runs/5872185188/job/15923301107]
{code:java}
test_vectorized_udf_exception
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
malloc(): unsorted double linked list corrupted
----------------------------------------
Exception occurred during processing of request from ('127.0.0.1', 56342)
Traceback (most recent call last):
File "/usr/lib/python3.9/socketserver.py", line 316, in
_handle_request_noblock
self.process_request(request, client_address)
File "/usr/lib/python3.9/socketserver.py", line 347, in process_request
self.finish_request(request, client_address)
File "/usr/lib/python3.9/socketserver.py", line 360, in finish_request
self.RequestHandlerClass(request, client_address, self)
File "/usr/lib/python3.9/socketserver.py", line 747, in __init__
self.handle()
File "/__w/spark/spark/python/pyspark/accumulators.py", line 295, in handle
poll(accum_updates)
File "/__w/spark/spark/python/pyspark/accumulators.py", line 267, in poll
if self.rfile in r and func():
File "/__w/spark/spark/python/pyspark/accumulators.py", line 271, in
accum_updates
num_updates = read_int(self.rfile)
File "/__w/spark/spark/python/pyspark/serializers.py", line 596, in read_int
raise EOFError
EOFError
----------------------------------------
ERROR:root:Exception while sending command.
Traceback (most recent call last):
File "/__w/spark/spark/python/pyspark/errors/exceptions/captured.py", line
179, in deco
return f(*a, **kw)
File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py",
line 326, in get_return_value
raise Py4JJavaError(
py4j.protocol.Py4JJavaError: <unprintable Py4JJavaError object>

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line
516, in send_command
raise Py4JNetworkError("Answer from Java side is empty")
py4j.protocol.Py4JNetworkError: Answer from Java side is empty

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line
1038, in send_command
response = connection.send_command(command)
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line
539, in send_command
raise Py4JNetworkError(
py4j.protocol.Py4JNetworkError: Error while sending or receiving
ERROR (0.529s)
test_vectorized_udf_invalid_length
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.001s)
test_vectorized_udf_map_type
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.002s)
test_vectorized_udf_nested_struct
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.001s)
test_vectorized_udf_null_array
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.001s)
test_vectorized_udf_null_binary
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.001s)
test_vectorized_udf_null_boolean
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.001s)
test_vectorized_udf_null_byte
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.001s)
test_vectorized_udf_null_decimal
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.001s)
test_vectorized_udf_null_double
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.001s)
test_vectorized_udf_null_float
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.001s)
test_vectorized_udf_null_int
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.001s)
test_vectorized_udf_null_long
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.001s)
test_vectorized_udf_null_short
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.001s)
test_vectorized_udf_null_string
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.001s)
test_vectorized_udf_return_scalar
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.000s)
test_vectorized_udf_return_timestamp_tz
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.001s)
test_vectorized_udf_string_in_udf
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.001s)
test_vectorized_udf_struct_complex
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.001s)
test_vectorized_udf_struct_empty
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.001s)
test_vectorized_udf_struct_type
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.002s)
test_vectorized_udf_struct_with_empty_partition
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.001s)
test_vectorized_udf_timestamps
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.003s)
test_vectorized_udf_timestamps_respect_session_timezone
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.002s)
test_vectorized_udf_varargs
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.001s)
test_vectorized_udf_wrong_return_type
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ...
ERROR (0.000s)

======================================================================
ERROR [0.529s]: test_vectorized_udf_exception
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/__w/spark/spark/python/pyspark/errors/exceptions/captured.py", line
179, in deco
return f(*a, **kw)
File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py",
line 326, in get_return_value
raise Py4JJavaError(
py4j.protocol.Py4JJavaError: <unprintable Py4JJavaError object>

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/__w/spark/spark/python/pyspark/sql/dataframe.py", line 1230, in collect
sock_info = self._jdf.collectToPython()
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line
1322, in __call__
return_value = get_return_value(
File "/__w/spark/spark/python/pyspark/errors/exceptions/captured.py", line
181, in deco
converted = convert_exception(e.java_exception)
File "/__w/spark/spark/python/pyspark/errors/exceptions/captured.py", line
129, in convert_exception
if is_instance_of(gw, e,
"org.apache.spark.sql.catalyst.parser.ParseException"):
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line
464, in is_instance_of
return gateway.jvm.py4j.reflection.TypeUtil.isInstanceOf(
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line
1725, in __getattr__
raise Py4JError(message)
py4j.protocol.Py4JError: py4j does not exist in the JVM

During handling of the above exception, another exception occurred:

ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File
"/__w/spark/spark/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py",
line 646, in test_vectorized_udf_exception
self.check_vectorized_udf_exception()
File
"/__w/spark/spark/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py",
line 659, in check_vectorized_udf_exception
df.select(raise_exception(col("id"))).collect()
File "/usr/lib/python3.9/unittest/case.py", line 239, in __exit__
self._raiseFailure('"{}" does not match "{}"'.format(
File "/usr/lib/python3.9/unittest/case.py", line 163, in _raiseFailure
raise self.test_case.failureException(msg)
AssertionError: "division( or modulo)? by zero" does not match "[Errno 111] Connection refused"
numPartitions = self._sc.defaultParallelism
File "/__w/spark/spark/python/pyspark/context.py", line 630, in
defaultParallelism
return self._jsc.sc().defaultParallelism()
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line
1321, in __call__
answer = self.gateway_client.send_command(command)
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line
1036, in send_command
connection = self._get_connection()
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line
284, in _get_connection
connection = self._create_new_connection()
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line
291, in _create_new_connection
connection.connect_to_java_server()
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line
438, in connect_to_java_server
self.socket.connect((self.java_address, self.java_port))
ConnectionRefusedError: [Errno 111] Connection refused

======================================================================
ERROR [0.000s]: test_vectorized_udf_wrong_return_type
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests)
----------------------------------------------------------------------
Traceback (most recent call last):
File
"/__w/spark/spark/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py",
line 737, in test_vectorized_udf_wrong_return_type
with QuietTest(self.sc):
File "/__w/spark/spark/python/pyspark/testing/utils.py", line 119, in __init__
self.log4j = sc._jvm.org.apache.log4j
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line
1712, in __getattr__
answer = self._gateway_client.send_command(
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line
1036, in send_command
connection = self._get_connection()
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line
284, in _get_connection
connection = self._create_new_connection()
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line
291, in _create_new_connection
connection.connect_to_java_server()
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line
438, in connect_to_java_server
self.socket.connect((self.java_address, self.java_port))
ConnectionRefusedError: [Errno 111] Connection refused

======================================================================
ERROR [0.000s]: tearDownClass
(pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests)
----------------------------------------------------------------------
Traceback (most recent call last):
File
"/__w/spark/spark/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py",
line 1491, in tearDownClass
ReusedSQLTestCase.tearDownClass()
File "/__w/spark/spark/python/pyspark/testing/sqlutils.py", line 269, in
tearDownClass
super(ReusedSQLTestCase, cls).tearDownClass()
File "/__w/spark/spark/python/pyspark/testing/utils.py", line 154, in
tearDownClass
cls.sc.stop()
File "/__w/spark/spark/python/pyspark/context.py", line 654, in stop
self._jsc.stop()
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line
1321, in __call__
answer = self.gateway_client.send_command(command)
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line
1036, in send_command
connection = self._get_connection()
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line
284, in _get_connection
connection = self._create_new_connection()
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line
291, in _create_new_connection
connection.connect_to_java_server()
File
"/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line
438, in connect_to_java_server
self.socket.connect((self.java_address, self.java_port))
ConnectionRefusedError: [Errno 111] Connection refused

----------------------------------------------------------------------
Ran 56 tests in 65.816s

FAILED (errors=27, skipped=1)
{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]