Yang Jie created SPARK-44825:
--------------------------------

             Summary: flaky pyspark tests
                 Key: SPARK-44825
                 URL: https://issues.apache.org/jira/browse/SPARK-44825
             Project: Spark
          Issue Type: Improvement
          Components: PySpark
    Affects Versions: 4.0.0
            Reporter: Yang Jie

[https://github.com/apache/spark/actions/runs/5872185188/job/15923301107] {code:java} test_vectorized_udf_exception (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... malloc(): unsorted double linked list corrupted ---------------------------------------- Exception occurred during processing of request from ('127.0.0.1', 56342) Traceback (most recent call last): File "/usr/lib/python3.9/socketserver.py", line 316, in _handle_request_noblock self.process_request(request, client_address) File "/usr/lib/python3.9/socketserver.py", line 347, in process_request self.finish_request(request, client_address) File "/usr/lib/python3.9/socketserver.py", line 360, in finish_request self.RequestHandlerClass(request, client_address, self) File "/usr/lib/python3.9/socketserver.py", line 747, in __init__ self.handle() File "/__w/spark/spark/python/pyspark/accumulators.py", line 295, in handle poll(accum_updates) File "/__w/spark/spark/python/pyspark/accumulators.py", line 267, in poll if self.rfile in r and func(): File "/__w/spark/spark/python/pyspark/accumulators.py", line 271, in accum_updates num_updates = read_int(self.rfile) File "/__w/spark/spark/python/pyspark/serializers.py", line 596, in read_int raise EOFError EOFError ---------------------------------------- ERROR:root:Exception while sending command. 
Traceback (most recent call last): File "/__w/spark/spark/python/pyspark/errors/exceptions/captured.py", line 179, in deco return f(*a, **kw) File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py", line 326, in get_return_value raise Py4JJavaError( py4j.protocol.Py4JJavaError: <unprintable Py4JJavaError object>During handling of the above exception, another exception occurred:Traceback (most recent call last): File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line 516, in send_command raise Py4JNetworkError("Answer from Java side is empty") py4j.protocol.Py4JNetworkError: Answer from Java side is emptyDuring handling of the above exception, another exception occurred:Traceback (most recent call last): File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line 1038, in send_command response = connection.send_command(command) File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line 539, in send_command raise Py4JNetworkError( py4j.protocol.Py4JNetworkError: Error while sending or receiving ERROR (0.529s) test_vectorized_udf_invalid_length (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.001s) test_vectorized_udf_map_type (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.002s) test_vectorized_udf_nested_struct (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.001s) test_vectorized_udf_null_array (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.001s) test_vectorized_udf_null_binary (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.001s) test_vectorized_udf_null_boolean (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.001s) test_vectorized_udf_null_byte (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... 
ERROR (0.001s) test_vectorized_udf_null_decimal (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.001s) test_vectorized_udf_null_double (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.001s) test_vectorized_udf_null_float (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.001s) test_vectorized_udf_null_int (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.001s) test_vectorized_udf_null_long (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.001s) test_vectorized_udf_null_short (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.001s) test_vectorized_udf_null_string (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.001s) test_vectorized_udf_return_scalar (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.000s) test_vectorized_udf_return_timestamp_tz (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.001s) test_vectorized_udf_string_in_udf (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.001s) test_vectorized_udf_struct_complex (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.001s) test_vectorized_udf_struct_empty (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.001s) test_vectorized_udf_struct_type (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.002s) test_vectorized_udf_struct_with_empty_partition (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.001s) test_vectorized_udf_timestamps (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... 
ERROR (0.003s) test_vectorized_udf_timestamps_respect_session_timezone (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.002s) test_vectorized_udf_varargs (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.001s) test_vectorized_udf_wrong_return_type (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ... ERROR (0.000s)====================================================================== ERROR [0.529s]: test_vectorized_udf_exception (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ---------------------------------------------------------------------- Traceback (most recent call last): File "/__w/spark/spark/python/pyspark/errors/exceptions/captured.py", line 179, in deco return f(*a, **kw) File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py", line 326, in get_return_value raise Py4JJavaError( py4j.protocol.Py4JJavaError: <unprintable Py4JJavaError object>During handling of the above exception, another exception occurred:Traceback (most recent call last): File "/__w/spark/spark/python/pyspark/sql/dataframe.py", line 1230, in collect sock_info = self._jdf.collectToPython() File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line 1322, in __call__ return_value = get_return_value( File "/__w/spark/spark/python/pyspark/errors/exceptions/captured.py", line 181, in deco converted = convert_exception(e.java_exception) File "/__w/spark/spark/python/pyspark/errors/exceptions/captured.py", line 129, in convert_exception if is_instance_of(gw, e, "org.apache.spark.sql.catalyst.parser.ParseException"): File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line 464, in is_instance_of return gateway.jvm.py4j.reflection.TypeUtil.isInstanceOf( File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line 1725, in __getattr__ raise Py4JError(message) py4j.protocol.Py4JError: py4j does 
not exist in the JVMDuring handling of the above exception, another exception occurred:ConnectionRefusedError: [Errno 111] Connection refusedDuring handling of the above exception, another exception occurred:Traceback (most recent call last): File "/__w/spark/spark/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py", line 646, in test_vectorized_udf_exception self.check_vectorized_udf_exception() File "/__w/spark/spark/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py", line 659, in check_vectorized_udf_exception df.select(raise_exception(col("id"))).collect() File "/usr/lib/python3.9/unittest/case.py", line 239, in __exit__ self._raiseFailure('"{}" does not match "{}"'.format( File "/usr/lib/python3.9/unittest/case.py", line 163, in _raiseFailure raise self.test_case.failureException(msg) AssertionError: "division( or modulo)? by zero" does not match "[Errno 111] Connection refused" numPartitions = self._sc.defaultParallelism File "/__w/spark/spark/python/pyspark/context.py", line 630, in defaultParallelism return self._jsc.sc().defaultParallelism() File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line 1321, in __call__ answer = self.gateway_client.send_command(command) File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line 1036, in send_command connection = self._get_connection() File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line 284, in _get_connection connection = self._create_new_connection() File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line 291, in _create_new_connection connection.connect_to_java_server() File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line 438, in connect_to_java_server self.socket.connect((self.java_address, self.java_port)) ConnectionRefusedError: [Errno 111] Connection refused====================================================================== ERROR [0.000s]: 
test_vectorized_udf_wrong_return_type (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ---------------------------------------------------------------------- Traceback (most recent call last): File "/__w/spark/spark/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py", line 737, in test_vectorized_udf_wrong_return_type with QuietTest(self.sc): File "/__w/spark/spark/python/pyspark/testing/utils.py", line 119, in __init__ self.log4j = sc._jvm.org.apache.log4j File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line 1712, in __getattr__ answer = self._gateway_client.send_command( File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line 1036, in send_command connection = self._get_connection() File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line 284, in _get_connection connection = self._create_new_connection() File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line 291, in _create_new_connection connection.connect_to_java_server() File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line 438, in connect_to_java_server self.socket.connect((self.java_address, self.java_port)) ConnectionRefusedError: [Errno 111] Connection refused====================================================================== ERROR [0.000s]: tearDownClass (pyspark.sql.tests.pandas.test_pandas_udf_scalar.ScalarPandasUDFTests) ---------------------------------------------------------------------- Traceback (most recent call last): File "/__w/spark/spark/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py", line 1491, in tearDownClass ReusedSQLTestCase.tearDownClass() File "/__w/spark/spark/python/pyspark/testing/sqlutils.py", line 269, in tearDownClass super(ReusedSQLTestCase, cls).tearDownClass() File "/__w/spark/spark/python/pyspark/testing/utils.py", line 154, in tearDownClass cls.sc.stop() File 
"/__w/spark/spark/python/pyspark/context.py", line 654, in stop
    self._jsc.stop()
  File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line 1321, in __call__
    answer = self.gateway_client.send_command(command)
  File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line 1036, in send_command
    connection = self._get_connection()
  File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line 284, in _get_connection
    connection = self._create_new_connection()
  File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line 291, in _create_new_connection
    connection.connect_to_java_server()
  File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/clientserver.py", line 438, in connect_to_java_server
    self.socket.connect((self.java_address, self.java_port))
ConnectionRefusedError: [Errno 111] Connection refused

----------------------------------------------------------------------
Ran 56 tests in 65.816s

FAILED (errors=27, skipped=1)
{code}

--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org