[
https://issues.apache.org/jira/browse/SPARK-50706?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
ASF GitHub Bot updated SPARK-50706:
-----------------------------------
Labels: pull-request-available (was: )
> Skip test_value_state_ttl_expiration in Coverage build
> ------------------------------------------------------
>
> Key: SPARK-50706
> URL: https://issues.apache.org/jira/browse/SPARK-50706
> Project: Spark
> Issue Type: Improvement
> Components: PySpark, Tests
> Affects Versions: 4.0.0
> Reporter: Hyukjin Kwon
> Priority: Major
> Labels: pull-request-available
>
> {code}
> ======================================================================
> ERROR [12.848s]: test_value_state_ttl_expiration
> (pyspark.sql.tests.pandas.test_pandas_transform_with_state.TransformWithStateInPandasTests.test_value_state_ttl_expiration)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
> File
> "/__w/spark/spark/python/pyspark/sql/tests/pandas/test_pandas_transform_with_state.py",
> line 403, in test_value_state_ttl_expiration
> q.processAllAvailable()
> File "/__w/spark/spark/python/pyspark/sql/streaming/query.py", line 351, in
> processAllAvailable
> return self._jsq.processAllAvailable()
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.8-src.zip/py4j/java_gateway.py",
> line 1355, in __call__
> return_value = get_return_value(
> ^^^^^^^^^^^^^^^^^
> File "/__w/spark/spark/python/pyspark/errors/exceptions/captured.py", line
> 253, in deco
> raise converted from None
> pyspark.errors.exceptions.captured.StreamingQueryException: [STREAM_FAILED]
> Query [id = 623e9008-52cb-4b9d-9343-432e7bd855bb, runId =
> cc06b909-37fd-4acd-98ff-8809b9df92c7] terminated with exception:
> [FOREACH_BATCH_USER_FUNCTION_ERROR] An error occurred in the user provided
> function in foreach batch sink. Reason: An exception was raised by the Python
> Proxy. Return Message: Traceback (most recent call last):
> File
> "/__w/spark/spark/python/lib/py4j-0.10.9.8-src.zip/py4j/clientserver.py",
> line 641, in _call_proxy
> return_value = getattr(self.pool[obj_id], method)(*params)
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> File "/__w/spark/spark/python/pyspark/sql/utils.py", line 157, in call
> raise e
> File "/__w/spark/spark/python/pyspark/sql/utils.py", line 154, in call
> self.func(DataFrame(jdf, wrapped_session_jdf), batch_id)
> File
> "/__w/spark/spark/python/pyspark/sql/tests/pandas/test_pandas_transform_with_state.py",
> line 334, in check_results
> assertDataFrameEqual(
> File "/__w/spark/spark/python/pyspark/testing/utils.py", line 1074, in
> assertDataFrameEqual
> assert_rows_equal(actual_list, expected_list, maxErrors=maxErrors,
> showOnlyDiff=showOnlyDiff)
> File "/__w/spark/spark/python/pyspark/testing/utils.py", line 1030, in
> assert_rows_equal
> raise PySparkAssertionError(
> pyspark.errors.exceptions.base.PySparkAssertionError: [DIFFERENT_ROWS]
> Results do not match: ( 75.00000 % )
> *** actual ***
> Row(id='count-0', count=2)
> Row(id='count-1', count=2)
> ! Row(id='ttl-count-0', count=1)
> ! Row(id='ttl-count-1', count=1)
> ! Row(id='ttl-list-state-count-0', count=1)
> ! Row(id='ttl-list-state-count-1', count=1)
> ! Row(id='ttl-map-state-count-0', count=1)
> ! Row(id='ttl-map-state-count-1', count=1)
> *** expected ***
> Row(id='count-0', count=2)
> Row(id='count-1', count=2)
> ! Row(id='ttl-count-0', count=2)
> ! Row(id='ttl-count-1', count=2)
> ! Row(id='ttl-list-state-count-0', count=3)
> ! Row(id='ttl-list-state-count-1', count=3)
> ! Row(id='ttl-map-state-count-0', count=2)
> ! Row(id='ttl-map-state-count-1', count=2)
> at py4j.Protocol.getReturnValue(Protocol.java:476)
> at
> py4j.reflection.PythonProxyHandler.invoke(PythonProxyHandler.java:108)
> at jdk.proxy3/jdk.proxy3.$Proxy45.call(Unknown Source)
> at
> org.apache.spark.sql.execution.streaming.sources.PythonForeachBatchHelper$.$anonfun$callForeachBatch$1(ForeachBatchSink.scala:85)
> at
> org.apache.spark.sql.execution.streaming.sources.PythonForeachBatchHelper$.$anonfun$callForeachBatch$1$adapted(ForeachBatchSink.scala:85)
> at
> org.apache.spark.sql.execution.streaming.sources.ForeachBatchSink.callBatchWriter(ForeachBatchSink.scala:54)
> at
> org.apache.spark.sql.execution.streaming.sources.ForeachBatchSink.addBatch(ForeachBatchSink.scala:47)
> at
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runBatch$17(MicroBatchExecution.scala:869)
> at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$8(SQLExecution.scala:162)
> at
> org.apache.spark.sql.execution.SQLExecution$.withSessionTagsApplied(SQLExecution.scala:268)
> at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$7(SQLExecution.scala:124)
> at
> org.apache.spark.JobArtifactSet$.withActiveJobArtifactState(JobArtifactSet.scala:94)
> at
> org.apache.spark.sql.artifact.ArtifactManager.$anonfun$withResources$1(ArtifactManager.scala:110)
> at
> org.apache.spark.sql.artifact.ArtifactManager.withClassLoaderIfNeeded(ArtifactManager.scala:104)
> at
> org.apache.spark.sql.artifact.ArtifactManager.withResources(ArtifactManager.scala:109)
> at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$6(SQLExecution.scala:124)
> at
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:291)
> at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$1(SQLExecution.scala:123)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:790)
> at
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId0(SQLExecution.scala:77)
> at
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:233)
> at
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runBatch$16(MicroBatchExecution.scala:866)
> at
> org.apache.spark.sql.execution.streaming.ProgressContext.reportTimeTaken(ProgressReporter.scala:185)
> at
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.runBatch(MicroBatchExecution.scala:866)
> at
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$executeOneBatch$2(MicroBatchExecution.scala:387)
> at
> scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
> at
> org.apache.spark.sql.execution.streaming.ProgressContext.reportTimeTaken(ProgressReporter.scala:185)
> at
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.executeOneBatch(MicroBatchExecution.scala:357)
> at
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$1(MicroBatchExecution.scala:337)
> at
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$1$adapted(MicroBatchExecution.scala:337)
> at
> org.apache.spark.sql.execution.streaming.TriggerExecutor.runOneBatch(TriggerExecutor.scala:39)
> at
> org.apache.spark.sql.execution.streaming.TriggerExecutor.runOneBatch$(TriggerExecutor.scala:37)
> at
> org.apache.spark.sql.execution.streaming.ProcessingTimeExecutor.runOneBatch(TriggerExecutor.scala:70)
> at
> org.apache.spark.sql.execution.streaming.ProcessingTimeExecutor.execute(TriggerExecutor.scala:82)
> at
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.runActivatedStream(MicroBatchExecution.scala:337)
> at
> org.apache.spark.sql.execution.streaming.StreamExecution.$anonfun$runStream$1(StreamExecution.scala:337)
> at
> scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:790)
> at
> org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:311)
> ... 1 more
> ----------------------------------------------------------------------
> Ran 25 tests in 542.906s
> FAILED (errors=1)
> {code}
> https://github.com/apache/spark/actions/runs/12544995465/job/34978553717
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]