voonhous commented on PR #8418: URL: https://github.com/apache/hudi/pull/8418#issuecomment-1511106294
Am struggling with the Github CI: ``` 2023-04-17T09:00:40.6653644Z 918536 [task-result-getter-0] ERROR org.apache.spark.scheduler.TaskSetManager [] - Task 0 in stage 2366.0 failed 1 times; aborting job 2023-04-17T09:00:40.8838862Z 918673 [Executor task launch worker for task 1.0 in stage 2366.0 (TID 3438)] ERROR org.apache.spark.executor.Executor [] - Exception in task 1.0 in stage 2366.0 (TID 3438) 2023-04-17T09:00:40.8840609Z java.lang.ClassCastException: org.apache.spark.sql.vectorized.ColumnarBatchRow cannot be cast to org.apache.spark.sql.vectorized.ColumnarBatch 2023-04-17T09:00:40.8841616Z at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.next(DataSourceScanExec.scala:528) ~[spark-sql_2.12-3.2.3.jar:0.14.0-SNAPSHOT] 2023-04-17T09:00:40.8842408Z at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.next(DataSourceScanExec.scala:517) ~[spark-sql_2.12-3.2.3.jar:0.14.0-SNAPSHOT] 2023-04-17T09:00:40.8843112Z at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source) ~[?:?] 2023-04-17T09:00:40.8843866Z at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source) ~[?:?] 2023-04-17T09:00:40.8844701Z at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) ~[spark-sql_2.12-3.2.3.jar:0.14.0-SNAPSHOT] 2023-04-17T09:00:40.8845512Z at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:759) ~[spark-sql_2.12-3.2.3.jar:0.14.0-SNAPSHOT] 2023-04-17T09:00:40.8846189Z at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) ~[scala-library-2.12.10.jar:?] 2023-04-17T09:00:40.8846741Z at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) ~[scala-library-2.12.10.jar:?] 2023-04-17T09:00:40.8847270Z at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) ~[scala-library-2.12.10.jar:?] 
2023-04-17T09:00:40.8847824Z at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) ~[scala-library-2.12.10.jar:?] 2023-04-17T09:00:40.8848354Z at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) ~[scala-library-2.12.10.jar:?] 2023-04-17T09:00:40.8849015Z at org.apache.spark.util.random.SamplingUtils$.reservoirSampleAndCount(SamplingUtils.scala:41) ~[spark-core_2.12-3.2.3.jar:3.2.3] 2023-04-17T09:00:40.8849674Z at org.apache.spark.RangePartitioner$.$anonfun$sketch$1(Partitioner.scala:306) ~[spark-core_2.12-3.2.3.jar:3.2.3] 2023-04-17T09:00:40.8850316Z at org.apache.spark.RangePartitioner$.$anonfun$sketch$1$adapted(Partitioner.scala:304) ~[spark-core_2.12-3.2.3.jar:3.2.3] 2023-04-17T09:00:40.8850941Z at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:915) ~[spark-core_2.12-3.2.3.jar:3.2.3] 2023-04-17T09:00:40.8851886Z at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:915) ~[spark-core_2.12-3.2.3.jar:3.2.3] 2023-04-17T09:00:40.8852535Z at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.2.3.jar:3.2.3] 2023-04-17T09:00:40.8853174Z at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) ~[spark-core_2.12-3.2.3.jar:3.2.3] 2023-04-17T09:00:40.8853737Z at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) ~[spark-core_2.12-3.2.3.jar:3.2.3] 2023-04-17T09:00:40.8854385Z at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) ~[spark-core_2.12-3.2.3.jar:3.2.3] 2023-04-17T09:00:40.8854959Z at org.apache.spark.scheduler.Task.run(Task.scala:131) ~[spark-core_2.12-3.2.3.jar:3.2.3] 2023-04-17T09:00:40.8855554Z at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506) ~[spark-core_2.12-3.2.3.jar:3.2.3] 2023-04-17T09:00:40.8856150Z at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1491) ~[spark-core_2.12-3.2.3.jar:3.2.3] 2023-04-17T09:00:40.8856720Z at 
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509) ~[spark-core_2.12-3.2.3.jar:3.2.3] 2023-04-17T09:00:40.8857327Z at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_362] 2023-04-17T09:00:40.8857840Z at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_362] 2023-04-17T09:00:40.8858350Z at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_362] 2023-04-17T09:01:07.0036050Z - Test Call run_clustering Procedure Order Strategy *** FAILED *** 2023-04-17T09:01:07.0038126Z java.util.concurrent.CompletionException: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 2366.0 failed 1 times, most recent failure: Lost task 0.0 in stage 2366.0 (TID 3437) (fv-az400-928.4sktce1ui1juvaou3g2cooit5e.bx.internal.cloudapp.net executor driver): java.lang.ClassCastException: org.apache.spark.sql.vectorized.ColumnarBatchRow cannot be cast to org.apache.spark.sql.vectorized.ColumnarBatch 2023-04-17T09:01:07.0039313Z at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.next(DataSourceScanExec.scala:528) 2023-04-17T09:01:07.0039947Z at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.next(DataSourceScanExec.scala:517) 2023-04-17T09:01:07.0040676Z at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source) 2023-04-17T09:01:07.0041487Z at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source) 2023-04-17T09:01:07.0042213Z at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) 2023-04-17T09:01:07.0042873Z at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:759) 2023-04-17T09:01:07.0043424Z at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) 2023-04-17T09:01:07.0043856Z at 
scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) 2023-04-17T09:01:07.0044301Z at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) 2023-04-17T09:01:07.0044743Z at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) 2023-04-17T09:01:07.0045169Z at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) 2023-04-17T09:01:07.0045675Z at org.apache.spark.util.random.SamplingUtils$.reservoirSampleAndCount(SamplingUtils.scala:41) 2023-04-17T09:01:07.0046244Z at org.apache.spark.RangePartitioner$.$anonfun$sketch$1(Partitioner.scala:306) 2023-04-17T09:01:07.0046765Z at org.apache.spark.RangePartitioner$.$anonfun$sketch$1$adapted(Partitioner.scala:304) 2023-04-17T09:01:07.0075476Z at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:915) 2023-04-17T09:01:07.0076081Z at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:915) 2023-04-17T09:01:07.0076644Z at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) 2023-04-17T09:01:07.0077198Z at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) 2023-04-17T09:01:07.0100079Z at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) 2023-04-17T09:01:07.0100905Z at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) 2023-04-17T09:01:07.0101488Z at org.apache.spark.scheduler.Task.run(Task.scala:131) 2023-04-17T09:01:07.0101994Z at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506) 2023-04-17T09:01:07.0102506Z at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1491) 2023-04-17T09:01:07.0102994Z at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509) 2023-04-17T09:01:07.0108497Z at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 2023-04-17T09:01:07.0109167Z at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 2023-04-17T09:01:07.0109625Z at java.lang.Thread.run(Thread.java:750) ``` Ran the 
test locally and it passes. The stack trace doesn't look like it's caused by flaky tests, though. T.T -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
