bkosuru opened a new issue, #5741: URL: https://github.com/apache/hudi/issues/5741
Hello, I am trying to copy an HDFS-based Hudi table that was created with 0.8.0 into a new table written with 0.11.0, as a workaround for issue https://github.com/apache/hudi/issues/5569. The table is about 14 TB and has roughly 50 partitions; the copy failed after 10 partitions with an exception (see Stacktrace at the end).

I copy one partition at a time in a loop (a rough sketch of the loop follows the spark-submit command below) and calculate the write parallelism for each partition in order to minimize the number of Parquet files. The partition that failed had 143 sub-partitions and 435 MB of data, and its calculated parallelism came out to 1. The same copy runs without issues when writing with 0.8.0.

**To Reproduce**

Steps to reproduce the behavior:

Code to copy:

```scala
val df = spark.read.format("hudi").load(input)
val sdf = df.select("s", "p", "o", "g", "isDeleted")
val count = sdf.count()
val rowsPerFile = 5000000
// count.toDouble is needed so that Math.ceil actually rounds up; with integer division the ceiling has no effect
val parallelism = Math.max(1, Math.ceil(count.toDouble / rowsPerFile)).toInt
```

Spark settings:

```scala
new SparkConf()
  .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
  .set("spark.ui.enabled", "false")
  .set("spark.sql.parquet.mergeSchema", "false")
  .set("spark.sql.files.ignoreCorruptFiles", "true")
  .set("spark.sql.hive.convertMetastoreParquet", "false")
```

spark-submit:

```
spark-submit --master yarn --deploy-mode cluster --name kg-copy \
  --driver-memory 24G --executor-memory 50G --executor-cores 6 --num-executors 500 \
  --conf spark.dynamicAllocation.enabled=False \
  --conf spark.network.timeout=240s \
  --conf spark.shuffle.sasl.timeout=60000 \
  --conf spark.driver.maxResultSize=20g \
  --conf spark.port.maxRetries=60 \
  --conf spark.shuffle.service.enabled=True \
  --conf spark.sql.shuffle.partitions=3000 \
  --conf "spark.driver.extraJavaOptions=-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/hoodie-heapdump.hprof" \
  --conf "spark.executor.extraJavaOptions=-XX:NewSize=1g -XX:SurvivorRatio=2 -XX:+UseCompressedOops -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:CMSInitiatingOccupancyFraction=70 -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/hoodie-heapdump.hprof" \
  --conf spark.driver.memoryOverhead=1024 \
  --conf spark.executor.memoryOverhead=3072 \
  --conf spark.yarn.max.executor.failures=100 \
  --conf spark.kryoserializer.buffer.max=512m \
  --conf spark.task.maxFailures=4 \
  --conf spark.rdd.compress=True
```
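Rough sketch of the per-partition loop, included here only for context. It is not the actual `Copy.scala`; the stacktrace below shows `Copy.copy` doing a `List.foreach` that calls `HudiUtils.insert`, which in turn calls `DataFrameWriter.save`, but the names `partitionPaths`, the partition-scoped read path, and the exact `HudiUtils.insert` signature are assumptions made for illustration:

```scala
val rowsPerFile = 5000000
val partitionPaths: List[String] = List(/* one "g=.../p=..." entry per partition of the 0.8.0 table */)

partitionPaths.foreach { partition =>
  val sdf = spark.read.format("hudi")
    .load(s"$input/$partition")                  // partition-scoped read (assumed)
    .select("s", "p", "o", "g", "isDeleted")
  val count = sdf.count()
  // ceil(count / rowsPerFile), floored at 1; this was 1 for the partition that failed
  val parallelism = Math.max(1, Math.ceil(count.toDouble / rowsPerFile)).toInt
  HudiUtils.insert(sdf, parallelism)             // wraps the sdf.write shown below
}
```

The Hudi write configuration used inside the loop is shown next.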
Hudi write configuration and key generator:

```scala
private val AVG_RECORD_SIZE: Int = 256 // approx bytes of our average record, contra Hudi default assumption of 1024
private val ONE_GIGABYTE: Int = 1024 * 1024 * 1024 // used for Parquet file size & block size
private val BLOOM_MAX_ENTRIES: Int = ONE_GIGABYTE / (2 * AVG_RECORD_SIZE)

sdf.write
  .format("hudi")
  // DataSourceWriteOptions
  .option(HIVE_STYLE_PARTITIONING_OPT_KEY, "true")
  .option(KEYGENERATOR_CLASS_OPT_KEY, "com.xyz.SpoKeyGenerator")
  .option(OPERATION_OPT_KEY, INSERT_OPERATION_OPT_VAL)
  .option(INSERT_DROP_DUPS_OPT_KEY, value = false)
  .option(INSERT_PARALLELISM, parallelism)
  .option(PARTITIONPATH_FIELD_OPT_KEY, "g,p")
  .option(PRECOMBINE_FIELD_OPT_KEY, "isDeleted")
  .option(RECORDKEY_FIELD_OPT_KEY, "s,o")
  .option(URL_ENCODE_PARTITIONING_OPT_KEY, value = true)
  // HoodieCompactionConfig
  .option(COPY_ON_WRITE_RECORD_SIZE_ESTIMATE.key, 64)
  // HoodieIndexConfig
  .option(HOODIE_BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES, BLOOM_MAX_ENTRIES)
  .option(BLOOM_INDEX_FILTER_TYPE, BloomFilterTypeCode.DYNAMIC_V0.name)
  .option(INDEX_TYPE.key, HoodieIndex.IndexType.BLOOM.name)
  // HoodieStorageConfig
  .option(LOGFILE_SIZE_MAX_BYTES, ONE_GIGABYTE / 0.35)
  .option(PARQUET_BLOCK_SIZE_BYTES, ONE_GIGABYTE)
  .option(PARQUET_FILE_MAX_BYTES, ONE_GIGABYTE)
  // Commit history
  .option(CLEANER_COMMITS_RETAINED_PROP, Integer.MAX_VALUE - 2)
  .option(MIN_COMMITS_TO_KEEP_PROP, Integer.MAX_VALUE - 1)
  .option(MAX_COMMITS_TO_KEEP_PROP, Integer.MAX_VALUE)
  // HoodieWriteConfig
  .option(EMBEDDED_TIMELINE_SERVER_ENABLED, "false")
  .option(TABLE_NAME, "spog")
  .mode(SaveMode.Append)
  .save(output) // target path of the 0.11.0 copy; the save() call was omitted from the pasted snippet but is implied by the stacktrace

class SpoKeyGenerator(props: TypedProperties) extends ComplexKeyGenerator(props) {

  def hash128(s: String): String = {
    val h: Array[Long] = MurmurHash3.hash128(s.getBytes)
    h(0).toString + h(1).toString
  }

  override def getRecordKey(record: GenericRecord): String = {
    val s = HoodieAvroUtils.getNestedFieldValAsString(record, "s", false, false)
    val o = HoodieAvroUtils.getNestedFieldValAsString(record, "o", false, false)
    genKey(s, o)
  }

  private def genKey(s: String, o: String): String = hash128(s + o)

  override def getRecordKey(row: Row): String = {
    val s = row.getAs(0).toString
    val o = row.getAs(1).toString
    genKey(s, o)
  }
}
```

**Expected behavior**

The copy should complete for all ~50 partitions, as it does when the same job is run with Hudi 0.8.0.

**Environment Description**

* Hudi version : 0.11.0
* Spark version : 2.4.4
* Hive version :
* Hadoop version :
* Storage (HDFS/S3/GCS..) : HDFS
* Running on Docker? (yes/no) : no

**Stacktrace**

```
User class threw exception: org.apache.hudi.exception.HoodieUpsertException: Failed to upsert for commit time 20220601212022721
    at org.apache.hudi.table.action.commit.BaseWriteHelper.write(BaseWriteHelper.java:64)
    at org.apache.hudi.table.action.commit.SparkInsertCommitActionExecutor.execute(SparkInsertCommitActionExecutor.java:45)
    at org.apache.hudi.table.HoodieSparkCopyOnWriteTable.insert(HoodieSparkCopyOnWriteTable.java:118)
    at org.apache.hudi.table.HoodieSparkCopyOnWriteTable.insert(HoodieSparkCopyOnWriteTable.java:97)
    at org.apache.hudi.client.SparkRDDWriteClient.insert(SparkRDDWriteClient.java:182)
    at org.apache.hudi.DataSourceUtils.doWriteOperation(DataSourceUtils.java:211)
    at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:304)
    at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:163)
    at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
    at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:677)
    at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:677)
    at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
    at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:677)
    at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:285)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:271)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:229)
    at com.abc.kg.common.HudiUtils$.insert(HudiUtils.scala:83)
    at com.abc.kg.copy.Copy$$anonfun$copy$1.apply(Copy.scala:104)
    at com.abc.kg.copy.Copy$$anonfun$copy$1.apply(Copy.scala:83)
    at scala.collection.immutable.List.foreach(List.scala:392)
    at com.abc.kg.copy.Copy$.copy(Copy.scala:83)
    at com.abc.kg.copy.Copy$.main(Copy.scala:116)
    at com.abc.kg.copy.Copy.main(Copy.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:673)
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 61 in stage 13.0 failed 4 times, most recent failure: Lost task 61.3 in stage 13.0 (TID 480773, us1salxhpw1324.corpnet2.com, executor 6): java.lang.NoSuchMethodError: org.apache.hadoop.hdfs.client.HdfsDataInputStream.getReadStatistics()Lorg/apache/hadoop/hdfs/DFSInputStream$ReadStatistics;
    at org.apache.hudi.org.apache.hadoop.hbase.io.FSDataInputStreamWrapper.updateInputStreamStatistics(FSDataInputStreamWrapper.java:249)
    at org.apache.hudi.org.apache.hadoop.hbase.io.FSDataInputStreamWrapper.close(FSDataInputStreamWrapper.java:296)
    at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.closeStreams(HFileBlock.java:1825)
    at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFilePreadReader.close(HFilePreadReader.java:107)
    at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.close(HFileReaderImpl.java:1421)
    at org.apache.hudi.io.storage.HoodieHFileReader.close(HoodieHFileReader.java:218)
    at org.apache.hudi.metadata.HoodieBackedTableMetadata.closeReader(HoodieBackedTableMetadata.java:574)
    at org.apache.hudi.metadata.HoodieBackedTableMetadata.close(HoodieBackedTableMetadata.java:567)
    at org.apache.hudi.metadata.HoodieBackedTableMetadata.lambda$getRecordsByKeys$0(HoodieBackedTableMetadata.java:225)
    at java.util.HashMap.forEach(HashMap.java:1289)
    at org.apache.hudi.metadata.HoodieBackedTableMetadata.getRecordsByKeys(HoodieBackedTableMetadata.java:200)
    at org.apache.hudi.metadata.HoodieBackedTableMetadata.getRecordByKey(HoodieBackedTableMetadata.java:140)
    at org.apache.hudi.metadata.BaseTableMetadata.fetchAllFilesInPartition(BaseTableMetadata.java:312)
    at org.apache.hudi.metadata.BaseTableMetadata.getAllFilesInPartition(BaseTableMetadata.java:135)
    at org.apache.hudi.metadata.HoodieMetadataFileSystemView.listPartition(HoodieMetadataFileSystemView.java:65)
    at org.apache.hudi.common.table.view.AbstractTableFileSystemView.lambda$ensurePartitionLoadedCorrectly$9(AbstractTableFileSystemView.java:304)
    at java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1660)
    at org.apache.hudi.common.table.view.AbstractTableFileSystemView.ensurePartitionLoadedCorrectly(AbstractTableFileSystemView.java:295)
    at org.apache.hudi.common.table.view.AbstractTableFileSystemView.getLatestBaseFilesBeforeOrOn(AbstractTableFileSystemView.java:502)
    at org.apache.hudi.table.action.commit.UpsertPartitioner.getSmallFiles(UpsertPartitioner.java:291)
    at org.apache.hudi.table.action.commit.UpsertPartitioner.lambda$getSmallFilesForPartitions$f1d92f9e$1(UpsertPartitioner.java:272)
    at org.apache.spark.api.java.JavaPairRDD$$anonfun$pairFunToScalaFun$1.apply(JavaPairRDD.scala:1043)
    at org.apache.spark.api.java.JavaPairRDD$$anonfun$pairFunToScalaFun$1.apply(JavaPairRDD.scala:1043)
    at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
    at scala.collection.Iterator$class.foreach(Iterator.scala:891)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
    at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
    at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
    at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
    at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
    at scala.collection.AbstractIterator.to(Iterator.scala:1334)
    at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
    at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1334)
    at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
    at scala.collection.AbstractIterator.toArray(Iterator.scala:1334)
    at org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$13.apply(RDD.scala:967)
    at org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$13.apply(RDD.scala:967)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2121)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2121)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:121)
    at org.apache.spark.executor.Executor$TaskRunner$$anonfun$11.apply(Executor.scala:407)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1408)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:413)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)

Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1892)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1880)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1879)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1879)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:930)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:930)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:930)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2113)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2062)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2051)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:741)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2081)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2102)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2121)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2146)
    at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:967)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
    at org.apache.spark.rdd.RDD.collect(RDD.scala:966)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$collectAsMap$1.apply(PairRDDFunctions.scala:743)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$collectAsMap$1.apply(PairRDDFunctions.scala:742)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
    at org.apache.spark.rdd.PairRDDFunctions.collectAsMap(PairRDDFunctions.scala:742)
    at org.apache.spark.api.java.JavaPairRDD.collectAsMap(JavaPairRDD.scala:661)
    at org.apache.hudi.table.action.commit.UpsertPartitioner.getSmallFilesForPartitions(UpsertPartitioner.java:272)
    at org.apache.hudi.table.action.commit.UpsertPartitioner.assignInserts(UpsertPartitioner.java:167)
    at org.apache.hudi.table.action.commit.UpsertPartitioner.<init>(UpsertPartitioner.java:95)
    at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.getUpsertPartitioner(BaseSparkCommitActionExecutor.java:397)
    at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.getInsertPartitioner(BaseSparkCommitActionExecutor.java:401)
    at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.getPartitioner(BaseSparkCommitActionExecutor.java:219)
    at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.execute(BaseSparkCommitActionExecutor.java:163)
    at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.execute(BaseSparkCommitActionExecutor.java:85)
    at org.apache.hudi.table.action.commit.BaseWriteHelper.write(BaseWriteHelper.java:57)
    ... 40 more
Caused by: java.lang.NoSuchMethodError: org.apache.hadoop.hdfs.client.HdfsDataInputStream.getReadStatistics()Lorg/apache/hadoop/hdfs/DFSInputStream$ReadStatistics;
    at org.apache.hudi.org.apache.hadoop.hbase.io.FSDataInputStreamWrapper.updateInputStreamStatistics(FSDataInputStreamWrapper.java:249)
    at org.apache.hudi.org.apache.hadoop.hbase.io.FSDataInputStreamWrapper.close(FSDataInputStreamWrapper.java:296)
    at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.closeStreams(HFileBlock.java:1825)
    at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFilePreadReader.close(HFilePreadReader.java:107)
    at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.close(HFileReaderImpl.java:1421)
    at org.apache.hudi.io.storage.HoodieHFileReader.close(HoodieHFileReader.java:218)
    at org.apache.hudi.metadata.HoodieBackedTableMetadata.closeReader(HoodieBackedTableMetadata.java:574)
    at org.apache.hudi.metadata.HoodieBackedTableMetadata.close(HoodieBackedTableMetadata.java:567)
    at org.apache.hudi.metadata.HoodieBackedTableMetadata.lambda$getRecordsByKeys$0(HoodieBackedTableMetadata.java:225)
    at java.util.HashMap.forEach(HashMap.java:1289)
    at org.apache.hudi.metadata.HoodieBackedTableMetadata.getRecordsByKeys(HoodieBackedTableMetadata.java:200)
    at org.apache.hudi.metadata.HoodieBackedTableMetadata.getRecordByKey(HoodieBackedTableMetadata.java:140)
    at org.apache.hudi.metadata.BaseTableMetadata.fetchAllFilesInPartition(BaseTableMetadata.java:312)
    at org.apache.hudi.metadata.BaseTableMetadata.getAllFilesInPartition(BaseTableMetadata.java:135)
    at org.apache.hudi.metadata.HoodieMetadataFileSystemView.listPartition(HoodieMetadataFileSystemView.java:65)
    at org.apache.hudi.common.table.view.AbstractTableFileSystemView.lambda$ensurePartitionLoadedCorrectly$9(AbstractTableFileSystemView.java:304)
    at java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1660)
    at org.apache.hudi.common.table.view.AbstractTableFileSystemView.ensurePartitionLoadedCorrectly(AbstractTableFileSystemView.java:295)
    at org.apache.hudi.common.table.view.AbstractTableFileSystemView.getLatestBaseFilesBeforeOrOn(AbstractTableFileSystemView.java:502)
    at org.apache.hudi.table.action.commit.UpsertPartitioner.getSmallFiles(UpsertPartitioner.java:291)
    at org.apache.hudi.table.action.commit.UpsertPartitioner.lambda$getSmallFilesForPartitions$f1d92f9e$1(UpsertPartitioner.java:272)
    at org.apache.spark.api.java.JavaPairRDD$$anonfun$pairFunToScalaFun$1.apply(JavaPairRDD.scala:1043)
    at org.apache.spark.api.java.JavaPairRDD$$anonfun$pairFunToScalaFun$1.apply(JavaPairRDD.scala:1043)
    at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
    at scala.collection.Iterator$class.foreach(Iterator.scala:891)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
    at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
    at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
    at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
    at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
    at scala.collection.AbstractIterator.to(Iterator.scala:1334)
    at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
    at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1334)
    at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
    at scala.collection.AbstractIterator.toArray(Iterator.scala:1334)
    at org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$13.apply(RDD.scala:967)
    at org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$13.apply(RDD.scala:967)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2121)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2121)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:121)
    at org.apache.spark.executor.Executor$TaskRunner$$anonfun$11.apply(Executor.scala:407)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1408)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:413)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
```
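For reference, the `NoSuchMethodError` is thrown from the shaded HBase HFile reader that backs Hudi's metadata table (`HoodieBackedTableMetadata` / `HoodieMetadataFileSystemView` in the trace) while the `UpsertPartitioner` lists files for its small-file check, which points at a Hadoop-client incompatibility on the metadata-table read path rather than at the data being copied. The sketch below shows one thing that could be tried to take that path out of the picture, assuming `hoodie.metadata.enable` is the relevant switch in 0.11.0; whether this actually sidesteps the error on this cluster has not been verified:

```scala
// Sketch only: same writer as in the reproduction code, with Hudi's metadata table disabled
// so partition listing does not go through the shaded HBase HFile reader seen in the stacktrace.
// "hoodie.metadata.enable" is assumed to be the applicable 0.11.0 config key.
sdf.write
  .format("hudi")
  .option("hoodie.metadata.enable", "false")
  // ... remaining options exactly as in the reproduction code above ...
  .mode(SaveMode.Append)
  .save(output)
```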
