[ https://issues.apache.org/jira/browse/PIG-5412?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17457790#comment-17457790 ]
Rohini Palaniswamy commented on PIG-5412:
-----------------------------------------

+1

> testSkewedJoinOuter spark unit-test failing with ClassNotFoundException
> -----------------------------------------------------------------------
>
>                 Key: PIG-5412
>                 URL: https://issues.apache.org/jira/browse/PIG-5412
>             Project: Pig
>          Issue Type: Bug
>          Components: spark
>            Reporter: Koji Noguchi
>            Assignee: Koji Noguchi
>            Priority: Minor
>         Attachments: pig-5412-v01.patch
>
>
> \{TestSkewedJoin,TestJoinSmoke}.testSkewedJoinOuter
> both with {{-Dtest.exec.type=spark -Dsparkversion=2}}
> are somehow failing with
> "java.lang.ClassNotFoundException: org.apache.pig.backend.hadoop.executionengine.spark.Spark1Shims"
> {noformat}
> Unable to open iterator for alias C. Backend error : Job aborted.
> org.apache.pig.impl.logicalLayer.FrontendException: ERROR 1066: Unable to open iterator for alias C. Backend error : Job aborted.
> at org.apache.pig.PigServer.openIterator(PigServer.java:1014)
> at org.apache.pig.test.TestJoinSmoke.testSkewedJoinOuter(TestJoinSmoke.java:199)
> Caused by: org.apache.spark.SparkException: Job aborted.
> at org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:100)
> at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1083)
> at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1081)
> at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1081)
> at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
> at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
> at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopDataset(PairRDDFunctions.scala:1081)
> at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply$mcV$sp(PairRDDFunctions.scala:1000)
> at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply(PairRDDFunctions.scala:991)
> at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply(PairRDDFunctions.scala:991)
> at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
> at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
> at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopFile(PairRDDFunctions.scala:991)
> at org.apache.pig.backend.hadoop.executionengine.spark.converter.StoreConverter.convert(StoreConverter.java:99)
> at org.apache.pig.backend.hadoop.executionengine.spark.converter.StoreConverter.convert(StoreConverter.java:56)
> at org.apache.pig.backend.hadoop.executionengine.spark.JobGraphBuilder.physicalToRDD(JobGraphBuilder.java:292)
> at org.apache.pig.backend.hadoop.executionengine.spark.JobGraphBuilder.sparkOperToRDD(JobGraphBuilder.java:182)
> at org.apache.pig.backend.hadoop.executionengine.spark.JobGraphBuilder.visitSparkOp(JobGraphBuilder.java:112)
> at org.apache.pig.backend.hadoop.executionengine.spark.plan.SparkOperator.visit(SparkOperator.java:140)
> at org.apache.pig.backend.hadoop.executionengine.spark.plan.SparkOperator.visit(SparkOperator.java:37)
> at org.apache.pig.impl.plan.DependencyOrderWalker.walk(DependencyOrderWalker.java:87)
> at org.apache.pig.impl.plan.PlanVisitor.visit(PlanVisitor.java:46)
> at org.apache.pig.backend.hadoop.executionengine.spark.SparkLauncher.launchPig(SparkLauncher.java:240)
> at org.apache.pig.backend.hadoop.executionengine.HExecutionEngine.launchPig(HExecutionEngine.java:290)
> at org.apache.pig.PigServer.launchPlan(PigServer.java:1479)
> at org.apache.pig.PigServer.executeCompiledLogicalPlan(PigServer.java:1464)
> at org.apache.pig.PigServer.storeEx(PigServer.java:1123)
> at org.apache.pig.PigServer.store(PigServer.java:1086)
> at org.apache.pig.PigServer.openIterator(PigServer.java:999)
> Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 19.0 failed 4 times, most recent failure: Lost task 0.3 in stage 19.0 (TID 26, gsrd466n11.red.ygrid.yahoo.com, executor 2): org.apache.spark.SparkException: Task failed while writing rows
> at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:157)
> at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:83)
> at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> at org.apache.spark.scheduler.Task.run(Task.scala:123)
> at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:411)
> at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:417)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> Caused by: java.lang.RuntimeException: java.lang.ClassNotFoundException: org.apache.pig.backend.hadoop.executionengine.spark.Spark1Shims
> at org.apache.pig.backend.hadoop.executionengine.spark.SparkShims.getInstance(SparkShims.java:74)
> at org.apache.pig.backend.hadoop.executionengine.spark.converter.SkewedJoinConverter$ToValueFunction$Tuple2TransformIterable$1.transform(SkewedJoinConverter.java:215)
> at org.apache.pig.backend.hadoop.executionengine.spark.converter.SkewedJoinConverter$ToValueFunction$Tuple2TransformIterable$1.transform(SkewedJoinConverter.java:176)
> at org.apache.pig.backend.hadoop.executionengine.spark.converter.IteratorTransform.next(IteratorTransform.java:37)
> at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:43)
> at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
> at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:131)
> at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:129)
> at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1394)
> at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:141)
> Caused by: java.lang.ClassNotFoundException: org.apache.pig.backend.hadoop.executionengine.spark.Spark1Shims
> at java.lang.ClassLoader.findClass(ClassLoader.java:523)
> at org.apache.spark.util.ParentClassLoader.findClass(ParentClassLoader.java:35)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
> at org.apache.spark.util.ParentClassLoader.loadClass(ParentClassLoader.java:40)
> at org.apache.spark.util.ChildFirstURLClassLoader.loadClass(ChildFirstURLClassLoader.java:48)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:264)
> at org.apache.pig.backend.hadoop.executionengine.spark.SparkShims.loadShims(SparkShims.java:54)
> at org.apache.pig.backend.hadoop.executionengine.spark.SparkShims.getInstance(SparkShims.java:72)
> Driver stacktrace:
> at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1925)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1913)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1912)
> at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
> at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1912)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:948)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:948)
> at scala.Option.foreach(Option.scala:257)
> at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:948)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2146)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2095)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2084)
> at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
> at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:759)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:2067)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:2088)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:2120)
> at org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:78)
> Caused by: org.apache.spark.SparkException: Task failed while writing rows
> at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:157)
> at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:83)
> at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> at org.apache.spark.scheduler.Task.run(Task.scala:123)
> at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:411)
> at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:417)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> Caused by: java.lang.RuntimeException: java.lang.ClassNotFoundException: org.apache.pig.backend.hadoop.executionengine.spark.Spark1Shims
> at org.apache.pig.backend.hadoop.executionengine.spark.SparkShims.getInstance(SparkShims.java:74)
> at org.apache.pig.backend.hadoop.executionengine.spark.converter.SkewedJoinConverter$ToValueFunction$Tuple2TransformIterable$1.transform(SkewedJoinConverter.java:215)
> at org.apache.pig.backend.hadoop.executionengine.spark.converter.SkewedJoinConverter$ToValueFunction$Tuple2TransformIterable$1.transform(SkewedJoinConverter.java:176)
> at org.apache.pig.backend.hadoop.executionengine.spark.converter.IteratorTransform.next(IteratorTransform.java:37)
> at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:43)
> at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
> at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:131)
> at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:129)
> at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1394)
> at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:141)
> Caused by: java.lang.ClassNotFoundException: org.apache.pig.backend.hadoop.executionengine.spark.Spark1Shims
> at java.lang.ClassLoader.findClass(ClassLoader.java:523)
> at org.apache.spark.util.ParentClassLoader.findClass(ParentClassLoader.java:35)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
> at org.apache.spark.util.ParentClassLoader.loadClass(ParentClassLoader.java:40)
> at org.apache.spark.util.ChildFirstURLClassLoader.loadClass(ChildFirstURLClassLoader.java:48)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:264)
> at org.apache.pig.backend.hadoop.executionengine.spark.SparkShims.loadShims(SparkShims.java:54)
> at org.apache.pig.backend.hadoop.executionengine.spark.SparkShims.getInstance(SparkShims.java:72)
>
> 1.720
> TestSkewedJoin testSkewedJoinOuter Error
> {noformat}
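
For context, the innermost "Caused by" chain shows the failure originating in Pig's shim loader: SparkShims.getInstance() falls through to loadShims(), which resolves the shim implementation by name with Class.forName() and throws ClassNotFoundException because org.apache.pig.backend.hadoop.executionengine.spark.Spark1Shims is not on the executor's classpath, even though the tests were launched with {{-Dsparkversion=2}}. Below is a minimal sketch of that reflective, version-keyed loading pattern. The class and method names are taken from the stack trace; the version lookup (a "sparkversion" system property) and the selection logic are illustrative assumptions, not Pig's actual implementation.

{code:java}
// Minimal sketch of version-keyed shim loading, for illustration only.
// Class/method names (SparkShims, Spark1Shims, Spark2Shims, loadShims,
// getInstance) appear in the stack trace; the version lookup and the
// selection logic below are assumptions, not Pig's real code.
public abstract class SparkShims {

    private static final String SPARK1_SHIMS =
            "org.apache.pig.backend.hadoop.executionengine.spark.Spark1Shims";
    private static final String SPARK2_SHIMS =
            "org.apache.pig.backend.hadoop.executionengine.spark.Spark2Shims";

    private static SparkShims instance;

    private static SparkShims loadShims(String className) throws Exception {
        // Class.forName() is the call that throws ClassNotFoundException when
        // the requested shim class is missing from the executor's classpath.
        Class<?> clazz = Class.forName(className);
        return (SparkShims) clazz.getDeclaredConstructor().newInstance();
    }

    public static synchronized SparkShims getInstance() {
        if (instance == null) {
            // Hypothetical version source: if this resolves to "1" inside a task
            // running against a Spark 2 build, Spark1Shims is requested but
            // absent, producing exactly the ClassNotFoundException wrapped in a
            // RuntimeException seen in the trace above.
            String sparkVersion = System.getProperty("sparkversion", "1");
            try {
                instance = loadShims(sparkVersion.startsWith("2") ? SPARK2_SHIMS : SPARK1_SHIMS);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }
        return instance;
    }
}
{code}

Because the shim is instantiated lazily inside the task (the transform() frames in SkewedJoinConverter), the load failure only surfaces at runtime on the backend, which is why the frontend reports it as ERROR 1066 wrapping a Spark job abort.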