After fixing the earlier issue by updating SPARK_HOME to point at the EMR
Spark installation, I get the following. It still appears to be a Spark
versioning issue...
Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1753)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1741)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1740)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1740)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:871)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:871)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:871)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1974)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1923)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1912)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:682)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2034)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2055)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2087)
    at org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:78)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1083)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1081)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1081)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
    at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopDataset(PairRDDFunctions.scala:1081)
    at org.apache.spark.api.java.JavaPairRDD.saveAsNewAPIHadoopDataset(JavaPairRDD.scala:831)
    at org.apache.kylin.engine.spark.SparkCubingByLayer.saveToHDFS(SparkCubingByLayer.java:277)
    at org.apache.kylin.engine.spark.SparkCubingByLayer.execute(SparkCubingByLayer.java:230)
    at org.apache.kylin.common.util.AbstractApplication.execute(AbstractApplication.java:37)
    at org.apache.kylin.common.util.SparkEntry.main(SparkEntry.java:44)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:894)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:198)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:228)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:137)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.IllegalArgumentException: Class is not registered: org.apache.spark.internal.io.FileCommitProtocol$TaskCommitMessage
Note: To register this class use: kryo.register(org.apache.spark.internal.io.FileCommitProtocol$TaskCommitMessage.class);
    at com.esotericsoftware.kryo.Kryo.getRegistration(Kryo.java:488)
    at com.twitter.chill.KryoBase.getRegistration(KryoBase.scala:52)
    at com.esotericsoftware.kryo.util.DefaultClassResolver.writeClass(DefaultClassResolver.java:97)
    at com.esotericsoftware.kryo.Kryo.writeClass(Kryo.java:517)
    at com.esotericsoftware.kryo.Kryo.writeClassAndObject(Kryo.java:622)
    at org.apache.spark.serializer.KryoSerializerInstance.serialize(KryoSerializer.scala:347)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:393)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
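
For anyone who finds this thread later: the "Class is not registered" error
means Kryo registration is being enforced (spark.kryo.registrationRequired
set to true) while FileCommitProtocol$TaskCommitMessage was never registered.
A minimal sketch of two possible workarounds, assuming the Spark conf is
passed through kylin.properties via Kylin's kylin.engine.spark-conf. prefix;
the Spark keys are standard Kryo settings, but adjust names and values for
your own install:

  # Option 1: stop requiring explicit Kryo registration
  kylin.engine.spark-conf.spark.kryo.registrationRequired=false

  # Option 2: register the exact class the stack trace names
  kylin.engine.spark-conf.spark.kryo.classesToRegister=org.apache.spark.internal.io.FileCommitProtocol$TaskCommitMessage
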
On Tue, Aug 28, 2018 at 8:11 AM Sonny Heer <[email protected]> wrote:
> Unable to build the cube at step "#6 Step Name: Build Cube with Spark".
>
> It looks to be a classpath issue, with Spark unable to find some Amazon
> EMR libs. When I look at the Spark defaults in /etc/spark/conf, I do see
> the classpath being set correctly.
>
> Any ideas?
>
>
> -------------
>
> Exception in thread "main" java.lang.RuntimeException: error execute org.apache.kylin.engine.spark.SparkCubingByLayer
>     at org.apache.kylin.common.util.AbstractApplication.execute(AbstractApplication.java:42)
>     at org.apache.kylin.common.util.SparkEntry.main(SparkEntry.java:44)
>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>     at java.lang.reflect.Method.invoke(Method.java:498)
>     at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:744)
>     at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:187)
>     at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:212)
>     at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:126)
>     at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> Caused by: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class com.amazon.ws.emr.hadoop.fs.EmrFileSystem not found
>     at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2195)
>     at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2654)
>     at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2667)
>     at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:94)
>     at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2703)
>     at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2685)
>     at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:373)
>     at org.apache.kylin.common.util.HadoopUtil.deletePath(HadoopUtil.java:133)
>     at org.apache.kylin.engine.spark.SparkCubingByLayer.execute(SparkCubingByLayer.java:142)
>     at org.apache.kylin.common.util.AbstractApplication.execute(AbstractApplication.java:37)
>     ... 10 more
> Caused by: java.lang.ClassNotFoundException: Class com.amazon.ws.emr.hadoop.fs.EmrFileSystem not found
>     at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2101)
>     at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2193)
>     ... 19 more
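
For completeness, the EmrFileSystem error in the quoted trace typically
means the EMRFS jars are visible to Hadoop but not on the classpath that
Kylin's spark-submit ends up using. A minimal sketch of one way to pass
them through, again assuming the kylin.engine.spark-conf. prefix and the
jar locations of a typical EMR node (paths vary by EMR release, so verify
them on your cluster before relying on this):

  # sketch only -- confirm the EMRFS/SDK jar directories on your EMR release
  kylin.engine.spark-conf.spark.driver.extraClassPath=/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/aws-java-sdk/*
  kylin.engine.spark-conf.spark.executor.extraClassPath=/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/aws-java-sdk/*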