[ https://issues.apache.org/jira/browse/KYLIN-3537?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16773183#comment-16773183 ]

ASF GitHub Bot commented on KYLIN-3537:
---------------------------------------

Sidonet commented on pull request #474: KYLIN-3537
URL: https://github.com/apache/kylin/pull/474
 
 
   Use Spark to build Cube on Yarn failed at Step 8 on HDP 3.0.
   Add HBase dependencies to avoid java.lang.NoClassDefFoundError.
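 
   As a rough sketch of what that change implies for the spark-submit call quoted
   below (the exact jar name is assumed from the HDP 3.0.0.0-1634 layout shown in
   the report, not taken from the actual patch): on HBase 2.x the missing class
   org.apache.hadoop.hbase.io.hfile.HFileWriterImpl ships in the hbase-server
   module, so its jar has to reach the executors as well, e.g. by extending the
   existing --jars list:
 
     # assumed filename, following the naming of the other HDP jars in the reported command
     --jars <existing hbase jars>,/usr/hdp/3.0.0.0-1634/hbase/lib/hbase-server-2.0.0.3.0.0.0-1634.jar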
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


> Use Spark to build Cube on Yarn failed at Step 8 on HDP 3.0
> -----------------------------------------------------------
>
>                 Key: KYLIN-3537
>                 URL: https://issues.apache.org/jira/browse/KYLIN-3537
>             Project: Kylin
>          Issue Type: Bug
>         Environment: HDP3.0
>            Reporter: Lijun Cao
>            Assignee: Lijun Cao
>            Priority: Major
>         Attachments: KYLIN-3537.master.001.patch
>
>
> *The log on Yarn:*
> 18/09/05 03:34:00 INFO scheduler.DAGScheduler: Job 0 failed: saveAsNewAPIHadoopDataset at SparkCubeHFile.java:227, took 13.734642 s
> 18/09/05 03:34:00 ERROR yarn.ApplicationMaster: User class threw exception: java.lang.RuntimeException: error execute org.apache.kylin.storage.hbase.steps.SparkCubeHFile
> java.lang.RuntimeException: error execute org.apache.kylin.storage.hbase.steps.SparkCubeHFile
>       at org.apache.kylin.common.util.AbstractApplication.execute(AbstractApplication.java:42)
>       at org.apache.kylin.common.util.SparkEntry.main(SparkEntry.java:44)
>       at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>       at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>       at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>       at java.lang.reflect.Method.invoke(Method.java:498)
>       at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:636)
> Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 4 times, most recent failure: Lost task 0.3 in stage 1.0 (TID 14, ignite02.com, executor 2): java.lang.NoClassDefFoundError: org/apache/hadoop/hbase/io/hfile/HFileWriterImpl
>       at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2.createRecordWriter(HFileOutputFormat2.java:209)
>       at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2.getRecordWriter(HFileOutputFormat2.java:181)
>       at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1119)
>       at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1102)
>       at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
>       at org.apache.spark.scheduler.Task.run(Task.scala:99)
>       at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:325)
>       at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>       at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>       at java.lang.Thread.run(Thread.java:745)
> Driver stacktrace:
>       at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1435)
>       at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1423)
>       at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1422)
>       at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>       at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
>       at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1422)
>       at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
>       at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
>       at scala.Option.foreach(Option.scala:257)
>       at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:802)
>       at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1650)
>       at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605)
>       at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594)
>       at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
>       at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628)
>       at org.apache.spark.SparkContext.runJob(SparkContext.scala:1928)
>       at org.apache.spark.SparkContext.runJob(SparkContext.scala:1941)
>       at org.apache.spark.SparkContext.runJob(SparkContext.scala:1961)
>       at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1158)
>       at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1085)
>       at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1085)
>       at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>       at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
>       at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
>       at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopDataset(PairRDDFunctions.scala:1085)
>       at org.apache.spark.api.java.JavaPairRDD.saveAsNewAPIHadoopDataset(JavaPairRDD.scala:831)
>       at org.apache.kylin.storage.hbase.steps.SparkCubeHFile.execute(SparkCubeHFile.java:227)
>       at org.apache.kylin.common.util.AbstractApplication.execute(AbstractApplication.java:37)
>       ... 6 more
> Caused by: java.lang.NoClassDefFoundError: org/apache/hadoop/hbase/io/hfile/HFileWriterImpl
>       at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2.createRecordWriter(HFileOutputFormat2.java:209)
>       at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2.getRecordWriter(HFileOutputFormat2.java:181)
>       at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1119)
>       at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1102)
>       at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
>       at org.apache.spark.scheduler.Task.run(Task.scala:99)
>       at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:325)
>       at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>       at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>       at java.lang.Thread.run(Thread.java:745)
> *The Spark submission command is:*
> export HADOOP_CONF_DIR=/usr/hdp/3.0.0.0-1634/hadoop/conf && 
> /root/apache-kylin-2.5.0-SNAPSHOT-bin/spark/bin/spark-submit 
> --class org.apache.kylin.common.util.SparkEntry  
> --conf spark.executor.instances=40  
> --conf spark.yarn.queue=default  
> --conf spark.yarn.am.extraJavaOptions=-Dhdp.version=current  
> --conf spark.history.fs.logDirectory=hdfs:///kylin/spark-history  
> --conf spark.driver.extraJavaOptions=-Dhdp.version=current  
> --conf spark.master=yarn  
> --conf spark.executor.extraJavaOptions=-Dhdp.version=current  
> --conf spark.hadoop.yarn.timeline-service.enabled=false  
> --conf spark.executor.memory=4G  
> --conf spark.eventLog.enabled=true  
> --conf spark.eventLog.dir=hdfs:///kylin/spark-history  
> --conf spark.yarn.executor.memoryOverhead=1024  
> --conf spark.driver.memory=2G  
> --conf spark.submit.deployMode=cluster  
> --conf spark.shuffle.service.enabled=true 
> --jars /usr/hdp/3.0.0.0-1634/hbase/lib/hbase-common-2.0.0.3.0.0.0-1634.jar,/usr/hdp/3.0.0.0-1634/hbase/lib/hbase-mapreduce-2.0.0.3.0.0.0-1634.jar,/usr/hdp/3.0.0.0-1634/hbase/lib/hbase-client-2.0.0.3.0.0.0-1634.jar,/usr/hdp/3.0.0.0-1634/hbase/lib/hbase-protocol-2.0.0.3.0.0.0-1634.jar,/usr/hdp/3.0.0.0-1634/hbase/lib/hbase-hadoop-compat-2.0.0.3.0.0.0-1634.jar,/usr/hdp/3.0.0.0-1634/hbase/lib/htrace-core-3.2.0-incubating.jar,
> /root/apache-kylin-2.5.0-SNAPSHOT-bin/lib/kylin-job-2.5.0-SNAPSHOT.jar 
> -className org.apache.kylin.storage.hbase.steps.SparkCubeHFile 
> -output hdfs://ignite01.com:8020/kylin/kylin_metadata/kylin-2fd7b157-a6af-72fa-5a25-350e92056b20/kylin_sales_cube_clone_spark/hfile 
> -partitions hdfs://ignite01.com:8020/kylin/kylin_metadata/kylin-2fd7b157-a6af-72fa-5a25-350e92056b20/kylin_sales_cube_clone_spark/rowkey_stats/part-r-00000_hfile 
> -input hdfs://ignite01.com:8020/kylin/kylin_metadata/kylin-2fd7b157-a6af-72fa-5a25-350e92056b20/kylin_sales_cube_clone_spark/cuboid/ 
> -segmentId 8588caa3-df7f-5176-aeda-2ef6642b9698 
> -metaUrl kylin_metadata@hdfs,path=hdfs://ignite01.com:8020/kylin/kylin_metadata/kylin-2fd7b157-a6af-72fa-5a25-350e92056b20/kylin_sales_cube_clone_spark/metadata 
> -cubename kylin_sales_cube_clone_spark 
> -hbaseConfPath hdfs://ignite01.com:8020/kylin/kylin_metadata/kylin-2fd7b157-a6af-72fa-5a25-350e92056b20/hbase-conf.xml



