Yes, it works — thanks. Following the wiki quick start, I used spark-defaults.conf. The carbon.properties file should be configured, but since I use spark-shell, carbon.properties is not included in it.
2016-12-09 11:48 GMT+08:00 Liang Chen [via Apache CarbonData Mailing List archive] <ml-node+s1130556n3973...@n5.nabble.com>: > Hi > > Have you solved this issue after applying new configurations? > > Regards > Liang > > geda wrote > hello: > i test data in spark locak model ,then load data inpath to table ,works > well. > but when i use yarn-client modle, with 1w rows , size :940k ,but error > happend ,there is no lock find in tmp dir ,i don't know how to > debug,help.thanks. > spark1.6 hadoop 2.7|2.6 carbondata 0.2 > local mode: run ok > $SPARK_HOME/bin/spark-shell --master local[4] --jars /usr/local/spark/lib/ > carbondata_2.10-0.2.0-incubating-shade-hadoop2.7.1.jar > > > yarn command : run bad > $SPARK_HOME/bin/spark-shell --verbose --master yarn-client > --driver-memory 1G --driver-cores 1 --executor-memory 4G --num-executors > 5 --executor-cores 1 --conf "spark.executor.extraJavaOptions=-XX:NewRatio=2 > -XX:PermSize=512m -XX:MaxPermSize=512m -XX:SurvivorRatio=6 -verbose:gc > -XX:-PrintGCDetails -XX:+PrintGCTimeStamps " --conf "spark.driver. 
> extraJavaOptions=-XX:MaxPermSize=512m -XX:PermSize=512m" --conf > spark.yarn.driver.memoryOverhead=1024 --conf > spark.yarn.executor.memoryOverhead=3096 > --jars /usr/local/spark/lib/carbondata_2.10-0.2.0- > incubating-shade-hadoop2.7.1.jar > > import java.io._ > import org.apache.hadoop.hive.conf.HiveConf > import org.apache.spark.sql.CarbonContext > val storePath = "hdfs://test:8020/usr/carbondata/store" > val cc = new CarbonContext(sc, storePath) > cc.setConf(HiveConf.ConfVars.HIVECHECKFILEFORMAT.varname, "false") > cc.setConf("carbon.kettle.home","/usr/local/spark/carbondata/carbonplugins") > > cc.sql("CREATE TABLE `LINEORDER3` ( LO_ORDERKEY bigint, > LO_LINENUMBER int, LO_CUSTKEY bigint, LO_PARTKEY > bigint, LO_SUPPKEY bigint, LO_ORDERDATE int, > LO_ORDERPRIOTITY string, LO_SHIPPRIOTITY int, LO_QUANTITY int, > LO_EXTENDEDPRICE int, LO_ORDTOTALPRICE int, LO_DISCOUNT int, > LO_REVENUE int, LO_SUPPLYCOST int, LO_TAX int, > LO_COMMITDATE int, LO_SHIPMODE string ) STORED BY > 'carbondata'") > cc.sql(s"load data local inpath 'hdfs://test:8020/tmp/lineorder_1w.tbl' > into table lineorder3 options('DELIMITER'='|', 'FILEHEADER'='LO_ORDERKEY, > LO_LINENUMBER, LO_CUSTKEY, LO_PARTKEY , LO_SUPPKEY , LO_ORDERDATE , > LO_ORDERPRIOTITY , LO_SHIPPRIOTITY , LO_QUANTITY ,LO_EXTENDEDPRICE , > LO_ORDTOTALPRICE ,LO_DISCOUNT , LO_REVENUE , LO_SUPPLYCOST, LO_TAX, > LO_COMMITDATE, LO_SHIPMODE')") > > org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 > in stage 2.0 failed 4 times, most recent failure: Lost task 0.3 in stage > 2.0 (TID 8, datanode03-bi-dev): java.lang.RuntimeException: Dictionary file > lo_orderpriotity is locked for updation. 
Please try after some time > at scala.sys.package$.error(package.scala:27) > at org.apache.carbondata.spark.rdd.CarbonGlobalDictionaryGenerate > RDD$$anon$1.<init>(CarbonGlobalDictionaryRDD.scala:353) > at org.apache.carbondata.spark.rdd.CarbonGlobalDictionaryGenerate > RDD.compute(CarbonGlobalDictionaryRDD.scala:293) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) > > at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66) > > at org.apache.spark.scheduler.Task.run(Task.scala:89) > at > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) > > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > > at java.lang.Thread.run(Thread.java:745) > > Driver stacktrace: > at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$ > scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431) > > at org.apache.spark.scheduler.DAGScheduler$$anonfun$ > abortStage$1.apply(DAGScheduler.scala:1419) > at org.apache.spark.scheduler.DAGScheduler$$anonfun$ > abortStage$1.apply(DAGScheduler.scala:1418) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > > at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418) > > at org.apache.spark.scheduler.DAGScheduler$$anonfun$ > handleTaskSetFailed$1.apply(DAGScheduler.scala:799) > at org.apache.spark.scheduler.DAGScheduler$$anonfun$ > handleTaskSetFailed$1.apply(DAGScheduler.scala:799) > at scala.Option.foreach(Option.scala:236) > at > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799) > > at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop. 
> doOnReceive(DAGScheduler.scala:1640) > at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop. > onReceive(DAGScheduler.scala:1599) > at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop. > onReceive(DAGScheduler.scala:1588) > at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) > > at > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620) > > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1845) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1858) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1929) > at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:927) > > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) > > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111) > > at org.apache.spark.rdd.RDD.withScope(RDD.scala:316) > at org.apache.spark.rdd.RDD.collect(RDD.scala:926) > at org.apache.carbondata.spark.util.GlobalDictionaryUtil$. > generateGlobalDictionary(GlobalDictionaryUtil.scala:800) > at org.apache.spark.sql.execution.command. > LoadTableUsingKettle.run(carbonTableSchema.scala:1197) > at org.apache.spark.sql.execution.command.LoadTable. > run(carbonTableSchema.scala:1036) > at org.apache.spark.sql.execution.ExecutedCommand. > sideEffectResult$lzycompute(commands.scala:58) > at org.apache.spark.sql.execution.ExecutedCommand. 
> sideEffectResult(commands.scala:56) > at > org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:70) > > at org.apache.spark.sql.execution.SparkPlan$$anonfun$ > execute$5.apply(SparkPlan.scala:132) > at org.apache.spark.sql.execution.SparkPlan$$anonfun$ > execute$5.apply(SparkPlan.scala:130) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) > > at > org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:130) > > at > org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:55) > > at > org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:55) > > at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:145) > at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:130) > at > org.apache.carbondata.spark.rdd.CarbonDataFrameRDD.<init>(CarbonDataFrameRDD.scala:23) > > at org.apache.spark.sql.CarbonContext.sql(CarbonContext.scala:137) > > at > $line32.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:37) > > at > $line32.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:42) > > at > $line32.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:44) > > at > $line32.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:46) > > at $line32.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:48) > > at $line32.$read$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:50) > at $line32.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:52) > at $line32.$read$$iwC$$iwC$$iwC.<init>(<console>:54) > at $line32.$read$$iwC$$iwC.<init>(<console>:56) > at $line32.$read$$iwC.<init>(<console>:58) > at $line32.$read.<init>(<console>:60) > at $line32.$read$.<init>(<console>:64) > at $line32.$read$.<clinit>(<console>) > at $line32.$eval$.<init>(<console>:7) > at $line32.$eval$.<clinit>(<console>) > at $line32.$eval.$print(<console>) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at 
sun.reflect.NativeMethodAccessorImpl.invoke( > NativeMethodAccessorImpl.java:57) > at sun.reflect.DelegatingMethodAccessorImpl.invoke( > DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at > org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065) > > at > org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1346) > > at > org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840) > > at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871) > > at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819) > > at > org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857) > > at > org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902) > > at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814) > at > org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657) > > at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665) > > at org.apache.spark.repl.SparkILoop.org$apache$spark$ > repl$SparkILoop$$loop(SparkILoop.scala:670) > at org.apache.spark.repl.SparkILoop$$anonfun$org$ > apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997) > > at org.apache.spark.repl.SparkILoop$$anonfun$org$ > apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945) > at org.apache.spark.repl.SparkILoop$$anonfun$org$ > apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945) > at > scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135) > > at org.apache.spark.repl.SparkILoop.org$apache$spark$ > repl$SparkILoop$$process(SparkILoop.scala:945) > at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059) > > at org.apache.spark.repl.Main$.main(Main.scala:31) > at org.apache.spark.repl.Main.main(Main.scala) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at sun.reflect.NativeMethodAccessorImpl.invoke( > 
NativeMethodAccessorImpl.java:57) > at sun.reflect.DelegatingMethodAccessorImpl.invoke( > DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$ > deploy$SparkSubmit$$runMain(SparkSubmit.scala:731) > at > org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181) > > at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) > > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) > > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > Caused by: java.lang.RuntimeException: Dictionary file lo_orderpriotity is > locked for updation. Please try after some time > at scala.sys.package$.error(package.scala:27) > > > > ------------------------------ > If you reply to this email, your message will be added to the discussion > below: > http://apache-carbondata-mailing-list-archive.1130556. > n5.nabble.com/carbondata-0-2-load-data-failed-in-yarn- > molde-tp3908p3973.html > To unsubscribe from carbondata-0.2 load data failed in yarn molde, click > here > <http://apache-carbondata-mailing-list-archive.1130556.n5.nabble.com/template/NamlServlet.jtp?macro=unsubscribe_by_code&node=3908&code=YmVpZG91NzdAZ21haWwuY29tfDM5MDh8MTU5NTU5NDExOQ==> > . > NAML > <http://apache-carbondata-mailing-list-archive.1130556.n5.nabble.com/template/NamlServlet.jtp?macro=macro_viewer&id=instant_html%21nabble%3Aemail.naml&base=nabble.naml.namespaces.BasicNamespace-nabble.view.web.template.NabbleNamespace-nabble.view.web.template.NodeNamespace&breadcrumbs=notify_subscribers%21nabble%3Aemail.naml-instant_emails%21nabble%3Aemail.naml-send_instant_email%21nabble%3Aemail.naml> > -- View this message in context: http://apache-carbondata-mailing-list-archive.1130556.n5.nabble.com/carbondata-0-2-load-data-failed-in-yarn-molde-tp3908p4012.html Sent from the Apache CarbonData Mailing List archive mailing list archive at Nabble.com.