Hi,
Could you raise a JIRA to report this issue?
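In the meantime, it would help to confirm the schema CarbonData actually stored
for the table and to narrow down which column triggers the failure: the
ClassCastException (java.lang.String cannot be cast to java.lang.Integer)
suggests the generated projection is reading a string column as an integer.
A minimal diagnostic sketch from your spark-shell session, assuming your
CarbonContext is named cc as in your log, and that DESCRIBE FORMATTED passes
through to the Hive layer (table and column names are taken from your log):

    // Show the full stored schema and table properties,
    // not just the column list from "desc".
    cc.sql("DESCRIBE FORMATTED carbontest_002").show(100, false)

    // Query each column on its own to see which one triggers the cast failure.
    cc.sql("SELECT vin FROM carbontest_002 LIMIT 10").show()
    cc.sql("SELECT data_date FROM carbontest_002 LIMIT 10").show()

Please also paste your CREATE TABLE statement and the output of the above into
the JIRA.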
Regards
Liang
Cao Lu 曹鲁 wrote
> Hi dev team,
> I built CarbonData from the master branch and deployed it to a Spark on
> YARN cluster.
> The data loaded successfully and count(*) is OK, but when I try to query
> the detail data it returns the error below.
> Any idea what causes this? My Spark version is 1.6.0.
>
> scala> cc.sql("desc carbontest_002").show
>
> +---------+---------+-------+
> | col_name|data_type|comment|
> +---------+---------+-------+
> |      vin|   string|       |
> |data_date|   string|       |
> +---------+---------+-------+
>
> scala> cc.sql("load data inpath
> 'hdfs://nameservice2/user/appuser/lucao/mydata4.csv' into table
> default.carbontest_002 OPTIONS('DELIMITER'=',')")
>
> WARN 07-12 16:30:30,241 - main skip empty input file:
> hdfs://nameservice2/user/appuser/lucao/mydata4.csv/_SUCCESS
>
> AUDIT 07-12 16:30:34,338 - [*.com][appuser][Thread-1]Data load request has
> been received for table default.carbontest_002
>
> AUDIT 07-12 16:30:38,410 - [*.com][appuser][Thread-1]Data load is
> successful for default.carbontest_002
>
> res12: org.apache.spark.sql.DataFrame = []
>
>
> scala> cc.sql("select count(*) from carbontest_002")
>
> res14: org.apache.spark.sql.DataFrame = [_c0: bigint]
>
>
> scala> res14.show
>
> +-------+
>
> | _c0|
>
> +-------+
>
> |1000000|
>
> +-------+
>
>
>
> scala> cc.sql("select vin, count(*) as cnt from carbontest_002 group by
> vin").show
>
> WARN 07-12 16:32:04,250 - Lost task 1.0 in stage 20.0 (TID 40, *.com):
> java.lang.ClassCastException: java.lang.String cannot be cast to
> java.lang.Integer
>
> at scala.runtime.BoxesRunTime.unboxToInt(BoxesRunTime.java:106)
>
> at
> org.apache.spark.sql.catalyst.expressions.BaseGenericInternalRow$class.getInt(rows.scala:41)
>
> at
> org.apache.spark.sql.catalyst.expressions.GenericMutableRow.getInt(rows.scala:248)
>
> at
> org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown
> Source)
>
> at
> org.apache.spark.sql.CarbonScan$$anonfun$1$$anon$1.next(CarbonScan.scala:155)
>
> at
> org.apache.spark.sql.CarbonScan$$anonfun$1$$anon$1.next(CarbonScan.scala:149)
>
> at
> org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator.processInputs(TungstenAggregationIterator.scala:512)
>
> at org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator.
> <init>
> (TungstenAggregationIterator.scala:686)
>
> at
> org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1$$anonfun$2.apply(TungstenAggregate.scala:95)
>
> at
> org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1$$anonfun$2.apply(TungstenAggregate.scala:86)
>
> at
> org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710)
>
> at
> org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710)
>
> at
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>
> at
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>
> at
> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
>
> at
> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
>
> at org.apache.spark.scheduler.Task.run(Task.scala:89)
>
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
>
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
>
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
>
> at java.lang.Thread.run(Thread.java:745)
>
> ERROR 07-12 16:32:04,516 - Task 1 in stage 20.0 failed 4 times; aborting job
> WARN 07-12 16:32:04,600 - Lost task 0.1 in stage 20.0 (TID 45, *): TaskKilled (killed intentionally)
> ERROR 07-12 16:32:04,604 - Listener SQLListener threw an exception
> java.lang.NullPointerException
>     at org.apache.spark.sql.execution.ui.SQLListener.onTaskEnd(SQLListener.scala:167)
>     at org.apache.spark.scheduler.SparkListenerBus$class.onPostEvent(SparkListenerBus.scala:42)
>     at org.apache.spark.scheduler.LiveListenerBus.onPostEvent(LiveListenerBus.scala:31)
>     at org.apache.spark.scheduler.LiveListenerBus.onPostEvent(LiveListenerBus.scala:31)
>     at org.apache.spark.util.ListenerBus$class.postToAll(ListenerBus.scala:55)
>     at org.apache.spark.util.AsynchronousListenerBus.postToAll(AsynchronousListenerBus.scala:37)
>     at org.apache.spark.util.AsynchronousListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(AsynchronousListenerBus.scala:80)
>     at org.apache.spark.util.AsynchronousListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(AsynchronousListenerBus.scala:65)
>     at org.apache.spark.util.AsynchronousListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(AsynchronousListenerBus.scala:65)
>     at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57)
>     at org.apache.spark.util.AsynchronousListenerBus$$anon$1$$anonfun$run$1.apply$mcV$sp(AsynchronousListenerBus.scala:64)
>     at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1181)
>     at org.apache.spark.util.AsynchronousListenerBus$$anon$1.run(AsynchronousListenerBus.scala:63)
>
> org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 20.0 failed 4 times, most recent failure: Lost task 1.3 in stage 20.0 (TID 44, *): java.lang.ClassCastException: java.lang.String cannot be cast to java.lang.Integer
>     at scala.runtime.BoxesRunTime.unboxToInt(BoxesRunTime.java:106)
>     at org.apache.spark.sql.catalyst.expressions.BaseGenericInternalRow$class.getInt(rows.scala:41)
>     at org.apache.spark.sql.catalyst.expressions.GenericMutableRow.getInt(rows.scala:248)
>     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown Source)
>     at org.apache.spark.sql.CarbonScan$$anonfun$1$$anon$1.next(CarbonScan.scala:155)
>     at org.apache.spark.sql.CarbonScan$$anonfun$1$$anon$1.next(CarbonScan.scala:149)
>     at org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator.processInputs(TungstenAggregationIterator.scala:512)
>     at org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator.<init>(TungstenAggregationIterator.scala:686)
>     at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1$$anonfun$2.apply(TungstenAggregate.scala:95)
>     at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1$$anonfun$2.apply(TungstenAggregate.scala:86)
>     at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710)
>     at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>     at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
>     at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
>     at org.apache.spark.scheduler.Task.run(Task.scala:89)
>     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
>     at java.lang.Thread.run(Thread.java:745)
>
> Driver stacktrace:
>     at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431)
>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419)
>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418)
>     at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>     at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
>     at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418)
>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
>     at scala.Option.foreach(Option.scala:236)
>     at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799)
>     at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640)
>     at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
>     at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
>     at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
>     at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
>     at org.apache.spark.SparkContext.runJob(SparkContext.scala:1843)
>     at org.apache.spark.SparkContext.runJob(SparkContext.scala:1856)
>     at org.apache.spark.SparkContext.runJob(SparkContext.scala:1869)
>     at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:212)
>     at org.apache.spark.sql.execution.Limit.executeCollect(basicOperators.scala:165)
>     at org.apache.spark.sql.execution.SparkPlan.executeCollectPublic(SparkPlan.scala:174)
>     at org.apache.spark.sql.DataFrame$$anonfun$org$apache$spark$sql$DataFrame$$execute$1$1.apply(DataFrame.scala:1499)
>     at org.apache.spark.sql.DataFrame$$anonfun$org$apache$spark$sql$DataFrame$$execute$1$1.apply(DataFrame.scala:1499)
>     at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:53)
>     at org.apache.spark.sql.DataFrame.withNewExecutionId(DataFrame.scala:2086)
>     at org.apache.spark.sql.DataFrame.org$apache$spark$sql$DataFrame$$execute$1(DataFrame.scala:1498)
>     at org.apache.spark.sql.DataFrame.org$apache$spark$sql$DataFrame$$collect(DataFrame.scala:1505)
>     at org.apache.spark.sql.DataFrame$$anonfun$head$1.apply(DataFrame.scala:1375)
>     at org.apache.spark.sql.DataFrame$$anonfun$head$1.apply(DataFrame.scala:1374)
>     at org.apache.spark.sql.DataFrame.withCallback(DataFrame.scala:2099)
>     at org.apache.spark.sql.DataFrame.head(DataFrame.scala:1374)
>     at org.apache.spark.sql.DataFrame.take(DataFrame.scala:1456)
>     at org.apache.spark.sql.DataFrame.showString(DataFrame.scala:170)
>     at org.apache.spark.sql.DataFrame.show(DataFrame.scala:350)
>     at org.apache.spark.sql.DataFrame.show(DataFrame.scala:311)
>     at org.apache.spark.sql.DataFrame.show(DataFrame.scala:319)
>     at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:35)
>     at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:40)
>     at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:42)
>     at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:44)
>     at $iwC$$iwC$$iwC$$iwC.<init>(<console>:46)
>     at $iwC$$iwC$$iwC.<init>(<console>:48)
>     at $iwC$$iwC.<init>(<console>:50)
>     at $iwC.<init>(<console>:52)
>     at <init>(<console>:54)
>     at .<init>(<console>:58)
>     at .<clinit>(<console>)
>     at .<init>(<console>:7)
>     at .<clinit>(<console>)
>     at $print(<console>)
>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>     at java.lang.reflect.Method.invoke(Method.java:606)
>     at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1045)
>     at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1326)
>     at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:821)
>     at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:852)
>     at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:800)
>     at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
>     at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
>     at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
>     at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
>     at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
>     at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
>     at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
>     at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
>     at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
>     at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
>     at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
>     at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1064)
>     at org.apache.spark.repl.Main$.main(Main.scala:31)
>     at org.apache.spark.repl.Main.main(Main.scala)
>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>     at java.lang.reflect.Method.invoke(Method.java:606)
>     at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
>     at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
>     at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
>     at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
>     at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> Caused by: java.lang.ClassCastException: java.lang.String cannot be cast to java.lang.Integer
>     at scala.runtime.BoxesRunTime.unboxToInt(BoxesRunTime.java:106)
>     at org.apache.spark.sql.catalyst.expressions.BaseGenericInternalRow$class.getInt(rows.scala:41)
>     at org.apache.spark.sql.catalyst.expressions.GenericMutableRow.getInt(rows.scala:248)
>     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown Source)
>     at org.apache.spark.sql.CarbonScan$$anonfun$1$$anon$1.next(CarbonScan.scala:155)
>     at org.apache.spark.sql.CarbonScan$$anonfun$1$$anon$1.next(CarbonScan.scala:149)
>     at org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator.processInputs(TungstenAggregationIterator.scala:512)
>     at org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator.<init>(TungstenAggregationIterator.scala:686)
>     at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1$$anonfun$2.apply(TungstenAggregate.scala:95)
>     at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1$$anonfun$2.apply(TungstenAggregate.scala:86)
>     at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710)
>     at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>     at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
>     at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
>     at org.apache.spark.scheduler.Task.run(Task.scala:89)
>     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
>     at java.lang.Thread.run(Thread.java:745)
>
>
> Thanks,
> Lionel
>