Ajantha Bhat created CARBONDATA-3639:
----------------------------------------

             Summary: Global Sort CSV loading flow with Binary non-sort columns 
throws exception
                 Key: CARBONDATA-3639
                 URL: https://issues.apache.org/jira/browse/CARBONDATA-3639
             Project: CarbonData
          Issue Type: Bug
            Reporter: Ajantha Bhat
            Assignee: Ajantha Bhat


Global Sort CSV loading flow with Binary non-sort columns throws exception

Previous exception in task: Dataload failed, String length cannot exceed 32000 
characters
Previous exception in task: Dataload failed, String length cannot 
exceed 32000 characters 
org.apache.carbondata.streaming.parser.FieldConverter$.objectToString(FieldConverter.scala:53)
 
org.apache.carbondata.spark.util.CarbonScalaUtil$.getString(CarbonScalaUtil.scala:71)
 
org.apache.carbondata.spark.rdd.NewRddIterator$$anonfun$next$1.apply$mcVI$sp(NewCarbonDataLoadRDD.scala:358)
 scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160) 
org.apache.carbondata.spark.rdd.NewRddIterator.next(NewCarbonDataLoadRDD.scala:357)
 
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:66)
 
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:61)
 
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:92)
 
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:83)
 
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:253)
 
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:248)
 scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:462) 
scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) 
org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
 org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) 
org.apache.spark.scheduler.Task.run(Task.scala:109) 
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345) 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 
java.lang.Thread.run(Thread.java:748) at 
org.apache.spark.TaskContextImpl.invokeListeners(TaskContextImpl.scala:139) at 
org.apache.spark.TaskContextImpl.markTaskFailed(TaskContextImpl.scala:107) at 
org.apache.spark.scheduler.Task.run(Task.scala:114) ... 4 more
2019-12-28 17:37:47 ERROR TaskSetManager:70 - Task 0 in stage 0.0 failed 1 times; aborting 
job
2019-12-28 17:37:47 ERROR CarbonDataRDDFactory$:429 - 
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in 
stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 
0, localhost, executor driver): java.lang.Exception: Dataload failed, String 
length cannot exceed 32000 characters at 
org.apache.carbondata.streaming.parser.FieldConverter$.objectToString(FieldConverter.scala:53)
 at 
org.apache.carbondata.spark.util.CarbonScalaUtil$.getString(CarbonScalaUtil.scala:71)
 at 
org.apache.carbondata.spark.rdd.NewRddIterator$$anonfun$next$1.apply$mcVI$sp(NewCarbonDataLoadRDD.scala:358)
 at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160) at 
org.apache.carbondata.spark.rdd.NewRddIterator.next(NewCarbonDataLoadRDD.scala:357)
 at 
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:66)
 at 
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:61)
 at 
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:92)
 at 
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:83)
 at 
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:253)
 at 
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:248)
 at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:462) at 
scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at 
org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
 at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) 
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) 
at org.apache.spark.scheduler.Task.run(Task.scala:109) at 
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345) at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 
at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 
at java.lang.Thread.run(Thread.java:748) Suppressed: 
org.apache.spark.util.TaskCompletionListenerException: 
Exception 0: Data Loading failed for table binarytable
Exception 1: Data Loading failed for table 
binarytable
Previous exception in task: Dataload failed, String length cannot exceed 32000 
characters 
org.apache.carbondata.streaming.parser.FieldConverter$.objectToString(FieldConverter.scala:53)
 
org.apache.carbondata.spark.util.CarbonScalaUtil$.getString(CarbonScalaUtil.scala:71)
 
org.apache.carbondata.spark.rdd.NewRddIterator$$anonfun$next$1.apply$mcVI$sp(NewCarbonDataLoadRDD.scala:358)
 scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160) 
org.apache.carbondata.spark.rdd.NewRddIterator.next(NewCarbonDataLoadRDD.scala:357)
 
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:66)
 
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:61)
 
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:92)
 
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:83)
 
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:253)
 
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:248)
 scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:462) 
scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) 
org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
 org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) 
org.apache.spark.scheduler.Task.run(Task.scala:109) 
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345) 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 
java.lang.Thread.run(Thread.java:748) at 
org.apache.spark.TaskContextImpl.invokeListeners(TaskContextImpl.scala:139) at 
org.apache.spark.TaskContextImpl.markTaskFailed(TaskContextImpl.scala:107) at 
org.apache.spark.scheduler.Task.run(Task.scala:114) ... 4 more

 



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to