Ajantha Bhat created CARBONDATA-3639:
----------------------------------------
Summary: Global Sort CSV loading flow with Binary non-sort columns
throws exception
Key: CARBONDATA-3639
URL: https://issues.apache.org/jira/browse/CARBONDATA-3639
Project: CarbonData
Issue Type: Bug
Reporter: Ajantha Bhat
Assignee: Ajantha Bhat
Global Sort CSV loading flow with Binary non-sort columns throws exception
Previous exception in task: Dataload failed, String length cannot exceed 32000
characters
Previous exception in task: Dataload failed, String length cannot exceed 32000
characters
org.apache.carbondata.streaming.parser.FieldConverter$.objectToString(FieldConverter.scala:53)
org.apache.carbondata.spark.util.CarbonScalaUtil$.getString(CarbonScalaUtil.scala:71)
org.apache.carbondata.spark.rdd.NewRddIterator$$anonfun$next$1.apply$mcVI$sp(NewCarbonDataLoadRDD.scala:358)
scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)
org.apache.carbondata.spark.rdd.NewRddIterator.next(NewCarbonDataLoadRDD.scala:357)
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:66)
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:61)
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:92)
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:83)
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:253)
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:248)
scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:462)
scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
org.apache.spark.scheduler.Task.run(Task.scala:109)
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
java.lang.Thread.run(Thread.java:748) at
org.apache.spark.TaskContextImpl.invokeListeners(TaskContextImpl.scala:139) at
org.apache.spark.TaskContextImpl.markTaskFailed(TaskContextImpl.scala:107) at
org.apache.spark.scheduler.Task.run(Task.scala:114) ... 4 more
2019-12-28 17:37:47 ERROR TaskSetManager:70 - Task 0 in stage 0.0 failed 1 times; aborting job
2019-12-28 17:37:47 ERROR CarbonDataRDDFactory$:429 -
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in
stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID
0, localhost, executor driver): java.lang.Exception: Dataload failed, String
length cannot exceed 32000 characters at
org.apache.carbondata.streaming.parser.FieldConverter$.objectToString(FieldConverter.scala:53)
at
org.apache.carbondata.spark.util.CarbonScalaUtil$.getString(CarbonScalaUtil.scala:71)
at
org.apache.carbondata.spark.rdd.NewRddIterator$$anonfun$next$1.apply$mcVI$sp(NewCarbonDataLoadRDD.scala:358)
at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160) at
org.apache.carbondata.spark.rdd.NewRddIterator.next(NewCarbonDataLoadRDD.scala:357)
at
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:66)
at
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:61)
at
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:92)
at
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:83)
at
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:253)
at
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:248)
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:462) at
scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at
org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
at org.apache.spark.scheduler.Task.run(Task.scala:109) at
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345) at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748) Suppressed:
org.apache.spark.util.TaskCompletionListenerException:
Exception 0: Data Loading failed for table binarytable
Exception 1: Data Loading failed for table binarytable
Previous exception in task: Dataload failed, String length cannot exceed 32000
characters
org.apache.carbondata.streaming.parser.FieldConverter$.objectToString(FieldConverter.scala:53)
org.apache.carbondata.spark.util.CarbonScalaUtil$.getString(CarbonScalaUtil.scala:71)
org.apache.carbondata.spark.rdd.NewRddIterator$$anonfun$next$1.apply$mcVI$sp(NewCarbonDataLoadRDD.scala:358)
scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)
org.apache.carbondata.spark.rdd.NewRddIterator.next(NewCarbonDataLoadRDD.scala:357)
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:66)
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:61)
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:92)
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:83)
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:253)
org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:248)
scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:462)
scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
org.apache.spark.scheduler.Task.run(Task.scala:109)
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
java.lang.Thread.run(Thread.java:748) at
org.apache.spark.TaskContextImpl.invokeListeners(TaskContextImpl.scala:139) at
org.apache.spark.TaskContextImpl.markTaskFailed(TaskContextImpl.scala:107) at
org.apache.spark.scheduler.Task.run(Task.scala:114) ... 4 more
--
This message was sent by Atlassian Jira
(v8.3.4#803005)