QiangCai commented on a change in pull request #3538: [CARBONDATA-3637] 
Optimize insert into flow
URL: https://github.com/apache/carbondata/pull/3538#discussion_r378715601
 
 

 ##########
 File path: 
integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
 ##########
 @@ -263,17 +265,35 @@ class NewDataFrameLoaderRDD[K, V](
         carbonLoadModel.setPreFetch(false)
 
         val recordReaders = mutable.Buffer[CarbonIterator[Array[AnyRef]]]()
-        val partitionIterator = 
firstParent[DataLoadPartitionWrap[Row]].iterator(theSplit, context)
         val serializer = SparkEnv.get.closureSerializer.newInstance()
         var serializeBytes: Array[Byte] = null
-        while(partitionIterator.hasNext) {
-          val value = partitionIterator.next()
-          if (serializeBytes == null) {
-            serializeBytes = serializer.serialize[RDD[Row]](value.rdd).array()
+        if (isDataFrame) {
+          val partitionIterator = 
firstParent[DataLoadPartitionWrap[Row]].iterator(theSplit,
+            context)
+          while (partitionIterator.hasNext) {
+            val value = partitionIterator.next()
+            if (serializeBytes == null) {
+              serializeBytes = 
serializer.serialize[RDD[Row]](value.rdd).array()
+            }
+            recordReaders += new LazyRddIterator(serializer, serializeBytes, 
value.partition,
+              carbonLoadModel, context)
           }
-          recordReaders += new LazyRddIterator(serializer, serializeBytes, 
value.partition,
+        } else {
+          // For internal row, no need of converter and re-arrange step,
+          model.setLoadWithoutConverterWithoutReArrangeStep(true)
 
 Review comment:
   How about setting it on the driver side and removing 'isDataFrame' from 
NewDataFrameLoaderRDD?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Reply via email to