Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1716#discussion_r158575738
--- Diff:
integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
---
@@ -564,55 +572,49 @@ case class CarbonLoadDataCommand(
val data = new Array[Any](len)
var i = 0
val input = value.get()
- while (i < input.length) {
- // TODO find a way to avoid double conversion of date and time.
- data(i) = CarbonScalaUtil.convertToUTF8String(
- input(i),
- rowDataTypes(i),
- timeStampFormat,
- dateFormat,
- serializationNullFormat)
- i = i + 1
+ val inputLen = Math.min(input.length, len)
+ try {
+ while (i < inputLen) {
+ // TODO find a way to avoid double conversion of date and time.
+ data(i) = CarbonScalaUtil.convertToUTF8String(
+ input(i),
+ rowDataTypes(i),
+ timeStampFormat,
+ dateFormat,
+ serializationNullFormat,
+ failAction,
+ ignoreAction)
+ i = i + 1
+ }
+ InternalRow.fromSeq(data)
+ } catch {
+ case e: BadRecordFoundException => throw e
+ case e: Exception => InternalRow.empty // It is bad record ignore case
}
- InternalRow.fromSeq(data)
- }
+
+ }.filter(f => f.numFields != 0) // In bad record ignore case filter the empty values
--- End diff --
I will move rdd creation to another private function.
---