[
https://issues.apache.org/jira/browse/CARBONDATA-1541?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
xubo245 updated CARBONDATA-1541:
--------------------------------
Description:
There are some errors when bad_records_action is IGNORE
{code:java}
17/10/09 01:20:31 ERROR CarbonRowDataWriterProcessorStepImpl: [Executor task
launch
worker-0][partitionID:default_int_table_2ade496b-a9e8-4e7c-82bd-fb21c2e590eb]
Failed for table: int_table in DataWriterProcessorStepImpl
org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException:
unable to generate the mdkey
at
org.apache.carbondata.processing.newflow.steps.CarbonRowDataWriterProcessorStepImpl.processBatch(CarbonRowDataWriterProcessorStepImpl.java:276)
at
org.apache.carbondata.processing.newflow.steps.CarbonRowDataWriterProcessorStepImpl.doExecute(CarbonRowDataWriterProcessorStepImpl.java:162)
at
org.apache.carbondata.processing.newflow.steps.CarbonRowDataWriterProcessorStepImpl.execute(CarbonRowDataWriterProcessorStepImpl.java:123)
at
org.apache.carbondata.processing.newflow.DataLoadExecutor.execute(DataLoadExecutor.java:51)
at
org.apache.carbondata.spark.rdd.NewCarbonDataLoadRDD$$anon$1.<init>(NewCarbonDataLoadRDD.scala:254)
at
org.apache.carbondata.spark.rdd.NewCarbonDataLoadRDD.internalCompute(NewCarbonDataLoadRDD.scala:229)
at org.apache.carbondata.spark.rdd.CarbonRDD.compute(CarbonRDD.scala:62)
{code}
1. When the table has only one column and that column's data type is INT, there
is an error:
code:
{code:java}
test("Loading table: int, bad_records_action is IGNORE") {
val fileLocation =
s"$rootPath/integration/spark-common-test/src/test/resources/badrecords/intTest.csv"
sql("drop table if exists int_table")
sql("CREATE TABLE if not exists int_table(intField INT) STORED BY
'carbondata'")
sql(
s"""
| LOAD DATA LOCAL INPATH '$fileLocation'
| INTO TABLE int_table
| OPTIONS('FILEHEADER' =
'intField','bad_records_logger_enable'='true','bad_records_action'='IGNORE')
""".stripMargin)
sql("select * from int_table").show()
checkAnswer(sql("select * from int_table where intField = 1"),
Seq(Row(1), Row(1)))
sql("drop table if exists int_table")
}
{code}
2. When sort_columns is null, there is an error:
{code:java}
test("sort_columns is null, error") {
sql("drop table if exists sales")
sql(
"""CREATE TABLE IF NOT EXISTS sales(ID BigInt, date Timestamp, country
String,
actual_price Double, Quantity int, sold_price Decimal(19,2))
STORED BY 'carbondata'
TBLPROPERTIES('sort_columns'='')""")
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC,
new File("./target/test/badRecords")
.getCanonicalPath)
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd")
var csvFilePath = s"$resourcesPath/badrecords/datasample.csv"
sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE sales OPTIONS"
+
"('bad_records_logger_enable'='true','bad_records_action'='redirect',
'DELIMITER'=" +
" ',', 'QUOTECHAR'= '\"')");
checkAnswer(
sql("select count(*) from sales"),
Seq(Row(2)
)
)
}
{code}
The test code has been pushed to
https://github.com/xubo245/carbondata/tree/badRecordAction
{code:java}
org.apache.carbondata.integration.spark.testsuite.dataload.LoadDataWithBadRecords
{code}
was:
There are some errors when bad_records_action is IGNORE
{code:java}
17/10/09 01:20:31 ERROR CarbonRowDataWriterProcessorStepImpl: [Executor task
launch
worker-0][partitionID:default_int_table_2ade496b-a9e8-4e7c-82bd-fb21c2e590eb]
Failed for table: int_table in DataWriterProcessorStepImpl
org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException:
unable to generate the mdkey
at
org.apache.carbondata.processing.newflow.steps.CarbonRowDataWriterProcessorStepImpl.processBatch(CarbonRowDataWriterProcessorStepImpl.java:276)
at
org.apache.carbondata.processing.newflow.steps.CarbonRowDataWriterProcessorStepImpl.doExecute(CarbonRowDataWriterProcessorStepImpl.java:162)
at
org.apache.carbondata.processing.newflow.steps.CarbonRowDataWriterProcessorStepImpl.execute(CarbonRowDataWriterProcessorStepImpl.java:123)
at
org.apache.carbondata.processing.newflow.DataLoadExecutor.execute(DataLoadExecutor.java:51)
at
org.apache.carbondata.spark.rdd.NewCarbonDataLoadRDD$$anon$1.<init>(NewCarbonDataLoadRDD.scala:254)
at
org.apache.carbondata.spark.rdd.NewCarbonDataLoadRDD.internalCompute(NewCarbonDataLoadRDD.scala:229)
at org.apache.carbondata.spark.rdd.CarbonRDD.compute(CarbonRDD.scala:62)
{code}
1. When table only have one column and the column data is INT, there is an
error:
code:
{code:java}
test("Loading table: int, bad_records_action is IGNORE") {
val fileLocation =
s"$rootPath/integration/spark-common-test/src/test/resources/badrecords/intTest.csv"
sql("drop table if exists int_table")
sql("CREATE TABLE if not exists int_table(intField INT) STORED BY
'carbondata'")
sql(
s"""
| LOAD DATA LOCAL INPATH '$fileLocation'
| INTO TABLE int_table
| OPTIONS('FILEHEADER' =
'intField','bad_records_logger_enable'='true','bad_records_action'='IGNORE')
""".stripMargin)
sql("select * from int_table").show()
checkAnswer(sql("select * from int_table where intField = 1"),
Seq(Row(1), Row(1)))
sql("drop table if exists int_table")
}
{code}
2. when sort_columns is null, there is an error :
{code:java}
test("sort_columns is null, error") {
sql("drop table if exists sales")
sql(
"""CREATE TABLE IF NOT EXISTS sales(ID BigInt, date Timestamp, country
String,
actual_price Double, Quantity int, sold_price Decimal(19,2))
STORED BY 'carbondata'
TBLPROPERTIES('sort_columns'='')""")
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC,
new File("./target/test/badRecords")
.getCanonicalPath)
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd")
var csvFilePath = s"$resourcesPath/badrecords/datasample.csv"
sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE sales OPTIONS"
+
"('bad_records_logger_enable'='true','bad_records_action'='redirect',
'DELIMITER'=" +
" ',', 'QUOTECHAR'= '\"')");
checkAnswer(
sql("select count(*) from sales"),
Seq(Row(2)
)
)
}
{code}
> There are some errors when bad_records_action is IGNORE
> -------------------------------------------------------
>
> Key: CARBONDATA-1541
> URL: https://issues.apache.org/jira/browse/CARBONDATA-1541
> Project: CarbonData
> Issue Type: Bug
> Components: data-load
> Affects Versions: 1.1.1
> Reporter: xubo245
> Priority: Minor
> Original Estimate: 240h
> Remaining Estimate: 240h
>
> There are some errors when bad_records_action is IGNORE
> {code:java}
> 17/10/09 01:20:31 ERROR CarbonRowDataWriterProcessorStepImpl: [Executor task
> launch
> worker-0][partitionID:default_int_table_2ade496b-a9e8-4e7c-82bd-fb21c2e590eb]
> Failed for table: int_table in DataWriterProcessorStepImpl
> org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException:
> unable to generate the mdkey
> at
> org.apache.carbondata.processing.newflow.steps.CarbonRowDataWriterProcessorStepImpl.processBatch(CarbonRowDataWriterProcessorStepImpl.java:276)
> at
> org.apache.carbondata.processing.newflow.steps.CarbonRowDataWriterProcessorStepImpl.doExecute(CarbonRowDataWriterProcessorStepImpl.java:162)
> at
> org.apache.carbondata.processing.newflow.steps.CarbonRowDataWriterProcessorStepImpl.execute(CarbonRowDataWriterProcessorStepImpl.java:123)
> at
> org.apache.carbondata.processing.newflow.DataLoadExecutor.execute(DataLoadExecutor.java:51)
> at
> org.apache.carbondata.spark.rdd.NewCarbonDataLoadRDD$$anon$1.<init>(NewCarbonDataLoadRDD.scala:254)
> at
> org.apache.carbondata.spark.rdd.NewCarbonDataLoadRDD.internalCompute(NewCarbonDataLoadRDD.scala:229)
> at org.apache.carbondata.spark.rdd.CarbonRDD.compute(CarbonRDD.scala:62)
> {code}
>
> 1. When the table has only one column and that column's data type is INT, there
> is an error:
> code:
> {code:java}
> test("Loading table: int, bad_records_action is IGNORE") {
> val fileLocation =
> s"$rootPath/integration/spark-common-test/src/test/resources/badrecords/intTest.csv"
> sql("drop table if exists int_table")
> sql("CREATE TABLE if not exists int_table(intField INT) STORED BY
> 'carbondata'")
> sql(
> s"""
> | LOAD DATA LOCAL INPATH '$fileLocation'
> | INTO TABLE int_table
> | OPTIONS('FILEHEADER' =
> 'intField','bad_records_logger_enable'='true','bad_records_action'='IGNORE')
> """.stripMargin)
> sql("select * from int_table").show()
> checkAnswer(sql("select * from int_table where intField = 1"),
> Seq(Row(1), Row(1)))
> sql("drop table if exists int_table")
> }
> {code}
> 2. When sort_columns is null, there is an error:
> {code:java}
> test("sort_columns is null, error") {
> sql("drop table if exists sales")
> sql(
> """CREATE TABLE IF NOT EXISTS sales(ID BigInt, date Timestamp, country
> String,
> actual_price Double, Quantity int, sold_price Decimal(19,2))
> STORED BY 'carbondata'
> TBLPROPERTIES('sort_columns'='')""")
> CarbonProperties.getInstance()
> .addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC,
> new File("./target/test/badRecords")
> .getCanonicalPath)
> CarbonProperties.getInstance()
> .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
> "yyyy/MM/dd")
> var csvFilePath = s"$resourcesPath/badrecords/datasample.csv"
> sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE sales
> OPTIONS"
> +
> "('bad_records_logger_enable'='true','bad_records_action'='redirect',
> 'DELIMITER'=" +
> " ',', 'QUOTECHAR'= '\"')");
> checkAnswer(
> sql("select count(*) from sales"),
> Seq(Row(2)
> )
> )
> }
> {code}
> The test code has been pushed to
> https://github.com/xubo245/carbondata/tree/badRecordAction
> {code:java}
> org.apache.carbondata.integration.spark.testsuite.dataload.LoadDataWithBadRecords
> {code}
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)