[ 
https://issues.apache.org/jira/browse/CARBONDATA-3847?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Chetan Bhat closed CARBONDATA-3847.
-----------------------------------
    Resolution: Cannot Reproduce

Can't reproduce this more than once thereafter. It might be related to the 
cluster configuration. Hence, closing the issue.

> Dataload fails for a table with 10 records having a string type bucket 
> column if the number of buckets exceeds a large number (300).
> ---------------------------------------------------------------------------------------------------------------------------------
>
>                 Key: CARBONDATA-3847
>                 URL: https://issues.apache.org/jira/browse/CARBONDATA-3847
>             Project: CarbonData
>          Issue Type: Bug
>          Components: data-query
>    Affects Versions: 2.0.0
>         Environment: Spark 2.3.2, Spark 2.4.5
>            Reporter: Chetan Bhat
>            Priority: Minor
>
> *Steps -*
> 0: jdbc:hive2://10.20.251.163:23040/default> create table if not exists 
> all_data_types1(bool_1 boolean,bool_2 boolean,chinese string,Number 
> int,smallNumber smallint,BigNumber bigint,LargeDecimal double,smalldecimal 
> float,customdecimal decimal(38,15),words string,smallwords char(8),varwords 
> varchar(20),time timestamp,day date,emptyNumber int,emptysmallNumber 
> smallint,emptyBigNumber bigint,emptyLargeDecimal double,emptysmalldecimal 
> float,emptycustomdecimal decimal(38,38),emptywords string,emptysmallwords 
> char(8),emptyvarwords varchar(20)) stored as carbondata TBLPROPERTIES 
> (*'BUCKET_NUMBER'='300'*, 'BUCKET_COLUMNS'='chinese');
> +---------+--+
> | Result |
> +---------+--+
> +---------+--+
> No rows selected (0.241 seconds)
> 0: jdbc:hive2://10.20.251.163:23040/default> LOAD DATA INPATH 
> 'hdfs://hacluster/chetan/datafile_0.csv' into table all_data_types1 
> OPTIONS('DELIMITER'=',' , 
> 'QUOTECHAR'='"','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='bool_1 ,bool_2 
> ,chinese ,Number ,smallNumber ,BigNumber ,LargeDecimal ,smalldecimal 
> ,customdecimal,words ,smallwords ,varwords ,time ,day ,emptyNumber 
> ,emptysmallNumber ,emptyBigNumber ,emptyLargeDecimal 
> ,emptysmalldecimal,emptycustomdecimal ,emptywords ,emptysmallwords 
> ,emptyvarwords');
> *Error: java.lang.Exception: DataLoad failure (state=,code=0)*
>  
> *Log -*
> java.lang.Exception: DataLoad failure
>  at 
> org.apache.carbondata.spark.rdd.CarbonDataRDDFactory$.loadCarbonData(CarbonDataRDDFactory.scala:565)
>  at 
> org.apache.spark.sql.execution.command.management.CarbonLoadDataCommand.loadData(CarbonLoadDataCommand.scala:207)
>  at 
> org.apache.spark.sql.execution.command.management.CarbonLoadDataCommand.processData(CarbonLoadDataCommand.scala:168)
>  at 
> org.apache.spark.sql.execution.command.AtomicRunnableCommand$$anonfun$run$3.apply(package.scala:148)
>  at 
> org.apache.spark.sql.execution.command.AtomicRunnableCommand$$anonfun$run$3.apply(package.scala:145)
>  at 
> org.apache.spark.sql.execution.command.Auditable$class.runWithAudit(package.scala:104)
>  at 
> org.apache.spark.sql.execution.command.AtomicRunnableCommand.runWithAudit(package.scala:141)
>  at 
> org.apache.spark.sql.execution.command.AtomicRunnableCommand.run(package.scala:145)
>  at 
> org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:71)
>  at 
> org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:69)
>  at 
> org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:80)
>  at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:196)
>  at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:196)
>  at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3379)
>  at 
> org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:90)
>  at 
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:137)
>  at 
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:85)
>  at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3378)
>  at org.apache.spark.sql.Dataset.<init>(Dataset.scala:196)
>  at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:79)
>  at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:651)
>  at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:694)
>  at 
> org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:248)
>  at 
> org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1$$anon$2.run(SparkExecuteStatementOperation.scala:178)
>  at 
> org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1$$anon$2.run(SparkExecuteStatementOperation.scala:174)
>  at java.security.AccessController.doPrivileged(Native Method)
>  at javax.security.auth.Subject.doAs(Subject.java:422)
>  at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729)
>  at 
> org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1.run(SparkExecuteStatementOperation.scala:188)
>  at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>  at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>  at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>  at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>  at java.lang.Thread.run(Thread.java:745)
> 2020-06-05 02:32:15,318 | ERROR | [HiveServer2-Background-Pool: Thread-9500] 
> | Error running hive query: | 
> org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1$$anon$2.run(SparkExecuteStatementOperation.scala:182)
> org.apache.hive.service.cli.HiveSQLException: java.lang.Exception: DataLoad 
> failure
>  at 
> org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:294)
>  at 
> org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1$$anon$2.run(SparkExecuteStatementOperation.scala:178)
>  at 
> org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1$$anon$2.run(SparkExecuteStatementOperation.scala:174)
>  at java.security.AccessController.doPrivileged(Native Method)
>  at javax.security.auth.Subject.doAs(Subject.java:422)
>  at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729)
>  at 
> org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1.run(SparkExecuteStatementOperation.scala:188)
>  at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>  at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>  at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>  at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>  at java.lang.Thread.run(Thread.java:745)



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to