Chetan Bhat created CARBONDATA-4241:
---------------------------------------
Summary: Major compaction fails in 2.2.0 on a table created with local
dictionary in 1.6.1, after the sort scope is changed to global sort and data
is loaded
Key: CARBONDATA-4241
URL: https://issues.apache.org/jira/browse/CARBONDATA-4241
Project: CarbonData
Issue Type: Bug
Components: data-load
Affects Versions: 2.2.0
Environment: Spark 2.3.2 Carbon 1.6.1 , Spark 3.1.1 Carbon 2.2.0
Reporter: Chetan Bhat
*In version 1.6.1, create the table, load data, and enable local dictionary with ALTER TABLE.*
CREATE TABLE uniqdata_sortcol_bloom_locdic (CUST_ID int,CUST_NAME
String,ACTIVE_EMUI_VERSION string, DOB timestamp, DOJ timestamp, BIGINT_COLUMN1
bigint,BIGINT_COLUMN2 bigint,DECIMAL_COLUMN1 decimal(30,10), DECIMAL_COLUMN2
decimal(36,10),Double_COLUMN1 double, Double_COLUMN2 double,INTEGER_COLUMN1
int) STORED as carbondata
tblproperties('sort_columns'='cust_id,cust_name,dob,doj,bigint_column1');
LOAD DATA INPATH 'hdfs://hacluster/chetan/2000_UniqData.csv' into table
uniqdata_sortcol_bloom_locdic OPTIONS('DELIMITER'=',',
'QUOTECHAR'='"','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1');
alter table uniqdata_sortcol_bloom_locdic set
tblproperties('local_dictionary_enable'='true','local_dictionary_threshold'='1000');
*In version 2.2.0, set the sort scope to global sort, load data, and run major compaction.*
alter table uniqdata_sortcol_bloom_locdic set
tblproperties('sort_columns'='CUST_ID','sort_scope'='global_sort');
LOAD DATA INPATH 'hdfs://hacluster/chetan/2000_UniqData.csv' into table
uniqdata_sortcol_bloom_locdic partition(active_emui_version='xyz')
OPTIONS('FILEHEADER'='CUST_ID,CUST_NAME ,ACTIVE_EMUI_VERSION,DOB,DOJ,
BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,
Double_COLUMN2,INTEGER_COLUMN1','BAD_RECORDS_ACTION'='FORCE');
*0: jdbc:hive2://10.21.19.14:23040/default> alter table
uniqdata_sortcol_bloom_locdic compact 'major';*
Error: org.apache.hive.service.cli.HiveSQLException: Error running query:
org.apache.spark.sql.AnalysisException: Compaction failed. Please check logs
for more info. Exception in compaction Compaction Failure in Merger Rdd.
at
org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:361)
at
org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.$anonfun$run$2(SparkExecuteStatementOperation.scala:263)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at
org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties(SparkOperation.scala:78)
at
org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties$(SparkOperation.scala:62)
at
org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.withLocalProperties(SparkExecuteStatementOperation.scala:43)
at
org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:263)
at
org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:258)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1746)
at
org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2.run(SparkExecuteStatementOperation.scala:272)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.sql.AnalysisException: Compaction failed. Please
check logs for more info. Exception in compaction Compaction Failure in Merger
Rdd.
at
org.apache.spark.sql.util.CarbonException$.analysisException(CarbonException.scala:23)
at
org.apache.spark.sql.execution.command.management.CarbonAlterTableCompactionCommand.$anonfun$processData$3(CarbonAlterTableCompactionCommand.scala:197)
at org.apache.carbondata.events.package$.withEvents(package.scala:27)
at
org.apache.spark.sql.execution.command.management.CarbonAlterTableCompactionCommand.processData(CarbonAlterTableCompactionCommand.scala:185)
at
org.apache.spark.sql.execution.command.AtomicRunnableCommand.$anonfun$run$3(package.scala:162)
at
org.apache.spark.sql.execution.command.Auditable.runWithAudit(package.scala:118)
at
org.apache.spark.sql.execution.command.Auditable.runWithAudit$(package.scala:114)
at
org.apache.spark.sql.execution.command.AtomicRunnableCommand.runWithAudit(package.scala:155)
at
org.apache.spark.sql.execution.command.AtomicRunnableCommand.run(package.scala:168)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:228)
at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3687)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
at
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
at
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3685)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:228)
at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:96)
at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:615)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:610)
at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:650)
at
org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:325)
... 16 more (state=,code=0)
--
This message was sent by Atlassian Jira
(v8.3.4#803005)