[jira] [Updated] (CARBONDATA-4241) if the sort scope is changed to global sort and data loaded, major compaction fails
[ https://issues.apache.org/jira/browse/CARBONDATA-4241?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Chetan Bhat updated CARBONDATA-4241: Description: *Scenario 1 : create table with table_page_size_inmb'='1', load data ,* *set sortscope as global sort , load data and do major compaction.*** 0: jdbc:hive2://10.21.19.14:23040/default> CREATE TABLE uniqdata_pagesize (CUST_ID int,CUST_NAME String,ACTIVE_EMUI_VERSION string, DOB timestamp, DOJ timestamp, BIGINT_COLUMN1 bigint,BIGINT_COLUMN2 bigint,DECIMAL_COLUMN1 decimal(30,10), DECIMAL_COLUMN2 decimal(36,36),Double_COLUMN1 double, Double_COLUMN2 double,INTEGER_COLUMN1 int) STORED as carbondata TBLPROPERTIES('table_page_size_inmb'='1'); +-+ | Result | +-+ +-+ No rows selected (0.229 seconds) 0: jdbc:hive2://10.21.19.14:23040/default> LOAD DATA INPATH 'hdfs://hacluster/chetan/2000_UniqData.csv' into table uniqdata_pagesize OPTIONS('DELIMITER'=',' , 'QUOTECHAR'='"','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1'); +-+ | Segment ID | +-+ | 0 | +-+ 1 row selected (1.016 seconds) 0: jdbc:hive2://10.21.19.14:23040/default> alter table uniqdata_pagesize set tblproperties('sort_columns'='CUST_ID','sort_scope'='global_sort'); +-+ | Result | +-+ +-+ No rows selected (0.446 seconds) 0: jdbc:hive2://10.21.19.14:23040/default> LOAD DATA INPATH 'hdfs://hacluster/chetan/2000_UniqData.csv' into table uniqdata_pagesize OPTIONS('DELIMITER'=',' , 'QUOTECHAR'='"','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1'); +-+ | Segment ID | +-+ | 1 | +-+ 1 row selected (0.767 seconds) 0: jdbc:hive2://10.21.19.14:23040/default> alter table uniqdata_pagesize compact 'major'; Error: org.apache.hive.service.cli.HiveSQLException: Error running query: org.apache.spark.sql.AnalysisException: Compaction failed. Please check logs for more info. Exception in compaction Compaction Failure in Merger Rdd. at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:361) at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.$anonfun$run$2(SparkExecuteStatementOperation.scala:263) at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) at org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties(SparkOperation.scala:78) at org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties$(SparkOperation.scala:62) at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.withLocalProperties(SparkExecuteStatementOperation.scala:43) at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:263) at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:258) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1746) at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2.run(SparkExecuteStatementOperation.scala:272) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: org.apache.spark.sql.AnalysisException: Compaction failed. Please check logs for more info. Exception in compaction Compaction Failure in Merger Rdd. at org.apache.spark.sql.util.CarbonException$.analysisException(CarbonException.scala:23) at org.apache.spark.sql.execution.command.management.CarbonAlterTableCompactionCommand.$anonfun$processData$3(CarbonAlterTableCompactionCommand.scala:197) at org.apache.carbondata.events.package$.withEvents(package.scala:27) at org.apache.spark.sql.execution.command.management.CarbonAlterTableCompactionCommand.processData(CarbonAlterTableCompactionCommand.scala:185) at org.apache.spark.sql.execution.command.AtomicRunnableComm
[jira] [Updated] (CARBONDATA-4241) if the sort scope is changed to global sort and data loaded, major compaction fails
[ https://issues.apache.org/jira/browse/CARBONDATA-4241?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Chetan Bhat updated CARBONDATA-4241: Description: *create table and alter table set local dictionary.* *set sortscope as global sort , load and do major compaction.* CREATE TABLE uniqdata_sortcol_bloom_locdic (CUST_ID int,CUST_NAME String,ACTIVE_EMUI_VERSION string, DOB timestamp, DOJ timestamp, BIGINT_COLUMN1 bigint,BIGINT_COLUMN2 bigint,DECIMAL_COLUMN1 decimal(30,10), DECIMAL_COLUMN2 decimal(36,10),Double_COLUMN1 double, Double_COLUMN2 double,INTEGER_COLUMN1 int) STORED as carbondata tblproperties('sort_columns'='cust_id,cust_name,dob,doj,bigint_column1'); LOAD DATA INPATH 'hdfs://hacluster/chetan/2000_UniqData.csv' into table uniqdata_sortcol_bloom_locdic OPTIONS('DELIMITER'=',', 'QUOTECHAR'='"','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1'); alter table uniqdata_sortcol_bloom_locdic set tblproperties('local_dictionary_enable'='true','local_dictionary_threshold'='1000'); alter table uniqdata_sortcol_bloom_locdic set tblproperties('sort_columns'='CUST_ID','sort_scope'='global_sort'); LOAD DATA INPATH 'hdfs://hacluster/chetan/2000_UniqData.csv' into table uniqdata_sortcol_bloom_locdic partition(active_emui_version='xyz') OPTIONS('FILEHEADER'='CUST_ID,CUST_NAME ,ACTIVE_EMUI_VERSION,DOB,DOJ, BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1, Double_COLUMN2,INTEGER_COLUMN1','BAD_RECORDS_ACTION'='FORCE'); *Compaction fails* *0: jdbc:hive2://10.21.19.14:23040/default> alter table uniqdata_sortcol_bloom_locdic compact 'major';* Error: org.apache.hive.service.cli.HiveSQLException: Error running query: org.apache.spark.sql.AnalysisException: Compaction failed. Please check logs for more info. Exception in compaction Compaction Failure in Merger Rdd. at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:361) at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.$anonfun$run$2(SparkExecuteStatementOperation.scala:263) at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) at org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties(SparkOperation.scala:78) at org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties$(SparkOperation.scala:62) at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.withLocalProperties(SparkExecuteStatementOperation.scala:43) at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:263) at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:258) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1746) at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2.run(SparkExecuteStatementOperation.scala:272) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: org.apache.spark.sql.AnalysisException: Compaction failed. Please check logs for more info. Exception in compaction Compaction Failure in Merger Rdd. at org.apache.spark.sql.util.CarbonException$.analysisException(CarbonException.scala:23) at org.apache.spark.sql.execution.command.management.CarbonAlterTableCompactionCommand.$anonfun$processData$3(CarbonAlterTableCompactionCommand.scala:197) at org.apache.carbondata.events.package$.withEvents(package.scala:27) at org.apache.spark.sql.execution.command.management.CarbonAlterTableCompactionCommand.processData(CarbonAlterTableCompactionCommand.scala:185) at org.apache.spark.sql.execution.command.AtomicRunnableCommand.$anonfun$run$3(package.scala:162) at org.apache.spark.sql.execution.command.Auditable.runWithAudit(package.scala:118) at org.apache.spark.sql.execution.command.Auditable.runWithAudit$(package.scala:114) at org.apache.spark.sql.execution.command.AtomicRunnableCommand.runWithAudit(package.scala:155) at org.apache.spark.sql.execution.command.AtomicRunnableCommand.run(package.scala:168) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70) at org.apache.spark.s
[jira] [Updated] (CARBONDATA-4241) if the sort scope is changed to global sort and data loaded, major compaction fails
[ https://issues.apache.org/jira/browse/CARBONDATA-4241?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Chetan Bhat updated CARBONDATA-4241: Summary: if the sort scope is changed to global sort and data loaded, major compaction fails (was: in 1.6.1 version table with local dictionary is created and in 2.2.0 if the sort scope is changed to global sort and data loaded, major compaction fails) > if the sort scope is changed to global sort and data loaded, major compaction > fails > --- > > Key: CARBONDATA-4241 > URL: https://issues.apache.org/jira/browse/CARBONDATA-4241 > Project: CarbonData > Issue Type: Bug > Components: data-load >Affects Versions: 2.2.0 > Environment: Spark 2.3.2 Carbon 1.6.1 , Spark 3.1.1 Carbon 2.2.0 >Reporter: Chetan Bhat >Priority: Major > > *In 1.6.1 version create table and alter table set local dictionary.* > CREATE TABLE uniqdata_sortcol_bloom_locdic (CUST_ID int,CUST_NAME > String,ACTIVE_EMUI_VERSION string, DOB timestamp, DOJ timestamp, > BIGINT_COLUMN1 bigint,BIGINT_COLUMN2 bigint,DECIMAL_COLUMN1 decimal(30,10), > DECIMAL_COLUMN2 decimal(36,10),Double_COLUMN1 double, Double_COLUMN2 > double,INTEGER_COLUMN1 int) STORED as carbondata > tblproperties('sort_columns'='cust_id,cust_name,dob,doj,bigint_column1'); > LOAD DATA INPATH 'hdfs://hacluster/chetan/2000_UniqData.csv' into table > uniqdata_sortcol_bloom_locdic OPTIONS('DELIMITER'=',', > 'QUOTECHAR'='"','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1'); > alter table uniqdata_sortcol_bloom_locdic set > tblproperties('local_dictionary_enable'='true','local_dictionary_threshold'='1000'); > *In 2.2.0 set sortscope as global sort , load and do major compaction.* > alter table uniqdata_sortcol_bloom_locdic set > tblproperties('sort_columns'='CUST_ID','sort_scope'='global_sort'); > LOAD DATA INPATH 'hdfs://hacluster/chetan/2000_UniqData.csv' into table > uniqdata_sortcol_bloom_locdic partition(active_emui_version='xyz') > OPTIONS('FILEHEADER'='CUST_ID,CUST_NAME ,ACTIVE_EMUI_VERSION,DOB,DOJ, > BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1, > Double_COLUMN2,INTEGER_COLUMN1','BAD_RECORDS_ACTION'='FORCE'); > *0: jdbc:hive2://10.21.19.14:23040/default> alter table > uniqdata_sortcol_bloom_locdic compact 'major';* > Error: org.apache.hive.service.cli.HiveSQLException: Error running query: > org.apache.spark.sql.AnalysisException: Compaction failed. Please check logs > for more info. Exception in compaction Compaction Failure in Merger Rdd. > at > org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:361) > at > org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.$anonfun$run$2(SparkExecuteStatementOperation.scala:263) > at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) > at > org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties(SparkOperation.scala:78) > at > org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties$(SparkOperation.scala:62) > at > org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.withLocalProperties(SparkExecuteStatementOperation.scala:43) > at > org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:263) > at > org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:258) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1746) > at > org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2.run(SparkExecuteStatementOperation.scala:272) > at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: org.apache.spark.sql.AnalysisException: Compaction failed. Please > check logs for more info. Exception in compaction Compaction Failure in > Merger Rdd. > at > org.apache.spark.sql.util.CarbonException$.analysisException(CarbonException.scala:23) > at > org.apache.spark.sql.execution.command