[ https://issues.apache.org/jira/browse/CARBONDATA-4203?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Kunal Kapoor resolved CARBONDATA-4203. -------------------------------------- Fix Version/s: 2.3.0 Resolution: Fixed > Compaction in SDK segments added is causing compaction issue after update, > delete operations. > --------------------------------------------------------------------------------------------- > > Key: CARBONDATA-4203 > URL: https://issues.apache.org/jira/browse/CARBONDATA-4203 > Project: CarbonData > Issue Type: Bug > Affects Versions: 2.1.1 > Environment: FI cluster - 3 node > Reporter: Prasanna Ravichandran > Priority: Major > Fix For: 2.3.0 > > Attachments: primitive- SDK files.rar > > > Compaction of SDK segments added through add segment is causing a compaction > issue after update and delete operations. This issue is present only when a delete > and an update happen on one of the added segments. This issue is not seen > when no delete or update is performed on an added segment. > Place the attached SDK files in the > /sdkfiles/primitive/,/sdkfiles/primitive2/, > /sdkfiles/primitive3/,/sdkfiles/primitive4/ and /sdkfiles/primitive5/ folders > in HDFS and then execute the below queries. 
> Test queries: > drop table if exists external_primitive; > create table external_primitive (id int, name string, rank smallint, salary > double, active boolean, dob date, doj timestamp, city string, dept string) > stored as carbondata; > insert into external_primitive select > 1,"Pr",1,10,true,"1992-12-09","1992-10-07 22:00:20.0","chennai","CSE"; > alter table external_primitive add segment > options('path'='hdfs://hacluster/sdkfiles/primitive','format'='carbon'); > delete from external_primitive where id =2; > update external_primitive set (name)=("RAMU") where name="CCC"; > drop table if exists external_primitive; > create table external_primitive (id int, name string, rank smallint, salary > double, active boolean, dob date, doj timestamp, city string, dept string) > stored as carbondata; > alter table external_primitive add segment > options('path'='hdfs://hacluster/sdkfiles/primitive','format'='carbon'); > alter table external_primitive add segment > options('path'='hdfs://hacluster/sdkfiles/primitive2','format'='carbon'); > alter table external_primitive add segment > options('path'='hdfs://hacluster/sdkfiles/primitive3','format'='carbon'); > alter table external_primitive add segment > options('path'='hdfs://hacluster/sdkfiles/primitive4','format'='carbon'); > alter table external_primitive add segment > options('path'='hdfs://hacluster/sdkfiles/primitive5','format'='carbon'); > alter table external_primitive compact 'minor'; > > !image-2021-06-08-16-54-52-412.png! > Error traces: > Error: org.apache.hive.service.cli.HiveSQLException: Error running query: > org.apache.spark.sql.AnalysisException: Compaction failed. Please check logs > for more info. Exception in compaction Compaction Failure in Merger Rdd. 
> at > org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:396) > at > org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.$anonfun$run$3(SparkExecuteStatementOperation.scala:281) > at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) > at > org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties(SparkOperation.scala:78) > at > org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties$(SparkOperation.scala:62) > at > org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.withLocalProperties(SparkExecuteStatementOperation.scala:46) > at > org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:281) > at > org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:268) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1761) > at > org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2.run(SparkExecuteStatementOperation.scala:295) > at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: org.apache.spark.sql.AnalysisException: Compaction failed. Please > check logs for more info. Exception in compaction Compaction Failure in > Merger Rdd. 
> at > org.apache.spark.sql.util.CarbonException$.analysisException(CarbonException.scala:23) > at > org.apache.spark.sql.execution.command.management.CarbonAlterTableCompactionCommand.$anonfun$processData$3(CarbonAlterTableCompactionCommand.scala:197) > at org.apache.carbondata.events.package$.withEvents(package.scala:27) > at > org.apache.spark.sql.execution.command.management.CarbonAlterTableCompactionCommand.processData(CarbonAlterTableCompactionCommand.scala:185) > at > org.apache.spark.sql.execution.command.AtomicRunnableCommand.$anonfun$run$3(package.scala:162) > at > org.apache.spark.sql.execution.command.Auditable.runWithAudit(package.scala:118) > at > org.apache.spark.sql.execution.command.Auditable.runWithAudit$(package.scala:114) > at > org.apache.spark.sql.execution.command.AtomicRunnableCommand.runWithAudit(package.scala:155) > at > org.apache.spark.sql.execution.command.AtomicRunnableCommand.run(package.scala:168) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:71) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:69) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:80) > at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:231) > at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3697) > at > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:108) > at > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:170) > at > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:91) > at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:777) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65) > at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3695) > at 
org.apache.spark.sql.Dataset.<init>(Dataset.scala:231) > at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100) > at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:777) > at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97) > at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:615) > at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:777) > at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:610) > at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:650) > at > org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:356) > ... 16 more (state=,code=0) > Expected result: Compaction should pass. > Actual result: Compaction failed. -- This message was sent by Atlassian Jira (v8.3.4#803005)