deniskuzZ commented on code in PR #5123: URL: https://github.com/apache/hive/pull/5123#discussion_r1561226378
########## iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/compaction/IcebergMajorQueryCompactor.java: ########## @@ -21,45 +21,159 @@ import java.io.IOException; import java.util.Map; +import jline.internal.Log; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.ql.Context.RewritePolicy; import org.apache.hadoop.hive.ql.DriverUtils; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.AlterTableSnapshotRefSpec; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.txn.compactor.CompactorContext; import org.apache.hadoop.hive.ql.txn.compactor.QueryCompactor; import org.apache.hive.iceberg.org.apache.orc.storage.common.TableName; +import org.apache.iceberg.DataFiles; +import org.apache.iceberg.PartitionData; +import org.apache.iceberg.Table; +import org.apache.iceberg.Transaction; +import org.apache.iceberg.mr.hive.HiveIcebergOutputCommitter; +import org.apache.iceberg.mr.hive.IcebergBranchExec; +import org.apache.iceberg.mr.hive.IcebergTableUtil; +import org.apache.iceberg.types.Type; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class IcebergMajorQueryCompactor extends QueryCompactor { private static final Logger LOG = LoggerFactory.getLogger(IcebergMajorQueryCompactor.class.getName()); + private static final String branchName = "compaction"; + private String compactTableName; @Override public boolean run(CompactorContext context) throws IOException, HiveException, InterruptedException { - String compactTableName = TableName.getDbTable(context.getTable().getDbName(), context.getTable().getTableName()); + compactTableName = TableName.getDbTable(context.getTable().getDbName(), context.getTable().getTableName()); Map<String, String> tblProperties = 
context.getTable().getParameters(); LOG.debug("Initiating compaction for the {} table", compactTableName); - String compactionQuery = String.format("insert overwrite table %s select * from %<s", - compactTableName); + String partSpec = context.getCompactionInfo().partName; + String compactionQuery; SessionState sessionState = setupQueryCompactionSession(context.getConf(), context.getCompactionInfo(), tblProperties); - HiveConf.setVar(context.getConf(), ConfVars.REWRITE_POLICY, RewritePolicy.ALL_PARTITIONS.name()); + try { - DriverUtils.runOnDriver(context.getConf(), sessionState, compactionQuery); - LOG.info("Completed compaction for table {}", compactTableName); - } catch (HiveException e) { - LOG.error("Error doing query based {} compaction", RewritePolicy.ALL_PARTITIONS.name(), e); - throw new RuntimeException(e); + if (partSpec == null) { + HiveConf.setVar(context.getConf(), ConfVars.REWRITE_POLICY, RewritePolicy.ALL_PARTITIONS.name()); + + compactionQuery = String.format("insert overwrite table %s select * from %<s", compactTableName); + + executeHiveQuery(context, compactionQuery, sessionState, + String.format("compacting table %s", compactTableName)); + } else { + Table table = IcebergTableUtil.getTable(context.getConf(), context.getTable()); + AlterTableSnapshotRefSpec.DropSnapshotRefSpec dropBranchSpec = + new AlterTableSnapshotRefSpec.DropSnapshotRefSpec(branchName, true); + IcebergBranchExec.dropBranch(table, dropBranchSpec); Review Comment: The branch name should be unique; you should be able to execute a number of compaction requests in parallel. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org