difin commented on code in PR #5792:
URL: https://github.com/apache/hive/pull/5792#discussion_r2096690629
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/compaction/IcebergQueryCompactor.java:
##########
@@ -96,31 +127,46 @@ public boolean run(CompactorContext context) throws
IOException, HiveException,
throw new HiveException(ErrorMsg.COMPACTION_NO_PARTITION);
}
} else {
- long partitionHash = IcebergTableUtil.getPartitionHash(icebergTable,
partSpec);
+ HiveConf.setBoolVar(conf, ConfVars.HIVE_CONVERT_JOIN, false);
+ conf.setBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED, false);
HiveConf.setVar(conf, ConfVars.REWRITE_POLICY,
RewritePolicy.PARTITION.name());
- conf.set(IcebergCompactionService.PARTITION_PATH, new
Path(partSpec).toString());
+ conf.set(IcebergCompactionService.PARTITION_PATH, new
Path(ci.partName).toString());
- Map<String, String> partSpecMap = new LinkedHashMap<>();
- Warehouse.makeSpecFromName(partSpecMap, new Path(partSpec), null);
+ int specId = IcebergTableUtil.getPartitionSpecId(icebergTable,
ci.partName);
+ String partitionPredicate = buildPartitionPredicate(ci, icebergTable);
- compactionQuery = String.format("insert overwrite table %1$s select *
from %1$s where %2$s=%3$d " +
- "and %4$s is not null %5$s %6$s", compactTableName,
VirtualColumn.PARTITION_HASH.getName(), partitionHash,
- VirtualColumn.FILE_PATH.getName(), fileSizePredicate == null ? "" :
"and " + fileSizePredicate, orderBy);
+ compactionQuery = String.format("INSERT OVERWRITE TABLE %1$s SELECT *
FROM %1$s WHERE %2$s IN " +
+ "(SELECT FILE_PATH FROM %1$s.FILES WHERE %3$s AND SPEC_ID = %7$d)
AND %6$s = %7$d %4$s %5$s",
+ compactTableName, VirtualColumn.FILE_PATH.getName(), partitionPredicate,
+ fileSizePredicate == null ? "" : "AND " + fileSizePredicate, orderBy,
+ VirtualColumn.PARTITION_SPEC_ID.getName(), specId);
}
+ return compactionQuery;
+ }
- SessionState sessionState = setupQueryCompactionSession(conf, ci,
tblProperties);
- String compactionTarget = "table " +
HiveUtils.unparseIdentifier(compactTableName) +
- (partSpec != null ? ", partition " +
HiveUtils.unparseIdentifier(partSpec) : "");
+ private String buildPartitionPredicate(CompactionInfo ci, Table
icebergTable) {
+ Map<String, String> partSpecMap = new LinkedHashMap<>();
+ Warehouse.makeSpecFromName(partSpecMap, new Path(ci.partName), null);
- try {
- DriverUtils.runOnDriver(conf, sessionState, compactionQuery);
- LOG.info("Completed compaction for {}", compactionTarget);
- return true;
- } catch (HiveException e) {
- LOG.error("Failed compacting {}", compactionTarget, e);
- throw e;
- } finally {
- sessionState.setCompaction(false);
- }
+ Map<String, PartitionField> partitionFieldMap =
IcebergTableUtil.getPartitionFields(icebergTable, false)
+ .stream().collect(Collectors.toMap(PartitionField::name,
Function.identity()));
+
+ Types.StructType partitionType = Partitioning.partitionType(icebergTable);
Review Comment:
As answered in the previous comment, `getPartitionSpec()` doesn't return all
partition fields from all specs.
I changed the method `getPartitionSpecId` to return a `PartitionSpec` to make
it more generic for reuse, but didn't replace the `partitionType` and `fields`
parts.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]