rdblue commented on code in PR #4902:
URL: https://github.com/apache/iceberg/pull/4902#discussion_r915066958
##########
spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/procedures/RewriteDataFilesProcedure.java:
##########
@@ -140,36 +141,55 @@ private RewriteDataFiles checkAndApplyOptions(InternalRow args, RewriteDataFiles
return action.options(options);
}
- private RewriteDataFiles checkAndApplyStrategy(RewriteDataFiles action, String strategy, SortOrder sortOrder) {
+ private RewriteDataFiles checkAndApplyStrategy(
+ RewriteDataFiles action,
+ String strategy,
+ String sortOrderString,
+ Schema schema) {
+ List<ExtendedParser.RawOrderField> zOrderFields;
+ List<ExtendedParser.RawOrderField> sortOrderFields;
+ if (sortOrderString != null) {
+ List<ExtendedParser.RawOrderField> rawOrderFields = ExtendedParser.parseSortOrder(spark(), sortOrderString);
+ Map<Boolean, List<ExtendedParser.RawOrderField>> partitions = rawOrderFields.stream().collect(
+ Collectors.partitioningBy(field -> field.term() instanceof Zorder));
+ zOrderFields = partitions.get(true);
+ sortOrderFields = partitions.get(false);
+
+ if (!zOrderFields.isEmpty() && !sortOrderFields.isEmpty()) {
+ // TODO: we need to allow this in future when SparkAction has handling for this.
+ throw new IllegalArgumentException("Both SortOrder and Zorder is configured: " + sortOrderString);
+ }
+ } else {
+ zOrderFields = Collections.emptyList();
+ sortOrderFields = Collections.emptyList();
+ }
+
// caller of this function ensures that between strategy and sortOrder, at least one of them is not null.
if (strategy == null || strategy.equalsIgnoreCase("sort")) {
- return action.sort(sortOrder);
+ if (!zOrderFields.isEmpty()) {
+ String[] columnNames = zOrderFields.stream().flatMap(
+ field -> ((Zorder) field.term()).refs().stream().map(NamedReference::name)).toArray(String[]::new);
+ return action.zOrder(columnNames);
+ }
+ return action.sort(buildSortOrder(sortOrderFields, schema));
}
if (strategy.equalsIgnoreCase("binpack")) {
RewriteDataFiles rewriteDataFiles = action.binPack();
- if (sortOrder != null) {
+ if (sortOrderString != null) {
// calling below method to throw the error as user has set both binpack strategy and sort order
- return rewriteDataFiles.sort(sortOrder);
+ return rewriteDataFiles.sort(buildSortOrder(sortOrderFields, schema));
}
return rewriteDataFiles;
} else {
- throw new IllegalArgumentException("unsupported strategy: " + strategy + ". Only binpack,sort is supported");
+ throw new IllegalArgumentException(
+ "unsupported strategy: " + strategy + ". Only binpack or sort is supported");
Review Comment:
Is this change needed?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]