rdblue commented on a change in pull request #153: [Baseline] Apply baseline
linting to iceberg-core
URL: https://github.com/apache/incubator-iceberg/pull/153#discussion_r274062018
##########
File path: core/src/main/java/org/apache/iceberg/MergingSnapshotUpdate.java
##########
@@ -355,80 +351,110 @@ private ManifestFile filterManifest(Expression
deleteExpression,
// this assumes that the manifest doesn't have files to remove and
streams through the
// manifest without copying data. if a manifest does have a file to
remove, this will break
// out of the loop and move on to filtering the manifest.
- boolean hasDeletedFiles = false;
- for (ManifestEntry entry : reader.entries()) {
- DataFile file = entry.file();
- boolean fileDelete =
(deletePaths.contains(pathWrapper.set(file.path())) ||
- dropPartitions.contains(partitionWrapper.set(file.partition())));
- if (fileDelete || inclusive.eval(file.partition())) {
- ValidationException.check(
- fileDelete || strict.eval(file.partition()) ||
metricsEvaluator.eval(file),
- "Cannot delete file where some, but not all, rows match filter
%s: %s",
- this.deleteExpression, file.path());
-
- hasDeletedFiles = true;
- if (failAnyDelete) {
- throw new
DeleteException(writeSpec().partitionToPath(file.partition()));
- }
- break; // as soon as a deleted file is detected, stop scanning
- }
- }
+ boolean hasDeletedFiles =
+ manifestHasDeletedFiles(metricsEvaluator, reader, inclusive, strict,
pathWrapper, partitionWrapper);
if (!hasDeletedFiles) {
filteredManifests.put(manifest, manifest);
return manifest;
}
- // when this point is reached, there is at least one file that will be
deleted in the
- // manifest. produce a copy of the manifest with all deleted files
removed.
- List<DataFile> deletedFiles = Lists.newArrayList();
- Set<CharSequenceWrapper> deletedPaths = Sets.newHashSet();
- OutputFile filteredCopy = manifestPath(manifestCount.getAndIncrement());
- ManifestWriter writer = new ManifestWriter(reader.spec(), filteredCopy,
snapshotId());
- try {
- for (ManifestEntry entry : reader.entries()) {
- DataFile file = entry.file();
- boolean fileDelete =
(deletePaths.contains(pathWrapper.set(file.path())) ||
- dropPartitions.contains(partitionWrapper.set(file.partition())));
- if (entry.status() != Status.DELETED) {
- if (fileDelete || inclusive.eval(file.partition())) {
- ValidationException.check(
- fileDelete || strict.eval(file.partition()) ||
metricsEvaluator.eval(file),
- "Cannot delete file where some, but not all, rows match
filter %s: %s",
- this.deleteExpression, file.path());
-
- writer.delete(entry);
-
- CharSequenceWrapper wrapper =
CharSequenceWrapper.wrap(entry.file().path());
- if (deletedPaths.contains(wrapper)) {
- LOG.warn("Deleting a duplicate path from manifest {}: {}",
- manifest.path(), wrapper.get());
- summaryBuilder.incrementDuplicateDeletes();
- } else {
- // only add the file to deletes if it is a new delete
- // this keeps the snapshot summary accurate for non-duplicate
data
- deletedFiles.add(entry.file().copy());
- }
- deletedPaths.add(wrapper);
+ return filterManifestsWithDeletedFiles(
+ metricsEvaluator,
+ manifest,
+ reader,
+ inclusive,
+ strict,
+ pathWrapper,
+ partitionWrapper);
+ }
+ }
+ private boolean manifestHasDeletedFiles(
Review comment:
I thought the convention we agreed on was multiple arguments per line until
they no longer fit.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]