yuzelin commented on code in PR #1443:
URL: https://github.com/apache/incubator-paimon/pull/1443#discussion_r1252004793
##########
paimon-core/src/main/java/org/apache/paimon/operation/SnapshotDeletion.java:
##########
@@ -224,46 +92,53 @@ void doDeleteExpiredDataFiles(
(path, pair) -> {
ManifestEntry entry = pair.getLeft();
// check whether we should skip the data file
- if (!dataFileSkipper.test(entry)) {
+ if (!skipper.test(entry)) {
// delete data files
fileIO.deleteQuietly(path);
pair.getRight().forEach(fileIO::deleteQuietly);
- // record changed buckets
- deletionBuckets
- .computeIfAbsent(entry.partition(), p -> new
HashSet<>())
- .add(entry.bucket());
+
+ recordDeletionBuckets(entry);
}
});
}
- private Iterable<ManifestEntry> getManifestEntriesFromManifestList(String
manifestListName) {
- Queue<String> files =
- tryReadManifestList(manifestListName).stream()
- .map(ManifestFileMeta::fileName)
- .collect(Collectors.toCollection(LinkedList::new));
- return Iterables.concat(
- (Iterable<Iterable<ManifestEntry>>)
- () ->
- new Iterator<Iterable<ManifestEntry>>() {
- @Override
- public boolean hasNext() {
- return files.size() > 0;
- }
+ /**
+ * Delete added file in the manifest list files. Added files marked as
"ADD" in manifests.
+ *
+ * @param manifestListName name of manifest list
+ */
+ public void deleteAddedDataFiles(String manifestListName) {
+ for (ManifestEntry entry : tryReadManifestEntries(manifestListName)) {
+ if (entry.kind() == FileKind.ADD) {
+ fileIO.deleteQuietly(
+ new Path(
+ pathFactory.bucketPath(entry.partition(),
entry.bucket()),
+ entry.file().fileName()));
+ recordDeletionBuckets(entry);
+ }
+ }
+ }
- @Override
- public Iterable<ManifestEntry> next() {
- String file = files.poll();
- try {
- return manifestFile.read(file);
- } catch (Exception e) {
- LOG.warn("Failed to read manifest
file " + file, e);
- return Collections.emptyList();
- }
- }
- });
+ private Iterable<ManifestEntry> tryReadManifestEntries(String
manifestListName) {
+ return readManifestEntries(tryReadManifestList(manifestListName));
}
- public Set<String> collectManifestSkippingSet(Snapshot snapshot) {
- return DeletionUtils.collectManifestSkippingSet(snapshot,
manifestList, indexFileHandler);
+ /** Used to record which tag is cached in tagged snapshots list. */
+ private int cachedTagIndex = -1;
+
+ /** Used to cache data files used by current tag. */
+ private final Map<BinaryRow, Map<Integer, Set<String>>> cachedTagDataFiles
= new HashMap<>();
+
+ public Predicate<ManifestEntry> dataFileSkipper(
Review Comment:
`dataFileSkipper` will use some methods in FileDeletionBase, so I don't think it's
necessary to move them out.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]