This is an automated email from the ASF dual-hosted git repository. btellier pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/james-project.git
commit d269d85fc28fd1fc711eee9800a82e5e9458ce91 Author: Benoit TELLIER <btell...@linagora.com> AuthorDate: Mon Dec 2 22:58:32 2024 +0100 [ENHANCEMENT] Make GC implementable by other GC aware blobId implementations --- .../server/blob/deduplication/BlobGCTask.java | 11 +++++---- .../server/blob/deduplication/BlobGCTaskDTO.java | 3 ++- .../blob/deduplication/BloomFilterGCAlgorithm.java | 19 +++++++++------ .../server/blob/deduplication/GenerationAware.java | 27 ++++++++++++++++++++++ .../blob/deduplication/GenerationAwareBlobId.java | 5 ++-- .../blobstore/BlobDeduplicationGCModule.java | 5 ++-- .../apache/james/webadmin/routes/BlobRoutes.java | 5 ++-- 7 files changed, 55 insertions(+), 20 deletions(-) diff --git a/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/BlobGCTask.java b/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/BlobGCTask.java index abfcf70a8b..775f5c5061 100644 --- a/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/BlobGCTask.java +++ b/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/BlobGCTask.java @@ -24,6 +24,7 @@ import java.time.Instant; import java.util.Optional; import java.util.Set; +import org.apache.james.blob.api.BlobId; import org.apache.james.blob.api.BlobReferenceSource; import org.apache.james.blob.api.BlobStoreDAO; import org.apache.james.blob.api.BucketName; @@ -150,7 +151,7 @@ public class BlobGCTask implements Task { @FunctionalInterface public interface RequireGenerationAwareBlobIdFactory { - RequireGenerationAwareBlobIdConfiguration generationAwareBlobIdFactory(GenerationAwareBlobId.Factory generationAwareBlobIdFactory); + RequireGenerationAwareBlobIdConfiguration generationAwareBlobIdFactory(BlobId.Factory generationAwareBlobIdFactory); } @FunctionalInterface @@ -159,7 +160,7 @@ public class BlobGCTask implements Task { } private final BlobStoreDAO blobStoreDAO; - private final GenerationAwareBlobId.Factory generationAwareBlobIdFactory; + private final BlobId.Factory generationAwareBlobIdFactory; private final GenerationAwareBlobId.Configuration generationAwareBlobIdConfiguration; private final Set<BlobReferenceSource> blobReferenceSources; private final Clock clock; @@ -168,7 +169,7 @@ public class BlobGCTask implements Task { private final double associatedProbability; private Optional<Integer> deletionWindowSize; - public Builder(BlobStoreDAO blobStoreDAO, GenerationAwareBlobId.Factory generationAwareBlobIdFactory, + public Builder(BlobStoreDAO blobStoreDAO, BlobId.Factory generationAwareBlobIdFactory, GenerationAwareBlobId.Configuration generationAwareBlobIdConfiguration, Set<BlobReferenceSource> blobReferenceSources, Clock clock, BucketName bucketName, int expectedBlobCount, double associatedProbability) { @@ -223,7 +224,7 @@ public class BlobGCTask implements Task { private final BlobStoreDAO blobStoreDAO; - private final GenerationAwareBlobId.Factory generationAwareBlobIdFactory; + private final BlobId.Factory generationAwareBlobIdFactory; private final GenerationAwareBlobId.Configuration generationAwareBlobIdConfiguration; private final Set<BlobReferenceSource> blobReferenceSources; private final Clock clock; @@ -235,7 +236,7 @@ public class BlobGCTask implements Task { public BlobGCTask(BlobStoreDAO blobStoreDAO, - GenerationAwareBlobId.Factory generationAwareBlobIdFactory, + BlobId.Factory generationAwareBlobIdFactory, GenerationAwareBlobId.Configuration generationAwareBlobIdConfiguration, Set<BlobReferenceSource> blobReferenceSources, BucketName bucketName, diff --git a/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/BlobGCTaskDTO.java b/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/BlobGCTaskDTO.java index 2aa7c724fb..bd9f473cc8 100644 --- a/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/BlobGCTaskDTO.java +++ b/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/BlobGCTaskDTO.java @@ -23,6 +23,7 @@ import java.time.Clock; import java.util.Optional; import java.util.Set; +import org.apache.james.blob.api.BlobId; import org.apache.james.blob.api.BlobReferenceSource; import org.apache.james.blob.api.BlobStoreDAO; import org.apache.james.blob.api.BucketName; @@ -53,7 +54,7 @@ public class BlobGCTaskDTO implements TaskDTO { } public static TaskDTOModule<BlobGCTask, BlobGCTaskDTO> module(BlobStoreDAO blobStoreDAO, - GenerationAwareBlobId.Factory generationAwareBlobIdFactory, + BlobId.Factory generationAwareBlobIdFactory, GenerationAwareBlobId.Configuration generationAwareBlobIdConfiguration, Set<BlobReferenceSource> blobReferenceSources, Clock clock) { diff --git a/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/BloomFilterGCAlgorithm.java b/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/BloomFilterGCAlgorithm.java index 243a3f15d9..67fa255aec 100644 --- a/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/BloomFilterGCAlgorithm.java +++ b/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/BloomFilterGCAlgorithm.java @@ -30,6 +30,7 @@ import java.util.Optional; import java.util.UUID; import java.util.concurrent.atomic.AtomicLong; +import org.apache.james.blob.api.BlobId; import org.apache.james.blob.api.BlobReferenceSource; import org.apache.james.blob.api.BlobStoreDAO; import org.apache.james.blob.api.BucketName; @@ -50,7 +51,6 @@ public class BloomFilterGCAlgorithm { private static final Logger LOGGER = LoggerFactory.getLogger(BloomFilterGCAlgorithm.class); private static final Funnel<CharSequence> BLOOM_FILTER_FUNNEL = Funnels.stringFunnel(StandardCharsets.US_ASCII); - private static final int DELETION_BATCH_SIZE = 1000; public static class Context { @@ -242,7 +242,7 @@ public class BloomFilterGCAlgorithm { private final BlobReferenceSource referenceSource; private final BlobStoreDAO blobStoreDAO; - private final GenerationAwareBlobId.Factory generationAwareBlobIdFactory; + private final BlobId.Factory blobIdFactory; private final GenerationAwareBlobId.Configuration generationAwareBlobIdConfiguration; private final Instant now; @@ -251,12 +251,12 @@ public class BloomFilterGCAlgorithm { public BloomFilterGCAlgorithm(BlobReferenceSource referenceSource, BlobStoreDAO blobStoreDAO, - GenerationAwareBlobId.Factory generationAwareBlobIdFactory, + BlobId.Factory generationAwareBlobIdFactory, GenerationAwareBlobId.Configuration generationAwareBlobIdConfiguration, Clock clock) { this.referenceSource = referenceSource; this.blobStoreDAO = blobStoreDAO; - this.generationAwareBlobIdFactory = generationAwareBlobIdFactory; + this.blobIdFactory = generationAwareBlobIdFactory; this.generationAwareBlobIdConfiguration = generationAwareBlobIdConfiguration; this.salt = UUID.randomUUID().toString(); this.now = clock.instant(); @@ -274,8 +274,13 @@ public class BloomFilterGCAlgorithm { private Mono<Result> gc(BloomFilter<CharSequence> bloomFilter, BucketName bucketName, Context context, int deletionWindowSize) { return Flux.from(blobStoreDAO.listBlobs(bucketName)) .doOnNext(blobId -> context.incrementBlobCount()) - .flatMap(blobId -> Mono.fromCallable(() -> generationAwareBlobIdFactory.parse(blobId.asString()))) - .filter(blobId -> !blobId.inActiveGeneration(generationAwareBlobIdConfiguration, now)) + .flatMap(blobId -> Mono.fromCallable(() -> blobIdFactory.parse(blobId.asString()))) + .filter(blobId -> { + if (blobId instanceof GenerationAware generationAware) { + return !generationAware.inActiveGeneration(generationAwareBlobIdConfiguration, now); + } + return false; + }) .filter(blobId -> !bloomFilter.mightContain(salt + blobId.asString())) .window(deletionWindowSize) .flatMap(blobIdFlux -> handlePagedDeletion(bucketName, context, blobIdFlux), DEFAULT_CONCURRENCY) @@ -283,7 +288,7 @@ public class BloomFilterGCAlgorithm { .switchIfEmpty(Mono.just(Result.COMPLETED)); } - private Mono<Result> handlePagedDeletion(BucketName bucketName, Context context, Flux<GenerationAwareBlobId> blobIdFlux) { + private Mono<Result> handlePagedDeletion(BucketName bucketName, Context context, Flux<BlobId> blobIdFlux) { return blobIdFlux.collectList() .flatMap(orphanBlobIds -> Mono.from(blobStoreDAO.delete(bucketName, (Collection) orphanBlobIds)) .then(Mono.fromCallable(() -> { diff --git a/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/GenerationAware.java b/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/GenerationAware.java new file mode 100644 index 0000000000..1145f1d2da --- /dev/null +++ b/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/GenerationAware.java @@ -0,0 +1,27 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. * + ****************************************************************/ + +package org.apache.james.server.blob.deduplication; + +import java.time.Instant; + +@FunctionalInterface +public interface GenerationAware { + boolean inActiveGeneration(GenerationAwareBlobId.Configuration configuration, Instant now); +} diff --git a/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/GenerationAwareBlobId.java b/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/GenerationAwareBlobId.java index 5fe4c48b5f..67fd492b16 100644 --- a/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/GenerationAwareBlobId.java +++ b/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/GenerationAwareBlobId.java @@ -33,7 +33,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.MoreObjects; import com.google.common.base.Preconditions; -public class GenerationAwareBlobId implements BlobId { +public class GenerationAwareBlobId implements BlobId, GenerationAware { public static class Configuration { public static final Duration DEFAULT_DURATION = Duration.ofDays(30); @@ -157,7 +157,7 @@ public class GenerationAwareBlobId implements BlobId { public static final int NO_FAMILY = 0; public static final int NO_GENERATION = 0; - private static long computeGeneration(Configuration configuration, Instant now) { + static long computeGeneration(Configuration configuration, Instant now) { return now.getEpochSecond() / configuration.getDuration().toSeconds(); } @@ -182,6 +182,7 @@ public class GenerationAwareBlobId implements BlobId { return family + "_" + generation + "_" + delegate.asString(); } + @Override public boolean inActiveGeneration(Configuration configuration, Instant now) { return configuration.getFamily() == this.family && generation + 1 >= computeGeneration(configuration, now); diff --git a/server/container/guice/blob/deduplication-gc/src/main/java/org/apache/james/modules/blobstore/BlobDeduplicationGCModule.java b/server/container/guice/blob/deduplication-gc/src/main/java/org/apache/james/modules/blobstore/BlobDeduplicationGCModule.java index 4c8dff091b..07c23818e0 100644 --- a/server/container/guice/blob/deduplication-gc/src/main/java/org/apache/james/modules/blobstore/BlobDeduplicationGCModule.java +++ b/server/container/guice/blob/deduplication-gc/src/main/java/org/apache/james/modules/blobstore/BlobDeduplicationGCModule.java @@ -58,7 +58,6 @@ public class BlobDeduplicationGCModule extends AbstractModule { @Override protected void configure() { bind(PlainBlobId.Factory.class).in(Scopes.SINGLETON); - bind(BlobId.Factory.class).to(GenerationAwareBlobId.Factory.class); bind(MetricableBlobStore.class).in(Scopes.SINGLETON); bind(BlobStore.class).to(MetricableBlobStore.class); @@ -68,7 +67,7 @@ public class BlobDeduplicationGCModule extends AbstractModule { @Singleton @Provides - public GenerationAwareBlobId.Factory generationAwareBlobIdFactory(Clock clock, PlainBlobId.Factory delegate, GenerationAwareBlobId.Configuration configuration) { + public BlobId.Factory generationAwareBlobIdFactory(Clock clock, PlainBlobId.Factory delegate, GenerationAwareBlobId.Configuration configuration) { return new GenerationAwareBlobId.Factory(clock, delegate, configuration); } @@ -85,7 +84,7 @@ public class BlobDeduplicationGCModule extends AbstractModule { @ProvidesIntoSet public TaskDTOModule<? extends Task, ? extends TaskDTO> blobGCTask(BlobStoreDAO blobStoreDAO, - GenerationAwareBlobId.Factory generationAwareBlobIdFactory, + BlobId.Factory generationAwareBlobIdFactory, GenerationAwareBlobId.Configuration generationAwareBlobIdConfiguration, Set<BlobReferenceSource> blobReferenceSources, Clock clock) { diff --git a/server/protocols/webadmin/webadmin-data/src/main/java/org/apache/james/webadmin/routes/BlobRoutes.java b/server/protocols/webadmin/webadmin-data/src/main/java/org/apache/james/webadmin/routes/BlobRoutes.java index 3aee001370..ff37a2aa71 100644 --- a/server/protocols/webadmin/webadmin-data/src/main/java/org/apache/james/webadmin/routes/BlobRoutes.java +++ b/server/protocols/webadmin/webadmin-data/src/main/java/org/apache/james/webadmin/routes/BlobRoutes.java @@ -26,6 +26,7 @@ import java.util.Set; import jakarta.inject.Inject; import jakarta.inject.Named; +import org.apache.james.blob.api.BlobId; import org.apache.james.blob.api.BlobReferenceSource; import org.apache.james.blob.api.BlobStore; import org.apache.james.blob.api.BlobStoreDAO; @@ -56,7 +57,7 @@ public class BlobRoutes implements Routes { private final BucketName bucketName; private final Set<BlobReferenceSource> blobReferenceSources; private final GenerationAwareBlobId.Configuration generationAwareBlobIdConfiguration; - private final GenerationAwareBlobId.Factory generationAwareBlobIdFactory; + private final BlobId.Factory generationAwareBlobIdFactory; @Inject public BlobRoutes(TaskManager taskManager, @@ -66,7 +67,7 @@ public class BlobRoutes implements Routes { @Named(BlobStore.DEFAULT_BUCKET_NAME_QUALIFIER) BucketName defaultBucketName, Set<BlobReferenceSource> blobReferenceSources, GenerationAwareBlobId.Configuration generationAwareBlobIdConfiguration, - GenerationAwareBlobId.Factory generationAwareBlobIdFactory) { + BlobId.Factory generationAwareBlobIdFactory) { this.taskManager = taskManager; this.jsonTransformer = jsonTransformer; this.clock = clock; --------------------------------------------------------------------- To unsubscribe, e-mail: notifications-unsubscr...@james.apache.org For additional commands, e-mail: notifications-h...@james.apache.org