This is an automated email from the ASF dual-hosted git repository. btellier pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/james-project.git
commit 48c3a40b29a4d0c9600b4cfba8711e769a2441a0 Author: Benoit Tellier <[email protected]> AuthorDate: Wed Feb 16 14:27:06 2022 +0700 JAMES-3713 Enable rules caching for DLP Querying DLP rules on top of Cassandra relies on Event sourcing, involves reading a potentially large event stream and involves some SERIAL reads (LightWeight transactions) for each processed email. --- pom.xml | 8 +++ .../apache/james/transport/matchers/dlp/Dlp.java | 60 ++++++++++++++-- .../transport/matchers/dlp/DlpRulesLoader.java | 38 ++++++++++ .../james/transport/matchers/dlp/DlpTest.java | 83 +++++++++++++++------- 4 files changed, 160 insertions(+), 29 deletions(-) diff --git a/pom.xml b/pom.xml index e123774..43276ff 100644 --- a/pom.xml +++ b/pom.xml @@ -2938,6 +2938,8 @@ <artifactId>scala-maven-plugin</artifactId> <version>4.5.6</version> <configuration> + <fork>false</fork> + <recompileMode>incremental</recompileMode> <source>${target.jdk}</source> <target>${target.jdk}</target> <args> @@ -2948,6 +2950,12 @@ <arg>-explaintypes</arg> <arg>-Ywarn-unused</arg> </args> + <jvmArgs> + <jvmArg>-Xss128m</jvmArg> + <jvmArg>-Xms4g</jvmArg> + <jvmArg>-Xmx4g</jvmArg> + <jvmArg>-XX:MaxMetaspaceSize=2g</jvmArg> + </jvmArgs> <compilerPlugins> <compilerPlugin> <groupId>org.scalameta</groupId> diff --git a/server/mailet/mailets/src/main/java/org/apache/james/transport/matchers/dlp/Dlp.java b/server/mailet/mailets/src/main/java/org/apache/james/transport/matchers/dlp/Dlp.java index 945b170..a9b3e8f 100644 --- a/server/mailet/mailets/src/main/java/org/apache/james/transport/matchers/dlp/Dlp.java +++ b/server/mailet/mailets/src/main/java/org/apache/james/transport/matchers/dlp/Dlp.java @@ -19,14 +19,18 @@ package org.apache.james.transport.matchers.dlp; +import java.time.temporal.ChronoUnit; import java.util.Collection; import java.util.Optional; import javax.inject.Inject; +import javax.mail.MessagingException; import org.apache.james.core.MailAddress; import
org.apache.james.dlp.api.DLPConfigurationItem; import org.apache.james.dlp.api.DLPConfigurationStore; +import org.apache.james.metrics.api.GaugeRegistry; +import org.apache.james.util.DurationParser; import org.apache.mailet.Attribute; import org.apache.mailet.AttributeName; import org.apache.mailet.AttributeValue; @@ -36,20 +40,66 @@ import org.apache.mailet.base.GenericMatcher; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; +/** + * Enable evaluation of incoming emails against DLP rules (Data Leak Prevention) attached to the sender domains. + * + * Example: + * + * <mailet match="DLP" class="ToRepository"> + * <repositoryPath>/var/mail/quarantine</repositoryPath> + * </mailet> + * + * Rules can be administered via webAdmin, cf: https://james.apache.org/server/manage-webadmin.html#Administrating_DLP_Configuration + * + * Only available on top of Memory and Cassandra storages. + * + * Additionally a cache can be added to reduce queries done to the underlying database. + * + * Example: + * + * <mailet match="DLP=cache:60s" class="ToRepository"> + * <repositoryPath>/var/mail/quarantine</repositoryPath> + * </mailet> + * + * Will query the DLP rules for a given domain only every 60 seconds. + * + * Please note that querying DLP rules on top of Cassandra relies on Event sourcing, involves reading a potentially + * large event stream and involves some SERIAL reads (LightWeight transactions) for each processed emails. 
+ * + * Efficiency of the cache can be tracked with the following metrics: + * + * - dlp.cache.hitRate + * - dlp.cache.missCount + * - dlp.cache.hitCount + * - dlp.cache.size + */ public class Dlp extends GenericMatcher { private static final AttributeName DLP_MATCHED_RULE = AttributeName.of("DlpMatchedRule"); + public static final String CACHE_PREFIX = "cache:"; - private final DlpRulesLoader rulesLoader; + private final DlpRulesLoader backendRulesLoader; + private final GaugeRegistry gaugeRegistry; + private DlpRulesLoader rulesLoader; @VisibleForTesting - Dlp(DlpRulesLoader rulesLoader) { - this.rulesLoader = rulesLoader; + Dlp(DlpRulesLoader rulesLoader, GaugeRegistry gaugeRegistry) { + this.backendRulesLoader = rulesLoader; + this.gaugeRegistry = gaugeRegistry; + this.rulesLoader = backendRulesLoader; } @Inject - public Dlp(DLPConfigurationStore configurationStore) { - this(new DlpRulesLoader.Impl(configurationStore)); + public Dlp(DLPConfigurationStore configurationStore, GaugeRegistry gaugeRegistry) { + this(new DlpRulesLoader.Impl(configurationStore), gaugeRegistry); + } + + @Override + public void init() throws MessagingException { + if (getCondition() != null && getCondition().startsWith(CACHE_PREFIX)) { + rulesLoader = new DlpRulesLoader.Caching(backendRulesLoader, gaugeRegistry, + DurationParser.parse(getCondition().substring(CACHE_PREFIX.length()), ChronoUnit.SECONDS)); + } } @Override diff --git a/server/mailet/mailets/src/main/java/org/apache/james/transport/matchers/dlp/DlpRulesLoader.java b/server/mailet/mailets/src/main/java/org/apache/james/transport/matchers/dlp/DlpRulesLoader.java index 3d1f95f..00aa487 100644 --- a/server/mailet/mailets/src/main/java/org/apache/james/transport/matchers/dlp/DlpRulesLoader.java +++ b/server/mailet/mailets/src/main/java/org/apache/james/transport/matchers/dlp/DlpRulesLoader.java @@ -19,11 +19,19 @@ package org.apache.james.transport.matchers.dlp; +import java.time.Duration; +import 
java.util.concurrent.ExecutionException; + import javax.inject.Inject; import org.apache.james.core.Domain; import org.apache.james.dlp.api.DLPConfigurationStore; import org.apache.james.dlp.api.DLPRules; +import org.apache.james.metrics.api.GaugeRegistry; + +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; import reactor.core.publisher.Mono; @@ -54,4 +62,34 @@ public interface DlpRulesLoader { return builder.build(); } } + + class Caching implements DlpRulesLoader { + private final LoadingCache<Domain, DlpDomainRules> cache; + + public Caching(DlpRulesLoader wrapped, GaugeRegistry gaugeRegistry, Duration cacheDuration) { + cache = CacheBuilder.newBuilder() + .expireAfterWrite(cacheDuration) + .recordStats() + .build(new CacheLoader<>() { + @Override + public DlpDomainRules load(Domain domain) { + return wrapped.load(domain); + } + }); + + gaugeRegistry.register("dlp.cache.hitRate", () -> cache.stats().hitRate()); + gaugeRegistry.register("dlp.cache.missCount", () -> cache.stats().missCount()); + gaugeRegistry.register("dlp.cache.hitCount", () -> cache.stats().hitCount()); + gaugeRegistry.register("dlp.cache.size", cache::size); + } + + @Override + public DlpDomainRules load(Domain domain) { + try { + return cache.get(domain); + } catch (ExecutionException e) { + throw new RuntimeException(e); + } + } + } } diff --git a/server/mailet/mailets/src/test/java/org/apache/james/transport/matchers/dlp/DlpTest.java b/server/mailet/mailets/src/test/java/org/apache/james/transport/matchers/dlp/DlpTest.java index 0f3669b..b3dca60 100644 --- a/server/mailet/mailets/src/test/java/org/apache/james/transport/matchers/dlp/DlpTest.java +++ b/server/mailet/mailets/src/test/java/org/apache/james/transport/matchers/dlp/DlpTest.java @@ -37,10 +37,13 @@ import org.apache.james.core.Domain; import org.apache.james.core.MailAddress; import org.apache.james.core.builder.MimeMessageBuilder; import 
org.apache.james.dlp.api.DLPConfigurationItem.Id; +import org.apache.james.metrics.api.NoopGaugeRegistry; import org.apache.mailet.Attribute; import org.apache.mailet.AttributeName; import org.apache.mailet.AttributeValue; import org.apache.mailet.base.test.FakeMail; +import org.apache.mailet.base.test.FakeMatcherConfig; +import org.assertj.core.api.SoftAssertions; import org.junit.jupiter.api.Test; class DlpTest { @@ -58,7 +61,7 @@ class DlpTest { @Test void matchShouldReturnEmptyWhenNoRecipient() throws Exception { - Dlp dlp = new Dlp(MATCH_ALL_FOR_ALL_DOMAINS); + Dlp dlp = new Dlp(MATCH_ALL_FOR_ALL_DOMAINS, new NoopGaugeRegistry()); FakeMail mail = FakeMail.builder().name("name").sender(RECIPIENT1).build(); @@ -67,7 +70,7 @@ class DlpTest { @Test void matchShouldReturnEmptyWhenNoSender() throws Exception { - Dlp dlp = new Dlp(MATCH_ALL_FOR_ALL_DOMAINS); + Dlp dlp = new Dlp(MATCH_ALL_FOR_ALL_DOMAINS, new NoopGaugeRegistry()); FakeMail mail = FakeMail.builder().name("name").recipient(RECIPIENT1).build(); @@ -76,14 +79,14 @@ class DlpTest { @Test void matchShouldThrowOnNullMail() { - Dlp dlp = new Dlp(MATCH_ALL_FOR_ALL_DOMAINS); + Dlp dlp = new Dlp(MATCH_ALL_FOR_ALL_DOMAINS, new NoopGaugeRegistry()); assertThatThrownBy(() -> dlp.match(null)).isInstanceOf(NullPointerException.class); } @Test void matchShouldReturnEmptyWhenNoRuleMatch() throws Exception { - Dlp dlp = new Dlp(MATCH_NOTHING_FOR_ALL_DOMAINS); + Dlp dlp = new Dlp(MATCH_NOTHING_FOR_ALL_DOMAINS, new NoopGaugeRegistry()); FakeMail mail = FakeMail.builder() .name("name") @@ -100,7 +103,7 @@ class DlpTest { Dlp dlp = new Dlp( asRulesLoaderFor( JAMES_APACHE_ORG_DOMAIN, - DlpDomainRules.builder().senderRule(Id.of("match sender"), Pattern.compile(ANY_AT_JAMES.asString())).build())); + DlpDomainRules.builder().senderRule(Id.of("match sender"), Pattern.compile(ANY_AT_JAMES.asString())).build()), new NoopGaugeRegistry()); FakeMail mail = 
FakeMail.builder().name("name").sender(ANY_AT_JAMES).recipient(RECIPIENT1).build(); @@ -112,7 +115,7 @@ class DlpTest { Dlp dlp = new Dlp( asRulesLoaderFor( JAMES_APACHE_ORG_DOMAIN, - DlpDomainRules.builder().recipientRule(Id.of("match all recipient"), Pattern.compile(".*")).build())); + DlpDomainRules.builder().recipientRule(Id.of("match all recipient"), Pattern.compile(".*")).build()), new NoopGaugeRegistry()); FakeMail mail = FakeMail.builder().name("name").sender(MailAddress.nullSender()).recipient(RECIPIENT1).build(); @@ -124,7 +127,7 @@ class DlpTest { Dlp dlp = new Dlp( asRulesLoaderFor( JAMES_APACHE_ORG_DOMAIN, - DlpDomainRules.builder().senderRule(Id.of("match sender"), Pattern.compile(ANY_AT_JAMES.asString())).build())); + DlpDomainRules.builder().senderRule(Id.of("match sender"), Pattern.compile(ANY_AT_JAMES.asString())).build()), new NoopGaugeRegistry()); FakeMail mail = FakeMail .builder() @@ -144,7 +147,7 @@ class DlpTest { Dlp dlp = new Dlp( asRulesLoaderFor( JAMES_APACHE_ORG_DOMAIN, - DlpDomainRules.builder().recipientRule(Id.of("match recipient"), Pattern.compile(RECIPIENT1.asString())).build())); + DlpDomainRules.builder().recipientRule(Id.of("match recipient"), Pattern.compile(RECIPIENT1.asString())).build()), new NoopGaugeRegistry()); FakeMail mail = FakeMail.builder() .name("name") @@ -156,12 +159,44 @@ class DlpTest { assertThat(dlp.match(mail)).contains(RECIPIENT1, RECIPIENT2); } + + @Test + void matchShouldReturnRecipientsWhenCached() throws Exception { + Dlp dlp = new Dlp( + asRulesLoaderFor( + JAMES_APACHE_ORG_DOMAIN, + DlpDomainRules.builder().recipientRule(Id.of("match recipient"), Pattern.compile(RECIPIENT1.asString())).build()), new NoopGaugeRegistry()); + + dlp.init(FakeMatcherConfig.builder() + .condition("cache:60s") + .matcherName("DLP") + .build()); + + FakeMail mail1 = FakeMail.builder() + .name("name") + .sender(ANY_AT_JAMES) + .recipient(RECIPIENT1) + .recipient(RECIPIENT2) + .build(); + FakeMail mail2 = FakeMail.builder() + 
.name("name") + .sender(ANY_AT_JAMES) + .recipient(RECIPIENT1) + .recipient(RECIPIENT2) + .build(); + + SoftAssertions.assertSoftly(softly -> { + softly.assertThat(dlp.match(mail1)).contains(RECIPIENT1, RECIPIENT2); + softly.assertThat(dlp.match(mail2)).contains(RECIPIENT1, RECIPIENT2); + }); + } + @Test void matchShouldReturnRecipientsWhenToHeaderMatches() throws Exception { Dlp dlp = new Dlp( asRulesLoaderFor( JAMES_APACHE_ORG_DOMAIN, - DlpDomainRules.builder().recipientRule(Id.of("match recipient"), Pattern.compile(RECIPIENT2.asString())).build())); + DlpDomainRules.builder().recipientRule(Id.of("match recipient"), Pattern.compile(RECIPIENT2.asString())).build()), new NoopGaugeRegistry()); FakeMail mail = FakeMail .builder() @@ -181,7 +216,7 @@ class DlpTest { Dlp dlp = new Dlp( asRulesLoaderFor( JAMES_APACHE_ORG_DOMAIN, - DlpDomainRules.builder().recipientRule(Id.of("match recipient"), Pattern.compile(RECIPIENT2.asString())).build())); + DlpDomainRules.builder().recipientRule(Id.of("match recipient"), Pattern.compile(RECIPIENT2.asString())).build()), new NoopGaugeRegistry()); FakeMail mail = FakeMail .builder() @@ -201,7 +236,7 @@ class DlpTest { Dlp dlp = new Dlp( asRulesLoaderFor( JAMES_APACHE_ORG_DOMAIN, - DlpDomainRules.builder().recipientRule(Id.of("match recipient"), Pattern.compile(RECIPIENT2.asString())).build())); + DlpDomainRules.builder().recipientRule(Id.of("match recipient"), Pattern.compile(RECIPIENT2.asString())).build()), new NoopGaugeRegistry()); FakeMail mail = FakeMail .builder() @@ -221,7 +256,7 @@ class DlpTest { Dlp dlp = new Dlp( asRulesLoaderFor( JAMES_APACHE_ORG_DOMAIN, - DlpDomainRules.builder().contentRule(Id.of("match subject"), Pattern.compile("pony")).build())); + DlpDomainRules.builder().contentRule(Id.of("match subject"), Pattern.compile("pony")).build()), new NoopGaugeRegistry()); FakeMail mail = FakeMail .builder() @@ -241,7 +276,7 @@ class DlpTest { Dlp dlp = new Dlp( asRulesLoaderFor( JAMES_APACHE_ORG_DOMAIN, - 
DlpDomainRules.builder().contentRule(Id.of("match content"), Pattern.compile("horse")).build())); + DlpDomainRules.builder().contentRule(Id.of("match content"), Pattern.compile("horse")).build()), new NoopGaugeRegistry()); FakeMail mail = FakeMail .builder() @@ -262,7 +297,7 @@ class DlpTest { Dlp dlp = new Dlp( asRulesLoaderFor( JAMES_APACHE_ORG_DOMAIN, - DlpDomainRules.builder().contentRule(Id.of("match content"), Pattern.compile("horse")).build())); + DlpDomainRules.builder().contentRule(Id.of("match content"), Pattern.compile("horse")).build()), new NoopGaugeRegistry()); FakeMail mail = FakeMail .builder() @@ -282,7 +317,7 @@ class DlpTest { Dlp dlp = new Dlp( asRulesLoaderFor( JAMES_APACHE_ORG_DOMAIN, - DlpDomainRules.builder().contentRule(Id.of("match content"), Pattern.compile("horse")).build())); + DlpDomainRules.builder().contentRule(Id.of("match content"), Pattern.compile("horse")).build()), new NoopGaugeRegistry()); FakeMail mail = FakeMail .builder() @@ -307,7 +342,7 @@ class DlpTest { Dlp dlp = new Dlp( asRulesLoaderFor( JAMES_APACHE_ORG_DOMAIN, - DlpDomainRules.builder().contentRule(Id.of("match content"), Pattern.compile("horse")).build())); + DlpDomainRules.builder().contentRule(Id.of("match content"), Pattern.compile("horse")).build()), new NoopGaugeRegistry()); FakeMail mail = FakeMail .builder() @@ -339,7 +374,7 @@ class DlpTest { JAMES_APACHE_ORG_DOMAIN, DlpDomainRules.builder() .senderRule(Id.of("match content"), Pattern.compile(RECIPIENT2.asString())) - .build())); + .build()), new NoopGaugeRegistry()); FakeMail mail = FakeMail .builder() @@ -372,7 +407,7 @@ class DlpTest { JAMES_APACHE_ORG_DOMAIN, DlpDomainRules.builder() .recipientRule(Id.of("match content"), Pattern.compile(RECIPIENT2.asString())) - .build())); + .build()), new NoopGaugeRegistry()); FakeMail mail = FakeMail .builder() @@ -405,7 +440,7 @@ class DlpTest { JAMES_APACHE_ORG_DOMAIN, DlpDomainRules.builder() .contentRule(Id.of("match content"), Pattern.compile("poné")) - 
.build())); + .build()), new NoopGaugeRegistry()); FakeMail mail = FakeMail .builder() @@ -429,7 +464,7 @@ class DlpTest { JAMES_APACHE_ORG_DOMAIN, DlpDomainRules.builder() .contentRule(Id.of("match content"), Pattern.compile("poné")) - .build())); + .build()), new NoopGaugeRegistry()); FakeMail mail = FakeMail .builder() @@ -453,7 +488,7 @@ class DlpTest { JAMES_APACHE_ORG_DOMAIN, DlpDomainRules.builder() .recipientRule(Id.of("id1"), Pattern.compile(RECIPIENT1.asString())) - .build())); + .build()), new NoopGaugeRegistry()); MimeMessageBuilder meaninglessText = MimeMessageBuilder .mimeMessageBuilder() @@ -479,7 +514,7 @@ class DlpTest { JAMES_APACHE_ORG_DOMAIN, DlpDomainRules.builder() .recipientRule(Id.of("id1"), Pattern.compile("Benoît")) - .build())); + .build()), new NoopGaugeRegistry()); MimeMessageBuilder meaninglessText = MimeMessageBuilder .mimeMessageBuilder() @@ -505,7 +540,7 @@ class DlpTest { JAMES_APACHE_ORG_DOMAIN, DlpDomainRules.builder() .senderRule(Id.of("id1"), Pattern.compile("Benoît")) - .build())); + .build()), new NoopGaugeRegistry()); MimeMessageBuilder meaninglessText = MimeMessageBuilder .mimeMessageBuilder() @@ -533,7 +568,7 @@ class DlpTest { DlpDomainRules.builder() .recipientRule(Id.of("should not match recipient"), Pattern.compile(RECIPIENT3.asString())) .senderRule(Id.of(attributeValue), Pattern.compile(JAMES_APACHE_ORG)) - .build())); + .build()), new NoopGaugeRegistry()); FakeMail mail = FakeMail.builder() .name("name") --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
