This is an automated email from the ASF dual-hosted git repository.

btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git

commit 48c3a40b29a4d0c9600b4cfba8711e769a2441a0
Author: Benoit Tellier <[email protected]>
AuthorDate: Wed Feb 16 14:27:06 2022 +0700

    JAMES-3713 Enable rules caching for DLP
    
    Querying DLP rules on top of Cassandra relies on Event
    sourcing, involves reading a potentially large event
    stream and involves some SERIAL reads (lightweight
    transactions) for each processed email.
---
 pom.xml                                            |  8 +++
 .../apache/james/transport/matchers/dlp/Dlp.java   | 60 ++++++++++++++--
 .../transport/matchers/dlp/DlpRulesLoader.java     | 38 ++++++++++
 .../james/transport/matchers/dlp/DlpTest.java      | 83 +++++++++++++++-------
 4 files changed, 160 insertions(+), 29 deletions(-)

diff --git a/pom.xml b/pom.xml
index e123774..43276ff 100644
--- a/pom.xml
+++ b/pom.xml
@@ -2938,6 +2938,8 @@
                     <artifactId>scala-maven-plugin</artifactId>
                     <version>4.5.6</version>
                     <configuration>
+                        <fork>false</fork>
+                        <recompileMode>incremental</recompileMode>
                         <source>${target.jdk}</source>
                         <target>${target.jdk}</target>
                         <args>
@@ -2948,6 +2950,12 @@
                             <arg>-explaintypes</arg>
                             <arg>-Ywarn-unused</arg>
                         </args>
+                        <jvmArgs>
+                            <jvmArg>-Xss128m</jvmArg>
+                            <jvmArg>-Xms4g</jvmArg>
+                            <jvmArg>-Xmx4g</jvmArg>
+                            <jvmArg>-XX:MaxMetaspaceSize=2g</jvmArg>
+                        </jvmArgs>
                         <compilerPlugins>
                             <compilerPlugin>
                                 <groupId>org.scalameta</groupId>
diff --git 
a/server/mailet/mailets/src/main/java/org/apache/james/transport/matchers/dlp/Dlp.java
 
b/server/mailet/mailets/src/main/java/org/apache/james/transport/matchers/dlp/Dlp.java
index 945b170..a9b3e8f 100644
--- 
a/server/mailet/mailets/src/main/java/org/apache/james/transport/matchers/dlp/Dlp.java
+++ 
b/server/mailet/mailets/src/main/java/org/apache/james/transport/matchers/dlp/Dlp.java
@@ -19,14 +19,18 @@
 
 package org.apache.james.transport.matchers.dlp;
 
+import java.time.temporal.ChronoUnit;
 import java.util.Collection;
 import java.util.Optional;
 
 import javax.inject.Inject;
+import javax.mail.MessagingException;
 
 import org.apache.james.core.MailAddress;
 import org.apache.james.dlp.api.DLPConfigurationItem;
 import org.apache.james.dlp.api.DLPConfigurationStore;
+import org.apache.james.metrics.api.GaugeRegistry;
+import org.apache.james.util.DurationParser;
 import org.apache.mailet.Attribute;
 import org.apache.mailet.AttributeName;
 import org.apache.mailet.AttributeValue;
@@ -36,20 +40,66 @@ import org.apache.mailet.base.GenericMatcher;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.ImmutableList;
 
+/**
+ * Enable evaluation of incoming emails against DLP rules (Data Leak 
Prevention) attached to the sender domains.
+ *
+ * Example:
+ *
+ *  &lt;mailet match="DLP" class="ToRepository"&gt;
+ *     &lt;repositoryPath&gt;/var/mail/quarantine&lt;/repositoryPath&gt;
+ *  &lt;/mailet&gt;
+ *
+ *  Rules can be administered via webAdmin, cf: 
https://james.apache.org/server/manage-webadmin.html#Administrating_DLP_Configuration
+ *
+ *  Only available on top of Memory and Cassandra storages.
+ *
+ *  Additionally a cache can be added to reduce queries done to the underlying 
database.
+ *
+ *  Example:
+ *
+ *  &lt;mailet match="DLP=cache:60s" class="ToRepository"&gt;
+ *     &lt;repositoryPath&gt;/var/mail/quarantine&lt;/repositoryPath&gt;
+ *  &lt;/mailet&gt;
+ *
+ *  Will query the DLP rules for a given domain at most once every 60 seconds.
+ *
+ *  Please note that querying DLP rules on top of Cassandra relies on Event 
sourcing, involves reading a potentially
+ *  large event stream and involves some SERIAL reads (lightweight 
transactions) for each processed email.
+ *
+ *  Efficiency of the cache can be tracked with the following metrics:
+ *
+ *   - dlp.cache.hitRate
+ *   - dlp.cache.missCount
+ *   - dlp.cache.hitCount
+ *   - dlp.cache.size
+ */
 public class Dlp extends GenericMatcher {
 
     private static final AttributeName DLP_MATCHED_RULE = 
AttributeName.of("DlpMatchedRule");
+    public static final String CACHE_PREFIX = "cache:";
 
-    private final DlpRulesLoader rulesLoader;
+    private final DlpRulesLoader backendRulesLoader;
+    private final GaugeRegistry gaugeRegistry;
+    private DlpRulesLoader rulesLoader;
 
     @VisibleForTesting
-    Dlp(DlpRulesLoader rulesLoader) {
-        this.rulesLoader = rulesLoader;
+    Dlp(DlpRulesLoader rulesLoader, GaugeRegistry gaugeRegistry) {
+        this.backendRulesLoader = rulesLoader;
+        this.gaugeRegistry = gaugeRegistry;
+        this.rulesLoader = backendRulesLoader;
     }
 
     @Inject
-    public Dlp(DLPConfigurationStore configurationStore) {
-        this(new DlpRulesLoader.Impl(configurationStore));
+    public Dlp(DLPConfigurationStore configurationStore, GaugeRegistry 
gaugeRegistry) {
+        this(new DlpRulesLoader.Impl(configurationStore), gaugeRegistry);
+    }
+
+    @Override
+    public void init() throws MessagingException {
+        if (getCondition() != null && getCondition().startsWith(CACHE_PREFIX)) 
{
+            rulesLoader = new DlpRulesLoader.Caching(backendRulesLoader, 
gaugeRegistry,
+                
DurationParser.parse(getCondition().substring(CACHE_PREFIX.length()), 
ChronoUnit.SECONDS));
+        }
     }
 
     @Override
diff --git 
a/server/mailet/mailets/src/main/java/org/apache/james/transport/matchers/dlp/DlpRulesLoader.java
 
b/server/mailet/mailets/src/main/java/org/apache/james/transport/matchers/dlp/DlpRulesLoader.java
index 3d1f95f..00aa487 100644
--- 
a/server/mailet/mailets/src/main/java/org/apache/james/transport/matchers/dlp/DlpRulesLoader.java
+++ 
b/server/mailet/mailets/src/main/java/org/apache/james/transport/matchers/dlp/DlpRulesLoader.java
@@ -19,11 +19,19 @@
 
 package org.apache.james.transport.matchers.dlp;
 
+import java.time.Duration;
+import java.util.concurrent.ExecutionException;
+
 import javax.inject.Inject;
 
 import org.apache.james.core.Domain;
 import org.apache.james.dlp.api.DLPConfigurationStore;
 import org.apache.james.dlp.api.DLPRules;
+import org.apache.james.metrics.api.GaugeRegistry;
+
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
 
 import reactor.core.publisher.Mono;
 
@@ -54,4 +62,34 @@ public interface DlpRulesLoader {
             return builder.build();
         }
     }
+
+    class Caching implements DlpRulesLoader {
+        private final LoadingCache<Domain, DlpDomainRules> cache;
+
+        public Caching(DlpRulesLoader wrapped, GaugeRegistry gaugeRegistry, 
Duration cacheDuration) {
+            cache = CacheBuilder.newBuilder()
+                .expireAfterWrite(cacheDuration)
+                .recordStats()
+                .build(new CacheLoader<>() {
+                    @Override
+                    public DlpDomainRules load(Domain domain) {
+                        return wrapped.load(domain);
+                    }
+                });
+
+            gaugeRegistry.register("dlp.cache.hitRate", () -> 
cache.stats().hitRate());
+            gaugeRegistry.register("dlp.cache.missCount", () -> 
cache.stats().missCount());
+            gaugeRegistry.register("dlp.cache.hitCount", () -> 
cache.stats().hitCount());
+            gaugeRegistry.register("dlp.cache.size", cache::size);
+        }
+
+        @Override
+        public DlpDomainRules load(Domain domain) {
+            try {
+                return cache.get(domain);
+            } catch (ExecutionException e) {
+                throw new RuntimeException(e);
+            }
+        }
+    }
 }
diff --git 
a/server/mailet/mailets/src/test/java/org/apache/james/transport/matchers/dlp/DlpTest.java
 
b/server/mailet/mailets/src/test/java/org/apache/james/transport/matchers/dlp/DlpTest.java
index 0f3669b..b3dca60 100644
--- 
a/server/mailet/mailets/src/test/java/org/apache/james/transport/matchers/dlp/DlpTest.java
+++ 
b/server/mailet/mailets/src/test/java/org/apache/james/transport/matchers/dlp/DlpTest.java
@@ -37,10 +37,13 @@ import org.apache.james.core.Domain;
 import org.apache.james.core.MailAddress;
 import org.apache.james.core.builder.MimeMessageBuilder;
 import org.apache.james.dlp.api.DLPConfigurationItem.Id;
+import org.apache.james.metrics.api.NoopGaugeRegistry;
 import org.apache.mailet.Attribute;
 import org.apache.mailet.AttributeName;
 import org.apache.mailet.AttributeValue;
 import org.apache.mailet.base.test.FakeMail;
+import org.apache.mailet.base.test.FakeMatcherConfig;
+import org.assertj.core.api.SoftAssertions;
 import org.junit.jupiter.api.Test;
 
 class DlpTest {
@@ -58,7 +61,7 @@ class DlpTest {
 
     @Test
     void matchShouldReturnEmptyWhenNoRecipient() throws Exception {
-        Dlp dlp = new Dlp(MATCH_ALL_FOR_ALL_DOMAINS);
+        Dlp dlp = new Dlp(MATCH_ALL_FOR_ALL_DOMAINS, new NoopGaugeRegistry());
 
         FakeMail mail = 
FakeMail.builder().name("name").sender(RECIPIENT1).build();
 
@@ -67,7 +70,7 @@ class DlpTest {
 
     @Test
     void matchShouldReturnEmptyWhenNoSender() throws Exception {
-        Dlp dlp = new Dlp(MATCH_ALL_FOR_ALL_DOMAINS);
+        Dlp dlp = new Dlp(MATCH_ALL_FOR_ALL_DOMAINS, new NoopGaugeRegistry());
 
         FakeMail mail = 
FakeMail.builder().name("name").recipient(RECIPIENT1).build();
 
@@ -76,14 +79,14 @@ class DlpTest {
 
     @Test
     void matchShouldThrowOnNullMail() {
-        Dlp dlp = new Dlp(MATCH_ALL_FOR_ALL_DOMAINS);
+        Dlp dlp = new Dlp(MATCH_ALL_FOR_ALL_DOMAINS, new NoopGaugeRegistry());
 
         assertThatThrownBy(() -> 
dlp.match(null)).isInstanceOf(NullPointerException.class);
     }
 
     @Test
     void matchShouldReturnEmptyWhenNoRuleMatch() throws Exception {
-        Dlp dlp = new Dlp(MATCH_NOTHING_FOR_ALL_DOMAINS);
+        Dlp dlp = new Dlp(MATCH_NOTHING_FOR_ALL_DOMAINS, new 
NoopGaugeRegistry());
 
         FakeMail mail = FakeMail.builder()
             .name("name")
@@ -100,7 +103,7 @@ class DlpTest {
         Dlp dlp = new Dlp(
             asRulesLoaderFor(
                 JAMES_APACHE_ORG_DOMAIN,
-                DlpDomainRules.builder().senderRule(Id.of("match sender"), 
Pattern.compile(ANY_AT_JAMES.asString())).build()));
+                DlpDomainRules.builder().senderRule(Id.of("match sender"), 
Pattern.compile(ANY_AT_JAMES.asString())).build()), new NoopGaugeRegistry());
 
         FakeMail mail = 
FakeMail.builder().name("name").sender(ANY_AT_JAMES).recipient(RECIPIENT1).build();
 
@@ -112,7 +115,7 @@ class DlpTest {
         Dlp dlp = new Dlp(
             asRulesLoaderFor(
                 JAMES_APACHE_ORG_DOMAIN,
-                DlpDomainRules.builder().recipientRule(Id.of("match all 
recipient"), Pattern.compile(".*")).build()));
+                DlpDomainRules.builder().recipientRule(Id.of("match all 
recipient"), Pattern.compile(".*")).build()), new NoopGaugeRegistry());
 
         FakeMail mail = 
FakeMail.builder().name("name").sender(MailAddress.nullSender()).recipient(RECIPIENT1).build();
 
@@ -124,7 +127,7 @@ class DlpTest {
         Dlp dlp = new Dlp(
             asRulesLoaderFor(
                 JAMES_APACHE_ORG_DOMAIN,
-                DlpDomainRules.builder().senderRule(Id.of("match sender"), 
Pattern.compile(ANY_AT_JAMES.asString())).build()));
+                DlpDomainRules.builder().senderRule(Id.of("match sender"), 
Pattern.compile(ANY_AT_JAMES.asString())).build()), new NoopGaugeRegistry());
 
         FakeMail mail = FakeMail
             .builder()
@@ -144,7 +147,7 @@ class DlpTest {
         Dlp dlp = new Dlp(
             asRulesLoaderFor(
                 JAMES_APACHE_ORG_DOMAIN,
-                DlpDomainRules.builder().recipientRule(Id.of("match 
recipient"), Pattern.compile(RECIPIENT1.asString())).build()));
+                DlpDomainRules.builder().recipientRule(Id.of("match 
recipient"), Pattern.compile(RECIPIENT1.asString())).build()), new 
NoopGaugeRegistry());
 
         FakeMail mail = FakeMail.builder()
             .name("name")
@@ -156,12 +159,44 @@ class DlpTest {
         assertThat(dlp.match(mail)).contains(RECIPIENT1, RECIPIENT2);
     }
 
+
+    @Test
+    void matchShouldReturnRecipientsWhenCached() throws Exception {
+        Dlp dlp = new Dlp(
+            asRulesLoaderFor(
+                JAMES_APACHE_ORG_DOMAIN,
+                DlpDomainRules.builder().recipientRule(Id.of("match 
recipient"), Pattern.compile(RECIPIENT1.asString())).build()), new 
NoopGaugeRegistry());
+
+        dlp.init(FakeMatcherConfig.builder()
+            .condition("cache:60s")
+            .matcherName("DLP")
+            .build());
+
+        FakeMail mail1 = FakeMail.builder()
+            .name("name")
+            .sender(ANY_AT_JAMES)
+            .recipient(RECIPIENT1)
+            .recipient(RECIPIENT2)
+            .build();
+        FakeMail mail2 = FakeMail.builder()
+            .name("name")
+            .sender(ANY_AT_JAMES)
+            .recipient(RECIPIENT1)
+            .recipient(RECIPIENT2)
+            .build();
+
+        SoftAssertions.assertSoftly(softly -> {
+                softly.assertThat(dlp.match(mail1)).contains(RECIPIENT1, 
RECIPIENT2);
+                softly.assertThat(dlp.match(mail2)).contains(RECIPIENT1, 
RECIPIENT2);
+            });
+    }
+
     @Test
     void matchShouldReturnRecipientsWhenToHeaderMatches() throws Exception {
         Dlp dlp = new Dlp(
             asRulesLoaderFor(
                 JAMES_APACHE_ORG_DOMAIN,
-                DlpDomainRules.builder().recipientRule(Id.of("match 
recipient"), Pattern.compile(RECIPIENT2.asString())).build()));
+                DlpDomainRules.builder().recipientRule(Id.of("match 
recipient"), Pattern.compile(RECIPIENT2.asString())).build()), new 
NoopGaugeRegistry());
 
         FakeMail mail = FakeMail
             .builder()
@@ -181,7 +216,7 @@ class DlpTest {
         Dlp dlp = new Dlp(
             asRulesLoaderFor(
                 JAMES_APACHE_ORG_DOMAIN,
-                DlpDomainRules.builder().recipientRule(Id.of("match 
recipient"), Pattern.compile(RECIPIENT2.asString())).build()));
+                DlpDomainRules.builder().recipientRule(Id.of("match 
recipient"), Pattern.compile(RECIPIENT2.asString())).build()), new 
NoopGaugeRegistry());
 
         FakeMail mail = FakeMail
             .builder()
@@ -201,7 +236,7 @@ class DlpTest {
         Dlp dlp = new Dlp(
             asRulesLoaderFor(
                 JAMES_APACHE_ORG_DOMAIN,
-                DlpDomainRules.builder().recipientRule(Id.of("match 
recipient"), Pattern.compile(RECIPIENT2.asString())).build()));
+                DlpDomainRules.builder().recipientRule(Id.of("match 
recipient"), Pattern.compile(RECIPIENT2.asString())).build()), new 
NoopGaugeRegistry());
 
         FakeMail mail = FakeMail
             .builder()
@@ -221,7 +256,7 @@ class DlpTest {
         Dlp dlp = new Dlp(
             asRulesLoaderFor(
                 JAMES_APACHE_ORG_DOMAIN,
-                DlpDomainRules.builder().contentRule(Id.of("match subject"), 
Pattern.compile("pony")).build()));
+                DlpDomainRules.builder().contentRule(Id.of("match subject"), 
Pattern.compile("pony")).build()), new NoopGaugeRegistry());
 
         FakeMail mail = FakeMail
             .builder()
@@ -241,7 +276,7 @@ class DlpTest {
         Dlp dlp = new Dlp(
             asRulesLoaderFor(
                 JAMES_APACHE_ORG_DOMAIN,
-                DlpDomainRules.builder().contentRule(Id.of("match content"), 
Pattern.compile("horse")).build()));
+                DlpDomainRules.builder().contentRule(Id.of("match content"), 
Pattern.compile("horse")).build()), new NoopGaugeRegistry());
 
         FakeMail mail = FakeMail
             .builder()
@@ -262,7 +297,7 @@ class DlpTest {
         Dlp dlp = new Dlp(
             asRulesLoaderFor(
                 JAMES_APACHE_ORG_DOMAIN,
-                DlpDomainRules.builder().contentRule(Id.of("match content"), 
Pattern.compile("horse")).build()));
+                DlpDomainRules.builder().contentRule(Id.of("match content"), 
Pattern.compile("horse")).build()), new NoopGaugeRegistry());
 
         FakeMail mail = FakeMail
             .builder()
@@ -282,7 +317,7 @@ class DlpTest {
         Dlp dlp = new Dlp(
             asRulesLoaderFor(
                 JAMES_APACHE_ORG_DOMAIN,
-                DlpDomainRules.builder().contentRule(Id.of("match content"), 
Pattern.compile("horse")).build()));
+                DlpDomainRules.builder().contentRule(Id.of("match content"), 
Pattern.compile("horse")).build()), new NoopGaugeRegistry());
 
         FakeMail mail = FakeMail
             .builder()
@@ -307,7 +342,7 @@ class DlpTest {
         Dlp dlp = new Dlp(
             asRulesLoaderFor(
                 JAMES_APACHE_ORG_DOMAIN,
-                DlpDomainRules.builder().contentRule(Id.of("match content"), 
Pattern.compile("horse")).build()));
+                DlpDomainRules.builder().contentRule(Id.of("match content"), 
Pattern.compile("horse")).build()), new NoopGaugeRegistry());
 
         FakeMail mail = FakeMail
             .builder()
@@ -339,7 +374,7 @@ class DlpTest {
                 JAMES_APACHE_ORG_DOMAIN,
                 DlpDomainRules.builder()
                     .senderRule(Id.of("match content"), 
Pattern.compile(RECIPIENT2.asString()))
-                    .build()));
+                    .build()), new NoopGaugeRegistry());
 
         FakeMail mail = FakeMail
             .builder()
@@ -372,7 +407,7 @@ class DlpTest {
                 JAMES_APACHE_ORG_DOMAIN,
                 DlpDomainRules.builder()
                     .recipientRule(Id.of("match content"), 
Pattern.compile(RECIPIENT2.asString()))
-                    .build()));
+                    .build()), new NoopGaugeRegistry());
 
         FakeMail mail = FakeMail
             .builder()
@@ -405,7 +440,7 @@ class DlpTest {
                 JAMES_APACHE_ORG_DOMAIN,
                 DlpDomainRules.builder()
                     .contentRule(Id.of("match content"), 
Pattern.compile("poné"))
-                    .build()));
+                    .build()), new NoopGaugeRegistry());
 
         FakeMail mail = FakeMail
             .builder()
@@ -429,7 +464,7 @@ class DlpTest {
                 JAMES_APACHE_ORG_DOMAIN,
                 DlpDomainRules.builder()
                     .contentRule(Id.of("match content"), 
Pattern.compile("poné"))
-                    .build()));
+                    .build()), new NoopGaugeRegistry());
 
         FakeMail mail = FakeMail
             .builder()
@@ -453,7 +488,7 @@ class DlpTest {
                 JAMES_APACHE_ORG_DOMAIN,
                 DlpDomainRules.builder()
                     .recipientRule(Id.of("id1"), 
Pattern.compile(RECIPIENT1.asString()))
-                    .build()));
+                    .build()), new NoopGaugeRegistry());
 
         MimeMessageBuilder meaninglessText = MimeMessageBuilder
             .mimeMessageBuilder()
@@ -479,7 +514,7 @@ class DlpTest {
                 JAMES_APACHE_ORG_DOMAIN,
                 DlpDomainRules.builder()
                     .recipientRule(Id.of("id1"), Pattern.compile("Benoît"))
-                    .build()));
+                    .build()), new NoopGaugeRegistry());
 
         MimeMessageBuilder meaninglessText = MimeMessageBuilder
             .mimeMessageBuilder()
@@ -505,7 +540,7 @@ class DlpTest {
                 JAMES_APACHE_ORG_DOMAIN,
                 DlpDomainRules.builder()
                     .senderRule(Id.of("id1"), Pattern.compile("Benoît"))
-                    .build()));
+                    .build()), new NoopGaugeRegistry());
 
         MimeMessageBuilder meaninglessText = MimeMessageBuilder
             .mimeMessageBuilder()
@@ -533,7 +568,7 @@ class DlpTest {
                 DlpDomainRules.builder()
                     .recipientRule(Id.of("should not match recipient"), 
Pattern.compile(RECIPIENT3.asString()))
                     .senderRule(Id.of(attributeValue), 
Pattern.compile(JAMES_APACHE_ORG))
-                    .build()));
+                    .build()), new NoopGaugeRegistry());
 
         FakeMail mail = FakeMail.builder()
             .name("name")

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to