JAMES-2581 Add contentType blacklist for Tika Configuration
Project: http://git-wip-us.apache.org/repos/asf/james-project/repo Commit: http://git-wip-us.apache.org/repos/asf/james-project/commit/51f1d7ba Tree: http://git-wip-us.apache.org/repos/asf/james-project/tree/51f1d7ba Diff: http://git-wip-us.apache.org/repos/asf/james-project/diff/51f1d7ba Branch: refs/heads/master Commit: 51f1d7ba254c4a8d1aef4056a5299e4d54df6171 Parents: d91fe69 Author: datph <dphamho...@linagora.com> Authored: Tue Nov 6 18:24:52 2018 +0700 Committer: datph <dphamho...@linagora.com> Committed: Fri Nov 9 16:28:02 2018 +0700 ---------------------------------------------------------------------- .../james/mailbox/tika/TikaConfiguration.java | 27 +++++-- .../mailbox/tika/TikaConfigurationTest.java | 33 +++++++++ .../mailbox/TikaConfigurationReader.java | 10 +++ .../mailbox/TikaConfigurationReaderTest.java | 74 ++++++++++++++++++++ 4 files changed, 140 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/james-project/blob/51f1d7ba/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaConfiguration.java ---------------------------------------------------------------------- diff --git a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaConfiguration.java b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaConfiguration.java index 113c1e9..ce978ab 100644 --- a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaConfiguration.java +++ b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaConfiguration.java @@ -20,6 +20,7 @@ package org.apache.james.mailbox.tika; import java.time.Duration; +import java.util.List; import java.util.Objects; import java.util.Optional; import java.util.concurrent.TimeUnit; @@ -27,6 +28,7 @@ import java.util.concurrent.TimeUnit; import org.apache.james.util.Port; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; import com.google.common.primitives.Ints; public class TikaConfiguration { @@ -39,6 +41,7 @@ public class TikaConfiguration { private Optional<Integer> timeoutInMillis; private Optional<Duration> cacheEvictionPeriod; private Optional<Long> cacheWeightInBytes; + private ImmutableList.Builder<String> contentTypeBlacklist; private Builder() { isEnabled = Optional.empty(); @@ -48,6 +51,7 @@ public class TikaConfiguration { timeoutInMillis = Optional.empty(); cacheEvictionPeriod = Optional.empty(); cacheWeightInBytes = Optional.empty(); + contentTypeBlacklist = ImmutableList.builder(); } public Builder enable(Optional<Boolean> isEnabled) { @@ -136,6 +140,12 @@ public class TikaConfiguration { return this; } + public Builder contentTypeBlacklist(List<String> contentTypeBlacklist) { + Preconditions.checkNotNull(contentTypeBlacklist); + this.contentTypeBlacklist.addAll(contentTypeBlacklist); + return this; + } + public TikaConfiguration build() { port.ifPresent(Port::assertValid); @@ -146,7 +156,8 @@ public class TikaConfiguration { port.orElse(DEFAULT_PORT), timeoutInMillis.orElse(DEFAULT_TIMEOUT_IN_MS), cacheEvictionPeriod.orElse(DEFAULT_CACHE_EVICTION_PERIOD), - cacheWeightInBytes.orElse(DEFAULT_CACHE_LIMIT_100_MB)); + cacheWeightInBytes.orElse(DEFAULT_CACHE_LIMIT_100_MB), + contentTypeBlacklist.build()); } } @@ -168,8 +179,9 @@ public class TikaConfiguration { private final int timeoutInMillis; private final Duration cacheEvictionPeriod; private final long cacheWeightInBytes; + private final ImmutableList<String> contentTypeBlacklist; - private TikaConfiguration(boolean enabled, boolean cacheEnabled, String host, int port, int timeoutInMillis, Duration cacheEvictionPeriod, long cacheWeightInBytes) { + private TikaConfiguration(boolean enabled, boolean cacheEnabled, String host, int port, int timeoutInMillis, Duration cacheEvictionPeriod, long cacheWeightInBytes, ImmutableList<String> contentTypeBlacklist) { this.enabled = enabled; this.cacheEnabled = cacheEnabled; this.host = host; @@ -177,6 +189,7 @@ public class TikaConfiguration { this.timeoutInMillis = timeoutInMillis; this.cacheEvictionPeriod = cacheEvictionPeriod; this.cacheWeightInBytes = cacheWeightInBytes; + this.contentTypeBlacklist = contentTypeBlacklist; } public boolean isEnabled() { @@ -207,24 +220,30 @@ public class TikaConfiguration { return cacheWeightInBytes; } + public List<String> getContentTypeBlacklist() { + return contentTypeBlacklist; + } + @Override public final boolean equals(Object o) { if (o instanceof TikaConfiguration) { TikaConfiguration that = (TikaConfiguration) o; return Objects.equals(this.enabled, that.enabled) + && Objects.equals(this.cacheEnabled, that.cacheEnabled) && Objects.equals(this.port, that.port) && Objects.equals(this.timeoutInMillis, that.timeoutInMillis) && Objects.equals(this.cacheWeightInBytes, that.cacheWeightInBytes) && Objects.equals(this.host, that.host) - && Objects.equals(this.cacheEvictionPeriod, that.cacheEvictionPeriod); + && Objects.equals(this.cacheEvictionPeriod, that.cacheEvictionPeriod) + && Objects.equals(this.contentTypeBlacklist, that.contentTypeBlacklist); } return false; } @Override public final int hashCode() { - return Objects.hash(enabled, host, port, timeoutInMillis, cacheEvictionPeriod, cacheWeightInBytes); + return Objects.hash(enabled, cacheEnabled, host, port, timeoutInMillis, cacheEvictionPeriod, cacheWeightInBytes, contentTypeBlacklist); } } http://git-wip-us.apache.org/repos/asf/james-project/blob/51f1d7ba/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaConfigurationTest.java ---------------------------------------------------------------------- diff --git a/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaConfigurationTest.java b/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaConfigurationTest.java new file mode 100644 index 0000000..b83ed85 --- /dev/null +++ b/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaConfigurationTest.java @@ -0,0 +1,33 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. * + ****************************************************************/ + +package org.apache.james.mailbox.tika; + +import org.junit.Test; + +import nl.jqno.equalsverifier.EqualsVerifier; + +public class TikaConfigurationTest { + + @Test + public void shouldMatchBeanContract() { + EqualsVerifier.forClass(TikaConfiguration.class) + .verify(); + } +} http://git-wip-us.apache.org/repos/asf/james-project/blob/51f1d7ba/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaConfigurationReader.java ---------------------------------------------------------------------- diff --git a/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaConfigurationReader.java b/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaConfigurationReader.java index d39edea..2ab5240 100644 --- a/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaConfigurationReader.java +++ b/server/container/guice/cassandra-guice/src/main/java/org/apache/james/modules/mailbox/TikaConfigurationReader.java @@ -20,14 +20,17 @@ package org.apache.james.modules.mailbox; import java.time.Duration; +import java.util.List; import java.util.Optional; import org.apache.commons.configuration.Configuration; import org.apache.james.mailbox.tika.TikaConfiguration; import org.apache.james.util.Size; +import org.apache.james.util.StreamUtils; import org.apache.james.util.TimeConverter; import com.github.fge.lambdas.Throwing; +import com.google.common.collect.ImmutableList; public class TikaConfigurationReader { public static final String TIKA_ENABLED = "tika.enabled"; @@ -37,6 +40,7 @@ public class TikaConfigurationReader { public static final String TIKA_TIMEOUT_IN_MS = "tika.timeoutInMillis"; public static final String TIKA_CACHE_EVICTION_PERIOD = "tika.cache.eviction.period"; public static final String TIKA_CACHE_WEIGHT_MAX = "tika.cache.weight.max"; + public static final String TIKA_CONTENT_TYPE_BLACKLIST = "tika.contentType.blacklist"; public static TikaConfiguration readTikaConfiguration(Configuration configuration) { Optional<Boolean> enabled = Optional.ofNullable( @@ -65,6 +69,11 @@ public class TikaConfigurationReader { .map(Throwing.function(Size::parse)) .map(Size::asBytes); + List<String> contentTypeBlacklist = StreamUtils + .ofNullable(configuration.getStringArray(TIKA_CONTENT_TYPE_BLACKLIST)) + .map(String::trim) + .collect(ImmutableList.toImmutableList()); + return TikaConfiguration.builder() .enable(enabled) .host(host) @@ -73,6 +82,7 @@ public class TikaConfigurationReader { .cacheEnable(cacheEnabled) .cacheEvictionPeriod(cacheEvictionPeriod) .cacheWeightInBytes(cacheWeight) + .contentTypeBlacklist(contentTypeBlacklist) .build(); } } http://git-wip-us.apache.org/repos/asf/james-project/blob/51f1d7ba/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/mailbox/TikaConfigurationReaderTest.java ---------------------------------------------------------------------- diff --git a/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/mailbox/TikaConfigurationReaderTest.java b/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/mailbox/TikaConfigurationReaderTest.java index 14ae536..ab37be3 100644 --- a/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/mailbox/TikaConfigurationReaderTest.java +++ b/server/container/guice/cassandra-guice/src/test/java/org/apache/james/modules/mailbox/TikaConfigurationReaderTest.java @@ -24,10 +24,13 @@ import static org.assertj.core.api.Assertions.assertThat; import java.io.StringReader; import java.time.Duration; +import org.apache.commons.configuration.ConfigurationException; import org.apache.commons.configuration.PropertiesConfiguration; import org.apache.james.mailbox.tika.TikaConfiguration; import org.junit.Test; +import com.google.common.collect.ImmutableList; + public class TikaConfigurationReaderTest { @Test @@ -225,5 +228,76 @@ public class TikaConfigurationReaderTest { .build()); } + @Test + public void readTikaConfigurationShouldNotHaveContentTypeBlacklist() throws ConfigurationException { + PropertiesConfiguration configuration = new PropertiesConfiguration(); + configuration.load(new StringReader( + "tika.enabled=true\n" + + "tika.cache.enabled=true\n" + + "tika.host=172.0.0.5\n" + + "tika.port=889\n" + + "tika.timeoutInMillis=500\n" + + "tika.cache.weight.max=1520000")); + assertThat(TikaConfigurationReader.readTikaConfiguration(configuration)) + .isEqualTo( + TikaConfiguration.builder() + .enabled() + .cacheEnabled() + .host("172.0.0.5") + .port(889) + .timeoutInMillis(500) + .cacheWeightInBytes(1520000) + .contentTypeBlacklist(ImmutableList.of()) + .build()); + } + + @Test + public void readTikaConfigurationShouldHaveContentTypeBlacklist() throws ConfigurationException { + PropertiesConfiguration configuration = new PropertiesConfiguration(); + configuration.load(new StringReader( + "tika.enabled=true\n" + + "tika.cache.enabled=true\n" + + "tika.host=172.0.0.5\n" + + "tika.port=889\n" + + "tika.timeoutInMillis=500\n" + + "tika.cache.weight.max=1520000\n" + + "tika.contentType.blacklist=application/ics,application/zip")); + assertThat(TikaConfigurationReader.readTikaConfiguration(configuration)) + .isEqualTo( + TikaConfiguration.builder() + .enabled() + .cacheEnabled() + .host("172.0.0.5") + .port(889) + .timeoutInMillis(500) + .cacheWeightInBytes(1520000) + .contentTypeBlacklist(ImmutableList.of("application/ics", "application/zip")) + .build()); + } + + @Test + public void readTikaConfigurationShouldHaveContentTypeBlacklistWithWhiteSpace() throws ConfigurationException { + PropertiesConfiguration configuration = new PropertiesConfiguration(); + configuration.load(new StringReader( + "tika.enabled=true\n" + + "tika.cache.enabled=true\n" + + "tika.host=172.0.0.5\n" + + "tika.port=889\n" + + "tika.timeoutInMillis=500\n" + + "tika.cache.weight.max=1520000\n" + + "tika.contentType.blacklist=application/ics, application/zip")); + + assertThat(TikaConfigurationReader.readTikaConfiguration(configuration)) + .isEqualTo( + TikaConfiguration.builder() + .enabled() + .cacheEnabled() + .host("172.0.0.5") + .port(889) + .timeoutInMillis(500) + .cacheWeightInBytes(1520000) + .contentTypeBlacklist(ImmutableList.of("application/ics", "application/zip")) + .build()); + } } \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: server-dev-unsubscr...@james.apache.org For additional commands, e-mail: server-dev-h...@james.apache.org