This is an automated email from the ASF dual-hosted git repository. matthieu pushed a commit to branch refactorings-2 in repository https://gitbox.apache.org/repos/asf/james-project.git
commit 7cc6df02e8a589d9c64bcdb4e9f6eb38da14be01 Author: Matthieu Baechler <[email protected]> AuthorDate: Fri Feb 3 09:12:49 2023 +0100 Provide factory methods to simplify ParsedContent instantiations --- .../james/mailbox/extractor/ParsedContent.java | 21 ++++++++++++++++----- .../james/mailbox/opensearch/json/MimePart.java | 5 +---- .../opensearch/json/IndexableMessageTest.java | 4 ++-- .../mailbox/store/search/PDFTextExtractor.java | 7 ++----- .../store/extractor/DefaultTextExtractor.java | 10 ++++------ .../mailbox/store/extractor/JsoupTextExtractor.java | 10 ++-------- .../james/mailbox/tika/TikaTextExtractor.java | 2 +- .../mailbox/tika/CachingTextExtractorTest.java | 4 ++-- 8 files changed, 30 insertions(+), 33 deletions(-) diff --git a/mailbox/api/src/main/java/org/apache/james/mailbox/extractor/ParsedContent.java b/mailbox/api/src/main/java/org/apache/james/mailbox/extractor/ParsedContent.java index 4be66463f3..080bc42d0a 100644 --- a/mailbox/api/src/main/java/org/apache/james/mailbox/extractor/ParsedContent.java +++ b/mailbox/api/src/main/java/org/apache/james/mailbox/extractor/ParsedContent.java @@ -27,11 +27,26 @@ import java.util.Optional; import com.google.common.collect.ImmutableMap; public class ParsedContent { + public static ParsedContent empty() { + return new ParsedContent(Optional.empty(), ImmutableMap.of()); + } + + public static ParsedContent of(String textualContent) { + return new ParsedContent(Optional.of(textualContent), ImmutableMap.of()); + } + + public static ParsedContent of(Optional<String> textualContent) { + return new ParsedContent(textualContent, ImmutableMap.of()); + } + + public static ParsedContent of(Optional<String> textualContent, Map<String, List<String>> metadata) { + return new ParsedContent(textualContent, metadata); + } private final Optional<String> textualContent; private final Map<String, List<String>> metadata; - public ParsedContent(Optional<String> textualContent, Map<String, List<String>> metadata) { + private ParsedContent(Optional<String> textualContent, Map<String, List<String>> metadata) { this.textualContent = textualContent; this.metadata = metadata; } @@ -44,10 +59,6 @@ public class ParsedContent { return metadata; } - public static ParsedContent empty() { - return new ParsedContent(Optional.empty(), ImmutableMap.of()); - } - @Override public final boolean equals(Object o) { if (o instanceof ParsedContent) { diff --git a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/MimePart.java b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/MimePart.java index 8adf2d8d45..bce4c80cbe 100644 --- a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/MimePart.java +++ b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/MimePart.java @@ -42,7 +42,6 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.github.fge.lambdas.Throwing; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import reactor.core.publisher.Flux; @@ -205,9 +204,7 @@ public class MimePart { new ByteArrayInputStream(bodyContent.get()), contentType.orElse(null)); } - return Mono.fromCallable(() -> new ParsedContent( - Optional.ofNullable(IOUtils.toString(new ByteArrayInputStream(bodyContent.get()), charset.orElse(StandardCharsets.UTF_8))), - ImmutableMap.of())); + return Mono.fromCallable(() -> ParsedContent.of(IOUtils.toString(new ByteArrayInputStream(bodyContent.get()), charset.orElse(StandardCharsets.UTF_8)))); } private boolean shouldPerformTextExtraction() { diff --git a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/IndexableMessageTest.java b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/IndexableMessageTest.java index 97e48b19f9..b477c69f49 100644 --- a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/IndexableMessageTest.java +++ b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/IndexableMessageTest.java @@ -236,9 +236,9 @@ class IndexableMessageTest { TextExtractor textExtractor = mock(TextExtractor.class); when(textExtractor.applicable(any())).thenReturn(true); when(textExtractor.extractContentReactive(any(), any())) - .thenReturn(Mono.just(new ParsedContent(Optional.of("first attachment content"), ImmutableMap.of()))) + .thenReturn(Mono.just(ParsedContent.of("first attachment content"))) .thenReturn(Mono.error(new RuntimeException("second cannot be parsed"))) - .thenReturn(Mono.just(new ParsedContent(Optional.of("third attachment content"), ImmutableMap.of()))); + .thenReturn(Mono.just(ParsedContent.of("third attachment content"))); // When IndexableMessage indexableMessage = IndexableMessage.builder() diff --git a/mailbox/scanning-search/src/test/java/org/apache/james/mailbox/store/search/PDFTextExtractor.java b/mailbox/scanning-search/src/test/java/org/apache/james/mailbox/store/search/PDFTextExtractor.java index 8f82685165..0149246809 100644 --- a/mailbox/scanning-search/src/test/java/org/apache/james/mailbox/store/search/PDFTextExtractor.java +++ b/mailbox/scanning-search/src/test/java/org/apache/james/mailbox/store/search/PDFTextExtractor.java @@ -46,7 +46,7 @@ public class PDFTextExtractor implements TextExtractor { if (isPDF(contentType)) { return extractTextFromPDF(inputStream); } - return new ParsedContent(Optional.ofNullable(IOUtils.toString(inputStream, StandardCharsets.UTF_8)), ImmutableMap.of()); + return ParsedContent.of(IOUtils.toString(inputStream, StandardCharsets.UTF_8)); } private boolean isPDF(ContentType contentType) { @@ -54,9 +54,6 @@ public class PDFTextExtractor implements TextExtractor { } private ParsedContent extractTextFromPDF(InputStream inputStream) throws IOException { - return new ParsedContent( - Optional.ofNullable(new PDFTextStripper().getText( - PDDocument.load(inputStream))), - ImmutableMap.of()); + return ParsedContent.of(new PDFTextStripper().getText(PDDocument.load(inputStream))); } } diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java index 2bf97902d7..b710879541 100644 --- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java +++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java @@ -22,8 +22,6 @@ package org.apache.james.mailbox.store.extractor; import java.io.InputStream; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; -import java.util.HashMap; -import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.james.mailbox.extractor.ParsedContent; @@ -51,9 +49,9 @@ public class DefaultTextExtractor implements TextExtractor { try (var input = inputStream) { if (applicable(contentType)) { Charset charset = contentType.charset().orElse(StandardCharsets.UTF_8); - return new ParsedContent(Optional.ofNullable(IOUtils.toString(input, charset)), new HashMap<>()); + return ParsedContent.of(IOUtils.toString(input, charset)); } else { - return new ParsedContent(Optional.empty(), new HashMap<>()); + return ParsedContent.empty(); } } } @@ -63,11 +61,11 @@ public class DefaultTextExtractor implements TextExtractor { if (applicable(contentType)) { Charset charset = contentType.charset().orElse(StandardCharsets.UTF_8); return Mono.using(() -> inputStream, - stream -> Mono.fromCallable(() -> new ParsedContent(Optional.ofNullable(IOUtils.toString(stream, charset)), new HashMap<>())) + stream -> Mono.fromCallable(() -> ParsedContent.of(IOUtils.toString(stream, charset))) .subscribeOn(Schedulers.boundedElastic()), Throwing.consumer(InputStream::close).orDoNothing()); } else { - return Mono.just(new ParsedContent(Optional.empty(), new HashMap<>())); + return Mono.just(ParsedContent.empty()); } } } diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/JsoupTextExtractor.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/JsoupTextExtractor.java index 01281e04cb..282d4499c6 100644 --- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/JsoupTextExtractor.java +++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/JsoupTextExtractor.java @@ -23,9 +23,6 @@ import java.io.IOException; import java.io.InputStream; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; -import java.util.List; -import java.util.Map; -import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.james.mailbox.extractor.ParsedContent; @@ -35,15 +32,12 @@ import org.apache.james.mailbox.model.ContentType.MimeType; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; -import com.google.common.collect.ImmutableMap; - import reactor.core.publisher.Mono; import reactor.core.scheduler.Schedulers; public class JsoupTextExtractor implements TextExtractor { private static final String TITLE_HTML_TAG = "title"; private static final String NO_BASE_URI = ""; - private static final Map<String, List<String>> EMPTY_METADATA = ImmutableMap.of(); private static final MimeType TEXT_HTML = MimeType.of("text/html"); private static final MimeType TEXT_PLAIN = MimeType.of("text/plain"); @@ -88,12 +82,12 @@ public class JsoupTextExtractor implements TextExtractor { } private ParsedContent parsePlainTextContent(InputStream inputStream, Charset charset) throws IOException { - return new ParsedContent(Optional.ofNullable(IOUtils.toString(inputStream, charset)), EMPTY_METADATA); + return ParsedContent.of(IOUtils.toString(inputStream, charset)); } private ParsedContent parseHtmlContent(InputStream inputStream, Charset charset) throws IOException { Document doc = Jsoup.parse(inputStream, charset.name(), NO_BASE_URI); doc.select(TITLE_HTML_TAG).remove(); - return new ParsedContent(Optional.ofNullable(doc.text()), EMPTY_METADATA); + return ParsedContent.of(doc.text()); } } diff --git a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaTextExtractor.java b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaTextExtractor.java index 8c639096b7..0c1e833cf9 100644 --- a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaTextExtractor.java +++ b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaTextExtractor.java @@ -96,7 +96,7 @@ public class TikaTextExtractor implements TextExtractor { public Mono<ParsedContent> performContentExtraction(InputStream inputStream, ContentType contentType) { Mono<ContentAndMetadata> contentAndMetadata = convert(tikaHttpClient.recursiveMetaDataAsJson(inputStream, contentType)); return contentAndMetadata - .map(result -> new ParsedContent(result.getContent(), result.getMetadata())); + .map(result -> ParsedContent.of(result.getContent(), result.getMetadata())); } private Mono<ContentAndMetadata> convert(Mono<InputStream> maybeInputStream) { diff --git a/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/CachingTextExtractorTest.java b/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/CachingTextExtractorTest.java index e402edf027..494a80e896 100644 --- a/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/CachingTextExtractorTest.java +++ b/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/CachingTextExtractorTest.java @@ -56,9 +56,9 @@ import reactor.core.publisher.Mono; class CachingTextExtractorTest { - private static final ParsedContent RESULT = new ParsedContent(Optional.of("content"), ImmutableMap.of()); + private static final ParsedContent RESULT = ParsedContent.of("content"); public static final String BIG_STRING = Strings.repeat("0123456789", 103 * 1024); - private static final ParsedContent _2MiB_RESULT = new ParsedContent(Optional.of(BIG_STRING), ImmutableMap.of()); + private static final ParsedContent _2MiB_RESULT = ParsedContent.of(BIG_STRING); private static final Function<Integer, InputStream> STREAM_GENERATOR = i -> new ByteArrayInputStream(String.format("content%d", i).getBytes(StandardCharsets.UTF_8)); private static final Supplier<InputStream> INPUT_STREAM = () -> STREAM_GENERATOR.apply(1); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
