This is an automated email from the ASF dual-hosted git repository. btellier pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/james-project.git
commit 14fb2f6d7c7df15459a87032df5c3a38adbbdb42 Author: Benoit Tellier <[email protected]> AuthorDate: Wed May 11 15:15:58 2022 +0700 JAMES-3719 Reading inputstream might be blocking Subscribes on an elastic scheduler when blocking reads might be performed. --- .../james/mailbox/extractor/TextExtractor.java | 4 +++- .../store/extractor/DefaultTextExtractor.java | 14 ++++++++++++++ .../mailbox/store/extractor/JsoupTextExtractor.java | 20 ++++++++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/mailbox/api/src/main/java/org/apache/james/mailbox/extractor/TextExtractor.java b/mailbox/api/src/main/java/org/apache/james/mailbox/extractor/TextExtractor.java index 2822ee02e8..7891557039 100644 --- a/mailbox/api/src/main/java/org/apache/james/mailbox/extractor/TextExtractor.java +++ b/mailbox/api/src/main/java/org/apache/james/mailbox/extractor/TextExtractor.java @@ -24,6 +24,7 @@ import java.io.InputStream; import org.apache.james.mailbox.model.ContentType; import reactor.core.publisher.Mono; +import reactor.core.scheduler.Schedulers; public interface TextExtractor { default boolean applicable(ContentType contentType) { @@ -33,7 +34,8 @@ public interface TextExtractor { ParsedContent extractContent(InputStream inputStream, ContentType contentType) throws Exception; default Mono<ParsedContent> extractContentReactive(InputStream inputStream, ContentType contentType) { - return Mono.fromCallable(() -> extractContent(inputStream, contentType)); + return Mono.fromCallable(() -> extractContent(inputStream, contentType)) + .subscribeOn(Schedulers.elastic()); } } diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java index 50cc8b68e3..2605f850e6 100644 --- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java +++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java @@ -30,6 +30,9 @@ import org.apache.james.mailbox.extractor.ParsedContent; import org.apache.james.mailbox.extractor.TextExtractor; import org.apache.james.mailbox.model.ContentType; +import reactor.core.publisher.Mono; +import reactor.core.scheduler.Schedulers; + /** * A default text extractor that is directly based on the input file provided. * @@ -50,4 +53,15 @@ public class DefaultTextExtractor implements TextExtractor { return new ParsedContent(Optional.empty(), new HashMap<>()); } } + + @Override + public Mono<ParsedContent> extractContentReactive(InputStream inputStream, ContentType contentType) { + if (applicable(contentType)) { + Charset charset = contentType.charset().orElse(StandardCharsets.UTF_8); + return Mono.fromCallable(() -> new ParsedContent(Optional.ofNullable(IOUtils.toString(inputStream, charset)), new HashMap<>())) + .subscribeOn(Schedulers.elastic()); + } else { + return Mono.just(new ParsedContent(Optional.empty(), new HashMap<>())); + } + } } diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/JsoupTextExtractor.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/JsoupTextExtractor.java index b06f55ffc0..48988c1dd0 100644 --- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/JsoupTextExtractor.java +++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/extractor/JsoupTextExtractor.java @@ -37,6 +37,9 @@ import org.jsoup.nodes.Document; import com.google.common.collect.ImmutableMap; +import reactor.core.publisher.Mono; +import reactor.core.scheduler.Schedulers; + public class JsoupTextExtractor implements TextExtractor { private static final String TITLE_HTML_TAG = "title"; private static final String NO_BASE_URI = ""; @@ -67,6 +70,23 @@ public class JsoupTextExtractor implements TextExtractor { return ParsedContent.empty(); } + @Override + public Mono<ParsedContent> extractContentReactive(InputStream inputStream, ContentType contentType) { + if (inputStream == null || contentType == null) { + return Mono.just(ParsedContent.empty()); + } + Charset charset = contentType.charset().orElse(StandardCharsets.UTF_8); + if (contentType.mimeType().equals(TEXT_HTML)) { + return Mono.fromCallable(() -> parseHtmlContent(inputStream, charset)) + .subscribeOn(Schedulers.elastic()); + } + if (contentType.mimeType().equals(TEXT_PLAIN)) { + return Mono.fromCallable(() -> parsePlainTextContent(inputStream, charset)) + .subscribeOn(Schedulers.elastic()); + } + return Mono.just(ParsedContent.empty()); + } + private ParsedContent parsePlainTextContent(InputStream inputStream, Charset charset) throws IOException { return new ParsedContent(Optional.ofNullable(IOUtils.toString(inputStream, charset)), EMPTY_METADATA); } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
