This is an automated email from the ASF dual-hosted git repository. btellier pushed a commit to branch 3.7.x in repository https://gitbox.apache.org/repos/asf/james-project.git
commit 6ffe98944c66f61bdee0958fdc80acc399ebe79d Author: Benoit TELLIER <btell...@linagora.com> AuthorDate: Thu Jan 23 12:34:59 2025 +0700 [FIX] Prevent HtmlTextExtractor to generate asymmetric outputs --- .../james/jmap/draft/utils/JsoupHtmlTextExtractor.java | 17 +++++++++++++++++ .../jmap/draft/utils/JsoupHtmlTextExtractorTest.java | 10 ++++++++++ 2 files changed, 27 insertions(+) diff --git a/server/protocols/jmap-draft/src/main/java/org/apache/james/jmap/draft/utils/JsoupHtmlTextExtractor.java b/server/protocols/jmap-draft/src/main/java/org/apache/james/jmap/draft/utils/JsoupHtmlTextExtractor.java index 41fbe88b65..cc8fb90685 100644 --- a/server/protocols/jmap-draft/src/main/java/org/apache/james/jmap/draft/utils/JsoupHtmlTextExtractor.java +++ b/server/protocols/jmap-draft/src/main/java/org/apache/james/jmap/draft/utils/JsoupHtmlTextExtractor.java @@ -40,6 +40,21 @@ import org.slf4j.LoggerFactory; import com.google.common.base.Strings; public class JsoupHtmlTextExtractor implements HtmlTextExtractor { + private static class Context { + private final long limit; + private long outputSize = 0; + + private Context() { + this.limit = 20_000_000L; + } + + void add(String s) { + outputSize += s.length(); + if (outputSize > limit) { + throw new IllegalStateException("text/plain rendering exceeds message limit"); + } + } + } private static final Logger LOGGER = LoggerFactory.getLogger(JsoupHtmlTextExtractor.class); public static final String BR_TAG = "br"; @@ -58,8 +73,10 @@ public class JsoupHtmlTextExtractor implements HtmlTextExtractor { Element body = Optional.ofNullable(document.body()).orElse(document); + Context context = new Context(); return flatten(body) .map(this::convertNodeToText) + .peek(context::add) .collect(Collectors.joining()); } catch (Exception e) { LOGGER.warn("Failed extracting text from html", e); diff --git a/server/protocols/jmap-draft/src/test/java/org/apache/james/jmap/draft/utils/JsoupHtmlTextExtractorTest.java b/server/protocols/jmap-draft/src/test/java/org/apache/james/jmap/draft/utils/JsoupHtmlTextExtractorTest.java index 4829c00c43..d6c68e39ae 100644 --- a/server/protocols/jmap-draft/src/test/java/org/apache/james/jmap/draft/utils/JsoupHtmlTextExtractorTest.java +++ b/server/protocols/jmap-draft/src/test/java/org/apache/james/jmap/draft/utils/JsoupHtmlTextExtractorTest.java @@ -65,6 +65,16 @@ public class JsoupHtmlTextExtractorTest { assertThat(textExtractor.toPlainText(html)).isEqualTo(expectedPlainText); } + @Test + public void asymmetricOutputShouldNotThrowOOM() { + int count = 20000; + String html = "<ul><li>a</li><li>a</li>".repeat(count) + "</ul>".repeat(count); + + // Computation aborted + assertThat(textExtractor.toPlainText(html)) + .isEqualTo(html); + } + @Test public void deeplyNestedHtmlShouldNotThrowStackOverflow() { final int count = 2048; --------------------------------------------------------------------- To unsubscribe, e-mail: notifications-unsubscr...@james.apache.org For additional commands, e-mail: notifications-h...@james.apache.org