This is an automated email from the ASF dual-hosted git repository. btellier pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/james-project.git
commit 9b036e11a2febb4e59f684d46011aad57c807f6f Author: Benoit TELLIER <[email protected]> AuthorDate: Mon Aug 26 16:21:02 2024 +0200 [FIX] Prevent HtmlTextExtractor to generate asymmetric outputs --- .../james/jmap/utils/JsoupHtmlTextExtractor.java | 34 +++++++++++++++++++++- .../org/apache/james/jmap/core/Capability.scala | 4 ++- .../jmap/utils/JsoupHtmlTextExtractorTest.java | 10 +++++++ 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/server/protocols/jmap-rfc-8621/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java b/server/protocols/jmap-rfc-8621/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java index 14f10a0414..f1f60da203 100644 --- a/server/protocols/jmap-rfc-8621/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java +++ b/server/protocols/jmap-rfc-8621/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java @@ -26,7 +26,10 @@ import java.util.function.Predicate; import java.util.stream.Collectors; import java.util.stream.Stream; +import jakarta.inject.Inject; + import org.apache.commons.lang3.StringUtils; +import org.apache.james.jmap.core.JmapRfc8621Configuration; import org.apache.james.util.html.HtmlTextExtractor; import org.apache.james.util.streams.Iterators; import org.jsoup.Jsoup; @@ -40,8 +43,24 @@ import org.slf4j.LoggerFactory; import com.google.common.base.Strings; public class JsoupHtmlTextExtractor implements HtmlTextExtractor { - private static final Logger LOGGER = LoggerFactory.getLogger(JsoupHtmlTextExtractor.class); + + private static class Context { + private final long limit; + private long outputSize = 0; + + private Context(JmapRfc8621Configuration configuration) { + this.limit = configuration.maxSizeAttachmentsPerEmail().asLong(); + } + + void add(String s) { + outputSize += s.length(); + if (outputSize > limit) { + throw new IllegalStateException("text/plain redering exceeds message limit"); + } + } + } + public static final String BR_TAG = "br"; public static final String UL_TAG = "ul"; public static final String OL_TAG = "ol"; @@ -51,6 +70,17 @@ public class JsoupHtmlTextExtractor implements HtmlTextExtractor { public static final String ALT_TAG = "alt"; public static final int INITIAL_LIST_NESTED_LEVEL = 0; + private final JmapRfc8621Configuration configuration; + + @Inject + public JsoupHtmlTextExtractor(JmapRfc8621Configuration configuration) { + this.configuration = configuration; + } + + public JsoupHtmlTextExtractor() { + this.configuration = JmapRfc8621Configuration.LOCALHOST_CONFIGURATION(); + } + @Override public String toPlainText(String html) { try { @@ -58,8 +88,10 @@ public class JsoupHtmlTextExtractor implements HtmlTextExtractor { Element body = Optional.ofNullable(document.body()).orElse(document); + Context context = new Context(configuration); return flatten(body) .map(this::convertNodeToText) + .peek(context::add) .collect(Collectors.joining()); } catch (Exception e) { LOGGER.warn("Failed extracting text from html", e); diff --git a/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/core/Capability.scala b/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/core/Capability.scala index cf99b0832a..407961b067 100644 --- a/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/core/Capability.scala +++ b/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/core/Capability.scala @@ -239,7 +239,9 @@ object MaxSizeAttachmentsPerEmail { case class MaxMailboxesPerEmail(value: Option[UnsignedInt]) case class MaxMailboxDepth(value: Option[UnsignedInt]) case class MaxSizeMailboxName(value: UnsignedInt) -case class MaxSizeAttachmentsPerEmail(value: UnsignedInt) +case class MaxSizeAttachmentsPerEmail(value: UnsignedInt) { + def asLong()= value.value +} object JmapUploadQuotaLimit { def of(size: Size): Try[JmapUploadQuotaLimit] = refined.refineV[UnsignedIntConstraint](size.asBytes()) match { diff --git a/server/protocols/jmap-rfc-8621/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java b/server/protocols/jmap-rfc-8621/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java index 4b6da3eae0..9f113c303b 100644 --- a/server/protocols/jmap-rfc-8621/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java +++ b/server/protocols/jmap-rfc-8621/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java @@ -65,6 +65,16 @@ public class JsoupHtmlTextExtractorTest { assertThat(textExtractor.toPlainText(html)).isEqualTo(expectedPlainText); } + @Test + public void asymmetricOutputShouldNotThrowOOM() { + int count = 20000; + String html = "<ul><li>a</li><li>a</li>".repeat(count) + "</ul>".repeat(count); + + // Computation aborted + assertThat(textExtractor.toPlainText(html)) + .isEqualTo(html); + } + @Test public void deeplyNestedHtmlShouldNotThrowStackOverflow() { final int count = 2048; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
