This is an automated email from the ASF dual-hosted git repository.

btellier pushed a commit to branch 3.8.x in repository https://gitbox.apache.org/repos/asf/james-project.git
commit 2fadf8d7aa32b65d35866d4b3ed2c5ef139b4a3f Author: Benoit TELLIER <btell...@linagora.com> AuthorDate: Thu Jan 23 13:34:30 2025 +0700 [FIX] Prevent HtmlTextExtractor to generate asymmetric outputs --- .../james/jmap/utils/JsoupHtmlTextExtractor.java | 34 +++++++++++++++++++++- .../org/apache/james/jmap/core/Capability.scala | 4 ++- .../jmap/utils/JsoupHtmlTextExtractorTest.java | 10 +++++++ .../jmap-rfc-8621/src/test/resources/example.html | 19 ++++++++++++ 4 files changed, 65 insertions(+), 2 deletions(-) diff --git a/server/protocols/jmap-rfc-8621/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java b/server/protocols/jmap-rfc-8621/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java index f84c5e0c88..4409d8bb74 100644 --- a/server/protocols/jmap-rfc-8621/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java +++ b/server/protocols/jmap-rfc-8621/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java @@ -26,7 +26,10 @@ import java.util.function.Predicate; import java.util.stream.Collectors; import java.util.stream.Stream; +import javax.inject.Inject; + import org.apache.commons.lang3.StringUtils; +import org.apache.james.jmap.core.JmapRfc8621Configuration; import org.apache.james.util.html.HtmlTextExtractor; import org.apache.james.util.streams.Iterators; import org.jsoup.Jsoup; @@ -40,8 +43,24 @@ import org.slf4j.LoggerFactory; import com.google.common.base.Strings; public class JsoupHtmlTextExtractor implements HtmlTextExtractor { - private static final Logger LOGGER = LoggerFactory.getLogger(JsoupHtmlTextExtractor.class); + + private static class Context { + private final long limit; + private long outputSize = 0; + + private Context(JmapRfc8621Configuration configuration) { + this.limit = configuration.maxSizeAttachmentsPerEmail().asLong(); + } + + void add(String s) { + outputSize += s.length(); + if (outputSize > limit) { + throw new IllegalStateException("text/plain redering exceeds message 
limit"); + } + } + } + public static final String BR_TAG = "br"; public static final String UL_TAG = "ul"; public static final String OL_TAG = "ol"; @@ -51,6 +70,17 @@ public class JsoupHtmlTextExtractor implements HtmlTextExtractor { public static final String ALT_TAG = "alt"; public static final int INITIAL_LIST_NESTED_LEVEL = 0; + private final JmapRfc8621Configuration configuration; + + @Inject + public JsoupHtmlTextExtractor(JmapRfc8621Configuration configuration) { + this.configuration = configuration; + } + + public JsoupHtmlTextExtractor() { + this.configuration = JmapRfc8621Configuration.LOCALHOST_CONFIGURATION(); + } + @Override public String toPlainText(String html) { try { @@ -58,8 +88,10 @@ public class JsoupHtmlTextExtractor implements HtmlTextExtractor { Element body = Optional.ofNullable(document.body()).orElse(document); + Context context = new Context(configuration); return flatten(body) .map(this::convertNodeToText) + .peek(context::add) .collect(Collectors.joining()); } catch (Exception e) { LOGGER.warn("Failed extracting text from html", e); diff --git a/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/core/Capability.scala b/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/core/Capability.scala index b198b92f1c..94651b6fb3 100644 --- a/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/core/Capability.scala +++ b/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/core/Capability.scala @@ -204,7 +204,9 @@ object MaxSizeAttachmentsPerEmail { case class MaxMailboxesPerEmail(value: Option[UnsignedInt]) case class MaxMailboxDepth(value: Option[UnsignedInt]) case class MaxSizeMailboxName(value: UnsignedInt) -case class MaxSizeAttachmentsPerEmail(value: UnsignedInt) +case class MaxSizeAttachmentsPerEmail(value: UnsignedInt) { + def asLong()= value.value +} case class MayCreateTopLevelMailbox(value: Boolean) extends AnyVal final case class 
MailCapabilityProperties(maxMailboxesPerEmail: MaxMailboxesPerEmail, diff --git a/server/protocols/jmap-rfc-8621/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java b/server/protocols/jmap-rfc-8621/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java index 685e0f1ee8..c17a21e5ec 100644 --- a/server/protocols/jmap-rfc-8621/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java +++ b/server/protocols/jmap-rfc-8621/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java @@ -65,6 +65,16 @@ public class JsoupHtmlTextExtractorTest { assertThat(textExtractor.toPlainText(html)).isEqualTo(expectedPlainText); } + @Test + public void asymmetricOutputShouldNotThrowOOM() { + int count = 20000; + String html = "<ul><li>a</li><li>a</li>".repeat(count) + "</ul>".repeat(count); + + // Computation aborted + assertThat(textExtractor.toPlainText(html)) + .isEqualTo(html); + } + @Test public void deeplyNestedHtmlShouldNotThrowStackOverflow() { final int count = 2048; diff --git a/server/protocols/jmap-rfc-8621/src/test/resources/example.html b/server/protocols/jmap-rfc-8621/src/test/resources/example.html index 59d339593b..aa88bdbe81 100644 --- a/server/protocols/jmap-rfc-8621/src/test/resources/example.html +++ b/server/protocols/jmap-rfc-8621/src/test/resources/example.html @@ -1,3 +1,22 @@ +<!--~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. 
You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. * + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~--> + <div class="section"> <h3>Why a new Logo?<a name="Why_a_new_Logo"></a></h3> --------------------------------------------------------------------- To unsubscribe, e-mail: notifications-unsubscr...@james.apache.org For additional commands, e-mail: notifications-h...@james.apache.org