This is an automated email from the ASF dual-hosted git repository.

btellier pushed a commit to branch 3.8.x
in repository https://gitbox.apache.org/repos/asf/james-project.git

commit 2fadf8d7aa32b65d35866d4b3ed2c5ef139b4a3f
Author: Benoit TELLIER <btell...@linagora.com>
AuthorDate: Thu Jan 23 13:34:30 2025 +0700

    [FIX] Prevent HtmlTextExtractor to generate asymmetric outputs
---
 .../james/jmap/utils/JsoupHtmlTextExtractor.java   | 34 +++++++++++++++++++++-
 .../org/apache/james/jmap/core/Capability.scala    |  4 ++-
 .../jmap/utils/JsoupHtmlTextExtractorTest.java     | 10 +++++++
 .../jmap-rfc-8621/src/test/resources/example.html  | 19 ++++++++++++
 4 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/server/protocols/jmap-rfc-8621/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java b/server/protocols/jmap-rfc-8621/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java
index f84c5e0c88..4409d8bb74 100644
--- a/server/protocols/jmap-rfc-8621/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java
+++ b/server/protocols/jmap-rfc-8621/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java
@@ -26,7 +26,10 @@ import java.util.function.Predicate;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
+import javax.inject.Inject;
+
 import org.apache.commons.lang3.StringUtils;
+import org.apache.james.jmap.core.JmapRfc8621Configuration;
 import org.apache.james.util.html.HtmlTextExtractor;
 import org.apache.james.util.streams.Iterators;
 import org.jsoup.Jsoup;
@@ -40,8 +43,24 @@ import org.slf4j.LoggerFactory;
 import com.google.common.base.Strings;
 
 public class JsoupHtmlTextExtractor implements HtmlTextExtractor {
-
     private static final Logger LOGGER = LoggerFactory.getLogger(JsoupHtmlTextExtractor.class);
+
+    private static class Context {
+        private final long limit;
+        private long outputSize = 0;
+
+        private Context(JmapRfc8621Configuration configuration) {
+            this.limit = configuration.maxSizeAttachmentsPerEmail().asLong();
+        }
+
+        void add(String s) {
+            outputSize += s.length();
+            if (outputSize > limit) {
+                throw new IllegalStateException("text/plain redering exceeds message limit");
+            }
+        }
+    }
+
     public static final String BR_TAG = "br";
     public static final String UL_TAG = "ul";
     public static final String OL_TAG = "ol";
@@ -51,6 +70,17 @@ public class JsoupHtmlTextExtractor implements HtmlTextExtractor {
     public static final String ALT_TAG = "alt";
     public static final int INITIAL_LIST_NESTED_LEVEL = 0;
 
+    private final JmapRfc8621Configuration configuration;
+
+    @Inject
+    public JsoupHtmlTextExtractor(JmapRfc8621Configuration configuration) {
+        this.configuration = configuration;
+    }
+
+    public JsoupHtmlTextExtractor() {
+        this.configuration = JmapRfc8621Configuration.LOCALHOST_CONFIGURATION();
+    }
+
     @Override
     public String toPlainText(String html) {
         try {
@@ -58,8 +88,10 @@ public class JsoupHtmlTextExtractor implements HtmlTextExtractor {
 
             Element body = Optional.ofNullable(document.body()).orElse(document);
 
+            Context context = new Context(configuration);
             return flatten(body)
                 .map(this::convertNodeToText)
+                .peek(context::add)
                 .collect(Collectors.joining());
         } catch (Exception e) {
             LOGGER.warn("Failed extracting text from html", e);
diff --git a/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/core/Capability.scala b/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/core/Capability.scala
index b198b92f1c..94651b6fb3 100644
--- a/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/core/Capability.scala
+++ b/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/core/Capability.scala
@@ -204,7 +204,9 @@ object MaxSizeAttachmentsPerEmail {
 case class MaxMailboxesPerEmail(value: Option[UnsignedInt])
 case class MaxMailboxDepth(value: Option[UnsignedInt])
 case class MaxSizeMailboxName(value: UnsignedInt)
-case class MaxSizeAttachmentsPerEmail(value: UnsignedInt)
+case class MaxSizeAttachmentsPerEmail(value: UnsignedInt) {
+  def asLong()= value.value
+}
 case class MayCreateTopLevelMailbox(value: Boolean) extends AnyVal
 
 final case class MailCapabilityProperties(maxMailboxesPerEmail: MaxMailboxesPerEmail,
diff --git a/server/protocols/jmap-rfc-8621/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java b/server/protocols/jmap-rfc-8621/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java
index 685e0f1ee8..c17a21e5ec 100644
--- a/server/protocols/jmap-rfc-8621/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java
+++ b/server/protocols/jmap-rfc-8621/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java
@@ -65,6 +65,16 @@ public class JsoupHtmlTextExtractorTest {
         assertThat(textExtractor.toPlainText(html)).isEqualTo(expectedPlainText);
     }
 
+    @Test
+    public void asymmetricOutputShouldNotThrowOOM() {
+        int count = 20000;
+        String html = "<ul><li>a</li><li>a</li>".repeat(count) + "</ul>".repeat(count);
+
+        // Computation aborted
+        assertThat(textExtractor.toPlainText(html))
+            .isEqualTo(html);
+    }
+
     @Test
     public void deeplyNestedHtmlShouldNotThrowStackOverflow() {
         final int count = 2048;
diff --git a/server/protocols/jmap-rfc-8621/src/test/resources/example.html b/server/protocols/jmap-rfc-8621/src/test/resources/example.html
index 59d339593b..aa88bdbe81 100644
--- a/server/protocols/jmap-rfc-8621/src/test/resources/example.html
+++ b/server/protocols/jmap-rfc-8621/src/test/resources/example.html
@@ -1,3 +1,22 @@
+<!--~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  * Licensed to the Apache Software Foundation (ASF) under one   *
+  * or more contributor license agreements.  See the NOTICE file *
+  * distributed with this work for additional information        *
+  * regarding copyright ownership.  The ASF licenses this file   *
+  * to you under the Apache License, Version 2.0 (the            *
+  * "License"); you may not use this file except in compliance   *
+  * with the License.  You may obtain a copy of the License at   *
+  *                                                              *
+  *   http://www.apache.org/licenses/LICENSE-2.0                 *
+  *                                                              *
+  * Unless required by applicable law or agreed to in writing,   *
+  * software distributed under the License is distributed on an  *
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+  * KIND, either express or implied.  See the License for the    *
+  * specific language governing permissions and limitations      *
+  * under the License.                                           *
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-->
+
 <div class="section">
     <h3>Why a new Logo?<a name="Why_a_new_Logo"></a></h3>
 


---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscr...@james.apache.org
For additional commands, e-mail: notifications-h...@james.apache.org

Reply via email to