This is an automated email from the ASF dual-hosted git repository.

btellier pushed a commit to branch 3.7.x
in repository https://gitbox.apache.org/repos/asf/james-project.git

commit 6ffe98944c66f61bdee0958fdc80acc399ebe79d
Author: Benoit TELLIER <btell...@linagora.com>
AuthorDate: Thu Jan 23 12:34:59 2025 +0700

    [FIX] Prevent HtmlTextExtractor to generate asymmetric outputs
---
 .../james/jmap/draft/utils/JsoupHtmlTextExtractor.java  | 17 +++++++++++++++++
 .../jmap/draft/utils/JsoupHtmlTextExtractorTest.java    | 10 ++++++++++
 2 files changed, 27 insertions(+)

diff --git a/server/protocols/jmap-draft/src/main/java/org/apache/james/jmap/draft/utils/JsoupHtmlTextExtractor.java b/server/protocols/jmap-draft/src/main/java/org/apache/james/jmap/draft/utils/JsoupHtmlTextExtractor.java
index 41fbe88b65..cc8fb90685 100644
--- a/server/protocols/jmap-draft/src/main/java/org/apache/james/jmap/draft/utils/JsoupHtmlTextExtractor.java
+++ b/server/protocols/jmap-draft/src/main/java/org/apache/james/jmap/draft/utils/JsoupHtmlTextExtractor.java
@@ -40,6 +40,21 @@ import org.slf4j.LoggerFactory;
 import com.google.common.base.Strings;
 
 public class JsoupHtmlTextExtractor implements HtmlTextExtractor {
+    private static class Context {
+        private final long limit;
+        private long outputSize = 0;
+
+        private Context() {
+            this.limit = 20_000_000L;
+        }
+
+        void add(String s) {
+            outputSize += s.length();
+            if (outputSize > limit) {
+                throw new IllegalStateException("text/plain rendering exceeds message limit");
+            }
+        }
+    }
 
     private static final Logger LOGGER = LoggerFactory.getLogger(JsoupHtmlTextExtractor.class);
     public static final String BR_TAG = "br";
@@ -58,8 +73,10 @@ public class JsoupHtmlTextExtractor implements HtmlTextExtractor {
 
             Element body = Optional.ofNullable(document.body()).orElse(document);
 
+            Context context = new Context();
             return flatten(body)
                 .map(this::convertNodeToText)
+                .peek(context::add)
                 .collect(Collectors.joining());
         } catch (Exception e) {
             LOGGER.warn("Failed extracting text from html", e);
diff --git a/server/protocols/jmap-draft/src/test/java/org/apache/james/jmap/draft/utils/JsoupHtmlTextExtractorTest.java b/server/protocols/jmap-draft/src/test/java/org/apache/james/jmap/draft/utils/JsoupHtmlTextExtractorTest.java
index 4829c00c43..d6c68e39ae 100644
--- a/server/protocols/jmap-draft/src/test/java/org/apache/james/jmap/draft/utils/JsoupHtmlTextExtractorTest.java
+++ b/server/protocols/jmap-draft/src/test/java/org/apache/james/jmap/draft/utils/JsoupHtmlTextExtractorTest.java
@@ -65,6 +65,16 @@ public class JsoupHtmlTextExtractorTest {
         assertThat(textExtractor.toPlainText(html)).isEqualTo(expectedPlainText);
     }
 
+    @Test
+    public void asymmetricOutputShouldNotThrowOOM() {
+        int count = 20000;
+        String html = "<ul><li>a</li><li>a</li>".repeat(count) + "</ul>".repeat(count);
+
+        // Computation aborted
+        assertThat(textExtractor.toPlainText(html))
+            .isEqualTo(html);
+    }
+
     @Test
     public void deeplyNestedHtmlShouldNotThrowStackOverflow() {
         final int count = 2048;


---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscr...@james.apache.org
For additional commands, e-mail: notifications-h...@james.apache.org

Reply via email to