This is an automated email from the ASF dual-hosted git repository.

btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git


The following commit(s) were added to refs/heads/master by this push:
     new a054420813 JAMES-3840 Sanitize UTF-8 string after splitting (#1266)
a054420813 is described below

commit a054420813616e478e30cc1005ccbad6896e3c9d
Author: Benoit TELLIER <[email protected]>
AuthorDate: Wed Oct 26 09:16:49 2022 +0700

    JAMES-3840 Sanitize UTF-8 string after splitting (#1266)
---
 .../org/apache/james/jmap/api/model/Preview.java   | 15 +++-
 .../apache/james/jmap/api/model/PreviewTest.java   | 84 ++++++++++++++++++++++
 2 files changed, 96 insertions(+), 3 deletions(-)

diff --git a/server/data/data-jmap/src/main/java/org/apache/james/jmap/api/model/Preview.java b/server/data/data-jmap/src/main/java/org/apache/james/jmap/api/model/Preview.java
index c0cc914a80..12e7ef34c9 100644
--- a/server/data/data-jmap/src/main/java/org/apache/james/jmap/api/model/Preview.java
+++ b/server/data/data-jmap/src/main/java/org/apache/james/jmap/api/model/Preview.java
@@ -22,6 +22,7 @@ package org.apache.james.jmap.api.model;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.charset.CharsetEncoder;
 import java.nio.charset.StandardCharsets;
 import java.util.Objects;
 
@@ -90,9 +91,17 @@ public class Preview {
     public static Preview compute(String textBody) {
         int previewOffsetEstimate = estimatePreviewOffset(textBody, MAX_LENGTH);
         String previewPart = textBody.substring(0, previewOffsetEstimate);
-        return Preview.from(
-            truncateToMaxLength(
-                StringUtils.normalizeSpace(previewPart)));
+        String normalizeSpace = StringUtils.normalizeSpace(previewPart);
+        String truncateToMaxLength = truncateToMaxLength(normalizeSpace);
+        return Preview.from(sanitizeUTF8String(truncateToMaxLength));
+    }
+
+    private static String sanitizeUTF8String(String truncateToMaxLength) {
+        CharsetEncoder encoder = StandardCharsets.UTF_8.newEncoder();
+        while (!encoder.canEncode(truncateToMaxLength)) {
+            truncateToMaxLength = truncateToMaxLength.substring(0, truncateToMaxLength.length() - 1);
+        }
+        return truncateToMaxLength;
     }
 
     private static String truncateToMaxLength(String body) {
diff --git a/server/data/data-jmap/src/test/java/org/apache/james/jmap/api/model/PreviewTest.java b/server/data/data-jmap/src/test/java/org/apache/james/jmap/api/model/PreviewTest.java
index 5198ac2165..eaae1ed8c8 100644
--- a/server/data/data-jmap/src/test/java/org/apache/james/jmap/api/model/PreviewTest.java
+++ b/server/data/data-jmap/src/test/java/org/apache/james/jmap/api/model/PreviewTest.java
@@ -23,7 +23,12 @@ import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatCode;
 import static org.assertj.core.api.Assertions.assertThatThrownBy;
 
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
+import java.util.Base64;
+
 import org.apache.commons.lang3.StringUtils;
+import org.assertj.core.api.SoftAssertions;
 import org.junit.jupiter.api.Nested;
 import org.junit.jupiter.api.Test;
 
@@ -136,4 +141,83 @@ class PreviewTest {
                 .isEqualTo(Preview.from("this is the preview content"));
         }
     }
+
+    @Test
+    void computeShouldSanitizeBadUtf8Splits() throws Exception {
+        // This value would lead to a split in the middle of an emoji thus leading to an invalid UTF-8 string
+
+        String b64 = 
"DQoNCiAgICANCiAgDQogICAgDQogICAgICANCiAgICAgICAgDQogICAgICAgICAg" +
+            
"DQogICAgICAgICAgICANCiAgICAgICAgICAgICAgDQogICAgICAgICAgICAgICAgDQogICAgICAgICAgICAgICAgICANCiAgICAgICAgICA"
 +
+            
"gICAgICAgICAgDQogICAgICAgICAgICAgICAgICAgICAgDQogIA0KICBQcm9kdWN0IEh1bnQNCg0KDQogICAgICAgICAgICAgICAgICAgIA"
 +
+            
"0KICAgICAgICAgICAgICAgICAgICANCiAgICAgICAgICAgICAgICAgICAgICBGcmlkYXksIE9jdG9iZXIgMjFzdA0KICAgICAgICAgICAgI"
 +
+            
"CAgICAgICANCiAgICAgICAgICAgICAgICAgIA0KICAgICAgICAgICAgICAgIA0KICAgICAgICAgICAgICANCiAgICAgICAgICAgIA0KICAg"
 +
+            
"ICAgICAgICAgDQogICAgICAgICAgICAgIA0KICAgICAgICAgICAgDQogICAgICAgICAgICANCiAgICAgICAgICAgICAgDQogICAgICAgICA"
 +
+            
"gICANCiAgICAgICAgICAgIA0KICAgICAgICAgICAgICANCiAgICAgICAgICAgICAgICANCiAgDQogIA0KICAgIA0KICAgICAgDQogICAgIC"
 +
+            
"AgIA0KICAgIEhleSB0aGVyZSwgQWxleGFuZHJlIQoKDQoNCiAgICBEaGVlcmFqIFBhbmRleSBqdXN0IHBvc3RlZCBEZXZSZXYgU3VwcG9yd"
 +
+            
"CAtIEluLWFwcCBzdXBwb3J0IGZvciBQTEcgY29tcGFuaWVzDQoNCiAgICAKCg0KDQogICAgICANCiAgICAgICAgDQogIA0KICAgIA0KICAg"
 +
+            
"ICAgDQogICAgICAgIA0KICAgICAgICAgIA0KICAgICAgICANCiAgICAgICAgDQogICAgICAgICAgRGV2UmV2IFN1cHBvcnQNCiAgICAgICA"
 +
+            
"gDQogICAgICANCiAgICAgIA0KICAgICAgICANCiAgICAgICAgICBJbi1hcHAgc3VwcG9ydCBmb3IgUExHIGNvbXBhbmllcw0KICAgICAgIC"
 +
+            
"ANCiAgICAgIA0KICAgIA0KICANCg0KDQoNCiAgICAgICAgICANCiAgICAgICAgICAgIA0KICAgICAgICAgICAgICAKDQogICAgICAgICAgI"
 +
+            
"CAgIEhlcmUgaXMgd2hhdCB0aGV5IHNhaWQgYWJvdXQgaXQ6DQogICAgICAgICAgICANCiAgICAgICAgICANCiAgICAgICAgICANCiAgDQogI"
 +
+            
"CAgDQogICAgICANCiAgICAgICAgDQogICAgICAgICAgW0RoZWVyYWogUGFuZGV5XQ0KICAgICAgICANCiAgICAgICAgDQogICAgICAgICAg"
 +
+            
"DQogICAgICAgICAgICAiSGV5IE1ha2VycyBhbmQgSHVudGVycyDwn5GLLAoKDQoNCkknbSBEaGVlcmFqIFBhbmRleSwgQ28tZm91bmRlciw"
 +
+            
"gYW5kIENFTyBhdCBEZXZSZXYuIFRvZGF5IGlzIGEgYmlnIGRheSBmb3IgdXMsIHdlJ3JlIHN1cGVyIGV4Y2l0ZWQgdG8gcHJlc2VudCB5b3"
 +
+            
"UgdGhlIERldlJldiBQcm9kdWN0LUxlZCBTdXBwb3J0IGFwcCBvbiBvdXIgRGV2Q1JNIHBsYXRmb3JtLCB3aXRoIHRoZSBnb2FsIG9mIHVuaW"
 +
+            
"Z5aW5nIHlvdXIgY29tcGFueSdzIGZyb250IGFuZCBiYWNrIG9mZmljZSBieSBtYWtpbmcgZXZlcnkgZW1wbG95ZWUgdGhpbmsgYWJvdXQgdGh"
 +
+            
"lIHByb2R1Y3QgYW5kIHRoZSBlbmQgdXNlcnMuCgoNCg0KSHVnZSB0aGFua3MgdG8gS2V2aW4gZm9yIGh1bnRpbmcgdXMg8J+ZjwoKDQoNCld"
 +
+            
"lJ3ZlIGNvbWUgYSBsb25nIHdheSBzaW5jZSBvdXIgbGFzdCBsYXVuY2gsIGFuZCB3ZSdyZSBoYXBweSB0byBhbm5vdW5jZSB0aGF0IHdlIG"
 +
+            
"FyZSBub3cgb2ZmaWNpYWxseSBvdXQgb2YgYmV0YSEgSXQncyBiZWVuIGEgbG9uZyBqb3VybmV5LCBidXQgd2UgY291bGRuJ3QgaGF2ZSBkb"
 +
+            
"25lIGl0IHdpdGhvdXQgdGhlIGJldGEgY3VzdG9tZXJzIHdobyB1c2VkIERldlJldiBhbmQgZ2F2ZSB1cyB0aGVpciB2YWx1YWJsZSBmZWVk"
 +
+            
"YmFjayBhbG9uZyB0aGUgd2F5LiBUaGFuayB5b3UgdG8gZXZlcnlvbmUgd2hvIGhlbHBlZCB1cyBpbXByb3ZlIG91ciBwcm9kdWN0IC0gd2U"
 +
+            
"gdHJ1bHkgY291bGRuJ3QgaGF2ZSBkb25lIGl0IHdpdGhvdXQgeW91LgoKDQoNClNvLCB3aGF0IGlzIERldlJldj8KCg0KDQpEZXZSZXYgaXM"
 +
+            
"gdGhlIHdvcmxkJ3MgZmlyc3QgRGV2Q1JNIGZvciBuZXctYWdlIHByb2R1Y3QtbGVkIGNvbXBhbmllcyBhbmQgc3RhcnR1cHMgd2l0aCBhIH"
 +
+            
"Zpc2lvbiB0byBicmluZyB0b2dldGhlciB0aGUgRGV2KERldmVsb3BlcnMpIGFuZCBSZXYgKEN1c3RvbWVycykgYW5kIGhlbHAgbWFrZSB5"
 +
+            
"b3VyIGNvbXBhbnkgbW9yZSBjdXN0b21lciBhbmQgcHJvZHVjdC1jZW50cmljLgoKDQoNCkRldlJldiBTdXBwb3J0IGFwcCBpcyB0aGUgZml"
 +
+            
"yc3QgYXBwIG9uIHRoZSBEZXZDUk0gcGxhdGZvcm0uIFRoZSBhcHAncyBnb2FsIGlzIHRvIGVtcG93ZXIgeW91ciBlbnRpcmUgb3JnYW5pem"
 +
+            
"F0aW9uIHRvIGVuZ2FnZSB5b3VyIGN1c3RvbWVyLCBhdXRvbWF0ZSBwcm9jZXNzIGFuZCBjb252ZXJ0IHByb2R1Y3QgZmVlZGJhY2sgaW50b"
 +
+            
"yBkZWxpZ2h0ZnVsIGV4cGVyaWVuY2UuCgoNCg0KSXQgY29udmVyZ2VzIGN1c3RvbWVyIGNvbnZlcnNhdGlvbnMgYW5kIGludGVyYWN0aW9u"
 +
+            
"cyBkaXJlY3RseSB0byB5b3VyIGRldmVsb3BlcidzIHdvcmsgYW5kIGtlZXBzIGV2ZXJ5dGhpbmcgaW4gc3luYyBhbmQgcmVhbC10aW1lIGZ"
 +
+            
"vciB5b3VyIGVuZCB1c2Vycy4KCg0KDQpUaGUgY29yZSBmZWF0dXJlcyB0aGF0IG1ha2UgRGV2UmV2IFN1cHBvcnQgZGlmZmVyZW50IGZyb2"
 +
+            
"0gb3RoZXIgcGxhdGZvcm1zOgoKDQoNClBMdUc6IFRoZSBwcm9kdWN0LWxlZCBjaGFubmVsIHRvIHRhbGsgdG8geW91ciB1c2Vycy4gQ2hhd"
 +
+            
"CwgcmVjb21tZW5kLCBhbmQgbnVkZ2UgeW91ciBjdXN0b21lcnMgdG8gZHJpdmUgYWRvcHRpb24gYW5kIGRlbGlnaHQuCgoNCg0KQ3VzdG9t"
 +
+            
"ZXIgSW5ib3g6IEEgYmktZGlyZWN0aW9uYWwsIHN5bmNocm9ub3VzIHZpZXcgaW50byByZWFsLXRpbWUgY3VzdG9tZXIgY29udmVyc2F0aW9"
 +
+            
"ucyBhY3Jvc3MgUEx1RywgU2xhY2ssIGFuZCBlbWFpbCB0byBlbnN1cmUgeW91IG5ldmVyIGxlYXZlIGEgY3VzdG9tZXIgaGFuZ2luZy4KCg"
 +
+            
"0KDQpUaWNrZXQgTWFuYWdlbWVudDogQSBtb2Rlcm4gdGlja2V0IG1hbmFnZW1lbnQgcGxhdGZvcm0gZW5yaWNoZWQgYnkgQ1JNIGRhdGEgY"
 +
+            
"W5kIGNvbm5lY3RlZCB0byBkZXZlbG9wZXIgaXNzdWVzIHRvIGJyaW5nIHByb2R1Y3QgYW5kIGN1c3RvbWVyIGNlbnRyaWNpdHkgdG8gZXZl"
 +
+            
"cnkgdGVhbSBtZW1iZXIuCgoNCg0KUmVhbHRpbWUgVXBkYXRlcyBmb3IgeW91ciB1c2VyczogTm93IGtlZXAgeW91ciB1c2VycyBhbHdheXM"
 +
+            
"gaW4gdGhlIGxvb3AsIHdpdGggdGhlIHJlYWwtdGltZSBzdGF0dXMgb2YgdGhlaXIgdGlja2V0cywgYWxsIHRocm91Z2ggdGhlIFBMdUcgd2"
 +
+            
"lkZ2V0LiAKCg0KDQpJbnRlZ3JhdGUgd2l0aCB5b3VyIGZhdm9yaXRlIGFwcHM6IEludGVncmF0ZSB5b3VyIGZhdm9yaXRlIGFwcHMgbGlrZS"
 +
+            
"BHaXRodWIsIEJpdGJ1Y2tldCwgSmlyYSwgU2xhY2ssIExpbmVhciwgYW5kIG1hbnkgbW9yZS4gCgoNCg0KSW50ZXJhY3Qgd2l0aCBEZXZSZX"
 +
+            
"YgZnJvbSBTbGFjazogTmV2ZXIgbGVhdmUgeW91ciBjdXN0b21lcnMgaW4gdGhlIGRhcmshIENyZWF0ZSBEZXZSZXYgdGlja2V0cyBhbmQga2"
 +
+            
"VlcCBjdXN0b21lcnMgdXAgdG8gZGF0ZSB3aXRob3V0IGV2ZXIgbGVhdmluZyBTbGFjay4gQ3JlYXRlIERldlJldiB0aWNrZXRzIG9yIGlzc3"
 +
+            
"VlcyBmcm9tIFNsYWNrIGNvbnZlcnNhdGlvbnMgYW5kIHN0YXkgdXAgdG8gZGF0ZS4KCg0KDQpLbm93bGVkZ2UtYmFzZWQgYXJ0aWNsZXMgYW5k"
 +
+            
"IHRoZWlyIG1hbmFnZW1lbnQ6IEFsb25nIHdpdGggYWRkaW5nIGFuZCBtYW5hZ2luZyBLQiBhcnRpY2xlcyBieSBEZXZPcmcsIHVzZXJzIHdpbGw"
 +
+            
"gbm93IGhhdmUgdGhlIGFiaWxpdHkgdG8gc2VhcmNoIHRoZW0gdGhyb3VnaCBQTHVHIHdpZGdldC4KCg0KDQpBdXRvbWF0aWMgR2l0SHViIFVwZ"
 +
+            
"GF0ZXM6IFB1dCB5b3VyIHdvcmsgaW4gYXV0b3BpbG90IHdpdGggR2l0aHViIGFuZCBEZXZSZXYgYW5kIG1ha2UgdXBkYXRlcyBpbiByZWFsLXRp"
 +
+            
"bWUsIGFuZCBtYWtlIHlvdXIgZGV2ZWxvcGVyIGZyZWUgZnJvbSBkb2luZyBzdGF0dXMgdXBkYXRlcy4KCg0KDQpOZXZlciBtaXNzIHlvdXIgbWV"
 +
+            
"zc2FnZXM6IEVtYWlsIGludGVncmF0aW9ucyB0byBrZWVwIHlvdSBub3RpZmllZCAyNHg3LgoKDQoNCkxpZ2h0IG1vZGU6IFdlIGFyZSBhbHNvIG"
 +
+            
"dvaW5nIHRvIHJlbGVhc2Ugb3VyIGFsbCAgbmV3IGxpZ2h0IG1vZGUgdGhlbWUgc2hvcnRseSwgYW5kIHdlIHRoaW5rIHlvdSdyZSBnb2luZyB0by"
 +
+            
"Bsb3ZlIGl0ISBUaGlzIHdhcyBvbmUgb2YgdGhlIG1vc3QgcmVxdWVzdGVkIGZlYXR1cmVzLCBhbmQgd2UncmUgc28gaGFwcHkgdG8gYmUgYWJsZ"
 +
+            
"SB0byBvZmZlciBpdCB0byBvdXIgdXNlcnMgZmluYWxseS4gR2l2ZSBEZXZSZXYgYSB0cnkgdG9kYXksIGFuZCBsZXQgdXMga25vdyB3aGF0IHl"
 +
+            
"vdSB0aGluayEKCg0KDQpDaGVlcnMg8J+NuwoKDQoNClRlYW0gRGV2UmV2IgoKDQogICAgICAgICAgDQogICAgICAgIA0KICAgICAgDQogICAgD"
 +
+            
"QogIA0KDQoNCg0KICAgICAgICANCiAgDQogICAgDQogICAgICBWaWV3IG9uIFByb2R1Y3QgSHVudA0KICAgIA0KICANCg0KDQogICAgICANCiA"
 +
+            
"gICANCg0KICAgICAgDQogICAgDQogIA0KDQogIA0KICAgICAgICAgICAgICAgIA0KICANCiAgICAgICAgICAgICAgICAgICAgDQogICAgWW91I"
 +
+            
"GNhbiBvcHQgb3V0IG9mIGZyaWVuZCBwcm9kdWN0IHBvc3Qgbm90aWZpY2F0aW9ucyBvciBtYW5hZ2UgYWxsIG9mIHlvdXIgZW1haWwgbm90aWZ"
 +
+            
"pY2F0aW9ucyBmcm9tIHlvdXIgcHJvZmlsZS4gT3IganVzdCB1bmZvbGxvdyBEaGVlcmFqIFBhbmRleQoKDQoNCiAgICAgICAgICAgICAgICAgI"
 +
+            
"ElmIHlvdSBoYXZlIGFueSBxdWVzdGlvbnMsIGZlZWRiYWNrLCBpZGVhcyBvciBwcm9ibGVtcyBkb24ndCBoZXNpdGF0ZSB0byBjb250YWN0IHVzI"
 +
+            
"QoKDQogICAgICAgICAgICAgICAgICBQcm9kdWN0IEh1bnQgSW5jLiwgOTAgR29sZCBTdCwgRkxSIDMsIFNhbiBGcmFuY2lzY28sIENBIDk0MTM"
 +
+            
"zCgoNCg0KDQogICAgICAgICAgICAgIA0KICAgICAgICAgICAgDQogICAgICAgICAgDQogICAgICAgIA0KICAgICAgDQogICAgDQogIA0KDQoNC"
 +
+            "g0KICANCgoNCg==";
+        String textBody = new String(Base64.getDecoder().decode(b64.getBytes(StandardCharsets.UTF_8)), StandardCharsets.UTF_8);
+        Preview preview = Preview.compute(textBody);
+
+        SoftAssertions.assertSoftly(softly -> {
+            softly.assertThat(StandardCharsets.UTF_8.newEncoder()
+                .onMalformedInput(CodingErrorAction.REPORT)
+                .canEncode(preview.getValue())).isTrue();
+            softly.assertThat(preview).isEqualTo(Preview.from("Product Hunt Friday, October 21st Hey there, Alexandre! " +
+                "Dheeraj Pandey just posted DevRev Support - In-app support for PLG companies DevRev Support In-app " +
+                "support for PLG companies Here is what they said about it: [Dheeraj Pandey] \"Hey Makers and Hunters "));
+        });
+    }
 }
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to