This is an automated email from the ASF dual-hosted git repository.
btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git
The following commit(s) were added to refs/heads/master by this push:
new a054420813 JAMES-3840 Sanitize UTF-8 string after splitting (#1266)
a054420813 is described below
commit a054420813616e478e30cc1005ccbad6896e3c9d
Author: Benoit TELLIER <[email protected]>
AuthorDate: Wed Oct 26 09:16:49 2022 +0700
JAMES-3840 Sanitize UTF-8 string after splitting (#1266)
---
.../org/apache/james/jmap/api/model/Preview.java | 15 +++-
.../apache/james/jmap/api/model/PreviewTest.java | 84 ++++++++++++++++++++++
2 files changed, 96 insertions(+), 3 deletions(-)
diff --git a/server/data/data-jmap/src/main/java/org/apache/james/jmap/api/model/Preview.java b/server/data/data-jmap/src/main/java/org/apache/james/jmap/api/model/Preview.java
index c0cc914a80..12e7ef34c9 100644
--- a/server/data/data-jmap/src/main/java/org/apache/james/jmap/api/model/Preview.java
+++ b/server/data/data-jmap/src/main/java/org/apache/james/jmap/api/model/Preview.java
@@ -22,6 +22,7 @@ package org.apache.james.jmap.api.model;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
@@ -90,9 +91,17 @@ public class Preview {
public static Preview compute(String textBody) {
int previewOffsetEstimate = estimatePreviewOffset(textBody, MAX_LENGTH);
String previewPart = textBody.substring(0, previewOffsetEstimate);
- return Preview.from(
- truncateToMaxLength(
- StringUtils.normalizeSpace(previewPart)));
+ String normalizeSpace = StringUtils.normalizeSpace(previewPart);
+ String truncateToMaxLength = truncateToMaxLength(normalizeSpace);
+ return Preview.from(sanitizeUTF8String(truncateToMaxLength));
+ }
+
+ private static String sanitizeUTF8String(String truncateToMaxLength) {
+ CharsetEncoder encoder = StandardCharsets.UTF_8.newEncoder();
+ while (!encoder.canEncode(truncateToMaxLength)) {
+ truncateToMaxLength = truncateToMaxLength.substring(0, truncateToMaxLength.length() - 1);
+ }
+ return truncateToMaxLength;
}
private static String truncateToMaxLength(String body) {
diff --git a/server/data/data-jmap/src/test/java/org/apache/james/jmap/api/model/PreviewTest.java b/server/data/data-jmap/src/test/java/org/apache/james/jmap/api/model/PreviewTest.java
index 5198ac2165..eaae1ed8c8 100644
--- a/server/data/data-jmap/src/test/java/org/apache/james/jmap/api/model/PreviewTest.java
+++ b/server/data/data-jmap/src/test/java/org/apache/james/jmap/api/model/PreviewTest.java
@@ -23,7 +23,12 @@ import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatCode;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
+import java.util.Base64;
+
import org.apache.commons.lang3.StringUtils;
+import org.assertj.core.api.SoftAssertions;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
@@ -136,4 +141,83 @@ class PreviewTest {
.isEqualTo(Preview.from("this is the preview content"));
}
}
+
+ @Test
+ void computeShouldSanitizeBadUtf8Splits() throws Exception {
+ // This value would lead to a split in the middle of an emoji thus leading to an invalid UTF-8 string
+
+ String b64 =
"DQoNCiAgICANCiAgDQogICAgDQogICAgICANCiAgICAgICAgDQogICAgICAgICAg" +
+
"DQogICAgICAgICAgICANCiAgICAgICAgICAgICAgDQogICAgICAgICAgICAgICAgDQogICAgICAgICAgICAgICAgICANCiAgICAgICAgICA"
+
+
"gICAgICAgICAgDQogICAgICAgICAgICAgICAgICAgICAgDQogIA0KICBQcm9kdWN0IEh1bnQNCg0KDQogICAgICAgICAgICAgICAgICAgIA"
+
+
"0KICAgICAgICAgICAgICAgICAgICANCiAgICAgICAgICAgICAgICAgICAgICBGcmlkYXksIE9jdG9iZXIgMjFzdA0KICAgICAgICAgICAgI"
+
+
"CAgICAgICANCiAgICAgICAgICAgICAgICAgIA0KICAgICAgICAgICAgICAgIA0KICAgICAgICAgICAgICANCiAgICAgICAgICAgIA0KICAg"
+
+
"ICAgICAgICAgDQogICAgICAgICAgICAgIA0KICAgICAgICAgICAgDQogICAgICAgICAgICANCiAgICAgICAgICAgICAgDQogICAgICAgICA"
+
+
"gICANCiAgICAgICAgICAgIA0KICAgICAgICAgICAgICANCiAgICAgICAgICAgICAgICANCiAgDQogIA0KICAgIA0KICAgICAgDQogICAgIC"
+
+
"AgIA0KICAgIEhleSB0aGVyZSwgQWxleGFuZHJlIQoKDQoNCiAgICBEaGVlcmFqIFBhbmRleSBqdXN0IHBvc3RlZCBEZXZSZXYgU3VwcG9yd"
+
+
"CAtIEluLWFwcCBzdXBwb3J0IGZvciBQTEcgY29tcGFuaWVzDQoNCiAgICAKCg0KDQogICAgICANCiAgICAgICAgDQogIA0KICAgIA0KICAg"
+
+
"ICAgDQogICAgICAgIA0KICAgICAgICAgIA0KICAgICAgICANCiAgICAgICAgDQogICAgICAgICAgRGV2UmV2IFN1cHBvcnQNCiAgICAgICA"
+
+
"gDQogICAgICANCiAgICAgIA0KICAgICAgICANCiAgICAgICAgICBJbi1hcHAgc3VwcG9ydCBmb3IgUExHIGNvbXBhbmllcw0KICAgICAgIC"
+
+
"ANCiAgICAgIA0KICAgIA0KICANCg0KDQoNCiAgICAgICAgICANCiAgICAgICAgICAgIA0KICAgICAgICAgICAgICAKDQogICAgICAgICAgI"
+
+
"CAgIEhlcmUgaXMgd2hhdCB0aGV5IHNhaWQgYWJvdXQgaXQ6DQogICAgICAgICAgICANCiAgICAgICAgICANCiAgICAgICAgICANCiAgDQogI"
+
+
"CAgDQogICAgICANCiAgICAgICAgDQogICAgICAgICAgW0RoZWVyYWogUGFuZGV5XQ0KICAgICAgICANCiAgICAgICAgDQogICAgICAgICAg"
+
+
"DQogICAgICAgICAgICAiSGV5IE1ha2VycyBhbmQgSHVudGVycyDwn5GLLAoKDQoNCkknbSBEaGVlcmFqIFBhbmRleSwgQ28tZm91bmRlciw"
+
+
"gYW5kIENFTyBhdCBEZXZSZXYuIFRvZGF5IGlzIGEgYmlnIGRheSBmb3IgdXMsIHdlJ3JlIHN1cGVyIGV4Y2l0ZWQgdG8gcHJlc2VudCB5b3"
+
+
"UgdGhlIERldlJldiBQcm9kdWN0LUxlZCBTdXBwb3J0IGFwcCBvbiBvdXIgRGV2Q1JNIHBsYXRmb3JtLCB3aXRoIHRoZSBnb2FsIG9mIHVuaW"
+
+
"Z5aW5nIHlvdXIgY29tcGFueSdzIGZyb250IGFuZCBiYWNrIG9mZmljZSBieSBtYWtpbmcgZXZlcnkgZW1wbG95ZWUgdGhpbmsgYWJvdXQgdGh"
+
+
"lIHByb2R1Y3QgYW5kIHRoZSBlbmQgdXNlcnMuCgoNCg0KSHVnZSB0aGFua3MgdG8gS2V2aW4gZm9yIGh1bnRpbmcgdXMg8J+ZjwoKDQoNCld"
+
+
"lJ3ZlIGNvbWUgYSBsb25nIHdheSBzaW5jZSBvdXIgbGFzdCBsYXVuY2gsIGFuZCB3ZSdyZSBoYXBweSB0byBhbm5vdW5jZSB0aGF0IHdlIG"
+
+
"FyZSBub3cgb2ZmaWNpYWxseSBvdXQgb2YgYmV0YSEgSXQncyBiZWVuIGEgbG9uZyBqb3VybmV5LCBidXQgd2UgY291bGRuJ3QgaGF2ZSBkb"
+
+
"25lIGl0IHdpdGhvdXQgdGhlIGJldGEgY3VzdG9tZXJzIHdobyB1c2VkIERldlJldiBhbmQgZ2F2ZSB1cyB0aGVpciB2YWx1YWJsZSBmZWVk"
+
+
"YmFjayBhbG9uZyB0aGUgd2F5LiBUaGFuayB5b3UgdG8gZXZlcnlvbmUgd2hvIGhlbHBlZCB1cyBpbXByb3ZlIG91ciBwcm9kdWN0IC0gd2U"
+
+
"gdHJ1bHkgY291bGRuJ3QgaGF2ZSBkb25lIGl0IHdpdGhvdXQgeW91LgoKDQoNClNvLCB3aGF0IGlzIERldlJldj8KCg0KDQpEZXZSZXYgaXM"
+
+
"gdGhlIHdvcmxkJ3MgZmlyc3QgRGV2Q1JNIGZvciBuZXctYWdlIHByb2R1Y3QtbGVkIGNvbXBhbmllcyBhbmQgc3RhcnR1cHMgd2l0aCBhIH"
+
+
"Zpc2lvbiB0byBicmluZyB0b2dldGhlciB0aGUgRGV2KERldmVsb3BlcnMpIGFuZCBSZXYgKEN1c3RvbWVycykgYW5kIGhlbHAgbWFrZSB5"
+
+
"b3VyIGNvbXBhbnkgbW9yZSBjdXN0b21lciBhbmQgcHJvZHVjdC1jZW50cmljLgoKDQoNCkRldlJldiBTdXBwb3J0IGFwcCBpcyB0aGUgZml"
+
+
"yc3QgYXBwIG9uIHRoZSBEZXZDUk0gcGxhdGZvcm0uIFRoZSBhcHAncyBnb2FsIGlzIHRvIGVtcG93ZXIgeW91ciBlbnRpcmUgb3JnYW5pem"
+
+
"F0aW9uIHRvIGVuZ2FnZSB5b3VyIGN1c3RvbWVyLCBhdXRvbWF0ZSBwcm9jZXNzIGFuZCBjb252ZXJ0IHByb2R1Y3QgZmVlZGJhY2sgaW50b"
+
+
"yBkZWxpZ2h0ZnVsIGV4cGVyaWVuY2UuCgoNCg0KSXQgY29udmVyZ2VzIGN1c3RvbWVyIGNvbnZlcnNhdGlvbnMgYW5kIGludGVyYWN0aW9u"
+
+
"cyBkaXJlY3RseSB0byB5b3VyIGRldmVsb3BlcidzIHdvcmsgYW5kIGtlZXBzIGV2ZXJ5dGhpbmcgaW4gc3luYyBhbmQgcmVhbC10aW1lIGZ"
+
+
"vciB5b3VyIGVuZCB1c2Vycy4KCg0KDQpUaGUgY29yZSBmZWF0dXJlcyB0aGF0IG1ha2UgRGV2UmV2IFN1cHBvcnQgZGlmZmVyZW50IGZyb2"
+
+
"0gb3RoZXIgcGxhdGZvcm1zOgoKDQoNClBMdUc6IFRoZSBwcm9kdWN0LWxlZCBjaGFubmVsIHRvIHRhbGsgdG8geW91ciB1c2Vycy4gQ2hhd"
+
+
"CwgcmVjb21tZW5kLCBhbmQgbnVkZ2UgeW91ciBjdXN0b21lcnMgdG8gZHJpdmUgYWRvcHRpb24gYW5kIGRlbGlnaHQuCgoNCg0KQ3VzdG9t"
+
+
"ZXIgSW5ib3g6IEEgYmktZGlyZWN0aW9uYWwsIHN5bmNocm9ub3VzIHZpZXcgaW50byByZWFsLXRpbWUgY3VzdG9tZXIgY29udmVyc2F0aW9"
+
+
"ucyBhY3Jvc3MgUEx1RywgU2xhY2ssIGFuZCBlbWFpbCB0byBlbnN1cmUgeW91IG5ldmVyIGxlYXZlIGEgY3VzdG9tZXIgaGFuZ2luZy4KCg"
+
+
"0KDQpUaWNrZXQgTWFuYWdlbWVudDogQSBtb2Rlcm4gdGlja2V0IG1hbmFnZW1lbnQgcGxhdGZvcm0gZW5yaWNoZWQgYnkgQ1JNIGRhdGEgY"
+
+
"W5kIGNvbm5lY3RlZCB0byBkZXZlbG9wZXIgaXNzdWVzIHRvIGJyaW5nIHByb2R1Y3QgYW5kIGN1c3RvbWVyIGNlbnRyaWNpdHkgdG8gZXZl"
+
+
"cnkgdGVhbSBtZW1iZXIuCgoNCg0KUmVhbHRpbWUgVXBkYXRlcyBmb3IgeW91ciB1c2VyczogTm93IGtlZXAgeW91ciB1c2VycyBhbHdheXM"
+
+
"gaW4gdGhlIGxvb3AsIHdpdGggdGhlIHJlYWwtdGltZSBzdGF0dXMgb2YgdGhlaXIgdGlja2V0cywgYWxsIHRocm91Z2ggdGhlIFBMdUcgd2"
+
+
"lkZ2V0LiAKCg0KDQpJbnRlZ3JhdGUgd2l0aCB5b3VyIGZhdm9yaXRlIGFwcHM6IEludGVncmF0ZSB5b3VyIGZhdm9yaXRlIGFwcHMgbGlrZS"
+
+
"BHaXRodWIsIEJpdGJ1Y2tldCwgSmlyYSwgU2xhY2ssIExpbmVhciwgYW5kIG1hbnkgbW9yZS4gCgoNCg0KSW50ZXJhY3Qgd2l0aCBEZXZSZX"
+
+
"YgZnJvbSBTbGFjazogTmV2ZXIgbGVhdmUgeW91ciBjdXN0b21lcnMgaW4gdGhlIGRhcmshIENyZWF0ZSBEZXZSZXYgdGlja2V0cyBhbmQga2"
+
+
"VlcCBjdXN0b21lcnMgdXAgdG8gZGF0ZSB3aXRob3V0IGV2ZXIgbGVhdmluZyBTbGFjay4gQ3JlYXRlIERldlJldiB0aWNrZXRzIG9yIGlzc3"
+
+
"VlcyBmcm9tIFNsYWNrIGNvbnZlcnNhdGlvbnMgYW5kIHN0YXkgdXAgdG8gZGF0ZS4KCg0KDQpLbm93bGVkZ2UtYmFzZWQgYXJ0aWNsZXMgYW5k"
+
+
"IHRoZWlyIG1hbmFnZW1lbnQ6IEFsb25nIHdpdGggYWRkaW5nIGFuZCBtYW5hZ2luZyBLQiBhcnRpY2xlcyBieSBEZXZPcmcsIHVzZXJzIHdpbGw"
+
+
"gbm93IGhhdmUgdGhlIGFiaWxpdHkgdG8gc2VhcmNoIHRoZW0gdGhyb3VnaCBQTHVHIHdpZGdldC4KCg0KDQpBdXRvbWF0aWMgR2l0SHViIFVwZ"
+
+
"GF0ZXM6IFB1dCB5b3VyIHdvcmsgaW4gYXV0b3BpbG90IHdpdGggR2l0aHViIGFuZCBEZXZSZXYgYW5kIG1ha2UgdXBkYXRlcyBpbiByZWFsLXRp"
+
+
"bWUsIGFuZCBtYWtlIHlvdXIgZGV2ZWxvcGVyIGZyZWUgZnJvbSBkb2luZyBzdGF0dXMgdXBkYXRlcy4KCg0KDQpOZXZlciBtaXNzIHlvdXIgbWV"
+
+
"zc2FnZXM6IEVtYWlsIGludGVncmF0aW9ucyB0byBrZWVwIHlvdSBub3RpZmllZCAyNHg3LgoKDQoNCkxpZ2h0IG1vZGU6IFdlIGFyZSBhbHNvIG"
+
+
"dvaW5nIHRvIHJlbGVhc2Ugb3VyIGFsbCAgbmV3IGxpZ2h0IG1vZGUgdGhlbWUgc2hvcnRseSwgYW5kIHdlIHRoaW5rIHlvdSdyZSBnb2luZyB0by"
+
+
"Bsb3ZlIGl0ISBUaGlzIHdhcyBvbmUgb2YgdGhlIG1vc3QgcmVxdWVzdGVkIGZlYXR1cmVzLCBhbmQgd2UncmUgc28gaGFwcHkgdG8gYmUgYWJsZ"
+
+
"SB0byBvZmZlciBpdCB0byBvdXIgdXNlcnMgZmluYWxseS4gR2l2ZSBEZXZSZXYgYSB0cnkgdG9kYXksIGFuZCBsZXQgdXMga25vdyB3aGF0IHl"
+
+
"vdSB0aGluayEKCg0KDQpDaGVlcnMg8J+NuwoKDQoNClRlYW0gRGV2UmV2IgoKDQogICAgICAgICAgDQogICAgICAgIA0KICAgICAgDQogICAgD"
+
+
"QogIA0KDQoNCg0KICAgICAgICANCiAgDQogICAgDQogICAgICBWaWV3IG9uIFByb2R1Y3QgSHVudA0KICAgIA0KICANCg0KDQogICAgICANCiA"
+
+
"gICANCg0KICAgICAgDQogICAgDQogIA0KDQogIA0KICAgICAgICAgICAgICAgIA0KICANCiAgICAgICAgICAgICAgICAgICAgDQogICAgWW91I"
+
+
"GNhbiBvcHQgb3V0IG9mIGZyaWVuZCBwcm9kdWN0IHBvc3Qgbm90aWZpY2F0aW9ucyBvciBtYW5hZ2UgYWxsIG9mIHlvdXIgZW1haWwgbm90aWZ"
+
+
"pY2F0aW9ucyBmcm9tIHlvdXIgcHJvZmlsZS4gT3IganVzdCB1bmZvbGxvdyBEaGVlcmFqIFBhbmRleQoKDQoNCiAgICAgICAgICAgICAgICAgI"
+
+
"ElmIHlvdSBoYXZlIGFueSBxdWVzdGlvbnMsIGZlZWRiYWNrLCBpZGVhcyBvciBwcm9ibGVtcyBkb24ndCBoZXNpdGF0ZSB0byBjb250YWN0IHVzI"
+
+
"QoKDQogICAgICAgICAgICAgICAgICBQcm9kdWN0IEh1bnQgSW5jLiwgOTAgR29sZCBTdCwgRkxSIDMsIFNhbiBGcmFuY2lzY28sIENBIDk0MTM"
+
+
"zCgoNCg0KDQogICAgICAgICAgICAgIA0KICAgICAgICAgICAgDQogICAgICAgICAgDQogICAgICAgIA0KICAgICAgDQogICAgDQogIA0KDQoNC"
+
+ "g0KICANCgoNCg==";
+ String textBody = new String(Base64.getDecoder().decode(b64.getBytes(StandardCharsets.UTF_8)), StandardCharsets.UTF_8);
+ Preview preview = Preview.compute(textBody);
+
+ SoftAssertions.assertSoftly(softly -> {
+ softly.assertThat(StandardCharsets.UTF_8.newEncoder()
+ .onMalformedInput(CodingErrorAction.REPORT)
+ .canEncode(preview.getValue())).isTrue();
+ softly.assertThat(preview).isEqualTo(Preview.from("Product Hunt Friday, October 21st Hey there, Alexandre! " +
+ "Dheeraj Pandey just posted DevRev Support - In-app support for PLG companies DevRev Support In-app " +
+ "support for PLG companies Here is what they said about it: [Dheeraj Pandey] \"Hey Makers and Hunters "));
+ });
+ }
}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]