This is an automated email from the ASF dual-hosted git repository.
garydgregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-text.git
The following commit(s) were added to refs/heads/master by this push:
new 6e8da455 Keep WordUtils.wrap from splitting a surrogate pair (#755)
6e8da455 is described below
commit 6e8da45562a715d264e4cb92373a10b2796dc8c6
Author: alhuda <[email protected]>
AuthorDate: Fri Jun 26 16:39:46 2026 +0530
Keep WordUtils.wrap from splitting a surrogate pair (#755)
---
src/main/java/org/apache/commons/text/WordUtils.java | 10 +++++++---
src/test/java/org/apache/commons/text/WordUtilsTest.java | 8 ++++++++
2 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/src/main/java/org/apache/commons/text/WordUtils.java b/src/main/java/org/apache/commons/text/WordUtils.java
index e03024a4..fea98c71 100644
--- a/src/main/java/org/apache/commons/text/WordUtils.java
+++ b/src/main/java/org/apache/commons/text/WordUtils.java
@@ -843,10 +843,14 @@ public class WordUtils {
if (matcherSize == 0) {
offset--;
}
- // wrap really long word one line at a time
- wrappedLine.append(str, offset, wrapLength + offset);
+ // wrap really long word one line at a time, but keep a surrogate pair whole
+ int wrapAt = wrapLength + offset;
+ if (Character.isHighSurrogate(str.charAt(wrapAt - 1)) && Character.isLowSurrogate(str.charAt(wrapAt))) {
+ wrapAt++;
+ }
+ wrappedLine.append(str, offset, wrapAt);
wrappedLine.append(newLineStr);
- offset += wrapLength;
+ offset = wrapAt;
matcherSize = -1;
} else {
// do not wrap really long word, just extend beyond limit
diff --git a/src/test/java/org/apache/commons/text/WordUtilsTest.java b/src/test/java/org/apache/commons/text/WordUtilsTest.java
index 078ab1ae..f267f093 100644
--- a/src/test/java/org/apache/commons/text/WordUtilsTest.java
+++ b/src/test/java/org/apache/commons/text/WordUtilsTest.java
@@ -543,6 +543,14 @@ class WordUtilsTest {
assertEquals(expected, WordUtils.wrap(input, 20, "\n", false));
expected = "Click here,\nhttps://commons.apac\nhe.org, to jump to\nthe commons website";
assertEquals(expected, WordUtils.wrap(input, 20, "\n", true));
+
+ // a hard break for a long word must not split a surrogate pair across the new line
+ input = "a\uD83D\uDE00\uD83D\uDE00\uD83D\uDE00\uD83D\uDE00";
+ expected = "a\uD83D\uDE00\uD83D\uDE00\n\uD83D\uDE00\uD83D\uDE00";
+ assertEquals(expected, WordUtils.wrap(input, 4, "\n", true));
+ input = "\uD83D\uDE00\uD83D\uDE00\uD83D\uDE00";
+ expected = "\uD83D\uDE00\uD83D\uDE00\n\uD83D\uDE00";
+ assertEquals(expected, WordUtils.wrap(input, 3, "\n", true));
}
@Test