This is an automated email from the ASF dual-hosted git repository.

garydgregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-text.git


The following commit(s) were added to refs/heads/master by this push:
     new 6e8da455 Keep WordUtils.wrap from splitting a surrogate pair (#755)
6e8da455 is described below

commit 6e8da45562a715d264e4cb92373a10b2796dc8c6
Author: alhuda <[email protected]>
AuthorDate: Fri Jun 26 16:39:46 2026 +0530

    Keep WordUtils.wrap from splitting a surrogate pair (#755)
---
 src/main/java/org/apache/commons/text/WordUtils.java     | 10 +++++++---
 src/test/java/org/apache/commons/text/WordUtilsTest.java |  8 ++++++++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/main/java/org/apache/commons/text/WordUtils.java b/src/main/java/org/apache/commons/text/WordUtils.java
index e03024a4..fea98c71 100644
--- a/src/main/java/org/apache/commons/text/WordUtils.java
+++ b/src/main/java/org/apache/commons/text/WordUtils.java
@@ -843,10 +843,14 @@ public class WordUtils {
                 if (matcherSize == 0) {
                     offset--;
                 }
-                // wrap really long word one line at a time
-                wrappedLine.append(str, offset, wrapLength + offset);
+                // wrap really long word one line at a time, but keep a surrogate pair whole
+                int wrapAt = wrapLength + offset;
+                if (Character.isHighSurrogate(str.charAt(wrapAt - 1)) && Character.isLowSurrogate(str.charAt(wrapAt))) {
+                    wrapAt++;
+                }
+                wrappedLine.append(str, offset, wrapAt);
                 wrappedLine.append(newLineStr);
-                offset += wrapLength;
+                offset = wrapAt;
                 matcherSize = -1;
             } else {
                 // do not wrap really long word, just extend beyond limit
diff --git a/src/test/java/org/apache/commons/text/WordUtilsTest.java b/src/test/java/org/apache/commons/text/WordUtilsTest.java
index 078ab1ae..f267f093 100644
--- a/src/test/java/org/apache/commons/text/WordUtilsTest.java
+++ b/src/test/java/org/apache/commons/text/WordUtilsTest.java
@@ -543,6 +543,14 @@ class WordUtilsTest {
         assertEquals(expected, WordUtils.wrap(input, 20, "\n", false));
         expected = "Click here,\nhttps://commons.apac\nhe.org, to jump to\nthe commons website";
         assertEquals(expected, WordUtils.wrap(input, 20, "\n", true));
+
+        // a hard break for a long word must not split a surrogate pair across the new line
+        input = "a\uD83D\uDE00\uD83D\uDE00\uD83D\uDE00\uD83D\uDE00";
+        expected = "a\uD83D\uDE00\uD83D\uDE00\n\uD83D\uDE00\uD83D\uDE00";
+        assertEquals(expected, WordUtils.wrap(input, 4, "\n", true));
+        input = "\uD83D\uDE00\uD83D\uDE00\uD83D\uDE00";
+        expected = "\uD83D\uDE00\uD83D\uDE00\n\uD83D\uDE00";
+        assertEquals(expected, WordUtils.wrap(input, 3, "\n", true));
     }
 
     @Test

Reply via email to