This is an automated email from the ASF dual-hosted git repository. ppkarwasz pushed a commit to branch fix/pattern-length in repository https://gitbox.apache.org/repos/asf/commons-lang.git
commit 8dffc25a84621762ff23a4b588ac510de1057a3a Author: Piotr P. Karwasz <[email protected]> AuthorDate: Sun May 17 23:19:26 2026 +0200 Fix `WordUtils.wrap` for `wrapOn` matches different from one character `WordUtils.wrap` advanced the offset by a single character past each `wrapOn` match, assuming the separator is always exactly one character long: - When the `wrapOn` regex matched several characters, the surplus separator characters were left in the wrapped output instead of being consumed, - When the `wrapOn` regex matched zero characters, one additional character was consumed. Now it tracks the end offset of each match and resumes from there, so that separators of any length, including zero-width matches, are skipped correctly. --- src/changes/changes.xml | 1 + .../org/apache/commons/lang3/text/WordUtils.java | 21 +++--- .../apache/commons/lang3/text/WordUtilsTest.java | 82 ++++++++++++++-------- 3 files changed, 65 insertions(+), 39 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index f04e26cc0..2215ab002 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -154,6 +154,7 @@ java.lang.NullPointerException: Cannot invoke <action type="fix" dev="ggregory" due-to="Omkhar Arasaratnam, Gary Gregory">AtomicSafeInitializer.get() busy-spin without yield burns CPU during slow initialization (#1651).</action> <action type="fix" dev="ggregory" due-to="Omkhar Arasaratnam, Gary Gregory">StrBuilder.readFrom(Readable) exposes stale internal buffer to Readable parameter (#1652).</action> <action type="fix" dev="ggregory" due-to="Omkhar Arasaratnam, Gary Gregory">EqualsBuilder.reflectionEquals() array branch missing cycle guard causes stack overflow on self-referential Object arrays (#1653).</action> + <action type="fix" dev="pkarwasz" due-to="Piotr Karwasz">WordUtils.wrap() leaves separator characters in the output when the wrapOn regex match is longer than one character (#1655).</action> <!-- ADD --> <action type="add" 
dev="ggregory" due-to="Gary Gregory">Add JavaVersion.JAVA_27.</action> <action type="add" dev="ggregory" due-to="Gary Gregory">Add SystemUtils.IS_JAVA_27.</action> diff --git a/src/main/java/org/apache/commons/lang3/text/WordUtils.java b/src/main/java/org/apache/commons/lang3/text/WordUtils.java index ae59a34e3..feed32d15 100644 --- a/src/main/java/org/apache/commons/lang3/text/WordUtils.java +++ b/src/main/java/org/apache/commons/lang3/text/WordUtils.java @@ -648,15 +648,17 @@ public static String wrap(final String str, int wrapLength, String newLineStr, f while (offset < inputLineLength) { int spaceToWrapAt = -1; + int endOfWrapAt = -1; Matcher matcher = patternToWrapOn.matcher( str.substring(offset, Math.min((int) Math.min(Integer.MAX_VALUE, offset + wrapLength + 1L), inputLineLength))); if (matcher.find()) { - if (matcher.start() == 0) { - // If the match is zero-width, advance by at least 1 to avoid infinite loop. - offset += matcher.end() > 0 ? matcher.end() : 1; + spaceToWrapAt = matcher.start() + offset; + endOfWrapAt = matcher.end() + offset; + // Skip leading match, if it is not zero-width + if (spaceToWrapAt == offset && endOfWrapAt != offset) { + offset = endOfWrapAt; continue; } - spaceToWrapAt = matcher.start() + offset; } // only last line without leading spaces is left if (inputLineLength - offset <= wrapLength) { @@ -664,13 +666,13 @@ public static String wrap(final String str, int wrapLength, String newLineStr, f } while (matcher.find()) { spaceToWrapAt = matcher.start() + offset; + endOfWrapAt = matcher.end() + offset; } - if (spaceToWrapAt >= offset) { + if (endOfWrapAt > offset) { // normal case wrappedLine.append(str, offset, spaceToWrapAt); wrappedLine.append(newLineStr); - offset = spaceToWrapAt + 1; - + offset = endOfWrapAt; } else // really long word or URL if (wrapLongWords) { // wrap really long word one line at a time @@ -680,14 +682,17 @@ public static String wrap(final String str, int wrapLength, String newLineStr, f } else { // do 
not wrap really long word, just extend beyond limit matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength)); + spaceToWrapAt = -1; if (matcher.find()) { spaceToWrapAt = matcher.start() + offset + wrapLength; + endOfWrapAt = matcher.end() + offset + wrapLength; } if (spaceToWrapAt >= 0) { wrappedLine.append(str, offset, spaceToWrapAt); wrappedLine.append(newLineStr); - offset = spaceToWrapAt + 1; + // at least offset + wrapLength >= offset + 1 + offset = endOfWrapAt; } else { wrappedLine.append(str, offset, str.length()); offset = inputLineLength; diff --git a/src/test/java/org/apache/commons/lang3/text/WordUtilsTest.java b/src/test/java/org/apache/commons/lang3/text/WordUtilsTest.java index e14b14cbf..479b62640 100644 --- a/src/test/java/org/apache/commons/lang3/text/WordUtilsTest.java +++ b/src/test/java/org/apache/commons/lang3/text/WordUtilsTest.java @@ -20,15 +20,19 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTimeout; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.params.provider.Arguments.arguments; import java.lang.reflect.Constructor; import java.lang.reflect.Modifier; -import java.time.Duration; +import java.util.stream.Stream; import org.apache.commons.lang3.AbstractLangTest; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; /** * Tests for WordUtils class. 
@@ -36,6 +40,42 @@ @Deprecated class WordUtilsTest extends AbstractLangTest { + static Stream<Arguments> testWrapStringIntStringBooleanString() { + return Stream.of( + // null passthrough + arguments(null, -1, false, "/", null), + // no changes test + arguments("flammable/inflammable", 30, false, "/", "flammable/inflammable"), + // wrap on / and small width + arguments("flammable/inflammable", 2, false, "/", "flammable\ninflammable"), + // wrap long words on / 1 + arguments("flammable/inflammable", 9, true, "/", "flammable\ninflammab\nle"), + // wrap long words on / 2 + arguments("flammable/inflammable", 15, true, "/", "flammable\ninflammable"), + // wrap long words on / 3 + arguments("flammableinflammable", 15, true, "/", "flammableinflam\nmable"), + // default values + arguments("a/a/a/a", -1, false, "/", "a\na\na\na"), + arguments("a a a a", 1, false, null, "a\na\na\na"), + // strip leading / keep trailing + arguments("///abc///def///ghi", 3, false, "/", "abc\ndef\nghi"), + arguments("///abc///def///ghi", 4, false, "/", "abc/\ndef/\nghi"), + arguments("///abc///def///ghi", 5, false, "/", "abc//\ndef//\nghi"), + // keep only two trailing, wrap on third + arguments("///abc///def///ghi", 6, false, "/", "abc//\ndef//\nghi"), + // zero-width regex match must advance to avoid an infinite loop + arguments("abcabc", 3, false, "(?=a)", "abc\nabc"), + arguments("abcdefabcdef", 4, false, "(?=a)", "abcdef\nabcdef"), + arguments("abcdefabcdef", 4, true, "(?=a)", "abcd\nef\nabcd\nef"), + // width two regex + arguments("abc\\/abc", 3, false, "\\\\/", "abc\nabc"), + arguments("abcdef\\/abcdef", 4, false, "\\\\/", "abcdef\nabcdef"), + arguments("abcdef\\/abcdef", 4, true, "\\\\/", "abcd\nef\nabcd\nef"), + // variable-width regex + arguments(".abc.-def.--ghi", 5, false, "[.]-*", "abc\ndef\nghi") + ); + } + @Test void testCapitalize_String() { assertNull(WordUtils.capitalize(null)); @@ -406,34 +446,14 @@ void testWrap_StringIntStringBoolean() { assertEquals(expected, 
WordUtils.wrap(input, 20, "\n", true)); } - @Test - void testWrap_StringIntStringBooleanString() { - - //no changes test - String input = "flammable/inflammable"; - String expected = "flammable/inflammable"; - assertEquals(expected, WordUtils.wrap(input, 30, "\n", false, "/")); - - // wrap on / and small width - expected = "flammable\ninflammable"; - assertEquals(expected, WordUtils.wrap(input, 2, "\n", false, "/")); - - // wrap long words on / 1 - expected = "flammable\ninflammab\nle"; - assertEquals(expected, WordUtils.wrap(input, 9, "\n", true, "/")); - - // wrap long words on / 2 - expected = "flammable\ninflammable"; - assertEquals(expected, WordUtils.wrap(input, 15, "\n", true, "/")); - - // wrap long words on / 3 - input = "flammableinflammable"; - expected = "flammableinflam\nmable"; - assertEquals(expected, WordUtils.wrap(input, 15, "\n", true, "/")); - } - - @Test - void testZeroWidthWrapOnRegex() { - assertTimeout(Duration.ofSeconds(2), () -> assertNotNull(WordUtils.wrap("abcdef", 3, "\n", false, "(?=a)"))); + @ParameterizedTest + @MethodSource + @Timeout(2) + void testWrapStringIntStringBooleanString(final String str, final int wrapLength, final boolean wrapLongWords, final String wrapOn, final String expected) { + assertEquals(expected, WordUtils.wrap(str, wrapLength, "\n", wrapLongWords, wrapOn)); + final String sep = System.lineSeparator(); + if (!sep.equals("\n")) { + assertEquals(expected != null ? expected.replace("\n", sep) : null, WordUtils.wrap(str, wrapLength, null, wrapLongWords, wrapOn)); + } } }
