This is an automated email from the ASF dual-hosted git repository.
ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-text.git
The following commit(s) were added to refs/heads/master by this push:
new 8c61f12d [TEXT-175] Fix regression for determining whitespace in
WordUtils (#519)
8c61f12d is described below
commit 8c61f12d44f5ec744a11164d1d8a1cca58e190d3
Author: seanfabs <[email protected]>
AuthorDate: Fri Mar 29 12:22:51 2024 +0000
[TEXT-175] Fix regression for determining whitespace in WordUtils (#519)
* Fix regression for determining whitespace
* Declutter
---------
Co-authored-by: sean.fabri <[email protected]>
Co-authored-by: Gary Gregory <[email protected]>
---
.../java/org/apache/commons/text/WordUtils.java | 41 +++++++++++-----------
.../org/apache/commons/text/WordUtilsTest.java | 4 +++
2 files changed, 25 insertions(+), 20 deletions(-)
diff --git a/src/main/java/org/apache/commons/text/WordUtils.java
b/src/main/java/org/apache/commons/text/WordUtils.java
index 306c68af..ac550b0d 100644
--- a/src/main/java/org/apache/commons/text/WordUtils.java
+++ b/src/main/java/org/apache/commons/text/WordUtils.java
@@ -18,6 +18,7 @@ package org.apache.commons.text;
import java.util.HashSet;
import java.util.Set;
+import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -170,7 +171,7 @@ public class WordUtils {
if (StringUtils.isEmpty(str)) {
return str;
}
- final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
+ final Predicate<Integer> isDelimiter =
generateIsDelimiterFunction(delimiters);
final int strLen = str.length();
final int[] newCodePoints = new int[strLen];
int outOffset = 0;
@@ -179,7 +180,7 @@ public class WordUtils {
for (int index = 0; index < strLen;) {
final int codePoint = str.codePointAt(index);
- if (delimiterSet.contains(codePoint)) {
+ if (isDelimiter.test(codePoint)) {
capitalizeNext = true;
newCodePoints[outOffset++] = codePoint;
index += Character.charCount(codePoint);
@@ -290,26 +291,26 @@ public class WordUtils {
}
/**
- * Converts an array of delimiters to a hash set of code points. Code
point of space(32) is added as the default
- * value if delimiters is null. The generated hash set provides O(1)
lookup time.
+ * Given the array of delimiters supplied, returns a function determining
whether a character code point is a delimiter.
+ * The function provides O(1) lookup time.
+ * Whitespace is defined by {@link Character#isWhitespace(int)} and is
used as the default value if delimiters is null.
*
- * @param delimiters set of characters to determine capitalization, null
means whitespace
- * @return Set<Integer>
+ * @param delimiters set of characters to determine delimiters, null means
whitespace
+ * @return {@code Predicate<Integer>} taking a code point value as an argument and
returning true if a delimiter.
*/
- private static Set<Integer> generateDelimiterSet(final char[] delimiters) {
- final Set<Integer> delimiterHashSet = new HashSet<>();
+ private static Predicate<Integer> generateIsDelimiterFunction(final char[]
delimiters) {
+ final Predicate<Integer> isDelimiter;
if (delimiters == null || delimiters.length == 0) {
- if (delimiters == null) {
- delimiterHashSet.add(Character.codePointAt(new char[] {' '},
0));
+ isDelimiter = delimiters == null ? Character::isWhitespace : c ->
false;
+ } else {
+ Set<Integer> delimiterSet = new HashSet<>();
+ for (int index = 0; index < delimiters.length; index++) {
+ delimiterSet.add(Character.codePointAt(delimiters, index));
}
-
- return delimiterHashSet;
+ isDelimiter = delimiterSet::contains;
}
- for (int index = 0; index < delimiters.length; index++) {
- delimiterHashSet.add(Character.codePointAt(delimiters, index));
- }
- return delimiterHashSet;
+ return isDelimiter;
}
/**
@@ -368,7 +369,7 @@ public class WordUtils {
if (delimiters != null && delimiters.length == 0) {
return StringUtils.EMPTY;
}
- final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
+ final Predicate<Integer> isDelimiter =
generateIsDelimiterFunction(delimiters);
final int strLen = str.length();
final int[] newCodePoints = new int[strLen / 2 + 1];
int count = 0;
@@ -376,7 +377,7 @@ public class WordUtils {
for (int i = 0; i < strLen;) {
final int codePoint = str.codePointAt(i);
- if (delimiterSet.contains(codePoint) || delimiters == null &&
Character.isWhitespace(codePoint)) {
+ if (isDelimiter.test(codePoint)) {
lastWasGap = true;
} else if (lastWasGap) {
newCodePoints[count++] = codePoint;
@@ -534,7 +535,7 @@ public class WordUtils {
if (StringUtils.isEmpty(str)) {
return str;
}
- final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
+ final Predicate<Integer> isDelimiter =
generateIsDelimiterFunction(delimiters);
final int strLen = str.length();
final int[] newCodePoints = new int[strLen];
int outOffset = 0;
@@ -543,7 +544,7 @@ public class WordUtils {
for (int index = 0; index < strLen;) {
final int codePoint = str.codePointAt(index);
- if (delimiterSet.contains(codePoint)) {
+ if (isDelimiter.test(codePoint)) {
uncapitalizeNext = true;
newCodePoints[outOffset++] = codePoint;
index += Character.charCount(codePoint);
diff --git a/src/test/java/org/apache/commons/text/WordUtilsTest.java
b/src/test/java/org/apache/commons/text/WordUtilsTest.java
index 2a6f9d68..2a397d2e 100644
--- a/src/test/java/org/apache/commons/text/WordUtilsTest.java
+++ b/src/test/java/org/apache/commons/text/WordUtilsTest.java
@@ -109,6 +109,8 @@ public class WordUtilsTest {
assertThat(WordUtils.capitalizeFully("i am HERE 123")).isEqualTo("I Am
Here 123");
assertThat(WordUtils.capitalizeFully("I AM HERE 123")).isEqualTo("I Am
Here 123");
assertThat(WordUtils.capitalizeFully("alphabet")).isEqualTo("Alphabet"); //
single word
+ assertThat(WordUtils.capitalizeFully("a\tb\nc d")).isEqualTo("A\tB\nC
D");
+ assertThat(WordUtils.capitalizeFully("and \tbut \ncleat
dome")).isEqualTo("And \tBut \nCleat Dome");
}
@Test
@@ -368,6 +370,8 @@ public class WordUtilsTest {
assertThat(WordUtils.uncapitalize("I Am Here 123")).isEqualTo("i am
here 123");
assertThat(WordUtils.uncapitalize("i am HERE 123")).isEqualTo("i am
hERE 123");
assertThat(WordUtils.uncapitalize("I AM HERE 123")).isEqualTo("i aM
hERE 123");
+ assertThat(WordUtils.uncapitalize("A\tB\nC D")).isEqualTo("a\tb\nc d");
+ assertThat(WordUtils.uncapitalize("And \tBut \nCLEAT
Dome")).isEqualTo("and \tbut \ncLEAT dome");
}
@Test