This is an automated email from the ASF dual-hosted git repository.
garydgregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-lang.git
The following commit(s) were added to refs/heads/master by this push:
new 4c6abb128 Fold supplementary code points in CharSequenceUtils.regionMatches (#1725).
4c6abb128 is described below
commit 4c6abb128d952e7369fae8a18656bed9338d293c
Author: Gary Gregory <[email protected]>
AuthorDate: Wed Jun 24 11:55:06 2026 +0000
Fold supplementary code points in CharSequenceUtils.regionMatches (#1725).
- Sort members
- Reduce vertical whitespace
---
src/changes/changes.xml | 1 +
.../apache/commons/lang3/CharSequenceUtils.java | 52 +++++++++++-----------
.../commons/lang3/CharSequenceUtilsTest.java | 22 ++++-----
3 files changed, 37 insertions(+), 38 deletions(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index ec2b36e2b..f1f89bb53 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -206,6 +206,7 @@ java.lang.NullPointerException: Cannot invoke
<action type="fix" dev="ggregory" due-to="alhudz, Gary
Gregory">Classify supplementary code points in StringUtils is* predicates
(#1724).</action>
<action type="fix" dev="ggregory" due-to="alhudz, Gary
Gregory">Emit surrogate pair for supplementary code points in UnicodeEscaper
(#1726).</action>
<action type="fix" dev="ggregory" due-to="alhudz, Gary
Gregory">Fix MethodUtils.getMatchingMethod false ambiguity on boxed arguments
(#1727).</action>
+ <action type="fix" dev="ggregory" due-to="alhudz, Gary
Gregory">Fold supplementary code points in CharSequenceUtils.regionMatches
(#1725).</action>
<!-- ADD -->
<action type="add" dev="ggregory" due-to="Gary
Gregory">Add JavaVersion.JAVA_27.</action>
<action type="add" dev="ggregory" due-to="Gary
Gregory">Add SystemUtils.IS_JAVA_27.</action>
diff --git a/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java b/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java
index 6009ba1d3..4a544eb78 100644
--- a/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java
+++ b/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java
@@ -28,15 +28,14 @@ public class CharSequenceUtils {
private static final int NOT_FOUND = -1;
/**
- * Whether the running JDK folds a supplementary code point split across a
surrogate pair when comparing case
- * insensitively in {@link String#regionMatches(boolean, int, String, int,
int)}. JDKs up to and including Java 11
- * compare surrogate by surrogate and never match such a pair; later JDKs
fold the whole code point. Probing what
- * {@link String} actually does (rather than gating on a version constant)
keeps every {@link CharSequence} type in
- * step with {@link String} on whatever JDK is running. DESERET CAPITAL
LETTER LONG I (U+10400) folds to its small
- * form (U+10428).
+ * Whether the running JDK folds a supplementary code point split across a
surrogate pair when comparing case insensitively in
+ * {@link String#regionMatches(boolean, int, String, int, int)}. JDKs up
to and including Java 11 compare surrogate by surrogate and never match such a
+ * pair; later JDKs fold the whole code point. Probing what {@link String}
actually does (rather than gating on a version constant) keeps every
+ * {@link CharSequence} type in step with {@link String} on whatever JDK
is running. DESERET CAPITAL LETTER LONG I (U+10400) folds to its small form
+ * (U+10428).
*/
- private static final boolean STRING_FOLDS_SUPPLEMENTARY_CASE =
- new String(Character.toChars(0x10400)).regionMatches(true, 0, new
String(Character.toChars(0x10428)), 0, 2);
+ private static final boolean STRING_FOLDS_SUPPLEMENTARY_CASE = new
String(Character.toChars(0x10400)).regionMatches(true, 0,
+ new String(Character.toChars(0x10428)), 0, 2);
static final int TO_STRING_LIMIT = 16;
@@ -49,6 +48,19 @@ private static boolean checkLaterThan1(final CharSequence cs, final CharSequence
return true;
}
+ /**
+ * Tests whether two code points are equal ignoring case, matching the
folding used by {@link String#regionMatches(boolean, int, String, int, int)}.
+ *
+ * @param cp1 the first code point.
+ * @param cp2 the second code point.
+ * @return whether the code points are equal ignoring case.
+ */
+ private static boolean equalsIgnoreCase(final int cp1, final int cp2) {
+ final int u1 = Character.toUpperCase(cp1);
+ final int u2 = Character.toUpperCase(cp2);
+ return u1 == u2 || Character.toLowerCase(u1) ==
Character.toLowerCase(u2);
+ }
+
/**
* Used by the indexOf(CharSequence methods) as a green implementation of
indexOf.
*
@@ -289,12 +301,12 @@ static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
/**
* Tests if two string regions are equal.
*
- * @param cs the {@link CharSequence} to be processed.
+ * @param cs the {@link CharSequence} to be processed.
* @param ignoreCase whether or not to be case-insensitive.
- * @param thisStart the index to start on the {@code cs} CharSequence.
- * @param substring the {@link CharSequence} to be looked for.
- * @param start the index to start on the {@code substring} CharSequence.
- * @param length character length of the region.
+ * @param thisStart the index to start on the {@code cs} CharSequence.
+ * @param substring the {@link CharSequence} to be looked for.
+ * @param start the index to start on the {@code substring}
CharSequence.
+ * @param length character length of the region.
* @return whether the region matched.
* @see String#regionMatches(boolean, int, String, int, int)
*/
@@ -365,20 +377,6 @@ static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, fi
return true;
}
- /**
- * Tests whether two code points are equal ignoring case, matching the
folding used by
- * {@link String#regionMatches(boolean, int, String, int, int)}.
- *
- * @param cp1 the first code point.
- * @param cp2 the second code point.
- * @return whether the code points are equal ignoring case.
- */
- private static boolean equalsIgnoreCase(final int cp1, final int cp2) {
- final int u1 = Character.toUpperCase(cp1);
- final int u2 = Character.toUpperCase(cp2);
- return u1 == u2 || Character.toLowerCase(u1) ==
Character.toLowerCase(u2);
- }
-
/**
* Returns a new {@link CharSequence} that is a subsequence of this
* sequence starting with the {@code char} value at the specified index.
diff --git a/src/test/java/org/apache/commons/lang3/CharSequenceUtilsTest.java b/src/test/java/org/apache/commons/lang3/CharSequenceUtilsTest.java
index 48bbf6b60..b45892533 100644
--- a/src/test/java/org/apache/commons/lang3/CharSequenceUtilsTest.java
+++ b/src/test/java/org/apache/commons/lang3/CharSequenceUtilsTest.java
@@ -165,6 +165,17 @@ public String toString() {
// @formatter:on
};
+ private static void assertRegionMatchesParity(final String source, final
boolean ignoreCase, final int toffset, final String other,
+ final int ooffset, final int len) {
+ // String is the reference: whatever the running JDK does for String,
every CharSequence type must match.
+ final boolean expected = source.regionMatches(ignoreCase, toffset,
other, ooffset, len);
+ final CharSequence[] sources = {source, new StringBuilder(source), new
StringBuffer(source), CharBuffer.wrap(source)};
+ for (final CharSequence cs : sources) {
+ assertEquals(expected, CharSequenceUtils.regionMatches(cs,
ignoreCase, toffset, other, ooffset, len),
+ cs.getClass().getSimpleName() + " differs from String for
" + source + " vs " + other);
+ }
+ }
+
static Stream<Arguments> lastIndexWithStandardCharSequence() {
// @formatter:off
return Stream.of(
@@ -277,17 +288,6 @@ boolean invoke() {
}
}
- private static void assertRegionMatchesParity(final String source, final
boolean ignoreCase, final int toffset, final String other,
- final int ooffset, final int len) {
- // String is the reference: whatever the running JDK does for String,
every CharSequence type must match.
- final boolean expected = source.regionMatches(ignoreCase, toffset,
other, ooffset, len);
- final CharSequence[] sources = {source, new StringBuilder(source), new
StringBuffer(source), CharBuffer.wrap(source)};
- for (final CharSequence cs : sources) {
- assertEquals(expected, CharSequenceUtils.regionMatches(cs,
ignoreCase, toffset, other, ooffset, len),
- cs.getClass().getSimpleName() + " differs from String for
" + source + " vs " + other);
- }
- }
-
/**
* A supplementary code point split across a surrogate pair must fold the
same way for every {@link CharSequence}
* type that it does for {@link String} on the running JDK. {@link
String#regionMatches(boolean, int, String, int, int)}