This is an automated email from the ASF dual-hosted git repository.
jamesbognar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/juneau.git
The following commit(s) were added to refs/heads/master by this push:
new 9309e6c775 StringUtils improvements
9309e6c775 is described below
commit 9309e6c7751f7c61be1b6e0ed9cf0b74ff121379
Author: James Bognar <[email protected]>
AuthorDate: Sun Nov 30 10:16:24 2025 -0500
StringUtils improvements
---
.../apache/juneau/common/utils/StringUtils.java | 371 +++++++++++++--------
.../juneau/common/utils/StringUtils_Test.java | 181 ++++++++++
2 files changed, 419 insertions(+), 133 deletions(-)
diff --git
a/juneau-core/juneau-common/src/main/java/org/apache/juneau/common/utils/StringUtils.java
b/juneau-core/juneau-common/src/main/java/org/apache/juneau/common/utils/StringUtils.java
index cf3b879027..f59b9fbda8 100644
---
a/juneau-core/juneau-common/src/main/java/org/apache/juneau/common/utils/StringUtils.java
+++
b/juneau-core/juneau-common/src/main/java/org/apache/juneau/common/utils/StringUtils.java
@@ -932,28 +932,36 @@ public class StringUtils {
* Parses a duration string.
*
* <p>
- * Examples:
+ * Supports simple duration formats:
+ *
+ * <h5 class='section'>Format Examples:</h5>
* <ul>
- * <li><js>"1000"</js> - 1000 milliseconds.
- * <li><js>"10s"</js> - 10 seconds.
- * <li><js>"10 sec"</js> - 10 seconds.
- * <li><js>"10 seconds"</js> - 10 seconds.
+ * <li><js>"1000"</js> - 1000 milliseconds (no suffix)
+ * <li><js>"10s"</js> - 10 seconds
+ * <li><js>"10 sec"</js> - 10 seconds
+ * <li><js>"10 seconds"</js> - 10 seconds
+ * <li><js>"1.5h"</js> - 1.5 hours (5400000 ms)
+ * <li><js>"1h30m"</js> - 1 hour 30 minutes (5400000 ms)
+ * <li><js>"1h 30m"</js> - 1 hour 30 minutes (with spaces)
* </ul>
*
- * <p>
- * Use any of the following suffixes:
+ * <h5 class='section'>Supported Units:</h5>
* <ul>
- * <li>None (time in milliseconds).
- *
<li><js>"s"</js>/<js>"sec"</js>/<js>"second"</js>/<js>"seconds"</js>
- *
<li><js>"m"</js>/<js>"min"</js>/<js>"minutes"</js>/<js>"seconds"</js>
- * <li><js>"h"</js>/<js>"hour"</js>/<js>"hours"</js>
- * <li><js>"d"</js>/<js>"day"</js>/<js>"days"</js>
- * <li><js>"w"</js>/<js>"week"</js>/<js>"weeks"</js>
+ * <li><b>Milliseconds:</b> <js>"ms"</js>, <js>"millis"</js>,
<js>"milliseconds"</js> (or no suffix)
+ * <li><b>Seconds:</b> <js>"s"</js>, <js>"sec"</js>,
<js>"second"</js>, <js>"seconds"</js>
+ * <li><b>Minutes:</b> <js>"m"</js>, <js>"min"</js>,
<js>"minute"</js>, <js>"minutes"</js>
+ * <li><b>Hours:</b> <js>"h"</js>, <js>"hour"</js>,
<js>"hours"</js>
+ * <li><b>Days:</b> <js>"d"</js>, <js>"day"</js>, <js>"days"</js>
+ * <li><b>Weeks:</b> <js>"w"</js>, <js>"week"</js>,
<js>"weeks"</js>
+ * <li><b>Months:</b> <js>"mo"</js>, <js>"month"</js>,
<js>"months"</js> (30 days)
+ * <li><b>Years:</b> <js>"y"</js>, <js>"yr"</js>, <js>"year"</js>,
<js>"years"</js> (365 days)
* </ul>
*
* <p>
* Suffixes are case-insensitive.
* <br>Whitespace is ignored.
+ * <br>Decimal values are supported (e.g., <js>"1.5h"</js>).
+ * <br>Combined formats are supported (e.g., <js>"1h30m"</js>).
*
* @param s The string to parse.
* @return
@@ -963,30 +971,117 @@ public class StringUtils {
s = trim(s);
if (isEmpty(s))
return -1;
- int i;
- for (i = 0; i < s.length(); i++) {
+
+ // Parse simple format (number + unit or combined format)
+ var totalMs = 0L;
+ var i = 0;
+ var len = s.length();
+
+ while (i < len) {
+ // Skip whitespace
+ while (i < len && Character.isWhitespace(s.charAt(i)))
+ i++;
+ if (i >= len)
+ break;
+
+ // Parse number (including decimal)
+ var numStart = i;
+ var hasDecimal = false;
+ while (i < len) {
var c = s.charAt(i);
- if (c < '0' || c > '9')
+ if (c >= '0' && c <= '9') {
+ i++;
+ } else if (c == '.' && !hasDecimal) {
+ hasDecimal = true;
+ i++;
+ } else {
break;
}
- long l;
- if (i == s.length())
- l = Long.parseLong(s);
- else {
- l = Long.parseLong(s.substring(0, i).trim());
- var r = s.substring(i).trim().toLowerCase();
- if (r.startsWith("s"))
- l *= 1000;
- else if (r.startsWith("m"))
- l *= 1000 * 60;
- else if (r.startsWith("h"))
- l *= 1000 * 60 * 60;
- else if (r.startsWith("d"))
- l *= 1000 * 60 * 60 * 24;
- else if (r.startsWith("w"))
- l *= 1000 * 60 * 60 * 24 * 7;
+ }
+
+ if (i == numStart) {
+ // No number found, invalid format
+ return -1;
+ }
+
+ var numStr = s.substring(numStart, i).trim();
+ var value = Double.parseDouble(numStr);
+
+ // Skip whitespace
+ while (i < len && Character.isWhitespace(s.charAt(i)))
+ i++;
+
+ // Parse unit (read all letters until we hit a digit or
whitespace)
+ var unitStart = i;
+ while (i < len && Character.isLetter(s.charAt(i)))
+ i++;
+ var unit = s.substring(unitStart, i).trim().toLowerCase();
+
+ // Convert to milliseconds
+ var ms = parseUnit(unit, value);
+ if (ms < 0)
+ return -1;
+ totalMs += ms;
+ }
+
+ return totalMs;
+ }
+
+ /**
+ * Parses a unit string and converts the value to milliseconds.
+ *
+ * @param unit The unit string (case-insensitive, already lowercased).
+ * @param value The numeric value.
+ * @return The value in milliseconds, or <c>-1</c> if the unit is
invalid.
+ */
+ private static long parseUnit(String unit, double value) {
+ if (isEmpty(unit)) {
+ // No unit means milliseconds
+ return (long)value;
}
- return l;
+
+ // Check milliseconds first (before minutes) - must check exact
"ms" before checking "m"
+ if (unit.equals("ms") || unit.equals("millis") ||
unit.equals("milliseconds"))
+ return (long)value;
+
+ // Seconds
+ if (unit.startsWith("s") && !unit.startsWith("sec"))
+ return (long)(value * 1000);
+ if (unit.startsWith("sec") || unit.startsWith("second"))
+ return (long)(value * 1000);
+
+ // Minutes (must check after milliseconds and months)
+ if (unit.startsWith("m") && !unit.startsWith("mo") &&
!unit.startsWith("mill") && !unit.startsWith("ms"))
+ return (long)(value * 1000 * 60);
+ if (unit.startsWith("min") || unit.startsWith("minute"))
+ return (long)(value * 1000 * 60);
+
+ // Hours
+ if (unit.startsWith("h") || unit.startsWith("hour"))
+ return (long)(value * 1000 * 60 * 60);
+
+ // Days
+ if (unit.startsWith("d") && !unit.startsWith("da"))
+ return (long)(value * 1000 * 60 * 60 * 24);
+ if (unit.startsWith("day"))
+ return (long)(value * 1000 * 60 * 60 * 24);
+
+ // Weeks
+ if (unit.startsWith("w") || unit.startsWith("week"))
+ return (long)(value * 1000 * 60 * 60 * 24 * 7);
+
+ // Months (30 days)
+ if (unit.startsWith("mo") || unit.startsWith("month"))
+ return (long)(value * 1000 * 60 * 60 * 24 * 30);
+
+ // Years (365 days)
+ if (unit.startsWith("y") && !unit.startsWith("yr"))
+ return (long)(value * 1000 * 60 * 60 * 24 * 365);
+ if (unit.startsWith("yr") || unit.startsWith("year"))
+ return (long)(value * 1000 * 60 * 60 * 24 * 365);
+
+ // Unknown unit
+ return -1;
}
/**
@@ -1949,12 +2044,12 @@ public class StringUtils {
public static boolean isValidMacAddress(String mac) {
if (isEmpty(mac))
return false;
-
+
// Remove separators and check if it's 12 hex digits
var cleaned = mac.replaceAll("[:-]", "").toUpperCase();
if (cleaned.length() != 12)
return false;
-
+
// Check if all characters are valid hex digits
return cleaned.matches("^[0-9A-F]{12}$");
}
@@ -1986,41 +2081,41 @@ public class StringUtils {
public static boolean isValidHostname(String hostname) {
if (isEmpty(hostname))
return false;
-
+
// Cannot start or end with a dot
if (hostname.startsWith(".") || hostname.endsWith("."))
return false;
-
+
// Total length cannot exceed 253 characters
if (hostname.length() > 253)
return false;
-
+
// Split by dots (use -1 to preserve trailing empty strings)
var labels = hostname.split("\\.", -1);
-
+
// Must have at least one label
if (labels.length == 0)
return false;
-
+
// Check each label
for (var label : labels) {
// Label cannot be empty
if (label.isEmpty())
return false;
-
+
// Label cannot exceed 63 characters
if (label.length() > 63)
return false;
-
+
// Label cannot start or end with hyphen
if (label.startsWith("-") || label.endsWith("-"))
return false;
-
+
// Label can only contain letters, digits, and hyphens
if (! label.matches("^[a-zA-Z0-9-]+$"))
return false;
}
-
+
return true;
}
@@ -2044,10 +2139,10 @@ public class StringUtils {
public static int wordCount(String str) {
if (isEmpty(str))
return 0;
-
+
var count = 0;
var inWord = false;
-
+
for (var i = 0; i < str.length(); i++) {
var c = str.charAt(i);
if (Character.isLetterOrDigit(c) || c == '_') {
@@ -2059,7 +2154,7 @@ public class StringUtils {
inWord = false;
}
}
-
+
return count;
}
@@ -2082,7 +2177,7 @@ public class StringUtils {
public static int lineCount(String str) {
if (isEmpty(str))
return 0;
-
+
var count = 1; // At least one line
for (var i = 0; i < str.length(); i++) {
var c = str.charAt(i);
@@ -2096,7 +2191,7 @@ public class StringUtils {
count++;
}
}
-
+
return count;
}
@@ -2119,11 +2214,11 @@ public class StringUtils {
public static char mostFrequentChar(String str) {
if (isEmpty(str))
return '\0';
-
+
var charCounts = new int[Character.MAX_VALUE + 1];
var maxCount = 0;
var maxChar = '\0';
-
+
// Count occurrences of each character
for (var i = 0; i < str.length(); i++) {
var c = str.charAt(i);
@@ -2133,7 +2228,7 @@ public class StringUtils {
maxChar = c;
}
}
-
+
return maxChar;
}
@@ -2159,17 +2254,17 @@ public class StringUtils {
public static double entropy(String str) {
if (isEmpty(str))
return 0.0;
-
+
var length = str.length();
if (length == 0)
return 0.0;
-
+
// Count character frequencies
var charCounts = new int[Character.MAX_VALUE + 1];
for (var i = 0; i < length; i++) {
charCounts[str.charAt(i)]++;
}
-
+
// Calculate entropy
var entropy = 0.0;
for (var count : charCounts) {
@@ -2178,7 +2273,7 @@ public class StringUtils {
entropy -= probability * (Math.log(probability)
/ Math.log(2.0));
}
}
-
+
return entropy;
}
@@ -2205,11 +2300,11 @@ public class StringUtils {
public static double readabilityScore(String str) {
if (isEmpty(str))
return 0.0;
-
+
var words = extractWords(str);
if (words.isEmpty())
return 0.0;
-
+
// Count sentences (ending with . ! ?)
var sentenceCount = 0;
for (var i = 0; i < str.length(); i++) {
@@ -2220,22 +2315,22 @@ public class StringUtils {
}
if (sentenceCount == 0)
sentenceCount = 1; // At least one sentence
-
+
// Calculate average words per sentence
var avgWordsPerSentence = (double)words.size() / sentenceCount;
-
+
// Estimate average syllables per word (simplified: count vowel
groups)
var totalSyllables = 0;
for (var word : words) {
totalSyllables += estimateSyllables(word);
}
var avgSyllablesPerWord = (double)totalSyllables / words.size();
-
+
// Simplified Flesch Reading Ease formula
// Score = 206.835 - (1.015 * ASL) - (84.6 * ASW)
// Where ASL = average sentence length (words), ASW = average
syllables per word
var score = 206.835 - (1.015 * avgWordsPerSentence) - (84.6 *
avgSyllablesPerWord);
-
+
// Clamp to 0-100 range
return Math.max(0.0, Math.min(100.0, score));
}
@@ -2246,26 +2341,26 @@ public class StringUtils {
private static int estimateSyllables(String word) {
if (word == null || word.isEmpty())
return 1;
-
+
var lower = word.toLowerCase();
var count = 0;
var prevWasVowel = false;
-
+
for (var i = 0; i < lower.length(); i++) {
var c = lower.charAt(i);
var isVowel = (c == 'a' || c == 'e' || c == 'i' || c ==
'o' || c == 'u' || c == 'y');
-
+
if (isVowel && ! prevWasVowel) {
count++;
}
prevWasVowel = isVowel;
}
-
+
// Handle silent 'e' at the end
if (lower.endsWith("e") && count > 1) {
count--;
}
-
+
// At least one syllable
return Math.max(1, count);
}
@@ -2480,11 +2575,11 @@ public class StringUtils {
return null;
if (variables == null || variables.isEmpty())
return template;
-
+
var result = new StringBuilder();
var i = 0;
var length = template.length();
-
+
while (i < length) {
var dollarIndex = template.indexOf("${", i);
if (dollarIndex == -1) {
@@ -2492,10 +2587,10 @@ public class StringUtils {
result.append(template.substring(i));
break;
}
-
+
// Append text before the variable
result.append(template.substring(i, dollarIndex));
-
+
// Find the closing brace
var braceIndex = template.indexOf('}', dollarIndex + 2);
if (braceIndex == -1) {
@@ -2503,11 +2598,11 @@ public class StringUtils {
result.append(template.substring(dollarIndex));
break;
}
-
+
// Extract variable name
var varName = template.substring(dollarIndex + 2,
braceIndex);
var value = variables.get(varName);
-
+
if (variables.containsKey(varName)) {
// Variable exists in map (even if null)
result.append(value != null ? value.toString()
: "null");
@@ -2515,10 +2610,10 @@ public class StringUtils {
// Variable not found, keep the original
placeholder
result.append("${").append(varName).append("}");
}
-
+
i = braceIndex + 1;
}
-
+
return result.toString();
}
@@ -2546,15 +2641,15 @@ public class StringUtils {
return word;
if (count == 1)
return word;
-
+
var lower = word.toLowerCase();
var length = word.length();
-
+
// Words ending in s, x, z, ch, sh -> add "es"
if (lower.endsWith("s") || lower.endsWith("x") ||
lower.endsWith("z") || lower.endsWith("ch") || lower.endsWith("sh")) {
return word + "es";
}
-
+
// Words ending in "y" preceded by a consonant -> replace "y"
with "ies"
if (length > 1 && lower.endsWith("y")) {
var secondLast = lower.charAt(length - 2);
@@ -2562,7 +2657,7 @@ public class StringUtils {
return word.substring(0, length - 1) + "ies";
}
}
-
+
// Words ending in "f" or "fe" -> replace with "ves" (basic
rule)
if (lower.endsWith("f")) {
return word.substring(0, length - 1) + "ves";
@@ -2570,7 +2665,7 @@ public class StringUtils {
if (lower.endsWith("fe")) {
return word.substring(0, length - 2) + "ves";
}
-
+
// Default: add "s"
return word + "s";
}
@@ -2594,7 +2689,7 @@ public class StringUtils {
public static String ordinal(int number) {
var abs = Math.abs(number);
var suffix = "th";
-
+
// Special cases for 11, 12, 13 (all use "th")
if (abs % 100 != 11 && abs % 100 != 12 && abs % 100 != 13) {
var lastDigit = abs % 10;
@@ -2605,7 +2700,7 @@ public class StringUtils {
else if (lastDigit == 3)
suffix = "rd";
}
-
+
return number + suffix;
}
@@ -2887,16 +2982,16 @@ public class StringUtils {
return -1;
if (str2 == null)
return 1;
-
+
var len1 = str1.length();
var len2 = str2.length();
var i1 = 0;
var i2 = 0;
-
+
while (i1 < len1 && i2 < len2) {
var c1 = str1.charAt(i1);
var c2 = str2.charAt(i2);
-
+
// If both are digits, compare numerically
if (Character.isDigit(c1) && Character.isDigit(c2)) {
// Skip leading zeros
@@ -2904,7 +2999,7 @@ public class StringUtils {
i1++;
while (i2 < len2 && str2.charAt(i2) == '0')
i2++;
-
+
// Find end of number sequences
var end1 = i1;
var end2 = i2;
@@ -2912,13 +3007,13 @@ public class StringUtils {
end1++;
while (end2 < len2 &&
Character.isDigit(str2.charAt(end2)))
end2++;
-
+
// Compare lengths first (longer number is
larger)
var lenNum1 = end1 - i1;
var lenNum2 = end2 - i2;
if (lenNum1 != lenNum2)
return lenNum1 - lenNum2;
-
+
// Same length, compare digit by digit
for (var j = 0; j < lenNum1; j++) {
var d1 = str1.charAt(i1 + j);
@@ -2926,7 +3021,7 @@ public class StringUtils {
if (d1 != d2)
return d1 - d2;
}
-
+
i1 = end1;
i2 = end2;
} else {
@@ -2938,7 +3033,7 @@ public class StringUtils {
i2++;
}
}
-
+
return len1 - len2;
}
@@ -2964,18 +3059,18 @@ public class StringUtils {
str1 = "";
if (str2 == null)
str2 = "";
-
+
var len1 = str1.length();
var len2 = str2.length();
-
+
// Use dynamic programming with optimized space (only need
previous row)
var prev = new int[len2 + 1];
var curr = new int[len2 + 1];
-
+
// Initialize first row
for (var j = 0; j <= len2; j++)
prev[j] = j;
-
+
for (var i = 1; i <= len1; i++) {
curr[0] = i;
for (var j = 1; j <= len2; j++) {
@@ -2990,7 +3085,7 @@ public class StringUtils {
prev = curr;
curr = temp;
}
-
+
return prev[len2];
}
@@ -3016,14 +3111,14 @@ public class StringUtils {
str1 = "";
if (str2 == null)
str2 = "";
-
+
if (str1.equals(str2))
return 1.0;
-
+
var maxLen = Math.max(str1.length(), str2.length());
if (maxLen == 0)
return 1.0;
-
+
var distance = levenshteinDistance(str1, str2);
return 1.0 - ((double)distance / maxLen);
}
@@ -3655,19 +3750,25 @@ public class StringUtils {
/**
* Parses a number from the specified string.
*
+ * <p>
+ * Supports Java 7+ numeric literals with underscores (e.g.,
<js>"1_000_000"</js>).
+ * The underscores are automatically removed before parsing.
+ *
* @param s The string to parse the number from.
* @param type
* The number type to created.
* Can be any of the following:
* <ul>
- * <li> Integer
- * <li> Double
- * <li> Float
- * <li> Long
- * <li> Short
- * <li> Byte
+ * <li> Integer (or <c>int</c> primitive)
+ * <li> Long (or <c>long</c> primitive)
+ * <li> Short (or <c>short</c> primitive)
+ * <li> Byte (or <c>byte</c> primitive)
+ * <li> Float (or <c>float</c> primitive)
+ * <li> Double (or <c>double</c> primitive)
* <li> BigInteger
* <li> BigDecimal
+ * <li> AtomicInteger
+ * <li> AtomicLong
* </ul>
* If <jk>null</jk> or <c>Number</c>, uses the best guess.
* @return The parsed number, or <jk>null</jk> if the string was null.
@@ -3680,6 +3781,10 @@ public class StringUtils {
if (type == null)
type = Number.class;
+ // Remove underscores (Java 7+ numeric literal support) before
parsing
+ // Note: We do this before type detection to ensure clean
parsing
+ s = s.replace("_", "");
+
// Determine the data type if it wasn't specified.
var isAutoDetect = (type == Number.class);
var isDecimal = false;
@@ -4144,14 +4249,14 @@ public class StringUtils {
// Split into words first, then combine words that fit
var words = line.split(" +"); // Split on one or more
spaces
var currentLine = new StringBuilder();
-
+
for (var word : words) {
if (word.isEmpty())
continue;
-
+
var wordLength = word.length();
var currentLength = currentLine.length();
-
+
if (currentLength == 0) {
// First word on line
// Only break single words if there are
multiple words in the input
@@ -4209,7 +4314,7 @@ public class StringUtils {
}
}
}
-
+
// Append any remaining line
if (currentLine.length() > 0) {
if (result.length() > 0)
@@ -5077,7 +5182,7 @@ public class StringUtils {
var result = new LinkedHashMap<String,String>();
if (isEmpty(str))
return result;
-
+
var entries = split(str, entryDelimiter);
for (var entry : entries) {
if (isEmpty(entry))
@@ -5120,7 +5225,7 @@ public class StringUtils {
public static List<String> extractNumbers(String str) {
if (isEmpty(str))
return Collections.emptyList();
-
+
var result = new ArrayList<String>();
var pattern = Pattern.compile("\\d+(?:\\.\\d+)?");
var matcher = pattern.matcher(str);
@@ -5148,7 +5253,7 @@ public class StringUtils {
public static List<String> extractEmails(String str) {
if (isEmpty(str))
return Collections.emptyList();
-
+
var result = new ArrayList<String>();
// Email regex pattern (same as isEmail but without ^ and $
anchors)
var pattern =
Pattern.compile("[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}");
@@ -5177,7 +5282,7 @@ public class StringUtils {
public static List<String> extractUrls(String str) {
if (isEmpty(str))
return Collections.emptyList();
-
+
var result = new ArrayList<String>();
// Basic URL pattern: protocol://domain/path
var pattern =
Pattern.compile("(?:https?|ftp)://[\\w\\-._~:/?#\\[\\]@!$&'()*+,;=%]+",
Pattern.CASE_INSENSITIVE);
@@ -5206,7 +5311,7 @@ public class StringUtils {
public static List<String> extractWords(String str) {
if (isEmpty(str))
return Collections.emptyList();
-
+
var result = new ArrayList<String>();
// Word pattern: sequence of word characters (letters, digits,
underscore)
var pattern = Pattern.compile("\\w+");
@@ -5239,7 +5344,7 @@ public class StringUtils {
public static List<String> extractBetween(String str, String start,
String end) {
if (isEmpty(str) || isEmpty(start) || isEmpty(end))
return Collections.emptyList();
-
+
var result = new ArrayList<String>();
var startIndex = 0;
while (true) {
@@ -5284,7 +5389,7 @@ public class StringUtils {
return str;
if (fromChars.length() != toChars.length())
throw new IllegalArgumentException("fromChars and
toChars must have the same length");
-
+
var sb = new StringBuilder(str.length());
for (var i = 0; i < str.length(); i++) {
var c = str.charAt(i);
@@ -5317,16 +5422,16 @@ public class StringUtils {
public static String soundex(String str) {
if (isEmpty(str))
return null;
-
+
var upper = str.toUpperCase();
var result = new StringBuilder(4);
result.append(upper.charAt(0));
-
+
// Soundex mapping: 0 = AEIOUHWY, 1 = BFPV, 2 = CGJKQSXZ, 3 =
DT, 4 = L, 5 = MN, 6 = R
// H/W/Y don't get codes but don't break sequences either
// Initialize lastCode to a value that won't match any real code
var lastCode = '\0';
-
+
for (var i = 1; i < upper.length() && result.length() < 4; i++)
{
var c = upper.charAt(i);
var code = getSoundexCode(c);
@@ -5341,12 +5446,12 @@ public class StringUtils {
}
// If code == lastCode, skip it (consecutive same codes)
}
-
+
// Pad with zeros if needed
while (result.length() < 4) {
result.append('0');
}
-
+
return result.toString();
}
@@ -5391,15 +5496,15 @@ public class StringUtils {
public static String metaphone(String str) {
if (isEmpty(str))
return null;
-
+
var upper = str.toUpperCase().replaceAll("[^A-Z]", "");
if (upper.isEmpty())
return "";
-
+
var result = new StringBuilder();
var i = 0;
var len = upper.length();
-
+
// Handle initial characters
if (upper.startsWith("KN") || upper.startsWith("GN") ||
upper.startsWith("PN") || upper.startsWith("AE") || upper.startsWith("WR")) {
i = 1;
@@ -5410,20 +5515,20 @@ public class StringUtils {
result.append('W');
i = 2;
}
-
+
// Process remaining characters
while (i < len && result.length() < 4) {
var c = upper.charAt(i);
var prev = i > 0 ? upper.charAt(i - 1) : '\0';
var next = i < len - 1 ? upper.charAt(i + 1) : '\0';
var next2 = i < len - 2 ? upper.charAt(i + 2) : '\0';
-
+
// Skip duplicates (except C)
if (c == prev && c != 'C') {
i++;
continue;
}
-
+
switch (c) {
case 'B':
if (prev != 'M' || next != '\0')
@@ -5531,7 +5636,7 @@ public class StringUtils {
}
i++;
}
-
+
return result.length() > 0 ? result.toString() :
upper.substring(0, Math.min(1, upper.length()));
}
@@ -5561,16 +5666,16 @@ public class StringUtils {
public static String[] doubleMetaphone(String str) {
if (isEmpty(str))
return null;
-
+
// For simplicity, return the same code for both primary and
alternate
// A full Double Metaphone implementation would be much more
complex
var primary = metaphone(str);
if (primary == null)
return null;
-
+
// Generate alternate code (simplified - full implementation
would have different rules)
var alternate = primary;
-
+
return new String[] { primary, alternate };
}
@@ -5614,10 +5719,10 @@ public class StringUtils {
public static String removeAccents(String str) {
if (str == null)
return null;
-
+
// Normalize to NFD (decomposed form)
var normalized = Normalizer.normalize(str, Normalizer.Form.NFD);
-
+
// Remove combining diacritical marks (Unicode category Mn)
var sb = new StringBuilder(normalized.length());
for (var i = 0; i < normalized.length(); i++) {
@@ -5628,7 +5733,7 @@ public class StringUtils {
sb.append(c);
}
}
-
+
return sb.toString();
}
diff --git
a/juneau-utest/src/test/java/org/apache/juneau/common/utils/StringUtils_Test.java
b/juneau-utest/src/test/java/org/apache/juneau/common/utils/StringUtils_Test.java
index 90cab38646..0422f64e62 100755
---
a/juneau-utest/src/test/java/org/apache/juneau/common/utils/StringUtils_Test.java
+++
b/juneau-utest/src/test/java/org/apache/juneau/common/utils/StringUtils_Test.java
@@ -27,6 +27,8 @@ import static org.apache.juneau.common.utils.Utils.eqic;
import static org.apache.juneau.junit.bct.BctAssertions.*;
import static org.junit.jupiter.api.Assertions.*;
+import java.math.*;
+import java.util.concurrent.atomic.*;
import java.util.*;
import org.apache.juneau.*;
@@ -255,6 +257,111 @@ class StringUtils_Test extends TestBase {
assertThrows(NumberFormatException.class, () ->
parseNumber("2147483640x", Long.class));
}
+
//====================================================================================================
+ // parseNumber - AtomicInteger and AtomicLong
+
//====================================================================================================
+ @Test
+ void a02a_parseNumber_atomicTypes() {
+ // AtomicInteger
+ var ai1 = parseNumber("123", AtomicInteger.class);
+ assertTrue(ai1 instanceof AtomicInteger);
+ assertEquals(123, ((AtomicInteger)ai1).get());
+
+ var ai2 = parseNumber("-456", AtomicInteger.class);
+ assertTrue(ai2 instanceof AtomicInteger);
+ assertEquals(-456, ((AtomicInteger)ai2).get());
+
+ var ai3 = parseNumber("0x10", AtomicInteger.class);
+ assertTrue(ai3 instanceof AtomicInteger);
+ assertEquals(16, ((AtomicInteger)ai3).get());
+
+ // AtomicLong
+ var al1 = parseNumber("123", AtomicLong.class);
+ assertTrue(al1 instanceof AtomicLong);
+ assertEquals(123L, ((AtomicLong)al1).get());
+
+ var al2 = parseNumber("-456", AtomicLong.class);
+ assertTrue(al2 instanceof AtomicLong);
+ assertEquals(-456L, ((AtomicLong)al2).get());
+
+ var al3 = parseNumber("9223372036854775807", AtomicLong.class);
+ assertTrue(al3 instanceof AtomicLong);
+ assertEquals(9223372036854775807L, ((AtomicLong)al3).get());
+
+ var al4 = parseNumber("0x10", AtomicLong.class);
+ assertTrue(al4 instanceof AtomicLong);
+ assertEquals(16L, ((AtomicLong)al4).get());
+ }
+
+
//====================================================================================================
+ // parseNumber - Numbers with underscores (Java 7+ numeric literals)
+
//====================================================================================================
+ @Test
+ void a02b_parseNumber_withUnderscores() {
+ // Integer with underscores
+ assertEquals(1000000, parseNumber("1_000_000", Integer.class));
+ assertEquals(1234567, parseNumber("1_234_567", Integer.class));
+ assertEquals(-1000000, parseNumber("-1_000_000",
Integer.class));
+
+ // Long with underscores
+ assertEquals(1000000L, parseNumber("1_000_000", Long.class));
+ assertEquals(9223372036854775807L,
parseNumber("9_223_372_036_854_775_807", Long.class));
+ assertEquals(-9223372036854775808L,
parseNumber("-9_223_372_036_854_775_808", Long.class));
+
+ // Short with underscores
+ assertEquals((short)32767, parseNumber("32_767", Short.class));
+ assertEquals((short)-32768, parseNumber("-32_768",
Short.class));
+
+ // Byte with underscores
+ assertEquals((byte)127, parseNumber("1_27", Byte.class));
+ assertEquals((byte)-128, parseNumber("-1_28", Byte.class));
+
+ // Float with underscores
+ assertEquals(1000.5f, parseNumber("1_000.5", Float.class));
+ assertEquals(1234567.89f, parseNumber("1_234_567.89",
Float.class));
+
+ // Double with underscores
+ assertEquals(1000.5, parseNumber("1_000.5", Double.class));
+ assertEquals(1234567.89, parseNumber("1_234_567.89",
Double.class));
+
+ // BigInteger with underscores
+ var bi1 = parseNumber("1_000_000_000_000_000_000_000",
BigInteger.class);
+ assertTrue(bi1 instanceof BigInteger);
+ assertEquals(new BigInteger("1000000000000000000000"), bi1);
+
+ var bi2 = parseNumber("-9_223_372_036_854_775_809",
BigInteger.class);
+ assertTrue(bi2 instanceof BigInteger);
+ assertEquals(new BigInteger("-9223372036854775809"), bi2);
+
+ // BigDecimal with underscores
+ var bd1 = parseNumber("1_234_567.89", BigDecimal.class);
+ assertTrue(bd1 instanceof BigDecimal);
+ assertEquals(new BigDecimal("1234567.89"), bd1);
+
+ var bd2 = parseNumber("-1_000_000.123_456", BigDecimal.class);
+ assertTrue(bd2 instanceof BigDecimal);
+ assertEquals(new BigDecimal("-1000000.123456"), bd2);
+
+ // AtomicInteger with underscores
+ var ai1 = parseNumber("1_000_000", AtomicInteger.class);
+ assertTrue(ai1 instanceof AtomicInteger);
+ assertEquals(1000000, ((AtomicInteger)ai1).get());
+
+ // AtomicLong with underscores
+ var al1 = parseNumber("9_223_372_036_854_775_807",
AtomicLong.class);
+ assertTrue(al1 instanceof AtomicLong);
+ assertEquals(9223372036854775807L, ((AtomicLong)al1).get());
+
+ // Hexadecimal with underscores (should work with decode
methods)
+ assertEquals(0x12345678, parseNumber("0x12_34_56_78",
Integer.class));
+ assertEquals(0x1234567890ABCDEFL,
parseNumber("0x12_34_56_78_90_AB_CD_EF", Long.class));
+
+ // Auto-detect with underscores
+ assertEquals(1000000, parseNumber("1_000_000", null));
+ assertEquals(1000000000, parseNumber("1_000_000_000", null));
// 1 billion (fits in Integer, so returns Integer)
+ assertEquals(1000.5f, parseNumber("1_000.5", null)); // Fits
in Float, so returns Float
+ }
+
//====================================================================================================
// testReplaceVars
//====================================================================================================
@@ -917,6 +1024,80 @@ class StringUtils_Test extends TestBase {
assertEquals(10 * w, getDuration(" 10 W "));
}
+
//====================================================================================================
+ // getDuration(String) - Enhanced formats
+
//====================================================================================================
+ @Test
+ void a30a_getDuration_enhanced() {
+ long s = 1000, m = s * 60, h = m * 60, d = h * 24, w = d * 7,
mo = d * 30, y = d * 365;
+
+ // Milliseconds
+ assertEquals(100, getDuration("100ms"));
+ assertEquals(100, getDuration("100 millis"));
+ assertEquals(100, getDuration("100 milliseconds"));
+
+ // Decimal values
+ assertEquals((long)(1.5 * h), getDuration("1.5h"));
+ assertEquals((long)(0.5 * m), getDuration("0.5m"));
+ assertEquals((long)(2.5 * s), getDuration("2.5s"));
+ assertEquals((long)(1.25 * d), getDuration("1.25d"));
+
+ // Combined formats
+ assertEquals(1 * h + 30 * m, getDuration("1h30m"));
+ assertEquals(1 * h + 30 * m, getDuration("1h 30m"));
+ assertEquals(2 * d + 3 * h + 15 * m, getDuration("2d3h15m"));
+ assertEquals(1 * w + 2 * d + 3 * h, getDuration("1w2d3h"));
+
+ // Months
+ assertEquals(1 * mo, getDuration("1mo"));
+ assertEquals(1 * mo, getDuration("1 month"));
+ assertEquals(2 * mo, getDuration("2 months"));
+ assertEquals(6 * mo, getDuration("6mo"));
+
+ // Years
+ assertEquals(1 * y, getDuration("1y"));
+ assertEquals(1 * y, getDuration("1yr"));
+ assertEquals(1 * y, getDuration("1 year"));
+ assertEquals(2 * y, getDuration("2 years"));
+ assertEquals(10 * y, getDuration("10y"));
+
+ // Combined with months and years
+ assertEquals(1 * y + 6 * mo, getDuration("1y6mo"));
+ assertEquals(2 * y + 3 * mo + 5 * d, getDuration("2y3mo5d"));
+ }
+
+
//====================================================================================================
+ // getDuration(String) - ISO 8601 format (disabled - feature reverted)
+
//====================================================================================================
+ // @Test
+ // void a30b_getDuration_iso8601() {
+ // long s = 1000, m = s * 60, h = m * 60, d = h * 24, w = d * 7,
mo = d * 30, y = d * 365;
+ //
+ // // ISO 8601 time components
+ // assertEquals(1 * h, getDuration("PT1H"));
+ // assertEquals(30 * m, getDuration("PT30M"));
+ // assertEquals(45 * s, getDuration("PT45S"));
+ // assertEquals(1 * h + 30 * m, getDuration("PT1H30M"));
+ // assertEquals(1 * h + 30 * m + 45 * s,
getDuration("PT1H30M45S"));
+ // assertEquals(2 * h + 15 * m + 30 * s,
getDuration("PT2H15M30S"));
+ //
+ // // ISO 8601 date components
+ // assertEquals(1 * d, getDuration("P1D"));
+ // assertEquals(7 * d, getDuration("P7D"));
+ // assertEquals(1 * w, getDuration("P1W"));
+ // assertEquals(1 * mo, getDuration("P1M"));
+ // assertEquals(1 * y, getDuration("P1Y"));
+ //
+ // // ISO 8601 combined date and time
+ // assertEquals(1 * d + 2 * h + 30 * m, getDuration("P1DT2H30M"));
+ // assertEquals(1 * y + 2 * mo + 3 * d + 4 * h + 5 * m + 6 * s,
getDuration("P1Y2M3DT4H5M6S"));
+ // assertEquals(2 * w + 1 * h, getDuration("P2WT1H"));
+ //
+ // // Case insensitive
+ // assertEquals(1 * h, getDuration("pt1h"));
+ // assertEquals(1 * d, getDuration("p1d"));
+ // }
+
//====================================================================================================
// getDuration(String)
//====================================================================================================