Author: mbenson
Date: Fri Jan 11 12:13:02 2008
New Revision: 611288
URL: http://svn.apache.org/viewvc?rev=611288&view=rev
Log:
[LANG-192] add splitByCharacterType and camelCase variant
Modified:
commons/proper/lang/trunk/src/java/org/apache/commons/lang/StringUtils.java
commons/proper/lang/trunk/src/test/org/apache/commons/lang/StringUtilsTest.java
Modified:
commons/proper/lang/trunk/src/java/org/apache/commons/lang/StringUtils.java
URL:
http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/java/org/apache/commons/lang/StringUtils.java?rev=611288&r1=611287&r2=611288&view=diff
==============================================================================
--- commons/proper/lang/trunk/src/java/org/apache/commons/lang/StringUtils.java
(original)
+++ commons/proper/lang/trunk/src/java/org/apache/commons/lang/StringUtils.java
Fri Jan 11 12:13:02 2008
@@ -2607,6 +2607,85 @@
return (String[]) list.toArray(new String[list.size()]);
}
+ /**
+ * <p>Splits a String by Character type as returned by
+ * <code>java.lang.Character.getType(char)</code>. Groups of contiguous
+ * characters of the same type are returned as complete tokens. Equivalent to <code>splitByCharacterType(str, false)</code>.</p>
+ * <pre>
+ * StringUtils.splitByCharacterType(null) = null
+ * StringUtils.splitByCharacterType("") = []
+ * StringUtils.splitByCharacterType("ab de fg") = ["ab", " ", "de", " ", "fg"]
+ * StringUtils.splitByCharacterType("ab   de fg") = ["ab", "   ", "de", " ", "fg"]
+ * StringUtils.splitByCharacterType("ab:cd:ef") = ["ab", ":", "cd", ":", "ef"]
+ * StringUtils.splitByCharacterType("fooBar") = ["foo", "B", "ar"]
+ * StringUtils.splitByCharacterType("foo200Bar") = ["foo", "200", "B", "ar"]
+ * StringUtils.splitByCharacterType("ASFRules") = ["ASFR", "ules"]
+ * </pre>
+ * @param str the String to split, may be <code>null</code>
+ * @return an array of parsed Strings, <code>null</code> if null String input
+ * @since 2.4
+ */
+ public static String[] splitByCharacterType(String str) {
+ return splitByCharacterType(str, false); // camelCase disabled: split purely on character-type changes
+ }
+
+ /**
+ * <p>Splits a String by Character type as returned by
+ * <code>java.lang.Character.getType(char)</code>. Groups of contiguous
+ * characters of the same type are returned as complete tokens, with the
+ * following exception: if <code>camelCase</code> is <code>true</code>,
+ * the character of type <code>Character.UPPERCASE_LETTER</code>, if any,
+ * immediately preceding a token of type <code>Character.LOWERCASE_LETTER</code>
+ * will belong to the following token rather than to the preceding, if any,
+ * <code>Character.UPPERCASE_LETTER</code> token.</p>
+ * <pre>
+ * StringUtils.splitByCharacterType(null, true) = null
+ * StringUtils.splitByCharacterType("", true) = []
+ * StringUtils.splitByCharacterType("ab de fg", true) = ["ab", " ", "de", " ", "fg"]
+ * StringUtils.splitByCharacterType("ab   de fg", true) = ["ab", "   ", "de", " ", "fg"]
+ * StringUtils.splitByCharacterType("ab:cd:ef", true) = ["ab", ":", "cd", ":", "ef"]
+ * StringUtils.splitByCharacterType("fooBar", true) = ["foo", "Bar"]
+ * StringUtils.splitByCharacterType("foo200Bar", true) = ["foo", "200", "Bar"]
+ * StringUtils.splitByCharacterType("ASFRules", true) = ["ASF", "Rules"]
+ * </pre>
+ * @param str the String to split, may be <code>null</code>
+ * @param camelCase whether to use so-called "camel-case" for letter types
+ * @return an array of parsed Strings (empty for an empty String), <code>null</code> if null String input
+ * @since 2.4
+ */
+ public static String[] splitByCharacterType(String str, boolean camelCase)
{
+ if (str == null) {
+ return null;
+ }
+ if (str.length() == 0) {
+ return ArrayUtils.EMPTY_STRING_ARRAY;
+ }
+ char[] c = str.toCharArray();
+ List list = new ArrayList();
+ int tokenStart = 0; // index of the first char of the token being accumulated
+ int currentType = Character.getType(c[tokenStart]); // character type of the token being accumulated
+ for (int pos = tokenStart + 1; pos < c.length; pos++) {
+ int type = Character.getType(c[pos]);
+ if (type == currentType) {
+ continue; // same type: still inside the current token
+ }
+ if (camelCase && type == Character.LOWERCASE_LETTER
+ && currentType == Character.UPPERCASE_LETTER) {
+ int newTokenStart = pos - 1; // the last upper-case char starts the new token
+ if (newTokenStart != tokenStart) { // upper-case run is longer than one char: emit all but its last char
+ list.add(new String(c, tokenStart, newTokenStart -
tokenStart));
+ tokenStart = newTokenStart;
+ }
+ } else {
+ list.add(new String(c, tokenStart, pos - tokenStart)); // ordinary type change: emit chars tokenStart..pos-1
+ tokenStart = pos;
+ }
+ currentType = type;
+ }
+ list.add(new String(c, tokenStart, c.length - tokenStart)); // emit the trailing token
+ return (String[]) list.toArray(new String[list.size()]);
+ }
+
// Joining
//-----------------------------------------------------------------------
/**
Modified:
commons/proper/lang/trunk/src/test/org/apache/commons/lang/StringUtilsTest.java
URL:
http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/test/org/apache/commons/lang/StringUtilsTest.java?rev=611288&r1=611287&r2=611288&view=diff
==============================================================================
---
commons/proper/lang/trunk/src/test/org/apache/commons/lang/StringUtilsTest.java
(original)
+++
commons/proper/lang/trunk/src/test/org/apache/commons/lang/StringUtilsTest.java
Fri Jan 11 12:13:02 2008
@@ -847,7 +847,53 @@
assertEquals(msg, "a", res[0]);
assertEquals(msg, str.substring(2), res[1]);
}
-
+
+ public void testSplitByCharacterType() { // exercises both overloads: null, empty, separators, and mixed-case input
+ assertNull(StringUtils.splitByCharacterType(null));
+ assertEquals(0, StringUtils.splitByCharacterType("").length);
+ assertNull(StringUtils.splitByCharacterType(null, true));
+ assertEquals(0, StringUtils.splitByCharacterType("", true).length);
+
+ final boolean camelCase = true; // named flag so the two-argument calls below read clearly
+
+ assertTrue(ArrayUtils.isEquals(new String[] { "ab", " ", "de", " ",
+ "fg" }, StringUtils.splitByCharacterType("ab de fg")));
+
+ assertTrue(ArrayUtils.isEquals(new String[] { "ab", " ", "de", " ",
+ "fg" }, StringUtils.splitByCharacterType("ab de fg",
camelCase)));
+
+ assertTrue(ArrayUtils.isEquals(new String[] { "ab", " ", "de", " ", // NOTE(review): as shown here, this assertion and the next repeat the previous two verbatim; possibly a run of spaces was collapsed in transit - confirm against the repository
+ "fg" }, StringUtils.splitByCharacterType("ab de fg")));
+
+ assertTrue(ArrayUtils.isEquals(new String[] { "ab", " ", "de", " ",
+ "fg" }, StringUtils.splitByCharacterType("ab de fg",
camelCase)));
+
+ assertTrue(ArrayUtils.isEquals(new String[] { "ab", ":", "cd", ":",
+ "ef" }, StringUtils.splitByCharacterType("ab:cd:ef")));
+
+ assertTrue(ArrayUtils.isEquals(new String[] { "ab", ":", "cd", ":",
+ "ef" }, StringUtils.splitByCharacterType("ab:cd:ef",
camelCase)));
+
+ assertTrue(ArrayUtils.isEquals(new String[] { "foo", "B", "ar" }, // from here on the plain and camel-case expectations differ
+ StringUtils.splitByCharacterType("fooBar")));
+
+ assertTrue(ArrayUtils.isEquals(new String[] { "foo", "Bar" },
+ StringUtils.splitByCharacterType("fooBar", camelCase)));
+
+ assertTrue(ArrayUtils.isEquals(new String[] { "foo", "200", "B", "ar"
},
+ StringUtils.splitByCharacterType("foo200Bar")));
+
+ assertTrue(ArrayUtils.isEquals(new String[] { "foo", "200", "Bar" },
+ StringUtils.splitByCharacterType("foo200Bar", camelCase)));
+
+ assertTrue(ArrayUtils.isEquals(new String[] { "ASFR", "ules" },
+ StringUtils.splitByCharacterType("ASFRules")));
+
+ assertTrue(ArrayUtils.isEquals(new String[] { "ASF", "Rules" }, // camel-case: the last upper-case char joins the following lower-case run
+ StringUtils.splitByCharacterType("ASFRules", camelCase)));
+
+ }
+
public void testDeprecatedDeleteSpace_String() {
assertEquals(null, StringUtils.deleteSpaces(null));
assertEquals("", StringUtils.deleteSpaces(""));