Author: niallp Date: Sun Jan 9 01:41:56 2011 New Revision: 1056862 URL: http://svn.apache.org/viewvc?rev=1056862&view=rev Log: Port LANG-640 to LANG 2.x Branch - add normalizeSpace() to StringUtils
Modified: commons/proper/lang/branches/LANG_2_X/src/main/java/org/apache/commons/lang/StringUtils.java commons/proper/lang/branches/LANG_2_X/src/test/java/org/apache/commons/lang/StringUtilsTest.java Modified: commons/proper/lang/branches/LANG_2_X/src/main/java/org/apache/commons/lang/StringUtils.java URL: http://svn.apache.org/viewvc/commons/proper/lang/branches/LANG_2_X/src/main/java/org/apache/commons/lang/StringUtils.java?rev=1056862&r1=1056861&r2=1056862&view=diff ============================================================================== --- commons/proper/lang/branches/LANG_2_X/src/main/java/org/apache/commons/lang/StringUtils.java (original) +++ commons/proper/lang/branches/LANG_2_X/src/main/java/org/apache/commons/lang/StringUtils.java Sun Jan 9 01:41:56 2011 @@ -6479,4 +6479,54 @@ public class StringUtils { int strOffset = str.length() - suffix.length(); return str.regionMatches(ignoreCase, strOffset, suffix, 0, suffix.length()); } + + /** + * <p> + * Similar to <a + * href="http://www.w3.org/TR/xpath/#function-normalize-space">http://www.w3.org/TR/xpath/#function-normalize + * -space</a> + * </p> + * <p> + * The function returns the argument string with whitespace normalized by using + * <code>{@link #trim(String)}</code> to remove leading and trailing whitespace + * and then replacing sequences of whitespace characters by a single space. + * </p> + * In XML Whitespace characters are the same as those allowed by the <a + * href="http://www.w3.org/TR/REC-xml/#NT-S">S</a> production, which is S ::= (#x20 | #x9 | #xD | #xA)+ + * <p> + * See Java's {@link Character#isWhitespace(char)} for which characters are considered whitespace. + * <p> + * The difference is that Java's whitespace includes vertical tab and form feed, which this function will also + * normalize. Additionally <code>{@link #trim(String)}</code> removes control characters (char <= 32) from both + * ends of this String. 
+ * </p> + * + * @see Character#isWhitespace(char) + * @see #trim(String) + * @see <a href="http://www.w3.org/TR/xpath/#function-normalize-space"> + * http://www.w3.org/TR/xpath/#function-normalize-space</a> + * @param str the source String to normalize whitespaces from, may be null + * @return the modified string with whitespace normalized, <code>null</code> if null String input + * + * @since 2.6 + */ + public static String normalizeSpace(String str) { + if(str == null) { + return null; + } + str = trim(str); + StringBuffer b = new StringBuffer(str.length()); + for (int i = 0; i < str.length(); i++) { + char c = str.charAt(i); + if (Character.isWhitespace(c)) { + if (i > 0 && !Character.isWhitespace(str.charAt(i - 1))) { + b.append(' '); + } + } else { + b.append(c); + } + } + return b.toString(); + } + } Modified: commons/proper/lang/branches/LANG_2_X/src/test/java/org/apache/commons/lang/StringUtilsTest.java URL: http://svn.apache.org/viewvc/commons/proper/lang/branches/LANG_2_X/src/test/java/org/apache/commons/lang/StringUtilsTest.java?rev=1056862&r1=1056861&r2=1056862&view=diff ============================================================================== --- commons/proper/lang/branches/LANG_2_X/src/test/java/org/apache/commons/lang/StringUtilsTest.java (original) +++ commons/proper/lang/branches/LANG_2_X/src/test/java/org/apache/commons/lang/StringUtilsTest.java Sun Jan 9 01:41:56 2011 @@ -1910,6 +1910,26 @@ public class StringUtilsTest extends Tes assertFalse(StringUtils.startsWithAny("abcxyz", new String[] {null, "xyz", "abcd"})); } + public void testNormalizeSpace() { + assertEquals(null, StringUtils.normalizeSpace(null)); + assertEquals("", StringUtils.normalizeSpace("")); + assertEquals("", StringUtils.normalizeSpace(" ")); + assertEquals("", StringUtils.normalizeSpace("\t")); + assertEquals("", StringUtils.normalizeSpace("\n")); + assertEquals("", StringUtils.normalizeSpace("\u0009")); + assertEquals("", StringUtils.normalizeSpace("\u000B")); + 
assertEquals("", StringUtils.normalizeSpace("\u000C")); + assertEquals("", StringUtils.normalizeSpace("\u001C")); + assertEquals("", StringUtils.normalizeSpace("\u001D")); + assertEquals("", StringUtils.normalizeSpace("\u001E")); + assertEquals("", StringUtils.normalizeSpace("\u001F")); + assertEquals("", StringUtils.normalizeSpace("\f")); + assertEquals("", StringUtils.normalizeSpace("\r")); + assertEquals("a", StringUtils.normalizeSpace(" a ")); + assertEquals("a b c", StringUtils.normalizeSpace(" a b c ")); + assertEquals("a b c", StringUtils.normalizeSpace("a\t\f\r b\u000B c\n")); + } + public void testLANG666() { assertEquals("12",StringUtils.stripEnd("120.00", ".0")); assertEquals("121",StringUtils.stripEnd("121.00", ".0"));