[1/5] [text] Fixed TEXT-39 WordUtils now using toXxxxCase(int) rather than toXxxxCase(char)

chtompki Mon, 08 May 2017 18:35:44 -0700

Repository: commons-text
Updated Branches:
  refs/heads/master 732450209 -> 7bdd61fbf



Fixed TEXT-39 WordUtils now using toXxxxCase(int) rather than toXxxxCase(char)


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/18969d7d
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/18969d7d
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/18969d7d

Branch: refs/heads/master
Commit: 18969d7d862264e3b5267ba170cf60645863ad19
Parents: f651b78
Author: Amey Jadiye <[email protected]>
Authored: Sat May 6 01:47:44 2017 +0530
Committer: Amey Jadiye <[email protected]>
Committed: Sat May 6 01:47:44 2017 +0530

----------------------------------------------------------------------
 .../java/org/apache/commons/text/WordUtils.java | 99 +++++++++++++++-----
 1 file changed, 73 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/18969d7d/src/main/java/org/apache/commons/text/WordUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/WordUtils.java b/src/main/java/org/apache/commons/text/WordUtils.java
index 20920d6..0925227 100644
--- a/src/main/java/org/apache/commons/text/WordUtils.java
+++ b/src/main/java/org/apache/commons/text/WordUtils.java
@@ -414,18 +414,29 @@ public class WordUtils {
         if (StringUtils.isEmpty(str) || delimLen == 0) {
             return str;
         }
-        final char[] buffer = str.toCharArray();
+        int strLen = str.length();
+        int [] newCodePoints = new int[strLen];
+        int outOffset = 0;
+
         boolean capitalizeNext = true;
-        for (int i = 0; i < buffer.length; i++) {
-            final char ch = buffer[i];
-            if (isDelimiter(ch, delimiters)) {
+        for (int index = 0; index < str.length();) {
+            final int codePoint = str.codePointAt(index);
+
+            if (isDelimiter(codePoint, delimiters)) {
                 capitalizeNext = true;
+                newCodePoints[outOffset++] = codePoint;
+                index += Character.charCount(codePoint);
             } else if (capitalizeNext) {
-                buffer[i] = Character.toTitleCase(ch);
+                int titleCaseCodePoint = Character.toTitleCase(codePoint);
+                newCodePoints[outOffset++] = titleCaseCodePoint;
+                index += Character.charCount(titleCaseCodePoint);
                 capitalizeNext = false;
+            } else {
+                newCodePoints[outOffset++] = codePoint;
+                index += Character.charCount(codePoint);
             }
         }
-        return new String(buffer);
+        return new String(newCodePoints, 0, outOffset);
     }
 
     //-----------------------------------------------------------------------
@@ -537,18 +548,29 @@ public class WordUtils {
         if (StringUtils.isEmpty(str) || delimLen == 0) {
             return str;
         }
-        final char[] buffer = str.toCharArray();
+        int strLen = str.length();
+        int [] newCodePoints = new int[strLen];
+        int outOffset = 0;
+
         boolean uncapitalizeNext = true;
-        for (int i = 0; i < buffer.length; i++) {
-            final char ch = buffer[i];
-            if (isDelimiter(ch, delimiters)) {
+        for (int index = 0; index < str.length();) {
+            final int codePoint = str.codePointAt(index);
+
+            if (isDelimiter(codePoint, delimiters)) {
                 uncapitalizeNext = true;
+                newCodePoints[outOffset++] = codePoint;
+                index += Character.charCount(codePoint);
             } else if (uncapitalizeNext) {
-                buffer[i] = Character.toLowerCase(ch);
+                int titleCaseCodePoint = Character.toLowerCase(codePoint);
+                newCodePoints[outOffset++] = titleCaseCodePoint;
+                index += Character.charCount(titleCaseCodePoint);
                 uncapitalizeNext = false;
+            } else {
+                newCodePoints[outOffset++] = codePoint;
+                index += Character.charCount(codePoint);
             }
         }
-        return new String(buffer);
+        return new String(newCodePoints, 0, outOffset);
     }
 
     //-----------------------------------------------------------------------
@@ -578,30 +600,34 @@ public class WordUtils {
         if (StringUtils.isEmpty(str)) {
             return str;
         }
-        final char[] buffer = str.toCharArray();
-
+        final int strLen = str.length();
+        int [] newCodePoints = new int[strLen];
+        int outOffset = 0;
         boolean whitespace = true;
-
-        for (int i = 0; i < buffer.length; i++) {
-            final char ch = buffer[i];
-            if (Character.isUpperCase(ch)) {
-                buffer[i] = Character.toLowerCase(ch);
+        for (int index = 0; index < strLen;) {
+            final int oldCodepoint = str.codePointAt(index);
+            final int newCodePoint;
+            if (Character.isUpperCase(oldCodepoint)) {
+                newCodePoint = Character.toLowerCase(oldCodepoint);
                 whitespace = false;
-            } else if (Character.isTitleCase(ch)) {
-                buffer[i] = Character.toLowerCase(ch);
+            } else if (Character.isTitleCase(oldCodepoint)) {
+                newCodePoint = Character.toLowerCase(oldCodepoint);
                 whitespace = false;
-            } else if (Character.isLowerCase(ch)) {
+            } else if (Character.isLowerCase(oldCodepoint)) {
                 if (whitespace) {
-                    buffer[i] = Character.toTitleCase(ch);
+                    newCodePoint = Character.toTitleCase(oldCodepoint);
                     whitespace = false;
                 } else {
-                    buffer[i] = Character.toUpperCase(ch);
+                    newCodePoint = Character.toUpperCase(oldCodepoint);
                 }
             } else {
-                whitespace = Character.isWhitespace(ch);
+                whitespace = Character.isWhitespace(oldCodepoint);
+                newCodePoint = oldCodepoint;
             }
+            newCodePoints[outOffset++] = newCodePoint;
+            index += Character.charCount(newCodePoint);
         }
-        return new String(buffer);
+        return new String(newCodePoints, 0, outOffset);
     }
 
     //-----------------------------------------------------------------------
@@ -739,6 +765,27 @@ public class WordUtils {
         return false;
     }
 
+  //-----------------------------------------------------------------------
+    /**
+     * Checks whether the given code point is one of the delimiters.
+     *
+     * @param codePoint the code point to check
+     * @param delimiters the delimiters; if {@code null}, any whitespace code point counts as a delimiter
+     * @return {@code true} if the code point is a delimiter
+     */
+    private static boolean isDelimiter(final int codePoint, final char[] 
delimiters) {
+        if (delimiters == null) {
+            return Character.isWhitespace(codePoint);
+        }
+        for (int index = 0; index < delimiters.length; index++) {
+            int delimiterCodePoint = Character.codePointAt(delimiters, index);
+            if (delimiterCodePoint == codePoint) {
+                return true;
+            }
+        }
+        return false;
+    }
+
     //-----------------------------------------------------------------------
     /**
      * Abbreviates the words nicely.

[1/5] [text] Fixed TEXT-39 WordUtils now using toXxxxCase(int) rather than toXxxxCase(char)

Reply via email to