Author: scottbw
Date: Thu Apr 29 14:14:40 2010
New Revision: 939317

URL: http://svn.apache.org/viewvc?rev=939317&view=rev
Log:
Simplified the method for normalizing text content in UnicodeUtils and added 
more comments. I also replaced the string concatenation inside the loop with a 
StringBuffer for better performance.

Modified:
    
incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/util/UnicodeUtils.java

Modified: 
incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/util/UnicodeUtils.java
URL: 
http://svn.apache.org/viewvc/incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/util/UnicodeUtils.java?rev=939317&r1=939316&r2=939317&view=diff
==============================================================================
--- 
incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/util/UnicodeUtils.java
 (original)
+++ 
incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/util/UnicodeUtils.java
 Thu Apr 29 14:14:40 2010
@@ -48,20 +48,33 @@ public class UnicodeUtils {
                return normalize(in, false);
        }
        
+       /**
+        * Normalizes all space characters (and whitespace if includeWhitespace 
is set to true) in the given string to 
+        * U+0020, then collapses multiple adjacent spaces to a single space, 
and
+        * removes any leading and trailing spaces. If the input string is null,
+        * the method returns an empty string ("")
+        * @param in the string to normalize
+        * @param includeWhitespace set to true to normalize whitespace as well 
as space characters
+        * @return the normalized string
+        */
        private static String normalize(String in, boolean includeWhitespace){
                if (in == null) return "";
-               String out = "";
+               // Create a buffer for the string
+               StringBuffer buf = new StringBuffer();
+               // Iterate over characters in the string and append them to 
buffer, replacing matching characters with standard spaces
                for (int x=0;x<in.length();x++){
-                       String s = in.substring(x, x+1);
-                       char ch = s.charAt(0);
+                       char ch = in.charAt(x);
                        if (Character.isSpaceChar(ch) || 
(Character.isWhitespace(ch) && includeWhitespace)){
-                               s = " ";
+                               ch = new Character(' ');
                        }
-                       out = out + s;
+                       buf.append(ch);
                }
-               out = CharSetUtils.squeeze(out, " ");
-               out = StringUtils.strip(out);
-               return out;
+               String str = buf.toString();
+               // Squeeze out extra spaces
+               str = CharSetUtils.squeeze(str, " ");
+               // Strip off trailing and leading spaces
+               str = StringUtils.strip(str);
+               return str;
        }
 
 }


Reply via email to