Author: bayard
Date: Mon Jun 26 17:28:43 2006
New Revision: 417319
URL: http://svn.apache.org/viewvc?rev=417319&view=rev
Log:
Adding a test and a fix for LANG-100. This is a bug in which the randomly
created String can sometimes be illegal Unicode, because the code does not
consider the relationships that exist between characters. High and low
surrogates are now dealt with, but I'm skipping private high surrogates because
I can't find out what to do with them. I need to plod very slowly through the
spec. This site was very useful:
http://www.alanwood.net/unicode/private_use_high_surrogates.html
Modified:
jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/RandomStringUtils.java
jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/RandomStringUtilsTest.java
Modified:
jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/RandomStringUtils.java
URL:
http://svn.apache.org/viewvc/jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/RandomStringUtils.java?rev=417319&r1=417318&r2=417319&view=diff
==============================================================================
---
jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/RandomStringUtils.java
(original)
+++
jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/RandomStringUtils.java
Mon Jun 26 17:28:43 2006
@@ -18,6 +18,14 @@
import java.util.Random;
/**
* <p>Operations for random <code>String</code>s.</p>
+ * <p>Currently <em>private high surrogate</em> characters are ignored.
+ * These are Unicode characters that fall between the values 56192 (db80)
+ * and 56319 (dbff); they are skipped because we don't know how to handle them.
+ * High and low surrogates are correctly dealt with - that is, if a
+ * high surrogate is randomly chosen, 55296 (d800) to 56191 (db7f),
+ * then it is followed by a low surrogate. If a low surrogate is chosen,
+ * 56320 (dc00) to 57343 (dfff), then it is placed after a randomly
+ * chosen high surrogate. </p>
*
* @author GenerationJava Core library
* @author <a href="mailto:[EMAIL PROTECTED]">Henri Yandell</a>
@@ -243,8 +251,32 @@
}
if ((letters && Character.isLetter(ch))
|| (numbers && Character.isDigit(ch))
- || (!letters && !numbers)) {
- buffer[count] = ch;
+ || (!letters && !numbers))
+ {
+ if(ch >= 56320 && ch <= 57343) {
+ if(count == 0) {
+ count++;
+ } else {
+ // low surrogate, insert high surrogate after putting it in
+ buffer[count] = ch;
+ count--;
+ buffer[count] = (char) (55296 + random.nextInt(128));
+ }
+ } else if(ch >= 55296 && ch <= 56191) {
+ if(count == 0) {
+ count++;
+ } else {
+ // high surrogate, insert low surrogate before putting it in
+ buffer[count] = (char) (56320 + random.nextInt(128));
+ count--;
+ buffer[count] = ch;
+ }
+ } else if(ch >= 56192 && ch <= 56319) {
+ // private high surrogate, no effing clue, so skip it
+ count++;
+ } else {
+ buffer[count] = ch;
+ }
} else {
count++;
}
Modified:
jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/RandomStringUtilsTest.java
URL:
http://svn.apache.org/viewvc/jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/RandomStringUtilsTest.java?rev=417319&r1=417318&r2=417319&view=diff
==============================================================================
---
jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/RandomStringUtilsTest.java
(original)
+++
jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/RandomStringUtilsTest.java
Mon Jun 26 17:28:43 2006
@@ -315,7 +315,33 @@
}
return sumSq;
}
-
+
+ /**
+ * Checks if the string got by {@link RandomStringUtils#random(int)}
+ * can be converted to UTF-8 and back without loss.
+ *
+ * @author [EMAIL PROTECTED]
+ * @throws Exception
+ */
+ public void testLang100() throws Exception {
+ int size = 5000;
+ String encoding = "UTF-8";
+ String orig = RandomStringUtils.random(size);
+ byte[] bytes = orig.getBytes(encoding);
+ String copy = new String(bytes, encoding);
+
+ // for a verbose compare:
+ for (int i=0; i < orig.length() && i < copy.length(); i++) {
+ char o = orig.charAt(i);
+ char c = copy.charAt(i);
+ assertEquals("differs at " + i + "(" + Integer.toHexString((new
Character(o)).hashCode()) + "," +
+ Integer.toHexString((new Character(c)).hashCode()) + ")", o, c);
+ }
+ // compare length also
+ assertEquals(orig.length(), copy.length());
+ // just to be complete
+ assertEquals(orig, copy);
+ }
public static void main(String args[]) {
TestRunner.run(suite());
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]