Author: xor
Date: 2008-12-19 13:36:26 +0000 (Fri, 19 Dec 2008)
New Revision: 24588

Added:
   trunk/freenet/src/freenet/support/StringValidityChecker.java
Log:
Add a general purpose class for checking string validity.

Added: trunk/freenet/src/freenet/support/StringValidityChecker.java
===================================================================
--- trunk/freenet/src/freenet/support/StringValidityChecker.java	(rev 0)
+++ trunk/freenet/src/freenet/support/StringValidityChecker.java	2008-12-19 13:36:26 UTC (rev 24588)
@@ -0,0 +1,118 @@
+package freenet.support;
+
+import java.util.Arrays;
+import java.util.HashSet;
+
+public class StringValidityChecker {
+       
+       /**
+        * Taken from http://kb.mozillazine.org/Network.IDN.blacklist_chars
+        */
+       static HashSet<Character> idnBlacklist = new 
HashSet<Character>(Arrays.asList(
+                       new Character[] {
+                                       0x0020, /* SPACE */
+                                       0x00A0, /* NO-BREAK SPACE */
+                                       0x00BC, /* VULGAR FRACTION ONE QUARTER 
*/
+                                       0x00BD, /* VULGAR FRACTION ONE HALF */
+                                       0x01C3, /* LATIN LETTER RETROFLEX CLICK 
*/
+                                       0x0337, /* COMBINING SHORT SOLIDUS 
OVERLAY */
+                                       0x0338, /* COMBINING LONG SOLIDUS 
OVERLAY */
+                                       0x05C3, /* HEBREW PUNCTUATION SOF PASUQ 
*/
+                                       0x05F4, /* HEBREW PUNCTUATION GERSHAYIM 
*/
+                                       0x06D4, /* ARABIC FULL STOP */
+                                       0x0702, /* SYRIAC SUBLINEAR FULL STOP */
+                                       0x115F, /* HANGUL CHOSEONG FILLER */
+                                       0x1160, /* HANGUL JUNGSEONG FILLER */
+                                       0x2000, /* EN QUAD */
+                                       0x2001, /* EM QUAD */
+                                       0x2002, /* EN SPACE */
+                                       0x2003, /* EM SPACE */
+                                       0x2004, /* THREE-PER-EM SPACE */
+                                       0x2005, /* FOUR-PER-EM SPACE */
+                                       0x2006, /* SIX-PER-EM-SPACE */
+                                       0x2007, /* FIGURE SPACE */
+                                       0x2008, /* PUNCTUATION SPACE */
+                                       0x2009, /* THIN SPACE */
+                                       0x200A, /* HAIR SPACE */
+                                       0x200B, /* ZERO WIDTH SPACE */
+                                       0x2024, /* ONE DOT LEADER */
+                                       0x2027, /* HYPHENATION POINT */
+                                       0x2028, /* LINE SEPARATOR */
+                                       0x2029, /* PARAGRAPH SEPARATOR */
+                                       0x202F, /* NARROW NO-BREAK SPACE */
+                                       0x2039, /* SINGLE LEFT-POINTING ANGLE 
QUOTATION MARK */
+                                       0x203A, /* SINGLE RIGHT-POINTING ANGLE 
QUOTATION MARK */
+                                       0x2044, /* FRACTION SLASH */
+                                       0x205F, /* MEDIUM MATHEMATICAL SPACE */
+                                       0x2154, /* VULGAR FRACTION TWO THIRDS */
+                                       0x2155, /* VULGAR FRACTION ONE FIFTH */
+                                       0x2156, /* VULGAR FRACTION TWO FIFTHS */
+                                       0x2159, /* VULGAR FRACTION ONE SIXTH */
+                                       0x215A, /* VULGAR FRACTION FIVE SIXTHS 
*/
+                                       0x215B, /* VULGAR FRACTION ONE EIGTH */
+                                       0x215F, /* FRACTION NUMERATOR ONE */
+                                       0x2215, /* DIVISION SLASH */
+                                       0x23AE, /* INTEGRAL EXTENSION */
+                                       0x29F6, /* SOLIDUS WITH OVERBAR */
+                                       0x29F8, /* BIG SOLIDUS */
+                                       0x2AFB, /* TRIPLE SOLIDUS BINARY 
RELATION */
+                                       0x2AFD, /* DOUBLE SOLIDUS OPERATOR */
+                                       0x2FF0, /* IDEOGRAPHIC DESCRIPTION 
CHARACTER LEFT TO RIGHT */
+                                       0x2FF1, /* IDEOGRAPHIC DESCRIPTION 
CHARACTER ABOVE TO BELOW */
+                                       0x2FF2, /* IDEOGRAPHIC DESCRIPTION 
CHARACTER LEFT TO MIDDLE AND RIGHT */
+                                       0x2FF3, /* IDEOGRAPHIC DESCRIPTION 
CHARACTER ABOVE TO MIDDLE AND BELOW */
+                                       0x2FF4, /* IDEOGRAPHIC DESCRIPTION 
CHARACTER FULL SURROUND */
+                                       0x2FF5, /* IDEOGRAPHIC DESCRIPTION 
CHARACTER SURROUND FROM ABOVE */
+                                       0x2FF6, /* IDEOGRAPHIC DESCRIPTION 
CHARACTER SURROUND FROM BELOW */
+                                       0x2FF7, /* IDEOGRAPHIC DESCRIPTION 
CHARACTER SURROUND FROM LEFT */
+                                       0x2FF8, /* IDEOGRAPHIC DESCRIPTION 
CHARACTER SURROUND FROM UPPER LEFT */
+                                       0x2FF9, /* IDEOGRAPHIC DESCRIPTION 
CHARACTER SURROUND FROM UPPER RIGHT */
+                                       0x2FFA, /* IDEOGRAPHIC DESCRIPTION 
CHARACTER SURROUND FROM LOWER LEFT */
+                                       0x2FFB, /* IDEOGRAPHIC DESCRIPTION 
CHARACTER OVERLAID */
+                                       0x3000, /* IDEOGRAPHIC SPACE */
+                                       0x3002, /* IDEOGRAPHIC FULL STOP */
+                                       0x3014, /* LEFT TORTOISE SHELL BRACKET 
*/
+                                       0x3015, /* RIGHT TORTOISE SHELL BRACKET 
*/
+                                       0x3033, /* VERTICAL KANA REPEAT MARK 
UPPER HALF */
+                                       0x3164, /* HANGUL FILLER */
+                                       0x321D, /* PARENTHESIZED KOREAN 
CHARACTER OJEON */
+                                       0x321E, /* PARENTHESIZED KOREAN 
CHARACTER O HU */
+                                       0x33AE, /* SQUARE RAD OVER S */
+                                       0x33AF, /* SQUARE RAD OVER S SQUARED */
+                                       0x33C6, /* SQUARE C OVER KG */
+                                       0x33DF, /* SQUARE A OVER M */
+                                       0xFE14, /* PRESENTATION FORM FOR 
VERTICAL SEMICOLON */
+                                       0xFE15, /* PRESENTATION FORM FOR 
VERTICAL EXCLAMATION MARK */
+                                       0xFE3F, /* PRESENTATION FORM FOR 
VERTICAL LEFT ANGLE BRACKET */
+                                       0xFE5D, /* SMALL LEFT TORTOISE SHELL 
BRACKET */
+                                       0xFE5E, /* SMALL RIGHT TORTOISE SHELL 
BRACKET */
+                                       0xFEFF, /* ZERO-WIDTH NO-BREAK SPACE */
+                                       0xFF0E, /* FULLWIDTH FULL STOP */
+                                       0xFF0F, /* FULL WIDTH SOLIDUS */
+                                       0xFF61, /* HALFWIDTH IDEOGRAPHIC FULL 
STOP */
+                                       0xFFA0, /* HALFWIDTH HANGUL FILLER */
+                                       0xFFF9, /* INTERLINEAR ANNOTATION 
ANCHOR */
+                                       0xFFFA, /* INTERLINEAR ANNOTATION 
SEPARATOR */
+                                       0xFFFB, /* INTERLINEAR ANNOTATION 
TERMINATOR */
+                                       0xFFFC, /* OBJECT REPLACEMENT CHARACTER 
*/
+                                       0xFFFD, /* REPLACEMENT CHARACTER */
+                       }));
+
+       public static boolean containsNoIDNBlacklistCharacters(String text) {
+               for(Character c : text.toCharArray()) {
+                       if(idnBlacklist.contains(c))
+                               return false;
+               }
+               
+               return true;
+       }
+       
+       public static boolean containsNoLinebreaks(String text) {
+               for(Character c : text.toCharArray()) {
+                       if(Character.getType(c) == Character.LINE_SEPARATOR)
+                               return false;
+               }
+               
+               return true;
+       }
+}

_______________________________________________
cvs mailing list
[email protected]
http://emu.freenetproject.org/cgi-bin/mailman/listinfo/cvs

Reply via email to