Author: xor
Date: 2008-12-19 13:36:26 +0000 (Fri, 19 Dec 2008)
New Revision: 24588
Added:
trunk/freenet/src/freenet/support/StringValidityChecker.java
Log:
Add a general-purpose class for checking string validity.
Added: trunk/freenet/src/freenet/support/StringValidityChecker.java
===================================================================
--- trunk/freenet/src/freenet/support/StringValidityChecker.java  (rev 0)
+++ trunk/freenet/src/freenet/support/StringValidityChecker.java  2008-12-19 13:36:26 UTC (rev 24588)
@@ -0,0 +1,160 @@
+package freenet.support;
+
+import java.util.Arrays;
+import java.util.HashSet;
+
+/**
+ * General-purpose checks for the validity of strings.
+ */
+public class StringValidityChecker {
+
+    /**
+     * Characters rejected by {@link #containsNoIDNBlacklistCharacters(String)}.
+     * Taken from http://kb.mozillazine.org/Network.IDN.blacklist_chars
+     * Every entry is a single UTF-16 code unit; treat the set as read-only.
+     */
+    static final HashSet<Character> idnBlacklist = new HashSet<Character>(Arrays.asList(
+        new Character[] {
+            0x0020, /* SPACE */
+            0x00A0, /* NO-BREAK SPACE */
+            0x00BC, /* VULGAR FRACTION ONE QUARTER */
+            0x00BD, /* VULGAR FRACTION ONE HALF */
+            0x01C3, /* LATIN LETTER RETROFLEX CLICK */
+            0x0337, /* COMBINING SHORT SOLIDUS OVERLAY */
+            0x0338, /* COMBINING LONG SOLIDUS OVERLAY */
+            0x05C3, /* HEBREW PUNCTUATION SOF PASUQ */
+            0x05F4, /* HEBREW PUNCTUATION GERSHAYIM */
+            0x06D4, /* ARABIC FULL STOP */
+            0x0702, /* SYRIAC SUBLINEAR FULL STOP */
+            0x115F, /* HANGUL CHOSEONG FILLER */
+            0x1160, /* HANGUL JUNGSEONG FILLER */
+            0x2000, /* EN QUAD */
+            0x2001, /* EM QUAD */
+            0x2002, /* EN SPACE */
+            0x2003, /* EM SPACE */
+            0x2004, /* THREE-PER-EM SPACE */
+            0x2005, /* FOUR-PER-EM SPACE */
+            0x2006, /* SIX-PER-EM SPACE */
+            0x2007, /* FIGURE SPACE */
+            0x2008, /* PUNCTUATION SPACE */
+            0x2009, /* THIN SPACE */
+            0x200A, /* HAIR SPACE */
+            0x200B, /* ZERO WIDTH SPACE */
+            0x2024, /* ONE DOT LEADER */
+            0x2027, /* HYPHENATION POINT */
+            0x2028, /* LINE SEPARATOR */
+            0x2029, /* PARAGRAPH SEPARATOR */
+            0x202F, /* NARROW NO-BREAK SPACE */
+            0x2039, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
+            0x203A, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
+            0x2044, /* FRACTION SLASH */
+            0x205F, /* MEDIUM MATHEMATICAL SPACE */
+            0x2154, /* VULGAR FRACTION TWO THIRDS */
+            0x2155, /* VULGAR FRACTION ONE FIFTH */
+            0x2156, /* VULGAR FRACTION TWO FIFTHS */
+            0x2159, /* VULGAR FRACTION ONE SIXTH */
+            0x215A, /* VULGAR FRACTION FIVE SIXTHS */
+            0x215B, /* VULGAR FRACTION ONE EIGHTH */
+            0x215F, /* FRACTION NUMERATOR ONE */
+            0x2215, /* DIVISION SLASH */
+            0x23AE, /* INTEGRAL EXTENSION */
+            0x29F6, /* SOLIDUS WITH OVERBAR */
+            0x29F8, /* BIG SOLIDUS */
+            0x2AFB, /* TRIPLE SOLIDUS BINARY RELATION */
+            0x2AFD, /* DOUBLE SOLIDUS OPERATOR */
+            0x2FF0, /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT */
+            0x2FF1, /* IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW */
+            0x2FF2, /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT */
+            0x2FF3, /* IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW */
+            0x2FF4, /* IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND */
+            0x2FF5, /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE */
+            0x2FF6, /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM BELOW */
+            0x2FF7, /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LEFT */
+            0x2FF8, /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER LEFT */
+            0x2FF9, /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER RIGHT */
+            0x2FFA, /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER LEFT */
+            0x2FFB, /* IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID */
+            0x3000, /* IDEOGRAPHIC SPACE */
+            0x3002, /* IDEOGRAPHIC FULL STOP */
+            0x3014, /* LEFT TORTOISE SHELL BRACKET */
+            0x3015, /* RIGHT TORTOISE SHELL BRACKET */
+            0x3033, /* VERTICAL KANA REPEAT MARK UPPER HALF */
+            0x3164, /* HANGUL FILLER */
+            0x321D, /* PARENTHESIZED KOREAN CHARACTER OJEON */
+            0x321E, /* PARENTHESIZED KOREAN CHARACTER O HU */
+            0x33AE, /* SQUARE RAD OVER S */
+            0x33AF, /* SQUARE RAD OVER S SQUARED */
+            0x33C6, /* SQUARE C OVER KG */
+            0x33DF, /* SQUARE A OVER M */
+            0xFE14, /* PRESENTATION FORM FOR VERTICAL SEMICOLON */
+            0xFE15, /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK */
+            0xFE3F, /* PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET */
+            0xFE5D, /* SMALL LEFT TORTOISE SHELL BRACKET */
+            0xFE5E, /* SMALL RIGHT TORTOISE SHELL BRACKET */
+            0xFEFF, /* ZERO WIDTH NO-BREAK SPACE */
+            0xFF0E, /* FULLWIDTH FULL STOP */
+            0xFF0F, /* FULLWIDTH SOLIDUS */
+            0xFF61, /* HALFWIDTH IDEOGRAPHIC FULL STOP */
+            0xFFA0, /* HALFWIDTH HANGUL FILLER */
+            0xFFF9, /* INTERLINEAR ANNOTATION ANCHOR */
+            0xFFFA, /* INTERLINEAR ANNOTATION SEPARATOR */
+            0xFFFB, /* INTERLINEAR ANNOTATION TERMINATOR */
+            0xFFFC, /* OBJECT REPLACEMENT CHARACTER */
+            0xFFFD, /* REPLACEMENT CHARACTER */
+        }));
+
+    /**
+     * Checks that a string is free of IDN blacklist characters.
+     *
+     * @param text the string to check, must not be null
+     * @return false if any char of text is contained in {@link #idnBlacklist}, true otherwise
+     *         (also for the empty string)
+     * @throws NullPointerException if text is null
+     */
+    public static boolean containsNoIDNBlacklistCharacters(String text) {
+        for (int i = 0; i < text.length(); i++) {
+            if (idnBlacklist.contains(text.charAt(i))) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * Checks that a string is free of line-breaking characters.
+     * Besides the Unicode line and paragraph separators this also rejects the control
+     * characters LF, VT, FF, CR and NEL; a check of the LINE_SEPARATOR category alone
+     * would only catch U+2028.
+     *
+     * @param text the string to check, must not be null
+     * @return false if text contains a line break character, true otherwise
+     *         (also for the empty string)
+     * @throws NullPointerException if text is null
+     */
+    public static boolean containsNoLinebreaks(String text) {
+        for (int i = 0; i < text.length(); i++) {
+            if (isLinebreak(text.charAt(i))) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /** Tells whether c is one of the characters that end a line. */
+    private static boolean isLinebreak(char c) {
+        switch (c) {
+            case '\n':     /* LINE FEED */
+            case 0x000B:   /* LINE TABULATION */
+            case '\f':     /* FORM FEED */
+            case '\r':     /* CARRIAGE RETURN */
+            case 0x0085:   /* NEXT LINE */
+            case 0x2028:   /* LINE SEPARATOR */
+            case 0x2029:   /* PARAGRAPH SEPARATOR */
+                return true;
+            default:
+                return false;
+        }
+    }
+}
_______________________________________________
cvs mailing list
[email protected]
http://emu.freenetproject.org/cgi-bin/mailman/listinfo/cvs