Author: desruisseaux
Date: Fri Dec 7 08:11:59 2012
New Revision: 1418210
URL: http://svn.apache.org/viewvc?rev=1418210&view=rev
Log:
Refactored the CharSequences.equalsLettersAndDigits(...) as an
equalsFiltered(..., Characters.Filter, boolean) method,
and modified the Citations static methods to use it.
Modified:
sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/internal/util/Citations.java
sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/Characters.java
sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/type/CodeListFilter.java
sis/branches/JDK7/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java
sis/branches/JDK7/sis-utility/src/test/java/org/apache/sis/util/CharactersTest.java
Modified:
sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/internal/util/Citations.java
URL:
http://svn.apache.org/viewvc/sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/internal/util/Citations.java?rev=1418210&r1=1418209&r2=1418210&view=diff
==============================================================================
---
sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/internal/util/Citations.java
(original)
+++
sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/internal/util/Citations.java
Fri Dec 7 08:11:59 2012
@@ -23,7 +23,9 @@ import org.opengis.metadata.citation.Cit
import org.opengis.util.InternationalString;
import org.apache.sis.util.Static;
+import static org.apache.sis.util.CharSequences.equalsFiltered;
import static org.apache.sis.util.CharSequences.trimWhitespaces;
+import static org.apache.sis.util.Characters.Filter.LETTERS_AND_DIGITS;
// Related to JDK7
import java.util.Objects;
@@ -59,15 +61,16 @@ public final class Citations extends Sta
* @param collection The collection from which to get the iterator, or
{@code null}.
* @return The iterator over the given collection elements, or {@code
null}.
*/
- public static <E> Iterator<E> iterator(final Collection<E> collection) {
+ private static <E> Iterator<E> iterator(final Collection<E> collection) {
return (collection != null && !collection.isEmpty()) ?
collection.iterator() : null;
}
/**
* Returns {@code true} if at least one {@linkplain Citation#getTitle()
title} or
- * {@linkplain Citation#getAlternateTitles alternate title} in {@code c1}
is equal
- * to a title or alternate title in {@code c2}. The comparison is
case-insensitive
- * and ignores leading and trailing spaces. The titles ordering is not
significant.
+ * {@linkplain Citation#getAlternateTitles() alternate title} in {@code
c1} is leniently
+ * equal to a title or alternate title in {@code c2}. The comparison is
case-insensitive
+ * and ignores every character which is not a {@linkplain
Character#isLetterOrDigit(int)
+ * letter or a digit}. The titles ordering is not significant.
*
* @param c1 The first citation to compare, or {@code null}.
* @param c2 the second citation to compare, or {@code null}.
@@ -108,30 +111,29 @@ public final class Citations extends Sta
/**
* Returns {@code true} if the {@linkplain Citation#getTitle() title} or
any
- * {@linkplain Citation#getAlternateTitles alternate title} in the given
citation
- * matches the given string. The comparison is case-insensitive and
ignores leading
- * and trailing spaces.
+ * {@linkplain Citation#getAlternateTitles() alternate title} in the given
citation
+ * matches the given string. The comparison is case-insensitive and
ignores every character
+ * which is not a {@linkplain Character#isLetterOrDigit(int) letter or a
digit}.
*
* @param citation The citation to check for, or {@code null}.
* @param title The title or alternate title to compare, or {@code null}.
* @return {@code true} if both arguments are non-null, and the title or
alternate
* title matches the given string.
*/
- public static boolean titleMatches(final Citation citation, String title) {
+ public static boolean titleMatches(final Citation citation, final
CharSequence title) {
if (citation != null && title != null) {
- title = trimWhitespaces(title);
InternationalString candidate = citation.getTitle();
Iterator<? extends InternationalString> iterator = null;
do {
if (candidate != null) {
// The "null" locale argument is required for getting the
unlocalized version.
- final String unlocalized =
trimWhitespaces(candidate.toString(null));
- if (unlocalized != null &&
unlocalized.equalsIgnoreCase(title)) {
+ final String unlocalized = candidate.toString(null);
+ if (equalsFiltered(unlocalized, title, LETTERS_AND_DIGITS,
true)) {
return true;
}
- final String localized =
trimWhitespaces(candidate).toString();
- if (localized != unlocalized // Slight optimization for a
common case.
- && (localized != null) &&
localized.equalsIgnoreCase(title))
+ final String localized = candidate.toString();
+ if (!Objects.equals(localized, unlocalized) // Slight
optimization for a common case.
+ && equalsFiltered(localized, title,
LETTERS_AND_DIGITS, true))
{
return true;
}
@@ -150,7 +152,8 @@ public final class Citations extends Sta
/**
* Returns {@code true} if at least one {@linkplain
Citation#getIdentifiers() identifier} in
* {@code c1} is equal to an identifier in {@code c2}. The comparison is
case-insensitive
- * and ignores leading and trailing spaces. The identifier ordering is not
significant.
+ * and ignores every character which is not a {@linkplain
Character#isLetterOrDigit(int)
+ * letter or a digit}. The identifier ordering is not significant.
*
* <p>If (and <em>only</em> if) the citations do not contains any
identifier, then this method
* fallback on titles comparison using the {@link
#titleMatches(Citation,Citation) titleMatches}
@@ -193,8 +196,8 @@ public final class Citations extends Sta
/**
* Returns {@code true} if any {@linkplain Citation#getIdentifiers()
identifiers} in the given
- * citation matches the given string. The comparison is case-insensitive
and ignores leading
- * and trailing spaces.
+ * citation matches the given string. The comparison is case-insensitive
and ignores every
+ * character which is not a {@linkplain Character#isLetterOrDigit(int)
letter or a digit}.
*
* <p>If (and <em>only</em> if) the citation does not contain any
identifier, then this method
* fallback on titles comparison using the {@link
#titleMatches(Citation,String) titleMatches}
@@ -206,20 +209,16 @@ public final class Citations extends Sta
* @return {@code true} if both arguments are non-null, and the title or
alternate title
* matches the given string.
*/
- public static boolean identifierMatches(final Citation citation, String
identifier) {
+ public static boolean identifierMatches(final Citation citation, final
CharSequence identifier) {
if (citation != null && identifier != null) {
- identifier = trimWhitespaces(identifier);
final Iterator<? extends Identifier> identifiers =
iterator(citation.getIdentifiers());
if (identifiers == null) {
return titleMatches(citation, identifier);
}
while (identifiers.hasNext()) {
final Identifier id = identifiers.next();
- if (id != null) {
- final String code = id.getCode();
- if (code != null &&
identifier.equalsIgnoreCase(trimWhitespaces(code))) {
- return true;
- }
+ if (id != null && equalsFiltered(identifier, id.getCode(),
LETTERS_AND_DIGITS, true)) {
+ return true;
}
}
}
Modified:
sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
URL:
http://svn.apache.org/viewvc/sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java?rev=1418210&r1=1418209&r2=1418210&view=diff
==============================================================================
---
sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
(original)
+++
sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/CharSequences.java
Fri Dec 7 08:11:59 2012
@@ -1126,7 +1126,7 @@ searchWordBreak: while (true) {
}
/**
- * Given a string in camel cases (typically a Java identifier), returns a
string formatted
+ * Given a string in camel cases (typically an identifier), returns a
string formatted
* like an English sentence. This heuristic method performs the following
steps:
*
* <ol>
@@ -1257,8 +1257,8 @@ searchWordBreak: while (true) {
* case, then the text is returned unchanged on the assumption that it is
already an acronym.
* Otherwise this method returns a string containing the first character
of each word, where
* the words are separated by the camel case convention, the {@code '_'}
character, or any
- * character which is not a {@linkplain
Character#isJavaIdentifierPart(int) java identifier
- * part} (including spaces).
+ * character which is not a {@linkplain
Character#isUnicodeIdentifierPart(int) Unicode
+ * identifier part} (including spaces).
*
* <p><b>Examples:</b> given {@code "northEast"}, this method returns
{@code "NE"}.
* Given {@code "Open Geospatial Consortium"}, this method returns {@code
"OGC"}.</p>
@@ -1275,11 +1275,11 @@ searchWordBreak: while (true) {
for (int i=0; i<length;) {
final int c = codePointAt(text, i);
if (wantChar) {
- if (isJavaIdentifierStart(c)) {
+ if (isUnicodeIdentifierStart(c)) {
buffer.appendCodePoint(c);
wantChar = false;
}
- } else if (!isJavaIdentifierPart(c) || c == '_') {
+ } else if (!isUnicodeIdentifierPart(c) || c == '_') {
wantChar = true;
} else if (Character.isUpperCase(c)) {
// Test for mixed-case (e.g. "northEast").
@@ -1408,7 +1408,7 @@ cmp: while (ia < lga) {
* <p>This method is used for identifying character strings that are
likely to be code
* like {@code "UTF-8"} or {@code "ISO-LATIN-1"}.</p>
*
- * @see #isJavaIdentifier(CharSequence)
+ * @see #isUnicodeIdentifier(CharSequence)
*/
private static boolean isCode(final CharSequence identifier) {
for (int i=identifier.length(); --i>=0;) {
@@ -1423,28 +1423,28 @@ cmp: while (ia < lga) {
}
/**
- * Returns {@code true} if the given identifier is a legal Java identifier.
+ * Returns {@code true} if the given identifier is a legal Unicode
identifier.
* This method returns {@code true} if the identifier length is greater
than zero,
- * the first character is a {@linkplain
Character#isJavaIdentifierStart(int) Java
- * identifier start} and all remaining characters (if any) are
- * {@linkplain Character#isJavaIdentifierPart(int) Java identifier parts}.
+ * the first character is a {@linkplain
Character#isUnicodeIdentifierStart(int)
+ * Unicode identifier start} and all remaining characters (if any) are
+ * {@linkplain Character#isUnicodeIdentifierPart(int) Unicode identifier
parts}.
*
* @param identifier The character sequence to test.
- * @return {@code true} if the given character sequence is a legal Java
identifier.
+ * @return {@code true} if the given character sequence is a legal Unicode
identifier.
* @throws NullPointerException if the argument is null.
*/
- public static boolean isJavaIdentifier(final CharSequence identifier) {
+ public static boolean isUnicodeIdentifier(final CharSequence identifier) {
final int length = identifier.length();
if (length == 0) {
return false;
}
int c = codePointAt(identifier, 0);
- if (!isJavaIdentifierStart(c)) {
+ if (!isUnicodeIdentifierStart(c)) {
return false;
}
for (int i=0; (i += charCount(c)) < length;) {
c = codePointAt(identifier, i);
- if (!isJavaIdentifierPart(c)) {
+ if (!isUnicodeIdentifierPart(c)) {
return false;
}
}
@@ -1476,6 +1476,71 @@ cmp: while (ia < lga) {
}
/**
+ * Returns {@code true} if the given texts are equal, optionally ignoring
case and filtered-out
+ * characters. This method is sometimes used for comparing identifiers in a
lenient way.
+ *
+ * <p><b>Example:</b> the following call compares the two strings ignoring
case and any
+ * characters which are not {@linkplain Character#isLetterOrDigit(int)
letter or digit}.
+ * In particular, spaces and punctuation characters like {@code '_'} and
{@code '-'} are
+ * ignored:</p>
+ *
+ * {@preformat java
+ * assert equalsFiltered("WGS84", "WGS_84",
Characters.Filter.LETTERS_AND_DIGITS, true) == true;
+ * }
+ *
+ * @param s1 The first string to compare, or {@code null}.
+ * @param s2 The second string to compare, or {@code null}.
+ * @param filter The subset of characters to compare, or {@code null} for
comparing all characters.
+ * @param ignoreCase {@code true} for ignoring case, or {@code false}
for requiring exact match.
+ * @return {@code true} if both arguments are {@code null} or if the
two given texts are equal,
+ * optionally ignoring case and filtered-out characters.
+ */
+ public static boolean equalsFiltered(final CharSequence s1, final
CharSequence s2,
+ final Characters.Filter filter, final boolean ignoreCase)
+ {
+ if (s1 == s2) {
+ return true;
+ }
+ if (s1 == null || s2 == null) {
+ return false;
+ }
+ if (filter == null) {
+ return ignoreCase ? equalsIgnoreCase(s1, s2) : equals(s1, s2);
+ }
+ final int lg1 = s1.length();
+ final int lg2 = s2.length();
+ int i2 = 0, n;
+ for (int i1=0; i1<lg1; i1+=n) {
+ int c1 = codePointAt(s1, i1);
+ n = charCount(c1);
+ if (filter.contains(c1)) {
+ // Fetch the next significant character from the second string.
+ int c2;
+ do {
+ if (i2 >= lg2) {
+ return false; // The first string has more significant
characters than expected.
+ }
+ c2 = codePointAt(s2, i2);
+ i2 += charCount(c2);
+ } while (!filter.contains(c2));
+
+ // Compare the characters in the same way as
String.equalsIgnoreCase(String).
+ if (c1 != c2 && !(ignoreCase && equalsIgnoreCase(c1, c2))) {
+ return false;
+ }
+ }
+ }
+ while (i2 < lg2) {
+ final int s = codePointAt(s2, i2);
+ if (filter.contains(s)) {
+ return false; // The first string has less significant
characters than expected.
+ }
+ i2 += charCount(s);
+ }
+ return true;
+ }
+
+ /**
* Returns {@code true} if the given code points are equal, ignoring case.
* This method implements the same comparison algorithm than
String#equalsIgnoreCase(String).
*
@@ -1530,57 +1595,6 @@ cmp: while (ia < lga) {
}
/**
- * Returns {@code true} if the given texts are equal, ignoring case and
any character which
- * is not a {@linkplain Character#isLetterOrDigit(int) letter or digit}.
In particular,
- * spaces and punctuation characters like {@code '_'} and {@code '-'} are
ignored.
- * This method is sometime used for comparing identifiers in a lenient way.
- *
- * @param s1 The first string to compare, or {@code null}.
- * @param s2 The second string to compare, or {@code null}.
- * @return {@code true} if the two given texts are equal, comparing only
letters and digits
- * in a case-insensitive way, or if both arguments are {@code
null}.
- */
- public static boolean equalsLettersAndDigits(final CharSequence s1, final
CharSequence s2) {
- if (s1 == s2) {
- return true;
- }
- if (s1 == null || s2 == null) {
- return false;
- }
- final int lg1 = s1.length();
- final int lg2 = s2.length();
- int i2 = 0, n;
- for (int i1=0; i1<lg1; i1+=n) {
- int c1 = codePointAt(s1, i1);
- n = charCount(c1);
- if (isLetterOrDigit(c1)) {
- // Fetch the next significant character from the second string.
- int c2;
- do {
- if (i2 >= lg2) {
- return false; // The first string has more significant
characters than expected.
- }
- c2 = codePointAt(s2, i2);
- i2 += charCount(c2);
- } while (!isLetterOrDigit(c2));
-
- // Compare the characters in the same way than
String.equalsIgnoreCase(String).
- if (c1 != c2 && !equalsIgnoreCase(c1, c2)) {
- return false;
- }
- }
- }
- while (i2 < lg2) {
- final int s = codePointAt(s2, i2);
- if (isLetterOrDigit(s)) {
- return false; // The first string has less significant
characters than expected.
- }
- i2 += charCount(s);
- }
- return true;
- }
-
- /**
* Returns {@code true} if the two given texts are equal. This method
delegates to
* {@link String#contentEquals(CharSequence)} if possible. This method
never invoke
* {@link CharSequence#toString()} in order to avoid a potentially large
copy of data.
@@ -1784,9 +1798,9 @@ cmp: while (ia < lga) {
*
* <ul>
* <li>If <var>c</var> is a
- * {@linkplain Character#isJavaIdentifierStart(int) Java identifier
start},
+ * {@linkplain Character#isUnicodeIdentifierStart(int) Unicode
identifier start},
* then any following characters that are
- * {@linkplain Character#isJavaIdentifierPart(int) Java identifier
part}.</li>
+ * {@linkplain Character#isUnicodeIdentifierPart(int) Unicode
identifier part}.</li>
* <li>Otherwise any character for which {@link Character#getType(int)}
returns
* the same value than for <var>c</var>.</li>
* </ul>
@@ -1815,8 +1829,8 @@ cmp: while (ia < lga) {
/*
* Advance over all characters "of the same type".
*/
- if (isJavaIdentifierStart(c)) {
- while (upper<length && isJavaIdentifierPart(c = codePointAt(text,
upper))) {
+ if (isUnicodeIdentifierStart(c)) {
+ while (upper<length && isUnicodeIdentifierPart(c =
codePointAt(text, upper))) {
upper += charCount(c);
}
} else {
Modified:
sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/Characters.java
URL:
http://svn.apache.org/viewvc/sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/Characters.java?rev=1418210&r1=1418209&r2=1418210&view=diff
==============================================================================
---
sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/Characters.java
(original)
+++
sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/Characters.java
Fri Dec 7 08:11:59 2012
@@ -16,6 +16,8 @@
*/
package org.apache.sis.util;
+import org.apache.sis.util.resources.Errors;
+
/**
* Static methods working on {@code char} values, and some character constants.
@@ -225,4 +227,195 @@ public final class Characters extends St
}
return c;
}
+
+
+
+
+ /**
+ * Subsets of Unicode characters identified by their general category.
+ * The categories are identified by constants defined in the {@link
Character} class, like
+ * {@link Character#LOWERCASE_LETTER LOWERCASE_LETTER},
+ * {@link Character#UPPERCASE_LETTER UPPERCASE_LETTER},
+ * {@link Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER} and
+ * {@link Character#SPACE_SEPARATOR SPACE_SEPARATOR}.
+ *
+ * <p>An instance of this class can be obtained from an enumeration of
character types
+ * using the {@link #forTypes(byte[])} method, or using one of the
constants predefined
+ * in this class. Then, Unicode characters can be tested for inclusion in
the subset by
+ * calling the {@link #contains(int)} method.</p>
+ *
+ * @author Martin Desruisseaux (Geomatys)
+ * @since 0.3
+ * @version 0.3
+ * @module
+ *
+ * @see java.lang.Character.Subset
+ * @see Character#getType(int)
+ */
+ public static class Filter extends Character.Subset {
+ /*
+ * This class can not easily be Serializable, because the parent class
is not Serializable
+ * and does not define a no-argument constructor. We could workaround
with a writeReplace
+ * method - waiting to see if there is a real need for that.
+ */
+
+ /**
+ * The subset of all characters for which {@link
Character#isLetterOrDigit(int)}
+ * returns {@code true}. This subset includes the following general
categories:
+ * {@link Character#LOWERCASE_LETTER},
+ * {@link Character#UPPERCASE_LETTER UPPERCASE_LETTER},
+ * {@link Character#TITLECASE_LETTER TITLECASE_LETTER},
+ * {@link Character#MODIFIER_LETTER MODIFIER_LETTER},
+ * {@link Character#OTHER_LETTER OTHER_LETTER} and
+ * {@link Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER}.
+ */
+ public static final Filter LETTERS_AND_DIGITS = new LettersAndDigits();
+
+ /**
+ * The subset of all characters for which {@link
Character#isUnicodeIdentifierPart(int)}
+ * returns {@code true}, excluding {@linkplain
Character#isIdentifierIgnorable(int)
+ * ignorable} characters. This subset includes all the {@link
#LETTERS_AND_DIGITS}
+ * categories with the addition of the following ones:
+ * {@link Character#LETTER_NUMBER},
+ * {@link Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION},
+ * {@link Character#NON_SPACING_MARK NON_SPACING_MARK} and
+ * {@link Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK}.
+ */
+ public static final Filter UNICODE_IDENTIFIER = new
UnicodeIdentifier();
+
+ /**
+ * A bitmask of character types in this subset.
+ */
+ private final long types;
+
+ /**
+ * Creates a new subset of the given name.
+ *
+ * @param name The subset name.
+ * @param types A bitmask of character types.
+ */
+ Filter(final String name, final long types) {
+ super(name);
+ this.types = types;
+ }
+
+ /**
+ * Returns {@code true} if this subset contains the given Unicode
character.
+ *
+ * @param codePoint The Unicode character, as a code point value.
+ * @return {@code true} if this subset contains the given character.
+ */
+ public boolean contains(final int codePoint) {
+ return containsType(Character.getType(codePoint));
+ }
+
+ /**
+ * Returns {@code true} if this subset contains the characters of the
given type.
+ * The given type shall be one of the {@link Character} constants like
+ * {@link Character#LOWERCASE_LETTER LOWERCASE_LETTER},
+ * {@link Character#UPPERCASE_LETTER UPPERCASE_LETTER},
+ * {@link Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER} or
+ * {@link Character#SPACE_SEPARATOR SPACE_SEPARATOR}.
+ *
+ * @param type One of the {@link Character} constants.
+ * @return {@code true} if this subset contains the characters of the
given type.
+ *
+ * @see Character#getType(int)
+ */
+ public final boolean containsType(final int type) {
+ return (type >= 0) && (type < Long.SIZE) && (types & (1L << type))
!= 0;
+ }
+
+ /**
+ * Returns a subset representing the union of all Unicode characters
of the given types.
+ *
+ * @param types The character types, as {@link Character} constants.
+ * @return The subset of Unicode characters of the given type.
+ *
+ * @see Character#LOWERCASE_LETTER
+ * @see Character#UPPERCASE_LETTER
+ * @see Character#DECIMAL_DIGIT_NUMBER
+ * @see Character#SPACE_SEPARATOR
+ */
+ public static Filter forTypes(final byte... types) {
+ long mask = 0;
+ for (int i=0; i<types.length; i++) {
+ final int type = types[i];
+ if (type < 0 || type >= Long.SIZE) {
+ throw new IllegalArgumentException(Errors.format(
+ Errors.Keys.IllegalArgumentValue_2, "types[" + i +
']', type));
+ }
+ mask |= (1L << type);
+ }
+predefined: for (int i=0; ; i++) {
+ final Filter candidate;
+ switch (i) {
+ case 0: candidate = LETTERS_AND_DIGITS; break;
+ case 1: candidate = UNICODE_IDENTIFIER; break;
+ default: break predefined;
+ }
+ if (mask == candidate.types) {
+ return candidate;
+ }
+ }
+ return new Filter("Filter", mask);
+ }
+ }
+
+ /**
+ * Implementation of the {@link Filter#LETTERS_AND_DIGITS} constant.
+ */
+ private static final class LettersAndDigits extends Filter {
+ /**
+ * Creates the {@link Filter#LETTERS_AND_DIGITS} singleton instance.
+ */
+ LettersAndDigits() {
+ super("LETTERS_AND_DIGITS",
+ (1L << Character.LOWERCASE_LETTER)
+ | (1L << Character.UPPERCASE_LETTER)
+ | (1L << Character.TITLECASE_LETTER)
+ | (1L << Character.MODIFIER_LETTER)
+ | (1L << Character.OTHER_LETTER)
+ | (1L << Character.DECIMAL_DIGIT_NUMBER));
+ }
+
+ /**
+ * Returns {@code true} if this subset contains the given Unicode
character.
+ */
+ @Override
+ public boolean contains(final int codePoint) {
+ return Character.isLetterOrDigit(codePoint);
+ }
+ }
+
+ /**
+ * Implementation of the {@link Filter#UNICODE_IDENTIFIER} constant.
+ */
+ private static final class UnicodeIdentifier extends Filter {
+ /**
+ * Creates the {@link Filter#UNICODE_IDENTIFIER} singleton instance.
+ */
+ UnicodeIdentifier() {
+ super("UNICODE_IDENTIFIER",
+ (1L << Character.LOWERCASE_LETTER)
+ | (1L << Character.UPPERCASE_LETTER)
+ | (1L << Character.TITLECASE_LETTER)
+ | (1L << Character.MODIFIER_LETTER)
+ | (1L << Character.OTHER_LETTER)
+ | (1L << Character.DECIMAL_DIGIT_NUMBER)
+ | (1L << Character.LETTER_NUMBER)
+ | (1L << Character.CONNECTOR_PUNCTUATION)
+ | (1L << Character.NON_SPACING_MARK)
+ | (1L << Character.COMBINING_SPACING_MARK));
+ }
+
+ /**
+ * Returns {@code true} if this subset contains the given Unicode
character.
+ */
+ @Override
+ public boolean contains(final int codePoint) {
+ return Character.isUnicodeIdentifierPart(codePoint) &&
+ !Character.isIdentifierIgnorable(codePoint);
+ }
+ }
}
Modified:
sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/type/CodeListFilter.java
URL:
http://svn.apache.org/viewvc/sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/type/CodeListFilter.java?rev=1418210&r1=1418209&r2=1418210&view=diff
==============================================================================
---
sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/type/CodeListFilter.java
(original)
+++
sis/branches/JDK7/sis-utility/src/main/java/org/apache/sis/util/type/CodeListFilter.java
Fri Dec 7 08:11:59 2012
@@ -18,6 +18,7 @@ package org.apache.sis.util.type;
import org.opengis.util.CodeList;
import org.apache.sis.util.CharSequences;
+import org.apache.sis.util.Characters.Filter;
/**
@@ -56,12 +57,12 @@ final class CodeListFilter implements Co
}
/**
- * Returns {@code true} if the given code match the the name we are
looking for.
+ * Returns {@code true} if the given code matches the name we are looking
for.
*/
@Override
public boolean accept(final CodeList<?> code) {
for (final String name : code.names()) {
- if (CharSequences.equalsLettersAndDigits(name, codename)) {
+ if (CharSequences.equalsFiltered(name, codename,
Filter.LETTERS_AND_DIGITS, true)) {
return true;
}
}
Modified:
sis/branches/JDK7/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java
URL:
http://svn.apache.org/viewvc/sis/branches/JDK7/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java?rev=1418210&r1=1418209&r2=1418210&view=diff
==============================================================================
---
sis/branches/JDK7/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java
(original)
+++
sis/branches/JDK7/sis-utility/src/test/java/org/apache/sis/util/CharSequencesTest.java
Fri Dec 7 08:11:59 2012
@@ -37,7 +37,10 @@ import static org.apache.sis.util.CharSe
* @version 0.3
* @module
*/
-@DependsOn(ArraysTest.class)
+@DependsOn({
+ ArraysTest.class,
+ CharactersTest.class
+})
public final strictfp class CharSequencesTest extends TestCase {
/**
* Tests {@link CharSequences#spaces(int)}.
@@ -312,12 +315,12 @@ public final strictfp class CharSequence
}
/**
- * Tests the {@link CharSequences#isJavaIdentifier(CharSequence)} method.
+ * Tests the {@link CharSequences#isUnicodeIdentifier(CharSequence)}
method.
*/
@Test
- public void testIsJavaIdentifier() {
- assertTrue ("A123", isJavaIdentifier("A123"));
- assertFalse("123A", isJavaIdentifier("123A"));
+ public void testIsUnicodeIdentifier() {
+ assertTrue ("A123", isUnicodeIdentifier("A123"));
+ assertFalse("123A", isUnicodeIdentifier("123A"));
}
/**
@@ -341,13 +344,16 @@ public final strictfp class CharSequence
}
/**
- * Tests the {@link CharSequences#equalsLettersAndDigits(CharSequence,
CharSequence)} method.
+ * Tests the {@link CharSequences#equalsFiltered(CharSequence,
CharSequence, Characters.Filter, boolean)} method.
*/
@Test
- public void testEqualsLettersAndDigits() {
- assertTrue (equalsLettersAndDigits(" UTF-8 ", "utf8"));
- assertTrue (equalsLettersAndDigits("UTF-8", " utf 8"));
- assertFalse(equalsLettersAndDigits("UTF-8", " utf 16"));
+ public void testEqualsFiltered() {
+ assertTrue (equalsFiltered(" UTF-8 ", "utf8",
Characters.Filter.LETTERS_AND_DIGITS, true));
+ assertFalse(equalsFiltered(" UTF-8 ", "utf8",
Characters.Filter.LETTERS_AND_DIGITS, false));
+ assertTrue (equalsFiltered("UTF-8", " utf 8",
Characters.Filter.LETTERS_AND_DIGITS, true));
+ assertFalse(equalsFiltered("UTF-8", " utf 16",
Characters.Filter.LETTERS_AND_DIGITS, true));
+ assertTrue (equalsFiltered("WGS84", "WGS_84",
Characters.Filter.LETTERS_AND_DIGITS, true));
+ assertFalse(equalsFiltered("WGS84", "WGS_84",
Characters.Filter.UNICODE_IDENTIFIER, true));
}
/**
Modified:
sis/branches/JDK7/sis-utility/src/test/java/org/apache/sis/util/CharactersTest.java
URL:
http://svn.apache.org/viewvc/sis/branches/JDK7/sis-utility/src/test/java/org/apache/sis/util/CharactersTest.java?rev=1418210&r1=1418209&r2=1418210&view=diff
==============================================================================
---
sis/branches/JDK7/sis-utility/src/test/java/org/apache/sis/util/CharactersTest.java
(original)
+++
sis/branches/JDK7/sis-utility/src/test/java/org/apache/sis/util/CharactersTest.java
Fri Dec 7 08:11:59 2012
@@ -92,4 +92,52 @@ public final strictfp class CharactersTe
assertEquals(c, toNormalScript(c));
assertFalse(isSubScript(c));
}
+
+ /**
+ * Tests the pre-defined {@link Characters.Filter} constants.
+ */
+ @Test
+ public void testPredefinedFilters() {
+ assertTrue (Filter.UNICODE_IDENTIFIER.contains('a'));
+ assertTrue (Filter.LETTERS_AND_DIGITS.contains('a'));
+ assertTrue (Filter.UNICODE_IDENTIFIER.contains('_'));
+ assertFalse(Filter.LETTERS_AND_DIGITS.contains('_'));
+ assertFalse(Filter.UNICODE_IDENTIFIER.contains(' '));
+ assertFalse(Filter.LETTERS_AND_DIGITS.contains(' '));
+ }
+
+ /**
+ * Tests the {@link Characters.Filter#forTypes(byte[])} method.
+ */
+ @Test
+ public void testFilterForTypes() {
+ final Filter filter = Filter.forTypes(Character.SPACE_SEPARATOR,
Character.DECIMAL_DIGIT_NUMBER);
+ assertTrue (filter.contains('0'));
+ assertTrue (filter.contains(' '));
+ assertFalse(filter.contains('A'));
+ }
+
+ /**
+ * Scans the full {@code char} range in order to check for {@link
Characters.Filter} consistency.
+ */
+ @Test
+ public void scanCharacterRange() {
+ for (int c=Character.MIN_VALUE; c<=Character.MAX_VALUE; c++) {
+ final int type = Character.getType(c);
+predefined: for (int i=0; ; i++) {
+ final Characters.Filter filter;
+ switch (i) {
+ case 0: filter = Filter.UNICODE_IDENTIFIER; break;
+ case 1: filter = Filter.LETTERS_AND_DIGITS; break;
+ default: break predefined;
+ }
+ final boolean cc = filter.contains(c);
+ final boolean ct = filter.containsType(type);
+ if (cc != ct) {
+ fail(filter + ".contains('" + (char) c + "') == " + cc + "
but "
+ + filter + ".containsType(" + type + ") == " + ct);
+ }
+ }
+ }
+ }
}