This character lies in the CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A block.
I added detection of the extension blocks; I assume (without really knowing)
that none of the characters in these blocks are phonetic either.
import java.lang.Character.UnicodeBlock;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import junit.framework.Assert;
import org.junit.Test;
/**
 * JUnit test showing how {@link Character.UnicodeBlock} can be used to decide
 * whether a character is a CJK unified ideograph.
 *
 * <p>A character is considered CJK here when its Unicode block is one of the
 * blocks listed in {@link #cjkUnicodeBlocks}.
 */
public class DetectCJK {

    /**
     * Unicode blocks that this test treats as CJK ideographs.
     *
     * <p>The EXTENSION_C and EXTENSION_D constants were only added in Java 7;
     * this code targets Java 6, so they are not listed.
     */
    final Set<UnicodeBlock> cjkUnicodeBlocks = new HashSet<UnicodeBlock>(
            Arrays.asList(new Character.UnicodeBlock[] {
                Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
                Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
                Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
            }));

    /**
     * Checks the block lookup for a couple of non-CJK sanity cases and then
     * for sample ideographs from each supported CJK block.
     */
    @Test
    public void test1() {
        // Sanity checks: non-CJK characters map to their expected blocks.
        Assert.assertEquals(Character.UnicodeBlock.BASIC_LATIN,
                Character.UnicodeBlock.of('a'));
        Assert.assertEquals(Character.UnicodeBlock.HEBREW,
                Character.UnicodeBlock.of('א'));

        assertCJK('電', "Traditional Chinese: Electricity");
        assertCJK('电', "Simplified Chinese: Electricity");
        // Same character as the Traditional Chinese case, as used in Japanese.
        assertCJK('電', "Japanese: Electricity");
        assertCJK('㒨', "in CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A");
        // Extension B lies outside the Basic Multilingual Plane, so it cannot
        // be written as a char literal; pass the code point (U+20000) instead.
        assertCJK(0x20000, "in CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B");
    }

    /**
     * Asserts that the given code point belongs to one of the CJK blocks in
     * {@link #cjkUnicodeBlocks}.
     *
     * @param codePoint the Unicode code point to check; {@code char} arguments
     *                  widen to {@code int} automatically
     * @param message   the failure message reported when the code point is
     *                  not in a CJK block
     */
    private void assertCJK(int codePoint, String message) {
        UnicodeBlock unicodeBlock = Character.UnicodeBlock.of(codePoint);
        Assert.assertTrue(message, cjkUnicodeBlocks.contains(unicodeBlock));
    }
}
On Mon, Mar 11, 2013 at 12:10 AM, Trejkaz <[email protected]> wrote:
> On Sun, Mar 10, 2013 at 8:19 PM, Gili Nachum <[email protected]> wrote:
> > Answering myself for next generations' sake.
> > Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS does the job.
>
> How about 㒨?
>
> TX
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: [email protected]
> For additional commands, e-mail: [email protected]
>
>