Is there a reason that Jena does not support the full range of XML name
start characters?

See https://www.w3.org/TR/xml/#NT-NameStartChar

I wrote a quick test and found that there were a number of characters that
Jena does not support.
Miscategorization appears to start at 0x132.  There are 936990
miscategorized characters.

The issue is actually in the Xerces util class XMLChar.

Is this because of the version of Xerces we are stuck with?  Is there a way
around this issue?

Claude

p.s. Since I can't attach a file, here is the test code I wrote.

import static org.junit.Assert.assertTrue;

import org.apache.xerces.util.XMLChar;
import org.junit.Test;

public class NameTest {
    /*
     * NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] |
[#xD8-#xF6] |
     * [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] |
     * [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
[#xF900-#xFDCF] |
     * [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
     */

    int[][] ranges = { { ':', ':' }, { 'A', 'Z' }, { '_', '_' }, { 0xC0,
0xD6 }, { 0xD8, 0xF6 }, { 0xF8, 0x2FF },
            { 0x370, 0x37D }, { 0x37F, 0x1FFF }, { 0x200C, 0x200D }, {
0x2070, 0x218F }, { 0x2C00, 0x2FEF },
            { 0x3001, 0xD7FF }, { 0xF900, 0xFDCF }, { 0xFDF0, 0xFFFD }, {
0x10000, 0xEFFFF } };

    @Test
    public void testNameStart() {

        for (int[] range : ranges) {
            for (int c = range[0]; c <= range[1]; c++) {
                assertTrue( String.format( "character %s
0x%s",c,Integer.toHexString( c )) , XMLChar.isNameStart( c ) );
            }
        }

    }

    @Test
    public void listNameStartErr() {
        int cnt = 0;
        for (int[] range : ranges) {
            for (int c = range[0]; c <= range[1]; c++) {
                if (!XMLChar.isNameStart( c ))
                {
                    System.out.print( String.format( "0x%s
",Integer.toHexString( c )) );
                    cnt++;
                    if (cnt % 25 == 0)
                    {
                        System.out.println();
                    }

                }

            }
        }
        System.out.println();
        System.out.println( cnt+" characters miscategorized"  );
    }

}


-- 
I like: Like Like - The likeliest place on the web
<http://like-like.xenei.com>
LinkedIn: http://www.linkedin.com/in/claudewarren

Reply via email to