afs commented on code in PR #2595:
URL: https://github.com/apache/jena/pull/2595#discussion_r1701584534


##########
jena-arq/src/main/java/org/apache/jena/sparql/util/FmtUtils.java:
##########
@@ -477,27 +477,56 @@ private static boolean checkValidLocalname(String 
localname) {
 
         for ( int idx = 0 ; idx < localname.length() ; idx++ ) {
             char ch = localname.charAt(idx);
-            if ( !validPNameChar(ch) )
-                return false;
+            if ( idx == 0 ) {
+                if ( !validFirstPNameChar(ch) )
+                    return false;
+            } else if ( idx == localname.length() - 1 ) {
+                if ( !validEndPNameChar(ch) )
+                    return false;
+            } else {
+                if ( !validMidPNameChar(ch) )
+                    return false;
+            }
         }
+        return true;
+    }
 
-        // Test start and end - at least one character in the name.
+    // first char as per options in first bracketed production in 169: 
https://www.w3.org/TR/sparql11-query/#rPN_LOCAL
+    private static boolean validFirstPNameChar(char ch) {
+        return validPNameCharU(ch) || ( ch >= '0' && ch <= '9' ) || ( ch == 
'%' ) ;
+    }
 
-        if ( localname.endsWith(".") )
-            return false;
-        if ( localname.startsWith(".") )
-            return false;
+    // neither first char nor last in middle repeated production in 169: 
https://www.w3.org/TR/sparql11-query/#rPN_LOCAL
+    private static boolean validMidPNameChar(char ch) {
+        return validPNameChar(ch) || ( ch == '.' ) || ( ch == ':' ) || ( ch == 
'%' ) ;
+    }
 
-        return true;
+    // last char, but not first, in last production in 169: 
https://www.w3.org/TR/sparql11-query/#rPN_LOCAL
+    private static boolean validEndPNameChar(char ch) {
+        return validPNameChar(ch) || ( ch == ':' ) || ( ch == '%' ) ;
+    }
+
+    // production 164: https://www.w3.org/TR/sparql11-query/#rPN_CHARS_BASE

Review Comment:
   `RiotChars` already has functions that test for the token rules.
   
   This one is the same check as `RiotChars.isPNCharsBase`.



##########
jena-arq/src/main/java/org/apache/jena/sparql/util/FmtUtils.java:
##########
@@ -477,27 +477,56 @@ private static boolean checkValidLocalname(String 
localname) {
 
         for ( int idx = 0 ; idx < localname.length() ; idx++ ) {
             char ch = localname.charAt(idx);
-            if ( !validPNameChar(ch) )
-                return false;
+            if ( idx == 0 ) {
+                if ( !validFirstPNameChar(ch) )
+                    return false;
+            } else if ( idx == localname.length() - 1 ) {
+                if ( !validEndPNameChar(ch) )
+                    return false;
+            } else {
+                if ( !validMidPNameChar(ch) )
+                    return false;
+            }
         }
+        return true;
+    }
 
-        // Test start and end - at least one character in the name.
+    // first char as per options in first bracketed production in 169: 
https://www.w3.org/TR/sparql11-query/#rPN_LOCAL
+    private static boolean validFirstPNameChar(char ch) {
+        return validPNameCharU(ch) || ( ch >= '0' && ch <= '9' ) || ( ch == 
'%' ) ;
+    }
 
-        if ( localname.endsWith(".") )
-            return false;
-        if ( localname.startsWith(".") )
-            return false;
+    // neither first char nor last in middle repeated production in 169: 
https://www.w3.org/TR/sparql11-query/#rPN_LOCAL
+    private static boolean validMidPNameChar(char ch) {
+        return validPNameChar(ch) || ( ch == '.' ) || ( ch == ':' ) || ( ch == 
'%' ) ;
+    }
 
-        return true;
+    // last char, but not first, in last production in 169: 
https://www.w3.org/TR/sparql11-query/#rPN_LOCAL
+    private static boolean validEndPNameChar(char ch) {
+        return validPNameChar(ch) || ( ch == ':' ) || ( ch == '%' ) ;
+    }
+
+    // production 164: https://www.w3.org/TR/sparql11-query/#rPN_CHARS_BASE
+    private static boolean validPNameCharBase(char ch) {
+        return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= 
'\u00C0' && ch <= '\u00D6') ||
+                (ch >= '\u00D8' && ch <= '\u00F6') || (ch >= '\u00F8' && ch <= 
'\u02FF') ||
+                (ch >= '\u0370' && ch <= '\u037D') || (ch >= '\u037F' && ch <= 
'\u1FFF') ||
+                (ch >= '\u200C' && ch <= '\u200D') || (ch >= '\u2070' && ch <= 
'\u218F') ||
+                (ch >= '\u2C00' && ch <= '\u2FEF') || (ch >= '\u3001' && ch <= 
'\uD7FF') ||
+                (ch >= '\uF900' && ch <= '\uFDCF') || (ch >= '\uFDF0' && ch <= 
'\uFFFD');
+        // 32bit unicode chars are 2 Java chars ( ch >= '\u10000' && ch <= 
'\uEFFFF' )
+    }
+
+    // production 165: https://www.w3.org/TR/sparql11-query/#rPN_CHARS_U
+    private static boolean validPNameCharU(char ch) {
+        return validPNameCharBase(ch) || ( ch == '_' ) ;
     }
 
+    // production 167: https://www.w3.org/TR/sparql11-query/#rPN_CHARS
     private static boolean validPNameChar(char ch) {
-        if ( Character.isLetterOrDigit(ch) ) return true ;
-        if ( ch == '.' )    return true ;
-        if ( ch == ':' )    return true ;
-        if ( ch == '-' )    return true ;
-        if ( ch == '_' )    return true ;
-        return false ;
+        return validPNameCharU(ch) ||

Review Comment:
   This is the same check as the existing `RiotChars.isPNChars`.



##########
jena-arq/src/main/java/org/apache/jena/sparql/util/FmtUtils.java:
##########
@@ -477,27 +477,56 @@ private static boolean checkValidLocalname(String 
localname) {
 
         for ( int idx = 0 ; idx < localname.length() ; idx++ ) {
             char ch = localname.charAt(idx);
-            if ( !validPNameChar(ch) )
-                return false;
+            if ( idx == 0 ) {
+                if ( !validFirstPNameChar(ch) )
+                    return false;
+            } else if ( idx == localname.length() - 1 ) {
+                if ( !validEndPNameChar(ch) )
+                    return false;
+            } else {
+                if ( !validMidPNameChar(ch) )
+                    return false;
+            }
         }
+        return true;
+    }
 
-        // Test start and end - at least one character in the name.
+    // first char as per options in first bracketed production in 169: 
https://www.w3.org/TR/sparql11-query/#rPN_LOCAL
+    private static boolean validFirstPNameChar(char ch) {
+        return validPNameCharU(ch) || ( ch >= '0' && ch <= '9' ) || ( ch == 
'%' ) ;
+    }
 
-        if ( localname.endsWith(".") )
-            return false;
-        if ( localname.startsWith(".") )
-            return false;
+    // neither first char nor last in middle repeated production in 169: 
https://www.w3.org/TR/sparql11-query/#rPN_LOCAL
+    private static boolean validMidPNameChar(char ch) {
+        return validPNameChar(ch) || ( ch == '.' ) || ( ch == ':' ) || ( ch == 
'%' ) ;
+    }
 
-        return true;
+    // last char, but not first, in last production in 169: 
https://www.w3.org/TR/sparql11-query/#rPN_LOCAL
+    private static boolean validEndPNameChar(char ch) {
+        return validPNameChar(ch) || ( ch == ':' ) || ( ch == '%' ) ;
+    }
+
+    // production 164: https://www.w3.org/TR/sparql11-query/#rPN_CHARS_BASE
+    private static boolean validPNameCharBase(char ch) {
+        return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= 
'\u00C0' && ch <= '\u00D6') ||
+                (ch >= '\u00D8' && ch <= '\u00F6') || (ch >= '\u00F8' && ch <= 
'\u02FF') ||
+                (ch >= '\u0370' && ch <= '\u037D') || (ch >= '\u037F' && ch <= 
'\u1FFF') ||
+                (ch >= '\u200C' && ch <= '\u200D') || (ch >= '\u2070' && ch <= 
'\u218F') ||
+                (ch >= '\u2C00' && ch <= '\u2FEF') || (ch >= '\u3001' && ch <= 
'\uD7FF') ||
+                (ch >= '\uF900' && ch <= '\uFDCF') || (ch >= '\uFDF0' && ch <= 
'\uFFFD');
+        // 32bit unicode chars are 2 Java chars ( ch >= '\u10000' && ch <= 
'\uEFFFF' )
+    }
+
+    // production 165: https://www.w3.org/TR/sparql11-query/#rPN_CHARS_U
+    private static boolean validPNameCharU(char ch) {
+        return validPNameCharBase(ch) || ( ch == '_' ) ;

Review Comment:
   This is the same check as the existing `RiotChars.isPNChars_U`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: pr-unsubscr...@jena.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: pr-unsubscr...@jena.apache.org
For additional commands, e-mail: pr-h...@jena.apache.org

Reply via email to