afs commented on code in PR #2595: URL: https://github.com/apache/jena/pull/2595#discussion_r1701584534
########## jena-arq/src/main/java/org/apache/jena/sparql/util/FmtUtils.java: ##########
@@ -477,27 +477,56 @@ private static boolean checkValidLocalname(String localname) {
         for ( int idx = 0 ; idx < localname.length() ; idx++ ) {
             char ch = localname.charAt(idx);
-            if ( !validPNameChar(ch) )
-                return false;
+            if ( idx == 0 ) {
+                if ( !validFirstPNameChar(ch) )
+                    return false;
+            } else if ( idx == localname.length() - 1 ) {
+                if ( !validEndPNameChar(ch) )
+                    return false;
+            } else {
+                if ( !validMidPNameChar(ch) )
+                    return false;
+            }
         }
+        return true;
+    }
-        // Test start and end - at least one character in the name.
+    // first char as per options in first bracketed production in 169: https://www.w3.org/TR/sparql11-query/#rPN_LOCAL
+    private static boolean validFirstPNameChar(char ch) {
+        return validPNameCharU(ch) || ( ch >= '0' && ch <= '9' ) || ( ch == '%' ) ;
+    }
-        if ( localname.endsWith(".") )
-            return false;
-        if ( localname.startsWith(".") )
-            return false;
+    // neither first char nor last in middle repeated production in 169: https://www.w3.org/TR/sparql11-query/#rPN_LOCAL
+    private static boolean validMidPNameChar(char ch) {
+        return validPNameChar(ch) || ( ch == '.' ) || ( ch == ':' ) || ( ch == '%' ) ;
+    }
-        return true;
+    // last char, but not first, in last production in 169: https://www.w3.org/TR/sparql11-query/#rPN_LOCAL
+    private static boolean validEndPNameChar(char ch) {
+        return validPNameChar(ch) || ( ch == ':' ) || ( ch == '%' ) ;
+    }
+
+    // production 164: https://www.w3.org/TR/sparql11-query/#rPN_CHARS_BASE

Review Comment:
There are functions to test for the token rules in `RiotChars`. This one is `RiotChars.isPNCharsBase`.
########## jena-arq/src/main/java/org/apache/jena/sparql/util/FmtUtils.java: ##########
@@ -477,27 +477,56 @@ private static boolean checkValidLocalname(String localname) {
         for ( int idx = 0 ; idx < localname.length() ; idx++ ) {
             char ch = localname.charAt(idx);
-            if ( !validPNameChar(ch) )
-                return false;
+            if ( idx == 0 ) {
+                if ( !validFirstPNameChar(ch) )
+                    return false;
+            } else if ( idx == localname.length() - 1 ) {
+                if ( !validEndPNameChar(ch) )
+                    return false;
+            } else {
+                if ( !validMidPNameChar(ch) )
+                    return false;
+            }
         }
+        return true;
+    }
-        // Test start and end - at least one character in the name.
+    // first char as per options in first bracketed production in 169: https://www.w3.org/TR/sparql11-query/#rPN_LOCAL
+    private static boolean validFirstPNameChar(char ch) {
+        return validPNameCharU(ch) || ( ch >= '0' && ch <= '9' ) || ( ch == '%' ) ;
+    }
-        if ( localname.endsWith(".") )
-            return false;
-        if ( localname.startsWith(".") )
-            return false;
+    // neither first char nor last in middle repeated production in 169: https://www.w3.org/TR/sparql11-query/#rPN_LOCAL
+    private static boolean validMidPNameChar(char ch) {
+        return validPNameChar(ch) || ( ch == '.' ) || ( ch == ':' ) || ( ch == '%' ) ;
+    }
-        return true;
+    // last char, but not first, in last production in 169: https://www.w3.org/TR/sparql11-query/#rPN_LOCAL
+    private static boolean validEndPNameChar(char ch) {
+        return validPNameChar(ch) || ( ch == ':' ) || ( ch == '%' ) ;
+    }
+
+    // production 164: https://www.w3.org/TR/sparql11-query/#rPN_CHARS_BASE
+    private static boolean validPNameCharBase(char ch) {
+        return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '\u00C0' && ch <= '\u00D6') ||
+            (ch >= '\u00D8' && ch <= '\u00F6') || (ch >= '\u00F8' && ch <= '\u02FF') ||
+            (ch >= '\u0370' && ch <= '\u037D') || (ch >= '\u037F' && ch <= '\u1FFF') ||
+            (ch >= '\u200C' && ch <= '\u200D') || (ch >= '\u2070' && ch <= '\u218F') ||
+            (ch >= '\u2C00' && ch <= '\u2FEF') || (ch >= '\u3001' && ch <= '\uD7FF') ||
+            (ch >= '\uF900' && ch <= '\uFDCF') || (ch >= '\uFDF0' && ch <= '\uFFFD');
+        // 32bit unicode chars are 2 Java chars ( ch >= '\u10000' && ch <= '\uEFFFF' )
+    }
+
+    // production 165: https://www.w3.org/TR/sparql11-query/#rPN_CHARS_U
+    private static boolean validPNameCharU(char ch) {
+        return validPNameCharBase(ch) || ( ch == '_' ) ;
     }
+    // production 167: https://www.w3.org/TR/sparql11-query/#rPN_CHARS
     private static boolean validPNameChar(char ch) {
-        if ( Character.isLetterOrDigit(ch) ) return true ;
-        if ( ch == '.' ) return true ;
-        if ( ch == ':' ) return true ;
-        if ( ch == '-' ) return true ;
-        if ( ch == '_' ) return true ;
-        return false ;
+        return validPNameCharU(ch) ||

Review Comment:
This is `RiotChars.isPNChars`.
########## jena-arq/src/main/java/org/apache/jena/sparql/util/FmtUtils.java: ##########
@@ -477,27 +477,56 @@ private static boolean checkValidLocalname(String localname) {
         for ( int idx = 0 ; idx < localname.length() ; idx++ ) {
             char ch = localname.charAt(idx);
-            if ( !validPNameChar(ch) )
-                return false;
+            if ( idx == 0 ) {
+                if ( !validFirstPNameChar(ch) )
+                    return false;
+            } else if ( idx == localname.length() - 1 ) {
+                if ( !validEndPNameChar(ch) )
+                    return false;
+            } else {
+                if ( !validMidPNameChar(ch) )
+                    return false;
+            }
         }
+        return true;
+    }
-        // Test start and end - at least one character in the name.
+    // first char as per options in first bracketed production in 169: https://www.w3.org/TR/sparql11-query/#rPN_LOCAL
+    private static boolean validFirstPNameChar(char ch) {
+        return validPNameCharU(ch) || ( ch >= '0' && ch <= '9' ) || ( ch == '%' ) ;
+    }
-        if ( localname.endsWith(".") )
-            return false;
-        if ( localname.startsWith(".") )
-            return false;
+    // neither first char nor last in middle repeated production in 169: https://www.w3.org/TR/sparql11-query/#rPN_LOCAL
+    private static boolean validMidPNameChar(char ch) {
+        return validPNameChar(ch) || ( ch == '.' ) || ( ch == ':' ) || ( ch == '%' ) ;
+    }
-        return true;
+    // last char, but not first, in last production in 169: https://www.w3.org/TR/sparql11-query/#rPN_LOCAL
+    private static boolean validEndPNameChar(char ch) {
+        return validPNameChar(ch) || ( ch == ':' ) || ( ch == '%' ) ;
+    }
+
+    // production 164: https://www.w3.org/TR/sparql11-query/#rPN_CHARS_BASE
+    private static boolean validPNameCharBase(char ch) {
+        return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '\u00C0' && ch <= '\u00D6') ||
+            (ch >= '\u00D8' && ch <= '\u00F6') || (ch >= '\u00F8' && ch <= '\u02FF') ||
+            (ch >= '\u0370' && ch <= '\u037D') || (ch >= '\u037F' && ch <= '\u1FFF') ||
+            (ch >= '\u200C' && ch <= '\u200D') || (ch >= '\u2070' && ch <= '\u218F') ||
+            (ch >= '\u2C00' && ch <= '\u2FEF') || (ch >= '\u3001' && ch <= '\uD7FF') ||
+            (ch >= '\uF900' && ch <= '\uFDCF') || (ch >= '\uFDF0' && ch <= '\uFFFD');
+        // 32bit unicode chars are 2 Java chars ( ch >= '\u10000' && ch <= '\uEFFFF' )
+    }
+
+    // production 165: https://www.w3.org/TR/sparql11-query/#rPN_CHARS_U
+    private static boolean validPNameCharU(char ch) {
+        return validPNameCharBase(ch) || ( ch == '_' ) ;

Review Comment:
This is `RiotChars.isPNChars_U`.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: pr-unsubscr...@jena.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: pr-unsubscr...@jena.apache.org
For additional commands, e-mail: pr-h...@jena.apache.org