This is an automated email from the ASF dual-hosted git repository.

andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git

commit f3dcf44c6a909aafbdd97c7044961ad985623387
Author: Andy Seaborne <[email protected]>
AuthorDate: Thu Nov 7 19:29:31 2024 +0000

    IRI3986: One impl of percentCheck; fix percent error message
---
 .../java/org/apache/jena/rfc3986/Chars3986.java    | 26 ++++++++++++++--------
 .../main/java/org/apache/jena/rfc3986/IRI3986.java | 12 +---------
 2 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/Chars3986.java b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/Chars3986.java
index 9dafbe189e..c43dea406f 100644
--- a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/Chars3986.java
+++ b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/Chars3986.java
@@ -37,8 +37,11 @@ public class Chars3986 {
     //  ipchar        = iunreserved / pct-encoded / sub-delims / ":" / "@"
     //                = ipchar / ucschar
 
-    /** RFC3986 pchar */
-    public static boolean isPChar(char ch, String str, int posn) {
+    /**
+     * Test whether {@code ch} is an RFC3986 pchar.
+     * This may need to look ahead in the string if the character is a {@code %}.
+     */
+    public static boolean isPChar(char ch, CharSequence str, int posn) {
         return unreserved(ch) || isPctEncoded(ch, str, posn) || subDelims(ch) || ch == ':' || ch == '@';
     }
 
@@ -60,7 +63,7 @@ public class Chars3986 {
             return false;
         char ch1 = charAt(s, x+1);
         char ch2 = charAt(s, x+2);
-        return percentCheck(x, ch1, ch2);
+        return percentCheck(ch1, ch2, s, x);
     }
 
     public static boolean isAlpha(char ch) {
@@ -173,17 +176,22 @@ public class Chars3986 {
 
     /** Return a display string for a character suitable for error messages. */
     public static String displayChar(char ch) {
-        return String.format("%c (0x%04X)", ch, (int)ch);
+        return String.format("%c(U+%04X)", ch, (int)ch);
     }
 
-    private static boolean percentCheck(int idx, char ch1, char ch2) {
+    /**
+     * Check whether {@code ch1} and {@code ch2} are percent-encoding hex characters.
+     */
+    public static boolean percentCheck(char ch1, char ch2, CharSequence source, int idx) {
         if ( ch1 == EOF || ch2 == EOF ) {
-            parseError(null, idx+1, "Incomplete %-encoded character");
+            parseError(source.toString(), idx+1, "Incomplete %-encoded character");
             return false;
         }
+        // Any case.
+
         if ( isHexDigit(ch1) && isHexDigit(ch2) )
             return true;
-        parseError(null, idx+1, "Bad %-encoded character ["+displayChar(ch1)+" "+displayChar(ch2)+"]");
+        parseError(source.toString(), idx+1, "Bad %-encoded character ["+displayChar(ch1)+" "+displayChar(ch2)+"]");
         return false;
     }
 
@@ -229,7 +237,7 @@ public class Chars3986 {
     }
 
     // How to handle parse errors (percent encoding).
-    private static void parseError(String string, int posn, String msg) {
-        throw ParseErrorIRI3986.parseError(string, posn, msg);
+    private static void parseError(String source, int posn, String msg) {
+        throw ParseErrorIRI3986.parseError(source, posn, msg);
     }
 }
diff --git a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/IRI3986.java b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/IRI3986.java
index 973f903c73..1f9c924fa4 100644
--- a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/IRI3986.java
+++ b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/IRI3986.java
@@ -1374,17 +1374,7 @@ public class IRI3986 implements IRI {
             return false;
         char ch1 = charAt(idx + 1);
         char ch2 = charAt(idx + 2);
-        return percentCheck(idx, ch1, ch2);
-    }
-
-    private boolean percentCheck(int idx, char ch1, char ch2) {
-        if ( ch1 == EOF || ch2 == EOF ) {
-            throw parseError(iriStr, idx + 1, "Incomplete %-encoded character");
-        }
-        // Any case.
-        if ( Chars3986.isHexDigit(ch1) && Chars3986.isHexDigit(ch2) )
-            return true;
-        throw parseError(iriStr, idx + 1, "Bad %-encoded character [" + displayChar(ch1) + " " + displayChar(ch2) + "]");
+        return Chars3986.percentCheck(ch1, ch2, iriStr, idx);
     }
 
     // pchar = unreserved / pct-encoded / sub-delims / ":" / "@"

Reply via email to