This is an automated email from the ASF dual-hosted git repository.

andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git

commit 24f41b3fa5b7f6060acb52e01b419a5908d19570
Author: Andy Seaborne <[email protected]>
AuthorDate: Sat Sep 28 12:02:01 2024 +0100

    GH-2800: ParserProfileStd.doChecking for IRI3986; Separate CheckerJenaIRI
---
 .../java/org/apache/jena/riot/system/Checker.java  | 112 +++++-------------
 .../apache/jena/riot/system/CheckerJenaIRI.java    | 126 +++++++++++++++++++++
 .../apache/jena/riot/system/ParserProfileStd.java  |  41 ++++---
 .../java/org/apache/jena/irix/IRIProviderAny.java  |   2 +-
 .../java/org/apache/jena/irix/TestRFC3986.java     |  14 +--
 5 files changed, 193 insertions(+), 102 deletions(-)

diff --git a/jena-arq/src/main/java/org/apache/jena/riot/system/Checker.java b/jena-arq/src/main/java/org/apache/jena/riot/system/Checker.java
index 55fdc3a23b..01d7098385 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/system/Checker.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/system/Checker.java
@@ -18,7 +18,6 @@
 
 package org.apache.jena.riot.system;
 
-import java.util.Iterator;
 import java.util.regex.Pattern;
 
 import org.apache.jena.datatypes.RDFDatatype;
@@ -27,12 +26,7 @@ import org.apache.jena.datatypes.xsd.impl.RDFLangString;
 import org.apache.jena.graph.Node;
 import org.apache.jena.graph.Triple;
 import org.apache.jena.iri.IRI;
-import org.apache.jena.iri.IRIComponents;
-import org.apache.jena.iri.Violation;
-import org.apache.jena.irix.IRIProviderJenaIRI;
-import org.apache.jena.irix.IRIs;
-import org.apache.jena.irix.SetupJenaIRI;
-import org.apache.jena.irix.SystemIRIx;
+import org.apache.jena.irix.*;
 import org.apache.jena.sparql.core.Quad;
 import org.apache.jena.util.SplitIRI;
 
@@ -96,78 +90,29 @@ public class Checker {
         return checkIRI(node.getURI(), errorHandler, line, col);
     }
 
-    public static boolean checkIRI(String iriStr) {
-        return checkIRI(iriStr, nullErrorHandler, -1L, -1L);
-    }
+//    public static boolean checkIRI(String iriStr) {
+//        return checkIRI(iriStr, nullErrorHandler, -1L, -1L);
+//    }
 
     /** See also {@link IRIs#reference} */
     public static boolean checkIRI(String iriStr, ErrorHandler errorHandler, long line, long col) {
-        IRI iri = SetupJenaIRI.iriCheckerFactory().create(iriStr);
-        boolean b = iriViolations(iri, errorHandler, line, col);
-        return b;
-    }
-
-    /**
-     * Process violations on an IRI Calls the {@link ErrorHandler} on all errors and
-     * warnings (as warnings).
-     */
-    public static void iriViolations(IRI iri) {
-        iriViolations(iri, nullErrorHandler, false, true, -1L, -1L);
-    }
-
-    /**
-     * Process violations on an IRI Calls the {@link ErrorHandler} on all errors and
-     * warnings (as warnings).
-     */
-    public static boolean iriViolations(IRI iri, ErrorHandler errorHandler, long line, long col) {
-        return iriViolations(iri, errorHandler, false, true, line, col);
-    }
-
-    /**
-     * Process violations on an IRI Calls the errorHandler on all errors and warnings
-     * (as warning). (If checking for relative IRIs, these are sent out as errors.)
-     * Assumes error handler throws exceptions on errors if need be
-     */
-    public static boolean iriViolations(IRI iri, ErrorHandler errorHandler,
-                                        boolean allowRelativeIRIs, boolean includeIRIwarnings,
-                                        long line, long col) {
-
-        if ( !allowRelativeIRIs && iri.isRelative() )
-            // Relative IRIs.
-            iriViolationMessage(iri.toString(), true, "Relative IRI: " + iri, line, col, errorHandler);
-
-        boolean isOK = true;
-
-        if ( iri.hasViolation(includeIRIwarnings) ) {
-            Iterator<Violation> iter = iri.violations(includeIRIwarnings);
-
-            for ( ; iter.hasNext() ; ) {
-                Violation v = iter.next();
-                int code = v.getViolationCode();
-                boolean isError = v.isError();
-
-                // --- Tune warnings.
-                // IRIProviderJena filters ERRORs and throws an exception on error.
-                // It can't add warnings or remove them at that point.
-                // Do WARN filtering here.
-                if ( code == Violation.LOWERCASE_PREFERRED && v.getComponent() != IRIComponents.SCHEME ) {
-                    // Issue warning about the scheme part only. Not e.g. DNS names.
-                    continue;
-                }
-
-                // Convert selected violations from ERROR to WARN for output.
-                // There are cases where jena-iri always makes a violation an ERROR regardless of SetupJenaIRI
-                // PROHIBITED_COMPONENT_PRESENT
-//                if ( code == Violation.PROHIBITED_COMPONENT_PRESENT )
-//                    isError = false;
-
-                isOK = false;
-                String msg = v.getShortMessage();
-                String iriStr = iri.toString();
-                iriViolationMessage(iriStr, isError, msg, line, col, errorHandler);
+        try {
+            IRIx iri = IRIs.reference(iriStr);
+            if ( iri instanceof IRIProviderJenaIRI.IRIxJena jiri ) {
+                IRI jenaIRI = jiri.getImpl();
+                return CheckerJenaIRI.iriViolations(jenaIRI, errorHandler, line, col);
             }
+            if ( ! iri.hasViolations() )
+                return true;
+            // IRI errors are errorHandler warnings when checking.
+            iri.handleViolations((isError, message)->{
+                    errorHandler.warning(message, line, col);
+            });
+            return false;
+        } catch (IRIException ex) {
+            errorHandler.warning(ex.getMessage(), line, col);
+            return false;
         }
-        return isOK;
     }
 
     /**
@@ -176,19 +121,24 @@ public class Checker {
      */
     public static void iriViolationMessage(String iriStr, boolean isError, String msg, long line, long col, ErrorHandler errorHandler) {
         try {
-            if ( ! ( SystemIRIx.getProvider() instanceof IRIProviderJenaIRI ) )
-                msg = "<" + iriStr + "> : " + msg;
-
+            // The IRI is valid RFC3986 syntax, else it failed earlier.
+            // This code is for scheme violations which do not stop parsing.
             if ( isError ) {
-                // ?? Treat as error, catch exceptions?
                 errorHandler(errorHandler).warning("Bad IRI: " + msg, line, col);
             } else
-                errorHandler(errorHandler).warning("Not advised IRI: " + msg, line, col);
+                errorHandler(errorHandler).warning("Unwise IRI: " + msg, line, col);
         } catch (org.apache.jena.iri.IRIException | org.apache.jena.irix.IRIException ex) {}
     }
 
-    // ==== Literals
-
+//    /**
+//     * Process violations on an IRI Calls the {@link ErrorHandler} on all errors and
+//     * warnings (as warnings).
+//     */
+//    @Deprecated(forRemoval = true)
+//    public static void iriViolations(IRI iri) {
+//        iriViolations(iri, nullErrorHandler, false, true, -1L, -1L);
+//    }
+//
     final static private Pattern langPattern = Pattern.compile("[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*");
 
     public static boolean checkLiteral(Node node) {
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/system/CheckerJenaIRI.java b/jena-arq/src/main/java/org/apache/jena/riot/system/CheckerJenaIRI.java
new file mode 100644
index 0000000000..f417b62d0d
--- /dev/null
+++ b/jena-arq/src/main/java/org/apache/jena/riot/system/CheckerJenaIRI.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.riot.system;
+
+import java.util.Iterator;
+
+import org.apache.jena.iri.IRI;
+import org.apache.jena.iri.IRIComponents;
+import org.apache.jena.iri.Violation;
+import org.apache.jena.irix.IRIProviderJenaIRI;
+import org.apache.jena.irix.IRIs;
+import org.apache.jena.irix.SetupJenaIRI;
+import org.apache.jena.irix.SystemIRIx;
+
+/**
+ * Copy of the Jena 5.1.0 Checker code (jena-iri related)
+ * called from ParserProfileStd.
+ *
+ * */
+class CheckerJenaIRI {
+    /** See also {@link IRIs#reference} */
+    static boolean checkIRI(String iriStr, ErrorHandler errorHandler, long line, long col) {
+        IRI iri = SetupJenaIRI.iriCheckerFactory().create(iriStr);
+        boolean b = iriViolations(iri, errorHandler, line, col);
+        return b;
+    }
+
+    /**
+     * Process violations on an IRI Calls the {@link ErrorHandler} on all errors and
+     * warnings (as warnings).
+     */
+    static boolean iriViolations(IRI iri, ErrorHandler errorHandler, long line, long col) {
+        return iriViolations(iri, errorHandler, false, true, line, col);
+    }
+
+    /**
+     * Process violations on an IRI Calls the errorHandler on all errors and warnings
+     * (as warning). (If checking for relative IRIs, these are sent out as errors.)
+     * Assumes error handler throws exceptions on errors if need be
+     */
+    static boolean iriViolations(IRI iri, ErrorHandler errorHandler,
+                                        boolean allowRelativeIRIs, boolean includeIRIwarnings,
+                                        long line, long col) {
+
+        if ( !allowRelativeIRIs && iri.isRelative() )
+            // Relative IRIs.
+            iriViolationMessage(iri.toString(), true, "Relative IRI: " + iri, line, col, errorHandler);
+
+        boolean isOK = true;
+
+        if ( iri.hasViolation(includeIRIwarnings) ) {
+            Iterator<Violation> iter = iri.violations(includeIRIwarnings);
+
+            for ( ; iter.hasNext() ; ) {
+                Violation v = iter.next();
+                int code = v.getViolationCode();
+                boolean isError = v.isError();
+
+                // --- Tune warnings.
+                // IRIProviderJena filters ERRORs and throws an exception on error.
+                // It can't add warnings or remove them at that point.
+                // Do WARN filtering here.
+                if ( code == Violation.LOWERCASE_PREFERRED && v.getComponent() != IRIComponents.SCHEME ) {
+                    // Issue warning about the scheme part only. Not e.g. DNS names.
+                    continue;
+                }
+
+                isOK = false;
+                String msg = v.getShortMessage();
+                String iriStr = iri.toString();
+                //System.out.println("Warning: "+msg);
+                iriViolationMessage(iriStr, isError, msg, line, col, errorHandler);
+            }
+        }
+        return isOK;
+    }
+
+    /**
+     * Common handling messages about IRIs during parsing whether a violation or an
+     * IRIException. Prints a warning, with different messages for IRI error or warning.
+     */
+    static void iriViolationMessage(String iriStr, boolean isError, String msg, long line, long col, ErrorHandler errorHandler) {
+        try {
+            if ( ! ( SystemIRIx.getProvider() instanceof IRIProviderJenaIRI ) )
+                msg = "<" + iriStr + "> : " + msg;
+
+            if ( isError ) {
+                // ?? Treat as error, catch exceptions?
+                errorHandler(errorHandler).warning("Bad IRI: " + msg, line, col);
+            } else
+                errorHandler(errorHandler).warning("Not advised IRI: " + msg, line, col);
+        } catch (org.apache.jena.iri.IRIException | org.apache.jena.irix.IRIException ex) {}
+    }
+
+    private static ErrorHandler errorHandler(ErrorHandler handler) {
+        return handler != null ? handler : ErrorHandlerFactory.errorHandlerStd;
+    }
+
+    // Does nothing. Used in "check(node)" operations where the boolean result is key.
+    private static ErrorHandler nullErrorHandler  = new ErrorHandler() {
+        @Override
+        public void warning(String message, long line, long col) {}
+
+        @Override
+        public void error(String message, long line, long col) {}
+
+        @Override
+        public void fatal(String message, long line, long col) {}
+    };
+}
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/system/ParserProfileStd.java b/jena-arq/src/main/java/org/apache/jena/riot/system/ParserProfileStd.java
index 70b28feba1..c3e43003f8 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/system/ParserProfileStd.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/system/ParserProfileStd.java
@@ -112,7 +112,6 @@ public class ParserProfileStd implements ParserProfile {
             errorHandler.error("Relative IRI: " + uriStr, line, col);
             return IRIx.createAny(uriStr);
         } catch (IRIException ex) {
-            // Same code as Checker.iriViolations
             String msg = ex.getMessage();
             Checker.iriViolationMessage(uriStr, true, msg, line, col, errorHandler);
             return IRIx.createAny(uriStr);
@@ -120,18 +119,34 @@ public class ParserProfileStd implements ParserProfile {
     }
 
     private void doChecking(IRIx irix, String uriStr, long line, long col) {
-        // Should become ...
-//        irix.handleViolations((isError, message)->{
-//            if ( isError )
-//                errorHandler.error(message, line, col);
-//        });
-
-        IRI iri;
-        if ( irix instanceof IRIProviderJenaIRI.IRIxJena )
-            iri = (IRI)irix.getImpl();
-        else
-            iri = iriCache.get(uriStr, x -> SetupJenaIRI.iriCheckerFactory().create(x));
-        Checker.iriViolations(iri, errorHandler, false, true, line, col);
+        // This exists only to give the exact handling of Jena 5.1.0
+        // IRIProviderJenaIRI defines irix.hasViolations() as "jenaIRI.hasViolation(false/*no warnings*/)"
+        // CheckerJenaIRI handles jena-iri warnings.
+        if ( irix instanceof IRIProviderJenaIRI.IRIxJena ) {
+            CheckerJenaIRI.checkIRI(uriStr, errorHandler, line, col);
+            return;
+        }
+
+        if ( irix.isRelative() ) {
+            // Relative IRIs.
+            Checker.iriViolationMessage(irix.str(), true, "Relative IRI: " + irix.str(), line, col, errorHandler);
+            // And other warnings.
+        }
+
+        if ( ! irix.hasViolations() )
+            return;
+
+        irix.handleViolations((isError, message)->{
+            Checker.iriViolationMessage(uriStr, isError, message, line, col, errorHandler);
+        });
+
+        // Jena up to 5.2.0 behaviour: Always jena-iri messages
+//        IRI iri;
+//        if ( irix instanceof IRIProviderJenaIRI.IRIxJena )
+//            iri = (IRI)irix.getImpl();
+//        else
+//            iri = iriCache.get(uriStr, x -> SetupJenaIRI.iriCheckerFactory().create(x));
+//        Checker.iriViolations(iri, errorHandler, false, true, line, col);
     }
 
     /**
diff --git a/jena-core/src/main/java/org/apache/jena/irix/IRIProviderAny.java b/jena-core/src/main/java/org/apache/jena/irix/IRIProviderAny.java
index f4fd55b903..9f929dc4b9 100644
--- a/jena-core/src/main/java/org/apache/jena/irix/IRIProviderAny.java
+++ b/jena-core/src/main/java/org/apache/jena/irix/IRIProviderAny.java
@@ -29,7 +29,7 @@ import java.util.function.BiConsumer;
 public class IRIProviderAny  implements IRIProvider {
 
     /** The IRIProvider builder does not create this kind of IRIProvider! */
-    public static IRIProviderAny stringProvider() {return new IRIProviderAny(); }
+    public static IRIProviderAny stringProvider() { return new IRIProviderAny(); }
 
     public IRIProviderAny() {}
 
diff --git a/jena-core/src/test/java/org/apache/jena/irix/TestRFC3986.java b/jena-core/src/test/java/org/apache/jena/irix/TestRFC3986.java
index ee1c6736ef..4e401c1f51 100644
--- a/jena-core/src/test/java/org/apache/jena/irix/TestRFC3986.java
+++ b/jena-core/src/test/java/org/apache/jena/irix/TestRFC3986.java
@@ -92,25 +92,25 @@ public class TestRFC3986 extends AbstractTestIRIx {
     // ---- bad
 
     // Leading ':'
-    @Test public void bad_scheme_1() { bad(":segment"); }
+    @Test public void bad_uri_scheme_1() { bad(":segment"); }
 
     // Bad scheme
-    @Test public void bad_scheme_2() { bad("://host/xyz"); }
+    @Test public void bad_uri_scheme_2() { bad("://host/xyz"); }
 
     // Bad scheme
-    @Test public void bad_scheme_3() { bad("1://host/xyz"); }
+    @Test public void bad_uri_scheme_3() { bad("1://host/xyz"); }
 
     // Bad scheme
-    @Test public void bad_scheme_4() { bad("a~b://host/xyz"); }
+    @Test public void bad_uri_scheme_4() { bad("a~b://host/xyz"); }
 
     // Bad scheme
-    @Test public void bad_scheme_5() { bad("aβ://host/xyz"); }
+    @Test public void bad_uri_scheme_5() { bad("aβ://host/xyz"); }
 
     // Bad scheme
-    @Test public void bad_scheme_6() { bad("_:xyz"); }
+    @Test public void bad_uri_scheme_6() { bad("_:xyz"); }
 
     // Bad scheme
-    @Test public void bad_scheme_7() { bad("a_b:xyz"); }
+    @Test public void bad_uri_scheme_7() { bad("a_b:xyz"); }
 
     // Space!
     @Test public void bad_chars_1() { bad("http://abcdef:80/xyz /abc"); }

Reply via email to