This is an automated email from the ASF dual-hosted git repository. andy pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/jena.git
commit 24f41b3fa5b7f6060acb52e01b419a5908d19570 Author: Andy Seaborne <[email protected]> AuthorDate: Sat Sep 28 12:02:01 2024 +0100 GH-2800: ParserProfileStd.doChecking for IRI3986; Separate CheckerJenaIRI --- .../java/org/apache/jena/riot/system/Checker.java | 112 +++++------------- .../apache/jena/riot/system/CheckerJenaIRI.java | 126 +++++++++++++++++++++ .../apache/jena/riot/system/ParserProfileStd.java | 41 ++++--- .../java/org/apache/jena/irix/IRIProviderAny.java | 2 +- .../java/org/apache/jena/irix/TestRFC3986.java | 14 +-- 5 files changed, 193 insertions(+), 102 deletions(-) diff --git a/jena-arq/src/main/java/org/apache/jena/riot/system/Checker.java b/jena-arq/src/main/java/org/apache/jena/riot/system/Checker.java index 55fdc3a23b..01d7098385 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/system/Checker.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/system/Checker.java @@ -18,7 +18,6 @@ package org.apache.jena.riot.system; -import java.util.Iterator; import java.util.regex.Pattern; import org.apache.jena.datatypes.RDFDatatype; @@ -27,12 +26,7 @@ import org.apache.jena.datatypes.xsd.impl.RDFLangString; import org.apache.jena.graph.Node; import org.apache.jena.graph.Triple; import org.apache.jena.iri.IRI; -import org.apache.jena.iri.IRIComponents; -import org.apache.jena.iri.Violation; -import org.apache.jena.irix.IRIProviderJenaIRI; -import org.apache.jena.irix.IRIs; -import org.apache.jena.irix.SetupJenaIRI; -import org.apache.jena.irix.SystemIRIx; +import org.apache.jena.irix.*; import org.apache.jena.sparql.core.Quad; import org.apache.jena.util.SplitIRI; @@ -96,78 +90,29 @@ public class Checker { return checkIRI(node.getURI(), errorHandler, line, col); } - public static boolean checkIRI(String iriStr) { - return checkIRI(iriStr, nullErrorHandler, -1L, -1L); - } +// public static boolean checkIRI(String iriStr) { +// return checkIRI(iriStr, nullErrorHandler, -1L, -1L); +// } /** See also {@link IRIs#reference} */ public static boolean 
checkIRI(String iriStr, ErrorHandler errorHandler, long line, long col) { - IRI iri = SetupJenaIRI.iriCheckerFactory().create(iriStr); - boolean b = iriViolations(iri, errorHandler, line, col); - return b; - } - - /** - * Process violations on an IRI Calls the {@link ErrorHandler} on all errors and - * warnings (as warnings). - */ - public static void iriViolations(IRI iri) { - iriViolations(iri, nullErrorHandler, false, true, -1L, -1L); - } - - /** - * Process violations on an IRI Calls the {@link ErrorHandler} on all errors and - * warnings (as warnings). - */ - public static boolean iriViolations(IRI iri, ErrorHandler errorHandler, long line, long col) { - return iriViolations(iri, errorHandler, false, true, line, col); - } - - /** - * Process violations on an IRI Calls the errorHandler on all errors and warnings - * (as warning). (If checking for relative IRIs, these are sent out as errors.) - * Assumes error handler throws exceptions on errors if need be - */ - public static boolean iriViolations(IRI iri, ErrorHandler errorHandler, - boolean allowRelativeIRIs, boolean includeIRIwarnings, - long line, long col) { - - if ( !allowRelativeIRIs && iri.isRelative() ) - // Relative IRIs. - iriViolationMessage(iri.toString(), true, "Relative IRI: " + iri, line, col, errorHandler); - - boolean isOK = true; - - if ( iri.hasViolation(includeIRIwarnings) ) { - Iterator<Violation> iter = iri.violations(includeIRIwarnings); - - for ( ; iter.hasNext() ; ) { - Violation v = iter.next(); - int code = v.getViolationCode(); - boolean isError = v.isError(); - - // --- Tune warnings. - // IRIProviderJena filters ERRORs and throws an exception on error. - // It can't add warnings or remove them at that point. - // Do WARN filtering here. - if ( code == Violation.LOWERCASE_PREFERRED && v.getComponent() != IRIComponents.SCHEME ) { - // Issue warning about the scheme part only. Not e.g. DNS names. - continue; - } - - // Convert selected violations from ERROR to WARN for output. 
- // There are cases where jena-iri always makes a violation an ERROR regardless of SetupJenaIRI - // PROHIBITED_COMPONENT_PRESENT -// if ( code == Violation.PROHIBITED_COMPONENT_PRESENT ) -// isError = false; - - isOK = false; - String msg = v.getShortMessage(); - String iriStr = iri.toString(); - iriViolationMessage(iriStr, isError, msg, line, col, errorHandler); + try { + IRIx iri = IRIs.reference(iriStr); + if ( iri instanceof IRIProviderJenaIRI.IRIxJena jiri ) { + IRI jenaIRI = jiri.getImpl(); + return CheckerJenaIRI.iriViolations(jenaIRI, errorHandler, line, col); } + if ( ! iri.hasViolations() ) + return true; + // IRI errors are errorHandler warnings when checking. + iri.handleViolations((isError, message)->{ + errorHandler.warning(message, line, col); + }); + return false; + } catch (IRIException ex) { + errorHandler.warning(ex.getMessage(), line, col); + return false; } - return isOK; } /** @@ -176,19 +121,24 @@ public class Checker { */ public static void iriViolationMessage(String iriStr, boolean isError, String msg, long line, long col, ErrorHandler errorHandler) { try { - if ( ! ( SystemIRIx.getProvider() instanceof IRIProviderJenaIRI ) ) - msg = "<" + iriStr + "> : " + msg; - + // The IRI is valid RFC3986 syntax, else it failed earlier. + // This code is for scheme violations which do not stop parsing. if ( isError ) { - // ?? Treat as error, catch exceptions? errorHandler(errorHandler).warning("Bad IRI: " + msg, line, col); } else - errorHandler(errorHandler).warning("Not advised IRI: " + msg, line, col); + errorHandler(errorHandler).warning("Unwise IRI: " + msg, line, col); } catch (org.apache.jena.iri.IRIException | org.apache.jena.irix.IRIException ex) {} } - // ==== Literals - +// /** +// * Process violations on an IRI Calls the {@link ErrorHandler} on all errors and +// * warnings (as warnings). 
+// */ +// @Deprecated(forRemoval = true) +// public static void iriViolations(IRI iri) { +// iriViolations(iri, nullErrorHandler, false, true, -1L, -1L); +// } +// final static private Pattern langPattern = Pattern.compile("[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*"); public static boolean checkLiteral(Node node) { diff --git a/jena-arq/src/main/java/org/apache/jena/riot/system/CheckerJenaIRI.java b/jena-arq/src/main/java/org/apache/jena/riot/system/CheckerJenaIRI.java new file mode 100644 index 0000000000..f417b62d0d --- /dev/null +++ b/jena-arq/src/main/java/org/apache/jena/riot/system/CheckerJenaIRI.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.riot.system; + +import java.util.Iterator; + +import org.apache.jena.iri.IRI; +import org.apache.jena.iri.IRIComponents; +import org.apache.jena.iri.Violation; +import org.apache.jena.irix.IRIProviderJenaIRI; +import org.apache.jena.irix.IRIs; +import org.apache.jena.irix.SetupJenaIRI; +import org.apache.jena.irix.SystemIRIx; + +/** + * Copy of the Jena 5.1.0 Checker code (jena-iri related) + * called from ParserProfileStd. 
+ * + * */ +class CheckerJenaIRI { + /** See also {@link IRIs#reference} */ + static boolean checkIRI(String iriStr, ErrorHandler errorHandler, long line, long col) { + IRI iri = SetupJenaIRI.iriCheckerFactory().create(iriStr); + boolean b = iriViolations(iri, errorHandler, line, col); + return b; + } + + /** + * Process violations on an IRI Calls the {@link ErrorHandler} on all errors and + * warnings (as warnings). + */ + static boolean iriViolations(IRI iri, ErrorHandler errorHandler, long line, long col) { + return iriViolations(iri, errorHandler, false, true, line, col); + } + + /** + * Process violations on an IRI Calls the errorHandler on all errors and warnings + * (as warning). (If checking for relative IRIs, these are sent out as errors.) + * Assumes error handler throws exceptions on errors if need be + */ + static boolean iriViolations(IRI iri, ErrorHandler errorHandler, + boolean allowRelativeIRIs, boolean includeIRIwarnings, + long line, long col) { + + if ( !allowRelativeIRIs && iri.isRelative() ) + // Relative IRIs. + iriViolationMessage(iri.toString(), true, "Relative IRI: " + iri, line, col, errorHandler); + + boolean isOK = true; + + if ( iri.hasViolation(includeIRIwarnings) ) { + Iterator<Violation> iter = iri.violations(includeIRIwarnings); + + for ( ; iter.hasNext() ; ) { + Violation v = iter.next(); + int code = v.getViolationCode(); + boolean isError = v.isError(); + + // --- Tune warnings. + // IRIProviderJena filters ERRORs and throws an exception on error. + // It can't add warnings or remove them at that point. + // Do WARN filtering here. + if ( code == Violation.LOWERCASE_PREFERRED && v.getComponent() != IRIComponents.SCHEME ) { + // Issue warning about the scheme part only. Not e.g. DNS names. 
+ continue; + } + + isOK = false; + String msg = v.getShortMessage(); + String iriStr = iri.toString(); + //System.out.println("Warning: "+msg); + iriViolationMessage(iriStr, isError, msg, line, col, errorHandler); + } + } + return isOK; + } + + /** + * Common handling messages about IRIs during parsing whether a violation or an + * IRIException. Prints a warning, with different messages for IRI error or warning. + */ + static void iriViolationMessage(String iriStr, boolean isError, String msg, long line, long col, ErrorHandler errorHandler) { + try { + if ( ! ( SystemIRIx.getProvider() instanceof IRIProviderJenaIRI ) ) + msg = "<" + iriStr + "> : " + msg; + + if ( isError ) { + // ?? Treat as error, catch exceptions? + errorHandler(errorHandler).warning("Bad IRI: " + msg, line, col); + } else + errorHandler(errorHandler).warning("Not advised IRI: " + msg, line, col); + } catch (org.apache.jena.iri.IRIException | org.apache.jena.irix.IRIException ex) {} + } + + private static ErrorHandler errorHandler(ErrorHandler handler) { + return handler != null ? handler : ErrorHandlerFactory.errorHandlerStd; + } + + // Does nothing. Used in "check(node)" operations where the boolean result is key. 
+ private static ErrorHandler nullErrorHandler = new ErrorHandler() { + @Override + public void warning(String message, long line, long col) {} + + @Override + public void error(String message, long line, long col) {} + + @Override + public void fatal(String message, long line, long col) {} + }; +} diff --git a/jena-arq/src/main/java/org/apache/jena/riot/system/ParserProfileStd.java b/jena-arq/src/main/java/org/apache/jena/riot/system/ParserProfileStd.java index 70b28feba1..c3e43003f8 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/system/ParserProfileStd.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/system/ParserProfileStd.java @@ -112,7 +112,6 @@ public class ParserProfileStd implements ParserProfile { errorHandler.error("Relative IRI: " + uriStr, line, col); return IRIx.createAny(uriStr); } catch (IRIException ex) { - // Same code as Checker.iriViolations String msg = ex.getMessage(); Checker.iriViolationMessage(uriStr, true, msg, line, col, errorHandler); return IRIx.createAny(uriStr); @@ -120,18 +119,34 @@ public class ParserProfileStd implements ParserProfile { } private void doChecking(IRIx irix, String uriStr, long line, long col) { - // Should become ... -// irix.handleViolations((isError, message)->{ -// if ( isError ) -// errorHandler.error(message, line, col); -// }); - - IRI iri; - if ( irix instanceof IRIProviderJenaIRI.IRIxJena ) - iri = (IRI)irix.getImpl(); - else - iri = iriCache.get(uriStr, x -> SetupJenaIRI.iriCheckerFactory().create(x)); - Checker.iriViolations(iri, errorHandler, false, true, line, col); + // This exists only to give the exact handling of Jena 5.1.0 + // IRIProviderJenaIRI defines irix.hasViolations() as "jenaIRI.hasViolation(false/*no warnings*/)" + // CheckerJenaIRI handles jena-iri warnings. + if ( irix instanceof IRIProviderJenaIRI.IRIxJena ) { + CheckerJenaIRI.checkIRI(uriStr, errorHandler, line, col); + return; + } + + if ( irix.isRelative() ) { + // Relative IRIs. 
+ Checker.iriViolationMessage(irix.str(), true, "Relative IRI: " + irix.str(), line, col, errorHandler); + // And other warnings. + } + + if ( ! irix.hasViolations() ) + return; + + irix.handleViolations((isError, message)->{ + Checker.iriViolationMessage(uriStr, isError, message, line, col, errorHandler); + }); + + // Jena up to 5.2.0 behaviour: Always jena-iri messages +// IRI iri; +// if ( irix instanceof IRIProviderJenaIRI.IRIxJena ) +// iri = (IRI)irix.getImpl(); +// else +// iri = iriCache.get(uriStr, x -> SetupJenaIRI.iriCheckerFactory().create(x)); +// Checker.iriViolations(iri, errorHandler, false, true, line, col); } /** diff --git a/jena-core/src/main/java/org/apache/jena/irix/IRIProviderAny.java b/jena-core/src/main/java/org/apache/jena/irix/IRIProviderAny.java index f4fd55b903..9f929dc4b9 100644 --- a/jena-core/src/main/java/org/apache/jena/irix/IRIProviderAny.java +++ b/jena-core/src/main/java/org/apache/jena/irix/IRIProviderAny.java @@ -29,7 +29,7 @@ import java.util.function.BiConsumer; public class IRIProviderAny implements IRIProvider { /** The IRIProvider builder does not create this kind of IRIProvider! 
*/ - public static IRIProviderAny stringProvider() {return new IRIProviderAny(); } + public static IRIProviderAny stringProvider() { return new IRIProviderAny(); } public IRIProviderAny() {} diff --git a/jena-core/src/test/java/org/apache/jena/irix/TestRFC3986.java b/jena-core/src/test/java/org/apache/jena/irix/TestRFC3986.java index ee1c6736ef..4e401c1f51 100644 --- a/jena-core/src/test/java/org/apache/jena/irix/TestRFC3986.java +++ b/jena-core/src/test/java/org/apache/jena/irix/TestRFC3986.java @@ -92,25 +92,25 @@ public class TestRFC3986 extends AbstractTestIRIx { // ---- bad // Leading ':' - @Test public void bad_scheme_1() { bad(":segment"); } + @Test public void bad_uri_scheme_1() { bad(":segment"); } // Bad scheme - @Test public void bad_scheme_2() { bad("://host/xyz"); } + @Test public void bad_uri_scheme_2() { bad("://host/xyz"); } // Bad scheme - @Test public void bad_scheme_3() { bad("1://host/xyz"); } + @Test public void bad_uri_scheme_3() { bad("1://host/xyz"); } // Bad scheme - @Test public void bad_scheme_4() { bad("a~b://host/xyz"); } + @Test public void bad_uri_scheme_4() { bad("a~b://host/xyz"); } // Bad scheme - @Test public void bad_scheme_5() { bad("aβ://host/xyz"); } + @Test public void bad_uri_scheme_5() { bad("aβ://host/xyz"); } // Bad scheme - @Test public void bad_scheme_6() { bad("_:xyz"); } + @Test public void bad_uri_scheme_6() { bad("_:xyz"); } // Bad scheme - @Test public void bad_scheme_7() { bad("a_b:xyz"); } + @Test public void bad_uri_scheme_7() { bad("a_b:xyz"); } // Space! @Test public void bad_chars_1() { bad("http://abcdef:80/xyz /abc"); }
