This is an automated email from the ASF dual-hosted git repository. andy pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/jena.git
commit ffa6d76d851aa706886c1033fb1526dc5fda0e30 Author: Andy Seaborne <[email protected]> AuthorDate: Fri Jan 10 18:27:29 2025 +0000 GH-2936: Update IRI commands for jena-iri3986 --- jena-cmds/src/main/java/arq/iri.java | 138 +++++++++++++++++++-- .../jena/iri3986/provider/IRIProvider3986.java | 3 +- .../main/java/org/apache/jena/rfc3986/IRI3986.java | 15 ++- .../java/org/apache/jena/rfc3986/ParseURN.java | 11 +- .../org/apache/jena/rfc3986/cmd/CmdRelative.java | 4 +- .../org/apache/jena/rfc3986/cmd/CmdResolve.java | 6 +- .../java/org/apache/jena/rfc3986/TestParseURN.java | 24 ++-- .../org/apache/jena/rfc3986/TestURISchemes.java | 26 ++-- 8 files changed, 174 insertions(+), 53 deletions(-) diff --git a/jena-cmds/src/main/java/arq/iri.java b/jena-cmds/src/main/java/arq/iri.java index dfb8db0841..f28dd5af96 100644 --- a/jena-cmds/src/main/java/arq/iri.java +++ b/jena-cmds/src/main/java/arq/iri.java @@ -18,20 +18,30 @@ package arq; +import java.io.PrintStream; import java.util.Iterator ; +import org.apache.jena.atlas.lib.Lib; import org.apache.jena.iri.IRI ; -import org.apache.jena.iri.IRIFactory ; import org.apache.jena.iri.Violation ; -import org.apache.jena.irix.SetupJenaIRI; +import org.apache.jena.iri3986.provider.IRIProvider3986; +import org.apache.jena.iri3986.provider.IRIProvider3986.IRIx3986; +import org.apache.jena.irix.*; +import org.apache.jena.irix.IRIProviderJenaIRI.IRIxJena; +import org.apache.jena.rfc3986.IRI3986; +import org.apache.jena.rfc3986.IRIParseException; +/** + * Parse and print IRIs + */ public class iri { + private static PrintStream out = System.out; + // Errors and warnings. + private static PrintStream err = System.err; public static void main(String... args) { - IRIFactory iriFactory = SetupJenaIRI.iriCheckerFactory() ; - boolean first = true ; for ( String iriStr : args ) { @@ -42,16 +52,120 @@ public class iri System.out.println() ; first = false ; - IRI iri = iriFactory.create(iriStr) ; - System.out.println(iriStr + " ==> "+iri) ; - if ( iri.isRelative() ) - System.out.println("Relative: "+iri.isRelative()) ; + String setting = Lib.getenv(SystemIRIx.sysPropertyProvider, SystemIRIx.envVariableProvider); + + IRIProvider provider = null; + if ( setting != null ) { + provider = switch(setting) { + case "IRI3986" -> new IRIProvider3986(); + case "IRI0" -> new IRIProviderJenaIRI(); + default -> { + System.err.println("Unknown IRI Provider: "+setting); + System.exit(1); + yield null; + } + }; + } + + if ( provider == null ) + provider = SystemIRIx.getProvider(); + + IRIx irix; + try { + irix = provider.create(iriStr); + } catch (IRIException ex) { + System.err.println(ex.getMessage()); + continue; + } + + // jena-iri3986 + if ( irix instanceof IRIx3986 iri3986 ) { + print(iri3986, iriStr); + continue; + } + // jena-iri: The original Jena IRI subsystem + if ( irix instanceof IRIxJena iriJena ) { + print(iriJena, iriStr); + continue; + } + print(irix, iriStr); + } + } + + private static void print(IRIx3986 irix, String iriStr) { + try { + IRI3986 iri = irix.getImpl(); + IRI3986 iri1 = iri.normalize(); - Iterator<Violation> vIter = iri.violations(true) ; - for ( ; vIter.hasNext() ; ) - { - System.out.println(vIter.next().getShortMessage()) ; + out.printf("Input: <%s>\n", iriStr); + out.printf(" Parsed: %s\n", iri.rebuild()) ; + out.printf(" Absolute: %s\n", iri.isAbsolute()); + out.printf(" Relative: %s\n", iri.isRelative()); + out.printf(" Hierarchical: %s\n", iri.isHierarchical()); + out.printf(" Rootless: %s\n", iri.isRootless()); + if ( ! iri.equals(iri1) ) + out.printf(" Normalized: %s\n", iri1) ; + out.printf("\n"); + out.printf("%s|%s| ", "Scheme", iri.scheme()); + out.printf("%s|%s| ", "Authority", iri.authority()); + out.printf("%s|%s| ", "Host", iri.host()); + if ( iri.hasPort() ) + out.printf("%s|%s| ", "Port", iri.port()); + out.printf("%s|%s| ", "Path", iri.path()); + out.printf("%s|%s| ", "Query", iri.query()); + out.printf("%s|%s|", "Fragment", iri.fragment()); + out.println(); + if ( iri.hasViolations() ) { + out.println(); + out.println("Scheme specific warnings:"); + iri.forEachViolation(v->{ + out.print(" "); + err.printf("%s\n", v.message()); + }); } + } catch (IRIParseException ex) { + System.err.printf("Error: %s\n", ex.getMessage()); } } + + private static void print(IRIxJena jenaIRI, String iriStr) { + IRI iri = jenaIRI.getImpl(); + System.out.println(iriStr + " ==> " + iri); + if ( jenaIRI.isRelative() ) + System.out.println("Relative: " + iri.isRelative()); + + Iterator<Violation> vIter = iri.violations(true); + for ( ; vIter.hasNext() ; ) { + System.out.println(vIter.next().getShortMessage()); + } + } + + private static void print(IRIx irix, String iriStr) { + try { + IRIx iri1 = irix.normalize(); + + out.printf("Input: <%s>\n", iriStr); + out.printf(" Absolute: %s\n", irix.isAbsolute()); + out.printf(" Relative: %s\n", irix.isRelative()); + if ( ! irix.equals(iri1) ) + out.printf(" Normalized: %s\n", iri1) ; + out.printf("\n"); + out.printf("%s|%s| ", "Scheme", irix.scheme()); + out.println(); + if ( irix.hasViolations() ) { + out.println(); + out.println("Scheme specific warnings:"); + irix.handleViolations((error, msg)->{ + String type = (error? "Error:" : "Warn:"); + out.print(" "); + err.printf("%-6s\n", type, msg); + }); + } + } catch (IRIParseException ex) { + System.err.printf("Error: %s\n", ex.getMessage()); + } + + } + + } diff --git a/jena-core/src/main/java/org/apache/jena/iri3986/provider/IRIProvider3986.java b/jena-core/src/main/java/org/apache/jena/iri3986/provider/IRIProvider3986.java index 794b867b48..bd716791de 100644 --- a/jena-core/src/main/java/org/apache/jena/iri3986/provider/IRIProvider3986.java +++ b/jena-core/src/main/java/org/apache/jena/iri3986/provider/IRIProvider3986.java @@ -77,8 +77,8 @@ public class IRIProvider3986 implements IRIProvider { @Override public IRIx resolve(String other) { + // create3986() - checks syntax, and errors if in strict mode. IRI3986 iriOther = create3986(other); - // Does not complain if iriOther is bad but create3986 did checking. IRI3986 iri2 = this.iri.resolve(iriOther); return newIRIx(iri2); } @@ -87,7 +87,6 @@ public class IRIProvider3986 implements IRIProvider { public IRIx resolve(IRIx other) { IRIx3986 iriOther = (IRIx3986)other; IRI3986 iri2 = this.iri.resolve(iriOther.iri); - //violations(iri2); return newIRIx(iri2); } diff --git a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/IRI3986.java b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/IRI3986.java index b7530b06f2..9397f990a5 100644 --- a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/IRI3986.java +++ b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/IRI3986.java @@ -448,8 +448,7 @@ public class IRI3986 implements IRI { } /** - * <a href="https://tools.ietf.org/html/rfc3986#section-4.3">RFC 3986, Section - * 4.3</a> + * <a href="https://tools.ietf.org/html/rfc3986#section-4.3">RFC 3986, Section 4.3</a> */ @Override public boolean isAbsolute() { @@ -458,8 +457,7 @@ public class IRI3986 implements IRI { } /** - * <a href="https://tools.ietf.org/html/rfc3986#section-4.2">RFC 3986, Section - * 4.2</a> + * <a href="https://tools.ietf.org/html/rfc3986#section-4.2">RFC 3986, Section 4.2</a> */ @Override public boolean isRelative() { @@ -1622,8 +1620,7 @@ public class IRI3986 implements IRI { if ( !hasHost() ) schemeReport(this, Issue.http_no_host, scheme, "http and https URI schemes require //host/"); - - if ( /* hasHost() && */ (host0 == host1) ) + else if ( /* hasHost() && */ (host0 == host1) ) schemeReport(this, Issue.http_empty_host, scheme, "http and https URI schemes do not allow the host to be empty"); // https://tools.ietf.org/html/rfc3986#section-3.2.3 @@ -1951,8 +1948,10 @@ public class IRI3986 implements IRI { private static void addReportParseError(IRI3986 iri, String iriStr, String message) { // The iri object is probably only partial populated. - // Use iriStr for the message. - String msg = "'"+iriStr+"' : "+message; + // Exception message already has the IRI string. But check. + String msg = message; + if ( ! message.startsWith("<"+iriStr+">") ) + msg = "'"+iriStr+"' : "+message; Violation v = new Violation(iriStr, null, Issue.ParseError, msg); addReport(iri, v); } diff --git a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/ParseURN.java b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/ParseURN.java index 3216b2f5e7..744f7da578 100644 --- a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/ParseURN.java +++ b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/ParseURN.java @@ -215,8 +215,13 @@ public class ParseURN { handler.accept(Issue.urn_bad_nid, "No namespace id"); return -1; } + + if ( ch == ':' ) { + handler.accept(Issue.urn_bad_nid, "Missing namespace id"); + return -1; + } if ( ! Chars3986.isAlphaNum(ch) ) { - handler.accept(Issue.urn_bad_nid, "Namespace id does no start with an alphabetic ASCII character"); + handler.accept(Issue.urn_bad_nid, "Namespace id does not start with an alphabetic ASCII character"); return -1; } x++; @@ -258,7 +263,7 @@ public class ParseURN { // RFC 8141 section 5.1 (described in RFC 3406) if ( LibParseIRI.caseInsensitiveRegion(string, startNamespace, "X-") ) { - String start = string.substring(0,2); + String start = string.substring(startNamespace,2+startNamespace); handler.accept(Issue.urn_x_namespace, "Namespace id starts with '"+start+"'"); return -1; } @@ -270,7 +275,7 @@ public class ParseURN { char chx = charAt(string, i); if ( !seenNonZero ) { if ( chx == '0' ) { - handler.accept(Issue.urn_bad_nid, "Leading zero in an informal namepsace"); + handler.accept(Issue.urn_bad_nid, "Leading zero in an informal namespace"); return -1; } else seenNonZero = true; diff --git a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/cmd/CmdRelative.java b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/cmd/CmdRelative.java index 20a0006c97..21c6ccc7a2 100644 --- a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/cmd/CmdRelative.java +++ b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/cmd/CmdRelative.java @@ -35,7 +35,7 @@ public class CmdRelative { String relStr = fixup(args[1]); IRI3986 base = createOrExit(baseStr, "Bad base"); - IRI3986 target = createOrExit(relStr, "Bad"); + IRI3986 target = createOrExit(relStr, "Bad IRI"); if ( ! base.isAbsolute() ) { System.err.println("Base must be an absolute IRI: '" +base+"'"); @@ -45,7 +45,9 @@ public class CmdRelative { IRI3986 result = base.relativize(target); System.out.println("Base: "+base); System.out.println("IRI: "+target); + System.out.println(); System.out.println("Relative: "+result); + System.out.println(); print(result); } diff --git a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/cmd/CmdResolve.java b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/cmd/CmdResolve.java index e17a45abae..ab94014ee3 100644 --- a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/cmd/CmdResolve.java +++ b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/cmd/CmdResolve.java @@ -35,7 +35,7 @@ public class CmdResolve { String relStr = fixup(args[1]); IRI3986 base = createOrExit(baseStr, "Bad base"); - IRI3986 rel = createOrExit(baseStr, "Bad "); + IRI3986 rel = createOrExit(relStr, "Bad relative IRI"); if ( ! base.isAbsolute() ) { System.err.println("Base should be an absolute IRI: '" +base+"'"); @@ -44,9 +44,9 @@ public class CmdResolve { IRI3986 result = base.resolve(rel); System.out.println("Base: "+base); System.out.println("IRI: "+rel); + System.out.println(); System.out.println("Resolved: "+result); - + System.out.println(); print(result); } } - diff --git a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseURN.java b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseURN.java index fe6f26eb3a..b00d9c42af 100644 --- a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseURN.java +++ b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseURN.java @@ -47,7 +47,6 @@ public class TestParseURN { // 32 character NID @Test public void parseURN_11() { goodURN1("urn:123456789-123456789-123456789-12:nss", "urn", "123456789-123456789-123456789-12", "nss"); } - @Test public void parseURN_20() { goodURN1("urn:nid:nss?+R1?+R2", "urn", "nid", "nss", "R1?+R2", null, null); } // The r-component includes the ?+R2 @Test public void parseURN_21() { goodURN1("urn:nid:nss?=Q?+R", "urn", "nid", "nss", null, "Q?+R", null); } // The q-component includes the ?+R @Test public void parseURN_22() { goodURN1("urn:nid:nss?=Q?+", "urn", "nid", "nss", null, "Q?+", null); } // The q-component includes the ?+ @@ -56,8 +55,6 @@ public class TestParseURN { @Test public void parseURN_25() { goodURN1("urn:nid:nss?+R?Z", "urn", "nid", "nss", "R?Z", null, null); } // The r-component includes the ?Z @Test public void parseURN_26() { goodURN1("urn:nid:nss?=Q?n=v", "urn", "nid", "nss", null, "Q?n=v", null); } // The q-component includes the "?name=value" - - @Test public void parseURN_bad_01() { badURN("cat:ns:s"); } @Test public void parseURN_bad_02() { badURN("urn:ns"); } @Test public void parseURN_bad_03() { badURN("urn:ns:"); } @@ -66,18 +63,21 @@ public class TestParseURN { @Test public void parseURN_bad_05() { badURN("urn:n:s"); } @Test public void parseURN_bad_06() { badURN("urn:-ns:123"); } @Test public void parseURN_bad_07() { badURN("urn:ns-:123"); } + // 33 characters + @Test public void parseURN_bad_08() { badURN("urn:123456789-123456789-123456789-123:nss"); } - // Bad components. - @Test public void parseURN_bad_10() { badURN1("urn:nid:nss?+#F"); } - @Test public void parseURN_bad_11() { badURN1("urn:nid:nss?=#F"); } - @Test public void parseURN_bad_12() { badURN1("urn:nid:nss?+R?="); } - @Test public void parseURN_bad_13() { badURN1("urn:nid:nss?+?=Q"); } + @Test public void parseURN_bad_10() { badURN("urn:"); } + @Test public void parseURN_bad_11() { badURN("urn::"); } + @Test public void parseURN_bad_12() { badURN("urn::abc"); } - @Test public void parseURN_bad_14() { badURN1("urn:nid:nss?"); } - @Test public void parseURN_bad_15() { badURN1("urn:nid:nss?junk"); } + // Bad components. + @Test public void parseURN_bad_20() { badURN1("urn:nid:nss?+#F"); } + @Test public void parseURN_bad_21() { badURN1("urn:nid:nss?=#F"); } + @Test public void parseURN_bad_22() { badURN1("urn:nid:nss?+R?="); } + @Test public void parseURN_bad_23() { badURN1("urn:nid:nss?+?=Q"); } - // 33 characters - @Test public void parseURN_bad_08() { badURN("urn:123456789-123456789-123456789-123:nss"); } + @Test public void parseURN_bad_24() { badURN1("urn:nid:nss?"); } + @Test public void parseURN_bad_25() { badURN1("urn:nid:nss?junk"); } private void badURN(String string) { URN x = ParseURN.parseURN(string); diff --git a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestURISchemes.java b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestURISchemes.java index 61cfd86396..9abac7a804 100644 --- a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestURISchemes.java +++ b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestURISchemes.java @@ -80,27 +80,29 @@ public class TestURISchemes { @Test public void scheme_urn_05() { schemeViolation("urn:and-:nss", URIScheme.URN, Issue.urn_bad_nid); } @Test public void scheme_urn_06() { schemeViolation("urn:", URIScheme.URN, Issue.urn_bad_nid); } - @Test public void scheme_urn_07() { schemeViolation("urn:x:abc", URIScheme.URN, Issue.urn_bad_nid); } - @Test public void scheme_urn_08() { schemeViolation("urn:abc:", URIScheme.URN, Issue.urn_bad_nss); } + @Test public void scheme_urn_07() { schemeViolation("urn::", URIScheme.URN, Issue.urn_bad_nid); } + @Test public void scheme_urn_08() { schemeViolation("urn::abc", URIScheme.URN, Issue.urn_bad_nid); } + @Test public void scheme_urn_09() { schemeViolation("urn:x:abc", URIScheme.URN, Issue.urn_bad_nid); } + @Test public void scheme_urn_10() { schemeViolation("urn:abc:", URIScheme.URN, Issue.urn_bad_nss); } // OK by URN syntax, forbidden by RFC 8141 section 5.1 - @Test public void scheme_urn_09() { schemeViolation("urn:X-local:nss", URIScheme.URN, Issue.urn_x_namespace); } - @Test public void scheme_urn_10() { schemeViolation("urn:x-local:nss", URIScheme.URN, Issue.urn_x_namespace); } + @Test public void scheme_urn_11() { schemeViolation("urn:X-local:nss", URIScheme.URN, Issue.urn_x_namespace); } + @Test public void scheme_urn_12() { schemeViolation("urn:x-local:nss", URIScheme.URN, Issue.urn_x_namespace); } // OK by URN syntax, forbidden by RFC 8141 section 5.1 Informal namespace. - @Test public void scheme_urn_11() { schemeViolation("urn:urn-abc:nss", URIScheme.URN, Issue.urn_bad_nid); } - @Test public void scheme_urn_12() { good("urn:urn-7:nss"); } - @Test public void scheme_urn_13() { good("urn:nid:a"); } + @Test public void scheme_urn_13() { schemeViolation("urn:urn-abc:nss", URIScheme.URN, Issue.urn_bad_nid); } + @Test public void scheme_urn_14() { good("urn:urn-7:nss"); } + @Test public void scheme_urn_15() { good("urn:nid:a"); } // 32 char NID - @Test public void scheme_urn_14() { good("urn:12345678901234567890123456789012:a"); } + @Test public void scheme_urn_16() { good("urn:12345678901234567890123456789012:a"); } // 33 char NID - @Test public void scheme_urn_15() { schemeViolation("urn:abcdefghij-123456789-123456789-yz:a", URIScheme.URN, Issue.urn_bad_nid); } + @Test public void scheme_urn_17() { schemeViolation("urn:abcdefghij-123456789-123456789-yz:a", URIScheme.URN, Issue.urn_bad_nid); } // Bad by URN specific rule for the query components. - @Test public void scheme_urn_16() { schemeViolation("urn:local:abc/def?query=foo", URIScheme.URN, Issue.urn_bad_components); } + @Test public void scheme_urn_18() { schemeViolation("urn:local:abc/def?query=foo", URIScheme.URN, Issue.urn_bad_components); } // Two f-components = two fragments - @Test public void scheme_urn_17() { badSyntax("urn:local:abc/def#f1#f2"); } - @Test public void scheme_urn_18() { schemeViolation("urn:αβγ:abc", URIScheme.URN, Issue.urn_bad_nid); } + @Test public void scheme_urn_19() { badSyntax("urn:local:abc/def#f1#f2"); } + @Test public void scheme_urn_20() { schemeViolation("urn:αβγ:abc", URIScheme.URN, Issue.urn_bad_nid); } // == urn:uuid:
