This is an automated email from the ASF dual-hosted git repository.

andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git

commit ffa6d76d851aa706886c1033fb1526dc5fda0e30
Author: Andy Seaborne <[email protected]>
AuthorDate: Fri Jan 10 18:27:29 2025 +0000

    GH-2936: Update IRI commands for jena-iri3986
---
 jena-cmds/src/main/java/arq/iri.java               | 138 +++++++++++++++++++--
 .../jena/iri3986/provider/IRIProvider3986.java     |   3 +-
 .../main/java/org/apache/jena/rfc3986/IRI3986.java |  15 ++-
 .../java/org/apache/jena/rfc3986/ParseURN.java     |  11 +-
 .../org/apache/jena/rfc3986/cmd/CmdRelative.java   |   4 +-
 .../org/apache/jena/rfc3986/cmd/CmdResolve.java    |   6 +-
 .../java/org/apache/jena/rfc3986/TestParseURN.java |  24 ++--
 .../org/apache/jena/rfc3986/TestURISchemes.java    |  26 ++--
 8 files changed, 174 insertions(+), 53 deletions(-)

diff --git a/jena-cmds/src/main/java/arq/iri.java b/jena-cmds/src/main/java/arq/iri.java
index dfb8db0841..f28dd5af96 100644
--- a/jena-cmds/src/main/java/arq/iri.java
+++ b/jena-cmds/src/main/java/arq/iri.java
@@ -18,20 +18,30 @@
 
 package arq;
 
+import java.io.PrintStream;
 import java.util.Iterator ;
 
+import org.apache.jena.atlas.lib.Lib;
 import org.apache.jena.iri.IRI ;
-import org.apache.jena.iri.IRIFactory ;
 import org.apache.jena.iri.Violation ;
-import org.apache.jena.irix.SetupJenaIRI;
+import org.apache.jena.iri3986.provider.IRIProvider3986;
+import org.apache.jena.iri3986.provider.IRIProvider3986.IRIx3986;
+import org.apache.jena.irix.*;
+import org.apache.jena.irix.IRIProviderJenaIRI.IRIxJena;
+import org.apache.jena.rfc3986.IRI3986;
+import org.apache.jena.rfc3986.IRIParseException;
 
+/**
+ * Parse and print IRIs
+ */
 public class iri
 {
+    private static PrintStream out = System.out;
+    // Errors and warnings.
+    private static PrintStream err = System.err;
 
     public static void main(String... args)
     {
-        IRIFactory iriFactory = SetupJenaIRI.iriCheckerFactory() ;
-
         boolean first = true ;
         for ( String iriStr : args )
         {
@@ -42,16 +52,120 @@ public class iri
                 System.out.println() ;
             first = false ;
 
-            IRI iri = iriFactory.create(iriStr) ;
-            System.out.println(iriStr + " ==> "+iri) ;
-            if ( iri.isRelative() )
-                System.out.println("Relative: "+iri.isRelative()) ;
+            String setting = Lib.getenv(SystemIRIx.sysPropertyProvider, SystemIRIx.envVariableProvider);
+
+            IRIProvider provider = null;
+            if ( setting != null ) {
+                provider = switch(setting) {
+                    case "IRI3986" -> new IRIProvider3986();
+                    case "IRI0" ->  new IRIProviderJenaIRI();
+                    default -> {
+                        System.err.println("Unknown IRI Provider: "+setting);
+                        System.exit(1);
+                        yield null;
+                    }
+                };
+            }
+
+            if ( provider == null )
+                provider = SystemIRIx.getProvider();
+
+            IRIx irix;
+            try {
+                irix = provider.create(iriStr);
+            } catch (IRIException ex) {
+                System.err.println(ex.getMessage());
+                continue;
+            }
+
+            // jena-iri3986
+            if ( irix instanceof IRIx3986 iri3986 ) {
+                print(iri3986, iriStr);
+                continue;
+            }
+            // jena-iri: The original Jena IRI subsystem
+            if ( irix instanceof IRIxJena iriJena ) {
+                print(iriJena, iriStr);
+                continue;
+            }
+            print(irix, iriStr);
+        }
+    }
+
+    private static void print(IRIx3986 irix, String iriStr) {
+        try {
+            IRI3986 iri = irix.getImpl();
+            IRI3986 iri1 = iri.normalize();
 
-            Iterator<Violation> vIter = iri.violations(true) ;
-            for ( ; vIter.hasNext() ; )
-            {
-                System.out.println(vIter.next().getShortMessage()) ;
+            out.printf("Input: <%s>\n", iriStr);
+            out.printf("    Parsed:       %s\n", iri.rebuild()) ;
+            out.printf("    Absolute:     %s\n", iri.isAbsolute());
+            out.printf("    Relative:     %s\n", iri.isRelative());
+            out.printf("    Hierarchical: %s\n", iri.isHierarchical());
+            out.printf("    Rootless:     %s\n", iri.isRootless());
+            if ( ! iri.equals(iri1) )
+              out.printf("    Normalized:   %s\n", iri1) ;
+            out.printf("\n");
+            out.printf("%s|%s|  ", "Scheme",     iri.scheme());
+            out.printf("%s|%s|  ", "Authority",  iri.authority());
+            out.printf("%s|%s|  ", "Host",       iri.host());
+            if ( iri.hasPort() )
+                out.printf("%s|%s|  ", "Port",       iri.port());
+            out.printf("%s|%s|  ", "Path",       iri.path());
+            out.printf("%s|%s|  ", "Query",      iri.query());
+            out.printf("%s|%s|", "Fragment",   iri.fragment());
+            out.println();
+            if ( iri.hasViolations() ) {
+                out.println();
+                out.println("Scheme specific warnings:");
+                iri.forEachViolation(v->{
+                    out.print("   ");
+                    err.printf("%s\n", v.message());
+                });
             }
+        } catch (IRIParseException ex) {
+            System.err.printf("Error: %s\n", ex.getMessage());
         }
     }
+
+    private static void print(IRIxJena jenaIRI, String iriStr) {
+        IRI iri = jenaIRI.getImpl();
+        System.out.println(iriStr + " ==> " + iri);
+        if ( jenaIRI.isRelative() )
+            System.out.println("Relative: " + iri.isRelative());
+
+        Iterator<Violation> vIter = iri.violations(true);
+        for ( ; vIter.hasNext() ; ) {
+            System.out.println(vIter.next().getShortMessage());
+        }
+    }
+
+    private static void print(IRIx irix, String iriStr) {
+        try {
+            IRIx iri1 = irix.normalize();
+
+            out.printf("Input: <%s>\n", iriStr);
+            out.printf("    Absolute:     %s\n", irix.isAbsolute());
+            out.printf("    Relative:     %s\n", irix.isRelative());
+            if ( ! irix.equals(iri1) )
+              out.printf("    Normalized:   %s\n", iri1) ;
+            out.printf("\n");
+            out.printf("%s|%s|  ", "Scheme",     irix.scheme());
+            out.println();
+            if ( irix.hasViolations() ) {
+                out.println();
+                out.println("Scheme specific warnings:");
+                irix.handleViolations((error, msg)->{
+                    String type = (error? "Error:" : "Warn:");
+                    out.print("   ");
+                    err.printf("%-6s\n", type, msg);
+                });
+            }
+        } catch (IRIParseException ex) {
+            System.err.printf("Error: %s\n", ex.getMessage());
+        }
+
+    }
+
+
 }
diff --git a/jena-core/src/main/java/org/apache/jena/iri3986/provider/IRIProvider3986.java b/jena-core/src/main/java/org/apache/jena/iri3986/provider/IRIProvider3986.java
index 794b867b48..bd716791de 100644
--- a/jena-core/src/main/java/org/apache/jena/iri3986/provider/IRIProvider3986.java
+++ b/jena-core/src/main/java/org/apache/jena/iri3986/provider/IRIProvider3986.java
@@ -77,8 +77,8 @@ public class IRIProvider3986 implements IRIProvider {
 
         @Override
         public IRIx resolve(String other) {
+            // create3986() - checks syntax, and errors if in strict mode.
             IRI3986 iriOther = create3986(other);
-            // Does not complain if iriOther is bad but create3986 did checking.
             IRI3986 iri2 = this.iri.resolve(iriOther);
             return newIRIx(iri2);
         }
@@ -87,7 +87,6 @@ public class IRIProvider3986 implements IRIProvider {
         public IRIx resolve(IRIx other) {
             IRIx3986 iriOther = (IRIx3986)other;
             IRI3986 iri2 = this.iri.resolve(iriOther.iri);
-            //violations(iri2);
             return newIRIx(iri2);
         }
 
diff --git a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/IRI3986.java b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/IRI3986.java
index b7530b06f2..9397f990a5 100644
--- a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/IRI3986.java
+++ b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/IRI3986.java
@@ -448,8 +448,7 @@ public class IRI3986 implements IRI {
     }
 
     /**
-     * <a href="https://tools.ietf.org/html/rfc3986#section-4.3">RFC 3986, Section
-     * 4.3</a>
+     * <a href="https://tools.ietf.org/html/rfc3986#section-4.3">RFC 3986, Section 4.3</a>
      */
     @Override
     public boolean isAbsolute() {
@@ -458,8 +457,7 @@ public class IRI3986 implements IRI {
     }
 
     /**
-     * <a href="https://tools.ietf.org/html/rfc3986#section-4.2">RFC 3986, Section
-     * 4.2</a>
+     * <a href="https://tools.ietf.org/html/rfc3986#section-4.2">RFC 3986, Section 4.2</a>
      */
     @Override
     public boolean isRelative() {
@@ -1622,8 +1620,7 @@ public class IRI3986 implements IRI {
 
         if ( !hasHost() )
             schemeReport(this, Issue.http_no_host, scheme, "http and https URI schemes require //host/");
-
-        if ( /* hasHost() && */ (host0 == host1) )
+        else if ( /* hasHost() && */ (host0 == host1) )
             schemeReport(this, Issue.http_empty_host, scheme, "http and https URI schemes do not allow the host to be empty");
 
         // https://tools.ietf.org/html/rfc3986#section-3.2.3
@@ -1951,8 +1948,10 @@ public class IRI3986 implements IRI {
 
     private static void addReportParseError(IRI3986 iri, String iriStr, String message) {
         // The iri object is probably only partial populated.
-        // Use iriStr for the message.
-        String msg = "'"+iriStr+"' : "+message;
+        // Exception message already has the IRI string. But check.
+        String msg = message;
+        if ( ! message.startsWith("<"+iriStr+">") )
+            msg = "'"+iriStr+"' : "+message;
         Violation v = new Violation(iriStr, null, Issue.ParseError, msg);
         addReport(iri, v);
     }
diff --git a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/ParseURN.java b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/ParseURN.java
index 3216b2f5e7..744f7da578 100644
--- a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/ParseURN.java
+++ b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/ParseURN.java
@@ -215,8 +215,13 @@ public class ParseURN {
             handler.accept(Issue.urn_bad_nid, "No namespace id");
             return -1;
         }
+
+        if ( ch == ':' ) {
+            handler.accept(Issue.urn_bad_nid, "Missing namespace id");
+            return -1;
+        }
         if ( ! Chars3986.isAlphaNum(ch) ) {
-            handler.accept(Issue.urn_bad_nid, "Namespace id does no start with an alphabetic ASCII character");
+            handler.accept(Issue.urn_bad_nid, "Namespace id does not start with an alphabetic ASCII character");
             return -1;
         }
         x++;
@@ -258,7 +263,7 @@ public class ParseURN {
 
         // RFC 8141 section 5.1 (described in RFC 3406)
         if ( LibParseIRI.caseInsensitiveRegion(string, startNamespace, "X-") ) {
-            String start = string.substring(0,2);
+            String start = string.substring(startNamespace,2+startNamespace);
             handler.accept(Issue.urn_x_namespace, "Namespace id starts with '"+start+"'");
             return -1;
         }
@@ -270,7 +275,7 @@ public class ParseURN {
                 char chx = charAt(string, i);
                 if ( !seenNonZero ) {
                     if ( chx == '0' ) {
-                        handler.accept(Issue.urn_bad_nid, "Leading zero in an informal namepsace");
+                        handler.accept(Issue.urn_bad_nid, "Leading zero in an informal namespace");
                         return -1;
                     } else
                         seenNonZero = true;
diff --git 
a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/cmd/CmdRelative.java 
b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/cmd/CmdRelative.java
index 20a0006c97..21c6ccc7a2 100644
--- a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/cmd/CmdRelative.java
+++ b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/cmd/CmdRelative.java
@@ -35,7 +35,7 @@ public class CmdRelative {
         String relStr = fixup(args[1]);
 
         IRI3986 base = createOrExit(baseStr, "Bad base");
-        IRI3986 target = createOrExit(relStr, "Bad");
+        IRI3986 target = createOrExit(relStr, "Bad IRI");
 
         if ( ! base.isAbsolute() ) {
             System.err.println("Base must be an absolute IRI: '" +base+"'");
@@ -45,7 +45,9 @@ public class CmdRelative {
         IRI3986 result = base.relativize(target);
         System.out.println("Base:     "+base);
         System.out.println("IRI:      "+target);
+        System.out.println();
         System.out.println("Relative: "+result);
+        System.out.println();
 
         print(result);
     }
diff --git a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/cmd/CmdResolve.java b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/cmd/CmdResolve.java
index e17a45abae..ab94014ee3 100644
--- a/jena-iri3986/src/main/java/org/apache/jena/rfc3986/cmd/CmdResolve.java
+++ b/jena-iri3986/src/main/java/org/apache/jena/rfc3986/cmd/CmdResolve.java
@@ -35,7 +35,7 @@ public class CmdResolve {
         String relStr = fixup(args[1]);
 
         IRI3986 base = createOrExit(baseStr, "Bad base");
-        IRI3986 rel = createOrExit(baseStr, "Bad ");
+        IRI3986 rel = createOrExit(relStr, "Bad relative IRI");
 
         if ( ! base.isAbsolute() ) {
             System.err.println("Base should be an absolute IRI: '" +base+"'");
@@ -44,9 +44,9 @@ public class CmdResolve {
         IRI3986 result = base.resolve(rel);
         System.out.println("Base:     "+base);
         System.out.println("IRI:      "+rel);
+        System.out.println();
         System.out.println("Resolved: "+result);
-
+        System.out.println();
         print(result);
     }
 }
-
diff --git a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseURN.java b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseURN.java
index fe6f26eb3a..b00d9c42af 100644
--- a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseURN.java
+++ b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestParseURN.java
@@ -47,7 +47,6 @@ public class TestParseURN {
     // 32 character NID
     @Test public void parseURN_11() { goodURN1("urn:123456789-123456789-123456789-12:nss", "urn", "123456789-123456789-123456789-12", "nss"); }
 
-
     @Test public void parseURN_20() { goodURN1("urn:nid:nss?+R1?+R2",  "urn", "nid", "nss", "R1?+R2", null, null); }  // The r-component includes the ?+R2
     @Test public void parseURN_21() { goodURN1("urn:nid:nss?=Q?+R",    "urn", "nid", "nss", null, "Q?+R", null); }    // The q-component includes the ?+R
     @Test public void parseURN_22() { goodURN1("urn:nid:nss?=Q?+",     "urn", "nid", "nss", null, "Q?+", null); }     // The q-component includes the ?+
@@ -56,8 +55,6 @@ public class TestParseURN {
     @Test public void parseURN_25() { goodURN1("urn:nid:nss?+R?Z",     "urn", "nid", "nss", "R?Z", null, null); }     // The r-component includes the ?Z
     @Test public void parseURN_26() { goodURN1("urn:nid:nss?=Q?n=v",   "urn", "nid", "nss", null, "Q?n=v", null); }   // The q-component includes the "?name=value"
 
-
-
     @Test public void parseURN_bad_01() { badURN("cat:ns:s"); }
     @Test public void parseURN_bad_02() { badURN("urn:ns"); }
     @Test public void parseURN_bad_03() { badURN("urn:ns:"); }
@@ -66,18 +63,21 @@ public class TestParseURN {
     @Test public void parseURN_bad_05() { badURN("urn:n:s"); }
     @Test public void parseURN_bad_06() { badURN("urn:-ns:123"); }
     @Test public void parseURN_bad_07() { badURN("urn:ns-:123"); }
+    // 33 characters
+    @Test public void parseURN_bad_08() { badURN("urn:123456789-123456789-123456789-123:nss"); }
 
-    // Bad components.
-    @Test public void parseURN_bad_10() { badURN1("urn:nid:nss?+#F"); }
-    @Test public void parseURN_bad_11() { badURN1("urn:nid:nss?=#F"); }
-    @Test public void parseURN_bad_12() { badURN1("urn:nid:nss?+R?="); }
-    @Test public void parseURN_bad_13() { badURN1("urn:nid:nss?+?=Q"); }
+    @Test public void parseURN_bad_10() { badURN("urn:"); }
+    @Test public void parseURN_bad_11() { badURN("urn::"); }
+    @Test public void parseURN_bad_12() { badURN("urn::abc"); }
 
-    @Test public void parseURN_bad_14() { badURN1("urn:nid:nss?"); }
-    @Test public void parseURN_bad_15() { badURN1("urn:nid:nss?junk"); }
+    // Bad components.
+    @Test public void parseURN_bad_20() { badURN1("urn:nid:nss?+#F"); }
+    @Test public void parseURN_bad_21() { badURN1("urn:nid:nss?=#F"); }
+    @Test public void parseURN_bad_22() { badURN1("urn:nid:nss?+R?="); }
+    @Test public void parseURN_bad_23() { badURN1("urn:nid:nss?+?=Q"); }
 
-    // 33 characters
-    @Test public void parseURN_bad_08() { badURN("urn:123456789-123456789-123456789-123:nss"); }
+    @Test public void parseURN_bad_24() { badURN1("urn:nid:nss?"); }
+    @Test public void parseURN_bad_25() { badURN1("urn:nid:nss?junk"); }
 
     private void badURN(String string) {
         URN x = ParseURN.parseURN(string);
diff --git a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestURISchemes.java b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestURISchemes.java
index 61cfd86396..9abac7a804 100644
--- a/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestURISchemes.java
+++ b/jena-iri3986/src/test/java/org/apache/jena/rfc3986/TestURISchemes.java
@@ -80,27 +80,29 @@ public class TestURISchemes {
     @Test public void scheme_urn_05() { schemeViolation("urn:and-:nss", URIScheme.URN, Issue.urn_bad_nid); }
 
     @Test public void scheme_urn_06() { schemeViolation("urn:", URIScheme.URN, Issue.urn_bad_nid); }
-    @Test public void scheme_urn_07() { schemeViolation("urn:x:abc", URIScheme.URN, Issue.urn_bad_nid); }
-    @Test public void scheme_urn_08() { schemeViolation("urn:abc:", URIScheme.URN, Issue.urn_bad_nss); }
+    @Test public void scheme_urn_07() { schemeViolation("urn::", URIScheme.URN, Issue.urn_bad_nid); }
+    @Test public void scheme_urn_08() { schemeViolation("urn::abc", URIScheme.URN, Issue.urn_bad_nid); }
 
+    @Test public void scheme_urn_09() { schemeViolation("urn:x:abc", URIScheme.URN, Issue.urn_bad_nid); }
+    @Test public void scheme_urn_10() { schemeViolation("urn:abc:", URIScheme.URN, Issue.urn_bad_nss); }
 
     // OK by URN syntax, forbidden by RFC 8141 section 5.1
-    @Test public void scheme_urn_09() { schemeViolation("urn:X-local:nss", URIScheme.URN, Issue.urn_x_namespace); }
-    @Test public void scheme_urn_10() { schemeViolation("urn:x-local:nss", URIScheme.URN, Issue.urn_x_namespace); }
+    @Test public void scheme_urn_11() { schemeViolation("urn:X-local:nss", URIScheme.URN, Issue.urn_x_namespace); }
+    @Test public void scheme_urn_12() { schemeViolation("urn:x-local:nss", URIScheme.URN, Issue.urn_x_namespace); }
     // OK by URN syntax, forbidden by RFC 8141 section 5.1 Informal namespace.
-    @Test public void scheme_urn_11() { schemeViolation("urn:urn-abc:nss", URIScheme.URN, Issue.urn_bad_nid); }
-    @Test public void scheme_urn_12() { good("urn:urn-7:nss"); }
-    @Test public void scheme_urn_13() { good("urn:nid:a"); }
+    @Test public void scheme_urn_13() { schemeViolation("urn:urn-abc:nss", URIScheme.URN, Issue.urn_bad_nid); }
+    @Test public void scheme_urn_14() { good("urn:urn-7:nss"); }
+    @Test public void scheme_urn_15() { good("urn:nid:a"); }
 
     // 32 char NID
-    @Test public void scheme_urn_14() { good("urn:12345678901234567890123456789012:a"); }
+    @Test public void scheme_urn_16() { good("urn:12345678901234567890123456789012:a"); }
     // 33 char NID
-    @Test public void scheme_urn_15() { schemeViolation("urn:abcdefghij-123456789-123456789-yz:a", URIScheme.URN, Issue.urn_bad_nid); }
+    @Test public void scheme_urn_17() { schemeViolation("urn:abcdefghij-123456789-123456789-yz:a", URIScheme.URN, Issue.urn_bad_nid); }
     // Bad by URN specific rule for the query components.
-    @Test public void scheme_urn_16() { schemeViolation("urn:local:abc/def?query=foo", URIScheme.URN, Issue.urn_bad_components); }
+    @Test public void scheme_urn_18() { schemeViolation("urn:local:abc/def?query=foo", URIScheme.URN, Issue.urn_bad_components); }
     // Two f-components = two fragments
-    @Test public void scheme_urn_17() { badSyntax("urn:local:abc/def#f1#f2"); }
-    @Test public void scheme_urn_18() { schemeViolation("urn:αβγ:abc", URIScheme.URN, Issue.urn_bad_nid); }
+    @Test public void scheme_urn_19() { badSyntax("urn:local:abc/def#f1#f2"); }
+    @Test public void scheme_urn_20() { schemeViolation("urn:αβγ:abc", URIScheme.URN, Issue.urn_bad_nid); }
 
     // == urn:uuid:
 

Reply via email to