This is an automated email from the ASF dual-hosted git repository.

andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git


The following commit(s) were added to refs/heads/main by this push:
     new b3f664cc17 GH-2887: Tolerate non-NFC IRIs (IRIProviderJenaIRI)
b3f664cc17 is described below

commit b3f664cc17df675eb51ac899a67d7a449018a9a8
Author: Andy Seaborne <[email protected]>
AuthorDate: Mon Dec 9 18:41:39 2024 +0000

    GH-2887: Tolerate non-NFC IRIs (IRIProviderJenaIRI)
---
 .../main/java/org/apache/jena/irix/IRIProviderJenaIRI.java    | 11 ++++++-----
 .../src/main/java/org/apache/jena/irix/SetupJenaIRI.java      |  4 +++-
 .../test/java/org/apache/jena/irix/TestIRIxJenaSystem.java    |  7 +++++--
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java b/jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java
index 222e0e1893..e6c60ceb35 100644
--- a/jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java
+++ b/jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java
@@ -249,23 +249,24 @@ public class IRIProviderJenaIRI implements IRIProvider {
             // Global settings below; this section is for conditional filtering.
             // See also Checker.iriViolations for WARN filtering.
             switch(code) {
-                case Violation.PROHIBITED_COMPONENT_PRESENT:
+                case Violation.PROHIBITED_COMPONENT_PRESENT->{
                     // Allow "u:p@" when non-strict.
                     // Jena3 compatibility.
                     if ( isHTTP(iri) && ! STRICT_HTTP && v.getComponent() == IRIComponents.USER )
                         continue;
-                    break;
-                case Violation.SCHEME_PATTERN_MATCH_FAILED:
+                }
+                case Violation.SCHEME_PATTERN_MATCH_FAILED->{
                     if ( isURN(iri) && ! STRICT_URN )
                         continue;
                     if ( isFILE(iri) )
                         continue;
-                    break;
-                case Violation.REQUIRED_COMPONENT_MISSING:
+                }
+                case Violation.REQUIRED_COMPONENT_MISSING->{
                     // jena-iri handling of "file:" URIs is only for (an interpretation of) RFC 1738.
                     // RFC8089 allows relative file URIs and a wider use of characters.
                     if ( isFILE(iri) )
                         continue;
+                }
             }
             // Signal first error.
             String msg = v.getShortMessage();
diff --git a/jena-core/src/main/java/org/apache/jena/irix/SetupJenaIRI.java b/jena-core/src/main/java/org/apache/jena/irix/SetupJenaIRI.java
index 1639236980..968e48a7ee 100644
--- a/jena-core/src/main/java/org/apache/jena/irix/SetupJenaIRI.java
+++ b/jena-core/src/main/java/org/apache/jena/irix/SetupJenaIRI.java
@@ -99,9 +99,11 @@ public class SetupJenaIRI {
         setErrorWarning(iriCheckerFactory, ViolationCodes.NON_INITIAL_DOT_SEGMENT, false, false);
 
         // == Character related.
-        //setErrorWarning(iriFactoryInst, ViolationCodes.NOT_NFC,  false, false);
+        // Causes confusion! And this is only advice in RDF Concepts.
+        setErrorWarning(iriCheckerFactory, ViolationCodes.NOT_NFC,  false, false);
         // NFKC is not mentioned in RDF 1.1. Switch off.
         setErrorWarning(iriCheckerFactory, ViolationCodes.NOT_NFKC, false, false);
+        // The MAYBE ViolationCodes are never generated.
 
         // ** Applies to various unicode blocks.
 
diff --git a/jena-core/src/test/java/org/apache/jena/irix/TestIRIxJenaSystem.java b/jena-core/src/test/java/org/apache/jena/irix/TestIRIxJenaSystem.java
index cb3543f3f7..86076feb26 100644
--- a/jena-core/src/test/java/org/apache/jena/irix/TestIRIxJenaSystem.java
+++ b/jena-core/src/test/java/org/apache/jena/irix/TestIRIxJenaSystem.java
@@ -33,8 +33,8 @@ import org.apache.jena.iri.IRI;
 
 /**
  * Test of parsing and schema violations.
- * <p>s
- * This is the test suite that compares result with jena-iri.
+ * <p>
+ * This is the test suite that compares results with jena-iri.
  * See also {@link TestIRIxSyntaxRFC3986} for RDF 3986 syntax only parsing.
  */
 @FixMethodOrder(MethodSorters.NAME_ASCENDING)
@@ -90,6 +90,9 @@ public class TestIRIxJenaSystem extends AbstractTestIRIx_3986 {
 
     @Test public void parse_18() { good("/z/a:b"); }
 
+    // This character is from a report on users@jena.
+    @Test public void parse_nfc() { good("http://host/ή"); }
+
     // ---- bad
 
     // Leading ':'

Reply via email to