This is an automated email from the ASF dual-hosted git repository.
andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git
The following commit(s) were added to refs/heads/main by this push:
new b3f664cc17 GH-2887: Tolerate non-NFC IRIs (IRIProviderJenaIRI)
b3f664cc17 is described below
commit b3f664cc17df675eb51ac899a67d7a449018a9a8
Author: Andy Seaborne <[email protected]>
AuthorDate: Mon Dec 9 18:41:39 2024 +0000
GH-2887: Tolerate non-NFC IRIs (IRIProviderJenaIRI)
---
.../main/java/org/apache/jena/irix/IRIProviderJenaIRI.java | 11 ++++++-----
.../src/main/java/org/apache/jena/irix/SetupJenaIRI.java | 4 +++-
.../test/java/org/apache/jena/irix/TestIRIxJenaSystem.java | 7 +++++--
3 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java b/jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java
index 222e0e1893..e6c60ceb35 100644
--- a/jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java
+++ b/jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java
@@ -249,23 +249,24 @@ public class IRIProviderJenaIRI implements IRIProvider {
// Global settings below; this section is for conditional filtering.
// See also Checker.iriViolations for WARN filtering.
switch(code) {
- case Violation.PROHIBITED_COMPONENT_PRESENT:
+ case Violation.PROHIBITED_COMPONENT_PRESENT->{
// Allow "u:p@" when non-strict.
// Jena3 compatibility.
if ( isHTTP(iri) && ! STRICT_HTTP && v.getComponent() ==
IRIComponents.USER )
continue;
- break;
- case Violation.SCHEME_PATTERN_MATCH_FAILED:
+ }
+ case Violation.SCHEME_PATTERN_MATCH_FAILED->{
if ( isURN(iri) && ! STRICT_URN )
continue;
if ( isFILE(iri) )
continue;
- break;
- case Violation.REQUIRED_COMPONENT_MISSING:
+ }
+ case Violation.REQUIRED_COMPONENT_MISSING->{
// jena-iri handling of "file:" URIs is only for (an interpretation of) RFC 1738.
// RFC8089 allows relative file URIs and a wider use of characters.
if ( isFILE(iri) )
continue;
+ }
}
// Signal first error.
String msg = v.getShortMessage();
diff --git a/jena-core/src/main/java/org/apache/jena/irix/SetupJenaIRI.java b/jena-core/src/main/java/org/apache/jena/irix/SetupJenaIRI.java
index 1639236980..968e48a7ee 100644
--- a/jena-core/src/main/java/org/apache/jena/irix/SetupJenaIRI.java
+++ b/jena-core/src/main/java/org/apache/jena/irix/SetupJenaIRI.java
@@ -99,9 +99,11 @@ public class SetupJenaIRI {
setErrorWarning(iriCheckerFactory,
ViolationCodes.NON_INITIAL_DOT_SEGMENT, false, false);
// == Character related.
- //setErrorWarning(iriFactoryInst, ViolationCodes.NOT_NFC, false,
false);
+ // Causes confusion! And this is only advice in RDF Concepts.
+ setErrorWarning(iriCheckerFactory, ViolationCodes.NOT_NFC, false,
false);
// NFKC is not mentioned in RDF 1.1. Switch off.
setErrorWarning(iriCheckerFactory, ViolationCodes.NOT_NFKC, false,
false);
+ // The MAYBE ViolationCodes are never generated.
// ** Applies to various unicode blocks.
diff --git a/jena-core/src/test/java/org/apache/jena/irix/TestIRIxJenaSystem.java b/jena-core/src/test/java/org/apache/jena/irix/TestIRIxJenaSystem.java
index cb3543f3f7..86076feb26 100644
--- a/jena-core/src/test/java/org/apache/jena/irix/TestIRIxJenaSystem.java
+++ b/jena-core/src/test/java/org/apache/jena/irix/TestIRIxJenaSystem.java
@@ -33,8 +33,8 @@ import org.apache.jena.iri.IRI;
/**
* Test of parsing and schema violations.
- * <p>s
- * This is the test suite that compares result with jena-iri.
+ * <p>
+ * This is the test suite that compares results with jena-iri.
* See also {@link TestIRIxSyntaxRFC3986} for RFC 3986 syntax only parsing.
*/
@FixMethodOrder(MethodSorters.NAME_ASCENDING)
@@ -90,6 +90,9 @@ public class TestIRIxJenaSystem extends AbstractTestIRIx_3986 {
@Test public void parse_18() { good("/z/a:b"); }
+ // This character is from a report on users@jena.
+ @Test public void parse_nfc() { good("http://host/ή"); }
+
// ---- bad
// Leading ':'