This is an automated email from the ASF dual-hosted git repository. andy pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/jena.git
commit 60c659c1bfa56df260aa000d81eb42bc6e1899f3 Author: Andy Seaborne <[email protected]> AuthorDate: Tue Dec 9 17:15:02 2025 +0000 GH-3630: Default syntax N-Quads only if stdin --- .../main/java/org/apache/jena/riot/RDFParser.java | 2 +- .../main/java/org/apache/jena/riot/WebContent.java | 10 ++-- .../apache/jena/riot/TestSyntaxDetermination.java | 64 +++++++++++----------- .../java/org/apache/jena/atlas/lib/IRILib.java | 8 ++- jena-cmds/src/main/java/riotcmd/CmdLangParse.java | 29 ++++++---- 5 files changed, 60 insertions(+), 53 deletions(-) diff --git a/jena-arq/src/main/java/org/apache/jena/riot/RDFParser.java b/jena-arq/src/main/java/org/apache/jena/riot/RDFParser.java index 73b9b0f1ac..9a9bf179cd 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/RDFParser.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/RDFParser.java @@ -111,7 +111,7 @@ public class RDFParser { private final ErrorHandler errorHandler; private final Context context; // Some cases the parser is reusable (read a file), some are not (input streams). - private boolean canUseThisParser = true; + private boolean canUseThisParser = true; // ---- Builder creation diff --git a/jena-arq/src/main/java/org/apache/jena/riot/WebContent.java b/jena-arq/src/main/java/org/apache/jena/riot/WebContent.java index 6d0615953a..e45763724f 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/WebContent.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/WebContent.java @@ -273,14 +273,12 @@ public class WebContent { // server setups return text/plain for any file type. // (It was never registered as being N-triples; // that was only for RDF 2004 testing.) - ContentType ct = null; - if ( !isTextPlain ) - // Not guaranteed to be registered as a language here. - ct = (contentTypeStr == null) ? 
null : ContentType.create(contentTypeStr); + if ( !isTextPlain && (contentTypeStr != null) ) + return ContentType.create(contentTypeStr); - if ( ct == null && hintLang != null ) + ContentType ct = null; + if ( hintLang != null ) ct = hintLang.getContentType(); - if ( ct == null ) ct = RDFLanguages.guessContentType(target); diff --git a/jena-arq/src/test/java/org/apache/jena/riot/TestSyntaxDetermination.java b/jena-arq/src/test/java/org/apache/jena/riot/TestSyntaxDetermination.java index a3af9e30ae..cf5297b162 100644 --- a/jena-arq/src/test/java/org/apache/jena/riot/TestSyntaxDetermination.java +++ b/jena-arq/src/test/java/org/apache/jena/riot/TestSyntaxDetermination.java @@ -37,33 +37,34 @@ public class TestSyntaxDetermination { public static Stream<Arguments> provideArgs() { List<Arguments> x = new ArrayList<>(); - add(x, "Test-ext-ttl-1", "http://sparql.org/D.ttl", "text/turtle", Lang.TTL, Lang.TTL); - add(x, "Test-ext-ttl-2", "http://sparql.org/D.ttl", "text/turtle", Lang.RDFXML, Lang.TTL); - add(x, "Test-ext-ttl-3", "http://sparql.org/D.ttl", "text/plain", null, Lang.TTL); - add(x, "Test-ext-ttl-4", "http://sparql.org/D.ttl", "text/plain", Lang.RDFXML, Lang.RDFXML); - add(x, "Test-ext-ttl-5", "http://sparql.org/D.ttl", null, Lang.TTL, Lang.TTL); - add(x, "Test-ext-ttl-6", "http://sparql.org/D.ttl", null, null, Lang.TTL); - - add(x, "Test-no-ext-1", "http://sparql.org/D", "text/turtle", Lang.TTL, Lang.TTL); - add(x, "Test-no-ext-2", "http://sparql.org/D", "text/turtle", Lang.RDFXML, Lang.TTL); - add(x, "Test-no-ext-3", "http://sparql.org/D", "text/plain", null, null); - add(x, "Test-no-ext-4", "http://sparql.org/D", "text/plain", Lang.RDFXML, Lang.RDFXML); - add(x, "Test-no-ext-5", "http://sparql.org/D", null, Lang.NT, Lang.NT); - add(x, "Test-no-ext-6", "http://sparql.org/D", null, null, null); - - add(x, "Test-ext-rdf-1", "http://sparql.org/D.rdf", "text/turtle", Lang.TTL, Lang.TTL); - add(x, "Test-ext-rdf-2", "http://sparql.org/D.rdf", "text/turtle", 
Lang.RDFXML, Lang.TTL); - add(x, "Test-ext-rdf-3", "http://sparql.org/D.rdf", "text/plain", null, Lang.RDFXML); - add(x, "Test-ext-rdf-4", "http://sparql.org/D.rdf", "text/plain", Lang.RDFXML, Lang.RDFXML); - add(x, "Test-ext-rdf-5", "http://sparql.org/D.rdf", null, Lang.TTL, Lang.TTL); - add(x, "Test-ext-rdf-6", "http://sparql.org/D.rdf", null, null, Lang.RDFXML); - - add(x, "Test-unknown-ext-1", "http://sparql.org/D.xyz", "text/turtle", Lang.TTL, Lang.TTL); - add(x, "Test-unknown-ext-2", "http://sparql.org/D.xyz", "text/turtle", Lang.RDFXML, Lang.TTL); - add(x, "Test-unknown-ext-3", "http://sparql.org/D.xyz", "text/plain", null, null); - add(x, "Test-unknown-ext-4", "http://sparql.org/D.xyz", "text/plain", Lang.RDFXML, Lang.RDFXML); - add(x, "Test-unknown-ext-5", "http://sparql.org/D.xyz", null, Lang.NT, Lang.NT); - add(x, "Test-unknown-ext-6", "http://sparql.org/D.xyz", null, null, null); + // marker url contentType hintLang, expected + add(x, "Test-ext-ttl-1", "http://sparql.org/D.ttl", "text/turtle", Lang.TTL, Lang.TTL); + add(x, "Test-ext-ttl-2", "http://sparql.org/D.ttl", "text/turtle", Lang.RDFXML, Lang.TTL); + add(x, "Test-ext-ttl-3", "http://sparql.org/D.ttl", "text/plain", null, Lang.TTL); + add(x, "Test-ext-ttl-4", "http://sparql.org/D.ttl", "text/plain", Lang.RDFXML, Lang.RDFXML); + add(x, "Test-ext-ttl-5", "http://sparql.org/D.ttl", null, Lang.TTL, Lang.TTL); + add(x, "Test-ext-ttl-6", "http://sparql.org/D.ttl", null, null, Lang.TTL); + + add(x, "Test-no-ext-1", "http://sparql.org/D", "text/turtle", Lang.TTL, Lang.TTL); + add(x, "Test-no-ext-2", "http://sparql.org/D", "text/turtle", Lang.RDFXML, Lang.TTL); + add(x, "Test-no-ext-3", "http://sparql.org/D", "text/plain", null, null); + add(x, "Test-no-ext-4", "http://sparql.org/D", "text/plain", Lang.RDFXML, Lang.RDFXML); + add(x, "Test-no-ext-5", "http://sparql.org/D", null, Lang.NT, Lang.NT); + add(x, "Test-no-ext-6", "http://sparql.org/D", null, null, null); + + add(x, "Test-ext-rdf-1", 
"http://sparql.org/D.rdf", "text/turtle", Lang.TTL, Lang.TTL); + add(x, "Test-ext-rdf-2", "http://sparql.org/D.rdf", "text/turtle", Lang.RDFXML, Lang.TTL); + add(x, "Test-ext-rdf-3", "http://sparql.org/D.rdf", "text/plain", null, Lang.RDFXML); + add(x, "Test-ext-rdf-4", "http://sparql.org/D.rdf", "text/plain", Lang.RDFXML, Lang.RDFXML); + add(x, "Test-ext-rdf-5", "http://sparql.org/D.rdf", null, Lang.TTL, Lang.TTL); + add(x, "Test-ext-rdf-6", "http://sparql.org/D.rdf", null, null, Lang.RDFXML); + + add(x, "Test-unknown-ext-1", "http://sparql.org/D.xyz", "text/turtle", Lang.TTL, Lang.TTL); + add(x, "Test-unknown-ext-2", "http://sparql.org/D.xyz", "text/turtle", Lang.RDFXML, Lang.TTL); + add(x, "Test-unknown-ext-3", "http://sparql.org/D.xyz", "text/plain", null, null); + add(x, "Test-unknown-ext-4", "http://sparql.org/D.xyz", "text/plain", Lang.RDFXML, Lang.RDFXML); + add(x, "Test-unknown-ext-5", "http://sparql.org/D.xyz", null, Lang.NT, Lang.NT); + add(x, "Test-unknown-ext-6", "http://sparql.org/D.xyz", null, null, null); return x.stream(); } @@ -72,11 +73,10 @@ public class TestSyntaxDetermination { x.add(Arguments.of(marker, url, contentType, hintLang, expected)); } - private String url; - private String contentType; - private Lang hintLang; - private Lang expected; - + private final String url; + private final String contentType; + private final Lang hintLang; + private final Lang expected; public TestSyntaxDetermination(String marker, String url, String contentType, Lang hintLang, Lang expected) { this.url = url; diff --git a/jena-base/src/main/java/org/apache/jena/atlas/lib/IRILib.java b/jena-base/src/main/java/org/apache/jena/atlas/lib/IRILib.java index 2e0d92cd81..12451448bf 100644 --- a/jena-base/src/main/java/org/apache/jena/atlas/lib/IRILib.java +++ b/jena-base/src/main/java/org/apache/jena/atlas/lib/IRILib.java @@ -165,9 +165,14 @@ public class IRILib return plainFilenameToURL(fn); } + /** Test whether a URI starts with schema name {@code file:} (any 
case) */ + public static boolean isFilename(String iri) { + return StrUtils.strStartsWithIgnoreCase(iri, "file:"); + } + /** Convert a file: IRI to a filename */ public static String IRIToFilename(String iri) { - if ( ! iri.startsWith("file:") ) + if ( ! isFilename(iri) ) throw new AtlasException("Not a file: URI: "+iri); String fn; @@ -180,7 +185,6 @@ public class IRILib // At this point, we have a filename of /C:/ // so need strip the leading "/" fn = fixupWindows(fn); - return decodeHex(fn); } diff --git a/jena-cmds/src/main/java/riotcmd/CmdLangParse.java b/jena-cmds/src/main/java/riotcmd/CmdLangParse.java index 2d26ac9ad1..c1b66dea74 100644 --- a/jena-cmds/src/main/java/riotcmd/CmdLangParse.java +++ b/jena-cmds/src/main/java/riotcmd/CmdLangParse.java @@ -293,6 +293,8 @@ public abstract class CmdLangParse extends CmdGeneral { } public ParseRecord parseFile(String filename) { + String filenameLabel = filename; + boolean isStdin = filename.equals("-"); String baseParserIRI = this.parserBaseIRI; RDFParserBuilder builder = RDFParser.create(); if ( baseParserIRI != null ) @@ -306,23 +308,26 @@ public abstract class CmdLangParse extends CmdGeneral { // Always use the command line specified syntax. builder.forceLang(modLangParse.getLang()); else { - // Otherwise, use the command selected language, with N-Quads as the - // ultimate fallback. - Lang lang = dftLang(); - if ( lang == null ) - lang = Lang.NQUADS; - // Fall back lang if RIOT can't guess it. - builder.lang(lang); + if ( isStdin ) { + // Otherwise, use the command selected language, with N-Quads as the + // ultimate fallback. + Lang lang = dftLang(); + if ( lang == null ) + lang = Lang.NQUADS; + // Fall back lang if RIOT can't guess it. + builder.lang(lang); + // hint > file extension in WebContent.determineCT + } } // Set the display name and the source URL. 
String sourceURL = filename; - if ( filename.equals("-") ) { + if ( isStdin ) { if ( baseParserIRI == null ) { baseParserIRI = "http://base/"; builder.base(baseParserIRI); } - filename = "stdin"; + filenameLabel = "stdin"; builder.source(System.in); } else { String scheme = IRIs.scheme(filename); @@ -334,7 +339,7 @@ public abstract class CmdLangParse extends CmdGeneral { sourceURL = IRILib.filenameToIRI(filename); builder.source(sourceURL); } - return parseRIOT(builder, filename, sourceURL); + return parseRIOT(builder, filenameLabel, sourceURL); } // Return the default (fall-back) language used if no other choice is made. @@ -342,7 +347,7 @@ public abstract class CmdLangParse extends CmdGeneral { protected abstract Lang dftLang(); /** Parse one source */ - protected ParseRecord parseRIOT(RDFParserBuilder builder, String filename, String sourceURL) { + protected ParseRecord parseRIOT(RDFParserBuilder builder, String filenameLabel, String sourceURL) { boolean checking = true; if ( modLangParse.explicitChecking() ) checking = true; @@ -405,7 +410,7 @@ public abstract class CmdLangParse extends CmdGeneral { } parserOut.finish(); long x = modTime.endTimer(); - ParseRecord outcome = new ParseRecord(filename, sourceURL, successful, x, parserOut.countTriples(), parserOut.countQuads(), errHandler); + ParseRecord outcome = new ParseRecord(filenameLabel, sourceURL, successful, x, parserOut.countTriples(), parserOut.countQuads(), errHandler); return outcome; }
