Repository: any23 Updated Branches: refs/heads/master 3f87cf3a8 -> 29bee074f
ANY23-362 resolved rdf4j deprecation warnings Project: http://git-wip-us.apache.org/repos/asf/any23/repo Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/29bee074 Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/29bee074 Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/29bee074 Branch: refs/heads/master Commit: 29bee074f7454dacfd8a565624da84cadfa6d989 Parents: 3f87cf3 Author: Hans <[email protected]> Authored: Wed Jul 11 11:13:30 2018 -0500 Committer: Hans <[email protected]> Committed: Wed Jul 11 16:07:36 2018 -0500 ---------------------------------------------------------------------- .../any23/extractor/rdf/BaseRDFExtractor.java | 15 ------------- .../any23/extractor/rdf/RDFParserFactory.java | 22 +++++++++++--------- .../any23/extractor/rdf/RDFXMLExtractor.java | 2 +- .../any23/extractor/rdf/TriXExtractor.java | 2 +- .../java/org/apache/any23/rdf/RDFUtils.java | 13 ++++++------ .../apache/any23/mime/TikaMIMETypeDetector.java | 5 ++--- 6 files changed, 23 insertions(+), 36 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/any23/blob/29bee074/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java index a1eab72..f390f04 100644 --- a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java @@ -27,8 +27,6 @@ import org.eclipse.rdf4j.rio.RDFFormat; import org.eclipse.rdf4j.rio.RDFParseException; import org.eclipse.rdf4j.rio.RDFParser; import org.eclipse.rdf4j.rio.RDFHandlerException; -import org.eclipse.rdf4j.rio.RioSetting; -import org.eclipse.rdf4j.rio.helpers.BasicParserSettings; import org.jsoup.nodes.Attribute; import org.jsoup.nodes.Comment; import org.jsoup.nodes.DataNode; @@ -48,7 +46,6 @@ import java.io.InputStream; import java.io.PushbackInputStream; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; -import java.util.HashSet; import java.util.Iterator; /** @@ -111,18 +108,6 @@ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor { ) throws IOException, ExtractionException { try { final RDFParser parser = getParser(extractionContext, extractionResult); - parser.getParserConfig().setNonFatalErrors(new HashSet<RioSetting<?>>()); - - // Disable verification to ensure that DBPedia is accessible, given it uses so many custom datatypes - parser.getParserConfig().set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, true); - parser.getParserConfig().addNonFatalError(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES); - parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true); - parser.getParserConfig().addNonFatalError(BasicParserSettings.VERIFY_DATATYPE_VALUES); - parser.getParserConfig().set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, false); - parser.getParserConfig().addNonFatalError(BasicParserSettings.NORMALIZE_DATATYPE_VALUES); - parser.getParserConfig().set(BasicParserSettings.VERIFY_RELATIVE_URIS, true); - parser.getParserConfig().addNonFatalError(BasicParserSettings.VERIFY_RELATIVE_URIS); - RDFFormat format = parser.getRDFFormat(); String iri = extractionContext.getDocumentIRI().stringValue(); http://git-wip-us.apache.org/repos/asf/any23/blob/29bee074/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java b/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java index b9d9c9b..2778621 100644 --- a/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java @@ -28,6 +28,7 @@ import org.eclipse.rdf4j.rio.RDFHandlerException; import org.eclipse.rdf4j.rio.RDFParseException; import org.eclipse.rdf4j.rio.RDFParser; import org.eclipse.rdf4j.rio.Rio; +import org.eclipse.rdf4j.rio.helpers.BasicParserSettings; import org.eclipse.rdf4j.rio.helpers.RDFaParserSettings; import org.eclipse.rdf4j.rio.helpers.RDFaVersion; import org.eclipse.rdf4j.rio.turtle.TurtleParser; @@ -38,6 +39,8 @@ import org.slf4j.LoggerFactory; import java.io.IOException; import java.io.InputStream; import java.io.Reader; +import java.util.Collections; +import java.util.HashSet; /** * This factory provides a common logic for creating and configuring correctly @@ -49,13 +52,12 @@ public class RDFParserFactory { private static final Logger logger = LoggerFactory.getLogger(RDFParserFactory.class); - private static RDFParserFactory instance; + private static class InstanceHolder { + private static final RDFParserFactory instance = new RDFParserFactory(); + } public static RDFParserFactory getInstance() { - if(instance == null) { - instance = new RDFParserFactory(); - } - return instance; + return InstanceHolder.instance; } /** @@ -280,11 +282,11 @@ public class RDFParserFactory { final ExtractionContext extractionContext, final ExtractionResult extractionResult ) { - parser.setDatatypeHandling( - verifyDataType ? RDFParser.DatatypeHandling.VERIFY : RDFParser.DatatypeHandling.IGNORE - ); - parser.setStopAtFirstError(stopAtFirstError); - parser.setParseErrorListener( new InternalParseErrorListener(extractionResult) ); + parser.getParserConfig().setNonFatalErrors(stopAtFirstError ? Collections.emptySet() : new HashSet<>(parser.getSupportedSettings())); + parser.set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, verifyDataType); + parser.set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true); + + parser.setParseErrorListener(new InternalParseErrorListener(extractionResult)); parser.setValueFactory( new Any23ValueFactoryWrapper( SimpleValueFactory.getInstance(), http://git-wip-us.apache.org/repos/asf/any23/blob/29bee074/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractor.java index 67fac7a..1500723 100644 --- a/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractor.java @@ -45,7 +45,7 @@ public class RDFXMLExtractor extends BaseRDFExtractor { * Default constructor, with no verification of data types and not stop at first error. */ public RDFXMLExtractor() { - this(true, true); + this(false, false); } @Override http://git-wip-us.apache.org/repos/asf/any23/blob/29bee074/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractor.java index a3b9681..3a8b0d7 100644 --- a/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractor.java @@ -46,7 +46,7 @@ public class TriXExtractor extends BaseRDFExtractor { * Default constructor, with no verification of data types and not stop at first error. */ public TriXExtractor() { - this(true, true); + this(false, false); } @Override http://git-wip-us.apache.org/repos/asf/any23/blob/29bee074/core/src/main/java/org/apache/any23/rdf/RDFUtils.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/rdf/RDFUtils.java b/core/src/main/java/org/apache/any23/rdf/RDFUtils.java index 242984b..44a98e0 100644 --- a/core/src/main/java/org/apache/any23/rdf/RDFUtils.java +++ b/core/src/main/java/org/apache/any23/rdf/RDFUtils.java @@ -69,6 +69,8 @@ public class RDFUtils { private static final Logger LOG = LoggerFactory.getLogger(RDFUtils.class); + private static final Statement[] EMPTY_STATEMENTS = new Statement[0]; + private RDFUtils() {} /** @@ -443,7 +445,7 @@ public class RDFUtils { * @throws IllegalArgumentException if no extension matches. */ public static Optional<RDFFormat> getFormatByExtension(String ext) { - if( ! ext.startsWith(".") ) + if (!ext.startsWith(".")) ext = "." + ext; return Rio.getParserFormatForFileName(ext); } @@ -463,11 +465,10 @@ public class RDFUtils { final StatementCollector handler = new StatementCollector(); final RDFParser parser = getParser(format); parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true); - parser.setStopAtFirstError(true); parser.setPreserveBNodeIDs(true); parser.setRDFHandler(handler); parser.parse(is, baseIRI); - return handler.getStatements().toArray( new Statement[handler.getStatements().size()] ); + return handler.getStatements().toArray(EMPTY_STATEMENTS); } /** @@ -508,11 +509,11 @@ public class RDFUtils { */ public static Statement[] parseRDF(String resource) throws IOException { final int extIndex = resource.lastIndexOf('.'); - if(extIndex == -1) + if (extIndex == -1) throw new IllegalArgumentException("Error while detecting the extension in resource name " + resource); final String extension = resource.substring(extIndex + 1); - return parseRDF( getFormatByExtension(extension).orElseThrow(Rio.unsupportedFormat(extension)) - , RDFUtils.class.getResourceAsStream(resource) ); + return parseRDF(getFormatByExtension(extension).orElseThrow(Rio.unsupportedFormat(extension)), + RDFUtils.class.getResourceAsStream(resource)); } /** http://git-wip-us.apache.org/repos/asf/any23/blob/29bee074/mime/src/main/java/org/apache/any23/mime/TikaMIMETypeDetector.java ---------------------------------------------------------------------- diff --git a/mime/src/main/java/org/apache/any23/mime/TikaMIMETypeDetector.java b/mime/src/main/java/org/apache/any23/mime/TikaMIMETypeDetector.java index c5601b1..3347895 100644 --- a/mime/src/main/java/org/apache/any23/mime/TikaMIMETypeDetector.java +++ b/mime/src/main/java/org/apache/any23/mime/TikaMIMETypeDetector.java @@ -29,6 +29,7 @@ import org.apache.tika.mime.MimeTypes; import org.eclipse.rdf4j.rio.RDFFormat; import org.eclipse.rdf4j.rio.RDFParser; import org.eclipse.rdf4j.rio.Rio; +import org.eclipse.rdf4j.rio.helpers.BasicParserSettings; import java.io.BufferedReader; import java.io.ByteArrayInputStream; @@ -110,9 +111,7 @@ public class TikaMIMETypeDetector implements MIMETypeDetector { public static boolean checkTurtleFormat(InputStream is) throws IOException { String sample = extractDataSample(is, '.'); RDFParser turtleParser = Rio.createParser(RDFFormat.TURTLE); - turtleParser.setDatatypeHandling(RDFParser.DatatypeHandling.VERIFY); - turtleParser.setStopAtFirstError(true); - turtleParser.setVerifyData(true); + turtleParser.set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true); ByteArrayInputStream bais = new ByteArrayInputStream(sample.getBytes()); try { turtleParser.parse(bais, "");
