Update YamlExtractor: update unit tests; add test for a simple text file. Signed-off-by: Jacek Grzebyta <[email protected]>
Project: http://git-wip-us.apache.org/repos/asf/any23/repo Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/657912b3 Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/657912b3 Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/657912b3 Branch: refs/heads/master Commit: 657912b3aa22b1f46918fb39f2dc1a0a459303d8 Parents: f1b26df Author: Jacek Grzebyta <[email protected]> Authored: Mon Oct 23 12:40:21 2017 +0100 Committer: Jacek Grzebyta <[email protected]> Committed: Mon Oct 23 12:40:21 2017 +0100 ---------------------------------------------------------------------- .../any23/extractor/yaml/YAMLExtractor.java | 140 ++----------------- .../extractor/yaml/YAMLProcessorException.java | 64 +++++++++ .../any23/extractor/yaml/YAMLExtractorTest.java | 27 +++- .../apache/any23/extractor/yaml/simple-312.yml | 4 + 4 files changed, 103 insertions(+), 132 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/any23/blob/657912b3/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java b/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java index ceacaba..7d8221f 100644 --- a/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java @@ -29,10 +29,10 @@ import org.apache.any23.extractor.ExtractionResult; import org.apache.any23.extractor.Extractor; import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.rdf.RDFUtils; -import org.apache.any23.util.StringUtils; import org.apache.any23.vocab.YAML; import org.eclipse.rdf4j.model.Resource; import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Model; import org.eclipse.rdf4j.model.Value; import org.eclipse.rdf4j.model.vocabulary.RDF; import 
org.eclipse.rdf4j.model.vocabulary.RDFS; @@ -51,7 +51,7 @@ public class YAMLExtractor implements Extractor.ContentExtractor { private static final YAML vocab = YAML.getInstance(); - private int nodeId = 0; + private final ElementsProcessor ep = ElementsProcessor.getInstance(); private Resource documentRoot; @@ -80,7 +80,18 @@ public class YAMLExtractor implements Extractor.ContentExtractor { Resource pageNode = RDFUtils.makeIRI("document", documentIRI, true); out.writeTriple(documentRoot, vocab.contains, pageNode); out.writeTriple(pageNode, RDF.TYPE, vocab.document); - buildNode(documentIRI, p, out, pageNode); + Map.Entry<Value, Model> rootNode = ep.asModel(documentIRI, p, pageNode); + + if (rootNode == null) { + continue; + } + + log.debug("Subgraph root node: {}", rootNode.getKey().stringValue()); + + rootNode.getValue().forEach((s) ->{ + out.writeTriple(s.getSubject(), s.getPredicate(), s.getObject()); + }); + } } @@ -90,127 +101,4 @@ public class YAMLExtractor implements Extractor.ContentExtractor { return YAMLExtractorFactory.getDescriptionInstance(); } - private Optional<Value> buildNode(IRI fileURI, Object treeData, ExtractionResult out, Resource... 
parent) { - - if (treeData != null) { - log.debug("object type: {}", treeData.getClass()); - } - - if (treeData == null) { - return Optional.empty(); - } else if (treeData instanceof Map) { - return Optional.ofNullable(processMap(fileURI, (Map) treeData, out, parent)); - } else if (treeData instanceof List) { - return Optional.ofNullable(processList(fileURI, (List) treeData, out, parent)); - } else if (treeData instanceof Long) { - return Optional.of(RDFUtils.literal(((Long) treeData))); - } else if (treeData instanceof Integer) { - return Optional.of(RDFUtils.literal(((Integer) treeData))); - } else if (treeData instanceof Float) { - return Optional.of(RDFUtils.literal((Float) treeData)); - } else if (treeData instanceof Double) { - return Optional.of(RDFUtils.literal((Double) treeData)); - } else if (treeData instanceof Byte) { - return Optional.of(RDFUtils.literal((Byte) treeData)); - } else if (treeData instanceof Boolean) { - return Optional.of(RDFUtils.literal((Boolean) treeData)); - } else { - return Optional.of(processString((String) treeData)); - } - } - - private Value processMap(IRI file, Map<String, Object> node, ExtractionResult out, Resource... parent) { - Resource nodeURI = Arrays.asList(parent).isEmpty() ? YAMLExtractor.this.makeUri(file) : parent[0]; - - - node.keySet().forEach((k) -> { - /* False prevents adding _<int> to the predicate. - Thus the predicate pattern is: - "some string" ---> ns:someString - */ - Resource predicate = RDFUtils.makeIRI(k, file, false); - Optional<Value> isValue = buildNode(file, node.get(k), out); - out.writeTriple(nodeURI, RDF.TYPE, vocab.mapping); - if (isValue.isPresent()) { - out.writeTriple(nodeURI, (IRI) predicate, isValue.get()); - } - out.writeTriple(predicate, RDF.TYPE, RDF.PREDICATE); - out.writeTriple(predicate, RDFS.LABEL, RDFUtils.literal(k)); - }); - return nodeURI; - } - - private Value processList(IRI fileURI, Iterable iter, ExtractionResult out, Resource... 
parent) { - Resource node = YAMLExtractor.this.makeUri(); - out.writeTriple(node, RDF.TYPE, RDF.LIST); - - if (!Arrays.asList(parent).isEmpty()) { - out.writeTriple(parent[0], vocab.contains, node); - } - - Resource pList = null; // previous RDF iter node - Resource cList = node; // cutternt RDF iter node - Iterator<?> listIter = iter.iterator(); - while (listIter.hasNext()) { - // If previous RDF iter node is given lint with current one - if (pList != null) { - out.writeTriple(pList, RDF.REST, cList); - } - // adds value to the current iter - Optional<Value> isValue = buildNode(fileURI, listIter.next(), out); - out.writeTriple(cList, RDF.FIRST, isValue.orElse(RDF.NIL)); - // makes current node the previuos one and generate new current node - pList = cList; - cList = YAMLExtractor.this.makeUri(); - } - out.writeTriple(pList, RDF.REST, RDF.NIL); - - return node; - } - - private Value processString(String str) { - if (RDFUtils.isAbsoluteIRI(str)) { - return RDFUtils.iri(str); - } else { - return RDFUtils.literal(str); - } - } - - private Resource makeUri() { - Resource bnode = RDFUtils.bnode(Integer.toString(nodeId)); - nodeId++; - return bnode; - } - - private Resource makeUri(IRI docUri) { - return makeUri("node", docUri); -} - - private Resource makeUri(String type, IRI docUri) { - return makeUri(type, docUri, true); - } - - private Resource makeUri(String type, IRI docUri, boolean addId) { - - // preprocess string: converts - -> _ - // converts <space>: word1 word2 -> word1Word2 - String newType = StringUtils.implementJavaNaming(type); - - String uriString; - if (docUri.toString().endsWith("/")) { - uriString = docUri.toString() + newType; - } else { - uriString = docUri.toString() + "#" + newType; - } - - if (addId) { - uriString = uriString + "_" + Integer.toString(nodeId); - } - - Resource node = RDFUtils.iri(uriString); - if (addId) { - nodeId++; - } - return node; - } } 
http://git-wip-us.apache.org/repos/asf/any23/blob/657912b3/core/src/main/java/org/apache/any23/extractor/yaml/YAMLProcessorException.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/yaml/YAMLProcessorException.java b/core/src/main/java/org/apache/any23/extractor/yaml/YAMLProcessorException.java new file mode 100644 index 0000000..6c113b1 --- /dev/null +++ b/core/src/main/java/org/apache/any23/extractor/yaml/YAMLProcessorException.java @@ -0,0 +1,64 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.any23.extractor.yaml; + +import javax.annotation.Nullable; + +/** + * + * @author Jacek Grzebyta (grzebyta.dev [at] gmail.com) + */ +public class YAMLProcessorException extends RuntimeException { + + @Nullable + private Object reason; + + public Object getReason() { + return reason; + } + + public YAMLProcessorException() { + super(); + } + + public YAMLProcessorException(Object reason) { + this.reason = reason; + } + + public YAMLProcessorException(Object reason, String message) { + super(message); + this.reason = reason; + } + + public YAMLProcessorException(Object reason, String message, Throwable cause) { + super(message, cause); + this.reason = reason; + } + + public YAMLProcessorException(Object reason, Throwable cause) { + super(cause); + this.reason = reason; + } + + public YAMLProcessorException(Object reason, String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + this.reason = reason; + } + + + + +} http://git-wip-us.apache.org/repos/asf/any23/blob/657912b3/core/src/test/java/org/apache/any23/extractor/yaml/YAMLExtractorTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/extractor/yaml/YAMLExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/yaml/YAMLExtractorTest.java index 57588e9..9f6d01e 100644 --- a/core/src/test/java/org/apache/any23/extractor/yaml/YAMLExtractorTest.java +++ b/core/src/test/java/org/apache/any23/extractor/yaml/YAMLExtractorTest.java @@ -33,7 +33,7 @@ import org.slf4j.LoggerFactory; /** * Test {@link YAMLExtractor}. 
* - * @author Jacek Grzebyta (grzebyta.dev [at] gmail.com) + * @author Jacek Grzebyta (jgrzebyta [at] apache [dot] org) */ public class YAMLExtractorTest extends AbstractExtractorTestCase { @@ -46,6 +46,17 @@ public class YAMLExtractorTest extends AbstractExtractorTestCase { return new YAMLExtractorFactory(); } + /** + * Test to validate simple yaml file for ANY23-312 + */ + @Test + public void simpleTest312() + throws Exception { + assertExtract("/org/apache/any23/extractor/yaml/simple-312.yml"); + log.debug(dumpModelToTurtle()); + assertModelNotEmpty(); + } + @Test public void simpleFileLoading() throws Exception { @@ -89,24 +100,28 @@ public class YAMLExtractorTest extends AbstractExtractorTestCase { assertExtract("/org/apache/any23/extractor/yaml/test-null.yml"); log.debug(dumpModelToTurtle()); assertModelNotEmpty(); + /** + * Since the map is empty it should not contain any document marked type mapping. + */ + assertNotContains(RDF.TYPE, vocab.mapping); int statements = dumpAsListOfStatements().size(); - Assert.assertTrue("Found " + statements + " statements",statements == 9); + Assert.assertTrue("Found " + statements + " statements", statements == 9); } - + @Test public void treeTest() throws Exception { assertExtract("/org/apache/any23/extractor/yaml/tree.yml"); log.debug(dumpModelToTurtle()); assertModelNotEmpty(); // validate part of the tree structure - assertContainsModel(new Statement[] { + assertContainsModel(new Statement[]{ RDFUtils.triple(RDFUtils.bnode(), RDFUtils.iri(ns, "value3"), RDFUtils.bnode("10")), RDFUtils.triple(RDFUtils.bnode("10"), RDF.FIRST, RDFUtils.bnode("11")), RDFUtils.triple(RDFUtils.bnode("11"), RDFUtils.iri(ns, "key3.1"), RDFUtils.bnode("12")), RDFUtils.triple(RDFUtils.bnode("12"), RDF.TYPE, RDF.LIST), - RDFUtils.triple(RDFUtils.bnode("12"), RDF.FIRST, RDFUtils.literal("value3.1.1" )) + RDFUtils.triple(RDFUtils.bnode("12"), RDF.FIRST, RDFUtils.literal("value3.1.1")) }); - + // validate occurence of <urn:value1> resource 
assertContains(RDFUtils.triple(RDFUtils.bnode(), RDF.FIRST, RDFUtils.iri("urn:value1"))); } http://git-wip-us.apache.org/repos/asf/any23/blob/657912b3/test-resources/src/test/resources/org/apache/any23/extractor/yaml/simple-312.yml ---------------------------------------------------------------------- diff --git a/test-resources/src/test/resources/org/apache/any23/extractor/yaml/simple-312.yml b/test-resources/src/test/resources/org/apache/any23/extractor/yaml/simple-312.yml new file mode 100644 index 0000000..ecf7768 --- /dev/null +++ b/test-resources/src/test/resources/org/apache/any23/extractor/yaml/simple-312.yml @@ -0,0 +1,4 @@ +%YAML 1.2 +--- +Some test +is fast \ No newline at end of file
