Update YamlExtractor

- update unit tests
- add test for a simple text file
Signed-off-by: Jacek Grzebyta <[email protected]>

Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/657912b3
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/657912b3
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/657912b3

Branch: refs/heads/master
Commit: 657912b3aa22b1f46918fb39f2dc1a0a459303d8
Parents: f1b26df
Author: Jacek Grzebyta <[email protected]>
Authored: Mon Oct 23 12:40:21 2017 +0100
Committer: Jacek Grzebyta <[email protected]>
Committed: Mon Oct 23 12:40:21 2017 +0100

----------------------------------------------------------------------
 .../any23/extractor/yaml/YAMLExtractor.java     | 140 ++-----------------
 .../extractor/yaml/YAMLProcessorException.java  |  64 +++++++++
 .../any23/extractor/yaml/YAMLExtractorTest.java |  27 +++-
 .../apache/any23/extractor/yaml/simple-312.yml  |   4 +
 4 files changed, 103 insertions(+), 132 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/657912b3/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java 
b/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java
index ceacaba..7d8221f 100644
--- a/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java
@@ -29,10 +29,10 @@ import org.apache.any23.extractor.ExtractionResult;
 import org.apache.any23.extractor.Extractor;
 import org.apache.any23.extractor.ExtractorDescription;
 import org.apache.any23.rdf.RDFUtils;
-import org.apache.any23.util.StringUtils;
 import org.apache.any23.vocab.YAML;
 import org.eclipse.rdf4j.model.Resource;
 import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.Model;
 import org.eclipse.rdf4j.model.Value;
 import org.eclipse.rdf4j.model.vocabulary.RDF;
 import org.eclipse.rdf4j.model.vocabulary.RDFS;
@@ -51,7 +51,7 @@ public class YAMLExtractor implements 
Extractor.ContentExtractor {
 
     private static final YAML vocab = YAML.getInstance();
 
-    private int nodeId = 0;
+    private final ElementsProcessor ep = ElementsProcessor.getInstance();
 
     private Resource documentRoot;
 
@@ -80,7 +80,18 @@ public class YAMLExtractor implements 
Extractor.ContentExtractor {
             Resource pageNode = RDFUtils.makeIRI("document", documentIRI, 
true);
             out.writeTriple(documentRoot, vocab.contains, pageNode);
             out.writeTriple(pageNode, RDF.TYPE, vocab.document);
-            buildNode(documentIRI, p, out, pageNode);
+            Map.Entry<Value, Model> rootNode = ep.asModel(documentIRI, p, 
pageNode);
+            
+            if (rootNode == null) {
+                continue;
+            }
+            
+            log.debug("Subgraph root node: {}", 
rootNode.getKey().stringValue());
+            
+            rootNode.getValue().forEach((s) ->{
+                out.writeTriple(s.getSubject(), s.getPredicate(), 
s.getObject());
+            });
+            
         }
 
     }
@@ -90,127 +101,4 @@ public class YAMLExtractor implements 
Extractor.ContentExtractor {
         return YAMLExtractorFactory.getDescriptionInstance();
     }
 
-    private Optional<Value> buildNode(IRI fileURI, Object treeData, 
ExtractionResult out, Resource... parent) {
-
-        if (treeData != null) {
-            log.debug("object type: {}", treeData.getClass());
-        }
-
-        if (treeData == null) {
-            return Optional.empty();
-        } else if (treeData instanceof Map) {
-            return Optional.ofNullable(processMap(fileURI, (Map) treeData, 
out, parent));
-        } else if (treeData instanceof List) {
-            return Optional.ofNullable(processList(fileURI, (List) treeData, 
out, parent));
-        } else if (treeData instanceof Long) {
-            return Optional.of(RDFUtils.literal(((Long) treeData)));
-        } else if (treeData instanceof Integer) {
-            return Optional.of(RDFUtils.literal(((Integer) treeData)));
-        } else if (treeData instanceof Float) {
-            return Optional.of(RDFUtils.literal((Float) treeData));
-        } else if (treeData instanceof Double) {
-            return Optional.of(RDFUtils.literal((Double) treeData));
-        } else if (treeData instanceof Byte) {
-            return Optional.of(RDFUtils.literal((Byte) treeData));
-        } else if (treeData instanceof Boolean) {
-            return Optional.of(RDFUtils.literal((Boolean) treeData));
-        } else {
-            return Optional.of(processString((String) treeData));
-        }
-    }
-
-    private Value processMap(IRI file, Map<String, Object> node, 
ExtractionResult out, Resource... parent) {
-        Resource nodeURI = Arrays.asList(parent).isEmpty() ? 
YAMLExtractor.this.makeUri(file) : parent[0];
-        
-
-        node.keySet().forEach((k) -> {
-            /* False prevents adding _<int> to the predicate.
-            Thus the predicate pattern is:
-            "some string" ---> ns:someString
-            */
-            Resource predicate = RDFUtils.makeIRI(k, file, false);
-            Optional<Value> isValue = buildNode(file, node.get(k), out);
-            out.writeTriple(nodeURI, RDF.TYPE, vocab.mapping);
-            if (isValue.isPresent()) {
-                out.writeTriple(nodeURI, (IRI) predicate, isValue.get());
-            }
-            out.writeTriple(predicate, RDF.TYPE, RDF.PREDICATE);
-            out.writeTriple(predicate, RDFS.LABEL, RDFUtils.literal(k));
-        });
-        return nodeURI;
-    }
-
-    private Value processList(IRI fileURI, Iterable iter, ExtractionResult 
out, Resource... parent) {
-        Resource node = YAMLExtractor.this.makeUri();
-        out.writeTriple(node, RDF.TYPE, RDF.LIST);
-        
-        if (!Arrays.asList(parent).isEmpty()) {
-            out.writeTriple(parent[0], vocab.contains, node);
-        }
-
-        Resource pList = null; // previous RDF iter node
-        Resource cList = node; // cutternt RDF iter node
-        Iterator<?> listIter = iter.iterator();
-        while (listIter.hasNext()) {
-            // If previous RDF iter node is given lint with current one
-            if (pList != null) {
-                out.writeTriple(pList, RDF.REST, cList);
-            }
-            // adds value to the current iter
-            Optional<Value> isValue = buildNode(fileURI, listIter.next(), out);
-            out.writeTriple(cList, RDF.FIRST, isValue.orElse(RDF.NIL));
-            // makes current node the previuos one and generate new current 
node
-            pList = cList;
-            cList = YAMLExtractor.this.makeUri();
-        }
-        out.writeTriple(pList, RDF.REST, RDF.NIL);
-
-        return node;
-    }
-    
-    private Value processString(String str) {
-        if (RDFUtils.isAbsoluteIRI(str)) {
-            return RDFUtils.iri(str);
-        } else {
-            return RDFUtils.literal(str);
-        }
-    }
-
-    private Resource makeUri() {
-        Resource bnode = RDFUtils.bnode(Integer.toString(nodeId));
-        nodeId++;
-        return bnode;
-    }
-
-    private Resource makeUri(IRI docUri) {
-        return makeUri("node", docUri);
-}
-
-    private Resource makeUri(String type, IRI docUri) {
-        return makeUri(type, docUri, true);
-    }
-
-    private Resource makeUri(String type, IRI docUri, boolean addId) {
-
-        // preprocess string: converts - -> _
-        //                    converts <space>: word1 word2 -> word1Word2
-        String newType = StringUtils.implementJavaNaming(type);
-
-        String uriString;
-        if (docUri.toString().endsWith("/")) {
-            uriString = docUri.toString() + newType;
-        } else {
-            uriString = docUri.toString() + "#" + newType;
-        }
-
-        if (addId) {
-            uriString = uriString + "_" + Integer.toString(nodeId);
-        }
-
-        Resource node = RDFUtils.iri(uriString);
-        if (addId) {
-            nodeId++;
-        }
-        return node;
-    }
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/657912b3/core/src/main/java/org/apache/any23/extractor/yaml/YAMLProcessorException.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/extractor/yaml/YAMLProcessorException.java
 
b/core/src/main/java/org/apache/any23/extractor/yaml/YAMLProcessorException.java
new file mode 100644
index 0000000..6c113b1
--- /dev/null
+++ 
b/core/src/main/java/org/apache/any23/extractor/yaml/YAMLProcessorException.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2017 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.any23.extractor.yaml;
+
+import javax.annotation.Nullable;
+
+/**
+ *
+ * @author Jacek Grzebyta (grzebyta.dev [at] gmail.com)
+ */
+public class YAMLProcessorException extends RuntimeException {
+    
+    @Nullable
+    private Object reason;
+
+    public Object getReason() {
+        return reason;
+    }
+
+    public YAMLProcessorException() {
+        super();
+    }
+
+    public YAMLProcessorException(Object reason) {
+        this.reason = reason;
+    }
+
+    public YAMLProcessorException(Object reason, String message) {
+        super(message);
+        this.reason = reason;
+    }
+
+    public YAMLProcessorException(Object reason, String message, Throwable 
cause) {
+        super(message, cause);
+        this.reason = reason;
+    }
+
+    public YAMLProcessorException(Object reason, Throwable cause) {
+        super(cause);
+        this.reason = reason;
+    }
+
+    public YAMLProcessorException(Object reason, String message, Throwable 
cause, boolean enableSuppression, boolean writableStackTrace) {
+        super(message, cause, enableSuppression, writableStackTrace);
+        this.reason = reason;
+    }
+    
+    
+    
+    
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/657912b3/core/src/test/java/org/apache/any23/extractor/yaml/YAMLExtractorTest.java
----------------------------------------------------------------------
diff --git 
a/core/src/test/java/org/apache/any23/extractor/yaml/YAMLExtractorTest.java 
b/core/src/test/java/org/apache/any23/extractor/yaml/YAMLExtractorTest.java
index 57588e9..9f6d01e 100644
--- a/core/src/test/java/org/apache/any23/extractor/yaml/YAMLExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/yaml/YAMLExtractorTest.java
@@ -33,7 +33,7 @@ import org.slf4j.LoggerFactory;
 /**
  * Test {@link YAMLExtractor}.
  *
- * @author Jacek Grzebyta (grzebyta.dev [at] gmail.com)
+ * @author Jacek Grzebyta (jgrzebyta [at] apache [dot] org)
  */
 public class YAMLExtractorTest extends AbstractExtractorTestCase {
 
@@ -46,6 +46,17 @@ public class YAMLExtractorTest extends 
AbstractExtractorTestCase {
         return new YAMLExtractorFactory();
     }
 
+    /**
+     * Test to validate simple yaml file for ANY23-312
+     */
+    @Test
+    public void simpleTest312()
+            throws Exception {
+        assertExtract("/org/apache/any23/extractor/yaml/simple-312.yml");
+        log.debug(dumpModelToTurtle());
+        assertModelNotEmpty();
+    }
+
     @Test
     public void simpleFileLoading()
             throws Exception {
@@ -89,24 +100,28 @@ public class YAMLExtractorTest extends 
AbstractExtractorTestCase {
         assertExtract("/org/apache/any23/extractor/yaml/test-null.yml");
         log.debug(dumpModelToTurtle());
         assertModelNotEmpty();
+        /**
+         * Since the map is empty, the output should not contain any resource 
typed as a mapping.
+         */
+        assertNotContains(RDF.TYPE, vocab.mapping); 
         int statements = dumpAsListOfStatements().size();
-        Assert.assertTrue("Found " + statements + " statements",statements == 
9);
+        Assert.assertTrue("Found " + statements + " statements", statements == 
9);
     }
-    
+
     @Test
     public void treeTest() throws Exception {
         assertExtract("/org/apache/any23/extractor/yaml/tree.yml");
         log.debug(dumpModelToTurtle());
         assertModelNotEmpty();
         // validate part of the tree structure
-        assertContainsModel(new Statement[] {
+        assertContainsModel(new Statement[]{
             RDFUtils.triple(RDFUtils.bnode(), RDFUtils.iri(ns, "value3"), 
RDFUtils.bnode("10")),
             RDFUtils.triple(RDFUtils.bnode("10"), RDF.FIRST, 
RDFUtils.bnode("11")),
             RDFUtils.triple(RDFUtils.bnode("11"), RDFUtils.iri(ns, "key3.1"), 
RDFUtils.bnode("12")),
             RDFUtils.triple(RDFUtils.bnode("12"), RDF.TYPE, RDF.LIST),
-            RDFUtils.triple(RDFUtils.bnode("12"), RDF.FIRST, 
RDFUtils.literal("value3.1.1" ))
+            RDFUtils.triple(RDFUtils.bnode("12"), RDF.FIRST, 
RDFUtils.literal("value3.1.1"))
         });
-        
+
         // validate occurence of <urn:value1> resource
         assertContains(RDFUtils.triple(RDFUtils.bnode(), RDF.FIRST, 
RDFUtils.iri("urn:value1")));
     }

http://git-wip-us.apache.org/repos/asf/any23/blob/657912b3/test-resources/src/test/resources/org/apache/any23/extractor/yaml/simple-312.yml
----------------------------------------------------------------------
diff --git 
a/test-resources/src/test/resources/org/apache/any23/extractor/yaml/simple-312.yml
 
b/test-resources/src/test/resources/org/apache/any23/extractor/yaml/simple-312.yml
new file mode 100644
index 0000000..ecf7768
--- /dev/null
+++ 
b/test-resources/src/test/resources/org/apache/any23/extractor/yaml/simple-312.yml
@@ -0,0 +1,4 @@
+%YAML 1.2
+---
+Some test
+is fast
\ No newline at end of file

Reply via email to