Repository: any23 Updated Branches: refs/heads/master 0aa3d54c4 -> 13d04c742
ANY23-410 fix microdata itemrefs Project: http://git-wip-us.apache.org/repos/asf/any23/repo Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/13d04c74 Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/13d04c74 Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/13d04c74 Branch: refs/heads/master Commit: 13d04c7426b10c7bf982dacfc5cfd2bee2385b0e Parents: 0aa3d54 Author: Hans <[email protected]> Authored: Thu Oct 25 17:45:09 2018 -0500 Committer: Hans <[email protected]> Committed: Thu Oct 25 17:45:09 2018 -0500 ---------------------------------------------------------------------- .../extractor/microdata/MicrodataParser.java | 23 +++++++++----------- .../microdata/MicrodataParserTest.java | 17 ++++++--------- .../microdata-itemref-expected.properties | 18 ++++++--------- .../resources/microdata/microdata-itemref.html | 6 ++--- .../microdata/microdata-json-serialization.json | 2 +- .../microdata/microdata-nested-expected.nquads | 4 ++++ .../microdata-nested-expected.properties | 2 +- 7 files changed, 32 insertions(+), 40 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/any23/blob/13d04c74/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java index 95fd94b..f59bbdb 100644 --- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java +++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java @@ -62,12 +62,12 @@ public class MicrodataParser { /** * This set holds the name of properties being dereferenced. 
- * The {@link #deferProperties(Node, String...)} checks first if the + * The {@link #deferProperties(String...)} checks first if the * required dereference has been already asked, if so raises * a loop detection error. This map works in coordination * with {@link #dereferenceRecursionCounter}, so that at the end of - * {@link #deferProperties(Node, String...)} call recursion the - * {@link #loopDetectorSet} can be cleaned up. + * {@link #deferProperties(String...)} call recursion the + * loopDetectorSet can be cleaned up. */ private final Set<String> loopDetectorSet = new HashSet<>(); @@ -82,7 +82,7 @@ public class MicrodataParser { private final Map<Node, ItemPropValue> itemPropValues = new HashMap<>(); /** - * Counts the recursive call of {@link #deferProperties(Node, String...)}. + * Counts the recursive call of {@link #deferProperties(String...)}. * It helps to cleanup the {@link #loopDetectorSet} when recursion ends. */ private int dereferenceRecursionCounter = 0; @@ -495,12 +495,12 @@ public class MicrodataParser { * Given a document and a list of <b>itemprop</b> names this method will return * such <b>itemprops</b>. * - * @param node a {@link org.w3c.dom.Node} to which the refs belong * @param refs list of references. * @return list of retrieved <b>itemprop</b>s. * @throws MicrodataParserException if a loop is detected or a property name is missing. */ - public ItemProp[] deferProperties(Node node, String... refs) throws MicrodataParserException { + public ItemProp[] deferProperties(String... 
refs) throws MicrodataParserException { + Document document = this.document; dereferenceRecursionCounter++; final List<ItemProp> result = new ArrayList<>(); try { @@ -515,17 +515,14 @@ public class MicrodataParser { ); } loopDetectorSet.add(ref); - Element element = (Element) node; + Element element = document.getElementById(ref); if (element == null) { manageError( new MicrodataParserException( String.format("Unknown itemProp id '%s'", ref ), null ) ); continue; } - List<ItemProp> propList = getItemProps(element, false); - if (!result.containsAll(propList)) { - result.addAll(propList); - } + result.addAll(getItemProps(element, false)); } } catch (MicrodataParserException mpe) { if(dereferenceRecursionCounter == 1) @@ -558,10 +555,10 @@ public class MicrodataParser { final String itemId = DomUtils.readAttribute(node, "itemid" , null); final List<ItemProp> itemProps = getItemProps(node, true); - final String[] itemrefIDs = itemref == null ? new String[0] : itemref.split(" "); + final String[] itemrefIDs = itemref == null ? 
new String[0] : itemref.split("\\s+"); final ItemProp[] deferredProperties; try { - deferredProperties = deferProperties(node, itemrefIDs); + deferredProperties = deferProperties(itemrefIDs); } catch (MicrodataParserException mpe) { mpe.setErrorNode(node); throw mpe; http://git-wip-us.apache.org/repos/asf/any23/blob/13d04c74/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java index c58a92b..8b2addd 100644 --- a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java +++ b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java @@ -30,16 +30,13 @@ import java.util.GregorianCalendar; import java.util.List; import java.util.Properties; import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.any23.extractor.html.TagSoupParser; import org.apache.any23.util.StreamUtils; import org.apache.commons.io.IOUtils; import org.junit.Assert; -import org.junit.Rule; import org.junit.Test; -import org.junit.rules.Timeout; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; @@ -163,8 +160,8 @@ public class MicrodataParserTest { public void testDeferProperties() throws IOException, MicrodataParserException { final Document document = getMicrodataDom("microdata-itemref"); final MicrodataParser parser = new MicrodataParser(document); - final ItemProp[] deferred = parser.deferProperties(document.getElementById("is2"), "ip5", "ip4", "ip3", "unexisting"); - Assert.assertEquals(2, deferred.length); + final ItemProp[] deferred = parser.deferProperties("ip5", "ip4", "ip3", "unexisting"); + Assert.assertEquals(3, deferred.length); } /** @@ -178,7 
+175,7 @@ public class MicrodataParserTest { final Document document = getMicrodataDom("microdata-itemref"); final MicrodataParser parser = new MicrodataParser(document); parser.setErrorMode(MicrodataParser.ErrorMode.STOP_AT_FIRST_ERROR); - parser.deferProperties(null, "loop0"); + parser.deferProperties("loop0"); } /** @@ -192,7 +189,7 @@ public class MicrodataParserTest { final Document document = getMicrodataDom("microdata-itemref"); final MicrodataParser parser = new MicrodataParser(document); parser.setErrorMode(MicrodataParser.ErrorMode.STOP_AT_FIRST_ERROR); - parser.deferProperties(null, "loop2"); + parser.deferProperties("loop2"); } /** @@ -207,9 +204,9 @@ public class MicrodataParserTest { final Document document = getMicrodataDom("microdata-itemref"); final MicrodataParser parser = new MicrodataParser(document); String ip1 = "ip1"; - Assert.assertEquals(1, parser.deferProperties(document.getElementById(ip1), ip1).length); - Assert.assertEquals(1, parser.deferProperties(document.getElementById(ip1), ip1).length); - Assert.assertEquals(1, parser.deferProperties(document.getElementById(ip1), ip1).length); + Assert.assertEquals(1, parser.deferProperties(ip1).length); + Assert.assertEquals(1, parser.deferProperties(ip1).length); + Assert.assertEquals(1, parser.deferProperties(ip1).length); } private Document getDom(String document) throws IOException { http://git-wip-us.apache.org/repos/asf/any23/blob/13d04c74/test-resources/src/test/resources/microdata/microdata-itemref-expected.properties ---------------------------------------------------------------------- diff --git a/test-resources/src/test/resources/microdata/microdata-itemref-expected.properties b/test-resources/src/test/resources/microdata/microdata-itemref-expected.properties index 843e957..aff6bf9 100644 --- a/test-resources/src/test/resources/microdata/microdata-itemref-expected.properties +++ b/test-resources/src/test/resources/microdata/microdata-itemref-expected.properties @@ -16,14 +16,10 @@ # 
result0={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]", "id" : "is1", "refs" : [], "type" : "http://type/IScopeType1", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/A[1]", "name" : "p1", "value" : { "content" : "http://www.domain.org/path/1", "type" : "Link" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/P[1]", "name" : "p2", "value" : { "content" : "Some Text 1", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/P[2]", "name" : "p3", "value" : { "content" : "Some Text 2", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/A[2]", "name" : "p4", "value" : { "content" : "http://www.domain.org/path/2", "type" : "Link" } } ] } -result1={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]", "id" : "is2", "refs" : ["ip5", "ip4", "ip3", "unexisting"], "type" : "http://type/IScopeType2", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]/IMG[1]", "name" : "p5", "value" : { "content" : "http://source/dom/path", "type" : "Link" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]/P[1]", "name" : "p6", "value" : { "content" : "Some Text 3", "type" : "Plain" } } ] } -result2={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]", "id" : "loops", "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[2]", "name" : "head", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[2]", "id" : "loop1", "refs" : ["loop2"], "type" : null, "itemid" : null, "properties" : [ ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "id" : "loop2", "refs" : ["loop3"], "type" : null, "itemid" : null, "properties" : [ ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "id" : "loop3", "refs" : ["loop4"], "type" : null, 
"itemid" : null, "properties" : [ ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[5]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[5]", "id" : "loop4", "refs" : ["loop2"], "type" : null, "itemid" : null, "properties" : [ ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[1]", "name" : "self", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[1]", "id" : "loop0", "refs" : ["loop0"], "type" : null, "itemid" : null, "properties" : [ ] }, "type" : "Nested" } } ] } -result3={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[2]", "id" : "idItem", "refs" : ["insideOut"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[2]/DIV[1]", "name" : "prop", "value" : { "content" : "Included via tree.", "type" : "Plain" } } ] } -error0={ "message" : "Duplicated deferred itemProp 'p5'.", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]", "begin_row" : 28, "begin_col" : 5, "end_row" : 29, "end_col" : 40 } -error1={ "message" : "Duplicated deferred itemProp 'p6'.", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]", "begin_row" : 28, "begin_col" : 5, "end_row" : 29, "end_col" : 40 } -error2={ "message" : "Loop detected with depth 1 while dereferencing itemProp 'loop0' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[1]", "begin_row" : 36, "begin_col" : 7, "end_row" : 36, "end_col" : 65 } -error3={ "message" : "Loop detected with depth 1 while dereferencing itemProp 'loop2' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[2]", "begin_row" : 37, "begin_col" : 7, "end_row" : 37, "end_col" : 65 } -error4={ "message" : "Loop detected with depth 1 while dereferencing itemProp 'loop3' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "begin_row" : 39, "begin_col" : 7, "end_row" : 39, "end_col" : 65 } -error5={ "message" : "Loop detected with depth 1 while dereferencing itemProp 'loop4' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", 
"begin_row" : 40, "begin_col" : 7, "end_row" : 40, "end_col" : 65 } -error6={ "message" : "Loop detected with depth 1 while dereferencing itemProp 'loop2' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[5]", "begin_row" : 41, "begin_col" : 7, "end_row" : 41, "end_col" : 65 } -error7={ "message" : "Duplicated deferred itemProp 'prop'.", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[2]", "begin_row" : 48, "begin_col" : 7, "end_row" : 48, "end_col" : 54 } \ No newline at end of file +result1={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]", "id" : "is2", "refs" : ["ip5", "ip4", "ip3", "unexisting"], "type" : "http://type/IScopeType2", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/P[2]", "name" : "p3", "value" : { "content" : "Some Text 2", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/A[2]", "name" : "p4", "value" : { "content" : "http://www.domain.org/path/2", "type" : "Link" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]/IMG[1]", "name" : "p5", "value" : { "content" : "http://source/dom/path", "type" : "Link" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]/P[1]", "name" : "p6", "value" : { "content" : "Some Text 3", "type" : "Plain" } } ] } +result2={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]", "id" : "loops", "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[2]", "name" : "head", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[2]", "id" : "loop1", "refs" : ["loop2"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "id" : "loop2", "refs" : ["loop3"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "id" : "loop3", "refs" : ["loop4"], "type" : 
null, "itemid" : null, "properties" : [ ] }, "type" : "Nested" } } ] }, "type" : "Nested" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "id" : "loop2", "refs" : ["loop3"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "id" : "loop3", "refs" : ["loop4"], "type" : null, "itemid" : null, "properties" : [ ] }, "type" : "Nested" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "id" : "loop3", "refs" : ["loop4"], "type" : null, "itemid" : null, "properties" : [ ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[5]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[5]", "id" : "loop4", "refs" : ["loop2"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "id" : "loop2", "refs" : ["loop3"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "id" : "loop3", "refs" : ["loop4"], "type" : null, "itemid" : null, "properties" : [ ] }, "type" : "Nested" } } ] }, "type" : "Nested" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[1]", "name" : "self", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[1]", "id" : "loop0", "refs" : ["loop0"], "type" : null, "itemid" : null, "properties" : [ ] }, "type" : "Nested" } } ] } +result3={ "xpath" : 
"/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[2]", "id" : "idItem", "refs" : ["insideOut"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[2]/DIV[1]", "name" : "prop", "value" : { "content" : "Included via tree.", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[1]", "name" : "prop", "value" : { "content" : "Included via parent, before.", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[3]", "name" : "prop", "value" : { "content" : "Included via parent, after.", "type" : "Plain" } } ] } +error0={ "message" : "Unknown itemProp id 'unexisting'", "path" : "null", "begin_row" : -1, "begin_col" : -1, "end_row" : -1, "end_col" : -1 } +error1={ "message" : "Duplicated deferred itemProp 'p5'.", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]", "begin_row" : -1, "begin_col" : -1, "end_row" : -1, "end_col" : -1 } +error2={ "message" : "Loop detected with depth 1 while dereferencing itemProp 'loop0' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[1]", "begin_row" : -1, "begin_col" : -1, "end_row" : -1, "end_col" : -1 } +error3={ "message" : "Loop detected with depth 3 while dereferencing itemProp 'loop2' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[5]", "begin_row" : -1, "begin_col" : -1, "end_row" : -1, "end_col" : -1 } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/any23/blob/13d04c74/test-resources/src/test/resources/microdata/microdata-itemref.html ---------------------------------------------------------------------- diff --git a/test-resources/src/test/resources/microdata/microdata-itemref.html b/test-resources/src/test/resources/microdata/microdata-itemref.html index f8ff990..bf91376 100644 --- a/test-resources/src/test/resources/microdata/microdata-itemref.html +++ b/test-resources/src/test/resources/microdata/microdata-itemref.html @@ -43,13 +43,11 @@ <!-- Inside - Out Nesting. 
--> <div id="insideOut"> - <div id="idBefore" itemprop="prop">Included via parent, - before.</div> + <div id="idBefore" itemprop="prop">Included via parent, before.</div> <div id="idItem" itemscope itemref="insideOut"> <div id="ioChild" itemprop="prop">Included via tree.</div> </div> - <div id="idAfter" itemprop="prop">Included via parent, - after.</div> + <div id="idAfter" itemprop="prop">Included via parent, after.</div> </div> </div> </body> http://git-wip-us.apache.org/repos/asf/any23/blob/13d04c74/test-resources/src/test/resources/microdata/microdata-json-serialization.json ---------------------------------------------------------------------- diff --git a/test-resources/src/test/resources/microdata/microdata-json-serialization.json b/test-resources/src/test/resources/microdata/microdata-json-serialization.json index 04d1180..10a61a5 100644 --- a/test-resources/src/test/resources/microdata/microdata-json-serialization.json +++ b/test-resources/src/test/resources/microdata/microdata-json-serialization.json @@ -1 +1 @@ -{ "result" : [{ "xpath" : "/HTML[1]/BODY[1]/DIV[1]", "id" : "amanda", "refs" : ["a", "b"], "type" : null, "itemid" : null, "properties" : [ ] }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]", "id" : null, "refs" : [], "type" : "http://schema.org/Movie", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "name" : "director", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "id" : null, "refs" : [], "type" : "http://schema.org/Person", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/H1[1]", "name" : "name", "value" : { "content" : "Avatar", "type" : "Plain" } } ] }] } \ No newline at end of file +{ "result" : [{ "xpath" : "/HTML[1]/BODY[1]/DIV[1]", "id" : "amanda", "refs" : ["a", "b"], "type" : null, "itemid" : null, 
"properties" : [ { "xpath" : "/HTML[1]/BODY[1]/P[1]/SPAN[1]", "name" : "name", "value" : { "content" : "Amanda", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[2]", "name" : "band", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[2]", "id" : "b", "refs" : ["c"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[2]/SPAN[1]", "name" : "size", "value" : { "content" : "12", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[1]/SPAN[1]", "name" : "name", "value" : { "content" : "Jazz Band", "type" : "Plain" } } ] }, "type" : "Nested" } } ] }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]", "id" : null, "refs" : [], "type" : "http://schema.org/Movie", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "name" : "director", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "id" : null, "refs" : [], "type" : "http://schema.org/Person", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/H1[1]", "name" : "name", "value" : { "content" : "Avatar", "type" : "Plain" } } ] }] } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/any23/blob/13d04c74/test-resources/src/test/resources/microdata/microdata-nested-expected.nquads ---------------------------------------------------------------------- diff --git a/test-resources/src/test/resources/microdata/microdata-nested-expected.nquads b/test-resources/src/test/resources/microdata/microdata-nested-expected.nquads index 2193dc8..6435ec4 100644 --- a/test-resources/src/test/resources/microdata/microdata-nested-expected.nquads +++ b/test-resources/src/test/resources/microdata/microdata-nested-expected.nquads @@ -15,6 +15,10 @@ # limitations under the License. 
# +_:node295195eb5d5124e03da26bafc7313bc <http://schema.org/name> "Amanda" <http://bob.example.com/> . +_:node1cqmii7qqx3 <http://schema.org/size> "12" <http://bob.example.com/> . +_:node1cqmii7qqx3 <http://schema.org/name> "Jazz Band" <http://bob.example.com/> . +_:node295195eb5d5124e03da26bafc7313bc <http://schema.org/band> _:node1cqmii7qqx3 <http://bob.example.com/> . <http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:node295195eb5d5124e03da26bafc7313bc <http://bob.example.com/> . _:node3ecb85b37ebfd65a5d57ab82374a5 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Movie> <http://bob.example.com/> . _:node1fd8d9ab2f041cdaecbae55b76fadc1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://bob.example.com/> . http://git-wip-us.apache.org/repos/asf/any23/blob/13d04c74/test-resources/src/test/resources/microdata/microdata-nested-expected.properties ---------------------------------------------------------------------- diff --git a/test-resources/src/test/resources/microdata/microdata-nested-expected.properties b/test-resources/src/test/resources/microdata/microdata-nested-expected.properties index 4166db1..73129f6 100644 --- a/test-resources/src/test/resources/microdata/microdata-nested-expected.properties +++ b/test-resources/src/test/resources/microdata/microdata-nested-expected.properties @@ -15,5 +15,5 @@ # limitations under the License. 
# -result0={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]", "id" : "amanda", "refs" : ["a", "b"], "type" : null, "itemid" : null, "properties" : [ ] } +result0={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]", "id" : "amanda", "refs" : ["a", "b"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/P[1]/SPAN[1]", "name" : "name", "value" : { "content" : "Amanda", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[2]", "name" : "band", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[2]", "id" : "b", "refs" : ["c"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[2]/SPAN[1]", "name" : "size", "value" : { "content" : "12", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[1]/SPAN[1]", "name" : "name", "value" : { "content" : "Jazz Band", "type" : "Plain" } } ] }, "type" : "Nested" } } ] } result1={ "xpath" : "/HTML[1]/BODY[1]/DIV[4]", "id" : null, "refs" : [], "type" : "http://schema.org/Movie", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "name" : "director", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "id" : null, "refs" : [], "type" : "http://schema.org/Person", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/H1[1]", "name" : "name", "value" : { "content" : "Avatar", "type" : "Plain" } } ] } \ No newline at end of file
