Repository: any23 Updated Branches: refs/heads/master ebcaaaecb -> 8f0db89ea
ANY23-167 fixed: parent itemscopes inherited all properties from child itemscopes Project: http://git-wip-us.apache.org/repos/asf/any23/repo Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/8f0db89e Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/8f0db89e Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/8f0db89e Branch: refs/heads/master Commit: 8f0db89ea1c4ca1c9c59d740b5ee4fe687957e5a Parents: ebcaaae Author: Hans <[email protected]> Authored: Thu Apr 12 18:12:02 2018 -0500 Committer: Hans <[email protected]> Committed: Thu Apr 12 18:12:02 2018 -0500 ---------------------------------------------------------------------- .../any23/extractor/microdata/MicrodataExtractor.java | 6 +++--- .../any23/extractor/microdata/MicrodataParser.java | 12 ++++++------ .../5.2.1-non-normative-example-1-expected.nquads | 2 -- .../microdata/microdata-json-serialization.json | 2 +- .../microdata/microdata-nested-expected.nquads | 1 - .../microdata/microdata-nested-expected.properties | 2 +- .../microdata/microdata-richsnippet-expected.nquads | 3 --- 7 files changed, 11 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/any23/blob/8f0db89e/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java index 358a0f9..513ffbb 100644 --- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java @@ -442,9 +442,9 @@ public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor { if (itemScopeType != null) { out.writeTriple(subject, RDF.TYPE, itemScopeType); } - for (String propName : itemScope.getProperties().keySet()) { - List<ItemProp> itemProps = itemScope.getProperties().get(propName); - for (ItemProp itemProp : itemProps) { + for (Map.Entry<String, List<ItemProp>> itemProps : itemScope.getProperties().entrySet()) { + String propName = itemProps.getKey(); + for (ItemProp itemProp : itemProps.getValue()) { try { processProperty( subject, http://git-wip-us.apache.org/repos/asf/any23/blob/8f0db89e/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java index cf05e35..e54a533 100644 --- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java +++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java @@ -88,7 +88,7 @@ public class MicrodataParser { private ErrorMode errorMode = ErrorMode.FULL_REPORT; /** - * List of collected errors. Used when {@link #errorMode} <code>==</code> {@link ErrorMode#FullReport}. + * List of collected errors. Used when {@link #errorMode} <code>==</code> {@link ErrorMode#FULL_REPORT}. */ private List<MicrodataParserException> errors = new ArrayList<>(); @@ -379,11 +379,11 @@ public class MicrodataParser { if (attributes.getNamedItem(ITEMPROP_ATTRIBUTE) != null && !scopeNode.equals(node)) { accepted.add(node); } -// ANY23-131 Nested Microdata are not extracted -// if (attributes.getNamedItem(ITEMSCOPE_ATTRIBUTE) != null) { -// // Don't visit descendants of nodes that define a new scope -// return FILTER_REJECT; -// } + + if (attributes.getNamedItem(ITEMSCOPE_ATTRIBUTE) != null) { + // Don't visit descendants of nodes that define a new scope + return FILTER_REJECT; + } } return FILTER_ACCEPT; } http://git-wip-us.apache.org/repos/asf/any23/blob/8f0db89e/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1-expected.nquads ---------------------------------------------------------------------- diff --git a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1-expected.nquads b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1-expected.nquads index 8409a61..0c1a3ca 100644 --- a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1-expected.nquads +++ b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1-expected.nquads @@ -16,8 +16,6 @@ # <http://books.example.com/works/45U8QJGZSQKDH8N> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/vocab/frbr/core#Work> <http://bob.example.com/> . -<http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/dc/terms/type> <http://books.example.com/product-types/BOOK> <http://bob.example.com/> . -<http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/dc/terms/type> <http://books.example.com/product-types/EBOOK> <http://bob.example.com/> . <http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/dc/terms/title> "Just a Geek" <http://bob.example.com/> . <http://books.example.com/products/9780596007683.BOOK> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/vocab/frbr/core#Expression> <http://bob.example.com/> . <http://books.example.com/products/9780596007683.BOOK> <http://purl.org/dc/terms/type> <http://books.example.com/product-types/BOOK> <http://bob.example.com/> . http://git-wip-us.apache.org/repos/asf/any23/blob/8f0db89e/test-resources/src/test/resources/microdata/microdata-json-serialization.json ---------------------------------------------------------------------- diff --git a/test-resources/src/test/resources/microdata/microdata-json-serialization.json b/test-resources/src/test/resources/microdata/microdata-json-serialization.json index 6a3beec..04d1180 100644 --- a/test-resources/src/test/resources/microdata/microdata-json-serialization.json +++ b/test-resources/src/test/resources/microdata/microdata-json-serialization.json @@ -1 +1 @@ -{ "result" : [{ "xpath" : "/HTML[1]/BODY[1]/DIV[1]", "id" : "amanda", "refs" : ["a", "b"], "type" : null, "itemid" : null, "properties" : [ ] }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]", "id" : null, "refs" : [], "type" : "http://schema.org/Movie", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "name" : "director", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "id" : null, "refs" : [], "type" : "http://schema.org/Person", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/H1[1]", "name" : "name", "value" : { "content" : "Avatar", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] }] } \ No newline at end of file +{ "result" : [{ "xpath" : "/HTML[1]/BODY[1]/DIV[1]", "id" : "amanda", "refs" : ["a", "b"], "type" : null, "itemid" : null, "properties" : [ ] }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]", "id" : null, "refs" : [], "type" : "http://schema.org/Movie", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "name" : "director", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "id" : null, "refs" : [], "type" : "http://schema.org/Person", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/H1[1]", "name" : "name", "value" : { "content" : "Avatar", "type" : "Plain" } } ] }] } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/any23/blob/8f0db89e/test-resources/src/test/resources/microdata/microdata-nested-expected.nquads ---------------------------------------------------------------------- diff --git a/test-resources/src/test/resources/microdata/microdata-nested-expected.nquads b/test-resources/src/test/resources/microdata/microdata-nested-expected.nquads index 663ad5b..2193dc8 100644 --- a/test-resources/src/test/resources/microdata/microdata-nested-expected.nquads +++ b/test-resources/src/test/resources/microdata/microdata-nested-expected.nquads @@ -21,5 +21,4 @@ _:node1fd8d9ab2f041cdaecbae55b76fadc1 <http://www.w3.org/1999/02/22-rdf-syntax-n _:node1fd8d9ab2f041cdaecbae55b76fadc1 <http://schema.org/name> "James Cameron" <http://bob.example.com/> . _:node3ecb85b37ebfd65a5d57ab82374a5 <http://schema.org/director> _:node1fd8d9ab2f041cdaecbae55b76fadc1 <http://bob.example.com/> . _:node3ecb85b37ebfd65a5d57ab82374a5 <http://schema.org/name> "Avatar" <http://bob.example.com/> . -_:node3ecb85b37ebfd65a5d57ab82374a5 <http://schema.org/name> "James Cameron" <http://bob.example.com/> . <http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:node3ecb85b37ebfd65a5d57ab82374a5 <http://bob.example.com/> . \ No newline at end of file http://git-wip-us.apache.org/repos/asf/any23/blob/8f0db89e/test-resources/src/test/resources/microdata/microdata-nested-expected.properties ---------------------------------------------------------------------- diff --git a/test-resources/src/test/resources/microdata/microdata-nested-expected.properties b/test-resources/src/test/resources/microdata/microdata-nested-expected.properties index ca05f33..4166db1 100644 --- a/test-resources/src/test/resources/microdata/microdata-nested-expected.properties +++ b/test-resources/src/test/resources/microdata/microdata-nested-expected.properties @@ -16,4 +16,4 @@ # result0={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]", "id" : "amanda", "refs" : ["a", "b"], "type" : null, "itemid" : null, "properties" : [ ] } -result1={ "xpath" : "/HTML[1]/BODY[1]/DIV[4]", "id" : null, "refs" : [], "type" : "http://schema.org/Movie", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "name" : "director", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "id" : null, "refs" : [], "type" : "http://schema.org/Person", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/H1[1]", "name" : "name", "value" : { "content" : "Avatar", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] } \ No newline at end of file +result1={ "xpath" : "/HTML[1]/BODY[1]/DIV[4]", "id" : null, "refs" : [], "type" : "http://schema.org/Movie", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "name" : "director", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "id" : null, "refs" : [], "type" : "http://schema.org/Person", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/H1[1]", "name" : "name", "value" : { "content" : "Avatar", "type" : "Plain" } } ] } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/any23/blob/8f0db89e/test-resources/src/test/resources/microdata/microdata-richsnippet-expected.nquads ---------------------------------------------------------------------- diff --git a/test-resources/src/test/resources/microdata/microdata-richsnippet-expected.nquads b/test-resources/src/test/resources/microdata/microdata-richsnippet-expected.nquads index 73cf794..4f4586b 100644 --- a/test-resources/src/test/resources/microdata/microdata-richsnippet-expected.nquads +++ b/test-resources/src/test/resources/microdata/microdata-richsnippet-expected.nquads @@ -22,11 +22,8 @@ _:nodee94f8737ad89876c85bd87156a1eb585 <http://data-vocabulary.org/locality> "Wa _:nodee94f8737ad89876c85bd87156a1eb585 <http://data-vocabulary.org/region> "Georgia" <http://bob.example.com/> . _:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/address> _:nodee94f8737ad89876c85bd87156a1eb585 <http://bob.example.com/> . _:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/affiliation> "University of Dreams" <http://bob.example.com/> . -_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/street-address> "1234 Peach Drive" <http://bob.example.com/> . _:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/name> "John Doe" <http://bob.example.com/> . _:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/nickname> "Johnny" <http://bob.example.com/> . -_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/locality> "Warner Robins" <http://bob.example.com/> . _:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/title> "graduate research assistant" <http://bob.example.com/> . -_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/region> "Georgia" <http://bob.example.com/> . _:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/url> <http://www.JohnnyD.com> <http://bob.example.com/> . <http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:node9423934b5f186fd49d90edd31b5625ba <http://bob.example.com/> . \ No newline at end of file
