adding HEvent and HProduct Extractors
Project: http://git-wip-us.apache.org/repos/asf/any23/repo Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/cc0dfbe8 Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/cc0dfbe8 Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/cc0dfbe8 Branch: refs/heads/master Commit: cc0dfbe8127a00fa712c7d2df6785a73c290feae Parents: 1616c17 Author: Nisala <[email protected]> Authored: Mon Jul 20 01:12:27 2015 +0530 Committer: Nisala <[email protected]> Committed: Mon Jul 20 01:12:27 2015 +0530 ---------------------------------------------------------------------- .../java/org/apache/any23/vocab/HEvent.java | 57 ++++++ .../java/org/apache/any23/vocab/HProduct.java | 58 ++++++ .../html/microformats2/HEventExtractor.java | 195 +++++++++++++++++++ .../microformats2/HEventExtractorFactory.java | 57 ++++++ .../html/microformats2/HProductExtractor.java | 153 +++++++++++++++ .../microformats2/HProductExtractorFactory.java | 56 ++++++ .../apache/any23/prefixes/prefixes.properties | 2 + .../html/microformats2/HEventExtractorTest.java | 37 ++++ .../microformats2/HProductExtractorTest.java | 37 ++++ .../microformats2/h-event/h-event-test.html | 36 ++++ .../microformats2/h-product/h-product-test.html | 36 ++++ 11 files changed, 724 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/api/src/main/java/org/apache/any23/vocab/HEvent.java ---------------------------------------------------------------------- diff --git a/api/src/main/java/org/apache/any23/vocab/HEvent.java b/api/src/main/java/org/apache/any23/vocab/HEvent.java new file mode 100644 index 0000000..b936c3e --- /dev/null +++ b/api/src/main/java/org/apache/any23/vocab/HEvent.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.any23.vocab; + +import org.openrdf.model.URI; + +/** + * Vocabulary to map the <a href="http://microformats.org/wiki/h-event">h-event</a> microformat. + * + * @author Nisala Nirmana + */ +public class HEvent extends Vocabulary { + public static final String NS = SINDICE.NS + "hevent/"; + + private static HEvent instance; + + public static HEvent getInstance() { + if(instance == null) { + instance = new HEvent(); + } + return instance; + } + + public URI event = createClass(NS, "Event"); + + + public URI name = createProperty(NS, "name"); + public URI summary = createProperty(NS, "summary"); + public URI start = createProperty(NS, "start"); + public URI end = createProperty(NS, "end"); + public URI duration = createProperty(NS, "duration"); + public URI description = createProperty(NS, "description"); + public URI url = createProperty(NS, "url"); + public URI category = createProperty(NS, "category"); + public URI location = createProperty(NS, "location"); + public URI attendee = createProperty(NS, "attendee"); + + + private HEvent() { + super(NS); + } +} http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/api/src/main/java/org/apache/any23/vocab/HProduct.java ---------------------------------------------------------------------- diff --git a/api/src/main/java/org/apache/any23/vocab/HProduct.java b/api/src/main/java/org/apache/any23/vocab/HProduct.java new file mode 100644 index 0000000..9630db3 --- /dev/null +++ b/api/src/main/java/org/apache/any23/vocab/HProduct.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.any23.vocab; + +import org.openrdf.model.URI; + +/** + * Vocabulary to map the <a href="http://microformats.org/wiki/hitem">h-item</a> microformat. + * + * @author Nisala Nirmana + */ + +public class HProduct extends Vocabulary { + public static final String NS = SINDICE.NS + "hproduct/"; + + private static HProduct instance; + + public static HProduct getInstance() { + if(instance == null) { + instance = new HProduct(); + } + return instance; + } + + public URI product = createClass(NS, "Product"); + + + public URI name = createProperty(NS, "name"); + public URI photo = createProperty(NS, "photo"); + public URI brand = createProperty(NS, "brand"); + public URI category = createProperty(NS, "category"); + public URI description = createProperty(NS, "description"); + public URI url = createProperty(NS, "url"); + public URI identifier = createProperty(NS, "identifier"); + public URI price = createProperty(NS, "price"); + public URI review = createProperty(NS, "review"); + + + private HProduct() { + super(NS); + } + +} http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java new file mode 100644 index 0000000..8ce70a6 --- /dev/null +++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java @@ -0,0 +1,195 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.any23.extractor.html.microformats2; + +import org.apache.any23.extractor.ExtractionException; +import org.apache.any23.extractor.ExtractionResult; +import org.apache.any23.extractor.ExtractorDescription; +import org.apache.any23.extractor.TagSoupExtractionResult; +import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor; +import org.apache.any23.vocab.HEvent; +import org.openrdf.model.BNode; +import org.openrdf.model.URI; +import org.openrdf.model.vocabulary.RDF; +import org.w3c.dom.Node; +import org.apache.any23.extractor.html.HTMLDocument; + +import static org.apache.any23.extractor.html.HTMLDocument.TextField; + + +/** + * Extractor for the <a href="http://microformats.org/wiki/hcalendar">hCalendar</a> + * microformat. + * + * @author Nisala Nirmana + */ +public class HEventExtractor extends EntityBasedMicroformatExtractor { + + private static final HEvent vEvent = HEvent.getInstance(); + + private String[] eventFields = { + "name", + "summary", + "start", + "end", + "duration", + "description", + "url", + "category", + "location", //toDO + "attendee" //toDO + }; + + + @Override + public ExtractorDescription getDescription() { + return HEventExtractorFactory.getDescriptionInstance(); + } + + @Override + protected String getBaseClassName() { + return Microformats2Prefixes.CLASS_PREFIX+"event"; + } + + @Override + protected void resetExtractor() { + // Empty. + } + + @Override + protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException { + final BNode event = getBlankNodeFor(node); + conditionallyAddResourceProperty(event, RDF.TYPE, vEvent.event); + final HTMLDocument fragment = new HTMLDocument(node); + addName(fragment, event); + addSummary(fragment, event); + addStart(fragment, event); + addEnd(fragment, event); + addDuration(fragment, event); + addDescription(fragment, event); + addURLs(fragment, event); + addCategories(fragment, event); + addLocation(fragment, event); + + return true; + } + + private void mapFieldWithProperty(HTMLDocument fragment, BNode recipe, String fieldClass, + URI property) { + HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass); + conditionallyAddStringProperty( + title.source(), recipe, property, title.value() + ); + } + + private void addName(HTMLDocument fragment, BNode event) { + mapFieldWithProperty(fragment, event, Microformats2Prefixes.PROPERTY_PREFIX + + eventFields[0], vEvent.name); + } + + private void addSummary(HTMLDocument fragment, BNode event) { + mapFieldWithProperty(fragment, event, Microformats2Prefixes.PROPERTY_PREFIX + + eventFields[1], vEvent.summary); + } + + private void addStart(HTMLDocument fragment, BNode event) { + final TextField start = fragment.getSingularTextField( + Microformats2Prefixes.TIME_PROPERTY_PREFIX + eventFields[2]); + if(start.source()==null) + return; + Node attribute = start.source().getAttributes().getNamedItem("datetime"); + if (attribute == null) { + conditionallyAddStringProperty( + start.source(), + event, vEvent.start, start.value() + ); + } else { + conditionallyAddStringProperty( + start.source(), + event, vEvent.start, attribute.getNodeValue() + ); + } + } + + private void addEnd(HTMLDocument fragment, BNode event) { + final TextField end = fragment.getSingularTextField( + Microformats2Prefixes.TIME_PROPERTY_PREFIX + eventFields[3]); + if(end.source()==null) + return; + Node attribute = end.source().getAttributes().getNamedItem("datetime"); + if (attribute == null) { + conditionallyAddStringProperty( + end.source(), + event, vEvent.end, end.value() + ); + } else { + conditionallyAddStringProperty( + end.source(), + event, vEvent.end, attribute.getNodeValue() + ); + } + } + + private void addDuration(HTMLDocument fragment, BNode event) { + final TextField duration = fragment.getSingularTextField( + Microformats2Prefixes.TIME_PROPERTY_PREFIX + eventFields[4]); + if(duration.source()==null) + return; + Node attribute = duration.source().getAttributes().getNamedItem("datetime"); + if (attribute == null) { + conditionallyAddStringProperty( + duration.source(), + event, vEvent.duration, duration.value() + ); + } else { + conditionallyAddStringProperty( + duration.source(), + event, vEvent.duration, attribute.getNodeValue() + ); + } + } + + private void addDescription(HTMLDocument fragment, BNode event) { + mapFieldWithProperty(fragment, event, Microformats2Prefixes.PROPERTY_PREFIX + + eventFields[5], vEvent.description); + } + + private void addURLs(HTMLDocument fragment, BNode event) throws ExtractionException { + final HTMLDocument.TextField[] urls = fragment.getPluralUrlField + (Microformats2Prefixes.URL_PROPERTY_PREFIX + eventFields[6]); + for(HTMLDocument.TextField url : urls) { + addURIProperty(event, vEvent.url, fragment.resolveURI(url.value())); + } + } + + private void addCategories(HTMLDocument fragment, BNode event) { + final HTMLDocument.TextField[] categories = fragment.getPluralTextField + (Microformats2Prefixes.PROPERTY_PREFIX + eventFields[7]); + for(HTMLDocument.TextField category : categories) { + conditionallyAddStringProperty( + category.source(), event, vEvent.category, category.value() + ); + } + } + + private void addLocation(HTMLDocument fragment, BNode event) { + mapFieldWithProperty(fragment, event, Microformats2Prefixes.PROPERTY_PREFIX + + eventFields[8], vEvent.location); + } + +} http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java new file mode 100644 index 0000000..602b044 --- /dev/null +++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.any23.extractor.html.microformats2; + +import java.util.Arrays; + +import org.apache.any23.extractor.ExtractorDescription; +import org.apache.any23.extractor.ExtractorFactory; +import org.apache.any23.extractor.SimpleExtractorFactory; +import org.apache.any23.rdf.PopularPrefixes; +import org.apache.any23.rdf.Prefixes; + +/** + * @author Peter Ansell [email protected] + * + */ +public class HEventExtractorFactory extends SimpleExtractorFactory<HEventExtractor> implements + ExtractorFactory<HEventExtractor> { + + public static final String NAME = "html-mf2-h-event"; + + public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "hevent"); + + private static final ExtractorDescription descriptionInstance = new HEventExtractorFactory(); + + public HEventExtractorFactory() { + super( + HEventExtractorFactory.NAME, + HEventExtractorFactory.PREFIXES, + Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"), + "example-mf2-h-event.html"); + } + + @Override + public HEventExtractor createExtractor() { + return new HEventExtractor(); + } + + public static ExtractorDescription getDescriptionInstance() { + return descriptionInstance; + } +} http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java new file mode 100644 index 0000000..0e93935 --- /dev/null +++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.any23.extractor.html.microformats2; + +import org.apache.any23.extractor.ExtractionException; +import org.apache.any23.extractor.ExtractionResult; +import org.apache.any23.extractor.ExtractorDescription; +import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor; +import org.apache.any23.extractor.html.HTMLDocument; +import org.apache.any23.vocab.HProduct; +import org.openrdf.model.BNode; +import org.openrdf.model.URI; +import org.openrdf.model.vocabulary.RDF; +import org.w3c.dom.Node; + +/** + * Extractor for the <a href="http://microformats.org/wiki/h-product">h-product</a> + * microformat. + * + * @author Nisala Nirmana + */ +public class HProductExtractor extends EntityBasedMicroformatExtractor { + + private static final HProduct vProduct = HProduct.getInstance(); + + private static final String[] productFields = { + "name", + "photo", + "brand", //toDo + "category", + "description", + "url", + "identifier", + "review", //toDo + "price" + }; + + @Override + public ExtractorDescription getDescription() { + return HProductExtractorFactory.getDescriptionInstance(); + } + + @Override + protected String getBaseClassName() { + return Microformats2Prefixes.CLASS_PREFIX+"product"; + } + + @Override + protected void resetExtractor() { + // Empty. + } + + @Override + protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException { + final BNode product = getBlankNodeFor(node); + conditionallyAddResourceProperty(product, RDF.TYPE, vProduct.product); + final HTMLDocument fragment = new HTMLDocument(node); + addName(fragment, product); + addPhoto(fragment, product); + addCategories(fragment, product); + addDescription(fragment, product); + addURLs(fragment, product); + addIdentifiers(fragment, product); + addPrice(fragment, product); + return true; + } + + private void mapFieldWithProperty(HTMLDocument fragment, BNode product, String fieldClass, + URI property) { + HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass); + conditionallyAddStringProperty( + title.source(), product, property, title.value() + ); + } + + private void addName(HTMLDocument fragment, BNode product) { + mapFieldWithProperty(fragment, product, Microformats2Prefixes.PROPERTY_PREFIX + + productFields[0], vProduct.name); + } + + private void addPhoto(HTMLDocument fragment, BNode product) throws ExtractionException { + final HTMLDocument.TextField[] photos = fragment.getPluralUrlField + (Microformats2Prefixes.URL_PROPERTY_PREFIX + productFields[1]); + for(HTMLDocument.TextField photo : photos) { + addURIProperty(product, vProduct.photo, fragment.resolveURI(photo.value())); + } + } + + private void addCategories(HTMLDocument fragment, BNode product) { + final HTMLDocument.TextField[] categories = fragment.getPluralTextField + (Microformats2Prefixes.PROPERTY_PREFIX + productFields[3]); + for(HTMLDocument.TextField category : categories) { + conditionallyAddStringProperty( + category.source(), product, vProduct.category, category.value() + ); + } + } + + private void addDescription(HTMLDocument fragment, BNode product) { + mapFieldWithProperty(fragment, product, Microformats2Prefixes.EMBEDDED_PROPERTY_PREFIX + + productFields[4], vProduct.description); + } + + private void addURLs(HTMLDocument fragment, BNode product) throws ExtractionException { + final HTMLDocument.TextField[] urls = fragment.getPluralUrlField + (Microformats2Prefixes.URL_PROPERTY_PREFIX + productFields[5]); + for(HTMLDocument.TextField url : urls) { + addURIProperty(product, vProduct.url, fragment.resolveURI(url.value())); + } + } + + private void addIdentifiers(HTMLDocument fragment, BNode product) throws ExtractionException { + final HTMLDocument.TextField[] identifiers = fragment.getPluralUrlField + (Microformats2Prefixes.URL_PROPERTY_PREFIX + productFields[6]); + for(HTMLDocument.TextField identifier :identifiers) { + addURIProperty(product, vProduct.identifier, fragment.resolveURI(identifier.value())); + } + } + + private void addPrice(HTMLDocument fragment, BNode product) { + final HTMLDocument.TextField price = fragment.getSingularTextField( + Microformats2Prefixes.PROPERTY_PREFIX + productFields[8]); + if(price.source()==null) + return; + Node attribute = price.source().getAttributes().getNamedItem("value"); + if (attribute == null) { + conditionallyAddStringProperty( + price.source(), + product, vProduct.price, price.value() + ); + } else { + conditionallyAddStringProperty( + price.source(), + product, vProduct.price, attribute.getNodeValue() + ); + } + } +} http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractorFactory.java new file mode 100644 index 0000000..f4b65d9 --- /dev/null +++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractorFactory.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.any23.extractor.html.microformats2; + +import org.apache.any23.extractor.ExtractorDescription; +import org.apache.any23.extractor.ExtractorFactory; +import org.apache.any23.extractor.SimpleExtractorFactory; +import org.apache.any23.rdf.PopularPrefixes; +import org.apache.any23.rdf.Prefixes; +import java.util.Arrays; + +/** + * @author Nisala Nirmana + * + */ +public class HProductExtractorFactory extends SimpleExtractorFactory<HProductExtractor> implements + ExtractorFactory<HProductExtractor> { + + public static final String NAME = "html-mf2-h-product"; + + public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "hproduct"); + + private static final ExtractorDescription descriptionInstance = new HProductExtractorFactory(); + + public HProductExtractorFactory() { + super( + HProductExtractorFactory.NAME, + HProductExtractorFactory.PREFIXES, + Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"), + "example-mf2-h-product.html"); + } + + @Override + public HProductExtractor createExtractor() { + return new HProductExtractor(); + } + + public static ExtractorDescription getDescriptionInstance() { + return descriptionInstance; + } +} http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties ---------------------------------------------------------------------- diff --git a/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties b/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties index 58516ec..34e3975 100644 --- a/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties +++ b/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties @@ -32,6 +32,8 @@ ex=http://example.com/ns# wo=http://purl.org/ontology/wo/ skos=http://www.w3.org/2004/02/skos/core# hrecipe=http://sindice.com/hrecipe/ +hevent=http://sindice.com/hevent/ +hproduct=http://sindice.com/hproduct/ sindice=http://vocab.sindice.net/ og=http://opengraphprotocol.org/schema/ fb=http://www.facebook.com/2008/fbml# http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java new file mode 100644 index 0000000..6c13909 --- /dev/null +++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.any23.extractor.html.microformats2; + +import org.apache.any23.extractor.ExtractorFactory; +import org.apache.any23.extractor.html.AbstractExtractorTestCase; +import org.junit.Test; +import org.openrdf.repository.RepositoryException; +import org.openrdf.rio.RDFHandlerException; + +public class HEventExtractorTest extends AbstractExtractorTestCase { + protected ExtractorFactory<?> getExtractorFactory() { + return new HEventExtractorFactory(); + } + + @Test + public void testModelNotEmpty() throws RepositoryException, RDFHandlerException { + assertExtract("/microformats2/h-event/h-event-test.html"); + assertModelNotEmpty(); + assertStatementsSize(null, null, null, 9); + } +} http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java new file mode 100644 index 0000000..3b46a7a --- /dev/null +++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.any23.extractor.html.microformats2; + +import org.apache.any23.extractor.ExtractorFactory; +import org.apache.any23.extractor.html.AbstractExtractorTestCase; +import org.junit.Test; +import org.openrdf.repository.RepositoryException; +import org.openrdf.rio.RDFHandlerException; + +public class HProductExtractorTest extends AbstractExtractorTestCase { + protected ExtractorFactory<?> getExtractorFactory() { + return new HProductExtractorFactory(); + } + + @Test + public void testModelNotEmpty() throws RepositoryException, RDFHandlerException { + assertExtract("/microformats2/h-product/h-product-test.html"); + assertModelNotEmpty(); + assertStatementsSize(null, null, null, 11); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/test-resources/src/test/resources/microformats2/h-event/h-event-test.html ---------------------------------------------------------------------- diff --git a/test-resources/src/test/resources/microformats2/h-event/h-event-test.html b/test-resources/src/test/resources/microformats2/h-event/h-event-test.html new file mode 100644 index 0000000..b8af9de --- /dev/null +++ b/test-resources/src/test/resources/microformats2/h-event/h-event-test.html @@ -0,0 +1,36 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<!DOCTYPE html> +<html> + +<body> + <!-- Microformats 2 --> + + <div class="h-event"> + <h1 class="p-name">Microformats Meetup</h1> + <a class="u-url" href="http://microformats.org/meetup/">Official event web site</a> + <p>From + <time class="dt-start" datetime="2013-06-30 12:00">30<sup>th</sup> June 2013, 12:00</time> + to <time class="dt-end" datetime="2013-06-30 18:00">18:00</time> + at <span class="p-location">Some bar in SF</span></p> + <p class="p-summary">Get together and discuss all things microformats-related.</p> + <p class="p-description">This <span class="p-category">technical meetup</span> is hosted in aid of discussion related to new draft specification of microformats 2</p> + </div> + +</body> + +</html> http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/test-resources/src/test/resources/microformats2/h-product/h-product-test.html ---------------------------------------------------------------------- diff --git a/test-resources/src/test/resources/microformats2/h-product/h-product-test.html b/test-resources/src/test/resources/microformats2/h-product/h-product-test.html new file mode 100644 index 0000000..08ead4f --- /dev/null +++ b/test-resources/src/test/resources/microformats2/h-product/h-product-test.html @@ -0,0 +1,36 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<!DOCTYPE html> +<html> + +<body> + <!-- Microformats 2 --> + + <div class="h-product"> + <h1 class="p-name">Microformats For Dummies</h1> + <img class="u-photo" src="http://example.org/mfd.png" alt="" /> + <div class="e-description"> + <p>Want to get started using microformats, but intimidated by hyphens and mediawiki? This <span class="p-category">book</span> + contains everything you need to know!</p> + </div> + <p>Yours today for only <data class="p-price" value="20.00">$20.00</data> + from ACME Publishing inc.</p> + </div> + +</body> + +</html>
