http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/test/java/org/apache/any23/extractor/html/HListingExtractorTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/extractor/html/HListingExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/HListingExtractorTest.java index ad333c4..84b5d88 100644 --- a/core/src/test/java/org/apache/any23/extractor/html/HListingExtractorTest.java +++ b/core/src/test/java/org/apache/any23/extractor/html/HListingExtractorTest.java @@ -30,7 +30,6 @@ import org.openrdf.repository.RepositoryException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - /** * * Reference Test class for the {@link HListingExtractor} extractor. @@ -40,294 +39,315 @@ import org.slf4j.LoggerFactory; */ public class HListingExtractorTest extends AbstractExtractorTestCase { - private static final SINDICE vSINDICE = SINDICE.getInstance(); - private static final HListing vHLISTING = HListing.getInstance(); - private static final FOAF vFOAF = FOAF.getInstance(); - - private static final Logger logger = LoggerFactory.getLogger(HListingExtractorTest.class); - - protected ExtractorFactory<?> getExtractorFactory() { - return new HListingExtractorFactory(); - } - - @Test - public void testNoMicroformats() throws RepositoryException { - assertExtract("/html/html-without-uf.html"); - assertModelEmpty(); - } - - @Test - public void testListingWithouthContent() throws RepositoryException { - assertExtract("/microformats/hlisting/empty.html"); - assertModelNotEmpty(); - assertStatementsSize(null, null, null, 3); - } - - @Test - public void testSingleAction() throws RepositoryException { - assertExtract("/microformats/hlisting/single-action.html"); - assertModelNotEmpty(); - assertContains(vHLISTING.action, vHLISTING.offer); - } - - @Test - public void testMultipleActions() throws RepositoryException { - assertExtract("/microformats/hlisting/multiple-actions.html"); - assertModelNotEmpty(); - assertContains(vHLISTING.action, vHLISTING.offer); - assertContains(vHLISTING.action, vHLISTING.sell); - } - - @Test - public void testMultipleActionsNested() throws RepositoryException { - assertExtract("/microformats/hlisting/multiple-actions-nested.html"); - assertModelNotEmpty(); - assertContains(vHLISTING.action, vHLISTING.offer); - assertContains(vHLISTING.action, vHLISTING.sell); - assertContains(vHLISTING.action, vHLISTING.rent); - } - - @Test - public void testActionsOutside() throws RepositoryException { - assertExtract("/microformats/hlisting/single-action-outside.html"); - assertModelNotEmpty(); - assertNotContains(vHLISTING.action, vHLISTING.offer); - } - - @Test - public void testListerFn() throws RepositoryException { - assertExtract("/microformats/hlisting/actions-lister-fn.html"); - assertModelNotEmpty(); - assertContains(vHLISTING.action, vHLISTING.offer); - assertContains(RDF.TYPE, vHLISTING.Lister); - assertContains(vHLISTING.listerName, "mike"); - } - - @Test - public void testListerFnTel() throws RepositoryException { - assertExtract("/microformats/hlisting/actions-lister-fn-tel.html"); - assertModelNotEmpty(); - - assertContains(vHLISTING.action , vHLISTING.offer); - assertContains(vHLISTING.listerName, "John Broker"); - assertContains(RDF.TYPE, vHLISTING.Lister); - assertContains(vHLISTING.tel, "(110) 555-1212"); - } - - @Test - public void testItemFn() throws RepositoryException { - assertExtract("/microformats/hlisting/item-fn.html"); - assertModelNotEmpty(); - assertContains(RDF.TYPE, vHLISTING.Item); - assertContains(vHLISTING.itemName, "Parking space"); - } - - @Test - public void testItemFnUrl() throws RepositoryException { - assertExtract("/microformats/hlisting/item-fn-url.html"); - assertModelNotEmpty(); - assertContains(RDF.TYPE, vHLISTING.Item); - assertContains(vHLISTING.itemUrl, RDFUtils.uri("http://item.com/")); - assertContains(vHLISTING.itemName, "Parking space"); - } - - @Test - public void testItemPhotoImg() throws RepositoryException { - assertExtract("/microformats/hlisting/item-fn-url-photo-img.html"); - assertModelNotEmpty(); - assertContains(RDF.TYPE, vHLISTING.Item); - assertContains(vHLISTING.itemUrl, RDFUtils.uri("http://item.com/")); - assertContains(vHLISTING.itemName, "Parking space"); - assertContains(vHLISTING.itemPhoto, RDFUtils.uri(baseURI.stringValue() + "photo.jpg")); - } - - @Test - public void testItemPhotoHref() throws RepositoryException { - assertExtract("/microformats/hlisting/item-fn-photo-href.html"); - assertModelNotEmpty(); - assertContains(RDF.TYPE, vHLISTING.Item); - assertContains(vHLISTING.itemName, "Parking space"); - assertContains(vHLISTING.itemPhoto, RDFUtils.uri(baseURI.stringValue() + "pic.jpg")); - } - - @Ignore("ANY23-159: Error with nodes and markup extracted from HListingExtractorTest.testKelkoo & testKelkooFull") - @Test - public void testKelkoo() throws RepositoryException { - assertExtract("/microformats/hlisting/kelkoo.html"); - assertModelNotEmpty(); - - assertContains(RDF.TYPE, vHLISTING.Listing); - assertContains(RDF.TYPE, vHLISTING.Item); - assertContains(vHLISTING.action, vHLISTING.offer); - assertContains(vHLISTING.itemName, "Benq MP622 - DLP Projector - 2700 ANSI lumens - XGA..."); - - assertContains(vHLISTING.description, (Resource) null); - - assertContains(RDF.TYPE, vHLISTING.Lister); - - assertContains(vHLISTING.listerUrl, RDFUtils.uri(baseURI.stringValue() + - "m-4621623-pc-world-business.html")); - assertContains(vHLISTING.listerOrg, "PC World Business"); - - assertContains(vHLISTING.listerLogo, RDFUtils.uri(baseURI.stringValue() + - "data/merchantlogos/4621623/pcworld.gif")); - - assertContains(vHLISTING.listerName, "PC World Business"); - - assertContains(vHLISTING.itemPhoto, - RDFUtils.uri("http://img.kelkoo.com/uk/medium/675/496/00117250662929509422269096808645163496675.jpg")); - - assertContains(vHLISTING.price, "\u00A3480.17"); - } - - @Ignore("ANY23-159: Error with nodes and markup extracted from HListingExtractorTest.testKelkoo & testKelkooFull") - @Test - public void testKelkooFull() throws RepositoryException { - assertExtract("/microformats/hlisting/kelkoo-full.html"); - assertModelNotEmpty(); - assertContains(RDF.TYPE, vHLISTING.Listing); - assertContains(RDF.TYPE, vHLISTING.Item); - assertContains(vHLISTING.action, vHLISTING.offer); - assertContains(vHLISTING.itemUrl, RDFUtils.uri("http://bob.example.com/")); - assertContains(RDF.TYPE, vHLISTING.Lister); - - assertContains(vHLISTING.itemName, "Hanro Touch Feeling Shape Bodysuit Underwear"); - assertContains(vHLISTING.itemName, "Spanx Slim Cognito - Shaping Mid-Thigh Bodysuit"); - assertContains(vHLISTING.itemName, "Spanx Spanx Slim Cognito High Leg Shaping..."); - - assertContains(vHLISTING.itemPhoto, - RDFUtils.uri("http://img.kelkoo.com/uk/medium/657/449/00162475823966154731749844283942320449657.jpg")); - assertContains(vHLISTING.itemPhoto, - RDFUtils.uri("http://img.kelkoo.com/uk/medium/545/091/00154244199719224091151116421737036091545.jpg")); - assertContains(vHLISTING.itemPhoto, - RDFUtils.uri("http://img.kelkoo.com/uk/medium/018/426/00156227992563192632349212375692442426018.jpg")); - - - assertContains(vHLISTING.listerLogo, - RDFUtils.uri("http://bob.example.com/data/merchantlogos/6957423/socksfox.gif")); - assertContains(vHLISTING.listerLogo, - RDFUtils.uri("http://bob.example.com/data/merchantlogos/3590723/mytightsnew.gif")); - assertContains(vHLISTING.listerLogo, - RDFUtils.uri("http://bob.example.com/data/merchantlogos/2977501/pleaseonlinelogo88x311.gif")); - - - assertContains(vHLISTING.listerName, "Socks Fox"); - assertContains(vHLISTING.listerName, "My Tights"); - assertContains(vHLISTING.listerName, "Tightsplease"); - - - assertContains(vHLISTING.listerOrg, "Socks Fox"); - assertContains(vHLISTING.listerOrg, "My Tights"); - assertContains(vHLISTING.listerName, "Tightsplease"); - - assertContains(vHLISTING.listerUrl, RDFUtils.uri("http://bob.example.com/m-6957423-socks-fox.html")); - assertContains(vHLISTING.listerUrl, RDFUtils.uri("http://bob.example.com/m-3590723-my-tights.html")); - assertContains(vHLISTING.listerUrl, RDFUtils.uri("http://bob.example.com/m-2977501-tightsplease.html")); - - assertContains(vHLISTING.price, "\u00A380"); - assertContains(vHLISTING.price, "\u00A347.95"); - assertContains(vHLISTING.price, "\u00A354.99"); - } - - @Test - public void testListerURL() throws RepositoryException { - assertExtract("/microformats/hlisting/actions-lister-url.html"); - assertModelNotEmpty(); - assertContains(vHLISTING.action, vHLISTING.offer); - assertContains(vHLISTING.listerName, "John Broker"); - assertContains(RDF.TYPE, vHLISTING.Lister); - assertContains(vHLISTING.listerUrl, RDFUtils.uri("http://homepage.com")); - } - - @Test - public void testListerEmail() throws RepositoryException { - assertExtract("/microformats/hlisting/actions-lister-email.html"); - assertModelNotEmpty(); - assertContains(vHLISTING.action, vHLISTING.offer); - assertContains(vHLISTING.listerName, "John Broker"); - assertContains(RDF.TYPE, vHLISTING.Lister); - assertContains(vFOAF.mbox, RDFUtils.uri("mailto:[email protected]")); - } - - @Test - public void testListerEmailHref() throws RepositoryException { - assertExtract("/microformats/hlisting/actions-lister-email-href.html"); - assertModelNotEmpty(); - assertContains(vHLISTING.action, vHLISTING.offer); - assertContains(RDF.TYPE, vHLISTING.Lister); - assertContains(vHLISTING.listerName, "John Broker"); - assertContains(vFOAF.mbox, RDFUtils.uri("mailto:[email protected]")); - } - - @Test - public void testDtListed() throws RepositoryException { - assertExtract("/microformats/hlisting/dtlisted-dtexpired.html"); - assertModelNotEmpty(); - assertNotContains(vHLISTING.action, vHLISTING.offer); - assertContains(vHLISTING.dtlisted, "2006-02-02"); - } - - @Test - public void testDtExpired() throws RepositoryException { - assertExtract("/microformats/hlisting/dtlisted-dtexpired.html"); - assertModelNotEmpty(); - assertNotContains(vHLISTING.action, vHLISTING.offer); - assertContains(vHLISTING.dtexpired, "2006-04-01"); - } - - @Test - public void testSummary() throws RepositoryException { - assertExtract("/microformats/hlisting/summary.html"); - assertModelNotEmpty(); - assertContains(vHLISTING.summary, "summary stuff"); - } - - @Test - public void testDtListedAndExpired() throws RepositoryException { - assertExtract("/microformats/hlisting/dtlisted-dtexpired.html"); - assertModelNotEmpty(); - assertNotContains(vHLISTING.action, vHLISTING.offer); - assertContains(vHLISTING.dtlisted, "2006-02-02"); - assertContains(vHLISTING.dtexpired, "2006-04-01"); - } - - @Test - public void testPrice() throws RepositoryException { - assertExtract("/microformats/hlisting/price.html"); - assertModelNotEmpty(); - assertContains(vHLISTING.price, "$215/qtr"); - } - - @Test - public void testPriceAndDt() throws RepositoryException { - assertExtract("/microformats/hlisting/dtlisted-dtexpired.html"); - assertModelNotEmpty(); - assertContains(vHLISTING.price, "$215/qtr"); - assertContains(vHLISTING.dtlisted, "2006-02-02"); - assertContains(vHLISTING.dtexpired, "2006-04-01"); - } - - @Test - public void testPermalink() throws RepositoryException { - assertExtract("/microformats/hlisting/summary-bookmark.html"); - assertModelNotEmpty(); - assertContains(vHLISTING.permalink, "http://livre.com/book"); - assertContains(vHLISTING.listerUrl, RDFUtils.uri("http://livre.com/author")); - } - - @Test - public void testComplexDescription() throws RepositoryException { - assertExtract("/microformats/hlisting/description-complex.html"); - assertModelNotEmpty(); - assertContains(vHLISTING.description, - "BenQ today introduced two new additions to its renowned bus... + Show details"); - } - - @Test - public void testDescription() throws RepositoryException { - assertExtract("/microformats/hlisting/description.html"); - assertModelNotEmpty(); - assertContains(vHLISTING.description, "bla bla bla"); - } + private static final SINDICE vSINDICE = SINDICE.getInstance(); + private static final HListing vHLISTING = HListing.getInstance(); + private static final FOAF vFOAF = FOAF.getInstance(); + + private static final Logger logger = LoggerFactory + .getLogger(HListingExtractorTest.class); + + protected ExtractorFactory<?> getExtractorFactory() { + return new HListingExtractorFactory(); + } + + @Test + public void testNoMicroformats() throws Exception { + assertExtract("/html/html-without-uf.html"); + assertModelEmpty(); + } + + @Test + public void testListingWithouthContent() throws Exception { + assertExtract("/microformats/hlisting/empty.html"); + assertModelNotEmpty(); + assertStatementsSize(null, null, null, 3); + } + + @Test + public void testSingleAction() throws Exception { + assertExtract("/microformats/hlisting/single-action.html"); + assertModelNotEmpty(); + assertContains(vHLISTING.action, vHLISTING.offer); + } + + @Test + public void testMultipleActions() throws Exception { + assertExtract("/microformats/hlisting/multiple-actions.html"); + assertModelNotEmpty(); + assertContains(vHLISTING.action, vHLISTING.offer); + assertContains(vHLISTING.action, vHLISTING.sell); + } + + @Test + public void testMultipleActionsNested() throws Exception { + assertExtract("/microformats/hlisting/multiple-actions-nested.html"); + assertModelNotEmpty(); + assertContains(vHLISTING.action, vHLISTING.offer); + assertContains(vHLISTING.action, vHLISTING.sell); + assertContains(vHLISTING.action, vHLISTING.rent); + } + + @Test + public void testActionsOutside() throws Exception { + assertExtract("/microformats/hlisting/single-action-outside.html"); + assertModelNotEmpty(); + assertNotContains(vHLISTING.action, vHLISTING.offer); + } + + @Test + public void testListerFn() throws Exception { + assertExtract("/microformats/hlisting/actions-lister-fn.html"); + assertModelNotEmpty(); + assertContains(vHLISTING.action, vHLISTING.offer); + assertContains(RDF.TYPE, vHLISTING.Lister); + assertContains(vHLISTING.listerName, "mike"); + } + + @Test + public void testListerFnTel() throws Exception { + assertExtract("/microformats/hlisting/actions-lister-fn-tel.html"); + assertModelNotEmpty(); + + assertContains(vHLISTING.action, vHLISTING.offer); + assertContains(vHLISTING.listerName, "John Broker"); + assertContains(RDF.TYPE, vHLISTING.Lister); + assertContains(vHLISTING.tel, "(110) 555-1212"); + } + + @Test + public void testItemFn() throws Exception { + assertExtract("/microformats/hlisting/item-fn.html"); + assertModelNotEmpty(); + assertContains(RDF.TYPE, vHLISTING.Item); + assertContains(vHLISTING.itemName, "Parking space"); + } + + @Test + public void testItemFnUrl() throws Exception { + assertExtract("/microformats/hlisting/item-fn-url.html"); + assertModelNotEmpty(); + assertContains(RDF.TYPE, vHLISTING.Item); + assertContains(vHLISTING.itemUrl, RDFUtils.uri("http://item.com/")); + assertContains(vHLISTING.itemName, "Parking space"); + } + + @Test + public void testItemPhotoImg() throws Exception { + assertExtract("/microformats/hlisting/item-fn-url-photo-img.html"); + assertModelNotEmpty(); + assertContains(RDF.TYPE, vHLISTING.Item); + assertContains(vHLISTING.itemUrl, RDFUtils.uri("http://item.com/")); + assertContains(vHLISTING.itemName, "Parking space"); + assertContains(vHLISTING.itemPhoto, + RDFUtils.uri(baseURI.stringValue() + "photo.jpg")); + } + + @Test + public void testItemPhotoHref() throws Exception { + assertExtract("/microformats/hlisting/item-fn-photo-href.html"); + assertModelNotEmpty(); + assertContains(RDF.TYPE, vHLISTING.Item); + assertContains(vHLISTING.itemName, "Parking space"); + assertContains(vHLISTING.itemPhoto, + RDFUtils.uri(baseURI.stringValue() + "pic.jpg")); + } + + @Ignore("ANY23-159: Error with nodes and markup extracted from HListingExtractorTest.testKelkoo & testKelkooFull") + @Test + public void testKelkoo() throws Exception { + assertExtract("/microformats/hlisting/kelkoo.html"); + assertModelNotEmpty(); + + assertContains(RDF.TYPE, vHLISTING.Listing); + assertContains(RDF.TYPE, vHLISTING.Item); + assertContains(vHLISTING.action, vHLISTING.offer); + assertContains(vHLISTING.itemName, + "Benq MP622 - DLP Projector - 2700 ANSI lumens - XGA..."); + + assertContains(vHLISTING.description, (Resource) null); + + assertContains(RDF.TYPE, vHLISTING.Lister); + + assertContains( + vHLISTING.listerUrl, + RDFUtils.uri(baseURI.stringValue() + + "m-4621623-pc-world-business.html")); + assertContains(vHLISTING.listerOrg, "PC World Business"); + + assertContains( + vHLISTING.listerLogo, + RDFUtils.uri(baseURI.stringValue() + + "data/merchantlogos/4621623/pcworld.gif")); + + assertContains(vHLISTING.listerName, "PC World Business"); + + assertContains( + vHLISTING.itemPhoto, + RDFUtils.uri("http://img.kelkoo.com/uk/medium/675/496/00117250662929509422269096808645163496675.jpg")); + + assertContains(vHLISTING.price, "\u00A3480.17"); + } + + @Ignore("ANY23-159: Error with nodes and markup extracted from HListingExtractorTest.testKelkoo & testKelkooFull") + @Test + public void testKelkooFull() throws Exception { + assertExtract("/microformats/hlisting/kelkoo-full.html"); + assertModelNotEmpty(); + assertContains(RDF.TYPE, vHLISTING.Listing); + assertContains(RDF.TYPE, vHLISTING.Item); + assertContains(vHLISTING.action, vHLISTING.offer); + assertContains(vHLISTING.itemUrl, + RDFUtils.uri("http://bob.example.com/")); + assertContains(RDF.TYPE, vHLISTING.Lister); + + assertContains(vHLISTING.itemName, + "Hanro Touch Feeling Shape Bodysuit Underwear"); + assertContains(vHLISTING.itemName, + "Spanx Slim Cognito - Shaping Mid-Thigh Bodysuit"); + assertContains(vHLISTING.itemName, + "Spanx Spanx Slim Cognito High Leg Shaping..."); + + assertContains( + vHLISTING.itemPhoto, + RDFUtils.uri("http://img.kelkoo.com/uk/medium/657/449/00162475823966154731749844283942320449657.jpg")); + assertContains( + vHLISTING.itemPhoto, + RDFUtils.uri("http://img.kelkoo.com/uk/medium/545/091/00154244199719224091151116421737036091545.jpg")); + assertContains( + vHLISTING.itemPhoto, + RDFUtils.uri("http://img.kelkoo.com/uk/medium/018/426/00156227992563192632349212375692442426018.jpg")); + + assertContains( + vHLISTING.listerLogo, + RDFUtils.uri("http://bob.example.com/data/merchantlogos/6957423/socksfox.gif")); + assertContains( + vHLISTING.listerLogo, + RDFUtils.uri("http://bob.example.com/data/merchantlogos/3590723/mytightsnew.gif")); + assertContains( + vHLISTING.listerLogo, + RDFUtils.uri("http://bob.example.com/data/merchantlogos/2977501/pleaseonlinelogo88x311.gif")); + + assertContains(vHLISTING.listerName, "Socks Fox"); + assertContains(vHLISTING.listerName, "My Tights"); + assertContains(vHLISTING.listerName, "Tightsplease"); + + assertContains(vHLISTING.listerOrg, "Socks Fox"); + assertContains(vHLISTING.listerOrg, "My Tights"); + assertContains(vHLISTING.listerName, "Tightsplease"); + + assertContains(vHLISTING.listerUrl, + RDFUtils.uri("http://bob.example.com/m-6957423-socks-fox.html")); + assertContains(vHLISTING.listerUrl, + RDFUtils.uri("http://bob.example.com/m-3590723-my-tights.html")); + assertContains( + vHLISTING.listerUrl, + RDFUtils.uri("http://bob.example.com/m-2977501-tightsplease.html")); + + assertContains(vHLISTING.price, "\u00A380"); + assertContains(vHLISTING.price, "\u00A347.95"); + assertContains(vHLISTING.price, "\u00A354.99"); + } + + @Test + public void testListerURL() throws Exception { + assertExtract("/microformats/hlisting/actions-lister-url.html"); + assertModelNotEmpty(); + assertContains(vHLISTING.action, vHLISTING.offer); + assertContains(vHLISTING.listerName, "John Broker"); + assertContains(RDF.TYPE, vHLISTING.Lister); + assertContains(vHLISTING.listerUrl, RDFUtils.uri("http://homepage.com")); + } + + @Test + public void testListerEmail() throws Exception { + assertExtract("/microformats/hlisting/actions-lister-email.html"); + assertModelNotEmpty(); + assertContains(vHLISTING.action, vHLISTING.offer); + assertContains(vHLISTING.listerName, "John Broker"); + assertContains(RDF.TYPE, vHLISTING.Lister); + assertContains(vFOAF.mbox, RDFUtils.uri("mailto:[email protected]")); + } + + @Test + public void testListerEmailHref() throws Exception { + assertExtract("/microformats/hlisting/actions-lister-email-href.html"); + assertModelNotEmpty(); + assertContains(vHLISTING.action, vHLISTING.offer); + assertContains(RDF.TYPE, vHLISTING.Lister); + assertContains(vHLISTING.listerName, "John Broker"); + assertContains(vFOAF.mbox, RDFUtils.uri("mailto:[email protected]")); + } + + @Test + public void testDtListed() throws Exception { + assertExtract("/microformats/hlisting/dtlisted-dtexpired.html"); + assertModelNotEmpty(); + assertNotContains(vHLISTING.action, vHLISTING.offer); + assertContains(vHLISTING.dtlisted, "2006-02-02"); + } + + @Test + public void testDtExpired() throws Exception { + assertExtract("/microformats/hlisting/dtlisted-dtexpired.html"); + assertModelNotEmpty(); + assertNotContains(vHLISTING.action, vHLISTING.offer); + assertContains(vHLISTING.dtexpired, "2006-04-01"); + } + + @Test + public void testSummary() throws Exception { + assertExtract("/microformats/hlisting/summary.html"); + assertModelNotEmpty(); + assertContains(vHLISTING.summary, "summary stuff"); + } + + @Test + public void testDtListedAndExpired() throws Exception { + assertExtract("/microformats/hlisting/dtlisted-dtexpired.html"); + assertModelNotEmpty(); + assertNotContains(vHLISTING.action, vHLISTING.offer); + assertContains(vHLISTING.dtlisted, "2006-02-02"); + assertContains(vHLISTING.dtexpired, "2006-04-01"); + } + + @Test + public void testPrice() throws Exception { + assertExtract("/microformats/hlisting/price.html"); + assertModelNotEmpty(); + assertContains(vHLISTING.price, "$215/qtr"); + } + + @Test + public void testPriceAndDt() throws Exception { + assertExtract("/microformats/hlisting/dtlisted-dtexpired.html"); + assertModelNotEmpty(); + assertContains(vHLISTING.price, "$215/qtr"); + assertContains(vHLISTING.dtlisted, "2006-02-02"); + assertContains(vHLISTING.dtexpired, "2006-04-01"); + } + + @Test + public void testPermalink() throws Exception { + assertExtract("/microformats/hlisting/summary-bookmark.html"); + assertModelNotEmpty(); + assertContains(vHLISTING.permalink, "http://livre.com/book"); + assertContains(vHLISTING.listerUrl, + RDFUtils.uri("http://livre.com/author")); + } + + @Test + public void testComplexDescription() throws Exception { + assertExtract("/microformats/hlisting/description-complex.html"); + assertModelNotEmpty(); + assertContains(vHLISTING.description, + "BenQ today introduced two new additions to its renowned bus... + Show details"); + } + + @Test + public void testDescription() throws Exception { + assertExtract("/microformats/hlisting/description.html"); + assertModelNotEmpty(); + assertContains(vHLISTING.description, "bla bla bla"); + } }
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/test/java/org/apache/any23/extractor/html/HRecipeExtractorTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/extractor/html/HRecipeExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/HRecipeExtractorTest.java index 3ec0653..5a171bc 100644 --- a/core/src/test/java/org/apache/any23/extractor/html/HRecipeExtractorTest.java +++ b/core/src/test/java/org/apache/any23/extractor/html/HRecipeExtractorTest.java @@ -31,36 +31,36 @@ import org.openrdf.repository.RepositoryException; */ public class HRecipeExtractorTest extends AbstractExtractorTestCase { - private static final SINDICE vSINDICE = SINDICE.getInstance(); - private static final HRecipe vHRECIPE = HRecipe.getInstance(); + private static final SINDICE vSINDICE = SINDICE.getInstance(); + private static final HRecipe vHRECIPE = HRecipe.getInstance(); - @Override - protected ExtractorFactory<?> getExtractorFactory() { - return new HRecipeExtractorFactory(); - } + @Override + protected ExtractorFactory<?> getExtractorFactory() { + return new HRecipeExtractorFactory(); + } - @Test - public void testNoMicroformats() throws RepositoryException { - assertExtract("/html/html-without-uf.html"); - assertModelEmpty(); - } + @Test + public void testNoMicroformats() throws Exception { + assertExtract("/html/html-without-uf.html"); + assertModelEmpty(); + } - @Test - public void testExtraction() throws RepositoryException { - assertExtract("/microformats/hrecipe/01-spec.html"); - assertModelNotEmpty(); - assertStatementsSize(RDF.TYPE, vHRECIPE.Recipe , 1); - assertStatementsSize(RDF.TYPE, vHRECIPE.Ingredient, 3); - assertStatementsSize(RDF.TYPE, vHRECIPE.Duration , 2); - assertStatementsSize(RDF.TYPE, vHRECIPE.Nutrition , 2); - assertStatementsSize(vHRECIPE.fn, (String) null, 1); - assertStatementsSize(vHRECIPE.yield, (String) null, 1); - assertStatementsSize(vHRECIPE.instructions, (String) null, 1); - assertStatementsSize(vHRECIPE.photo, (String) null, 1); - assertStatementsSize(vHRECIPE.summary, (String) null, 1); - assertStatementsSize(vHRECIPE.author, (String) null, 2); - assertStatementsSize(vHRECIPE.published, (String) null, 1); - assertStatementsSize(vHRECIPE.tag, (String) null, 2); - } + @Test + public void testExtraction() throws Exception { + assertExtract("/microformats/hrecipe/01-spec.html"); + assertModelNotEmpty(); + assertStatementsSize(RDF.TYPE, vHRECIPE.Recipe, 1); + assertStatementsSize(RDF.TYPE, vHRECIPE.Ingredient, 3); + assertStatementsSize(RDF.TYPE, vHRECIPE.Duration, 2); + assertStatementsSize(RDF.TYPE, vHRECIPE.Nutrition, 2); + assertStatementsSize(vHRECIPE.fn, (String) null, 1); + assertStatementsSize(vHRECIPE.yield, (String) null, 1); + assertStatementsSize(vHRECIPE.instructions, (String) null, 1); + assertStatementsSize(vHRECIPE.photo, (String) null, 1); + assertStatementsSize(vHRECIPE.summary, (String) null, 1); + assertStatementsSize(vHRECIPE.author, (String) null, 2); + assertStatementsSize(vHRECIPE.published, (String) null, 1); + assertStatementsSize(vHRECIPE.tag, (String) null, 2); + } } http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/test/java/org/apache/any23/extractor/html/HResumeExtractorTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/extractor/html/HResumeExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/HResumeExtractorTest.java index a4287dd..5c1020d 100644 --- a/core/src/test/java/org/apache/any23/extractor/html/HResumeExtractorTest.java +++ b/core/src/test/java/org/apache/any23/extractor/html/HResumeExtractorTest.java @@ -43,130 +43,124 @@ import java.util.Set; */ public class HResumeExtractorTest extends AbstractExtractorTestCase { - private static final SINDICE vSINDICE = SINDICE.getInstance(); - private static final FOAF vFOAF = FOAF.getInstance(); - private static final DOAC vDOAC = DOAC.getInstance(); - private static final VCard vVCARD = VCard.getInstance(); - - private static final Logger logger = LoggerFactory.getLogger(HReviewExtractorTest.class); - - protected ExtractorFactory<?> getExtractorFactory() { - return new HResumeExtractorFactory(); - } - - @Test - public void testNoMicroformats() throws RepositoryException { - assertExtract("/html/html-without-uf.html"); - assertModelEmpty(); - } - - @Test - public void testLinkedIn() throws RepositoryException { - assertExtract("/microformats/hresume/steveganz.html"); - assertModelNotEmpty(); - assertStatementsSize(RDF.TYPE, vFOAF.Person, 1); - - Resource person = findExactlyOneBlankSubject(RDF.TYPE, vFOAF.Person); - - assertContains(person, vDOAC.summary, (Resource) null); - - assertContains( - person, - vDOAC.summary, - "Steve Ganz is passionate about connecting people,\n" + - "semantic markup, sushi, and disc golf - not necessarily in that order.\n" + - "Currently obsessed with developing the user experience at LinkedIn,\n" + - "Steve is a second generation Silicon Valley geek and a veteran web\n" + - "professional who has been building human-computer interfaces since 1994."); - - - assertContains(person, vFOAF.isPrimaryTopicOf, (Resource) null); - - assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0); - - assertStatementsSize(vDOAC.experience , (Value) null, 7); - assertStatementsSize(vDOAC.education , (Value) null, 2); - assertStatementsSize(vDOAC.affiliation, (Value) null, 8); - } - - @Test - public void testLinkedInComplete() throws RepositoryException { - - assertExtract("/microformats/hresume/steveganz.html"); - assertModelNotEmpty(); - - assertStatementsSize(RDF.TYPE, vFOAF.Person, 1); - - assertStatementsSize(vDOAC.experience , (Value) null, 7 ); - assertStatementsSize(vDOAC.education , (Value) null, 2 ); - assertStatementsSize(vDOAC.affiliation, (Value) null, 8 ); - assertStatementsSize(vDOAC.skill , (Value) null, 17); - - RepositoryResult<Statement> statements = getStatements(null, vDOAC.organization, null); - - Set<String> checkSet = new HashSet<String>(); - - try { - while(statements.hasNext()) { - Statement statement = statements.next(); - checkSet.add(statement.getObject().stringValue()); - logger.debug( statement.getObject().stringValue() ); - } - - } finally { - statements.close(); - } - - String[] names = new String[]{ - "LinkedIn Corporation", - "PayPal, an eBay Company", - "McAfee, Inc.", - "Printable Technologies", - "Collabria, Inc.", - "Self-employed", - "3G Productions", - "Lee Strasberg Theatre and Film\n" + - "\tInstitute", - "Leland High School"}; - - for(String name: names) - Assert.assertTrue(checkSet.contains(name)); - - Resource person = findExactlyOneBlankSubject(RDF.TYPE, vFOAF.Person); - assertContains(person, vFOAF.isPrimaryTopicOf, (Value) null); - findExactlyOneObject(person, vFOAF.isPrimaryTopicOf); - } - - @Test - public void testAnt() throws RepositoryException { - assertExtract("/microformats/hresume/ant.html"); - assertModelNotEmpty(); - - assertStatementsSize(RDF.TYPE, vFOAF.Person, 1); - - - Resource person = findExactlyOneBlankSubject(RDF.TYPE, vFOAF.Person); - assertContains(person, vDOAC.summary, (Resource) null); - - assertContains( - person, - vDOAC.summary, - "Senior Systems\n Analyst/Developer.\n " + - "Experienced in the analysis, design and\n " + - "implementation of distributed, multi-tier\n " + - "applications using Microsoft\n technologies.\n" + - " Specialising in data capture applications on the\n" + - " Web."); - - - assertContains(person, vFOAF.isPrimaryTopicOf, (Resource) null); - - assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0); - - assertStatementsSize(vDOAC.experience , (Value) null, 16); - assertStatementsSize(vDOAC.education , (Value) null, 2 ); - assertStatementsSize(vDOAC.affiliation, (Value) null, 0 ); - assertStatementsSize(vDOAC.skill , (Value) null, 4 ); - } + private static final SINDICE vSINDICE = SINDICE.getInstance(); + private static final FOAF vFOAF = FOAF.getInstance(); + private static final DOAC vDOAC = DOAC.getInstance(); + private static final VCard vVCARD = VCard.getInstance(); + + private static final Logger logger = LoggerFactory + .getLogger(HReviewExtractorTest.class); + + protected ExtractorFactory<?> getExtractorFactory() { + return new HResumeExtractorFactory(); + } + + @Test + public void testNoMicroformats() throws Exception { + assertExtract("/html/html-without-uf.html"); + assertModelEmpty(); + } + + @Test + public void testLinkedIn() throws Exception { + assertExtract("/microformats/hresume/steveganz.html"); + assertModelNotEmpty(); + assertStatementsSize(RDF.TYPE, vFOAF.Person, 1); + + Resource person = findExactlyOneBlankSubject(RDF.TYPE, vFOAF.Person); + + assertContains(person, vDOAC.summary, (Resource) null); + + assertContains( + person, + vDOAC.summary, + "Steve Ganz is passionate about connecting people,\n" + + "semantic markup, sushi, and disc golf - not necessarily in that order.\n" + + "Currently obsessed with developing the user experience at LinkedIn,\n" + + "Steve is a second generation Silicon Valley geek and a veteran web\n" + + "professional who has been building human-computer interfaces since 1994."); + + assertContains(person, vFOAF.isPrimaryTopicOf, (Resource) null); + + assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0); + + assertStatementsSize(vDOAC.experience, (Value) null, 7); + assertStatementsSize(vDOAC.education, (Value) null, 2); + assertStatementsSize(vDOAC.affiliation, (Value) null, 8); + } + + @Test + public void testLinkedInComplete() throws Exception { + + assertExtract("/microformats/hresume/steveganz.html"); + assertModelNotEmpty(); + + assertStatementsSize(RDF.TYPE, vFOAF.Person, 1); + + assertStatementsSize(vDOAC.experience, (Value) null, 7); + assertStatementsSize(vDOAC.education, (Value) null, 2); + assertStatementsSize(vDOAC.affiliation, (Value) null, 8); + assertStatementsSize(vDOAC.skill, (Value) null, 17); + + RepositoryResult<Statement> statements = getStatements(null, + vDOAC.organization, null); + + Set<String> checkSet = new HashSet<String>(); + + try { + while (statements.hasNext()) { + Statement statement = statements.next(); + checkSet.add(statement.getObject().stringValue()); + logger.debug(statement.getObject().stringValue()); + } + + } finally { + statements.close(); + } + + String[] names = new String[] { "LinkedIn Corporation", + "PayPal, an eBay Company", "McAfee, Inc.", + "Printable Technologies", "Collabria, Inc.", "Self-employed", + "3G Productions", + "Lee Strasberg Theatre and Film\n" + "\tInstitute", + "Leland High School" }; + + for (String name : names) + Assert.assertTrue(checkSet.contains(name)); + + Resource person = findExactlyOneBlankSubject(RDF.TYPE, vFOAF.Person); + assertContains(person, vFOAF.isPrimaryTopicOf, (Value) null); + findExactlyOneObject(person, vFOAF.isPrimaryTopicOf); + } + + @Test + public void testAnt() throws Exception { + assertExtract("/microformats/hresume/ant.html"); + assertModelNotEmpty(); + + assertStatementsSize(RDF.TYPE, vFOAF.Person, 1); + + Resource person = findExactlyOneBlankSubject(RDF.TYPE, vFOAF.Person); + assertContains(person, vDOAC.summary, (Resource) null); + + assertContains( + person, + vDOAC.summary, + "Senior Systems\n Analyst/Developer.\n " + + "Experienced in the analysis, design and\n " + + "implementation of distributed, multi-tier\n " + + "applications using Microsoft\n technologies.\n" + + " Specialising in data capture applications on the\n" + + " Web."); + + assertContains(person, vFOAF.isPrimaryTopicOf, (Resource) null); + + assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0); + + assertStatementsSize(vDOAC.experience, (Value) null, 16); + assertStatementsSize(vDOAC.education, (Value) null, 2); + assertStatementsSize(vDOAC.affiliation, (Value) null, 0); + assertStatementsSize(vDOAC.skill, (Value) null, 4); + } } http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/test/java/org/apache/any23/extractor/html/HReviewExtractorTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/extractor/html/HReviewExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/HReviewExtractorTest.java index bfdfa8a..e078df5 100644 --- a/core/src/test/java/org/apache/any23/extractor/html/HReviewExtractorTest.java +++ b/core/src/test/java/org/apache/any23/extractor/html/HReviewExtractorTest.java @@ -40,289 +40,308 @@ import org.slf4j.LoggerFactory; */ public class HReviewExtractorTest extends AbstractExtractorTestCase { - private static final DCTerms vDCTERMS = DCTerms.getInstance(); - private static final Review vREVIEW = Review.getInstance(); - private static final SINDICE vSINDICE = SINDICE.getInstance(); - private static final VCard vVCARD = VCard.getInstance(); - - private static final Logger logger = LoggerFactory.getLogger(HReviewExtractorTest.class); - - protected ExtractorFactory<?> getExtractorFactory() { - return new HReviewExtractorFactory(); - } - - @Test - public void testNoMicroformats() throws RepositoryException { - assertExtract("/html/html-without-uf.html"); - assertModelEmpty(); - } - - @Test - public void test01Basic() throws RepositoryException { - assertExtract("/microformats/hreview/01-spec.html"); - assertModelNotEmpty(); - - assertStatementsSize(RDF.TYPE, vREVIEW.Review, 1); - - // reviewer, item - assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0); + private static final DCTerms vDCTERMS = DCTerms.getInstance(); + private static final Review vREVIEW = Review.getInstance(); + private static final SINDICE vSINDICE = SINDICE.getInstance(); + private static final VCard vVCARD = VCard.getInstance(); + + private static final Logger logger = LoggerFactory + .getLogger(HReviewExtractorTest.class); + + protected ExtractorFactory<?> getExtractorFactory() { + return new HReviewExtractorFactory(); + } + + @Test + public void testNoMicroformats() throws Exception { + assertExtract("/html/html-without-uf.html"); + assertModelEmpty(); + } + + @Test + public void test01Basic() throws Exception { + assertExtract("/microformats/hreview/01-spec.html"); + assertModelNotEmpty(); + + assertStatementsSize(RDF.TYPE, vREVIEW.Review, 1); + + // reviewer, item + assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0); + + // there is one address in the item vcard + assertStatementsSize(RDF.TYPE, vVCARD.Address, 0); + + RepositoryResult<Statement> reviews = getStatements(null, RDF.TYPE, + vREVIEW.Review); + + try { + while (reviews.hasNext()) { + + Resource review = reviews.next().getSubject(); + logger.debug(review.stringValue()); + assertContains(review, vREVIEW.rating, "5"); + assertContains(review, vREVIEW.title, + "Crepes on Cole is awesome"); + assertContains(review, vDCTERMS.date, "20050418T2300-0700"); - // there is one address in the item vcard - assertStatementsSize(RDF.TYPE, vVCARD.Address, 0); - - RepositoryResult<Statement> reviews = getStatements(null, RDF.TYPE, vREVIEW.Review); + assertContains( + vREVIEW.text, + "Crepes on Cole is one of the best little \n" + + " creperies in San Francisco.\n " + + "Excellent food and service. Plenty of tables in a variety of sizes\n" + + " for parties large and small. " + + "Window seating makes for excellent\n " + + "people watching to/from the N-Judah which stops right outside.\n" + + " I've had many fun social gatherings here, as well as gotten\n" + + " plenty of work done thanks to neighborhood WiFi."); + + assertContains(null, vREVIEW.hasReview, review); - try { - while (reviews.hasNext()) { + } + } finally { + reviews.close(); + } + + assertNotContains(vVCARD.locality, null); + assertNotContains(vVCARD.organization_name, null); + + } + + @Test + public void test02RatedTags() throws Exception { + + assertExtract("/microformats/hreview/02-spec-2.html"); + assertModelNotEmpty(); + + assertStatementsSize(RDF.TYPE, vREVIEW.Review, 1); + + // reviewer, item + assertStatementsSize(vREVIEW.reviewer, (Value) null, 1); + assertStatementsSize(vREVIEW.hasReview, (Value) null, 1); + assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0); + + // there is one address in the item vcard + assertStatementsSize(RDF.TYPE, vVCARD.Address, 0); + + RepositoryResult<Statement> reviews = getStatements(null, RDF.TYPE, + vREVIEW.Review); + + try { + while (reviews.hasNext()) { + Resource review = reviews.next().getSubject(); + assertContains(review, vREVIEW.rating, "18"); + assertContains(review, vREVIEW.title, "Cafe Borrone"); + assertContains(review, vDCTERMS.date, "20050428T2130-0700"); + + assertContains( + vREVIEW.text, + "This \n cafe\n " + + "is a welcoming oasis on " + + "the Peninsula.\n " + + "It even has a fountain outside which nearly eliminates\n " + + "the sounds of El Camino traffic. " + + "Next door to a superb indy bookstore,\n " + + "Cafe Borrone is an ideal spot to grab a\n coffee\n or " + + "a meal to accompany a newly purchased book or imported periodical.\n" + + " Soups and\n sandwich\n specials rotate daily. " + + "The corn chowder with croutons and big chunks of cheese\n " + + "goes especially well with a freshly toasted mini-baguette. " + + "Evenings are\n often crowded and may require sharing a table " + + "with a perfect stranger.\n " + + "Espresso\n afficionados will appreciate the\n Illy coffee.\n " + + "Noise levels can vary from peaceful in the late mornings to nearly overwhelming on\n" + + " jazz band nights."); - Resource review = reviews.next().getSubject(); - logger.debug(review.stringValue()); + assertContains(null, vREVIEW.hasReview, review); + assertContains(vREVIEW.type, "business"); - assertContains(review, vREVIEW.rating, "5"); - assertContains(review, vREVIEW.title, "Crepes on Cole is awesome"); - assertContains(review, vDCTERMS.date, "20050418T2300-0700"); + } - assertContains( - vREVIEW.text, - "Crepes on Cole is one of the best little \n" - + " creperies in San Francisco.\n " - + "Excellent food and service. Plenty of tables in a variety of sizes\n" - + " for parties large and small. " - + "Window seating makes for excellent\n " - + "people watching to/from the N-Judah which stops right outside.\n" - + " I've had many fun social gatherings here, as well as gotten\n" - + " plenty of work done thanks to neighborhood WiFi."); + } finally { + reviews.close(); + } - assertContains(null, vREVIEW.hasReview, review); + } + @Test + public void test03NoHcardForItem() throws Exception { + + assertExtract("/microformats/hreview/03-spec-3.html"); + assertModelNotEmpty(); + + assertStatementsSize(RDF.TYPE, vREVIEW.Review, 1); + assertStatementsSize(vREVIEW.reviewer, (Value) null, 1); + + RepositoryResult<Statement> reviews = getStatements(null, RDF.TYPE, + vREVIEW.Review); + + try { + + while (reviews.hasNext()) { + + Resource review = reviews.next().getSubject(); + + assertContains(review, vREVIEW.rating, "5"); + assertNotContains(vREVIEW.title, null); + assertContains(review, vDCTERMS.date, "200502"); + + assertContains( + vREVIEW.text, + "\"The people thought they were just being rewarded for " + + "treating others\n as they like to be treated, for " + + "obeying stop signs and curing diseases,\n for mailing " + + "letters with the address of the sender... Don't wake me,\n " + + " I plan on sleeping in...\"\n \n \"Nothing Better\"" + + " is a great track on this album, too..."); + + RepositoryResult<Statement> reviewSubjects = getStatements( + null, vREVIEW.hasReview, review); + + try { + while (reviewSubjects.hasNext()) { + Resource reviewSubject = reviewSubjects.next() + .getSubject(); + assertContains(reviewSubject, vVCARD.fn, + "The Postal Service: Give Up"); + assertContains( + reviewSubject, + vVCARD.url, + RDFUtils.uri("http://www.amazon.com/exec/obidos/ASIN/B000089CJI/")); + assertContains( + reviewSubject, + vVCARD.photo, + RDFUtils.uri("http://images.amazon.com/images/P/B000089CJI.01._SCTHUMBZZZ_.jpg")); + } + } finally { + reviewSubjects.close(); + } + + } + + } finally { + reviews.close(); + } + + } + + @Test + public void test04NoHcardForItem() throws Exception { + + assertExtract("/microformats/hreview/04-spec-4.html"); + assertModelNotEmpty(); + + assertStatementsSize(RDF.TYPE, vREVIEW.Review, 1); + // reviewer, no item + assertStatementsSize(vREVIEW.reviewer, (Value) null, 1); + + assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0); + + RepositoryResult<Statement> reviews = getStatements(null, RDF.TYPE, + vREVIEW.Review); + + try { + + while (reviews.hasNext()) { + + Resource review = reviews.next().getSubject(); - } - } finally { - reviews.close(); - } + assertContains(review, vREVIEW.rating, "4"); + assertNotContains(vREVIEW.title, null); + assertContains(review, vDCTERMS.date, "20050418"); - assertNotContains(vVCARD.locality, null); - assertNotContains(vVCARD.organization_name, null); - - } - - @Test - public void test02RatedTags() throws RepositoryException { - - assertExtract("/microformats/hreview/02-spec-2.html"); - assertModelNotEmpty(); - - assertStatementsSize(RDF.TYPE, vREVIEW.Review, 1); + assertContains(vREVIEW.text, + "This movie has great music and visuals."); - // reviewer, item - assertStatementsSize(vREVIEW.reviewer, (Value)null, 1); - assertStatementsSize(vREVIEW.hasReview, (Value) null, 1); - assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0); - - // there is one address in the item vcard - assertStatementsSize(RDF.TYPE, vVCARD.Address, 0); + assertStatementsSize(vREVIEW.hasReview, review, 1); - RepositoryResult<Statement> reviews = getStatements(null, RDF.TYPE, vREVIEW.Review); - - try { - while (reviews.hasNext()) { - Resource review = reviews.next().getSubject(); - assertContains(review, vREVIEW.rating, "18"); - assertContains(review, vREVIEW.title, "Cafe Borrone"); - assertContains(review, vDCTERMS.date, "20050428T2130-0700"); + RepositoryResult<Statement> reviewSubjects = getStatements( + null, vREVIEW.hasReview, review); - assertContains( - vREVIEW.text, - "This \n cafe\n " + - "is a welcoming oasis on " + - "the Peninsula.\n " + - "It even has a fountain outside which nearly eliminates\n " + - "the sounds of El Camino traffic. " + - "Next door to a superb indy bookstore,\n " + - "Cafe Borrone is an ideal spot to grab a\n coffee\n or " + - "a meal to accompany a newly purchased book or imported periodical.\n" + - " Soups and\n sandwich\n specials rotate daily. " + - "The corn chowder with croutons and big chunks of cheese\n " + - "goes especially well with a freshly toasted mini-baguette. " + - "Evenings are\n often crowded and may require sharing a table " + - "with a perfect stranger.\n " + - "Espresso\n afficionados will appreciate the\n Illy coffee.\n " + - "Noise levels can vary from peaceful in the late mornings to nearly overwhelming on\n" + - " jazz band nights." - ); + try { + while (reviewSubjects.hasNext()) { + Resource reviewSubject = reviewSubjects.next() + .getSubject(); + assertContains(reviewSubject, vVCARD.fn, + "Ying Xiong (HERO)"); + assertContains( + reviewSubject, + vVCARD.url, + RDFUtils.uri("http://www.imdb.com/title/tt0299977/")); + } - assertContains(null, vREVIEW.hasReview, review); - assertContains(vREVIEW.type, "business"); + } finally { + reviewSubjects.close(); + } + + } + + } finally { + reviews.close(); + } + + } + + /** + * This test is the same defined in + * {@link HReviewExtractorTest#test04NoHcardForItem} but assess the behavior + * in presence of a <i>Microformat</i> name with a different letter + * capitalization. + * + * @throws RepositoryException + */ + @Test + public void testCaseSensitiveness() throws Exception { + assertExtract("/microformats/hreview/05-spec.html"); + assertModelNotEmpty(); + assertStatementsSize(RDF.TYPE, vREVIEW.Review, 1); + // reviewer, no item + assertStatementsSize(vREVIEW.reviewer, (Value) null, 1); + + assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0); + + RepositoryResult<Statement> reviews = getStatements(null, RDF.TYPE, + vREVIEW.Review); + + try { - } + while (reviews.hasNext()) { - } finally { - reviews.close(); - } + Resource review = reviews.next().getSubject(); - } + assertContains(review, vREVIEW.rating, "4"); + assertNotContains(vREVIEW.title, null); + assertContains(review, vDCTERMS.date, "20050418"); - @Test - public void test03NoHcardForItem() throws RepositoryException { + assertContains(vREVIEW.text, + "This movie has great music and visuals."); - assertExtract("/microformats/hreview/03-spec-3.html"); - assertModelNotEmpty(); + assertStatementsSize(vREVIEW.hasReview, review, 1); - assertStatementsSize(RDF.TYPE, vREVIEW.Review, 1); - assertStatementsSize(vREVIEW.reviewer, (Value) null, 1); + RepositoryResult<Statement> reviewSubjects = getStatements( + null, vREVIEW.hasReview, review); - RepositoryResult<Statement> reviews = getStatements(null, RDF.TYPE, vREVIEW.Review); + try { + while (reviewSubjects.hasNext()) { + Resource reviewSubject = reviewSubjects.next() + .getSubject(); + assertContains(reviewSubject, vVCARD.fn, + "Ying Xiong (HERO)"); + assertContains( + reviewSubject, + vVCARD.url, + RDFUtils.uri("http://www.imdb.com/title/tt0299977/")); + } - try { + } finally { + reviewSubjects.close(); + } - while (reviews.hasNext()) { + } - Resource review = reviews.next().getSubject(); - - assertContains(review, vREVIEW.rating, "5"); - assertNotContains(vREVIEW.title, null); - assertContains(review, vDCTERMS.date, "200502"); - - assertContains( - vREVIEW.text, - "\"The people thought they were just being rewarded for " + - "treating others\n as they like to be treated, for " + - "obeying stop signs and curing diseases,\n for mailing " + - "letters with the address of the sender... Don't wake me,\n " + - " I plan on sleeping in...\"\n \n \"Nothing Better\"" + - " is a great track on this album, too..."); - - RepositoryResult<Statement> reviewSubjects = getStatements(null, vREVIEW.hasReview, review); - - try { - while (reviewSubjects.hasNext()) { - Resource reviewSubject = reviewSubjects.next().getSubject(); - assertContains(reviewSubject, vVCARD.fn, "The Postal Service: Give Up"); - assertContains(reviewSubject, vVCARD.url, - RDFUtils.uri("http://www.amazon.com/exec/obidos/ASIN/B000089CJI/")); - assertContains(reviewSubject, vVCARD.photo, - RDFUtils.uri("http://images.amazon.com/images/P/B000089CJI.01._SCTHUMBZZZ_.jpg")); - } - } finally { - reviewSubjects.close(); - } - - } - - } finally { - reviews.close(); - } - - } - - @Test - public void test04NoHcardForItem() throws RepositoryException { - - assertExtract("/microformats/hreview/04-spec-4.html"); - assertModelNotEmpty(); - - assertStatementsSize(RDF.TYPE, vREVIEW.Review, 1); - // reviewer, no item - assertStatementsSize(vREVIEW.reviewer, (Value) null, 1); - - assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0); - - - RepositoryResult<Statement> reviews = getStatements(null, RDF.TYPE, vREVIEW.Review); - - try { - - while (reviews.hasNext()) { - - Resource review = reviews.next().getSubject(); - - assertContains(review, vREVIEW.rating, "4"); - assertNotContains(vREVIEW.title, null); - assertContains(review, vDCTERMS.date, "20050418"); - - assertContains( - vREVIEW.text, - "This movie has great music and visuals."); - - assertStatementsSize(vREVIEW.hasReview, review, 1); - - RepositoryResult<Statement> reviewSubjects = getStatements(null, vREVIEW.hasReview, review); - - try { - while(reviewSubjects.hasNext()) { - Resource reviewSubject = reviewSubjects.next().getSubject(); - assertContains(reviewSubject, vVCARD.fn, "Ying Xiong (HERO)"); - assertContains(reviewSubject, vVCARD.url, RDFUtils.uri("http://www.imdb.com/title/tt0299977/")); - } - - } finally { - reviewSubjects.close(); - } - - - } - - } finally { - reviews.close(); - } - - } - - /** - * This test is the same defined in {@link HReviewExtractorTest#test04NoHcardForItem} but - * assess the behavior in presence of a <i>Microformat</i> name with a different letter - * capitalization. - * - * @throws RepositoryException - */ - @Test - public void testCaseSensitiveness() throws RepositoryException { - assertExtract("/microformats/hreview/05-spec.html"); - assertModelNotEmpty(); - assertStatementsSize(RDF.TYPE, vREVIEW.Review, 1); - // reviewer, no item - assertStatementsSize(vREVIEW.reviewer, (Value) null, 1); - - assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0); - - RepositoryResult<Statement> reviews = getStatements(null, RDF.TYPE, vREVIEW.Review); - - try { - - while (reviews.hasNext()) { - - Resource review = reviews.next().getSubject(); - - assertContains(review, vREVIEW.rating, "4"); - assertNotContains(vREVIEW.title, null); - assertContains(review, vDCTERMS.date, "20050418"); - - assertContains( - vREVIEW.text, - "This movie has great music and visuals."); - - assertStatementsSize(vREVIEW.hasReview, review, 1); - - RepositoryResult<Statement> reviewSubjects = getStatements(null, vREVIEW.hasReview, review); - - try { - while(reviewSubjects.hasNext()) { - Resource reviewSubject = reviewSubjects.next().getSubject(); - assertContains(reviewSubject, vVCARD.fn, "Ying Xiong (HERO)"); - assertContains(reviewSubject, vVCARD.url, RDFUtils.uri("http://www.imdb.com/title/tt0299977/")); - } - - } finally { - reviewSubjects.close(); - } - - - } - - } finally { - reviews.close(); - } - } + } finally { + reviews.close(); + } + } } http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/test/java/org/apache/any23/extractor/html/HTMLMetaExtractorTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/extractor/html/HTMLMetaExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/HTMLMetaExtractorTest.java index 981c4e8..b35e33c 100644 --- a/core/src/test/java/org/apache/any23/extractor/html/HTMLMetaExtractorTest.java +++ b/core/src/test/java/org/apache/any23/extractor/html/HTMLMetaExtractorTest.java @@ -30,65 +30,44 @@ import org.openrdf.repository.RepositoryException; */ public class HTMLMetaExtractorTest extends AbstractExtractorTestCase { - private static final SINDICE vSINDICE = SINDICE.getInstance(); + private static final SINDICE vSINDICE = SINDICE.getInstance(); - protected ExtractorFactory<?> getExtractorFactory() { - return new HTMLMetaExtractorFactory(); - } + protected ExtractorFactory<?> getExtractorFactory() { + return new HTMLMetaExtractorFactory(); + } - @Test - public void testExtractPageMeta() throws RepositoryException { - assertExtract("/html/html-head-meta-extractor.html"); - assertModelNotEmpty(); - assertStatementsSize(null, null, null, 7); - assertContains( - new URIImpl("http://bob.example.com/"), - new URIImpl("http://purl.org/dc/elements/1.1/title"), - "XHTML+RDFa example", - "en" - ); - assertContains( - new URIImpl("http://bob.example.com/"), - new URIImpl("http://purl.org/dc/elements/1.1/language"), - "en", - "en" - ); - assertContains( - new URIImpl("http://bob.example.com/"), - new URIImpl("http://purl.org/dc/elements/1.1/subject"), - "XHTML+RDFa, semantic web", - "en" - ); - assertContains( - new URIImpl("http://bob.example.com/"), - new URIImpl("http://purl.org/dc/elements/1.1/format"), - "application/xhtml+xml", - "en" - ); - assertContains( - new URIImpl("http://bob.example.com/"), - new URIImpl("http://purl.org/dc/elements/1.1/description"), - "Example for Extensible Hypertext Markup Language + Resource Description Framework â in â attributes.", - "en" - ); - assertContains( - new URIImpl("http://bob.example.com/"), - new URIImpl(vSINDICE.NAMESPACE.toString() + "robots"), - "index, follow", - "en" - ); - assertContains( - new URIImpl("http://bob.example.com/"), - new URIImpl(vSINDICE.NAMESPACE.toString() + "content-language"), - "en", - "en" - ); - } + @Test + public void testExtractPageMeta() throws Exception { + assertExtract("/html/html-head-meta-extractor.html"); + assertModelNotEmpty(); + assertStatementsSize(null, null, null, 7); + assertContains(new URIImpl("http://bob.example.com/"), new URIImpl( + "http://purl.org/dc/elements/1.1/title"), "XHTML+RDFa example", + "en"); + assertContains(new URIImpl("http://bob.example.com/"), new URIImpl( + "http://purl.org/dc/elements/1.1/language"), "en", "en"); + assertContains(new URIImpl("http://bob.example.com/"), new URIImpl( + "http://purl.org/dc/elements/1.1/subject"), + "XHTML+RDFa, semantic web", "en"); + assertContains(new URIImpl("http://bob.example.com/"), new URIImpl( + "http://purl.org/dc/elements/1.1/format"), + "application/xhtml+xml", "en"); + assertContains( + new URIImpl("http://bob.example.com/"), + new URIImpl("http://purl.org/dc/elements/1.1/description"), + "Example for Extensible Hypertext Markup Language + Resource Description Framework â in â attributes.", + "en"); + assertContains(new URIImpl("http://bob.example.com/"), new URIImpl( + vSINDICE.NAMESPACE.toString() + "robots"), "index, follow", + "en"); + assertContains(new URIImpl("http://bob.example.com/"), new URIImpl( + vSINDICE.NAMESPACE.toString() + "content-language"), "en", "en"); + } + + @Test + public void testNoMeta() throws Exception { + assertExtract("/html/html-head-link-extractor.html"); + assertModelEmpty(); + } - @Test - public void testNoMeta() throws RepositoryException { - assertExtract("/html/html-head-link-extractor.html"); - assertModelEmpty(); - } - }
