Repository: any23 Updated Branches: refs/heads/master ddda9bc39 -> 6620c1efa
ANY23-140 Revise Any23 tests to remove fetching of web content Project: http://git-wip-us.apache.org/repos/asf/any23/repo Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/6620c1ef Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/6620c1ef Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/6620c1ef Branch: refs/heads/master Commit: 6620c1efa94489e99ab129d01926cd9b25937f64 Parents: ddda9bc Author: Lewis John McGibbney <[email protected]> Authored: Sat Dec 30 18:59:29 2017 +0000 Committer: Lewis John McGibbney <[email protected]> Committed: Sat Dec 30 18:59:29 2017 +0000 ---------------------------------------------------------------------- README.md | 16 ------- .../any23/extractor/SimpleExtractorFactory.java | 50 +++++++++++--------- .../extractor/rdfa/RDFa11ExtractorFactory.java | 4 +- .../test/java/org/apache/any23/Any23Test.java | 27 ++++++----- core/src/test/resources/log4j.properties | 2 +- pom.xml | 8 ++-- 6 files changed, 48 insertions(+), 59 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/any23/blob/6620c1ef/README.md ---------------------------------------------------------------------- diff --git a/README.md b/README.md index 735a2c8..f2b4543 100644 --- a/README.md +++ b/README.md @@ -11,22 +11,6 @@ Apache Anything To Triples (Any23) is a library and web service that extracts structured data in RDF format from a variety of Web documents. Any23 documentation can be found on the [website](http://any23.apache.org) -# Distribution Content - - * [api](https://github.com/apache/any23/tree/master/api): Any23 library external API. - * [core](https://github.com/apache/any23/tree/master/core): The library core codebase. - * [csvutils](https://github.com/apache/any23/tree/master/csvutils): A CSV specific package - * [encoding](https://github.com/apache/any23/tree/master/encoding): Encoding detection library. - * [mime](https://github.com/apache/any23/tree/master/mime): MIME Type detection library. - * [nquads](https://github.com/apache/any23/tree/master/nquads): NQuads parsing and serialization library. - * [plugins](https://github.com/apache/any23/tree/master/plugins): Library plugins codebase (read [plugins/README.md](https://github.com/apache/any23/blob/master/plugins/README.md) for further details). - * [service](https://github.com/apache/any23/tree/master/service): The library HTTP service codebase. - * [src](https://github.com/apache/any23/tree/master/src): Packaging for Any23 artifacts. - * [test-resources](https://github.com/apache/any23/tree/master/test-resources): Material relating to Any23 JUnit test cases. - * [RELEASE-NOTES.txt](https://github.com/apache/any23/blob/master/RELEASE-NOTES.txt): File reporting main release notes for every version. - * [LICENSE.txt](https://github.com/apache/any23/blob/master/LICENSE.txt): Applicable project license. - * README.md: This file. - # Online Documentation For details on the command line tool and web interface, see [here](http://any23.apache.org/getting-started.html) http://git-wip-us.apache.org/repos/asf/any23/blob/6620c1ef/core/src/main/java/org/apache/any23/extractor/SimpleExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/SimpleExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/SimpleExtractorFactory.java index 67c8fb3..cee0713 100644 --- a/core/src/main/java/org/apache/any23/extractor/SimpleExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/SimpleExtractorFactory.java @@ -34,13 +34,35 @@ public abstract class SimpleExtractorFactory<T extends Extractor<?>> implements private final Prefixes prefixes; - private Collection<MIMEType> supportedMIMETypes = new ArrayList<MIMEType>(); + private Collection<MIMEType> supportedMIMETypes = new ArrayList<>(); private String exampleInput; - + + protected SimpleExtractorFactory( + String name, + Prefixes prefixes) { + this.name = name; + this.prefixes = prefixes; + } + + protected SimpleExtractorFactory( + String name, + Prefixes prefixes, + Collection<String> supportedMIMETypes, + String exampleInput + ) { + this.name = name; + this.prefixes = (prefixes == null) ? Prefixes.EMPTY : prefixes; + for (String type : supportedMIMETypes) { + this.supportedMIMETypes.add(MIMEType.parse(type)); + } + this.exampleInput = exampleInput; + } + /** * @return the name of the {@link Extractor} */ + @Override public String getExtractorName() { return name; } @@ -48,6 +70,7 @@ public abstract class SimpleExtractorFactory<T extends Extractor<?>> implements /** * @return the label of the {@link Extractor} */ + @Override public String getExtractorLabel() { return this.getClass().getName(); } @@ -55,6 +78,7 @@ public abstract class SimpleExtractorFactory<T extends Extractor<?>> implements /** * @return the handled {@link org.apache.any23.rdf.Prefixes} */ + @Override public Prefixes getPrefixes() { return prefixes; } @@ -62,6 +86,7 @@ public abstract class SimpleExtractorFactory<T extends Extractor<?>> implements /** * @return the supported {@link org.apache.any23.mime.MIMEType} */ + @Override public Collection<MIMEType> getSupportedMIMETypes() { return supportedMIMETypes; } @@ -74,25 +99,4 @@ public abstract class SimpleExtractorFactory<T extends Extractor<?>> implements return exampleInput; } - protected SimpleExtractorFactory( - String name, - Prefixes prefixes) { - this.name = name; - this.prefixes = prefixes; - } - - protected SimpleExtractorFactory( - String name, - Prefixes prefixes, - Collection<String> supportedMIMETypes, - String exampleInput - ) { - this.name = name; - this.prefixes = (prefixes == null) ? Prefixes.EMPTY : prefixes; - for (String type : supportedMIMETypes) { - this.supportedMIMETypes.add(MIMEType.parse(type)); - } - this.exampleInput = exampleInput; - } - } http://git-wip-us.apache.org/repos/asf/any23/blob/6620c1ef/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java index 4c2ffe4..db2f9a0 100644 --- a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java @@ -36,7 +36,7 @@ public class RDFa11ExtractorFactory extends SimpleExtractorFactory<RDFa11Extract public static final Prefixes PREFIXES = null; private static final ExtractorDescription descriptionInstance = new RDFa11ExtractorFactory(); - + public RDFa11ExtractorFactory() { super( RDFa11ExtractorFactory.NAME, @@ -44,7 +44,7 @@ public class RDFa11ExtractorFactory extends SimpleExtractorFactory<RDFa11Extract Arrays.asList("text/html;q=0.3", "application/xhtml+xml;q=0.3"), "example-rdfa11.html"); } - + @Override public RDFa11Extractor createExtractor() { return new RDFa11Extractor(); http://git-wip-us.apache.org/repos/asf/any23/blob/6620c1ef/core/src/test/java/org/apache/any23/Any23Test.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/Any23Test.java b/core/src/test/java/org/apache/any23/Any23Test.java index b0cdf6d..b66bd78 100644 --- a/core/src/test/java/org/apache/any23/Any23Test.java +++ b/core/src/test/java/org/apache/any23/Any23Test.java @@ -28,7 +28,9 @@ import org.apache.any23.extractor.microdata.MicrodataExtractor; import org.apache.any23.filter.IgnoreAccidentalRDFa; import org.apache.any23.filter.IgnoreTitlesOfEmptyDocuments; import org.apache.any23.http.DefaultHTTPClient; +import org.apache.any23.http.DefaultHTTPClientConfiguration; import org.apache.any23.http.HTTPClient; +import org.apache.any23.http.HTTPClientConfiguration; import org.apache.any23.source.DocumentSource; import org.apache.any23.source.HTTPDocumentSource; import org.apache.any23.source.StringDocumentSource; @@ -211,7 +213,7 @@ public class Any23Test extends Any23OnlineTestBase { assumeOnlineAllowed(); /* 1 */Any23 runner = new Any23(); - /* 2 */runner.setHTTPUserAgent("test-user-agent"); + /* 2 */runner.setHTTPUserAgent("apache-any23-test-user-agent"); /* 3 */HTTPClient httpClient = runner.getHTTPClient(); /* 4 */DocumentSource source = new HTTPDocumentSource(httpClient, "http://dbpedia.org/resource/Trento"); @@ -301,20 +303,16 @@ public class Any23Test extends Any23OnlineTestBase { public void testGZippedContent() throws IOException, URISyntaxException, ExtractionException { assumeOnlineAllowed(); - - Any23 runner = new Any23(); - runner.setHTTPUserAgent("test-user-agent"); - HTTPClient httpClient = runner.getHTTPClient(); - DocumentSource source = new HTTPDocumentSource(httpClient, - "http://products.semweb.bestbuy.com/y/products/7590289/"); + final Any23 runner = new Any23(); + runner.setHTTPUserAgent("apache-any23-test-user-agent"); + DocumentSource source = new HTTPDocumentSource(runner.getHTTPClient(), + "https://dev.w3.org/html5/rdfa/"); ByteArrayOutputStream out = new ByteArrayOutputStream(); TripleHandler handler = new NTriplesWriter(out); runner.extract(source, handler); String n3 = out.toString("UTF-8"); - logger.debug("N3 " + n3); Assert.assertTrue(n3.length() > 0); - } @Test @@ -451,11 +449,14 @@ public class Any23Test extends Any23OnlineTestBase { ExtractionException { assumeOnlineAllowed(); final Any23 any23 = new Any23(); - any23.setHTTPUserAgent("test-user-agent"); + any23.setHTTPUserAgent("apache-any23-test-user-agent"); + HTTPClient client = any23.getHTTPClient(); + HTTPClientConfiguration configuration = new DefaultHTTPClientConfiguration("application/xml"); + client.init(configuration); final CountingTripleHandler cth = new CountingTripleHandler(false); final ReportingTripleHandler rth = new ReportingTripleHandler(cth); final ExtractionReport report = any23.extract( - "http://www.nativeremedies.com/XML/combos.xml", rth); + "http://www.legislation.gov.uk/ukpga/2015/17/section/4/data.xml", rth); Assert.assertFalse(report.hasMatchingExtractors()); Assert.assertEquals(0, cth.getCount()); } @@ -464,11 +465,11 @@ public class Any23Test extends Any23OnlineTestBase { public void testBlankNodesViaURL() throws IOException, ExtractionException { assumeOnlineAllowed(); final Any23 any23 = new Any23(); - any23.setHTTPUserAgent("test-user-agent"); + any23.setHTTPUserAgent("apache-any23-test-user-agent"); final CountingTripleHandler cth = new CountingTripleHandler(false); final ReportingTripleHandler rth = new ReportingTripleHandler(cth); final ExtractionReport report = any23.extract( - "http://www.usarab.org/news/?tag=england", rth); + "https://www.w3.org/", rth); Assert.assertTrue(report.hasMatchingExtractors()); } http://git-wip-us.apache.org/repos/asf/any23/blob/6620c1ef/core/src/test/resources/log4j.properties ---------------------------------------------------------------------- diff --git a/core/src/test/resources/log4j.properties b/core/src/test/resources/log4j.properties index 32492dd..4634d6b 100644 --- a/core/src/test/resources/log4j.properties +++ b/core/src/test/resources/log4j.properties @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -log4j.rootCategory=INFO, O +log4j.rootCategory=DEBUG, O # Stdout log4j.appender.O=org.apache.log4j.ConsoleAppender http://git-wip-us.apache.org/repos/asf/any23/blob/6620c1ef/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 9261bcc..56d59d1 100644 --- a/pom.xml +++ b/pom.xml @@ -242,18 +242,18 @@ <httpclient.version>4.5.3</httpclient.version> <httpcore.version>4.4.6</httpcore.version> - <owlapi.version>5.1.0</owlapi.version> + <owlapi.version>5.1.3</owlapi.version> <poi.version>3.16</poi.version> - <rdf4j.version>2.2.2</rdf4j.version> + <rdf4j.version>2.2.4</rdf4j.version> <semargl.version>0.7</semargl.version> <slf4j.logger.version>1.7.25</slf4j.logger.version> - <tika.version>1.15</tika.version> + <tika.version>1.17</tika.version> <!-- Overridden in profiles to add JDK specific arguments to surefire --> <surefire-extra-args /> <!-- Used to track API changes based on Semantic Versioning --> - <latest.stable.released>2.0</latest.stable.released> + <latest.stable.released>2.1</latest.stable.released> <!-- Google Analytics id for website --> <form.tracker.id>UA-59636188-1</form.tracker.id>
