Repository: any23
Updated Branches:
  refs/heads/master ddda9bc39 -> 6620c1efa


ANY23-140 Revise Any23 tests to remove fetching of web content


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/6620c1ef
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/6620c1ef
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/6620c1ef

Branch: refs/heads/master
Commit: 6620c1efa94489e99ab129d01926cd9b25937f64
Parents: ddda9bc
Author: Lewis John McGibbney <[email protected]>
Authored: Sat Dec 30 18:59:29 2017 +0000
Committer: Lewis John McGibbney <[email protected]>
Committed: Sat Dec 30 18:59:29 2017 +0000

----------------------------------------------------------------------
 README.md                                       | 16 -------
 .../any23/extractor/SimpleExtractorFactory.java | 50 +++++++++++---------
 .../extractor/rdfa/RDFa11ExtractorFactory.java  |  4 +-
 .../test/java/org/apache/any23/Any23Test.java   | 27 ++++++-----
 core/src/test/resources/log4j.properties        |  2 +-
 pom.xml                                         |  8 ++--
 6 files changed, 48 insertions(+), 59 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/6620c1ef/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index 735a2c8..f2b4543 100644
--- a/README.md
+++ b/README.md
@@ -11,22 +11,6 @@ Apache Anything To Triples (Any23) is a library and web 
service that extracts
 structured data in RDF format from a variety of Web documents.
 Any23 documentation can be found on the [website](http://any23.apache.org)
 
-# Distribution Content
-
- * [api](https://github.com/apache/any23/tree/master/api): Any23 library 
external API.
- * [core](https://github.com/apache/any23/tree/master/core): The library core 
codebase.
- * [csvutils](https://github.com/apache/any23/tree/master/csvutils): A CSV 
specific package
- * [encoding](https://github.com/apache/any23/tree/master/encoding): Encoding 
detection library.
- * [mime](https://github.com/apache/any23/tree/master/mime): MIME Type 
detection library.
- * [nquads](https://github.com/apache/any23/tree/master/nquads): NQuads 
parsing and serialization library.
- * [plugins](https://github.com/apache/any23/tree/master/plugins): Library 
plugins codebase (read 
[plugins/README.md](https://github.com/apache/any23/blob/master/plugins/README.md)
 for further details).
- * [service](https://github.com/apache/any23/tree/master/service): The library 
HTTP service codebase.
- * [src](https://github.com/apache/any23/tree/master/src): Packaging for Any23 
artifacts.
- * 
[test-resources](https://github.com/apache/any23/tree/master/test-resources): 
Material relating to Any23 JUnit test cases.
- * 
[RELEASE-NOTES.txt](https://github.com/apache/any23/blob/master/RELEASE-NOTES.txt):
 File reporting main release notes for every version.
- * [LICENSE.txt](https://github.com/apache/any23/blob/master/LICENSE.txt): 
Applicable project license.
- * README.md: This file.
-
 # Online Documentation
 
 For details on the command line tool and web interface, see 
[here](http://any23.apache.org/getting-started.html)

http://git-wip-us.apache.org/repos/asf/any23/blob/6620c1ef/core/src/main/java/org/apache/any23/extractor/SimpleExtractorFactory.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/extractor/SimpleExtractorFactory.java 
b/core/src/main/java/org/apache/any23/extractor/SimpleExtractorFactory.java
index 67c8fb3..cee0713 100644
--- a/core/src/main/java/org/apache/any23/extractor/SimpleExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/SimpleExtractorFactory.java
@@ -34,13 +34,35 @@ public abstract class SimpleExtractorFactory<T extends 
Extractor<?>> implements
 
     private final Prefixes prefixes;
 
-    private Collection<MIMEType> supportedMIMETypes = new 
ArrayList<MIMEType>();
+    private Collection<MIMEType> supportedMIMETypes = new ArrayList<>();
 
     private String exampleInput;
-    
+
+    protected SimpleExtractorFactory(
+            String name,
+            Prefixes prefixes) {
+        this.name = name;
+        this.prefixes = prefixes;
+    }
+
+    protected SimpleExtractorFactory(
+            String name,
+            Prefixes prefixes,
+            Collection<String> supportedMIMETypes,
+            String exampleInput
+    ) {
+        this.name = name;
+        this.prefixes = (prefixes == null) ? Prefixes.EMPTY : prefixes;
+        for (String type : supportedMIMETypes) {
+            this.supportedMIMETypes.add(MIMEType.parse(type));
+        }
+        this.exampleInput = exampleInput;
+    }
+
     /**
      * @return the name of the {@link Extractor}
      */
+    @Override
     public String getExtractorName() {
         return name;
     }
@@ -48,6 +70,7 @@ public abstract class SimpleExtractorFactory<T extends 
Extractor<?>> implements
     /**
      * @return the label of the {@link Extractor}
      */
+    @Override
     public String getExtractorLabel() {
         return this.getClass().getName();
     }
@@ -55,6 +78,7 @@ public abstract class SimpleExtractorFactory<T extends 
Extractor<?>> implements
     /**
      * @return the handled {@link org.apache.any23.rdf.Prefixes}
      */
+    @Override
     public Prefixes getPrefixes() {
         return prefixes;
     }
@@ -62,6 +86,7 @@ public abstract class SimpleExtractorFactory<T extends 
Extractor<?>> implements
     /**
      * @return the supported {@link org.apache.any23.mime.MIMEType}
      */
+    @Override
     public Collection<MIMEType> getSupportedMIMETypes() {
         return supportedMIMETypes;
     }
@@ -74,25 +99,4 @@ public abstract class SimpleExtractorFactory<T extends 
Extractor<?>> implements
         return exampleInput;
     }
 
-    protected SimpleExtractorFactory(
-            String name,
-            Prefixes prefixes) {
-        this.name = name;
-        this.prefixes = prefixes;
-    }
-    
-    protected SimpleExtractorFactory(
-            String name,
-            Prefixes prefixes,
-            Collection<String> supportedMIMETypes,
-            String exampleInput
-    ) {
-        this.name = name;
-        this.prefixes = (prefixes == null) ? Prefixes.EMPTY : prefixes;
-        for (String type : supportedMIMETypes) {
-            this.supportedMIMETypes.add(MIMEType.parse(type));
-        }
-        this.exampleInput = exampleInput;
-    }
-
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/6620c1ef/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java
 
b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java
index 4c2ffe4..db2f9a0 100644
--- 
a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java
+++ 
b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java
@@ -36,7 +36,7 @@ public class RDFa11ExtractorFactory extends 
SimpleExtractorFactory<RDFa11Extract
     public static final Prefixes PREFIXES = null;
 
     private static final ExtractorDescription descriptionInstance = new 
RDFa11ExtractorFactory();
-    
+
     public RDFa11ExtractorFactory() {
         super(
                 RDFa11ExtractorFactory.NAME, 
@@ -44,7 +44,7 @@ public class RDFa11ExtractorFactory extends 
SimpleExtractorFactory<RDFa11Extract
                 Arrays.asList("text/html;q=0.3", 
"application/xhtml+xml;q=0.3"),
                 "example-rdfa11.html");
     }
-    
+
     @Override
     public RDFa11Extractor createExtractor() {
         return new RDFa11Extractor();

http://git-wip-us.apache.org/repos/asf/any23/blob/6620c1ef/core/src/test/java/org/apache/any23/Any23Test.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/Any23Test.java 
b/core/src/test/java/org/apache/any23/Any23Test.java
index b0cdf6d..b66bd78 100644
--- a/core/src/test/java/org/apache/any23/Any23Test.java
+++ b/core/src/test/java/org/apache/any23/Any23Test.java
@@ -28,7 +28,9 @@ import 
org.apache.any23.extractor.microdata.MicrodataExtractor;
 import org.apache.any23.filter.IgnoreAccidentalRDFa;
 import org.apache.any23.filter.IgnoreTitlesOfEmptyDocuments;
 import org.apache.any23.http.DefaultHTTPClient;
+import org.apache.any23.http.DefaultHTTPClientConfiguration;
 import org.apache.any23.http.HTTPClient;
+import org.apache.any23.http.HTTPClientConfiguration;
 import org.apache.any23.source.DocumentSource;
 import org.apache.any23.source.HTTPDocumentSource;
 import org.apache.any23.source.StringDocumentSource;
@@ -211,7 +213,7 @@ public class Any23Test extends Any23OnlineTestBase {
         assumeOnlineAllowed();
 
         /* 1 */Any23 runner = new Any23();
-        /* 2 */runner.setHTTPUserAgent("test-user-agent");
+        /* 2 */runner.setHTTPUserAgent("apache-any23-test-user-agent");
         /* 3 */HTTPClient httpClient = runner.getHTTPClient();
         /* 4 */DocumentSource source = new HTTPDocumentSource(httpClient,
                 "http://dbpedia.org/resource/Trento");
@@ -301,20 +303,16 @@ public class Any23Test extends Any23OnlineTestBase {
     public void testGZippedContent() throws IOException, URISyntaxException,
             ExtractionException {
         assumeOnlineAllowed();
-
-        Any23 runner = new Any23();
-        runner.setHTTPUserAgent("test-user-agent");
-        HTTPClient httpClient = runner.getHTTPClient();
-        DocumentSource source = new HTTPDocumentSource(httpClient,
-                "http://products.semweb.bestbuy.com/y/products/7590289/");
+        final Any23 runner = new Any23();
+        runner.setHTTPUserAgent("apache-any23-test-user-agent");
+        DocumentSource source = new HTTPDocumentSource(runner.getHTTPClient(),
+                "https://dev.w3.org/html5/rdfa/");
         ByteArrayOutputStream out = new ByteArrayOutputStream();
         TripleHandler handler = new NTriplesWriter(out);
         runner.extract(source, handler);
         String n3 = out.toString("UTF-8");
-
         logger.debug("N3 " + n3);
         Assert.assertTrue(n3.length() > 0);
-
     }
 
     @Test
@@ -451,11 +449,14 @@ public class Any23Test extends Any23OnlineTestBase {
             ExtractionException {
         assumeOnlineAllowed();
         final Any23 any23 = new Any23();
-        any23.setHTTPUserAgent("test-user-agent");
+        any23.setHTTPUserAgent("apache-any23-test-user-agent");
+        HTTPClient client = any23.getHTTPClient();
+        HTTPClientConfiguration configuration = new 
DefaultHTTPClientConfiguration("application/xml");
+        client.init(configuration);
         final CountingTripleHandler cth = new CountingTripleHandler(false);
         final ReportingTripleHandler rth = new ReportingTripleHandler(cth);
         final ExtractionReport report = any23.extract(
-                "http://www.nativeremedies.com/XML/combos.xml", rth);
+                
"http://www.legislation.gov.uk/ukpga/2015/17/section/4/data.xml", rth);
         Assert.assertFalse(report.hasMatchingExtractors());
         Assert.assertEquals(0, cth.getCount());
     }
@@ -464,11 +465,11 @@ public class Any23Test extends Any23OnlineTestBase {
     public void testBlankNodesViaURL() throws IOException, ExtractionException 
{
         assumeOnlineAllowed();
         final Any23 any23 = new Any23();
-        any23.setHTTPUserAgent("test-user-agent");
+        any23.setHTTPUserAgent("apache-any23-test-user-agent");
         final CountingTripleHandler cth = new CountingTripleHandler(false);
         final ReportingTripleHandler rth = new ReportingTripleHandler(cth);
         final ExtractionReport report = any23.extract(
-                "http://www.usarab.org/news/?tag=england", rth);
+                "https://www.w3.org/", rth);
         Assert.assertTrue(report.hasMatchingExtractors());
     }
 

http://git-wip-us.apache.org/repos/asf/any23/blob/6620c1ef/core/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/core/src/test/resources/log4j.properties 
b/core/src/test/resources/log4j.properties
index 32492dd..4634d6b 100644
--- a/core/src/test/resources/log4j.properties
+++ b/core/src/test/resources/log4j.properties
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-log4j.rootCategory=INFO, O
+log4j.rootCategory=DEBUG, O
 
 # Stdout
 log4j.appender.O=org.apache.log4j.ConsoleAppender  

http://git-wip-us.apache.org/repos/asf/any23/blob/6620c1ef/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 9261bcc..56d59d1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -242,18 +242,18 @@
 
     <httpclient.version>4.5.3</httpclient.version>
     <httpcore.version>4.4.6</httpcore.version>
-    <owlapi.version>5.1.0</owlapi.version>
+    <owlapi.version>5.1.3</owlapi.version>
     <poi.version>3.16</poi.version>
-    <rdf4j.version>2.2.2</rdf4j.version>
+    <rdf4j.version>2.2.4</rdf4j.version>
     <semargl.version>0.7</semargl.version>
     <slf4j.logger.version>1.7.25</slf4j.logger.version>
-    <tika.version>1.15</tika.version>
+    <tika.version>1.17</tika.version>
 
     <!-- Overridden in profiles to add JDK specific arguments to surefire -->
     <surefire-extra-args />
 
     <!-- Used to track API changes based on Semantic Versioning -->
-    <latest.stable.released>2.0</latest.stable.released>
+    <latest.stable.released>2.1</latest.stable.released>
 
     <!-- Google Analytics id for website -->
     <form.tracker.id>UA-59636188-1</form.tracker.id>

Reply via email to