This is an automated email from the ASF dual-hosted git repository.

hansbrende pushed a commit to branch ANY23-443
in repository https://gitbox.apache.org/repos/asf/any23.git


The following commit(s) were added to refs/heads/ANY23-443 by this push:
     new d9f1fa4  ANY23-443 cleanup
d9f1fa4 is described below

commit d9f1fa4036133158b1a91976d9d05d152c02feaa
Author: Hans <[email protected]>
AuthorDate: Fri Sep 20 23:58:56 2019 -0500

    ANY23-443 cleanup
---
 .../any23/extractor/rdf/BaseRDFExtractor.java      |  4 ++-
 .../any23/extractor/rdfa/BaseRDFaExtractor.java    | 36 ++++++++++++++++++++++
 .../apache/any23/extractor/rdfa/JsoupScanner.java  | 29 ++++++++++++-----
 .../any23/extractor/rdfa/RDFa11Extractor.java      | 32 +++++++++++++++++++
 .../apache/any23/extractor/rdfa/RDFaExtractor.java | 32 +++++++++++++++++++
 .../apache/any23/extractor/rdfa/SemarglSink.java   | 20 ++++++++++++
 .../any23/extractor/rdfa/RDFa11ExtractorTest.java  |  1 +
 7 files changed, 145 insertions(+), 9 deletions(-)

diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
index 25d105e..2ea04a0 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
@@ -120,7 +120,9 @@ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor {
         }
     }
 
-    protected static String toString(Throwable th) {
+    // keep private to avoid backwards compatibility woes (may move around later)
+    @SuppressWarnings("Duplicates")
+    private static String toString(Throwable th) {
         StringWriter writer = new StringWriter();
         try (PrintWriter pw = new PrintWriter(writer)) {
             th.printStackTrace(pw);
diff --git a/core/src/main/java/org/apache/any23/extractor/rdfa/BaseRDFaExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdfa/BaseRDFaExtractor.java
index c183499..6027409 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdfa/BaseRDFaExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdfa/BaseRDFaExtractor.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.any23.extractor.rdfa;
 
 import org.apache.any23.extractor.ExtractionContext;
@@ -20,7 +37,12 @@ import org.semarglproject.source.StreamProcessor;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.PrintWriter;
+import java.io.StringWriter;
 
+/**
+ * @author Hans Brende ([email protected])
+ */
 abstract class BaseRDFaExtractor extends BaseRDFExtractor {
 
     private final short version;
@@ -56,4 +78,18 @@ abstract class BaseRDFaExtractor extends BaseRDFExtractor {
             extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, toString(e), -1, -1);
         }
     }
+
+    @SuppressWarnings("Duplicates")
+    private static String toString(Throwable th) {
+        StringWriter writer = new StringWriter();
+        try (PrintWriter pw = new PrintWriter(writer)) {
+            th.printStackTrace(pw);
+        }
+        String string = writer.toString();
+        if (string.length() > 1024) {
+            return string.substring(0, 1021) + "...";
+        }
+        return string;
+    }
+
 }
diff --git a/core/src/main/java/org/apache/any23/extractor/rdfa/JsoupScanner.java b/core/src/main/java/org/apache/any23/extractor/rdfa/JsoupScanner.java
index 7fec69c..066f050 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdfa/JsoupScanner.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdfa/JsoupScanner.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.any23.extractor.rdfa;
 
 import org.jsoup.nodes.CDataNode;
@@ -13,6 +30,9 @@ import org.xml.sax.helpers.NamespaceSupport;
 
 import java.util.ArrayList;
 
+/**
+ * @author Hans Brende ([email protected])
+ */
 class JsoupScanner implements NodeVisitor {
 
     private final NamespaceSupport ns = new NamespaceSupport();
@@ -29,10 +49,6 @@ class JsoupScanner implements NodeVisitor {
         return str == null ? "" : str;
     }
 
-//    private static String orNull(String str) {
-//        return "".equals(str) ? null : str;
-//    }
-
     private void startElement(Element e) throws SAXException {
         ns.pushContext();
 
@@ -111,7 +127,6 @@ class JsoupScanner implements NodeVisitor {
         handler.comment(str.toCharArray(), 0, str.length());
     }
 
-
     @Override
     public void head(Node node, int depth) {
         try {
@@ -141,6 +156,7 @@ class JsoupScanner implements NodeVisitor {
                 endElement((Element) node);
             } else if (node instanceof CDataNode) {
                 handler.endCDATA();
+                // TODO support document types
 //            } else if (node instanceof DocumentType) {
 //                handler.endDTD();
             }
@@ -149,11 +165,8 @@ class JsoupScanner implements NodeVisitor {
         }
     }
 
-
-
     @SuppressWarnings("unchecked")
     private static <E extends Throwable> void sneakyThrow(Throwable e) throws E {
         throw (E)e;
     }
-
 }
diff --git a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java
index ae6c5ae..ff0d03c 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java
@@ -29,14 +29,40 @@ import org.semarglproject.vocab.RDFa;
  * <a href="http://www.w3.org/TR/rdfa-core/">RDFa 1.1</a> specification.
  *
  * @author Michele Mostarda ([email protected])
+ * @author Hans Brende ([email protected])
  */
 public class RDFa11Extractor extends BaseRDFaExtractor {
 
+    /**
+     * @deprecated since 2.4. This extractor has never supported these settings. Use {@link #RDFa11Extractor()} instead.
+     * @param verifyDataType has no effect
+     * @param stopAtFirstError has no effect
+     */
     @Deprecated
     public RDFa11Extractor(boolean verifyDataType, boolean stopAtFirstError) {
         this();
     }
 
+    /**
+     * @deprecated since 2.4. This extractor has never supported this setting. Do not use.
+     * @param stopAtFirstError has no effect
+     */
+    @Deprecated
+    @Override
+    public void setStopAtFirstError(boolean stopAtFirstError) {
+        super.setStopAtFirstError(stopAtFirstError);
+    }
+
+    /**
+     * @deprecated since 2.4. This extractor has never supported this setting. Do not use.
+     * @param verifyDataType has no effect
+     */
+    @Deprecated
+    @Override
+    public void setVerifyDataType(boolean verifyDataType) {
+        super.setVerifyDataType(verifyDataType);
+    }
+
     public RDFa11Extractor() {
         super(RDFa.VERSION_11);
     }
@@ -46,6 +72,12 @@ public class RDFa11Extractor extends BaseRDFaExtractor {
         return RDFa11ExtractorFactory.getDescriptionInstance();
     }
 
+    /**
+     * @deprecated since 2.4. This extractor no longer wraps an RDF4J {@link RDFParser}. Do not use this method.
+     * @param extractionContext the extraction context
+     * @param extractionResult the extraction result
+     * @return a {@link RDFParser}
+     */
     @Override
     @Deprecated
     protected RDFParser getParser(ExtractionContext extractionContext, ExtractionResult extractionResult) {
diff --git a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java
index 1d8eda6..d8583b3 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java
@@ -29,14 +29,40 @@ import org.semarglproject.vocab.RDFa;
  * <a href="http://www.w3.org/TR/rdfa-syntax/">RDFa 1.0</a> specification.
  *
  * @author Michele Mostarda ([email protected])
+ * @author Hans Brende ([email protected])
  */
 public class RDFaExtractor extends BaseRDFaExtractor {
 
+    /**
+     * @deprecated since 2.4. This extractor has never supported these settings. Use {@link #RDFaExtractor()} instead.
+     * @param verifyDataType has no effect
+     * @param stopAtFirstError has no effect
+     */
     @Deprecated
     public RDFaExtractor(boolean verifyDataType, boolean stopAtFirstError) {
         this();
     }
 
+    /**
+     * @deprecated since 2.4. This extractor has never supported this setting. Do not use.
+     * @param stopAtFirstError has no effect
+     */
+    @Deprecated
+    @Override
+    public void setStopAtFirstError(boolean stopAtFirstError) {
+        super.setStopAtFirstError(stopAtFirstError);
+    }
+
+    /**
+     * @deprecated since 2.4. This extractor has never supported this setting. Do not use.
+     * @param verifyDataType has no effect
+     */
+    @Deprecated
+    @Override
+    public void setVerifyDataType(boolean verifyDataType) {
+        super.setVerifyDataType(verifyDataType);
+    }
+
     public RDFaExtractor() {
         super(RDFa.VERSION_10);
     }
@@ -46,6 +72,12 @@ public class RDFaExtractor extends BaseRDFaExtractor {
         return RDFaExtractorFactory.getDescriptionInstance();
     }
 
+    /**
+     * @deprecated since 2.4. This extractor no longer wraps an RDF4J {@link RDFParser}. Do not use this method.
+     * @param extractionContext the extraction context
+     * @param extractionResult the extraction result
+     * @return a {@link RDFParser}
+     */
     @Override
     @Deprecated
     protected RDFParser getParser(ExtractionContext extractionContext, ExtractionResult extractionResult) {
diff --git a/core/src/main/java/org/apache/any23/extractor/rdfa/SemarglSink.java b/core/src/main/java/org/apache/any23/extractor/rdfa/SemarglSink.java
index 3e043f1..b642a1c 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdfa/SemarglSink.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdfa/SemarglSink.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.any23.extractor.rdfa;
 
 import org.apache.any23.extractor.ExtractionResult;
@@ -6,6 +23,9 @@ import org.eclipse.rdf4j.model.Resource;
 import org.eclipse.rdf4j.model.Value;
 import org.eclipse.rdf4j.model.ValueFactory;
 
+/**
+ * @author Hans Brende ([email protected])
+ */
 final class SemarglSink implements org.semarglproject.sink.TripleSink, org.semarglproject.rdf.ProcessorGraphHandler {
 
     private static final String BNODE_PREFIX = org.semarglproject.vocab.RDF.BNODE_PREFIX;
diff --git a/core/src/test/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorTest.java
index 35ae030..6a9eedb 100644
--- a/core/src/test/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorTest.java
@@ -94,6 +94,7 @@ public class RDFa11ExtractorTest extends AbstractRDFaExtractorTestCase {
     public void testBasicWithSyntaxErrors() {
         //test issues ANY23-347 and ANY23-350
         assertExtract("/html/rdfa/basic-with-errors.html");
+        System.out.println(dumpModelToTurtle());
         assertContains(null, vDCTERMS.creator, RDFUtils.literal("Alice", "en"));
         assertContains(null, vDCTERMS.title,
                 RDFUtils.literal("The trouble with Bob", "en"));

Reply via email to