This is an automated email from the ASF dual-hosted git repository.
hansbrende pushed a commit to branch ANY23-443
in repository https://gitbox.apache.org/repos/asf/any23.git
The following commit(s) were added to refs/heads/ANY23-443 by this push:
new d9f1fa4 ANY23-443 cleanup
d9f1fa4 is described below
commit d9f1fa4036133158b1a91976d9d05d152c02feaa
Author: Hans <[email protected]>
AuthorDate: Fri Sep 20 23:58:56 2019 -0500
ANY23-443 cleanup
---
.../any23/extractor/rdf/BaseRDFExtractor.java | 4 ++-
.../any23/extractor/rdfa/BaseRDFaExtractor.java | 36 ++++++++++++++++++++++
.../apache/any23/extractor/rdfa/JsoupScanner.java | 29 ++++++++++++-----
.../any23/extractor/rdfa/RDFa11Extractor.java | 32 +++++++++++++++++++
.../apache/any23/extractor/rdfa/RDFaExtractor.java | 32 +++++++++++++++++++
.../apache/any23/extractor/rdfa/SemarglSink.java | 20 ++++++++++++
.../any23/extractor/rdfa/RDFa11ExtractorTest.java | 1 +
7 files changed, 145 insertions(+), 9 deletions(-)
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
index 25d105e..2ea04a0 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
@@ -120,7 +120,9 @@ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor {
}
}
- protected static String toString(Throwable th) {
+ // keep private to avoid backwards compatibility woes (may move around later)
+ @SuppressWarnings("Duplicates")
+ private static String toString(Throwable th) {
StringWriter writer = new StringWriter();
try (PrintWriter pw = new PrintWriter(writer)) {
th.printStackTrace(pw);
diff --git a/core/src/main/java/org/apache/any23/extractor/rdfa/BaseRDFaExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdfa/BaseRDFaExtractor.java
index c183499..6027409 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdfa/BaseRDFaExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdfa/BaseRDFaExtractor.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.any23.extractor.rdfa;
import org.apache.any23.extractor.ExtractionContext;
@@ -20,7 +37,12 @@ import org.semarglproject.source.StreamProcessor;
import java.io.IOException;
import java.io.InputStream;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+/**
+ * @author Hans Brende ([email protected])
+ */
abstract class BaseRDFaExtractor extends BaseRDFExtractor {
private final short version;
@@ -56,4 +78,18 @@ abstract class BaseRDFaExtractor extends BaseRDFExtractor {
extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL,
toString(e), -1, -1);
}
}
+
+ @SuppressWarnings("Duplicates")
+ private static String toString(Throwable th) {
+ StringWriter writer = new StringWriter();
+ try (PrintWriter pw = new PrintWriter(writer)) {
+ th.printStackTrace(pw);
+ }
+ String string = writer.toString();
+ if (string.length() > 1024) {
+ return string.substring(0, 1021) + "...";
+ }
+ return string;
+ }
+
}
diff --git a/core/src/main/java/org/apache/any23/extractor/rdfa/JsoupScanner.java b/core/src/main/java/org/apache/any23/extractor/rdfa/JsoupScanner.java
index 7fec69c..066f050 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdfa/JsoupScanner.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdfa/JsoupScanner.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.any23.extractor.rdfa;
import org.jsoup.nodes.CDataNode;
@@ -13,6 +30,9 @@ import org.xml.sax.helpers.NamespaceSupport;
import java.util.ArrayList;
+/**
+ * @author Hans Brende ([email protected])
+ */
class JsoupScanner implements NodeVisitor {
private final NamespaceSupport ns = new NamespaceSupport();
@@ -29,10 +49,6 @@ class JsoupScanner implements NodeVisitor {
return str == null ? "" : str;
}
-// private static String orNull(String str) {
-// return "".equals(str) ? null : str;
-// }
-
private void startElement(Element e) throws SAXException {
ns.pushContext();
@@ -111,7 +127,6 @@ class JsoupScanner implements NodeVisitor {
handler.comment(str.toCharArray(), 0, str.length());
}
-
@Override
public void head(Node node, int depth) {
try {
@@ -141,6 +156,7 @@ class JsoupScanner implements NodeVisitor {
endElement((Element) node);
} else if (node instanceof CDataNode) {
handler.endCDATA();
+ // TODO support document types
// } else if (node instanceof DocumentType) {
// handler.endDTD();
}
@@ -149,11 +165,8 @@ class JsoupScanner implements NodeVisitor {
}
}
-
-
@SuppressWarnings("unchecked")
private static <E extends Throwable> void sneakyThrow(Throwable e) throws
E {
throw (E)e;
}
-
}
diff --git a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java
index ae6c5ae..ff0d03c 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java
@@ -29,14 +29,40 @@ import org.semarglproject.vocab.RDFa;
* <a href="http://www.w3.org/TR/rdfa-core/">RDFa 1.1</a> specification.
*
* @author Michele Mostarda ([email protected])
+ * @author Hans Brende ([email protected])
*/
public class RDFa11Extractor extends BaseRDFaExtractor {
+ /**
+ * @deprecated since 2.4. This extractor has never supported these settings. Use {@link #RDFa11Extractor()} instead.
+ * @param verifyDataType has no effect
+ * @param stopAtFirstError has no effect
+ */
@Deprecated
public RDFa11Extractor(boolean verifyDataType, boolean stopAtFirstError) {
this();
}
+ /**
+ * @deprecated since 2.4. This extractor has never supported this setting. Do not use.
+ * @param stopAtFirstError has no effect
+ */
+ @Deprecated
+ @Override
+ public void setStopAtFirstError(boolean stopAtFirstError) {
+ super.setStopAtFirstError(stopAtFirstError);
+ }
+
+ /**
+ * @deprecated since 2.4. This extractor has never supported this setting. Do not use.
+ * @param verifyDataType has no effect
+ */
+ @Deprecated
+ @Override
+ public void setVerifyDataType(boolean verifyDataType) {
+ super.setVerifyDataType(verifyDataType);
+ }
+
public RDFa11Extractor() {
super(RDFa.VERSION_11);
}
@@ -46,6 +72,12 @@ public class RDFa11Extractor extends BaseRDFaExtractor {
return RDFa11ExtractorFactory.getDescriptionInstance();
}
+ /**
+ * @deprecated since 2.4. This extractor no longer wraps an RDF4J {@link RDFParser}. Do not use this method.
+ * @param extractionContext the extraction context
+ * @param extractionResult the extraction result
+ * @return a {@link RDFParser}
+ */
@Override
@Deprecated
protected RDFParser getParser(ExtractionContext extractionContext,
ExtractionResult extractionResult) {
diff --git a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java
index 1d8eda6..d8583b3 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java
@@ -29,14 +29,40 @@ import org.semarglproject.vocab.RDFa;
* <a href="http://www.w3.org/TR/rdfa-syntax/">RDFa 1.0</a> specification.
*
* @author Michele Mostarda ([email protected])
+ * @author Hans Brende ([email protected])
*/
public class RDFaExtractor extends BaseRDFaExtractor {
+ /**
+ * @deprecated since 2.4. This extractor has never supported these settings. Use {@link #RDFaExtractor()} instead.
+ * @param verifyDataType has no effect
+ * @param stopAtFirstError has no effect
+ */
@Deprecated
public RDFaExtractor(boolean verifyDataType, boolean stopAtFirstError) {
this();
}
+ /**
+ * @deprecated since 2.4. This extractor has never supported this setting. Do not use.
+ * @param stopAtFirstError has no effect
+ */
+ @Deprecated
+ @Override
+ public void setStopAtFirstError(boolean stopAtFirstError) {
+ super.setStopAtFirstError(stopAtFirstError);
+ }
+
+ /**
+ * @deprecated since 2.4. This extractor has never supported this setting. Do not use.
+ * @param verifyDataType has no effect
+ */
+ @Deprecated
+ @Override
+ public void setVerifyDataType(boolean verifyDataType) {
+ super.setVerifyDataType(verifyDataType);
+ }
+
public RDFaExtractor() {
super(RDFa.VERSION_10);
}
@@ -46,6 +72,12 @@ public class RDFaExtractor extends BaseRDFaExtractor {
return RDFaExtractorFactory.getDescriptionInstance();
}
+ /**
+ * @deprecated since 2.4. This extractor no longer wraps an RDF4J {@link RDFParser}. Do not use this method.
+ * @param extractionContext the extraction context
+ * @param extractionResult the extraction result
+ * @return a {@link RDFParser}
+ */
@Override
@Deprecated
protected RDFParser getParser(ExtractionContext extractionContext,
ExtractionResult extractionResult) {
diff --git a/core/src/main/java/org/apache/any23/extractor/rdfa/SemarglSink.java b/core/src/main/java/org/apache/any23/extractor/rdfa/SemarglSink.java
index 3e043f1..b642a1c 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdfa/SemarglSink.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdfa/SemarglSink.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.any23.extractor.rdfa;
import org.apache.any23.extractor.ExtractionResult;
@@ -6,6 +23,9 @@ import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
+/**
+ * @author Hans Brende ([email protected])
+ */
final class SemarglSink implements org.semarglproject.sink.TripleSink,
org.semarglproject.rdf.ProcessorGraphHandler {
private static final String BNODE_PREFIX =
org.semarglproject.vocab.RDF.BNODE_PREFIX;
diff --git a/core/src/test/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorTest.java
index 35ae030..6a9eedb 100644
--- a/core/src/test/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorTest.java
@@ -94,6 +94,7 @@ public class RDFa11ExtractorTest extends AbstractRDFaExtractorTestCase {
public void testBasicWithSyntaxErrors() {
//test issues ANY23-347 and ANY23-350
assertExtract("/html/rdfa/basic-with-errors.html");
+ System.out.println(dumpModelToTurtle());
assertContains(null, vDCTERMS.creator, RDFUtils.literal("Alice",
"en"));
assertContains(null, vDCTERMS.title,
RDFUtils.literal("The trouble with Bob", "en"));