This is an automated email from the ASF dual-hosted git repository.
lewismc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/any23.git
The following commit(s) were added to refs/heads/master by this push:
new 5d3a549 ANY23-524 fix invalid href attribute (#216)
5d3a549 is described below
commit 5d3a5498545d4534a44492897831719f616a5cfc
Author: Igor Shevchenko <[email protected]>
AuthorDate: Sat Oct 23 02:02:18 2021 +0300
ANY23-524 fix invalid href attribute (#216)
---
.../apache/any23/extractor/html/XFNExtractor.java | 9 ++++++--
.../any23/extractor/html/XFNExtractorTest.java | 7 ++++++
.../xfn/some-links-with-incorrect-href.html | 27 ++++++++++++++++++++++
3 files changed, 41 insertions(+), 2 deletions(-)
diff --git
a/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java
b/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java
index 3d39cb7..5cf63c9 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java
@@ -81,8 +81,13 @@ public class XFNExtractor implements TagSoupDOMExtractor {
}
private boolean extractLink(Node firstLink, BNode subject, IRI
documentIRI) throws ExtractionException {
- String href =
firstLink.getAttributes().getNamedItem("href").getNodeValue();
- String rel =
firstLink.getAttributes().getNamedItem("rel").getNodeValue();
+ Node hrefNodeItem = firstLink.getAttributes().getNamedItem("href");
+ Node relNodeItem = firstLink.getAttributes().getNamedItem("rel");
+ if (hrefNodeItem == null || relNodeItem == null) {
+ return false;
+ }
+ String href = hrefNodeItem.getNodeValue();
+ String rel = relNodeItem.getNodeValue();
String[] rels = rel.split("\\s+");
IRI link = document.resolveIRI(href);
diff --git
a/core/src/test/java/org/apache/any23/extractor/html/XFNExtractorTest.java
b/core/src/test/java/org/apache/any23/extractor/html/XFNExtractorTest.java
index ed22d48..617b9ca 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/XFNExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/XFNExtractorTest.java
@@ -139,6 +139,13 @@ public class XFNExtractorTest extends
AbstractExtractorTestCase {
}
@Test
+ public void testSomeLinksWithIncorrectHrefAttribute() throws
RepositoryException {
+ assertExtract("/microformats/xfn/some-links-with-incorrect-href.html");
+ assertNotContains(null, null, alicesHomepage);
+ assertContains(null, vXFN.mePage, charliesHomepage);
+ }
+
+ @Test
public void testForSomeReasonICantBeMyOwnSweetheart() throws
RepositoryException {
assertExtract("/microformats/xfn/me-and-sweetheart.html");
assertModelEmpty();
diff --git
a/test-resources/src/test/resources/microformats/xfn/some-links-with-incorrect-href.html
b/test-resources/src/test/resources/microformats/xfn/some-links-with-incorrect-href.html
new file mode 100644
index 0000000..a7981ce
--- /dev/null
+++
b/test-resources/src/test/resources/microformats/xfn/some-links-with-incorrect-href.html
@@ -0,0 +1,27 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+ <head>
+ <title>Hello World!</title>
+ </head>
+ <body>
+ <h1>Hello World!</h1>
+ <p>I'm Bob and my friends are<a
v-bind:href="http://alice.example.com/" rel="friend">Alice</a>,
+ and <a href="http://charlie.example.com/" rel="friend">Charlie</a>.
+ </p>
+ </body>
+</html>
\ No newline at end of file