This is an automated email from the ASF dual-hosted git repository.

lewismc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/any23.git


The following commit(s) were added to refs/heads/master by this push:
     new 5d3a549  ANY23-524 fix invalid href attribute (#216)
5d3a549 is described below

commit 5d3a5498545d4534a44492897831719f616a5cfc
Author: Igor Shevchenko <[email protected]>
AuthorDate: Sat Oct 23 02:02:18 2021 +0300

    ANY23-524 fix invalid href attribute (#216)
---
 .../apache/any23/extractor/html/XFNExtractor.java  |  9 ++++++--
 .../any23/extractor/html/XFNExtractorTest.java     |  7 ++++++
 .../xfn/some-links-with-incorrect-href.html        | 27 ++++++++++++++++++++++
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java
index 3d39cb7..5cf63c9 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java
@@ -81,8 +81,13 @@ public class XFNExtractor implements TagSoupDOMExtractor {
     }
 
     private boolean extractLink(Node firstLink, BNode subject, IRI documentIRI) throws ExtractionException {
-        String href = firstLink.getAttributes().getNamedItem("href").getNodeValue();
-        String rel = firstLink.getAttributes().getNamedItem("rel").getNodeValue();
+        Node hrefNodeItem = firstLink.getAttributes().getNamedItem("href");
+        Node relNodeItem = firstLink.getAttributes().getNamedItem("rel");
+        if (hrefNodeItem == null || relNodeItem == null) {
+            return false;
+        }
+        String href = hrefNodeItem.getNodeValue();
+        String rel = relNodeItem.getNodeValue();
 
         String[] rels = rel.split("\\s+");
         IRI link = document.resolveIRI(href);
diff --git a/core/src/test/java/org/apache/any23/extractor/html/XFNExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/XFNExtractorTest.java
index ed22d48..617b9ca 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/XFNExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/XFNExtractorTest.java
@@ -139,6 +139,13 @@ public class XFNExtractorTest extends AbstractExtractorTestCase {
     }
 
     @Test
+    public void testSomeLinksWithIncorrectHrefAttribute() throws RepositoryException {
+        assertExtract("/microformats/xfn/some-links-with-incorrect-href.html");
+        assertNotContains(null, null, alicesHomepage);
+        assertContains(null, vXFN.mePage, charliesHomepage);
+    }
+
+    @Test
     public void testForSomeReasonICantBeMyOwnSweetheart() throws RepositoryException {
         assertExtract("/microformats/xfn/me-and-sweetheart.html");
         assertModelEmpty();
diff --git a/test-resources/src/test/resources/microformats/xfn/some-links-with-incorrect-href.html b/test-resources/src/test/resources/microformats/xfn/some-links-with-incorrect-href.html
new file mode 100644
index 0000000..a7981ce
--- /dev/null
+++ b/test-resources/src/test/resources/microformats/xfn/some-links-with-incorrect-href.html
@@ -0,0 +1,27 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+    <head>
+        <title>Hello World!</title>
+    </head>
+    <body>
+        <h1>Hello World!</h1>
+        <p>I'm Bob and my friends are<a v-bind:href="http://alice.example.com/" rel="friend">Alice</a>,
+            and <a href="http://charlie.example.com/" rel="friend">Charlie</a>.
+        </p>
+    </body>
+</html>
\ No newline at end of file

Reply via email to