Author: thorsten
Date: Fri Apr 30 13:30:27 2010
New Revision: 939662
URL: http://svn.apache.org/viewvc?rev=939662&view=rev
Log:
DROIDS-74
Reporter: Richard Frovarp
Patch: Richard Frovarp
review: thorsten
Modified:
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/parse/html/LinkExtractor.java
Modified:
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/parse/html/LinkExtractor.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/parse/html/LinkExtractor.java?rev=939662&r1=939661&r2=939662&view=diff
==============================================================================
---
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/parse/html/LinkExtractor.java
(original)
+++
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/parse/html/LinkExtractor.java
Fri Apr 30 13:30:27 2010
@@ -61,7 +61,7 @@ public class LinkExtractor extends Defau
/**
* List of links
*/
- private Collection<Link> links = new ArrayList<Link>();
+ private ArrayList<Link> links = new ArrayList<Link>();
/**
* Set of URIs visited yet
@@ -83,6 +83,11 @@ public class LinkExtractor extends Defau
*/
private URI link = null;
+ /**
+ * Anchor text
+ */
+ private StringBuilder anchorText = new StringBuilder();
+
public LinkExtractor(Link base, Map<String, String> elements) {
super();
this.base = base;
@@ -121,11 +126,30 @@ public class LinkExtractor extends Defau
if (link != null) {
addOutlinkURI(link.toString());
link = null;
+ anchorText = new StringBuilder();
}
}
}
}
+ /**
+  * Collects character data reported by the parser into the anchor-text
+  * buffer. The buffer is replaced with a fresh one each time a link is
+  * recorded, so it holds the text seen since the most recent link.
+  *
+  * @param ch     array holding the reported characters
+  * @param start  index of the first character to take from {@code ch}
+  * @param length number of characters to take
+  */
+ @Override
+ public void characters(char[] ch, int start, int length) {
+   this.anchorText.append(ch, start, length);
+ }
+
+ /**
+  * Attaches the text collected so far to the most recently added link when
+  * the element being closed is one of the configured link elements.
+  *
+  * @param uri namespace URI of the closing element (not used here)
+  * @param loc local name of the closing element; compared case-insensitively
+  *            against the keys of {@code elements}
+  * @param raw qualified name of the closing element (not used here)
+  */
+ @Override
+ public void endElement(String uri, String loc, String raw) {
+   for (String elem : elements.keySet()) {
+     if (elem.equalsIgnoreCase(loc)) {
+       addAnchorText(anchorText.toString());
+       // One match is enough: any further key that differs only in case
+       // would store the same text on the same link (and log it) again.
+       break;
+     }
+   }
+ }
+
+
@Override
public void endDocument() throws SAXException
{
@@ -134,6 +158,19 @@ public class LinkExtractor extends Defau
}
/**
+ * Sets the anchor text of the most recently added link.
+ * @param anchorText Text to be added
+ */
+ private void addAnchorText(String anchorText) {
+   if (links.isEmpty()) {
+     // No link has been recorded yet, so there is nothing to attach the text to.
+     return;
+   }
+   Link last = links.get(links.size() - 1);
+   if (!(last instanceof LinkTask)) {
+     // The list is typed as Link; a blind cast would abort the whole parse
+     // with a ClassCastException if another implementation were ever stored.
+     log.debug("Skipping anchor text, last link is not a LinkTask: " + last);
+     return;
+   }
+   LinkTask l = (LinkTask) last;
+   // Collapse every run of whitespace (including line breaks) to a single
+   // space and drop leading/trailing blanks before storing the text.
+   l.setAnchorText(anchorText.replaceAll("\\s+", " ").trim());
+   log.debug("Adding anchor: " + l.getAnchorText() + " on link: " + l);
+ }
+
+
+ /**
* Add the outlink to the {@code links} list if the value is a valid URI.
* @param value the outlink.
*/