Author: lewismc
Date: Thu Jun 20 20:44:19 2013
New Revision: 1495174
URL: http://svn.apache.org/r1495174
Log:
NUTCH-1585 Ensure duplicate tags do not exist in microformat-reltag tag set.
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java
Modified: nutch/branches/2.x/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1495174&r1=1495173&r2=1495174&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Thu Jun 20 20:44:19 2013
@@ -2,6 +2,8 @@ Nutch Change Log
Current Development
+* NUTCH-1585 Ensure duplicate tags do not exist in microformat-reltag tag set (lewismc)
+
* NUTCH-1475 Index-More Plugin -- A better fall back value for date field (James Sullivan, snagel via lewismc)
* NUTCH-1420 Get rid of the dreaded � (markus + lewismc)
Modified:
nutch/branches/2.x/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java?rev=1495174&r1=1495173&r2=1495174&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java (original)
+++ nutch/branches/2.x/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java Thu Jun 20 20:44:19 2013
@@ -85,8 +85,10 @@ public class RelTagParser implements Par
if ("tag".equalsIgnoreCase(relNode.getNodeValue())) {
String tag = parseTag(hrefNode.getNodeValue());
if (!StringUtil.isEmpty(tag)) {
- tags.add(tag);
- LOG.debug("Adding tag: " + tag + " to tag set.");
+ if(!tags.contains(tag)){
+ tags.add(tag);
+ LOG.debug("Adding tag: " + tag + " to tag set.");
+ }
}
}
}