Dear Wiki user, You have subscribed to a wiki page or wiki category on "Nutch Wiki" for change notification.
The following page has been changed by JakeVanderdray: http://wiki.apache.org/nutch/WritingPlugins ------------------------------------------------------------------------------ </project> }}} + == The HTML Parser Extension == + + {{{ + package org.apache.nutch.parse.recommended; + + // JDK imports + import java.util.Enumeration; + import java.util.Properties; + import java.util.logging.Logger; + + // Nutch imports + import org.apache.nutch.parse.HTMLMetaTags; + import org.apache.nutch.parse.Parse; + import org.apache.nutch.parse.HtmlParseFilter; + import org.apache.nutch.protocol.Content; + import org.apache.nutch.util.LogFormatter; + + public class RecommendedParser implements HtmlParseFilter { + + private static final Logger LOG = LogFormatter + .getLogger(RecommendedParser.class.getName()); + + /** The language meta data attribute name */ + public static final String META_RECOMMENDED_NAME="Recommended"; + + + /** + * Scan the HTML document looking for a recommended meta tag. + */ + public Parse filter(Content content, Parse parse, HTMLMetaTags metaTags, DocumentFragment doc) { + // Trying to find the document's recommended term + String recommendation = null; + + Properties generalMetaTags = metaTags.getGeneralTags(); + + for (Enumeration tagNames = generalMetaTags.propertyNames(); tagNames.hasMoreElements(); ) { + if (tagNames.nextElement() == "recommended") { + recommendation = metaTags.getGeneralTags().getProperty("recommended"); + } + } + + if (recommendation != null) { + LOG.info("No Recommendataion"); + } else { + LOG.info("Adding Recommendation for " + recommendation); + parse.getData().getMetadata().put(META_RECOMMENDED_NAME, recommendation); + } + + + return parse; + } + } + }}} + == Getting Nutch to Use Your Plugin == In order to get Nutch to use your plugin, you need to edit your conf/nutch-site.xml file and add in a block like this:
