Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Nutch Wiki" for change 
notification.

The following page has been changed by JakeVanderdray:
http://wiki.apache.org/nutch/WritingPlugins

------------------------------------------------------------------------------
  </project>
  }}}
  
+ == The HTML Parser Extension ==
+ 
+ {{{
+ package org.apache.nutch.parse.recommended;
+ 
+ // JDK imports
+ import java.util.Enumeration;
+ import java.util.Properties;
+ import java.util.logging.Logger;
+ 
+ // Nutch imports
+ import org.apache.nutch.parse.HTMLMetaTags;
+ import org.apache.nutch.parse.Parse;
+ import org.apache.nutch.parse.HtmlParseFilter;
+ import org.apache.nutch.protocol.Content;
+ import org.apache.nutch.util.LogFormatter;
+ 
+ // DOM imports
+ import org.w3c.dom.DocumentFragment;
+ 
+ /**
+  * HTML parse filter that looks for a "recommended" meta tag and, when one
+  * is present, stores its value in the parse metadata under
+  * {@link #META_RECOMMENDED_NAME}.
+  */
+ public class RecommendedParser implements HtmlParseFilter {
+ 
+   private static final Logger LOG = LogFormatter
+     .getLogger(RecommendedParser.class.getName());
+ 
+   /** The parse metadata key under which the recommended term is stored */
+   public static final String META_RECOMMENDED_NAME="Recommended";
+ 
+   /** The name of the general meta tag that is looked up */
+   private static final String META_TAG_NAME = "recommended";
+ 
+   /**
+    * Scan the HTML document looking for a recommended meta tag.
+    *
+    * @param content  the fetched content (not used by this filter)
+    * @param parse    the parse whose metadata receives the recommendation
+    * @param metaTags the meta tags extracted from the document
+    * @param doc      the parsed DOM (not used by this filter)
+    * @return the same <code>parse</code> instance that was passed in
+    */
+   public Parse filter(Content content, Parse parse,
+       HTMLMetaTags metaTags, DocumentFragment doc) {
+     // getProperty returns null when the tag is absent, so there is no
+     // need to walk the property names (and no risk of comparing strings
+     // by reference with ==).
+     Properties generalMetaTags = metaTags.getGeneralTags();
+     String recommendation = generalMetaTags.getProperty(META_TAG_NAME);
+ 
+     if (recommendation == null) {
+       LOG.info("No Recommendation");
+     } else {
+       // Only store a value that was actually found; putting null into
+       // the metadata would fail.
+       LOG.info("Adding Recommendation for " + recommendation);
+       parse.getData().getMetadata().put(META_RECOMMENDED_NAME,
+         recommendation);
+     }
+ 
+     return parse;
+   }
+ }
+ }}}
+ 
  == Getting Nutch to Use Your Plugin ==
  
  In order to get Nutch to use your plugin, you need to edit your 
conf/nutch-site.xml file and add in a block like this:

Reply via email to