Please, I want to post my question because somebody ruined it by asking some
other, irrelevant question in reply to my question.
The plugin compiles, but it doesn't index the DC meta fields for some strange
reason.
----Indexer-------------------------
package org.apache.nutch.parse.dcmeta;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.IndexingException;
import org.apache.hadoop.io.UTF8;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Inlinks;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import org.apache.hadoop.conf.Configuration;
/**
 * Indexing filter that copies the Dublin Core title found in the parse
 * metadata into the Lucene document.
 *
 * The value is read from the parse data under the key "DC.title" and, when
 * present, is added to the document as a stored, tokenized field named
 * "DC_title". Documents without that metadata entry pass through unchanged.
 */
public class DCMetaIndexingFilter implements IndexingFilter {

  public static final Log LOG =
      LogFactory.getLog(DCMetaIndexingFilter.class.getName());

  /** Configuration handed in through {@link #setConf(Configuration)}. */
  private Configuration conf;

  /**
   * Adds the "DC_title" field to the document when the parse data carries a
   * "DC.title" metadata value.
   *
   * @param doc     document being built for the index; modified in place
   * @param parse   parse result whose metadata is consulted
   * @param url     not used by this filter
   * @param datum   not used by this filter
   * @param inlinks not used by this filter
   * @return the same document instance that was passed in
   * @throws IndexingException declared by the interface; not thrown here
   */
  public Document filter(Document doc, Parse parse, UTF8 url,
      CrawlDatum datum, Inlinks inlinks) throws IndexingException {
    final String title = parse.getData().getMeta("DC.title");
    if (title == null) {
      // Nothing to index for this page.
      return doc;
    }

    LOG.info("found DC.title " + title);
    final Field titleField =
        new Field("DC_title", title, Field.Store.YES, Field.Index.TOKENIZED);
    doc.add(titleField);
    return doc;
  }

  /** Stores the configuration for later retrieval. */
  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  /** Returns the configuration previously set, or null if none was set. */
  public Configuration getConf() {
    return conf;
  }
}
--------------------------------------------------------------------------------------------------------
-Parser---
package org.apache.nutch.parse.dcmeta;
import java.util.Enumeration;
import java.util.Properties;
import java.util.logging.Logger;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.parse.HTMLMetaTags;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.HtmlParseFilter;
import org.apache.nutch.protocol.Content;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.DocumentFragment;
/**
 * HTML parse filter that copies Dublin Core meta tags (names beginning with
 * "DC.") from the page's general meta tags into the content metadata of the
 * parse data, so that a later indexing step can read them.
 *
 * The prefix is matched case-insensitively and the stored key always carries
 * the canonical upper-case "DC." prefix. NOTE(review): Nutch's
 * HTMLMetaProcessor appears to lower-case meta tag names before they reach
 * the general tags, in which case a case-sensitive check for "DC." never
 * matches and nothing is stored; confirm against the deployed Nutch version.
 * With the canonical prefix, a tag delivered as "dc.title" is stored as
 * "DC.title", which is the key DCMetaIndexingFilter looks up.
 */
public class DCMetaParseFilter implements HtmlParseFilter {

  private static final Log LOG = LogFactory.getLog(
      DCMetaParseFilter.class.getName());

  /** Canonical prefix used for every stored Dublin Core metadata key. */
  private static final String DC_PREFIX = "DC.";

  /** Configuration handed in through {@link #setConf(Configuration)}. */
  private Configuration conf;

  /**
   * Copies every general meta tag whose name starts with "DC." (in any
   * letter case) into the parse data's content metadata.
   *
   * @param content  not used by this filter
   * @param parse    parse result whose content metadata receives the tags
   * @param metaTags meta tags extracted from the page
   * @param doc      not used by this filter
   * @return the same parse instance that was passed in
   */
  public Parse filter(Content content, Parse parse,
      HTMLMetaTags metaTags, DocumentFragment doc) {
    Properties generalMetaTags = metaTags.getGeneralTags();
    for (Enumeration tagNames = generalMetaTags.propertyNames();
         tagNames.hasMoreElements(); ) {
      String tagName = (String) tagNames.nextElement();
      // Case-insensitive prefix test: the tag name may arrive as "dc.title".
      if (!tagName.regionMatches(true, 0, DC_PREFIX, 0, DC_PREFIX.length())) {
        continue;
      }
      // Normalise only the prefix; the rest of the name is kept as delivered.
      String key = DC_PREFIX + tagName.substring(DC_PREFIX.length());
      String value = generalMetaTags.getProperty(tagName);
      parse.getData().getContentMeta().set(key, value);
      LOG.info("Found DC metadata " + key + " : " + value);
    }
    return parse;
  }

  /** Stores the configuration for later retrieval. */
  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  /** Returns the configuration previously set, or null if none was set. */
  public Configuration getConf() {
    return this.conf;
  }
}
Thank you.