Hi list! Can someone explain to me in more detail what the following code does? It is the code of OPICScoringFilter.java. Can I change it so that the score used when the results are displayed also takes the value of a META tag into account? It is very difficult for me to program my own plugin, so I need your help.
This is the code : package org.apache.nutch.scoring.opic; import java.net.MalformedURLException; import java.net.URL; import java.util.List; // Commons Logging imports import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.lucene.document.Document; import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.crawl.Inlinks; import org.apache.nutch.fetcher.Fetcher; import org.apache.nutch.metadata.Metadata; import org.apache.nutch.metadata.Nutch; import org.apache.nutch.parse.Parse; import org.apache.nutch.parse.ParseData; import org.apache.nutch.protocol.Content; import org.apache.nutch.scoring.ScoringFilter; import org.apache.nutch.scoring.ScoringFilterException; import org.apache.nutch.util.LogUtil; /** * This plugin implements a variant of an Online Page Importance Computation * (OPIC) score, described in this paper: * <a href="http://www2003.org/cdrom/papers/refereed/p007/p7-abiteboul.html"/> * Abiteboul, Serge and Preda, Mihai and Cobena, Gregory (2003), * Adaptive On-Line Page Importance Computation * </a>. * * @author Andrzej Bialecki */ public class OPICScoringFilter implements ScoringFilter { private final static Log LOG = LogFactory.getLog(OPICScoringFilter.class); private Configuration conf; private float scoreInjected; private float scorePower; private float internalScoreFactor; private float externalScoreFactor; private boolean countFiltered; public Configuration getConf() { return conf; } public void setConf(Configuration conf) { this.conf = conf; scoreInjected = conf.getFloat("db.score.injected", 1.0f); scorePower = conf.getFloat("indexer.score.power", 0.5f); internalScoreFactor = conf.getFloat("db.score.link.internal", 1.0f); externalScoreFactor = conf.getFloat("db.score.link.external", 1.0f); countFiltered = conf.getBoolean("db.score.count.filtered", false); } /** Set to the value defined in config, 1.0f by default. 
*/ public void injectedScore(Text url, CrawlDatum datum) throws ScoringFilterException { datum.setScore(scoreInjected); } /** Set to 0.0f (unknown value) - inlink contributions will bring it to * a correct level. Newly discovered pages have at least one inlink. */ public void initialScore(Text url, CrawlDatum datum) throws ScoringFilterException { datum.setScore(0.0f); } /** Use [EMAIL PROTECTED] CrawlDatum#getScore()}. */ public float generatorSortValue(Text url, CrawlDatum datum, float initSort) throws ScoringFilterException { return datum.getScore(); } /** Increase the score by a sum of inlinked scores. */ public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum, List inlinked) throws ScoringFilterException { float adjust = 0.0f; for (int i = 0; i < inlinked.size(); i++) { CrawlDatum linked = (CrawlDatum)inlinked.get(i); adjust += linked.getScore(); } if (old == null) old = datum; datum.setScore(old.getScore() + adjust); } /** Store a float value of CrawlDatum.getScore() under Fetcher.SCORE_KEY. */ public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content) { content.getMetadata().set(Nutch.SCORE_KEY, "" + datum.getScore()); } /** Copy the value from Content metadata under Fetcher.SCORE_KEY to parseData. */ public void passScoreAfterParsing(Text url, Content content, Parse parse) { parse.getData().getContentMeta().set(Nutch.SCORE_KEY, content.getMetadata().get(Nutch.SCORE_KEY)); } /** Get a float value from Fetcher.SCORE_KEY, divide it by the number of outlinks and apply. 
*/ public CrawlDatum distributeScoreToOutlink(Text fromUrl, Text toUrl, ParseData parseData, CrawlDatum target, CrawlDatum adjust, int allCount, int validCount) throws ScoringFilterException { float score = scoreInjected; String scoreString = parseData.getContentMeta().get(Nutch.SCORE_KEY); if (scoreString != null) { try { score = Float.parseFloat(scoreString); } catch (Exception e) { e.printStackTrace(LogUtil.getWarnStream(LOG)); } } if (countFiltered) { score /= allCount; } else { score /= validCount; } // internal or external score factor try { String toHost = new URL(toUrl.toString()).getHost(); String fromHost = new URL(fromUrl.toString()).getHost(); if(toHost.equalsIgnoreCase(fromHost)){ score *= internalScoreFactor; } else { score *= externalScoreFactor; } } catch (MalformedURLException e) { e.printStackTrace(LogUtil.getWarnStream(LOG)); score *= externalScoreFactor; } target.setScore(score); // XXX (ab) no adjustment? I think this is contrary to the algorithm descr. // XXX in the paper, where page "loses" its score if it's distributed to // XXX linked pages... return adjust; } /** Dampen the boost value by scorePower.*/ public float indexerScore(Text url, Document doc, CrawlDatum dbDatum, CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, float initScore) throws ScoringFilterException { return (float)Math.pow(dbDatum.getScore(), scorePower); } } Thank's in advance, Jisay _________________________________________________________________ Changez votre Live en un clic ! http://get.live.com
