Update of /cvsroot/nutch/nutch/src/java/net/nutch/indexer
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv9211/src/java/net/nutch/indexer

Modified Files:
        NutchSimilarity.java 
Log Message:
Some scoring tweaks.

Index: NutchSimilarity.java
===================================================================
RCS file: /cvsroot/nutch/nutch/src/java/net/nutch/indexer/NutchSimilarity.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** NutchSimilarity.java        5 Apr 2004 16:44:56 -0000       1.4
--- NutchSimilarity.java        6 Apr 2004 02:54:05 -0000       1.5
***************
*** 10,23 ****
    private static final int MIN_CONTENT_LENGTH = 1000;
  
! /** Normalize field by length. */
    public float lengthNorm(String fieldName, int numTokens) {
      if ("url".equals(fieldName)) {                // URL: prefer short
        return 1.0f / numTokens;                    // use linear normalization
        
      } else if ("content".equals(fieldName)) {     // Content: penalize short
        return super.lengthNorm(fieldName,          // treat short as longer
                                Math.max(numTokens, MIN_CONTENT_LENGTH));
  
!     } else {                                      // Anchor: use default
        return super.lengthNorm(fieldName, numTokens);
      }
--- 10,26 ----
    private static final int MIN_CONTENT_LENGTH = 1000;
  
!   /** Normalize field by length.  Called at index time. */
    public float lengthNorm(String fieldName, int numTokens) {
      if ("url".equals(fieldName)) {                // URL: prefer short
        return 1.0f / numTokens;                    // use linear normalization
        
+     } else if ("anchor".equals(fieldName)) {      // Anchor: prefer more
+       return (float)(1.0 / Math.log(numTokens));  // use log
+ 
      } else if ("content".equals(fieldName)) {     // Content: penalize short
        return super.lengthNorm(fieldName,          // treat short as longer
                                Math.max(numTokens, MIN_CONTENT_LENGTH));
  
!     } else {                                      // use default
        return super.lengthNorm(fieldName, numTokens);
      }



-------------------------------------------------------
This SF.Net email is sponsored by: IBM Linux Tutorials
Free Linux tutorial presented by Daniel Robbins, President and CEO of
GenToo technologies. Learn everything from fundamentals to system
administration. http://ads.osdn.com/?ad_id=1470&alloc_id=3638&op=click
_______________________________________________
Nutch-cvs mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/nutch-cvs

Reply via email to