Author: koji
Date: Fri Jan 11 00:38:12 2013
New Revision: 1431760

URL: http://svn.apache.org/viewvc?rev=1431760&view=rev
Log:
add noidf, nocoord and tddSymbol options to the simple evaluator (specialized 
for ukbench)

Modified:
    labs/alike/trunk/build.xml
    labs/alike/trunk/src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java

Modified: labs/alike/trunk/build.xml
URL: 
http://svn.apache.org/viewvc/labs/alike/trunk/build.xml?rev=1431760&r1=1431759&r2=1431760&view=diff
==============================================================================
--- labs/alike/trunk/build.xml (original)
+++ labs/alike/trunk/build.xml Fri Jan 11 00:38:12 2013
@@ -150,9 +150,12 @@
     <!-- = LAUNCH TOOLS                                                   = -->
     <!-- ================================================================== -->
     <target name="eval" depends="alike-compile" description="run 
SimpleTopHitsEvaluator program">
+        <property name="noidf" value="false"/>
+        <property name="nocoord" value="false"/>
+        <property name="tddSymbol" value=""/>
         <java classname="org.apache.alike.eval.SimpleTopHitsEvaluator" 
fork="true">
             <jvmarg line="-Dfile.encoding=UTF-8"/>
-            <arg line="${index} ${noidf} ${nocoord}"/>
+            <arg line="${index} ${noidf} ${nocoord} ${tddSymbol}"/>
             <classpath refid="common.path.lib"/>
             <classpath path="${cls.dir}"/>
         </java>

Modified: 
labs/alike/trunk/src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java
URL: 
http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java?rev=1431760&r1=1431759&r2=1431760&view=diff
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java 
(original)
+++ labs/alike/trunk/src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java 
Fri Jan 11 00:38:12 2013
@@ -41,6 +41,10 @@ import org.apache.lucene.store.Directory
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.Version;
 
+/**
+ * A specialized evaluator for ukbench
+ *
+ */
 public class SimpleTopHitsEvaluator {
 
   static String F_ID = "imgFile";
@@ -53,7 +57,8 @@ public class SimpleTopHitsEvaluator {
   public static void main(String[] args) throws Exception {
     final String index = args[0];
     final boolean noIdf = args.length >= 2 ? Boolean.parseBoolean(args[1]) : 
false;
-    final boolean noCoord = args.length == 3 ? Boolean.parseBoolean(args[2]) : 
false;
+    final boolean noCoord = args.length >= 3 ? Boolean.parseBoolean(args[2]) : 
false;
+    final TrainingDataDecider tdd = args.length >= 4 ? 
TrainingDataDecider.getDecider(args[3]) : null;
     System.out.printf("index:\"%s\"\n", index);
     Directory dir = FSDirectory.open(new File(index));
     parser = new QueryParser(Version.LUCENE_40, F_HISTOGRAM, new 
WhitespaceAnalyzer(Version.LUCENE_40));
@@ -62,14 +67,23 @@ public class SimpleTopHitsEvaluator {
     searcher.setSimilarity(new NoIdfCoordSimilarity(noIdf, noCoord));
     System.out.printf("use IDF : %s\n", String.valueOf(!noIdf));
     System.out.printf("use Coord : %s\n", String.valueOf(!noCoord));
+    System.out.printf("separated : %s\n", String.valueOf(tdd != null));
     
-    String[][] sortedIdsQueries = getSortedIDsQueries();
-    int num = sortedIdsQueries.length;
+    int numDocs = reader.numDocs();
+    String[][] sortedIdsQueries = getSortedIDsQueries(numDocs);
     //StringBuilder sb = new StringBuilder();
     float total3 = 0;
     float total10 = 0;
     float total37 = 0;
-    for(int i = 0; i < num; i++){
+    // the following variables are used only when separated=true, i.e. tdd != 
null
+    float trTotal3 = 0;
+    float trTotal10 = 0;
+    float trTotal37 = 0;
+    float ukTotal3 = 0;
+    float ukTotal10 = 0;
+    float ukTotal37 = 0;
+    int numTr = 0;
+    for(int i = 0; i < numDocs; i++){
       String id = sortedIdsQueries[i][0];
       String query = sortedIdsQueries[i][1] + " NOT " + F_ID + ":" + id;
       List<Integer> top10docs = getSimilarDocs(id, query, 10);
@@ -98,21 +112,41 @@ public class SimpleTopHitsEvaluator {
         }
       }
       score37 = score3 + (score10 - score3) * 0.5F;
+
       total3 += score3;
       total10 += score10;
       total37 += score37;
+      
+      if(tdd != null){
+        if(tdd.isTrainingData(i)){
+          trTotal3 += score3;
+          trTotal10 += score10;
+          trTotal37 += score37;
+          numTr++;
+        }
+        else{
+          ukTotal3 += score3;
+          ukTotal10 += score10;
+          ukTotal37 += score37;
+        }
+      }
     }
     
-    System.out.printf("mean TOP3   = %1.6f\n", total3  / (float)num);
-    System.out.printf("mean TOP10  = %1.6f\n", total10 / (float)num);
-    System.out.printf("mean TOP3+7 = %1.6f\n", total37 / (float)num);
+    System.out.printf("numDocs=%d\n", numDocs);
+
+    System.out.println("\t\tTOP3\t\tTOP10\t\tTOP3+7");
+    System.out.printf("all:\t%1.6f\t%1.6f\t%1.6f\n", total3  / numDocs, 
total10 / numDocs, total37 / numDocs);
+    
+    if(tdd != null){
+      System.out.printf("tr :\t%1.6f\t%1.6f\t%1.6f\n", trTotal3  / numTr, 
trTotal10 / numTr, trTotal37 / numTr);
+      int numUk = numDocs - numTr;
+      System.out.printf("uk :\t%1.6f\t%1.6f\t%1.6f\n", ukTotal3  / numUk, 
ukTotal10 / numUk, ukTotal37 / numUk);
+    }
     
     reader.close();
   }
 
-  static String[][] getSortedIDsQueries() throws IOException {
-    int numDocs = reader.numDocs();
-    System.out.printf("numDocs=%d\n", numDocs);
+  static String[][] getSortedIDsQueries(int numDocs) throws IOException {
     String[][] sortedIdsQueries = new String[numDocs][2];
     SortField sf = new SortField(F_ID, Type.STRING);
     TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), numDocs, new 
Sort(sf));
@@ -206,4 +240,74 @@ public class SimpleTopHitsEvaluator {
       return noCoord ? 1 : super.coord(overlap, maxOverlap);
     }
   }
+  
+  static abstract class TrainingDataDecider {
+
+    protected final String symbol;
+
+    protected TrainingDataDecider(String symbol){
+      this.symbol = symbol;
+    }
+
+    public static TrainingDataDecider getDecider(String symbol){
+      if(symbol.charAt(0) == 'h'){
+        return new HorizontalSeparatedDecider(symbol.substring(1));
+      }
+      else if(symbol.charAt(0) == 'v'){
+        return new VerticalSeparatedDecider(symbol.substring(1));
+      }
+      else
+        throw new IllegalArgumentException(symbol + " is wrong for 
representing training data");
+    }
+    
+    public abstract boolean isTrainingData(int n);
+    
+    private static final class HorizontalSeparatedDecider extends 
TrainingDataDecider {
+      
+      private final int min, max; // NOTE: use them inclusive
+      
+      // ex) 0,1000   -> 0...999 : training data;                 1000... : 
unknown data
+      //     1000,500 -> 1000...1499 : training data;             0...999 & 
1500... : unknown data
+      private HorizontalSeparatedDecider(String symbol){
+        super(symbol);
+        String[] args = symbol.split(",");
+        if(args.length != 2){
+          throw new IllegalArgumentException(symbol + " is wrong for 
representing training data");
+        }
+        min = Integer.parseInt(args[0]);
+        max = min + Integer.parseInt(args[1]) - 1;
+      }
+
+      public boolean isTrainingData(int n) {
+        return n >= min && n <= max;
+      }
+    }
+    
+    private static final class VerticalSeparatedDecider extends 
TrainingDataDecider {
+
+      private final boolean fa, fb, fc, fd;
+      
+      // ex) ab       -> 0,1,4,5,8,9,... : training data;         
2,3,6,7,10,11,... : unknown data
+      //     abc      -> 0,1,2,4,5,6,8,9,10,... : training data;  3,7,11,... : 
unknown data
+      //     cd       -> 2,3,6,7,10,11,... : training data;       
0,1,4,5,8,9,... : unknown data
+      private VerticalSeparatedDecider(String symbol){
+        super(symbol);
+        fa = symbol.indexOf('a') >= 0;
+        fb = symbol.indexOf('b') >= 0;
+        fc = symbol.indexOf('c') >= 0;
+        fd = symbol.indexOf('d') >= 0;
+      }
+
+      public boolean isTrainingData(int n) {
+        int remainder = n % 4;
+        switch (remainder) {
+          case 0: return fa;
+          case 1: return fb;
+          case 2: return fc;
+          case 3: return fd;
+        }
+        return false;
+      }
+    }
+  }
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to