Author: koji
Date: Fri Jan 11 00:38:12 2013
New Revision: 1431760
URL: http://svn.apache.org/viewvc?rev=1431760&view=rev
Log:
add noidf, nocoord and tddSymbol options to the simple evaluator (specialized
for ukbench)
Modified:
labs/alike/trunk/build.xml
labs/alike/trunk/src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java
Modified: labs/alike/trunk/build.xml
URL:
http://svn.apache.org/viewvc/labs/alike/trunk/build.xml?rev=1431760&r1=1431759&r2=1431760&view=diff
==============================================================================
--- labs/alike/trunk/build.xml (original)
+++ labs/alike/trunk/build.xml Fri Jan 11 00:38:12 2013
@@ -150,9 +150,12 @@
<!-- = LAUNCH TOOLS = -->
<!-- ================================================================== -->
<target name="eval" depends="alike-compile" description="run
SimpleTopHitsEvaluator program">
+ <property name="noidf" value="false"/>
+ <property name="nocoord" value="false"/>
+ <property name="tddSymbol" value=""/>
<java classname="org.apache.alike.eval.SimpleTopHitsEvaluator"
fork="true">
<jvmarg line="-Dfile.encoding=UTF-8"/>
- <arg line="${index} ${noidf} ${nocoord}"/>
+ <arg line="${index} ${noidf} ${nocoord} ${tddSymbol}"/>
<classpath refid="common.path.lib"/>
<classpath path="${cls.dir}"/>
</java>
Modified:
labs/alike/trunk/src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java
URL:
http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java?rev=1431760&r1=1431759&r2=1431760&view=diff
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java
(original)
+++ labs/alike/trunk/src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java
Fri Jan 11 00:38:12 2013
@@ -41,6 +41,10 @@ import org.apache.lucene.store.Directory
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
+/**
+ * A specialized evaluator for ukbench
+ *
+ */
public class SimpleTopHitsEvaluator {
static String F_ID = "imgFile";
@@ -53,7 +57,8 @@ public class SimpleTopHitsEvaluator {
public static void main(String[] args) throws Exception {
final String index = args[0];
final boolean noIdf = args.length >= 2 ? Boolean.parseBoolean(args[1]) :
false;
- final boolean noCoord = args.length == 3 ? Boolean.parseBoolean(args[2]) :
false;
+ final boolean noCoord = args.length >= 3 ? Boolean.parseBoolean(args[2]) :
false;
+ final TrainingDataDecider tdd = args.length >= 4 ?
TrainingDataDecider.getDecider(args[3]) : null;
System.out.printf("index:\"%s\"\n", index);
Directory dir = FSDirectory.open(new File(index));
parser = new QueryParser(Version.LUCENE_40, F_HISTOGRAM, new
WhitespaceAnalyzer(Version.LUCENE_40));
@@ -62,14 +67,23 @@ public class SimpleTopHitsEvaluator {
searcher.setSimilarity(new NoIdfCoordSimilarity(noIdf, noCoord));
System.out.printf("use IDF : %s\n", String.valueOf(!noIdf));
System.out.printf("use Coord : %s\n", String.valueOf(!noCoord));
+ System.out.printf("separated : %s\n", String.valueOf(tdd != null));
- String[][] sortedIdsQueries = getSortedIDsQueries();
- int num = sortedIdsQueries.length;
+ int numDocs = reader.numDocs();
+ String[][] sortedIdsQueries = getSortedIDsQueries(numDocs);
//StringBuilder sb = new StringBuilder();
float total3 = 0;
float total10 = 0;
float total37 = 0;
- for(int i = 0; i < num; i++){
+ // the following variables are used only when separated=true, i.e. tdd !=
null
+ float trTotal3 = 0;
+ float trTotal10 = 0;
+ float trTotal37 = 0;
+ float ukTotal3 = 0;
+ float ukTotal10 = 0;
+ float ukTotal37 = 0;
+ int numTr = 0;
+ for(int i = 0; i < numDocs; i++){
String id = sortedIdsQueries[i][0];
String query = sortedIdsQueries[i][1] + " NOT " + F_ID + ":" + id;
List<Integer> top10docs = getSimilarDocs(id, query, 10);
@@ -98,21 +112,41 @@ public class SimpleTopHitsEvaluator {
}
}
score37 = score3 + (score10 - score3) * 0.5F;
+
total3 += score3;
total10 += score10;
total37 += score37;
+
+ if(tdd != null){
+ if(tdd.isTrainingData(i)){
+ trTotal3 += score3;
+ trTotal10 += score10;
+ trTotal37 += score37;
+ numTr++;
+ }
+ else{
+ ukTotal3 += score3;
+ ukTotal10 += score10;
+ ukTotal37 += score37;
+ }
+ }
}
- System.out.printf("mean TOP3 = %1.6f\n", total3 / (float)num);
- System.out.printf("mean TOP10 = %1.6f\n", total10 / (float)num);
- System.out.printf("mean TOP3+7 = %1.6f\n", total37 / (float)num);
+ System.out.printf("numDocs=%d\n", numDocs);
+
+ System.out.println("\t\tTOP3\t\tTOP10\t\tTOP3+7");
+ System.out.printf("all:\t%1.6f\t%1.6f\t%1.6f\n", total3 / numDocs,
total10 / numDocs, total37 / numDocs);
+
+ if(tdd != null){
+ System.out.printf("tr :\t%1.6f\t%1.6f\t%1.6f\n", trTotal3 / numTr,
trTotal10 / numTr, trTotal37 / numTr);
+ int numUk = numDocs - numTr;
+ System.out.printf("uk :\t%1.6f\t%1.6f\t%1.6f\n", ukTotal3 / numUk,
ukTotal10 / numUk, ukTotal37 / numUk);
+ }
reader.close();
}
- static String[][] getSortedIDsQueries() throws IOException {
- int numDocs = reader.numDocs();
- System.out.printf("numDocs=%d\n", numDocs);
+ static String[][] getSortedIDsQueries(int numDocs) throws IOException {
String[][] sortedIdsQueries = new String[numDocs][2];
SortField sf = new SortField(F_ID, Type.STRING);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), numDocs, new
Sort(sf));
@@ -206,4 +240,74 @@ public class SimpleTopHitsEvaluator {
return noCoord ? 1 : super.coord(overlap, maxOverlap);
}
}
+
+ static abstract class TrainingDataDecider {
+
+ protected final String symbol;
+
+ protected TrainingDataDecider(String symbol){
+ this.symbol = symbol;
+ }
+
+ public static TrainingDataDecider getDecider(String symbol){
+ if(symbol.charAt(0) == 'h'){
+ return new HorizontalSeparatedDecider(symbol.substring(1));
+ }
+ else if(symbol.charAt(0) == 'v'){
+ return new VerticalSeparatedDecider(symbol.substring(1));
+ }
+ else
+ throw new IllegalArgumentException(symbol + " is wrong for
representing training data");
+ }
+
+ public abstract boolean isTrainingData(int n);
+
+ private static final class HorizontalSeparatedDecider extends
TrainingDataDecider {
+
+ private final int min, max; // NOTE: use them inclusive
+
+ // ex) 0,1000 -> 0...999 : training data; 1000... :
unknown data
+ // 1000,500 -> 1000...1499 : training data; 0...999 &
1500... : unknown data
+ private HorizontalSeparatedDecider(String symbol){
+ super(symbol);
+ String[] args = symbol.split(",");
+ if(args.length != 2){
+ throw new IllegalArgumentException(symbol + " is wrong for
representing training data");
+ }
+ min = Integer.parseInt(args[0]);
+ max = min + Integer.parseInt(args[1]) - 1;
+ }
+
+ public boolean isTrainingData(int n) {
+ return n >= min && n <= max;
+ }
+ }
+
+ private static final class VerticalSeparatedDecider extends
TrainingDataDecider {
+
+ private final boolean fa, fb, fc, fd;
+
+ // ex) ab -> 0,1,4,5,8,9,... : training data;
2,3,6,7,10,11,... : unknown data
+ // abc -> 0,1,2,4,5,6,8,9,10,... : training data; 3,7,11,... :
unknown data
+ // cd -> 2,3,6,7,10,11,... : training data;
0,1,4,5,8,9,... : unknown data
+ private VerticalSeparatedDecider(String symbol){
+ super(symbol);
+ fa = symbol.indexOf('a') >= 0;
+ fb = symbol.indexOf('b') >= 0;
+ fc = symbol.indexOf('c') >= 0;
+ fd = symbol.indexOf('d') >= 0;
+ }
+
+ public boolean isTrainingData(int n) {
+ int remainder = n % 4;
+ switch (remainder) {
+ case 0: return fa;
+ case 1: return fb;
+ case 2: return fc;
+ case 3: return fd;
+ }
+ return false;
+ }
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]