Author: ogrisel
Date: Mon Jan 16 18:59:44 2012
New Revision: 1232114

URL: http://svn.apache.org/viewvc?rev=1232114&view=rev
Log:
STANBOL-197: drafting test fold evaluation

Modified:
    incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java

Modified: incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1232114&r1=1232113&r2=1232114&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java Mon Jan 16 18:59:44 2012
@@ -830,11 +830,41 @@ public class TopicClassificationEngine e
         classifier.setCrossValidationInfo(cvFoldIndex, cvFoldCount);
         classifier.updateModel(false);
 
+        final int foldCount = cvFoldCount;
+        final int foldIndex = cvFoldIndex;
+
         // iterate over the topics again to compute scores on the test fold
         batchOverTopics(new BatchProcessor<SolrDocument>() {
+
             @Override
-            public int process(List<SolrDocument> batch) {
-                return 0;
+            public int process(List<SolrDocument> batch) throws 
TrainingSetException, ClassifierException {
+                for (SolrDocument topicMetadata : batch) {
+                    String topic = 
topicMetadata.getFirstValue(topicUriField).toString();
+                    List<String> impactedTopics = new ArrayList<String>();
+                    int offset = 0;
+                    Batch<String> examples = Batch.emtpyBatch(String.class);
+                    do {
+                        examples = 
trainingSet.getPositiveExamples(impactedTopics, examples.nextOffset);
+                        for (String example : examples.items) {
+                            if (!(offset % foldCount == foldIndex)) {
+                                // this example is not part of the test fold, skip it
+                                offset++;
+                                continue;
+                            }
+                            offset++;
+                            if 
(classifier.suggestTopics(example).contains(topic)) {
+                                // count positive success
+                            } else {
+                                // collect false negatives
+                            }
+                        }
+                    } while (examples.hasMore); // TODO: put a bound on the number of examples
+
+                    // TODO: handle false positives with negative examples here
+
+                    // TODO: store performance statistics for current model in the original classifier
+                }
+                return batch.size();
             }
         });
 


Reply via email to