Author: ogrisel
Date: Mon Jan 16 18:59:44 2012
New Revision: 1232114
URL: http://svn.apache.org/viewvc?rev=1232114&view=rev
Log:
STANBOL-197: drafting test fold evaluation
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1232114&r1=1232113&r2=1232114&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java Mon Jan 16 18:59:44 2012
@@ -830,11 +830,41 @@ public class TopicClassificationEngine e
classifier.setCrossValidationInfo(cvFoldIndex, cvFoldCount);
classifier.updateModel(false);
+ final int foldCount = cvFoldCount;
+ final int foldIndex = cvFoldIndex;
+
// iterate over the topics again to compute scores on the test fold
batchOverTopics(new BatchProcessor<SolrDocument>() {
+
@Override
- public int process(List<SolrDocument> batch) {
- return 0;
+ public int process(List<SolrDocument> batch) throws TrainingSetException, ClassifierException {
+ for (SolrDocument topicMetadata : batch) {
+ String topic = topicMetadata.getFirstValue(topicUriField).toString();
+ List<String> impactedTopics = new ArrayList<String>();
+ int offset = 0;
+ Batch<String> examples = Batch.emtpyBatch(String.class);
+ do {
+ examples = trainingSet.getPositiveExamples(impactedTopics, examples.nextOffset);
+ for (String example : examples.items) {
+ if (!(offset % foldCount == foldIndex)) {
+ // this example is not part of the test fold, skip it
+ offset++;
+ continue;
+ }
+ offset++;
+ if (classifier.suggestTopics(example).contains(topic)) {
+ // count positive success
+ } else {
+ // collect false negatives
+ }
+ }
+ } while (examples.hasMore); // TODO: put a bound on the number of examples
+
+ // TODO: handle false positives with negative examples here
+
+ // TODO: store performance statistics for current model in the original classifier
+ }
+ return batch.size();
}
});