Author: ogrisel
Date: Mon Jan 16 13:33:13 2012
New Revision: 1231995
URL: http://svn.apache.org/viewvc?rev=1231995&view=rev
Log:
STANBOL-197: more progress on metadata storage for model evaluation
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/classifier/schema.xml
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1231995&r1=1231994&r2=1231995&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
Mon Jan 16 13:33:13 2012
@@ -102,7 +102,11 @@ import org.slf4j.LoggerFactory;
@Property(name = TopicClassificationEngine.RECALL_FIELD,
value = "recall"),
@Property(name = TopicClassificationEngine.F1_FIELD,
value = "f1"),
@Property(name =
TopicClassificationEngine.MODEL_ENTRY_ID_FIELD, value = "model_entry_id"),
- @Property(name =
TopicClassificationEngine.MODEL_EVALUATION_DATE_FIELD, value =
"last_evaluation_dt")})
+ @Property(name =
TopicClassificationEngine.MODEL_EVALUATION_DATE_FIELD, value =
"last_evaluation_dt"),
+ @Property(name =
TopicClassificationEngine.FALSE_NEGATIVES_FIELD, value = "false_negatives"),
+ @Property(name =
TopicClassificationEngine.FALSE_POSITIVES_FIELD, value = "false_positives"),
+ @Property(name =
TopicClassificationEngine.POSITIVE_SUPPORT_FIELD, value = "positive_support"),
+ @Property(name =
TopicClassificationEngine.NEGATIVE_SUPPORT_FIELD, value = "negative_support")})
public class TopicClassificationEngine extends ConfiguredSolrCoreTracker
implements EnhancementEngine,
ServiceProperties, TopicClassifier {
@@ -140,6 +144,14 @@ public class TopicClassificationEngine e
public static final String F1_FIELD =
"org.apache.stanbol.enhancer.engine.topic.f1Field";
+ public static final String FALSE_POSITIVES_FIELD =
"org.apache.stanbol.enhancer.engine.topic.falsePositivesField";
+
+ public static final String FALSE_NEGATIVES_FIELD =
"org.apache.stanbol.enhancer.engine.topic.falseNegativesField";
+
+ public static final String POSITIVE_SUPPORT_FIELD =
"org.apache.stanbol.enhancer.engine.topic.positiveSupportField";
+
+ public static final String NEGATIVE_SUPPORT_FIELD =
"org.apache.stanbol.enhancer.engine.topic.negativeSupportField";
+
private static final Logger log =
LoggerFactory.getLogger(TopicClassificationEngine.class);
// TODO: make the following bounds configurable
@@ -185,6 +197,14 @@ public class TopicClassificationEngine e
protected String modelEntryIdField;
+ protected String positiveSupportField;
+
+ protected String negativeSupportField;
+
+ protected String falsePositivesField;
+
+ protected String falseNegativesField;
+
// customize the behavior of the classifier instance for model evaluation
protected int cvFoldIndex = 0;
@@ -218,6 +238,10 @@ public class TopicClassificationEngine e
f1Field = getRequiredStringParam(config, F1_FIELD);
modelUpdateDateField = getRequiredStringParam(config,
MODEL_UPDATE_DATE_FIELD);
modelEvaluationDateField = getRequiredStringParam(config,
MODEL_EVALUATION_DATE_FIELD);
+ falsePositivesField = getRequiredStringParam(config,
FALSE_POSITIVES_FIELD);
+ falseNegativesField = getRequiredStringParam(config,
FALSE_NEGATIVES_FIELD);
+ positiveSupportField = getRequiredStringParam(config,
POSITIVE_SUPPORT_FIELD);
+ negativeSupportField = getRequiredStringParam(config,
NEGATIVE_SUPPORT_FIELD);
configureSolrCore(config, SOLR_CORE);
// optional fields, can be null
@@ -719,11 +743,19 @@ public class TopicClassificationEngine e
float precision = (Float) metadata.getFirstValue(precisionField);
float recall = (Float) metadata.getFirstValue(recallField);
float f1 = (Float) metadata.getFirstValue(f1Field);
- // int positiveSupport = (Integer) metadata.getFirstValue(po);
- // int negativeSupport = 0;
+ int positiveSupport = (Integer)
metadata.getFirstValue(positiveSupportField);
+ int negativeSupport = (Integer)
metadata.getFirstValue(negativeSupportField);
Date evaluationDate = (Date)
metadata.getFirstValue(modelEvaluationDateField);
boolean uptodate = evaluationDate != null;
- return new ClassificationReport(precision, recall, f1, 0, 0,
uptodate, evaluationDate);
+ ClassificationReport report = new ClassificationReport(precision,
recall, f1, positiveSupport,
+ negativeSupport, uptodate, evaluationDate);
+ for (Object falsePositiveId :
metadata.getFieldValues(falsePositivesField)) { // configured Solr field name, not the config property key
+ report.falsePositiveExampleIds.add(falsePositiveId.toString());
+ }
+ for (Object falseNegativeId :
metadata.getFieldValues(falseNegativesField)) { // configured Solr field name, not the config property key
+ report.falseNegativeExampleIds.add(falseNegativeId.toString());
+ }
+ return report;
} catch (SolrServerException e) {
throw new ClassifierException(String.format("Error fetching the
performance report for topic "
+ topic));
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java?rev=1231995&r1=1231994&r2=1231995&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
Mon Jan 16 13:33:13 2012
@@ -485,6 +485,10 @@ public class TopicEngineTest extends Bas
config.put(TopicClassificationEngine.PRECISION_FIELD, "precision");
config.put(TopicClassificationEngine.RECALL_FIELD, "recall");
config.put(TopicClassificationEngine.F1_FIELD, "f1");
+ config.put(TopicClassificationEngine.POSITIVE_SUPPORT_FIELD,
"positive_support");
+ config.put(TopicClassificationEngine.NEGATIVE_SUPPORT_FIELD,
"negative_support");
+ config.put(TopicClassificationEngine.FALSE_POSITIVES_FIELD,
"false_positives");
+ config.put(TopicClassificationEngine.FALSE_NEGATIVES_FIELD,
"false_negatives");
return config;
}
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/classifier/schema.xml
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/classifier/schema.xml?rev=1231995&r1=1231994&r2=1231995&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/classifier/schema.xml
(original)
+++
incubator/stanbol/trunk/enhancer/engines/topic/src/test/resources/classifier/schema.xml
Mon Jan 16 13:33:13 2012
@@ -16,7 +16,7 @@
<fieldType name="string" class="solr.StrField"
sortMissingLast="true" omitNorms="true" />
- <fieldType name="int" class="solr.TrieIntField"
+ <fieldType name="tint" class="solr.TrieIntField"
precisionStep="0" omitNorms="true" positionIncrementGap="0" />
<fieldType name="tfloat" class="solr.TrieFloatField"
@@ -87,6 +87,13 @@
<field name="f1" type="tfloat" indexed="true" stored="true" />
<field name="last_evaluation_dt" type="tdate" indexed="true"
stored="true" />
+ <field name="positive_support" type="tint" indexed="false" stored="true" />
+ <field name="negative_support" type="tint" indexed="false" stored="true" />
+ <!-- Store ids of some false positive and negative examples -->
+ <field name="false_positives" type="string" indexed="false"
+ multiValued="true" stored="true" />
+ <field name="false_negatives" type="string" indexed="false"
+ multiValued="true" stored="true" />
</fields>