Any interest in generifying the relation extractor to make it easier to
instantiate all relation extractors as ClearTK-style annotators? This would
make it easier to share feature extractors, allow me to rewrite the coreference
code base, and make it easier to contribute to multiple projects without
learning a bunch of different implementations.
For example, the quick mock-up patch below (which does not compile yet; see
the TODOs) makes the abstract RelationExtractorAnnotator class generic over
three type parameters: a spanning type (the annotation whose span is searched
for candidate relations, e.g. Sentence) and the Arg1 and Arg2 types. All three
type parameters extend Annotation.
So the EntityMentionPairExtractor would extend
RelationExtractorAnnotator<Sentence,IdentifiedAnnotation,IdentifiedAnnotation>,
coreference would extend with the args
<Document,IdentifiedAnnotation,IdentifiedAnnotation>,
DegreeOf would extend with args <Sentence,IdentifiedAnnotation,Modifier>,
and so on.
The other aspect is the list of feature extractors, which is currently defined
in the abstract class. Since that field is protected, we could keep a default
set of features in this class and expect subclasses to define their own.
Alternatively, we could make it an abstract method so that subclasses have
to explicitly enumerate their features.
Any thoughts? It is early in the morning so let me know if I missed something
obvious.
Tim
Index:
src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
===================================================================
---
src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
(revision 1421247)
+++
src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
(working copy)
@@ -49,7 +49,7 @@
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-public abstract class RelationExtractorAnnotator extends
CleartkAnnotator<String> {
+public abstract class RelationExtractorAnnotator<SPANNING_TYPE extends
Annotation,A1TYPE extends Annotation,A2TYPE extends Annotation> extends
CleartkAnnotator<String> {
public static final String NO_RELATION_CATEGORY = "-NONE-";
@@ -94,7 +94,7 @@
/**
* Selects the relevant mentions/annotations within a sentence for relation
identification/extraction
*/
- public abstract List<IdentifiedAnnotationPair>
getCandidateRelationArgumentPairs(JCas identifiedAnnotationView, Sentence
sentence);
+ public abstract List<IdentifiedAnnotationPair<A1TYPE,A2TYPE>>
getCandidateRelationArgumentPairs(JCas identifiedAnnotationView, SPANNING_TYPE
span);
/*
* Implement the standard UIMA process method.
@@ -122,15 +122,16 @@
}
// walk through each sentence in the text
- for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
+ // TODO - causes compiler error, didn't think about how to fix this yet
+ for (SPANNING_TYPE span : JCasUtil.select(jCas, SPANNING_TYPE.class)) {
// collect all relevant relation arguments from the sentence
- List<IdentifiedAnnotationPair> candidatePairs =
this.getCandidateRelationArgumentPairs(identifiedAnnotationView, sentence);
+ List<IdentifiedAnnotationPair<A1TYPE,A2TYPE>> candidatePairs =
this.getCandidateRelationArgumentPairs(identifiedAnnotationView, span);
// walk through the pairs of annotations
- for (IdentifiedAnnotationPair pair : candidatePairs) {
- IdentifiedAnnotation arg1 = pair.getArg1();
- IdentifiedAnnotation arg2 = pair.getArg2();
+ for (IdentifiedAnnotationPair<A1TYPE,A2TYPE> pair : candidatePairs) {
+ A1TYPE arg1 = pair.getArg1();
+ A2TYPE arg2 = pair.getArg2();
// apply all the feature extractors to extract the list of
features
List<Feature> features = new ArrayList<Feature>();
for (RelationFeaturesExtractor extractor :
this.featureExtractors) {
@@ -162,9 +163,10 @@
if (!predictedCategory.equals(NO_RELATION_CATEGORY)) {
// if we predict an inverted relation, reverse
the order of the arguments
+ // TODO - only makes sense if TYPE1 and TYPE2
are the same...maybe another parameter? also causes compiler error in this state
if (predictedCategory.endsWith("-1")) {
predictedCategory =
predictedCategory.substring(0, predictedCategory.length() - 2);
- IdentifiedAnnotation temp = arg1;
+ A1TYPE temp = arg1;
arg1 = arg2;
arg2 = temp;
}
@@ -198,7 +200,7 @@
* otherwise it returns the label sent to the datawriter
*/
protected abstract String getRelationCategory(Map<List<Annotation>,
BinaryTextRelation> relationLookup,
- IdentifiedAnnotation arg1, IdentifiedAnnotation arg2);
+ A1TYPE arg1, A2TYPE arg2);
/**
* Creates a lookup map between lists of arguments and their relation
@@ -225,17 +227,17 @@
return relationLookup;
}
- public static class IdentifiedAnnotationPair {
+ public static class IdentifiedAnnotationPair<A1TYPE,A2TYPE> {
- private final IdentifiedAnnotation arg1;
- private final IdentifiedAnnotation arg2;
- public IdentifiedAnnotationPair(IdentifiedAnnotation arg1,
IdentifiedAnnotation arg2) {
+ private final A1TYPE arg1;
+ private final A2TYPE arg2;
+ public IdentifiedAnnotationPair(A1TYPE arg1, A2TYPE arg2) {
this.arg1 = arg1;
this.arg2 = arg2;
}
- public final IdentifiedAnnotation getArg1() { return arg1; }
+ public final A1TYPE getArg1() { return arg1; }
- public final IdentifiedAnnotation getArg2() { return arg2; }
+ public final A2TYPE getArg2() { return arg2; }
}
}
Index:
src/main/java/org/apache/ctakes/relationextractor/ae/features/RelationFeaturesExtractor.java
===================================================================
---
src/main/java/org/apache/ctakes/relationextractor/ae/features/RelationFeaturesExtractor.java
(revision 1421247)
+++
src/main/java/org/apache/ctakes/relationextractor/ae/features/RelationFeaturesExtractor.java
(working copy)
@@ -22,10 +22,9 @@
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
import org.cleartk.classifier.Feature;
-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-
/**
* Define an interface for people to implement feature extractors.
*/
@@ -42,6 +41,6 @@
* The second identified annotation in the text.
* @return A list of features indicative of the relation between the named
entities
*/
- public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
IdentifiedAnnotation arg2)
+ public List<Feature> extract(JCas jCas, Annotation arg1, Annotation arg2)
throws AnalysisEngineProcessException;
}
\ No newline at end of file