generify relation extractor annotator

Miller, Timothy Thu, 13 Dec 2012 04:57:27 -0800

Is there any interest in generifying the relation extractor so that all 
relation extractors can be instantiated as ClearTK-style annotators?  This 
would make it easier to share feature extractors, allow me to rewrite the 
coreference code base, and make it easier to contribute to multiple projects 
without learning a bunch of different implementations.


For example, the quick mock-up patch below (which does not compile yet; see 
the TODOs) changes the abstract RelationExtractorAnnotator class to take three 
type parameters: SPANNING_TYPE (the annotation whose span the relation falls 
within), and A1TYPE and A2TYPE (the types of the two relation arguments).  All 
three type parameters extend Annotation.

So the EntityMentionPairExtractor would extend 
RelationExtractorAnnotator<Sentence,IdentifiedAnnotation,IdentifiedAnnotation>,
a coreference extractor would extend it with the type arguments 
<Document,IdentifiedAnnotation,IdentifiedAnnotation>,
DegreeOf would extend it with <Sentence,IdentifiedAnnotation,Modifier>,
and so on.

The other aspect is the set of feature extractors, which is currently defined 
in the abstract class.  Since it is protected, we could simply keep default 
features in this class and expect subclasses to define their own features.  
Alternatively, we could make it an abstract method so that each concrete 
subclass has to enumerate its features explicitly.

Any thoughts? It is early in the morning so let me know if I missed something 
obvious.

Tim


Index: 
src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
===================================================================
--- 
src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
        (revision 1421247)
+++ 
src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
        (working copy)
@@ -49,7 +49,7 @@
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;

-public abstract class RelationExtractorAnnotator extends 
CleartkAnnotator<String> {
+public abstract class RelationExtractorAnnotator<SPANNING_TYPE extends 
Annotation,A1TYPE extends Annotation,A2TYPE extends Annotation> extends 
CleartkAnnotator<String> {

  public static final String NO_RELATION_CATEGORY = "-NONE-";

@@ -94,7 +94,7 @@
  /**
   * Selects the relevant mentions/annotations within a sentence for relation 
identification/extraction
   */
-  public abstract List<IdentifiedAnnotationPair> 
getCandidateRelationArgumentPairs(JCas identifiedAnnotationView, Sentence 
sentence);
+  public abstract List<IdentifiedAnnotationPair<A1TYPE,A2TYPE>> 
getCandidateRelationArgumentPairs(JCas identifiedAnnotationView, SPANNING_TYPE 
span);

  /*
   * Implement the standard UIMA process method.
@@ -122,15 +122,16 @@
    }

    // walk through each sentence in the text
-    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
+    // TODO - causes compiler error, didn't think about how to fix this yet
+    for (SPANNING_TYPE span : JCasUtil.select(jCas, SPANNING_TYPE.class)) {

        // collect all relevant relation arguments from the sentence
-       List<IdentifiedAnnotationPair> candidatePairs = 
this.getCandidateRelationArgumentPairs(identifiedAnnotationView, sentence);
+       List<IdentifiedAnnotationPair<A1TYPE,A2TYPE>> candidatePairs = 
this.getCandidateRelationArgumentPairs(identifiedAnnotationView, span);

        // walk through the pairs of annotations
-       for (IdentifiedAnnotationPair pair : candidatePairs) {
-               IdentifiedAnnotation arg1 = pair.getArg1();
-               IdentifiedAnnotation arg2 = pair.getArg2();
+       for (IdentifiedAnnotationPair<A1TYPE,A2TYPE> pair : candidatePairs) {
+               A1TYPE arg1 = pair.getArg1();
+               A2TYPE arg2 = pair.getArg2();
                // apply all the feature extractors to extract the list of 
features
                List<Feature> features = new ArrayList<Feature>();
                for (RelationFeaturesExtractor extractor : 
this.featureExtractors) {
@@ -162,9 +163,10 @@
                        if (!predictedCategory.equals(NO_RELATION_CATEGORY)) {

                                // if we predict an inverted relation, reverse 
the order of the arguments
+                               // TODO - only makes sense if A1TYPE and A2TYPE 
are the same...maybe another parameter? also causes compiler error in this state
                                if (predictedCategory.endsWith("-1")) {
                                        predictedCategory = 
predictedCategory.substring(0, predictedCategory.length() - 2);
-                                       IdentifiedAnnotation temp = arg1;
+                                       A1TYPE temp = arg1;
                                        arg1 = arg2;
                                        arg2 = temp;
                                }
@@ -198,7 +200,7 @@
   *         otherwise it returns the label sent to the datawriter
   */
  protected abstract String getRelationCategory(Map<List<Annotation>, 
BinaryTextRelation> relationLookup,
-                 IdentifiedAnnotation arg1, IdentifiedAnnotation arg2);
+                 A1TYPE arg1, A2TYPE arg2);

  /**
   * Creates a lookup map between lists of arguments and their relation
@@ -225,17 +227,17 @@
          return relationLookup;
  }

-  public static class IdentifiedAnnotationPair {
+  public static class IdentifiedAnnotationPair<A1TYPE,A2TYPE> {
          
-        private final IdentifiedAnnotation arg1;
-        private final IdentifiedAnnotation arg2;
-        public IdentifiedAnnotationPair(IdentifiedAnnotation arg1, 
IdentifiedAnnotation arg2) {
+        private final A1TYPE arg1;
+        private final A2TYPE arg2;
+        public IdentifiedAnnotationPair(A1TYPE arg1, A2TYPE arg2) {
                 this.arg1 = arg1;
                 this.arg2 = arg2;
         }
         
-        public final IdentifiedAnnotation getArg1() { return arg1; }
+        public final A1TYPE getArg1() { return arg1; }
                 
-        public final IdentifiedAnnotation getArg2() { return arg2; }
+        public final A2TYPE getArg2() { return arg2; }
  }
}
Index: 
src/main/java/org/apache/ctakes/relationextractor/ae/features/RelationFeaturesExtractor.java
===================================================================
--- 
src/main/java/org/apache/ctakes/relationextractor/ae/features/RelationFeaturesExtractor.java
        (revision 1421247)
+++ 
src/main/java/org/apache/ctakes/relationextractor/ae/features/RelationFeaturesExtractor.java
        (working copy)
@@ -22,10 +22,9 @@

import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
import org.cleartk.classifier.Feature;

-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-
/**
 * Define an interface for people to implement feature extractors.
 */
@@ -42,6 +41,6 @@
   *          The second identified annotation in the text.
   * @return A list of features indicative of the relation between the named 
entities
   */
-  public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1, 
IdentifiedAnnotation arg2)
+  public List<Feature> extract(JCas jCas, Annotation arg1, Annotation arg2)
      throws AnalysisEngineProcessException;
}
\ No newline at end of file

generify relation extractor annotator

Reply via email to