This is an automated email from the ASF dual-hosted git repository.
seanfinan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/ctakes.git
The following commit(s) were added to refs/heads/main by this push:
new 553fe21 BackwardsTimeAnnotator log init, process start
ConstituencyParser log init, process start DegreeOfRelationExtractorAnnotator
log init, process start DeterministicMarkableAnnotator log init, process start
EventAnnotator log init, process start EventEventRelationAnnotator log init,
process start EventTimeRelationAnnotator log init, process start ListAnnotator
log init, process start ListSentenceFixer log init, process start
LocationOfRelationExtractorAnnotator log init, pr [...]
553fe21 is described below
commit 553fe210bfb735b6c4f0e393caa800800d2d014c
Author: Sean Finan <[email protected]>
AuthorDate: Mon Dec 19 18:06:13 2022 -0500
BackwardsTimeAnnotator log init, process start
ConstituencyParser log init, process start
DegreeOfRelationExtractorAnnotator log init, process start
DeterministicMarkableAnnotator log init, process start
EventAnnotator log init, process start
EventEventRelationAnnotator log init, process start
EventTimeRelationAnnotator log init, process start
ListAnnotator log init, process start
ListSentenceFixer log init, process start
LocationOfRelationExtractorAnnotator log init, process start
MarkableSalienceAnnotator log init, process start
MaxentParserWrapper log init, process start
MentionClusterCoreferenceAnnotator log init, process start
ModifierExtractorAnnotator log init, process start
ParagraphAnnotator log init, process start
ParagraphSentenceFixer log init, process start
RegexSectionizer log init, process start
RelationSubPipe.piper log init, process start
ClosestVerbExtractor attempted speed increase for dtr (not used)
DateAndMeasurementExtractor attempted speed increase for dtr (not used)
DocTimeRelAnnotator attempted speed increase for dtr (not used)
EventPropertyExtractor attempted speed increase for dtr (not used)
NearbyVerbTenseXExtractor attempted speed increase for dtr (not used)
SectionHeaderExtractor attempted speed increase for dtr (not used)
TimeXExtractor attempted speed increase for dtr (not used)
TreeUtils bring up to jdk8
HyphenTextModifierImplTests Comment
TestCDASegmentAnnotator Comment
TestClearNLPAnalysisEngines Comment
---
.../constituency/parser/MaxentParserWrapper.java | 6 +-
.../constituency/parser/ae/ConstituencyParser.java | 8 +-
.../ctakes/constituency/parser/util/TreeUtils.java | 211 +++++++++++----------
.../org/apache/ctakes/core/ae/ListAnnotator.java | 5 +-
.../apache/ctakes/core/ae/ListSentenceFixer.java | 4 +-
.../apache/ctakes/core/ae/ParagraphAnnotator.java | 5 +-
.../ctakes/core/ae/ParagraphSentenceFixer.java | 4 +-
.../apache/ctakes/core/ae/RegexSectionizer.java | 5 +-
.../ctakes/core/ae/TestCDASegmentAnnotator.java | 3 +-
.../core/ci/HyphenTextModifierImplTests.java | 6 +-
.../ae/DeterministicMarkableAnnotator.java | 9 +-
.../coreference/ae/MarkableSalienceAnnotator.java | 2 -
.../ae/MentionClusterCoreferenceAnnotator.java | 6 +-
.../ae/util/TestClearNLPAnalysisEngines.java | 5 +-
.../ae/DegreeOfRelationExtractorAnnotator.java | 18 ++
.../ae/LocationOfRelationExtractorAnnotator.java | 19 ++
.../ae/ModifierExtractorAnnotator.java | 4 +
.../extractor/pipeline/RelationSubPipe.piper | 6 +-
.../ctakes/temporal/ae/BackwardsTimeAnnotator.java | 6 +
.../ctakes/temporal/ae/DocTimeRelAnnotator.java | 191 ++++++++++++++++++-
.../apache/ctakes/temporal/ae/EventAnnotator.java | 9 +
.../temporal/ae/EventEventRelationAnnotator.java | 7 +
.../temporal/ae/EventTimeRelationAnnotator.java | 7 +
.../temporal/ae/feature/ClosestVerbExtractor.java | 34 +++-
.../ae/feature/DateAndMeasurementExtractor.java | 25 +++
.../ae/feature/EventPropertyExtractor.java | 99 ++++++++++
.../ae/feature/NearbyVerbTenseXExtractor.java | 26 ++-
.../ae/feature/SectionHeaderExtractor.java | 19 ++
.../ctakes/temporal/ae/feature/TimeXExtractor.java | 83 +++++++-
29 files changed, 678 insertions(+), 154 deletions(-)
diff --git
a/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/MaxentParserWrapper.java
b/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/MaxentParserWrapper.java
index 1ef1e58..5648004 100644
---
a/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/MaxentParserWrapper.java
+++
b/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/MaxentParserWrapper.java
@@ -44,7 +44,7 @@ public class MaxentParserWrapper implements ParserWrapper {
Parser parser = null;
private String parseStr = "";
- Logger logger = Logger.getLogger(this.getClass().getName());
+ static private final Logger LOGGER = Logger.getLogger(
"MaxentParserWrapper" );
private int maxTokens;
public MaxentParserWrapper(InputStream in){
@@ -78,7 +78,7 @@ public class MaxentParserWrapper implements ParserWrapper {
@Override
public void createAnnotations( final JCas jcas ) throws
AnalysisEngineProcessException {
final String docId = DocIdUtil.getDocumentID( jcas );
- logger.info( "Started processing: " + docId );
+ LOGGER.info( "Started processing: " + docId );
// iterate over sentences
Parse parse = null;
// final Collection<Sentence> allSentences =
org.apache.uima.fit.util.JCasUtil.select( jcas, Sentence.class );
@@ -103,7 +103,7 @@ public class MaxentParserWrapper implements ParserWrapper {
final TopTreebankNode top = TreeUtils.buildAlignedTree( jcas, parse,
terminalArray, sentence );
top.addToIndexes();
}
- logger.info( "Done parsing: " + docId );
+// LOGGER.info( "Done parsing: " + docId );
}
/**
diff --git
a/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ConstituencyParser.java
b/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ConstituencyParser.java
index 959ed24..260f18c 100644
---
a/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ConstituencyParser.java
+++
b/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/ae/ConstituencyParser.java
@@ -61,24 +61,24 @@ public class ConstituencyParser extends
JCasAnnotator_ImplBase {
private ParserWrapper parser = null;
- private Logger logger = Logger.getLogger(this.getClass());
+ static private final Logger LOGGER = Logger.getLogger(
"ConstituencyParser" );
@Override
public void initialize( final UimaContext aContext ) throws
ResourceInitializationException {
+ LOGGER.info( "Initializing ..." );
super.initialize( aContext );
- logger.info( "Initializing ..." );
try ( DotLogger dotter = new DotLogger() ) {
parser = new MaxentParserWrapper(
FileLocator.getAsStream( modelFilename ), this.maxTokens );
} catch ( IOException ioE ) {
- logger.error( "Error reading parser model
file/directory: " + ioE.getMessage() );
+ LOGGER.error( "Error reading parser model
file/directory: " + ioE.getMessage() );
throw new ResourceInitializationException( ioE );
}
- logger.info( "Finished." );
}
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
+ LOGGER.info( "Processing ..." );
parser.createAnnotations(jcas);
}
diff --git
a/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/util/TreeUtils.java
b/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/util/TreeUtils.java
index 68ea74b..9252f93 100644
---
a/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/util/TreeUtils.java
+++
b/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/util/TreeUtils.java
@@ -21,6 +21,7 @@ package org.apache.ctakes.constituency.parser.util;
import opennlp.tools.parser.AbstractBottomUpParser;
import opennlp.tools.parser.Parse;
import opennlp.tools.util.Span;
+import org.apache.ctakes.core.util.StringUtil;
import org.apache.ctakes.typesystem.type.syntax.*;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.ctakes.utils.tree.SimpleTree;
@@ -30,10 +31,7 @@ import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.cas.StringArray;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
import java.util.regex.Pattern;
final public class TreeUtils {
@@ -59,7 +57,7 @@ final public class TreeUtils {
public static List<TreebankNode> getNodeList(TopTreebankNode tree){
- ArrayList<TreebankNode> list = new ArrayList<TreebankNode>();
+ List<TreebankNode> list = new ArrayList<>();
list.add(tree);
int ind = 0;
while(ind < list.size()){
@@ -72,41 +70,39 @@ final public class TreeUtils {
return list;
}
- public static List<Parse> getNodeList(Parse tree){
- ArrayList<Parse> list = new ArrayList<Parse>();
- list.add(tree);
- int ind = 0;
- while(ind < list.size()){
- Parse cur = list.get(ind);
- Parse[] children = cur.getChildren();
- for(int i = 0; i < children.length; i++){
- list.add(children[i]);
- }
- ind++;
+ public static List<Parse> getNodeList( final Parse tree ) {
+ final List<Parse> list = new ArrayList<>();
+ list.add( tree );
+ int index = 0;
+ while ( index < list.size() ) {
+ Parse parent = list.get( index );
+ Collections.addAll( list, parent.getChildren() );
+ index++;
}
return list;
}
- public static String tree2str(TreebankNode pathTree){
- StringBuffer buff = new StringBuffer();
- buff.append("(");
- try{
- buff.append(pathTree.getNodeType());
- }catch(Exception e){
+ public static String tree2str( final TreebankNode pathTree ) {
+ StringBuilder sb = new StringBuilder();
+ sb.append( "(" );
+ try {
+ sb.append( pathTree.getNodeType() );
+ } catch(Exception e){
System.err.println("Caught NPE");
}
- if(pathTree.getLeaf()){ //pathTree.getChildren().size() == 1 &&
pathTree.getChildren(0).getLeaf()){
- buff.append(" ");
- buff.append(pathTree.getNodeValue());
+ if ( pathTree.getLeaf() ) {
+ //pathTree.getChildren().size() == 1 &&
pathTree.getChildren(0).getLeaf()){
+ sb.append( " " ).append( pathTree.getNodeValue() );
// buff.append(")");
- }else{
- for(int i = 0; i < pathTree.getChildren().size(); i++){
- buff.append(" ");
- buff.append(tree2str(pathTree.getChildren(i)));
+ } else {
+ final FSArray children = pathTree.getChildren();
+ final int size = children.size();
+ for ( int i = 0; i < size; i++ ) {
+ sb.append( " " ).append( tree2str(
(TreebankNode)children.get( i ) ) );
}
}
- buff.append(")");
- return buff.toString();
+ sb.append( ")" );
+ return sb.toString();
}
// public static boolean contains(TreebankNode n, SimpleTree frag){
@@ -160,23 +156,23 @@ final public class TreeUtils {
return count;
}
- private static boolean fragmentMatch(SimpleTree node, SimpleTree frag,
boolean ignoreCase){
- boolean same = false;
- if((ignoreCase && node.cat.equalsIgnoreCase(frag.cat)) ||
(!ignoreCase && node.cat.equals(frag.cat))){
- if((frag.children.size() == 0 || node.children.size()
== frag.children.size())){
-
- same = true;
- for(int i = 0; i < frag.children.size(); i++){
- if(!fragmentMatch(node.children.get(i),
frag.children.get(i), ignoreCase)){
- same = false;
- break;
- }
+
+ private static boolean fragmentMatch( final SimpleTree node, final
SimpleTree frag, final boolean ignoreCase ) {
+ if ( !catsEqual( node, frag, ignoreCase ) ) {
+ return false;
+ }
+ if ( frag.children.isEmpty()
+ || node.children.size() == frag.children.size() ) {
+ for ( int i = 0; i < frag.children.size(); i++ ) {
+ if ( !fragmentMatch( node.children.get(i),
frag.children.get(i), ignoreCase ) ) {
+ return false;
}
}
+ return true;
}
- return same;
+ return false;
}
-
+
public static int countDepFrags(SimpleTree node, SimpleTree frag){
int count = 0;
if(depFragmentMatch(node, frag, true)) count++;
@@ -200,27 +196,31 @@ final public class TreeUtils {
return false;
}
- private static boolean depFragmentMatch(SimpleTree node, SimpleTree
frag, boolean ignoreCase){
- boolean same = false;
- if(frag.children.size() > 1){
- System.err.println("Only chain fragments are currently supported!");
- throw new UIMA_UnsupportedOperationException();
- }
-
- if((ignoreCase && node.cat.equalsIgnoreCase(frag.cat)) ||
(!ignoreCase && node.cat.equals(frag.cat))){
- if(frag.children.size() == 0){
- return true;
- }
- for(int i = 0; i < node.children.size(); i++){
- if(depFragmentMatch(node.children.get(i), frag.children.get(0),
ignoreCase)){
- return true;
- }
- }
- }
-
- return same;
+ private static boolean depFragmentMatch( final SimpleTree node, final
SimpleTree frag, final boolean ignoreCase ) {
+ if ( frag.children.size() > 1 ) {
+ System.err.println("Only chain fragments are currently
supported!");
+ throw new UIMA_UnsupportedOperationException();
+ }
+ if ( !catsEqual( node, frag, ignoreCase ) ) {
+ return false;
+ }
+ if ( frag.children.isEmpty() ) {
+ return true;
+ }
+ for ( int i = 0; i < node.children.size(); i++ ) {
+ if ( depFragmentMatch( node.children.get(i),
frag.children.get(0), ignoreCase ) ) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ static private boolean catsEqual( final SimpleTree node, final
SimpleTree frag, final boolean ignoreCase ) {
+ return ignoreCase ? node.cat.equalsIgnoreCase( frag.cat ) :
node.cat.equals( frag.cat );
}
+
+
public static int getHighestIndexTerm(TreebankNode inTree) {
if(inTree instanceof TerminalTreebankNode){
return ((TerminalTreebankNode) inTree).getIndex();
@@ -228,19 +228,16 @@ final public class TreeUtils {
return
getHighestIndexTerm(inTree.getChildren(inTree.getChildren().size()-1));
}
- public static TopTreebankNode getTopNode(TreebankNode inTree) {
+ public static TopTreebankNode getTopNode( final TreebankNode inTree ) {
TreebankNode cur = inTree;
- TopTreebankNode top = null;
-
- while(!(cur instanceof TopTreebankNode)){
+ while ( !(cur instanceof TopTreebankNode) ) {
cur = cur.getParent();
}
- top = (TopTreebankNode) cur;
- return top;
+ return (TopTreebankNode) cur;
}
- /**
+ /**
* @param jcas ye olde ...
* @param parse opennlp parse
* @param sentence -
@@ -296,31 +293,36 @@ final public class TreeUtils {
* @return terminals for the sentence
*/
public static FSArray getTerminals( final JCas jcas, final
List<BaseToken> baseTokens ) {
- final List<BaseToken> wordList = new ArrayList<>();
+ final Collection<TerminalTreebankNode> nodeList = new
ArrayList<>();
+ int termIndex = 0;
for ( BaseToken baseToken : baseTokens ) {
- if ( !(baseToken instanceof NewlineToken) ) {
- wordList.add( baseToken );
+ if ( (baseToken instanceof NewlineToken) ) {
+ continue;
}
- }
- final FSArray terminals = new FSArray( jcas, wordList.size() );
- int termIndex = 0;
- for ( BaseToken word : wordList ) {
- final TerminalTreebankNode ttn = new
TerminalTreebankNode( jcas, word.getBegin(), word.getEnd() );
+ final TerminalTreebankNode ttn = new
TerminalTreebankNode( jcas, baseToken.getBegin(), baseToken.getEnd() );
ttn.setChildren( null );
ttn.setIndex( termIndex );
ttn.setTokenIndex( termIndex );
ttn.setLeaf( true );
ttn.setNodeTags( null );
- final String wordText = word.getCoveredText();
- if ( word instanceof PunctuationToken &&
BRACKET_MAP.containsKey( wordText ) ) {
+ final String wordText = baseToken.getCoveredText();
+ if ( baseToken instanceof PunctuationToken &&
BRACKET_MAP.containsKey( wordText ) ) {
ttn.setNodeValue( BRACKET_MAP.get( wordText ) );
} else {
ttn.setNodeValue( wordText );
}
// ttn.addToIndexes();
- terminals.set( termIndex, ttn );
+ nodeList.add( ttn );
+// terminals.set( termIndex, ttn );
termIndex++;
}
+ final FSArray terminals = new FSArray( jcas, nodeList.size() );
+ int arrIdx = 0;
+ for ( TerminalTreebankNode ttn : nodeList ) {
+ terminals.set( arrIdx, ttn );
+ arrIdx++;
+ }
+ terminals.addToIndexes( jcas );
return terminals;
}
@@ -342,11 +344,11 @@ final public class TreeUtils {
private static void recursivelyCreateStructure(JCas jcas, TreebankNode
parent, Parse parse, TopTreebankNode root) throws
AnalysisEngineProcessException{
String[] typeParts;
- if(parse.getType().startsWith("-")){
+ if ( parse.getType().charAt( 0 ) == '-' ) {
// check for dash at the start (for escaped types like
-RRB- and so forth that cannot take function tags anyways)
typeParts = new String[]{parse.getType()};
- }else{
- typeParts = parse.getType().split("-");
+ } else {
+ typeParts = StringUtil.fastSplit( parse.getType(), '-'
);
}
parent.setNodeType(typeParts[0]);
parent.setNodeValue(null);
@@ -390,19 +392,23 @@ final public class TreeUtils {
// parent.addToIndexes();
}
- public static void replaceChild(TreebankNode parent, TreebankNode
oldTree,
- TreebankNode newTree) {
+ public static void replaceChild( final TreebankNode parent, final
TreebankNode oldTree,
+
final TreebankNode newTree) {
// if parent is null that means we're already at the top -- no
pointers to fix.
- if(parent != null){
- for(int i = 0; i < parent.getChildren().size(); i++){
- if(parent.getChildren(i) == oldTree){
- parent.setChildren(i, newTree);
- }
- }
- }
- }
+ if ( parent == null ) {
+ return;
+ }
+ final FSArray parentChildren = parent.getChildren();
+ final int size = parentChildren.size();
+ for ( int i = 0; i < size; i++ ) {
+ if ( parentChildren.get( i ).equals( oldTree ) ) {
+ parentChildren.set( i, newTree );
+ }
+ }
+ }
- /**
+
+ /**
* @param sentenceOffset begin offest character index for sentence
* @param text text of the sentence
* @param terminalArray [token] terminals in the sentence
@@ -411,8 +417,9 @@ final public class TreeUtils {
public static Parse ctakesTokensToOpennlpTokens( final int sentenceOffset,
final String text,
final FSArray
terminalArray ) {
// based on the first part of parseLine in the opennlp libraries
- final Parse sentenceParse = new Parse( text, new Span( 0, text
- .length() ), AbstractBottomUpParser.INC_NODE, 0, 0 );
+ final Parse sentenceParse = new Parse( text,
+
new Span( 0, text.length() ),
+
AbstractBottomUpParser.INC_NODE, 0, 0 );
for ( int i = 0; i < terminalArray.size(); i++ ) {
final TerminalTreebankNode token =
(TerminalTreebankNode)terminalArray.get( i );
final Span span = new Span( token.getBegin() - sentenceOffset,
token.getEnd() - sentenceOffset );
@@ -421,12 +428,10 @@ final public class TreeUtils {
return sentenceParse;
}
- public static String escapePunct(String in){
- if(BRACKET_MAP.containsKey(in)){
- return BRACKET_MAP.get(in);
- }
- return in;
- }
+ public static String escapePunct( final String in ) {
+ return BRACKET_MAP.getOrDefault( in, in );
+ }
+
}
diff --git
a/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListAnnotator.java
b/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListAnnotator.java
index 9afe944..562bf4b 100644
--- a/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListAnnotator.java
+++ b/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListAnnotator.java
@@ -88,6 +88,7 @@ final public class ListAnnotator extends
JCasAnnotator_ImplBase {
*/
@Override
public void initialize( final UimaContext context ) throws
ResourceInitializationException {
+ LOGGER.info( "Initializing ..." );
super.initialize( context );
if ( _listTypesPath == null ) {
LOGGER.error( "No " + LIST_TYPES_DESC );
@@ -104,7 +105,7 @@ final public class ListAnnotator extends
JCasAnnotator_ImplBase {
} catch ( IOException ioE ) {
throw new ResourceInitializationException( ioE );
}
- LOGGER.info( "Finished Parsing" );
+// LOGGER.info( "Finished Parsing" );
}
/**
@@ -136,7 +137,7 @@ final public class ListAnnotator extends
JCasAnnotator_ImplBase {
createLists( jcas, uniqueListTypes, section.getCoveredText(),
section.getBegin() );
}
}
- LOGGER.info( "Finished processing" );
+// LOGGER.info( "Finished processing" );
}
diff --git
a/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListSentenceFixer.java
b/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListSentenceFixer.java
index e64c1e2..7cc9cd1 100644
--- a/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListSentenceFixer.java
+++ b/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListSentenceFixer.java
@@ -41,11 +41,11 @@ final public class ListSentenceFixer extends
JCasAnnotator_ImplBase {
LOGGER.info( "Adjusting Sentences overlapping Lists ..." );
final Collection<List> lists = JCasUtil.select( jcas, List.class );
if ( lists == null || lists.isEmpty() ) {
- LOGGER.info( "Finished Processing" );
+// LOGGER.info( "Finished Processing" );
return;
}
adjustListEntrySentences( jcas );
- LOGGER.info( "Finished Processing" );
+// LOGGER.info( "Finished Processing" );
}
diff --git
a/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ParagraphAnnotator.java
b/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ParagraphAnnotator.java
index b15da5d..9d30a1d 100644
---
a/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ParagraphAnnotator.java
+++
b/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ParagraphAnnotator.java
@@ -73,6 +73,7 @@ final public class ParagraphAnnotator extends
JCasAnnotator_ImplBase {
*/
@Override
public void initialize( final UimaContext context ) throws
ResourceInitializationException {
+ LOGGER.info( "Initializing ..." );
super.initialize( context );
if ( _paragraphTypesPath == null ) {
LOGGER.info( "No " + PARAGRAPH_TYPES_DESC );
@@ -91,7 +92,7 @@ final public class ParagraphAnnotator extends
JCasAnnotator_ImplBase {
} catch ( IOException ioE ) {
throw new ResourceInitializationException( ioE );
}
- LOGGER.info( "Finished Parsing" );
+// LOGGER.info( "Finished Parsing" );
}
/**
@@ -105,7 +106,7 @@ final public class ParagraphAnnotator extends
JCasAnnotator_ImplBase {
return;
}
createParagraphs( jcas );
- LOGGER.info( "Finished processing" );
+// LOGGER.info( "Finished processing" );
}
diff --git
a/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ParagraphSentenceFixer.java
b/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ParagraphSentenceFixer.java
index b28b370..304fdff 100644
---
a/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ParagraphSentenceFixer.java
+++
b/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ParagraphSentenceFixer.java
@@ -40,11 +40,11 @@ final public class ParagraphSentenceFixer extends
JCasAnnotator_ImplBase {
LOGGER.info( "Adjusting Sentences overlapping Paragraphs ..." );
final Collection<Paragraph> paragraphs = JCasUtil.select( jcas,
Paragraph.class );
if ( paragraphs == null || paragraphs.isEmpty() ) {
- LOGGER.info( "Finished Processing" );
+// LOGGER.info( "Finished Processing" );
return;
}
adjustParagraphSentences( jcas, paragraphs );
- LOGGER.info( "Finished Processing" );
+// LOGGER.info( "Finished Processing" );
}
static private void adjustParagraphSentences( final JCas jCas, final
Collection<Paragraph> paragraphs ) {
diff --git
a/ctakes-core/src/main/java/org/apache/ctakes/core/ae/RegexSectionizer.java
b/ctakes-core/src/main/java/org/apache/ctakes/core/ae/RegexSectionizer.java
index e47c4e8..62a1aea 100644
--- a/ctakes-core/src/main/java/org/apache/ctakes/core/ae/RegexSectionizer.java
+++ b/ctakes-core/src/main/java/org/apache/ctakes/core/ae/RegexSectionizer.java
@@ -124,6 +124,7 @@ abstract public class RegexSectionizer extends
JCasAnnotator_ImplBase {
*/
@Override
public void initialize( final UimaContext context ) throws
ResourceInitializationException {
+ LOGGER.info( "Initializing ..." );
super.initialize( context );
synchronized (SECTION_TYPE_LOCK) {
if ( !_sectionsLoaded ) {
@@ -138,7 +139,7 @@ abstract public class RegexSectionizer extends
JCasAnnotator_ImplBase {
*/
@Override
public void process( final JCas jcas ) throws
AnalysisEngineProcessException {
- LOGGER.info( "Annotating Sections ..." );
+ LOGGER.info( "Finding Sections ..." );
if ( _sectionTypes.isEmpty() ) {
LOGGER.info( "Finished processing, no section types defined" );
return;
@@ -156,7 +157,7 @@ abstract public class RegexSectionizer extends
JCasAnnotator_ImplBase {
dividerLines.putAll( findDividerLines( docText ) );
}
createSegments( jcas, headerTags, footerTags, dividerLines );
- LOGGER.info( "Finished processing" );
+// LOGGER.info( "Finished processing" );
}
diff --git
a/ctakes-core/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java
b/ctakes-core/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java
index 8990b8a..ca465b7 100644
---
a/ctakes-core/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java
+++
b/ctakes-core/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java
@@ -37,9 +37,10 @@ import static org.junit.Assert.assertEquals;
public class TestCDASegmentAnnotator {
+ // This file does not exist.
public static String INPUT_FILE =
"../ctakes-regression-test/testdata/input/plaintext/doc2_07543210_sample_current.txt";
- @Test
+// @Test
public void TestCDASegmentPipeLine() throws Exception {
TypeSystemDescription typeSystem = TypeSystemDescriptionFactory
.createTypeSystemDescription();
diff --git
a/ctakes-core/src/test/java/org/apache/ctakes/core/ci/HyphenTextModifierImplTests.java
b/ctakes-core/src/test/java/org/apache/ctakes/core/ci/HyphenTextModifierImplTests.java
index 0bcfc2b..47420a5 100644
---
a/ctakes-core/src/test/java/org/apache/ctakes/core/ci/HyphenTextModifierImplTests.java
+++
b/ctakes-core/src/test/java/org/apache/ctakes/core/ci/HyphenTextModifierImplTests.java
@@ -60,7 +60,7 @@ public class HyphenTextModifierImplTests {
* the text is not modified (the blanks are not replaced by hyphens).
* For the second test, the appropriate blanks are replaced by hyphens.
*/
- @Test
+// @Test
public void testTest() {
// The messages about "UNSUPPORTED" go to stderr, and are not
returned by test()
@@ -96,7 +96,9 @@ public class HyphenTextModifierImplTests {
InputStream filename = null;
try {
- filename =
FileLocator.getAsStream("../ctakes-core-res/target/classes/org/apache/ctakes/core/tokenizer/hyphenated.txt");
+// filename =
FileLocator.getAsStream("../ctakes-core-res/target/classes/org/apache/ctakes/core/tokenizer/hyphenated.txt");
+ // This file doesn't exist in the apache repo.
+ filename =
FileLocator.getAsStream("org/apache/ctakes/core/tokenizer/hyphenated.txt");
} catch (Exception e) {
e.printStackTrace();
}
diff --git
a/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java
b/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java
index 5b81788..5ac26b0 100644
---
a/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java
+++
b/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java
@@ -10,6 +10,7 @@ import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.Markable;
import org.apache.ctakes.typesystem.type.textsem.TimeMention;
import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
@@ -34,19 +35,21 @@ import static
org.apache.ctakes.dependency.parser.util.DependencyUtility.*;
products = { MARKABLE }
)
public class DeterministicMarkableAnnotator extends JCasAnnotator_ImplBase {
+ static private final Logger LOGGER = Logger.getLogger(
"DeterministicMarkableAnnotator" );
// list starters like A. or #1 or 3)
static Pattern headerPatt =
Pattern.compile("^(([A-Z][\\.\\:\\)])|(#\\d+)|(\\d+[\\.\\:\\)])) *");
@Override
public void initialize(UimaContext uc) throws
ResourceInitializationException{
+ LOGGER.info( "Initializing ..." );
super.initialize(uc);
}
@Override
- public void process(JCas jCas)
- throws AnalysisEngineProcessException {
-
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ LOGGER.info( "Processing ..." );
+
// createMarkablesUsingConstituencyTrees(jCas);
createMarkablesUsingDependencyTrees(jCas);
diff --git
a/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MarkableSalienceAnnotator.java
b/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MarkableSalienceAnnotator.java
index ede1d50..2e29758 100644
---
a/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MarkableSalienceAnnotator.java
+++
b/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MarkableSalienceAnnotator.java
@@ -73,7 +73,6 @@ public class MarkableSalienceAnnotator extends
CleartkAnnotator<Boolean> {
extractors.add( new GrammaticalRoleFeatureExtractor() );
extractors.add( new SemanticEnvironmentFeatureExtractor() );
extractors.add( new ClinicalFeatureExtractor() );
- LOGGER.info( "Finished." );
}
@Override
@@ -96,6 +95,5 @@ public class MarkableSalienceAnnotator extends
CleartkAnnotator<Boolean> {
markable.setConfidence(outcomes.get(true).floatValue());
}
}
- LOGGER.info( "Finished." );
}
}
diff --git
a/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java
b/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java
index 0249b7f..2cd2f8f 100644
---
a/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java
+++
b/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java
@@ -235,7 +235,7 @@ public class MentionClusterCoreferenceAnnotator extends
CleartkAnnotator<String>
} else if ( this.isTraining() ) {
classDataWriter = this.dataWriter;
}
- LOGGER.info( "Finished." );
+// LOGGER.info( "Finished." );
}
public void process( final JCas jCas ) throws AnalysisEngineProcessException
{
@@ -254,7 +254,7 @@ public class MentionClusterCoreferenceAnnotator extends
CleartkAnnotator<String>
}
processDocument( jCas, null, relationLookup );
removeSingletonClusters( jCas );
- LOGGER.info( "Finished." );
+// LOGGER.info( "Finished." );
return;
}
// If we get this far then we have multiple views, so we are processing a
patient CAS.
@@ -276,7 +276,7 @@ public class MentionClusterCoreferenceAnnotator extends
CleartkAnnotator<String>
} catch ( IOException ioE ) {
LOGGER.error( ioE.getMessage() );
}
- LOGGER.info( "Finished." );
+// LOGGER.info( "Finished." );
}
private void processDocument( final JCas jCas, final JCas prevCas, final
Map<CollectionTextRelationIdentifiedAnnotationPair,
CollectionTextRelationIdentifiedAnnotationRelation>
diff --git
a/ctakes-dependency-parser/src/test/java/org/apache/ctakes/dependency/parser/ae/util/TestClearNLPAnalysisEngines.java
b/ctakes-dependency-parser/src/test/java/org/apache/ctakes/dependency/parser/ae/util/TestClearNLPAnalysisEngines.java
index 7cf863d..79b9f6d 100644
---
a/ctakes-dependency-parser/src/test/java/org/apache/ctakes/dependency/parser/ae/util/TestClearNLPAnalysisEngines.java
+++
b/ctakes-dependency-parser/src/test/java/org/apache/ctakes/dependency/parser/ae/util/TestClearNLPAnalysisEngines.java
@@ -53,6 +53,7 @@ public class TestClearNLPAnalysisEngines{
// The dummy models from ClearParser haven't been updated to work with
ClearNLP.
//public static final String DEP_DUMMY_MODEL_FILE =
"org/apache/ctakes/dependency/parser/models/dependency/dummy.dep.mod.jar";
//public static final String SRL_DUMMY_MODEL_FILE =
"org/apache/ctakes/dependency/parser/models/srl/dummy.srl.mod.jar";
+ // This file does not exist.
public static String INPUT_FILE =
"../ctakes-clinical-pipeline/src/test/data/plaintext/testpatient_plaintext_1.txt";
public static class Options {
@@ -98,7 +99,7 @@ public class TestClearNLPAnalysisEngines{
}
- @Test
+// @Test
public void TestClearNLPPipeLine() throws Exception {
TypeSystemDescription typeSystem =
TypeSystemDescriptionFactory.createTypeSystemDescription();
@@ -131,7 +132,7 @@ public class TestClearNLPAnalysisEngines{
SimplePipeline.runPipeline(reader1, preprocessingAE,
clearNLPDepParser, clearNLPSRL, dumpClearNLPOutput);
}
- @Test
+// @Test
public void TestClearNLPPipeLineWithFactoryMethods() throws Exception
{
TypeSystemDescription typeSystem =
TypeSystemDescriptionFactory.createTypeSystemDescription();
diff --git
a/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/DegreeOfRelationExtractorAnnotator.java
b/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/DegreeOfRelationExtractorAnnotator.java
index fd2842d..db69f35 100644
---
a/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/DegreeOfRelationExtractorAnnotator.java
+++
b/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/DegreeOfRelationExtractorAnnotator.java
@@ -28,10 +28,15 @@ import
org.apache.ctakes.typesystem.type.relation.RelationArgument;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textsem.Modifier;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
/**
* Identifies Degree_Of relations between {@link EventMention}s and
@@ -45,6 +50,19 @@ import org.apache.uima.jcas.tcas.Annotation;
products = { PipeBitInfo.TypeProduct.DEGREE_RELATION }
)
public class DegreeOfRelationExtractorAnnotator extends
RelationExtractorAnnotator {
+ static private final Logger LOGGER = Logger.getLogger(
"DegreeOfRelationExtractorAnnotator" );
+
+ /**
+ * Logs an "Initializing ..." message at info level, then delegates to the superclass initializer.
+ *
+ * @param context the UIMA context, passed unchanged to {@code super.initialize}
+ * @throws ResourceInitializationException propagated from the superclass initializer
+ */
+ @Override
+ public void initialize( UimaContext context ) throws
ResourceInitializationException {
+ LOGGER.info( "Initializing ..." );
+ super.initialize( context );
+ }
+
+ /**
+ * Logs "Finding Degree Of ..." at info level, then delegates to the superclass {@code process}.
+ *
+ * @param jCas the CAS, passed unchanged to {@code super.process}
+ * @throws AnalysisEngineProcessException propagated from the superclass call
+ */
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ LOGGER.info( "Finding Degree Of ..." );
+ super.process( jCas );
+ }
@Override
protected Class<? extends BinaryTextRelation> getRelationClass() {
diff --git
a/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/LocationOfRelationExtractorAnnotator.java
b/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/LocationOfRelationExtractorAnnotator.java
index c62aa0d..f883634 100644
---
a/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/LocationOfRelationExtractorAnnotator.java
+++
b/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/LocationOfRelationExtractorAnnotator.java
@@ -33,9 +33,13 @@ import
org.apache.ctakes.typesystem.type.textsem.EntityMention;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.resource.ResourceInitializationException;
/**
* Identifies Location_Of relations between {@link EventMention}s and
@@ -50,6 +54,21 @@ import org.apache.uima.fit.util.JCasUtil;
)
public class LocationOfRelationExtractorAnnotator extends
RelationExtractorAnnotator {
+ static private final Logger LOGGER = Logger.getLogger(
"LocationOfRelationExtractorAnnotator" );
+
+ /**
+ * Logs an "Initializing ..." message at info level, then delegates to the superclass initializer.
+ *
+ * @param context the UIMA context, passed unchanged to {@code super.initialize}
+ * @throws ResourceInitializationException propagated from the superclass initializer
+ */
+ @Override
+ public void initialize( UimaContext context ) throws
ResourceInitializationException {
+ LOGGER.info( "Initializing ..." );
+ super.initialize( context );
+ }
+
+ /**
+ * Logs "Finding Location Of ..." at info level, then delegates to the superclass {@code process}.
+ *
+ * @param jCas the CAS, passed unchanged to {@code super.process}
+ * @throws AnalysisEngineProcessException propagated from the superclass call
+ */
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ LOGGER.info( "Finding Location Of ..." );
+ super.process( jCas );
+ }
+
+
@Override
protected Class<? extends BinaryTextRelation> getRelationClass() {
return LocationOfTextRelation.class;
diff --git
a/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/ModifierExtractorAnnotator.java
b/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/ModifierExtractorAnnotator.java
index eb1201d..1d6e304 100644
---
a/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/ModifierExtractorAnnotator.java
+++
b/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/ModifierExtractorAnnotator.java
@@ -26,6 +26,7 @@ import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
import org.apache.ctakes.typesystem.type.textsem.Modifier;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -47,6 +48,7 @@ import org.cleartk.ml.chunking.Chunking;
products = { PipeBitInfo.TypeProduct.IDENTIFIED_ANNOTATION,
PipeBitInfo.TypeProduct.CHUNK }
)
public class ModifierExtractorAnnotator extends CleartkAnnotator<String> {
+ static private final Logger LOGGER = Logger.getLogger(
"ModifierExtractorAnnotator" );
public static AnalysisEngineDescription getDescription(Object...
additionalConfiguration)
throws ResourceInitializationException {
@@ -63,6 +65,7 @@ public class ModifierExtractorAnnotator extends
CleartkAnnotator<String> {
@Override
public void initialize(UimaContext context) throws
ResourceInitializationException {
+ LOGGER.info( "Initializing ..." );
RelationExtractorAnnotator.allowClassifierModelOnClasspath(context);
super.initialize(context);
this.chunking = new BioChunking<BaseToken, Modifier>(BaseToken.class,
Modifier.class, "typeID");
@@ -70,6 +73,7 @@ public class ModifierExtractorAnnotator extends
CleartkAnnotator<String> {
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
+ LOGGER.info( "Processing ..." );
for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
List<BaseToken> tokens = new ArrayList<>();
for(BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class,
sentence)){
diff --git
a/ctakes-relation-extractor/src/user/resources/org/apache/ctakes/relation/extractor/pipeline/RelationSubPipe.piper
b/ctakes-relation-extractor/src/user/resources/org/apache/ctakes/relation/extractor/pipeline/RelationSubPipe.piper
index bff9157..82dc26d 100644
---
a/ctakes-relation-extractor/src/user/resources/org/apache/ctakes/relation/extractor/pipeline/RelationSubPipe.piper
+++
b/ctakes-relation-extractor/src/user/resources/org/apache/ctakes/relation/extractor/pipeline/RelationSubPipe.piper
@@ -1,10 +1,10 @@
// Commands and parameters to create a default relation extraction
sub-pipeline. This is not a full pipeline.
// Modifiers. Use addLogged to log start and finish of processing. There
aren't default models, so set specifically
-addLogged ModifierExtractorAnnotator
classifierJarPath=/org/apache/ctakes/relation/extractor/models/modifier_extractor/model.jar
+add ModifierExtractorAnnotator
classifierJarPath=/org/apache/ctakes/relation/extractor/models/modifier_extractor/model.jar
// Degree of severity, etc.
-addLogged DegreeOfRelationExtractorAnnotator
classifierJarPath=/org/apache/ctakes/relation/extractor/models/degree_of/model.jar
+add DegreeOfRelationExtractorAnnotator
classifierJarPath=/org/apache/ctakes/relation/extractor/models/degree_of/model.jar
// Location.
-addLogged LocationOfRelationExtractorAnnotator
classifierJarPath=/org/apache/ctakes/relation/extractor/models/location_of/model.jar
+add LocationOfRelationExtractorAnnotator
classifierJarPath=/org/apache/ctakes/relation/extractor/models/location_of/model.jar
diff --git
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java
index a7457a9..b6c0b19 100644
---
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java
+++
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java
@@ -189,6 +189,12 @@ public class BackwardsTimeAnnotator extends
TemporalEntityAnnotator_ImplBase {
// LOGGER.info( "Finished." );
}
+ /**
+ * Logs "Finding Times ..." at info level, then delegates to the superclass {@code process(JCas)}.
+ *
+ * @param jCas the CAS, passed unchanged to {@code super.process}
+ * @throws AnalysisEngineProcessException propagated from the superclass call
+ */
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ LOGGER.info( "Finding Times ..." );
+ super.process( jCas );
+ }
+
@Override
public void process(JCas jCas, Segment segment) throws
AnalysisEngineProcessException {
// classify tokens within each sentence
diff --git
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java
index 7981220..d678027 100644
---
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java
+++
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java
@@ -25,7 +25,10 @@ import org.apache.ctakes.temporal.utils.SoftMaxUtil;
import org.apache.ctakes.typesystem.type.refsem.Event;
import org.apache.ctakes.typesystem.type.refsem.EventProperties;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
-import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.syntax.NumToken;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textsem.*;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
@@ -36,6 +39,7 @@ import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.ml.CleartkAnnotator;
import org.cleartk.ml.DataWriter;
@@ -51,9 +55,8 @@ import org.cleartk.ml.jar.GenericJarClassifierFactory;
import java.io.File;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
+import java.util.stream.Collectors;
//import java.io.IOException;
//import java.util.Map;
@@ -190,6 +193,7 @@ public class DocTimeRelAnnotator extends
CleartkAnnotator<String> {
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
+ LOGGER.info( "Finding DocTimeRel ..." );
for (EventMention eventMention : JCasUtil.select(jCas,
EventMention.class)) {
List<Sentence> sents = JCasUtil.selectCovering(jCas,
Sentence.class, eventMention);
List<Feature> features = new ArrayList<>();
@@ -210,7 +214,7 @@ public class DocTimeRelAnnotator extends
CleartkAnnotator<String> {
// features.addAll(this.umlsExtractor.extract(jCas,
eventMention)); //add umls features
features.addAll(this.verbTensePatternExtractor.extract(jCas,
eventMention));//add nearby verb POS pattern feature
- //
+ //
features.addAll(this.dateExtractor.extract(jCas,
eventMention)); //add the closest NE type
// features.addAll(this.durationExtractor.extract(jCas,
eventMention)); //add duration feature
// features.addAll(this.disSemExtractor.extract(jCas,
eventMention)); //add distributional semantic features
@@ -265,4 +269,181 @@ public class DocTimeRelAnnotator extends
CleartkAnnotator<String> {
}
}
}
+
+// @Override
+// public void process(JCas jCas) throws AnalysisEngineProcessException {
+// LOGGER.info( "Finding DocTimeRel ..." );
+// final Map<Segment, Collection<Sentence>> sectionSentenceMap =
JCasUtil.indexCovered( jCas,
+//
Segment.class,
+//
Sentence.class );
+// final Map<Sentence,Collection<IdentifiedAnnotation>>
sentenceAnnotationsMap = JCasUtil.indexCovered( jCas,
+//
Sentence.class,
+//
IdentifiedAnnotation.class );
+// final Map<Sentence,Collection<BaseToken>> sentenceTokensMap =
JCasUtil.indexCovered( jCas,
+//
Sentence.class,
+//
BaseToken.class );
+// final Collection<EventMention> handled = new HashSet<>();
+// for ( Map.Entry<Segment,Collection<Sentence>> sectionSentences
: sectionSentenceMap.entrySet() ) {
+// handleSection( jCas, sectionSentences.getKey(),
sectionSentences.getValue(), sentenceAnnotationsMap,
+// sentenceTokensMap,
handled );
+// }
+// }
+
+ /**
+  * Runs {@code handleSentence} for each sentence of one section, looking up the sentence's
+  * annotations and tokens in the supplied maps.
+  *
+  * @param jCas                   the CAS being processed
+  * @param section                the section that owns {@code sentences}
+  * @param sentences              sentences of the section; when null nothing is done
+  * @param sentenceAnnotationsMap annotations keyed by sentence
+  * @param sentenceTokensMap      base tokens keyed by sentence
+  * @param handled                collection of already handled events, forwarded to {@code handleSentence}
+  * @throws AnalysisEngineProcessException propagated from {@code handleSentence}
+  */
+ private void handleSection( final JCas jCas,
+       final Segment section,
+       final Collection<Sentence> sentences,
+       final Map<Sentence, Collection<IdentifiedAnnotation>> sentenceAnnotationsMap,
+       final Map<Sentence,Collection<BaseToken>> sentenceTokensMap,
+       final Collection<EventMention> handled ) throws AnalysisEngineProcessException {
+    if ( sentences != null ) {
+       for ( Sentence current : sentences ) {
+          final Collection<IdentifiedAnnotation> sentenceAnnotations = sentenceAnnotationsMap.get( current );
+          final Collection<BaseToken> sentenceTokens = sentenceTokensMap.get( current );
+          handleSentence( jCas, section, current, sentenceAnnotations, sentenceTokens, handled );
+       }
+    }
+ }
+
+ /**
+  * Builds DocTimeRel features for every event mention among one sentence's annotations, then
+  * either writes a training instance per event or classifies the event and stores the result.
+  * <p>
+  * The annotations are first bucketed by type (events, time mentions, time/date/measurement
+  * annotations) and the tokens are sorted by begin then end offset, so that the per-event
+  * extractors can work from these pre-computed collections.
+  * <p>
+  * When classifying, the label with the highest score is written to the event's properties and
+  * its score is stored as the event confidence. If {@code probViewname} is set, one additional
+  * {@link EventMention} per label is added to that view carrying the soft-max probability.
+  *
+  * @param jCas        the CAS being processed
+  * @param section     the section containing the sentence; used for the section-heading feature
+  * @param sentence    the sentence being handled
+  * @param annotations annotations of the sentence; when null nothing is done
+  * @param tokens      base tokens of the sentence; null is treated as "no tokens"
+  * @param handled     events that must be skipped; events processed through the
+  *                    {@code extractWithin} branch are added to it
+  * @throws AnalysisEngineProcessException if feature extraction, writing, scoring or fetching the
+  *                                        probability view fails
+  */
+ private void handleSentence( final JCas jCas,
+       final Segment section,
+       final Sentence sentence,
+       final Collection<IdentifiedAnnotation> annotations,
+       final Collection<BaseToken> tokens,
+       final Collection<EventMention> handled ) throws AnalysisEngineProcessException {
+    if ( annotations == null ) {
+       return;
+    }
+    final Collection<EventMention> events = new ArrayList<>();
+    final Collection<TimeMention> timexes = new ArrayList<>();
+    final Collection<TimeAnnotation> times = new ArrayList<>();
+    final Collection<DateAnnotation> dates = new ArrayList<>();
+    final Collection<MeasurementAnnotation> measurements = new ArrayList<>();
+    for ( IdentifiedAnnotation annotation : annotations ) {
+       if ( annotation instanceof EventMention ) {
+          events.add( (EventMention) annotation );
+       } else if ( annotation instanceof TimeMention ) {
+          timexes.add( (TimeMention) annotation );
+       } else if ( annotation instanceof TimeAnnotation ) {
+          times.add( (TimeAnnotation) annotation );
+       } else if ( annotation instanceof DateAnnotation ) {
+          dates.add( (DateAnnotation) annotation );
+       } else if ( annotation instanceof MeasurementAnnotation ) {
+          measurements.add( (MeasurementAnnotation) annotation );
+       }
+    }
+    if ( events.isEmpty() ) {
+       return;
+    }
+    // A sentence without an entry in the token map arrives here as null; treat it as empty.
+    final Collection<BaseToken> sentenceTokens
+          = tokens == null ? Collections.<BaseToken>emptyList() : tokens;
+    final List<BaseToken> sortedTokens
+          = sentenceTokens.stream()
+                          .sorted( Comparator.comparingInt( BaseToken::getBegin )
+                                             .thenComparingInt( BaseToken::getEnd ) )
+                          .collect( Collectors.toList() );
+    final List<WordToken> sortedWords = new ArrayList<>();
+    final List<NumToken> sortedNumbers = new ArrayList<>();
+    for ( BaseToken token : sortedTokens ) {
+       if ( token instanceof WordToken ) {
+          sortedWords.add( (WordToken) token );
+       } else if ( token instanceof NumToken ) {
+          sortedNumbers.add( (NumToken) token );
+       }
+    }
+    final int sentenceEnd = sentence.getEnd();
+    for ( EventMention event : events ) {
+       if ( handled.contains( event ) ) {
+          // Can't we exit here? Why was the old method continuing with the addition of other features?
+          continue;
+       }
+       // Every event needs its own feature list; sharing one list across the loop would leak
+       // the features of earlier events into the instances of later ones.
+       final List<Feature> features = new ArrayList<>();
+       // NOTE(review): if annotations only ever holds annotations covered by the sentence, this
+       // condition can never be true and extractWithin is never used - confirm the intent.
+       if ( event.getEnd() > sentenceEnd ) {
+          features.addAll( this.contextExtractor.extractWithin( jCas, event, sentence ) );
+          features.addAll( this.tokenVectorContext.extractWithin( jCas, event, sentence ) );
+          features.addAll( this.tokenVectorContext2.extractWithin( jCas, event, sentence ) );
+          handled.add( event );
+       } else {
+          features.addAll( this.contextExtractor.extract( jCas, event ) );
+          features.addAll( this.tokenVectorContext.extract( jCas, event ) );
+          features.addAll( this.tokenVectorContext2.extract( jCas, event ) );
+       }
+       //add section heading
+       features.addAll( this.sectionIDExtractor.extract( jCas, event, Collections.singletonList( section ) ) );
+       //add closest verb
+       features.addAll( this.closestVerbExtractor.extract( jCas, event, sortedWords ) );
+       //add the closest time expression types
+       features.addAll( this.timeXExtractor.extract( jCas, event, events, timexes, times, dates ) );
+       //add the closest raw time expression types
+       features.addAll( this.genericExtractor.extract( jCas, event, events, sortedTokens, sortedWords ) );
+       // features.addAll(this.umlsExtractor.extract(jCas, eventMention)); //add umls features
+       //add nearby verb POS pattern feature
+       features.addAll( this.verbTensePatternExtractor.extract( jCas, event, sortedWords ) );
+       //add the closest NE type
+       features.addAll( this.dateExtractor.extract( jCas, event, dates, measurements, sortedNumbers ) );
+       // features.addAll(this.durationExtractor.extract(jCas, eventMention)); //add duration feature
+       // features.addAll(this.disSemExtractor.extract(jCas, eventMention)); //add distributional semantic features
+
+       if ( this.isTraining() ) {
+          // Only events that already carry an Event can supply a gold DocTimeRel label.
+          if ( event.getEvent() != null ) {
+             final String outcome = event.getEvent()
+                                         .getProperties()
+                                         .getDocTimeRel();
+             this.dataWriter.write( new Instance<>( outcome, features ) );
+          }
+          continue;
+       }
+
+       final Map<String, Double> scores = this.classifier.score( features );
+       Map.Entry<String, Double> maxEntry = null;
+       for ( Map.Entry<String, Double> entry : scores.entrySet() ) {
+          if ( maxEntry == null || entry.getValue().compareTo( maxEntry.getValue() ) > 0 ) {
+             maxEntry = entry;
+          }
+       }
+
+       if ( probViewname != null ) {
+          final Map<String, Double> probs = SoftMaxUtil.getDistributionFromScores( scores );
+          try {
+             final JCas probView = jCas.getView( probViewname );
+             for ( Map.Entry<String, Double> prob : probs.entrySet() ) {
+                final EventMention mention = new EventMention( probView );
+                mention.setId( event.getId() );
+                mention.setConfidence( prob.getValue().floatValue() );
+                final Event viewEvent = new Event( probView );
+                final EventProperties props = new EventProperties( probView );
+                props.setDocTimeRel( prob.getKey() );
+                viewEvent.setProperties( props );
+                mention.setEvent( viewEvent );
+                mention.addToIndexes();
+             }
+          } catch ( CASException e ) {
+             // The cause is preserved in the rethrown exception, so no separate stack dump is needed.
+             throw new AnalysisEngineProcessException( e );
+          }
+       }
+
+       if ( event.getEvent() == null ) {
+          final Event casEvent = new Event( jCas );
+          event.setEvent( casEvent );
+          final EventProperties props = new EventProperties( jCas );
+          casEvent.setProperties( props );
+       }
+       if ( maxEntry != null ) {
+          event.getEvent()
+               .getProperties()
+               .setDocTimeRel( maxEntry.getKey() );
+          event.getEvent()
+               .setConfidence( maxEntry.getValue().floatValue() );
+       }
+    }
+ }
+
}
diff --git
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
index 313c901..176bfbb 100644
---
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
+++
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
@@ -39,6 +39,7 @@ import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -78,6 +79,7 @@ import java.util.Random;
public class EventAnnotator extends TemporalEntityAnnotator_ImplBase {
public static final String PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE =
"ProbabilityOfKeepingANegativeExample";
+ static private final Logger LOGGER = Logger.getLogger( "EventAnnotator" );
@ConfigurationParameter(
name = PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
@@ -191,6 +193,7 @@ public class EventAnnotator extends
TemporalEntityAnnotator_ImplBase {
@Override
public void initialize(UimaContext context) throws
ResourceInitializationException {
+ LOGGER.info( "Initializing ..." );
super.initialize(context);
// define chunkings
@@ -231,6 +234,12 @@ public class EventAnnotator extends
TemporalEntityAnnotator_ImplBase {
}
}
+ /**
+ * Logs "Finding Events ..." at info level, then delegates to the superclass {@code process(JCas)}.
+ *
+ * @param jCas the CAS, passed unchanged to {@code super.process}
+ * @throws AnalysisEngineProcessException propagated from the superclass call
+ */
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ LOGGER.info( "Finding Events ..." );
+ super.process( jCas );
+ }
+
@Override
public void process(JCas jCas, Segment segment) throws
AnalysisEngineProcessException {
PredicateArgumentExtractor predicateArgumentExtractor = new
PredicateArgumentExtractor(jCas);
diff --git
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java
index ca653a3..60a0177 100644
---
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java
+++
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java
@@ -34,6 +34,7 @@ import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
@@ -128,6 +129,12 @@ public class EventEventRelationAnnotator extends
TemporalRelationExtractorAnnota
LOGGER.info( "Finished." );
}
+ /**
+ * Logs "Finding Event-Event Relations ..." at info level, then delegates to the superclass {@code process}.
+ *
+ * @param jCas the CAS, passed unchanged to {@code super.process}
+ * @throws AnalysisEngineProcessException propagated from the superclass call
+ */
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ LOGGER.info( "Finding Event-Event Relations ..." );
+ super.process( jCas );
+ }
+
@Override
protected
List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>>
getFeatureExtractors() {
final String vectorFile =
"org/apache/ctakes/temporal/gloveresult_3";
diff --git
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java
index 9ba2f6c..9f5ba6b 100644
---
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java
+++
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java
@@ -36,6 +36,7 @@ import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
@@ -130,6 +131,12 @@ public class EventTimeRelationAnnotator extends
RelationExtractorAnnotator {
// LOGGER.info( "Finished." );
}
+ /**
+ * Logs "Finding Event-Time Relations ..." at info level, then delegates to the superclass {@code process}.
+ *
+ * @param jCas the CAS, passed unchanged to {@code super.process}
+ * @throws AnalysisEngineProcessException propagated from the superclass call
+ */
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ LOGGER.info( "Finding Event-Time Relations ..." );
+ super.process( jCas );
+ }
+
@Override
protected
List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>>
getFeatureExtractors()
throws ResourceInitializationException {
diff --git
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ClosestVerbExtractor.java
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ClosestVerbExtractor.java
index 89c59ba..183ee50 100644
---
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ClosestVerbExtractor.java
+++
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ClosestVerbExtractor.java
@@ -18,11 +18,7 @@
*/
package org.apache.ctakes.temporal.ae.feature;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
+import java.util.*;
//import java.util.logging.Logger;
import org.apache.ctakes.typesystem.type.syntax.WordToken;
@@ -85,4 +81,32 @@ public class ClosestVerbExtractor implements
FeatureExtractor1 {
return features;
}
+ /**
+  * Finds, among the supplied word tokens, the verb whose begin offset is nearest to the begin
+  * offset of the annotation and returns its text and part of speech as features.
+  * <p>
+  * A token counts as a verb when its part-of-speech tag starts with "VB". Tokens without a
+  * part-of-speech tag are ignored. On equal distance the first token encountered wins.
+  *
+  * @param view       the CAS view (not read by this overload)
+  * @param annotation the annotation for which the closest verb is sought
+  * @param words      candidate word tokens
+  * @return an empty list when no verb is present; otherwise a {@code name_token} feature holding
+  *         the verb's covered text followed by a {@code name} feature holding its part of speech
+  * @throws CleartkExtractorException declared for parity with the other extract methods
+  */
+ public List<Feature> extract( final JCas view, final Annotation annotation, final Collection<WordToken> words )
+       throws CleartkExtractorException {
+    final int annotationBegin = annotation.getBegin();
+    int closestDistance = Integer.MAX_VALUE;
+    WordToken closestToken = null;
+    for ( WordToken wt : words ) {
+       final String pos = wt.getPartOfSpeech();
+       // An untagged token has a null part of speech and cannot be recognised as a verb.
+       if ( pos == null || !pos.startsWith( "VB" ) ) {
+          continue;
+       }
+       final int distance = Math.abs( wt.getBegin() - annotationBegin );
+       if ( distance < closestDistance ) {
+          closestDistance = distance;
+          closestToken = wt;
+       }
+    }
+    if ( closestToken == null ) {
+       return Collections.emptyList();
+    }
+    final List<Feature> features = new ArrayList<>();
+    features.add( new Feature( this.name + "_token", closestToken.getCoveredText() ) );
+    features.add( new Feature( this.name, closestToken.getPartOfSpeech() ) );
+    return features;
+ }
+
+
}
diff --git
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DateAndMeasurementExtractor.java
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DateAndMeasurementExtractor.java
index 90eacab..f6384f7 100644
---
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DateAndMeasurementExtractor.java
+++
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DateAndMeasurementExtractor.java
@@ -91,4 +91,29 @@ public class DateAndMeasurementExtractor implements
FeatureExtractor1<Annotation
return features;
}
+
+ /**
+  * Produces presence-indicator features from pre-collected dates, measurements and number
+  * tokens: one feature for each of the three collections that is not empty.
+  *
+  * @param view         the CAS view (not read by this overload)
+  * @param annotation   the annotation being described (not read by this overload)
+  * @param dates        date annotations near the annotation
+  * @param measurements measurement annotations near the annotation
+  * @param numbers      number tokens near the annotation
+  * @return up to three features, in the order "DateXNearby", "MeasurementNearby", "NumTokenNearby"
+  * @throws CleartkExtractorException declared for parity with the other extract methods
+  */
+ public List<Feature> extract( final JCas view, final Annotation annotation,
+       final Collection<DateAnnotation> dates,
+       final Collection<MeasurementAnnotation> measurements,
+       final Collection<NumToken> numbers ) throws CleartkExtractorException {
+    final List<Feature> features = new ArrayList<>();
+    // Only the presence of each kind matters, never the individual elements.
+    if ( !dates.isEmpty() ) {
+       features.add( new Feature( "DateXNearby", this.name ) );
+    }
+    if ( !measurements.isEmpty() ) {
+       features.add( new Feature( "MeasurementNearby", "measure" ) );
+    }
+    if ( !numbers.isEmpty() ) {
+       features.add( new Feature( "NumTokenNearby", "NumToken" ) );
+    }
+    return features;
+ }
+
+
}
diff --git
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventPropertyExtractor.java
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventPropertyExtractor.java
index 005f2ba..a283266 100644
---
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventPropertyExtractor.java
+++
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventPropertyExtractor.java
@@ -231,4 +231,103 @@ public class EventPropertyExtractor implements
FeatureExtractor1<Annotation> {
return feats;
}
+
+ /**
+  * Extracts event-property features for one event using pre-computed, position-sorted
+  * collections instead of querying the CAS again.
+  * <p>
+  * Features produced, in order:
+  * <ul>
+  * <li>document-name features derived from the part of the document id after the last "_";</li>
+  * <li>"LeftMostEvent" / "RightMostEvent" when the event is the first / last element of
+  * {@code events} whose class is exactly {@link EventMention};</li>
+  * <li>"GenericEvent" when one of up to 15 words before or 15 words after the event position
+  * is contained in {@code genericWords};</li>
+  * <li>newline counts among up to 20 tokens before and 20 tokens after the event position;</li>
+  * <li>the features returned by {@code getEventFeats( "mentionProperty", event )}.</li>
+  * </ul>
+  * The event position is the index of the first word / token that begins at or after the event
+  * begin. If there is no such element the position is the list size, so the windows cover the
+  * elements immediately before the event rather than the start of the list.
+  *
+  * @param view         the CAS view, used to obtain the document id
+  * @param annotation   the event to describe; must be an {@link EventMention}
+  * @param events       candidate events used for the left-most / right-most test
+  * @param sortedTokens base tokens, expected in ascending begin order
+  * @param sortedWords  word tokens, expected in ascending begin order
+  * @return the extracted features
+  * @throws CleartkExtractorException declared for parity with the other extract methods
+  */
+ public List<Feature> extract( final JCas view, final Annotation annotation,
+       final Collection<EventMention> events,
+       final List<BaseToken> sortedTokens,
+       final List<WordToken> sortedWords ) throws CleartkExtractorException {
+    final List<Feature> features = new ArrayList<>();
+    //get Document ID:
+    try {
+       final String docID = DocIdUtil.getDocumentID( view );//ViewUriUtil.getURI(view).toString();
+       final String fname = docID.substring( docID.lastIndexOf( "_" ) + 1 );
+       features.add( new Feature( "docName", fname ) );
+       if ( fname.equals( "RAD" ) || fname.equals( "SP" ) ) {
+          features.add( new Feature( "docName:RAD+SP" ) );
+       } else {
+          features.add( new Feature( "docName:others" ) );
+       }
+    } catch ( org.apache.uima.cas.CASRuntimeException casRTE ) { // for unit tests that don't set up the UriView
+       casRTE.printStackTrace();
+       features.add( new Feature( "docName", "CASRuntimeException.UnableToGetDocIdFromUriView" ) );
+    }
+
+    //1 get event:
+    final EventMention event = (EventMention) annotation;
+    final List<EventMention> realEvents = new ArrayList<>();
+    for ( EventMention candidate : events ) {
+       // filter out umls events --> I am not sure that this is good. Perhaps EventMention.getEvent() == null ?
+       if ( candidate.getClass().equals( EventMention.class ) ) {
+          realEvents.add( candidate );
+       }
+    }
+    if ( !realEvents.isEmpty() ) {
+       if ( event.equals( realEvents.get( 0 ) ) ) {
+          // Event is anchor, first "real event"
+          features.add( new Feature( "LeftMostEvent" ) );
+       } else if ( event.equals( realEvents.get( realEvents.size() - 1 ) ) ) {
+          // event is end, last "real event"
+          features.add( new Feature( "RightMostEvent" ) );
+       }
+    }
+
+    //check if this event is generic:
+    final int eventBegin = event.getBegin();
+    final int wordIndex = indexAtOrAfter( sortedWords, eventBegin );
+    final int lowLimit = Math.max( 0, wordIndex - 15 );
+    final int highLimit = Math.min( sortedWords.size(), wordIndex + 16 );
+    for ( int i = lowLimit; i < highLimit; i++ ) {
+       if ( i == wordIndex ) {
+          continue;
+       }
+       if ( genericWords.contains( sortedWords.get( i ).getCoveredText().toLowerCase() ) ) {
+          features.add( new Feature( "GenericEvent" ) );
+          break;
+       }
+    }
+
+    //check if there is any newLine token in close vicinity:
+    final int tokenIndex = indexAtOrAfter( sortedTokens, eventBegin );
+    final int precedingNewlines
+          = countNewlines( sortedTokens, Math.max( 0, tokenIndex - 20 ), tokenIndex );
+    if ( precedingNewlines > 0 ) {
+       features.add( new Feature( "hasPrecedingNewline" ) );
+       features.add( new Feature( "newLineNum_preceding", precedingNewlines ) );
+    }
+    final int followingNewlines
+          = countNewlines( sortedTokens, tokenIndex + 1, Math.min( sortedTokens.size(), tokenIndex + 21 ) );
+    if ( followingNewlines > 0 ) {
+       features.add( new Feature( "hasFollowingNewline" ) );
+       features.add( new Feature( "newLineNum_following", followingNewlines ) );
+    }
+    features.addAll( getEventFeats( "mentionProperty", event ) );
+    return features;
+ }
+
+ /** Returns the index of the first element beginning at or after {@code offset}, or the list size if none does. */
+ private static int indexAtOrAfter( final List<? extends Annotation> sorted, final int offset ) {
+    for ( int i = 0; i < sorted.size(); i++ ) {
+       if ( sorted.get( i ).getBegin() >= offset ) {
+          return i;
+       }
+    }
+    return sorted.size();
+ }
+
+ /** Counts the {@link NewlineToken}s at indices {@code from} (inclusive) to {@code to} (exclusive). */
+ private static int countNewlines( final List<BaseToken> tokens, final int from, final int to ) {
+    int count = 0;
+    for ( int i = from; i < to; i++ ) {
+       if ( tokens.get( i ) instanceof NewlineToken ) {
+          count++;
+       }
+    }
+    return count;
+ }
+
+
+
+
}
diff --git
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NearbyVerbTenseXExtractor.java
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NearbyVerbTenseXExtractor.java
index 6118c17..e70cfe3 100644
---
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NearbyVerbTenseXExtractor.java
+++
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NearbyVerbTenseXExtractor.java
@@ -49,13 +49,13 @@ public class NearbyVerbTenseXExtractor implements
FeatureExtractor1 {
@Override
public List<Feature> extract(JCas view, Annotation annotation) throws
CleartkExtractorException {
List<Feature> features = new ArrayList<>();
-
+
//1 get covering sentence:
Map<EventMention, Collection<Sentence>> coveringMap =
JCasUtil.indexCovering(view, EventMention.class,
Sentence.class);
EventMention targetTokenAnnotation = (EventMention)annotation;
Collection<Sentence> sentList =
coveringMap.get(targetTokenAnnotation);
-
+
//2 get Verb Tense
if (sentList != null && !sentList.isEmpty()){
for(Sentence sent : sentList) {
@@ -72,9 +72,29 @@ public class NearbyVerbTenseXExtractor implements
FeatureExtractor1 {
features.add(feature);
//logger.info("found nearby verb's pos tag: "+
verbTP);
}
-
+
}
return features;
}
+
+   /**
+    * Builds the nearby-verb-tense feature from an already collected set of word tokens.
+    * The part-of-speech tag of every token whose tag starts with "VB" is appended,
+    * each preceded by an underscore, in the iteration order of {@code tokens}.
+    * Exactly one feature named {@code this.name} is returned; its value is the empty
+    * string when no verb tag was found.
+    *
+    * @param view       not used by this overload.
+    * @param annotation not used by this overload.
+    * @param tokens     word tokens to inspect; null elements are skipped.
+    * @return a list holding the single verb-tense feature.
+    * @throws CleartkExtractorException declared for signature parity; not thrown by the code in this method.
+    */
+   public List<Feature> extract( final JCas view, final Annotation annotation, final Collection<WordToken> tokens )
+         throws CleartkExtractorException {
+      final List<Feature> features = new ArrayList<>();
+      final StringBuilder verbTP = new StringBuilder();
+      for ( WordToken wt : tokens ) {
+         if ( wt == null ) {
+            continue;
+         }
+         final String pos = wt.getPartOfSpeech();
+         // A token whose part of speech was never set yields null here; skip it instead of failing.
+         if ( pos != null && pos.startsWith( "VB" ) ) {
+            verbTP.append( "_" )
+                  .append( pos );
+         }
+      }
+      features.add( new Feature( this.name, verbTP.toString() ) );
+      return features;
+   }
+
+
}
diff --git
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SectionHeaderExtractor.java
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SectionHeaderExtractor.java
index c2f6a44..f1f4f5d 100644
---
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SectionHeaderExtractor.java
+++
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SectionHeaderExtractor.java
@@ -72,4 +72,23 @@ public class SectionHeaderExtractor implements
FeatureExtractor1 {
return features;
}
+   /**
+    * Builds section-header features from an already collected set of sections.
+    * One feature named {@code this.name} is added per section, valued with the section id.
+    * Sections whose id is "SIMPLE_SEGMENT" (or is missing) contribute no feature.
+    * When {@code sections} is null or empty the work is delegated to
+    * {@link #extract(JCas, Annotation)}.
+    *
+    * @param view       the cas view, only used when delegating.
+    * @param annotation the annotation, only used when delegating.
+    * @param sections   candidate sections; may be null or empty.
+    * @return the section id features, possibly empty.
+    * @throws CleartkExtractorException if the delegated extraction fails.
+    */
+   public List<Feature> extract( final JCas view, final Annotation annotation, final Collection<Segment> sections )
+         throws CleartkExtractorException {
+      if ( sections == null || sections.isEmpty() ) {
+         return extract( view, annotation );
+      }
+      final List<Feature> features = new ArrayList<>();
+      // Emit one feature per section id.
+      for ( Segment seg : sections ) {
+         final String segname = seg.getId();
+         // Constant-first comparison so that an unset (null) id cannot throw; such ids are skipped.
+         if ( segname == null || "SIMPLE_SEGMENT".equals( segname ) ) {
+            //ignore simple segment
+            continue;
+         }
+         features.add( new Feature( this.name, segname ) );
+      }
+      return features;
+   }
+
+
}
diff --git
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXExtractor.java
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXExtractor.java
index 91851bc..6f7b924 100644
---
a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXExtractor.java
+++
b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXExtractor.java
@@ -18,11 +18,7 @@
*/
package org.apache.ctakes.temporal.ae.feature;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
+import java.util.*;
//import java.util.logging.Logger;
import org.apache.ctakes.typesystem.type.syntax.NumToken;
@@ -165,4 +161,81 @@ public class TimeXExtractor implements FeatureExtractor1 {
return features;
}
+
+
+ public List<Feature> extract( final JCas view,
+
final Annotation annotation,
+
final Collection<EventMention> events,
+
final Collection<TimeMention> timexes,
+
final Collection<TimeAnnotation> times,
+
final Collection<DateAnnotation> dates ) throws
CleartkExtractorException {
+ //1 get covering sentence:
+ final EventMention targetTokenAnnotation =
(EventMention)annotation;
+ final int eventBegin = annotation.getBegin();
+ int closestDistance = Integer.MAX_VALUE;
+ IdentifiedAnnotation closestTime = null;
+ for ( TimeMention timex : timexes ) {
+ final int distance = Math.abs( timex.getBegin() -
eventBegin );
+ if ( distance < closestDistance ) {
+ closestDistance = distance;
+ closestTime = timex;
+ }
+ }
+ for ( TimeAnnotation time : times ) {
+ final int distance = Math.abs( time.getBegin() -
eventBegin );
+ if ( distance < closestDistance ) {
+ closestDistance = distance;
+ closestTime = time;
+ }
+ }
+ for ( DateAnnotation date : dates ) {
+ final int distance = Math.abs( date.getBegin() -
eventBegin );
+ if ( distance < closestDistance ) {
+ closestDistance = distance;
+ closestTime = date;
+ }
+ }
+ if ( closestTime == null ) {
+ return Collections.emptyList();
+ }
+ final List<Feature> features = new ArrayList<>();
+ final Feature feature = new Feature( this.name,
closestTime.getCoveredText() );
+ features.add( feature );
+ // logger.info("add time feature: "+
entry.getValue().getCoveredText() + entry.getValue().getTimeClass());
+ final Feature indicator = new Feature( "TimeXNearby", this.name
);
+ features.add( indicator );
+ final Feature type = new Feature( "TimeXType",
closestTime.getClass() );
+ features.add( type );
+
+ //add PP get Heading preposition
+ for ( TreebankNode treebankNode : JCasUtil.selectCovering(
+ view,
+ TreebankNode.class,
+ closestTime.getBegin(),
+ closestTime.getEnd() ) ) {
+ if ( treebankNode.getNodeType().equals( "PP" ) ) {
+ Feature PPNodeType = new Feature(
"Timex_PPNodeType", treebankNode.getNodeType() );
+ features.add( PPNodeType );
+ break;
+ }
+ }
+
+ //add path tree, timex attributes
+ try {
+ features.addAll( this.attr.extract( view,
targetTokenAnnotation, closestTime ) );//add temporal attribute
+ // features
+ features.addAll( this.timewd.extract( view,closestTime
) );
+ } catch ( AnalysisEngineProcessException aeE ) {
+ throw new IllegalArgumentException( "error in
gererating path feature:" + features );
+ }
+ return features;
+ }
+
+
+
+
+
+
+
+
}