This is an automated email from the ASF dual-hosted git repository.
seanfinan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/ctakes.git
The following commit(s) were added to refs/heads/main by this push:
new fafd849 CTAKES-564 All engines now implement uimafit
JCasAnnotator_ImplBase. We still need to refactor them to use
@ConfigurationParameter. RecordResolutionCasConsumer is fully refactored, note
TODOs. Negation classes removed as they are redundant copies of ctakes-core and
ctakes-necontexts classes - in classpath. smoking-status code is really old and
could benefit from refactoring.
fafd849 is described below
commit fafd849b8dad0dbe0fdd00dadc3ee8062af0980d
Author: Sean Finan <[email protected]>
AuthorDate: Thu Dec 29 13:05:09 2022 -0500
CTAKES-564 All engines now implement uimafit JCasAnnotator_ImplBase
We still need to refactor them to use @ConfigurationParameter
RecordResolutionCasConsumer is fully refactored, note TODOs
Negation classes removed as they are redundant copies of ctakes-core and
ctakes-necontexts classes - in classpath.
smoking-status code is really old and could benefit from refactoring.
---
ctakes-smoking-status/README | 2 +
.../org/apache/ctakes/smokingstatus/Const.java | 1 +
.../smokingstatus/ae/ClassifiableEntries.java | 18 +-
.../ae/KuRuleBasedClassifierAnnotator.java | 8 +-
.../ae/PcsClassifierAnnotator_libsvm.java | 15 +-
.../smokingstatus/ae/ResolutionAnnotator.java | 1 +
.../ctakes/smokingstatus/ae/SentenceAdjuster.java | 16 +-
.../cc/RecordResolutionCasConsumer.java | 355 ++++++++---------
.../context/NamedEntityContextAnalyzer.java | 121 ------
.../context/negation/NegationContextAnalyzer.java | 59 ---
.../ctakes/smokingstatus/machine/NegationFSM.java | 435 ---------------------
11 files changed, 214 insertions(+), 817 deletions(-)
diff --git a/ctakes-smoking-status/README b/ctakes-smoking-status/README
index 4f0a76a..74bdb5f 100644
--- a/ctakes-smoking-status/README
+++ b/ctakes-smoking-status/README
@@ -269,7 +269,9 @@ Debugging issues
One of the next releases will look into addressing the following:
- class 'org.apache.ctakes.smokingstatus.machine.NegationFSM.java' must
inherit from 'core/org.apache.ctakes.core.fsm.machine.NegationFSM.java'
+ - !!! smokingstatus negation removed as it was identical to that in
ctakes-core.
- org.apache.ctakes.smokingstatus.context.negation.NegationContextAnalyzer -
needs to use 'NE
Contexts/org.apache.ctakes.necontexts.negation.NegationContextAnalyzer'
+ - !!! smokingstatus NegationContextAnalyzer removed as it was identical to
that in ctakes-ne-contexts.
- ResolutionAnnotator.java references 'TypeSystemConst.NE_CERTAINTY_NEGATED'
which has been commented out, uncomment in both places for next major release
- FIXED -Parameters probably need to be changed? /smoking
status/desc/analysis_engine/ClassifiableEntriesAnnotator.xml
- FIXED - CdaCasInitializer.xml must be used from 'document preprocessor'
diff --git
a/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/Const.java
b/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/Const.java
index fa72ff5..cbc985e 100644
---
a/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/Const.java
+++
b/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/Const.java
@@ -25,6 +25,7 @@ package org.apache.ctakes.smokingstatus;
*/
public interface Const
{
+ // TODO make this an enum
public static final String CLASS_PAST_SMOKER = "PAST_SMOKER";
public static final String CLASS_CURR_SMOKER = "CURRENT_SMOKER";
public static final String CLASS_SMOKER = "SMOKER";
diff --git
a/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/ClassifiableEntries.java
b/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/ClassifiableEntries.java
index 59304aa..7cbf809 100644
---
a/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/ClassifiableEntries.java
+++
b/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/ClassifiableEntries.java
@@ -25,7 +25,6 @@ import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
-import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@@ -36,13 +35,13 @@ import java.util.Set;
import java.util.StringTokenizer;
import org.apache.log4j.Logger;
-import org.apache.uima.pear.tools.PackageInstaller;
import org.apache.uima.UIMAFramework;
import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
import org.apache.uima.cas.FSIterator;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.jcas.cas.TOP;
@@ -73,6 +72,8 @@ import
org.apache.ctakes.smokingstatus.type.libsvm.NominalAttributeValue;
public class ClassifiableEntries extends JCasAnnotator_ImplBase {
+ // TODO @ConfigurationParameter all of these parameters.
+
/**
* Name of configuration parameter that must be set to the filepath of
the
* UIMA descriptor ProductionPostSentenceAggregate.xml
@@ -112,12 +113,10 @@ public class ClassifiableEntries extends
JCasAnnotator_ImplBase {
*/
public static final String PARAM_IGNORE_SECTIONS = "SectionsToIgnore";
- public void initialize(UimaContext aContext)
- throws ResourceInitializationException {
+ public void initialize( final UimaContext aContext ) throws
ResourceInitializationException {
+ super.initialize( aContext );
boolean windowsSystem = true;
try {
- super.initialize(aContext);
-
ResMgr = UIMAFramework.newDefaultResourceManager();
iv_procEntryList = new ArrayList<ClassifiableEntry>();
iv_entryIndexMap = new HashMap<String,
List<ClassifiableEntry>>();
@@ -278,7 +277,7 @@ public class ClassifiableEntries extends
JCasAnnotator_ImplBase {
* apache.uima.jcas.impl.JCas,
* org.apache.uima.analysis_engine.ResultSpecification)
*/
- public void process(JCas jcas) {
+ public void process( final JCas jcas ) throws
AnalysisEngineProcessException {
// cleanup
iv_entryIndexMap.clear();
@@ -528,8 +527,7 @@ public class ClassifiableEntries extends
JCasAnnotator_ImplBase {
* Given all the unique classifications for a given record, resolve it
down
* to a single final classifcation.
*
- * @param cList
- * @return
+ * @return -
*/
private String resolveClassification() {
// If (all sentences in a report are classified as UNKNOWN)
diff --git
a/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/KuRuleBasedClassifierAnnotator.java
b/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/KuRuleBasedClassifierAnnotator.java
index 71c4a68..64be1a8 100644
---
a/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/KuRuleBasedClassifierAnnotator.java
+++
b/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/KuRuleBasedClassifierAnnotator.java
@@ -27,7 +27,8 @@ import java.util.Iterator;
import java.util.Set;
import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.resource.ResourceInitializationException;
@@ -45,8 +46,7 @@ public class KuRuleBasedClassifierAnnotator extends
JCasAnnotator_ImplBase {
String classAttributeName;
boolean caseSensitive = true;
- public void initialize(UimaContext aContext)
- throws ResourceInitializationException {
+ public void initialize( final UimaContext aContext ) throws
ResourceInitializationException {
super.initialize(aContext);
@@ -76,7 +76,7 @@ public class KuRuleBasedClassifierAnnotator extends
JCasAnnotator_ImplBase {
}
- public void process(JCas jcas) {
+ public void process( final JCas jcas ) throws
AnalysisEngineProcessException {
JFSIndexRepository indexes = jcas.getJFSIndexRepository();
Iterator<?> tokenItr =
indexes.getAnnotationIndex(WordToken.type)
.iterator();
diff --git
a/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/PcsClassifierAnnotator_libsvm.java
b/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/PcsClassifierAnnotator_libsvm.java
index a1b22ae..c589a58 100644
---
a/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/PcsClassifierAnnotator_libsvm.java
+++
b/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/PcsClassifierAnnotator_libsvm.java
@@ -34,7 +34,8 @@ import libsvm.svm;
import libsvm.svm_model;
import libsvm.svm_node;
import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
@@ -52,15 +53,19 @@ public class PcsClassifierAnnotator_libsvm extends
JCasAnnotator_ImplBase {
Map<?, ?> tokenCounts;
svm_model model; // trained libsvm model
- public void initialize(UimaContext aContext)
- throws ResourceInitializationException {
- super.initialize(aContext);
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void initialize( final UimaContext aContext ) throws
ResourceInitializationException {
+ super.initialize( aContext );
tokenCounts = new HashMap();
stopWords = new HashSet<String>();
goWords = new ArrayList<String>();
try {
+ // TODO @ConfigurationParam
Object paramValue = aContext
.getConfigParameterValue("CaseSensitive");
if (paramValue != null)
@@ -83,7 +88,7 @@ public class PcsClassifierAnnotator_libsvm extends
JCasAnnotator_ImplBase {
}
}
- public void process(JCas jcas) {
+ public void process( final JCas jcas ) throws
AnalysisEngineProcessException {
List<Double> feature = new ArrayList<Double>();
JFSIndexRepository indexes = jcas.getJFSIndexRepository();
diff --git
a/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/ResolutionAnnotator.java
b/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/ResolutionAnnotator.java
index c942d8e..1bdc936 100644
---
a/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/ResolutionAnnotator.java
+++
b/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/ResolutionAnnotator.java
@@ -54,6 +54,7 @@ import
org.apache.ctakes.smokingstatus.type.libsvm.NominalAttributeValue;
* @author Mayo Clinic
*
*/
+// This isn't actually an ae and should be moved.
public class ResolutionAnnotator
{
Set<String> conWords; //contradiction words for negation -- if this word
appears in sentence do not negate
diff --git
a/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/SentenceAdjuster.java
b/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/SentenceAdjuster.java
index 6086a5b..08464a8 100644
---
a/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/SentenceAdjuster.java
+++
b/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/ae/SentenceAdjuster.java
@@ -26,9 +26,9 @@ import java.util.Set;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.analysis_engine.annotator.AnnotatorContextException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
@@ -47,6 +47,7 @@ import org.apache.ctakes.typesystem.type.textspan.Sentence;
*
*/
public class SentenceAdjuster extends JCasAnnotator_ImplBase {
+ // TODO @ConfigurationParam
/**
* The list of words ("and" "&") to ignore in pattern matching.
*/
@@ -67,14 +68,10 @@ public class SentenceAdjuster extends
JCasAnnotator_ImplBase {
* the configuration parameters. This method is not invoked for every
* document processed.
*
- * @see
com.ibm.uima.analysis_engine.annotator.BaseAnnotator#initialize(UimaContext)
*/
- public void initialize(UimaContext aContext)
- throws ResourceInitializationException
-
- {
-
- super.initialize(aContext);
+ @Override
+ public void initialize( final UimaContext aContext ) throws
ResourceInitializationException {
+ super.initialize( aContext );
context = aContext;
try {
@@ -91,6 +88,7 @@ public class SentenceAdjuster extends JCasAnnotator_ImplBase {
private void configInit() throws AnnotatorContextException {
// populate the HashSet of words that we will ignore when
pattern
// matching
+ // TODO @ConfigurationParam
String[] ignoreWords = (String[]) context
.getConfigParameterValue(PARAM_IGNORE_WORDS);
@@ -138,7 +136,7 @@ public class SentenceAdjuster extends
JCasAnnotator_ImplBase {
* Invokes this annotator's analysis logic. Invoked for each document
* processed.
*/
- public void process(JCas jcas) throws AnalysisEngineProcessException {
+ public void process( final JCas jcas ) throws
AnalysisEngineProcessException {
String text = jcas.getDocumentText();
try {
// just one sentence
diff --git
a/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/cc/RecordResolutionCasConsumer.java
b/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/cc/RecordResolutionCasConsumer.java
index 26e28ac..4f8b483 100644
---
a/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/cc/RecordResolutionCasConsumer.java
+++
b/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/cc/RecordResolutionCasConsumer.java
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -18,185 +18,192 @@
*/
package org.apache.ctakes.smokingstatus.cc;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.Iterator;
+import java.io.*;
+import java.util.Collection;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.FSIterator;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.jcas.JFSIndexRepository;
+import org.apache.ctakes.core.cc.AbstractFileWriter;
+import org.apache.ctakes.core.util.doc.DocIdUtil;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.cas.TOP;
import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-import org.apache.uima.util.ProcessTrace;
import org.apache.ctakes.smokingstatus.type.SmokingDocumentClassification;
-import org.apache.ctakes.smokingstatus.patientLevel.PatientLevelSmokingStatus;
-import org.apache.ctakes.typesystem.type.structured.DocumentID;
-
-public class RecordResolutionCasConsumer extends CasConsumer_ImplBase
-{
- /**
- * The name of the parameter that is specifies the path of the output file.
- */
- public static final String PARAM_OUTPUT_FILE = "OutputFile";
-
- /**
- * The name of the parameter that is specifies the delimiter for the output
- * file.
- */
- public static final String PARAM_DELIMITER = "Delimiter";
-
- /**
- * Specifies whether the cas should be handled as CDA (via 'plaintext' sofa
view) or default (flat file)
- *
- */
- public static final String CDA_PROCESSING = "ProcessingCDADocument";
-
- /**
- * Specifies whether post process should be run which provides the patient
level classification
- */
- public static final String PATIENT_LEVEL_PROCESSING =
"RunPatientLevelClassification";
-
- /**
- * Optional path and file name of the output file which holds the patient
level classification summary
- *
- */
- public static final String FINAL_CLASS_FILE =
"FinalClassificationOutputFile";
-
- public void initialize() throws ResourceInitializationException
- {
- File outFile;
-
- iv_sb = new StringBuffer();
-
- try
- {
- String filename = (String) getConfigParameterValue(PARAM_OUTPUT_FILE);
- outFile = new File(filename);
- if (!outFile.exists())
- outFile.createNewFile();
- iv_bw = new BufferedWriter(new FileWriter(outFile));
-
- iv_delimiter = (String) getConfigParameterValue(PARAM_DELIMITER);
- iv_useCDAProcess = (Boolean) getConfigParameterValue(CDA_PROCESSING);
- iv_postPatientLvlProcess = (Boolean)
getConfigParameterValue(PATIENT_LEVEL_PROCESSING);
- iv_patient_level_file = (String)
getConfigParameterValue(FINAL_CLASS_FILE);
-
- if (iv_postPatientLvlProcess) {
- patientSmokingStatus = new PatientLevelSmokingStatus();
- patientSmokingStatus.setInputFile(filename);
- }
-
- } catch (Exception ioe)
- {
- throw new ResourceInitializationException(ioe);
- }
- }
-
- public void processCas(CAS cas) throws ResourceProcessException
- {
- try
- {
- JCas jcas;
- if (iv_useCDAProcess)
- jcas = cas.getJCas().getView("plaintext");
- else
- jcas = cas.getJCas();
- JFSIndexRepository indexes = jcas.getJFSIndexRepository();
-
- FSIterator<TOP> documentIDIterator =
indexes.getAllIndexedFS(DocumentID.type);
- if(documentIDIterator.hasNext())
- {
- DocumentID dia = (DocumentID)documentIDIterator.next();
- iv_sb.append(dia.getDocumentID());
+// TODO Rename. Also, extend AbstractTableFileWriter.
+public class RecordResolutionCasConsumer extends AbstractFileWriter<String> {
+
+ static private final Logger LOGGER = Logger.getLogger(
"RecordResolutionCasConsumer" );
+
+ @ConfigurationParameter(
+ name = "OutputFile",
+ description = "Name of file to which smoking status for the corpus
should be written."
+ )
+ private String _outputFile;
+
+ @ConfigurationParameter(
+ name = "Delimiter",
+ description = "Name of the parameter that is specifies the delimiter
for the output."
+ )
+ private String iv_delimiter;
+
+ @ConfigurationParameter(
+ name = "ProcessingCDADocument",
+ description = "Specifies whether the cas should be handled as CDA
(via 'plaintext' sofa view) or default "
+ + "(flat file).",
+ mandatory = false
+ )
+ private boolean iv_useCDAProcess = false;
+
+// @ConfigurationParameter(
+// name = "RunPatientLevelClassification",
+// description = "Specifies whether post process should be run which
provides the patient level classification.",
+// mandatory = false
+// )
+// private boolean iv_postPatientLvlProcess = false;
+//
+// @ConfigurationParameter(
+// name = "FinalClassificationOutputFile",
+// description = "Optional path and file name of the output file which
holds the patient level classification summary.",
+// mandatory = false
+// )
+// private String iv_patient_level_file;
+
+
+
+ static private final String PLAIN_TEXT_VIEW = "plaintext";
+
+ private final StringBuilder _sb = new StringBuilder();
+
+
+
+ /**
+ * System new line character
+ * @throws ResourceInitializationException -
+ */
+ static private final String NEW_LINE = System.getProperty( "line.separator"
);
+// private PatientLevelSmokingStatus patientSmokingStatus = null;
+
+
+
+ /**
+ * @param jCas the jcas passed to the process( jcas ) method.
+ */
+ @Override
+ protected void createData( final JCas jCas ) {
+ // This makes some pretty ugly output. It would be better to use a
TableWriter.
+ final String documentId = DocIdUtil.getDocumentIdForFile( jCas );
+ _sb.append( documentId );
+ _sb.append( iv_delimiter );
+ final Collection<SmokingDocumentClassification> smokeStats
+ = JCasUtil.select( jCas, SmokingDocumentClassification.class );
+ if ( smokeStats.isEmpty() ) {
+ _sb.append( "Error in RecordResolutionCasConsumer:NO classification"
);
}
- else
- {
- iv_sb.append("Error in CasInitializer(?) NO_DOC_ID");
+ for ( SmokingDocumentClassification smokeStat : smokeStats ) {
+ //there should be just one SmokingDocumentClassification
+ _sb.append( smokeStat.getClassification() );
+ _sb.append( NEW_LINE );
}
-
- iv_sb.append(iv_delimiter);
-
- Iterator<?> docClsItr =
indexes.getAnnotationIndex(SmokingDocumentClassification.type).iterator();
-
- //there should be just one SmokingDocumentClassification
- if (docClsItr.hasNext())
- {
- SmokingDocumentClassification dc =
(SmokingDocumentClassification)docClsItr.next();
- iv_sb.append(dc.getClassification());
- iv_sb.append(NEW_LINE);
+ }
+
+ /**
+ * @return completed patient JCases
+ */
+ @Override
+ protected String getData() {
+ return _sb.toString();
+ }
+
+ /**
+ * Does nothing.
+ * @param data -
+ */
+ @Override
+ protected void writeComplete( final String data ) {}
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void initialize( final UimaContext aContext ) throws
ResourceInitializationException {
+ super.initialize( aContext );
+// if ( iv_postPatientLvlProcess ) {
+// if ( iv_patient_level_file != null &&
!iv_patient_level_file.isEmpty() ) {
+// patientSmokingStatus = new PatientLevelSmokingStatus();
+// patientSmokingStatus.setInputFile( iv_patient_level_file );
+// } else {
+// LOGGER.error( "RunPatientLevelClassification true, but no
FinalClassificationOutputFile given." );
+// }
+// }
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void process( final JCas jcas ) throws
AnalysisEngineProcessException {
+ if ( iv_useCDAProcess ) {
+ try {
+ createData( jcas.getView( PLAIN_TEXT_VIEW ) );
+ } catch ( CASException casE ) {
+ LOGGER.error( casE.getMessage() );
+ }
+ } else {
+ createData( jcas );
}
- else
- {
- iv_sb.append("Error in RecordResolutionCasConsumer:NO classification");
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void writeFile( final String data, final String outputDir, final
String documentId, final String fileName )
+ throws IOException {
+ final File file = new File( outputDir, fileName );
+ LOGGER.info( "Writing smoking status for the corpus to " +
file.getPath() + " ..." );
+ try ( Writer writer = new BufferedWriter( new FileWriter( file ) ) ) {
+ writer.write( getData() );
}
-
- iv_bw.write(iv_sb.toString());
- }
- catch(Exception exception)
- {
- throw new ResourceProcessException(exception);
- }
- finally
- {
- iv_sb.delete(0, iv_sb.length());
- }
- }
-
- public void collectionProcessComplete(ProcessTrace arg0) throws
ResourceProcessException, IOException
- {
- super.collectionProcessComplete(arg0);
- File outFile = null;
- try
- {
- iv_bw.flush();
- iv_bw.close();
-
- }
- catch(Exception e)
- { throw new ResourceProcessException(e); }
- if (iv_postPatientLvlProcess) {
- String filename = (String) getConfigParameterValue(PARAM_OUTPUT_FILE);
-
- if (iv_patient_level_file != null)
- outFile = new File(iv_patient_level_file);
- else
- outFile = new File(filename.replace(filename,
filename+"_patientLevel.txt"));
-
- if (!outFile.exists())
- outFile.createNewFile();
- patientSmokingStatus.setOutputFile(outFile.getAbsolutePath());
- patientSmokingStatus.collectCounts("\\"+iv_delimiter);
- patientSmokingStatus.assignPatientLevelSmokingStatus();
- patientSmokingStatus.printToFile();
- }
- }
-
- /**
- * The buffered writer used to write the document classification
- */
- private BufferedWriter iv_bw = null;
-
- /**
- * buffer used to compile results of a given doc
- */
- private StringBuffer iv_sb;
-
- private String iv_patient_level_file = null;
- private String iv_delimiter;
-
- /**
- * System new line character
- * @throws ResourceInitializationException
- */
- private static String NEW_LINE = System.getProperty("line.separator");
- private boolean iv_postPatientLvlProcess = false;
- private boolean iv_useCDAProcess = false;
- private PatientLevelSmokingStatus patientSmokingStatus = null;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void collectionProcessComplete() throws
AnalysisEngineProcessException {
+ super.collectionProcessComplete();
+ final String outputDir = getSimpleSubDirectory().isEmpty()
+ ? getRootDirectory()
+ : getRootDirectory() + "/" +
getSimpleSubDirectory();
+ try {
+ writeFile( getData(), outputDir, "", _outputFile );
+ } catch ( IOException ioE ) {
+ LOGGER.error( ioE.getMessage() );
+ }
+ // !! This did not work as advertised. There should be 2 writers,
+ // one for corpus-level and one for patient-level
+
+// if ( iv_postPatientLvlProcess ) {
+// String filename = (String) getConfigParameterValue(
PARAM_OUTPUT_FILE );
+//
+// if ( iv_patient_level_file != null ) {
+// outFile = new File( iv_patient_level_file );
+// } else {
+// outFile = new File( filename.replace( filename, filename +
"_patientLevel.txt" ) );
+// }
+// if ( !outFile.exists() ) {
+// outFile.createNewFile();
+// }
+// patientSmokingStatus.setOutputFile( outFile.getAbsolutePath() );
+// patientSmokingStatus.collectCounts( "\\" + iv_delimiter );
+// patientSmokingStatus.assignPatientLevelSmokingStatus();
+// patientSmokingStatus.printToFile();
+ }
+
+
}
diff --git
a/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/context/NamedEntityContextAnalyzer.java
b/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/context/NamedEntityContextAnalyzer.java
deleted file mode 100644
index a52056e..0000000
---
a/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/context/NamedEntityContextAnalyzer.java
+++ /dev/null
@@ -1,121 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.ctakes.smokingstatus.context;
-
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.ctakes.core.fsm.adapters.TextTokenAdapter;
-import org.apache.ctakes.core.fsm.token.TextToken;
-import org.apache.ctakes.necontexts.ContextAnalyzer;
-import org.apache.log4j.Logger;
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.jcas.tcas.Annotation;
-import org.apache.uima.resource.ResourceInitializationException;
-
-
-/**
- * This context analyzer provides code that is shared by the
- * StatusContextAnalyzer and NegationContextAnalyzer which are both analyzers
- * that examine the contexts surrounding named entity annotations.
- */
-public abstract class NamedEntityContextAnalyzer implements ContextAnalyzer {
-
- private Logger iv_logger = Logger.getLogger(getClass().getName());
-
- private Set<String> _boundaryWordSet;
-
- public void initialize(UimaContext uimaContext) throws
ResourceInitializationException {
- initBoundaryData();
- }
-
- private void initBoundaryData() {
- iv_logger.info("initBoundaryData() called for
ContextInitializer");
- _boundaryWordSet = new HashSet<String>();
- _boundaryWordSet.add("but");
- _boundaryWordSet.add("however");
- _boundaryWordSet.add("nevertheless");
- _boundaryWordSet.add("notwithstanding");
- _boundaryWordSet.add("though");
- _boundaryWordSet.add("although");
- _boundaryWordSet.add("if");
- _boundaryWordSet.add("when");
- _boundaryWordSet.add("how");
- _boundaryWordSet.add("what");
- _boundaryWordSet.add("which");
- _boundaryWordSet.add("while");
- _boundaryWordSet.add("since");
- _boundaryWordSet.add("then");
- _boundaryWordSet.add("i");
- _boundaryWordSet.add("he");
- _boundaryWordSet.add("she");
- _boundaryWordSet.add("they");
- _boundaryWordSet.add("we");
-
- _boundaryWordSet.add(";");
- _boundaryWordSet.add(".");
- _boundaryWordSet.add(")");
- }
-
- public boolean isBoundary(Annotation contextAnnotation, int
scopeOrientation) throws AnalysisEngineProcessException {
- String lcText =
contextAnnotation.getCoveredText().toLowerCase();
- return _boundaryWordSet.contains(lcText);
- }
-
- /**
- * This method converts Token annotations to TextTokens required by the
fsm library used by both subclasses of this class.
- * @param tokenList a list of token annotations
- * @return a conversion of the token annotations as a list of TextTokens
- */
- protected List<TextToken> wrapAsFsmTokens(List<? extends Annotation>
tokenList) {
- List<TextToken> fsmTokenList = new ArrayList<TextToken>();
-
- Iterator<? extends Annotation> tokenItr = tokenList.iterator();
- while (tokenItr.hasNext()) {
- Annotation tokenAnnot = tokenItr.next();
- fsmTokenList.add(new TextTokenAdapter(tokenAnnot));
- }
-
- // Add dummy token to end of the list
- // This is a workaround for cases where a meaningful token
occurs at the
- // end of the list. Since there are no more tokens, the FSM
cannot push
- // itself into the next state. The dummy token's intent is to
provide
- // that extra token.
- fsmTokenList.add(new TextToken() {
-
- public String getText() {
- return "+DUMMY_TOKEN+";
- }
-
- public int getEndOffset() {
- return 0;
- }
-
- public int getStartOffset() {
- return 0;
- }
- });
-
- return fsmTokenList;
- }
-}
diff --git
a/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/context/negation/NegationContextAnalyzer.java
b/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/context/negation/NegationContextAnalyzer.java
deleted file mode 100644
index c49f8a4..0000000
---
a/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/context/negation/NegationContextAnalyzer.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.ctakes.smokingstatus.context.negation;
-
-import java.util.List;
-import java.util.Set;
-
-import org.apache.ctakes.core.fsm.output.NegationIndicator;
-import org.apache.ctakes.core.fsm.token.TextToken;
-import org.apache.ctakes.necontexts.ContextHit;
-import org.apache.ctakes.smokingstatus.context.NamedEntityContextAnalyzer;
-import org.apache.ctakes.smokingstatus.machine.NegationFSM;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.jcas.tcas.Annotation;
-
-
-public class NegationContextAnalyzer extends NamedEntityContextAnalyzer {
- private NegationFSM _negIndicatorFSM = new NegationFSM();
-
- /**
- * This method analyzes a list of tokens looking for a negation pattern
as
- * specified by the class NegationFSM.
- *
- * @see NegationFSM
- */
- public ContextHit analyzeContext(List<? extends Annotation>
contextTokens, int scopeOrientation)
- throws AnalysisEngineProcessException {
- List<TextToken> fsmTokenList = wrapAsFsmTokens(contextTokens);
-
- try {
- Set<NegationIndicator> s =
_negIndicatorFSM.execute(fsmTokenList);
-
- if (s.size() > 0) {
- NegationIndicator neg = s.iterator().next();
- return new ContextHit(neg.getStartOffset(),
neg.getEndOffset());
- } else {
- return null;
- }
- } catch (Exception e) {
- throw new AnalysisEngineProcessException(e);
- }
- }
-}
\ No newline at end of file
diff --git
a/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/machine/NegationFSM.java
b/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/machine/NegationFSM.java
deleted file mode 100644
index ab54987..0000000
---
a/ctakes-smoking-status/src/main/java/org/apache/ctakes/smokingstatus/machine/NegationFSM.java
+++ /dev/null
@@ -1,435 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.ctakes.smokingstatus.machine;
-
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.ctakes.core.fsm.condition.DisjoinCondition;
-import org.apache.ctakes.core.fsm.condition.NegateCondition;
-import org.apache.ctakes.core.fsm.condition.TextSetCondition;
-import org.apache.ctakes.core.fsm.output.NegationIndicator;
-import org.apache.ctakes.core.fsm.state.NamedState;
-import org.apache.ctakes.core.fsm.state.NonTerminalEndState;
-import org.apache.ctakes.core.fsm.token.BaseToken;
-
-import net.openai.util.fsm.AnyCondition;
-import net.openai.util.fsm.Condition;
-import net.openai.util.fsm.Machine;
-import net.openai.util.fsm.State;
-
-/**
- * Uses one or more finite state machines to detect negation indicators in the
- * given input of tokens.
- *
- * @author Mayo Clinic
- */
-public class NegationFSM {
-
- // regular modal verb
- private Set<String> iv_modalVerbsSet = new HashSet<String>();
- // negative particle
- private Set<String> iv_negParticlesSet = new HashSet<String>();
- // regular verbs requiring negation particle
- private Set<String> iv_regVerbsSet = new HashSet<String>();
- // negative verbs that contain negation in them
- private Set<String> iv_negVerbsSet = new HashSet<String>();
- // negation preposition
- private Set<String> iv_negPrepositionsSet = new HashSet<String>();
- // negatively charged determiners
- private Set<String> iv_negDeterminersSet = new HashSet<String>();
- // regular nouns - indicators
- private Set<String> iv_regNounsSet = new HashSet<String>();
- // regular prepositions
- private Set<String> iv_regPrepositionsSet = new HashSet<String>();
- // negative adjectives
- private Set<String> iv_negAdjectivesSet = new HashSet<String>();
- // negative collocations
- private Set<String> iv_negCollocSet = new HashSet<String>();
- // NEGATIVE COLLOCATION PARTICLE
- private Set<String> iv_negColPartSet = new HashSet<String>();
-
- // contains the finite state machines
- private Set<Machine> iv_machineSet = new HashSet<Machine>();
-
- /**
- *
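- * Constructor. Populates the word sets and registers the aspectual,
- * nominal and adjectival negation indicator machines.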
- *
- */
- public NegationFSM() {
- iv_modalVerbsSet.add("can");
- iv_modalVerbsSet.add("ca");
- iv_modalVerbsSet.add("will");
- iv_modalVerbsSet.add("must");
- iv_modalVerbsSet.add("could");
- iv_modalVerbsSet.add("would");
- iv_modalVerbsSet.add("should");
- iv_modalVerbsSet.add("shall");
- iv_modalVerbsSet.add("did");
-
- iv_negParticlesSet.add("not");
- iv_negColPartSet.add("out");
- iv_negParticlesSet.add("n't");
- iv_negParticlesSet.add("'t");
-
- iv_negCollocSet.add("rule");
- iv_negCollocSet.add("rules");
- iv_negCollocSet.add("ruled");
- iv_negCollocSet.add("ruling");
- iv_negCollocSet.add("rule-out");
-
- iv_regVerbsSet.add("reveal");
- iv_regVerbsSet.add("reveals");
- iv_regVerbsSet.add("revealed");
- iv_regVerbsSet.add("revealing");
- iv_regVerbsSet.add("have");
- iv_regVerbsSet.add("had");
- iv_regVerbsSet.add("has");
- iv_regVerbsSet.add("feel");
- iv_regVerbsSet.add("feels");
- iv_regVerbsSet.add("felt");
- iv_regVerbsSet.add("feeling");
- iv_regVerbsSet.add("complain");
- iv_regVerbsSet.add("complains");
- iv_regVerbsSet.add("complained");
- iv_regVerbsSet.add("complaining");
- iv_regVerbsSet.add("demonstrate");
- iv_regVerbsSet.add("demonstrates");
- iv_regVerbsSet.add("demonstrated");
- iv_regVerbsSet.add("demonstrating");
- iv_regVerbsSet.add("appear");
- iv_regVerbsSet.add("appears");
- iv_regVerbsSet.add("appeared");
- iv_regVerbsSet.add("appearing");
- iv_regVerbsSet.add("caused");
- iv_regVerbsSet.add("cause");
- iv_regVerbsSet.add("causing");
- iv_regVerbsSet.add("causes");
- iv_regVerbsSet.add("find");
- iv_regVerbsSet.add("finds");
- iv_regVerbsSet.add("found");
- iv_regVerbsSet.add("discover");
- iv_regVerbsSet.add("discovered");
- iv_regVerbsSet.add("discovers");
-
- iv_negVerbsSet.add("deny");
- iv_negVerbsSet.add("denies");
- iv_negVerbsSet.add("denied");
- iv_negVerbsSet.add("denying");
- iv_negVerbsSet.add("fail");
- iv_negVerbsSet.add("fails");
- iv_negVerbsSet.add("failed");
- iv_negVerbsSet.add("failing");
- iv_negVerbsSet.add("decline");
- iv_negVerbsSet.add("declines");
- iv_negVerbsSet.add("declined");
- iv_negVerbsSet.add("declining");
- iv_negVerbsSet.add("exclude");
- iv_negVerbsSet.add("excludes");
- iv_negVerbsSet.add("excluding");
- iv_negVerbsSet.add("excluded");
-
- iv_negPrepositionsSet.add("without");
- iv_negPrepositionsSet.add("absent");
- iv_negPrepositionsSet.add("none");
- iv_negPrepositionsSet.add("non");//added for smoking status
-
- iv_negDeterminersSet.add("no");
- iv_negDeterminersSet.add("any");
- iv_negDeterminersSet.add("neither");
- iv_negDeterminersSet.add("nor");
- iv_negDeterminersSet.add("never");
- iv_negDeterminersSet.add("non-contributory");//added for
smoking status
- iv_negDeterminersSet.add("noncontributory");//added for smoking
status
- iv_negDeterminersSet.add("non");//added for smoking status
-
- iv_regNounsSet.add("evidence");
- iv_regNounsSet.add("indication");
- iv_regNounsSet.add("indications");
- iv_regNounsSet.add("sign");
- iv_regNounsSet.add("signs");
- iv_regNounsSet.add("symptoms");
- iv_regNounsSet.add("symptom");
- iv_regNounsSet.add("sx");
- iv_regNounsSet.add("dx");
- iv_regNounsSet.add("diagnosis");
- iv_regNounsSet.add("history");
- iv_regNounsSet.add("hx");
- iv_regNounsSet.add("findings");
-
- iv_regPrepositionsSet.add("of");
- iv_regPrepositionsSet.add("in");
- iv_regPrepositionsSet.add("for");
- iv_regPrepositionsSet.add("with");
-
- iv_negAdjectivesSet.add("unremarkable");
- iv_negAdjectivesSet.add("unlikely");
- iv_negAdjectivesSet.add("negative");
-
- iv_machineSet.add(getAspectualNegIndicatorMachine());
- iv_machineSet.add(getNominalNegIndicatorMachine());
- iv_machineSet.add(getAdjNegIndicatorMachine());
-
- }
-
- private Machine getAspectualNegIndicatorMachine() {
- State startState = new NamedState("START");
- State endState = new NamedState("END");
- State anyState = new NamedState("ANY");
-
- State ntEndState = new NonTerminalEndState("NON TERMINAL END");
- endState.setEndStateFlag(true);
- ntEndState.setEndStateFlag(true);
-
- Machine m = new Machine(startState);
- State regModalState = new NamedState("REG_MODAL");
- State negPartState = new NamedState("NEG_PART");
- State negVerbState = new NamedState("NEG_VERB");
- State negCollocState = new NamedState("NEG_COLLOC");
- State negColPartState = new NamedState("NEG_COLPART");
-
- Condition regModalC = new TextSetCondition(iv_modalVerbsSet,
false);
- Condition negPartC = new TextSetCondition(iv_negParticlesSet,
false);
- Condition regVerbC = new TextSetCondition(iv_regVerbsSet,
false);
- Condition negVerbC = new TextSetCondition(iv_negVerbsSet,
false);
- Condition negDetC = new TextSetCondition(iv_negDeterminersSet,
false);
- Condition negCollocC = new TextSetCondition(iv_negCollocSet,
false);
- Condition negColPartC = new TextSetCondition(iv_negColPartSet,
false);
-
- Condition notCollocC = new NegateCondition(negCollocC);
-
- startState.addTransition(negVerbC, negVerbState);
- startState.addTransition(negCollocC, negCollocState); // rule
-
- startState.addTransition(new DisjoinCondition(regModalC,
regVerbC),
- regModalState); // start with a modal
- startState.addTransition(new DisjoinCondition(negPartC,
negDetC),
- negPartState);
-
- startState.addTransition(new AnyCondition(), startState);
-
- regModalState.addTransition(negCollocC, negCollocState);
- negCollocState.addTransition(negColPartC, negColPartState); //
out
- negColPartState.addTransition(new AnyCondition(), ntEndState);
- negCollocState.addTransition(new AnyCondition(), startState);
-
- regModalState.addTransition(new DisjoinCondition(negPartC,
negDetC),
- negPartState);
- regModalState.addTransition(new AnyCondition(), anyState);
-
- anyState.addTransition(new DisjoinCondition(negPartC, negDetC),
- negPartState);
- anyState.addTransition(new AnyCondition(), startState);
-
- negPartState.addTransition(notCollocC, ntEndState);
- negVerbState.addTransition(notCollocC, ntEndState);
- negPartState.addTransition(new AnyCondition(), startState);
- negVerbState.addTransition(new AnyCondition(), startState);
-
- negPartState.addTransition(new AnyCondition(), ntEndState);
- negVerbState.addTransition(new AnyCondition(), ntEndState);
-
- ntEndState.addTransition(new AnyCondition(), endState);
-
- return m;
- }
-
-
- /**
- * should recognize:
- * <ul><li>A</li>
- * <li>B</li>
- * <li>B C</li>
- * <li>B D* C</li>
- * </ul>
- * <p>where A is one of
- * <ul><li>without</li>
- * <li>absent</li>
- * <li>none</li>
- * <li>non</li>
- * </ul>
- * <p> and B is one of
- * <ul>
- * <li>no</li>
- * <li>any</li>
- * <li>neither</li>
- * <li>nor</li>
- * <li>never</li>
- * <li>non-contributory</li>
- * <li>noncontributory</li>
- * <li>non</li>
- * </ul>
- * <p> and C is one of
- * <ul>
- * <li>evidence</li>
- * <li>indication</li>
- * <li>indications</li>
- * <li>sign</li>
- * <li>signs</li>
- * <li>symptoms</li>
- * <li>symptom</li>
- * <li>sx</li>
- * <li>dx</li>
- * <li>diagnosis</li>
- * <li>history</li>
- * <li>hx</li>
- * <li>findings</li>
- * </ul>
- * <p> and D is anything
- * @return the finite state machine that recognizes the nominal negation patterns listed above
- */
- private Machine getNominalNegIndicatorMachine() {
- State startState = new NamedState("START");
- State endState = new NamedState("END");
- State anyState = new NamedState("ANY");
- State ntEndState = new NonTerminalEndState("NON TERMINAL END");
- endState.setEndStateFlag(true);
- ntEndState.setEndStateFlag(true);
-
- Machine m = new Machine(startState);
- State negPrepState = new NamedState("NEG_PREP");
- State negDetState = new NamedState("NEG_DET");
- State regNounState = new NamedState("REG_NOUN");
-
- Condition negPrepC = new
TextSetCondition(iv_negPrepositionsSet, false);
- Condition negDetC = new TextSetCondition(iv_negDeterminersSet,
false);
- Condition regNounC = new TextSetCondition(iv_regNounsSet,
false);
-
- startState.addTransition(negDetC, negDetState); // start with a
modal
- startState.addTransition(negPrepC, negPrepState);
- startState.addTransition(new AnyCondition(), startState);
-
- negPrepState.addTransition(new AnyCondition(), ntEndState);
- negDetState.addTransition(regNounC, regNounState);
- negDetState.addTransition(new AnyCondition(), ntEndState);
- negDetState.addTransition(new AnyCondition(), anyState);
-
- anyState.addTransition(regNounC, regNounState);
- anyState.addTransition(new AnyCondition(), anyState);
-
- regNounState.addTransition(new AnyCondition(), ntEndState);
-
- ntEndState.addTransition(new AnyCondition(), endState);
-
- return m;
- }
-
-
- /**
- * recognizes "A B ..."
- * where A is unremarkable, unlikely, or negative
- * and B is of, in, for, or with
- */
-
- private Machine getAdjNegIndicatorMachine() {
- State startState = new NamedState("START");
- State endState = new NamedState("END");
- State ntEndState = new NonTerminalEndState("NON TERMINAL END");
- endState.setEndStateFlag(true);
- ntEndState.setEndStateFlag(true);
-
- Machine m = new Machine(startState);
- State regPrepState = new NamedState("REG_PREP");
- State negAdjState = new NamedState("NEG_ADJ");
-
- Condition regPrepC = new
TextSetCondition(iv_regPrepositionsSet, false);
- Condition negAdjC = new TextSetCondition(iv_negAdjectivesSet,
false);
-
- startState.addTransition(negAdjC, negAdjState); // start with a
modal
- startState.addTransition(new AnyCondition(), startState);
-
- negAdjState.addTransition(regPrepC, regPrepState);
- regPrepState.addTransition(new AnyCondition(), ntEndState);
- negAdjState.addTransition(new AnyCondition(), startState);
-
- ntEndState.addTransition(new AnyCondition(), endState);
-
- return m;
- }
-
- /**
- * Executes the finite state machines.
- *
- * @param tokens list of tokens to scan; each element is cast to BaseToken
- * @return Set of NegationIndicator objects.
- * @throws Exception
- */
- public Set<NegationIndicator> execute(List<?> tokens) throws Exception {
- Set<NegationIndicator> outSet = new
HashSet<NegationIndicator>();
-
- // maps a fsm to a token start index
- // key = fsm , value = token start index
- Map<Machine, Integer> tokenStartMap = new HashMap<Machine,
Integer>();
-
- for (int i = 0; i < tokens.size(); i++) {
- BaseToken token = (BaseToken) tokens.get(i);
-
- Iterator<Machine> machineItr = iv_machineSet.iterator();
- while (machineItr.hasNext()) {
- Machine fsm = machineItr.next();
-
- fsm.input(token);
-
- State currentState = fsm.getCurrentState();
- if (currentState.getStartStateFlag()) {
- tokenStartMap.put(fsm, new Integer(i));
- }
- if (currentState.getEndStateFlag()) {
- Object o = tokenStartMap.get(fsm);
- int tokenStartIndex;
- if (o == null) {
- // By default, all machines
start with
- // token zero.
- tokenStartIndex = 0;
- } else {
- tokenStartIndex = ((Integer)
o).intValue();
- // skip ahead over single token
we don't want
- tokenStartIndex++;
- }
- BaseToken endToken = null;
- if (currentState instanceof
NonTerminalEndState) {
- endToken = (BaseToken)
tokens.get(i - 1);
- } else {
- endToken = token;
- }
-
- BaseToken startToken = (BaseToken)
tokens
- .get(tokenStartIndex);
- NegationIndicator neg = new
NegationIndicator(startToken
- .getStartOffset(),
endToken.getEndOffset());
- outSet.add(neg);
- fsm.reset();
- }
- }
- }
-
- // cleanup
- tokenStartMap.clear();
-
- // reset machines
- Iterator<Machine> itr = iv_machineSet.iterator();
- while (itr.hasNext()) {
- Machine fsm = itr.next();
- fsm.reset();
- }
-
- return outSet;
- }
-}
\ No newline at end of file