Author: seanfinan
Date: Wed Oct 12 19:50:39 2016
New Revision: 1764529

URL: http://svn.apache.org/viewvc?rev=1764529&view=rev
Log:
PipelineReader uses key=value key=value ... for setting component parameters
PipelineReader has addDescription command to utilize static .createAnnotatorDescription()
Renaming of example pipeline runners

Added: ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/ - copied from r1764190, ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/HelloWorldPipeline.txt - copied, changed from r1764190, ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline1.txt Removed: ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/ExamplePipeline1.txt ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/ExamplePipeline2.txt ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java ctakes/trunk/ctakes-regression-test/src/test/java/org/apache/ctakes/regression/test/RegressionPipelineTest.java ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java?rev=1764529&r1=1764528&r2=1764529&view=diff ============================================================================== --- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java (original) +++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java Wed Oct 12 19:50:39 2016 @@ -7,6 +7,7 @@ import org.apache.ctakes.core.resource.F import org.apache.log4j.Logger; import org.apache.uima.UIMAException; import 
org.apache.uima.analysis_component.AnalysisComponent; +import org.apache.uima.analysis_engine.AnalysisEngineDescription; import org.apache.uima.collection.CollectionReader; import org.apache.uima.resource.ResourceInitializationException; @@ -15,30 +16,32 @@ import java.io.IOException; import java.io.InputStreamReader; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.util.ArrayList; import java.util.Collection; import java.util.regex.Pattern; /** * Creates a pipeline (PipelineBuilder) from specifications in a flat plaintext file. - * <p> + * * <p>There are several basic commands: * addPackage <i>user_package_name</i> * loadParameters <i>path_to_properties_file_with_ae_parameters</i> - * addParameters <i>ae_parameter_name</i>|<i>ae_parameter_value</i>| ... + * addParameters <i>ae_parameter_name=ae_parameter_value e_parameter_name=ae_parameter_value</i> ... * reader <i>collection_reader_class_name</i> * readFiles <i>input_directory</i> - * <i>input_directory</i> can be empty if {@link FilesInDirectoryCollectionReader#PARAM_INPUTDIR} was specified - * add <i>ae_or_cc_class_name</i> - * addLogged <i>ae_or_cc_class_name</i> + * <i>input_directory</i> can be empty if + * {@link FilesInDirectoryCollectionReader#PARAM_INPUTDIR} ("InputDirectory") was specified + * add <i>ae_or_cc_class_name ae_parameter_name=ae_parameter_value e_parameter_name<=ae_parameter_value</i> ... + * addLogged <i>ae_or_cc_class_name ae_parameter_name=ae_parameter_value e_parameter_name=ae_parameter_value</i> ... 
+ * addDescription <i>ae_or_cc_class_name</i> * collectCuis * collectEntities * writeXmis <i>output_directory</i> - * <i>output_directory</i> can be empty if {@link XmiWriterCasConsumerCtakes#PARAM_OUTPUTDIR} was specified - * <p> + * <i>output_directory</i> can be empty if + * {@link XmiWriterCasConsumerCtakes#PARAM_OUTPUTDIR} ("OutputDirectory") was specified * # and // may be used to mark line comments * </p> - * <p> * class names must be fully-specified with package unless they are in standard ctakes cr ae or cc packages, * or in a package specified by an earlier addPackage command. * @@ -74,7 +77,8 @@ final public class PipelineReader { static private final Object[] EMPTY_OBJECT_ARRAY = new Object[ 0 ]; - static private final Pattern SPLIT_PATTERN = Pattern.compile( "\\|" ); + static private final Pattern SPACE_PATTERN = Pattern.compile( "\\s+" ); + static private final Pattern KEY_VALUE_PATTERN = Pattern.compile( "=" ); private PipelineBuilder _builder; @@ -117,7 +121,7 @@ final public class PipelineReader { continue; } final int spaceIndex = line.indexOf( ' ' ); - if ( spaceIndex < 3 ) { + if ( spaceIndex < 0 ) { addToPipeline( line, "" ); } else { addToPipeline( line.substring( 0, spaceIndex ), line.substring( spaceIndex + 1 ).trim() ); @@ -136,6 +140,7 @@ final public class PipelineReader { return _builder; } + /** * @param command specified by first word in the file line * @param parameter specified by second word in the file line @@ -150,7 +155,7 @@ final public class PipelineReader { _builder.loadParameters( parameter ); break; case "addParameters": - _builder.addParameters( getStrings( parameter ) ); + _builder.addParameters( splitParameters( parameter ) ); break; case "reader": _builder.reader( createReader( parameter ) ); @@ -163,15 +168,34 @@ final public class PipelineReader { } break; case "add": - _builder.add( getComponentClass( parameter ) ); + if ( hasParameters( parameter ) ) { + final String[] component_parameters = splitFromParameters( 
parameter ); + final String component = component_parameters[ 0 ]; + final Object[] parameters = splitParameters( component_parameters[ 1 ] ); + _builder.add( getComponentClass( component ), parameters ); + } else { + _builder.add( getComponentClass( parameter ) ); + } break; case "addLogged": - _builder.addLogged( getComponentClass( parameter ) ); + if ( hasParameters( parameter ) ) { + final String[] component_parameters = splitFromParameters( parameter ); + final String component = component_parameters[ 0 ]; + final Object[] parameters = splitParameters( component_parameters[ 1 ] ); + _builder.addLogged( getComponentClass( component ), parameters ); + } else { + _builder.addLogged( getComponentClass( parameter ) ); + } + break; + case "addDescription": + final AnalysisEngineDescription description = createDescription( parameter ); + _builder.addDescription( description ); break; + case "collectCuis": _builder.collectCuis(); break; - case "collectEntites": + case "collectEntities": _builder.collectEntities(); break; case "writeXmis": @@ -230,11 +254,48 @@ final public class PipelineReader { if ( componentClass != null ) { return componentClass; } + componentClass = getPackagedClass( + "org.apache.ctakes." + packageName, className, AnalysisComponent.class ); + if ( componentClass != null ) { + return componentClass; + } } return null; } /** + * This requires that the component class has a static createAnnotatorDescription method with no parameters + * @param className component class for which a descriptor should be created + * @return a description generated for the component + * @throws ResourceInitializationException if anything went wrong with finding the class or the method, + * or invoking the method to get an AnalysisEngineDescription + */ + private AnalysisEngineDescription createDescription( final String className ) + throws ResourceInitializationException { + final Class<? 
extends AnalysisComponent> componentClass = getComponentClass( className ); + Method method; + try { + method = componentClass.getMethod( "createAnnotatorDescription" ); + } catch ( NoSuchMethodException nsmE ) { + LOGGER.error( "No createAnnotatorDescription method in " + className ); + throw new ResourceInitializationException( nsmE ); + } + try { + final Object invocation = method.invoke( null ); + if ( !AnalysisEngineDescription.class.isInstance( invocation ) ) { + LOGGER.error( "createAnnotatorDescription in " + className + " returned an " + + invocation.getClass().getName() + " not an AnalysisEngineDescription" ); + throw new ResourceInitializationException(); + } + return (AnalysisEngineDescription)invocation; + } catch ( IllegalAccessException | InvocationTargetException multE ) { + LOGGER.error( "Could not invoke createAnnotatorDescription on " + className ); + throw new ResourceInitializationException( multE ); + } + } + + + /** * @param className fully-specified or simple name of a cr Collection Reader class * @return instantiated collection reader * @throws ResourceInitializationException if the class could not be found or instantiated @@ -282,6 +343,11 @@ final public class PipelineReader { if ( readerClass != null ) { return readerClass; } + readerClass = getPackagedClass( + "org.apache.ctakes." 
+ packageName, className, CollectionReader.class ); + if ( readerClass != null ) { + return readerClass; + } } return null; } @@ -328,12 +394,90 @@ final public class PipelineReader { } /** - * @param parameter text - * @return array created by splitting text at '|' characters + * + * @param text - + * @return true if there is more than one word in the text */ - static private String[] getStrings( final String parameter ) { - return SPLIT_PATTERN.split( parameter ); + static private boolean hasParameters( final String text ) { + return SPACE_PATTERN.split( text ).length > 1; } + /** + * @param text text with more than one word + * @return an array of two strings, [0]= the first word, [1]= the remaining words separated by spaces + */ + static private String[] splitFromParameters( final String text ) { + final String[] allSplits = SPACE_PATTERN.split( text ); + final String[] returnSplits = new String[ 2 ]; + returnSplits[ 0 ] = allSplits[ 0 ]; + String parameters = allSplits[ 1 ]; + for ( int i = 2; i < allSplits.length; i++ ) { + parameters += " " + allSplits[ i ]; + } + returnSplits[ 1 ] = parameters; + return returnSplits; + } + + /** + * @param text - + * @return array created by splitting text ' ' and then at '=' characters + */ + static private Object[] splitParameters( final String text ) { + if ( text == null || text.trim().isEmpty() ) { + return EMPTY_OBJECT_ARRAY; + } + final String[] pairs = SPACE_PATTERN.split( text.trim() ); + final Object[] keysAndValues = new Object[ pairs.length * 2 ]; + int i = 0; + for ( String pair : pairs ) { + final String[] keyAndValue = KEY_VALUE_PATTERN.split( pair ); + keysAndValues[ i ] = keyAndValue[ 0 ]; + if ( keyAndValue.length == 1 ) { + keysAndValues[ i + 1 ] = ""; + } else if ( keyAndValue.length > 2 ) { + LOGGER.warn( "Multiple parameter values, using first of " + pair ); + } + keysAndValues[ i + 1 ] = getValueObject( keyAndValue[ 1 ] ); + i += 2; + } + return keysAndValues; + } + + static private Object 
getValueObject( final String value ) { + final Object returner = attemptParseBoolean( value ); + if ( !value.equals( returner ) ) { + return returner; + } + return attemptParseInt( value ); + } + + /** + * Since uimafit parameter values can be integers, check for an integer value + * + * @param value String value parsed from file + * @return the value as an Integer, or the original String if an Integer could not be resolved + */ + static private Object attemptParseInt( final String value ) { + try { + return Integer.valueOf( value ); + } catch ( NumberFormatException nfE ) { + return value; + } + } + + /** + * Since uimafit parameter values can be boolean, check for a boolean value + * + * @param value String value parsed from file + * @return the value as a Boolean, or the original String if it is not "true" or "false", case insensitive + */ + static private Object attemptParseBoolean( final String value ) { + if ( value.equalsIgnoreCase( "true" ) ) { + return Boolean.TRUE; + } else if ( value.equalsIgnoreCase( "false" ) ) { + return Boolean.FALSE; + } + return value; + } } Copied: ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/HelloWorldPipeline.txt (from r1764190, ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline1.txt) URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/HelloWorldPipeline.txt?p2=ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/HelloWorldPipeline.txt&p1=ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline1.txt&r1=1764190&r2=1764529&rev=1764529&view=diff ============================================================================== --- ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline1.txt (original) +++ 
ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/HelloWorldPipeline.txt Wed Oct 12 19:50:39 2016 @@ -1,8 +1,15 @@ +// This file contains commands and parameters to run the ctakes-examples "Hello World" pipeline + // Equivalent of ClinicalPipelineFactory.getTokenProcessingPipeline() add SimpleSegmentAnnotator add SentenceDetector add TokenizerAnnotatorPTB add ContextDependentTokenizerAnnotator +// The POSTagger has a -complex- startup, but it can create its own description to handle it +addDescription POSTagger + +// add the simple Hello World Annotator +add org.apache.ctakes.examples.ae.ExampleHelloWorldAnnotator -// The POSTagger has a -complex- startup and should be added manually -# add POSTagger +// Collect discovered entities for post-run information +collectEntities Modified: ctakes/trunk/ctakes-regression-test/src/test/java/org/apache/ctakes/regression/test/RegressionPipelineTest.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-regression-test/src/test/java/org/apache/ctakes/regression/test/RegressionPipelineTest.java?rev=1764529&r1=1764528&r2=1764529&view=diff ============================================================================== --- ctakes/trunk/ctakes-regression-test/src/test/java/org/apache/ctakes/regression/test/RegressionPipelineTest.java (original) +++ ctakes/trunk/ctakes-regression-test/src/test/java/org/apache/ctakes/regression/test/RegressionPipelineTest.java Wed Oct 12 19:50:39 2016 @@ -18,14 +18,6 @@ */ package org.apache.ctakes.regression.test; -import java.io.File; -import java.io.IOException; -import java.util.List; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; - import org.apache.log4j.Logger; import org.apache.uima.UIMAFramework; import org.apache.uima.cas.CAS; @@ -42,12 +34,19 @@ import org.junit.Test; import org.w3c.dom.Document; import org.xml.sax.SAXException; +import 
javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import java.io.File; +import java.io.IOException; +import java.util.List; + /** * Runs a full pipeline and compares the xml output to ensure all annotators * work together in harmony. * * This is designed to run all CPE's inside the - * desc/collection_processing_engine Directory. So any new pipelines added there + * desc/collection_processing_engine Directory. So any new pipeline added there * will automatically be run and tested as long as they put the generated output * to expectedoutput/{nameofcpe} * Modified: ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml?rev=1764529&r1=1764528&r2=1764529&view=diff ============================================================================== --- ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml (original) +++ ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml Wed Oct 12 19:50:39 2016 @@ -61,6 +61,7 @@ </delegateAnalysisEngine> <delegateAnalysisEngine key="DictionaryLookupAnnotatorDB"> <import location="../../../ctakes-dictionary-lookup/desc/analysis_engine/DictionaryLookupAnnotatorUMLS.xml"/> + <!--<import location="../../../ctakes-dictionary-lookup-fast/desc/analysis_engine/UmlsLookupAnnotator.xml"/>--> </delegateAnalysisEngine> <delegateAnalysisEngine key="LookupWindowAnnotator"> <import location="../../../ctakes-clinical-pipeline/desc/analysis_engine/LookupWindowAnnotator.xml"/> Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java URL: 
http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java?rev=1764529&r1=1764528&r2=1764529&view=diff ============================================================================== --- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java (original) +++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java Wed Oct 12 19:50:39 2016 @@ -1,14 +1,6 @@ package org.apache.ctakes.relationextractor.metastasis; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.net.MalformedURLException; -import java.net.URI; -import java.util.ArrayList; -import java.util.List; - +import com.google.common.io.CharStreams; import org.apache.ctakes.relationextractor.eval.SHARPXMI.CopyDocumentTextToGoldView; import org.apache.ctakes.relationextractor.eval.SHARPXMI.DocumentIDAnnotator; import org.apache.uima.UIMAFramework; @@ -35,12 +27,21 @@ import org.cleartk.util.cr.UriCollection import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; -import com.google.common.io.CharStreams; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.MalformedURLException; +import java.net.URI; +import java.util.ArrayList; +import java.util.List; public class MetastasisXmiGenerationPipeline { - public static final File ANAFORA_ANNOTATIONS_DIR = new File("DeepPhe/Metastasis/Anafora/All/"); - public static final String XMI_OUTPUT_DIR = "DeepPhe/Metastasis/Xmi/All/"; + // public static final File ANAFORA_ANNOTATIONS_DIR = new File("DeepPhe/Metastasis/Anafora/All/"); + public static final File ANAFORA_ANNOTATIONS_DIR + = new File( 
"\\\\rc-fs.tch.harvard.edu\\chip-nlp\\Public\\DeepPhe\\Metastasis\\Anafora\\Test" ); + public static final String XMI_OUTPUT_DIR = "C:\\Spiffy\\prj_darth_phenome\\output\\temp\\metastatic\\Test"; public static final String GOLD_VIEW_NAME = "GoldView"; public static void main(String[] args) throws Exception { @@ -62,8 +63,9 @@ public class MetastasisXmiGenerationPipe AggregateBuilder builder = new AggregateBuilder(); builder.add(UriToDocumentTextAnnotator.getDescription()); - - File preprocessDescFile = new File("desc/analysis_engine/RelationExtractorPreprocessor.xml"); + + File preprocessDescFile + = new File( "C:\\Spiffy\\ctakes_trunk_intellij\\dev\\apache\\ctakes-relation-extractor\\desc\\analysis_engine/RelationExtractorPreprocessor.xml" ); XMLParser parser = UIMAFramework.getXMLParser(); XMLInputSource source = new XMLInputSource(preprocessDescFile); builder.add(parser.parseAnalysisEngineDescription(source));