Author: seanfinan
Date: Thu Oct 13 04:27:23 2016
New Revision: 1764579

URL: http://svn.apache.org/viewvc?rev=1764579&view=rev
Log:
EntityCollector better Entity for Collection
PipelineReader add load other pipeline file, set values for descriptors
DocumentIDAnnotationUtil number unknown document ids
Hello World Pipelines - new and improved

Modified:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/EntityCollector.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DocumentIDAnnotationUtil.java
    ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipeline/HelloWorldBuildPipeRunner.java
    ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipeline/HelloWorldReadPipeRunner.java
    ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/HelloWorldPipeline.txt

Modified: 
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/EntityCollector.java
URL: 
http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/EntityCollector.java?rev=1764579&r1=1764578&r2=1764579&view=diff
==============================================================================
--- 
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/EntityCollector.java
 (original)
+++ 
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/EntityCollector.java
 Thu Oct 13 04:27:23 2016
@@ -61,7 +61,10 @@ public enum EntityCollector {
       final StringBuilder sb = new StringBuilder();
       for ( Map.Entry<String, Collection<Entity>> entry : 
_entityMap.entrySet() ) {
          sb.append( entry.getKey() ).append( "\n" );
-         entry.getValue().stream().map( Entity::toString ).forEach( sb::append 
);
+         entry.getValue().stream()
+               .sorted( ( e1, e2 ) -> e1._begin - e2._begin )
+               .map( Entity::toString )
+               .forEach( sb::append );
       }
       return sb.toString();
    }
@@ -110,6 +113,16 @@ public enum EntityCollector {
          sb.append( '\n' );
          return sb.toString();
       }
+
+      @Override
+      public int hashCode() {
+         return toString().hashCode();
+      }
+
+      @Override
+      public boolean equals( final Object other ) {
+         return toString().equals( other.toString() );
+      }
    }
 
    /**
@@ -126,7 +139,7 @@ public enum EntityCollector {
       }
 
       static private void putEntities( final String documentId, final 
Collection<IdentifiedAnnotation> annotations ) {
-         final Collection<Entity> entities = annotations.stream().map( 
Entity::new ).collect( Collectors.toList() );
+         final Collection<Entity> entities = annotations.stream().map( 
Entity::new ).collect( Collectors.toSet() );
          EntityCollector.getInstance()._entityMap.put( documentId, entities );
       }
    }

Modified: 
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java
URL: 
http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java?rev=1764579&r1=1764578&r2=1764579&view=diff
==============================================================================
--- 
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java
 (original)
+++ 
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java
 Thu Oct 13 04:27:23 2016
@@ -25,6 +25,7 @@ import java.util.regex.Pattern;
  * Creates a pipeline (PipelineBuilder) from specifications in a flat 
plaintext file.
  *
  * <p>There are several basic commands:
+ * load <i>path_to_another_pipeline_file</i>
  * addPackage <i>user_package_name</i>
  * loadParameters <i>path_to_properties_file_with_ae_parameters</i>
  * addParameters <i>ae_parameter_name=ae_parameter_value 
e_parameter_name=ae_parameter_value</i> ...
@@ -40,7 +41,7 @@ import java.util.regex.Pattern;
  * writeXmis <i>output_directory</i>
  *    <i>output_directory</i> can be empty if
  *    {@link XmiWriterCasConsumerCtakes#PARAM_OUTPUTDIR} ("OutputDirectory") 
was specified
- * # and // may be used to mark line comments
+ * // and # and ! may be used to mark line comments
  * </p>
  * class names must be fully-specified with package unless they are in 
standard ctakes cr ae or cc packages,
  * or in a package specified by an earlier addPackage command.
@@ -79,6 +80,7 @@ final public class PipelineReader {
 
    static private final Pattern SPACE_PATTERN = Pattern.compile( "\\s+" );
    static private final Pattern KEY_VALUE_PATTERN = Pattern.compile( "=" );
+   static private final Pattern COMMA_ARRAY_PATTERN = Pattern.compile( "," );
 
    private PipelineBuilder _builder;
 
@@ -116,7 +118,7 @@ final public class PipelineReader {
          String line = reader.readLine();
          while ( line != null ) {
             line = line.trim();
-            if ( line.isEmpty() || line.startsWith( "//" ) || line.startsWith( 
"#" ) ) {
+            if ( line.isEmpty() || line.startsWith( "//" ) || line.startsWith( 
"#" ) || line.startsWith( "!" ) ) {
                line = reader.readLine();
                continue;
             }
@@ -140,7 +142,7 @@ final public class PipelineReader {
       return _builder;
    }
 
-
+   // TODO add ability to pass parameters with addDescription
    /**
     * @param command   specified by first word in the file line
     * @param parameter specified by second word in the file line
@@ -148,6 +150,9 @@ final public class PipelineReader {
     */
    private void addToPipeline( final String command, final String parameter ) 
throws UIMAException {
       switch ( command ) {
+         case "load":
+            loadPipelineFile( parameter );
+            break;
          case "addPackage":
             _userPackages.add( parameter );
             break;
@@ -188,8 +193,16 @@ final public class PipelineReader {
             }
             break;
          case "addDescription":
-            final AnalysisEngineDescription description = createDescription( 
parameter );
-            _builder.addDescription( description );
+            if ( hasParameters( parameter ) ) {
+               final String[] descriptor_parameters = splitFromParameters( 
parameter );
+               final String component = descriptor_parameters[ 0 ];
+               final Object[] values = splitDescriptorValues( 
descriptor_parameters[ 1 ] );
+               final AnalysisEngineDescription description = 
createDescription( component, values );
+               _builder.addDescription( description );
+            } else {
+               final AnalysisEngineDescription description = 
createDescription( parameter );
+               _builder.addDescription( description );
+            }
             break;
 
          case "collectCuis":
@@ -266,22 +279,27 @@ final public class PipelineReader {
    /**
     * This requires that the component class has a static 
createAnnotatorDescription method with no parameters
     * @param className component class for which a descriptor should be created
+    * @param values optional parameter values for the descriptor creator
     * @return a description generated for the component
     * @throws ResourceInitializationException if anything went wrong with 
finding the class or the method,
     * or invoking the method to get an AnalysisEngineDescription
     */
-   private AnalysisEngineDescription createDescription( final String className 
)
+   private AnalysisEngineDescription createDescription( final String 
className, final Object... values )
          throws ResourceInitializationException {
       final Class<? extends AnalysisComponent> componentClass = 
getComponentClass( className );
       Method method;
       try {
-         method = componentClass.getMethod( "createAnnotatorDescription" );
+         if ( values.length == 0 ) {
+            method = componentClass.getMethod( "createAnnotatorDescription" );
+         } else {
+            method = componentClass.getMethod( "createAnnotatorDescription", 
getValueTypes( values ) );
+         }
       } catch ( NoSuchMethodException nsmE ) {
          LOGGER.error( "No createAnnotatorDescription method in " + className 
);
          throw new ResourceInitializationException( nsmE );
       }
       try {
-         final Object invocation = method.invoke( null );
+         final Object invocation = method.invoke( null, values );
          if ( !AnalysisEngineDescription.class.isInstance( invocation ) ) {
             LOGGER.error( "createAnnotatorDescription in " + className + " 
returned an "
                           + invocation.getClass().getName() + " not an 
AnalysisEngineDescription" );
@@ -294,6 +312,27 @@ final public class PipelineReader {
       }
    }
 
+   /**
+    * The java reflection getMethod does not handle autoboxing/unboxing.
+    * So, we assume that Integer and Boolean parameter values will actually be 
primitives.
+    *
+    * @param values parameter value objects
+    * @return parameter value class types, unboxing to primitives where needed
+    */
+   static private Class<?>[] getValueTypes( final Object... values ) {
+      final Class<?>[] classArray = new Class[ values.length ];
+      for ( int i = 0; i < values.length; i++ ) {
+         final Class<?> type = values[ i ].getClass();
+         if ( type.equals( Integer.class ) ) {
+            classArray[ i ] = int.class;
+         } else if ( type.equals( Boolean.class ) ) {
+            classArray[ i ] = boolean.class;
+         } else {
+            classArray[ i ] = type;
+         }
+      }
+      return classArray;
+   }
 
    /**
     * @param className fully-specified or simple name of a cr Collection 
Reader class
@@ -443,7 +482,19 @@ final public class PipelineReader {
       return keysAndValues;
    }
 
+   static private Object[] splitDescriptorValues( final String text ) {
+      final String[] values = SPACE_PATTERN.split( text.trim() );
+      final Object[] valueObjects = new Object[ values.length ];
+      for ( int i = 0; i < values.length; i++ ) {
+         valueObjects[ i ] = getValueObject( values[ i ] );
+      }
+      return valueObjects;
+   }
+
    static private Object getValueObject( final String value ) {
+      if ( isCommaArray( value ) ) {
+         return attemptParseArray( value );
+      }
       final Object returner = attemptParseBoolean( value );
       if ( !value.equals( returner ) ) {
          return returner;
@@ -480,4 +531,20 @@ final public class PipelineReader {
       return value;
    }
 
+   /**
+    * @param value String value parsed from file
+    * @return true if there are any comma characters in the value, denoting an 
array
+    */
+   static private boolean isCommaArray( final String value ) {
+      return value.indexOf( ',' ) > 0;
+   }
+
+   /**
+    * @param value String value parsed from file
+    * @return an array of String
+    */
+   static private Object attemptParseArray( final String value ) {
+      return COMMA_ARRAY_PATTERN.split( value );
+   }
+
 }

Modified: 
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DocumentIDAnnotationUtil.java
URL: 
http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DocumentIDAnnotationUtil.java?rev=1764579&r1=1764578&r2=1764579&view=diff
==============================================================================
--- 
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DocumentIDAnnotationUtil.java
 (original)
+++ 
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DocumentIDAnnotationUtil.java
 Thu Oct 13 04:27:23 2016
@@ -41,6 +41,8 @@ final public class DocumentIDAnnotationU
 
    static private final Pattern FILE_FIX_PATTERN = Pattern.compile( 
"[^A-Za-z0-9\\.]" );
 
+   static private long _noDocIdIndex = 1;
+
    // Utility classes should be final and have only a private constructor
    private DocumentIDAnnotationUtil() {
    }
@@ -63,15 +65,18 @@ final public class DocumentIDAnnotationU
       final JFSIndexRepository indexes = jcas.getJFSIndexRepository();
       final FSIterator<TOP> documentIDIterator = indexes.getAllIndexedFS( 
DocumentID.type );
       if ( documentIDIterator == null || !documentIDIterator.hasNext() ) {
-         LOGGER.debug( "Could not find document Id Annotation" );
-         return NO_DOCUMENT_ID;
+         LOGGER.warn( "Unable to find DocumentIDAnnotation" );
+         return createDocumentId( jcas );
       }
       final DocumentID documentIDAnnotation = 
(DocumentID)documentIDIterator.next();
       try {
          return documentIDAnnotation.getDocumentID();
       } catch ( CASRuntimeException casRTE ) {
-         LOGGER.warn( "document Id Annotation does not have the id feature 
set", casRTE );
-         return NO_DOCUMENT_ID;
+         final String newId = NO_DOCUMENT_ID + _noDocIdIndex;
+         _noDocIdIndex++;
+         LOGGER.warn( "document Id Annotation does not have the id feature 
set, setting to " + newId, casRTE );
+         documentIDAnnotation.setDocumentID( newId );
+         return newId;
       }
    }
 
@@ -104,7 +109,7 @@ final public class DocumentIDAnnotationU
             }
             if ( documentID == null || documentID.equals( NO_DOCUMENT_ID ) ) {
                LOGGER.warn( "Unable to find DocumentIDAnnotation" );
-               return NO_DOCUMENT_ID;
+               return createDocumentId( startingJcas );
             }
          }
       }
@@ -138,4 +143,19 @@ final public class DocumentIDAnnotationU
       return FILE_FIX_PATTERN.matcher( docId ).replaceAll( "_" );
    }
 
+   /**
+    * @param jCas -
+    * @return {@link #NO_DOCUMENT_ID} plus an index based upon the number of 
documents without IDs fetched with this class.
+    * This may lead to documents having ids indexed out of order with respect 
to the order in which they were run.
+    */
+   static private String createDocumentId( final JCas jCas ) {
+      final String newId = NO_DOCUMENT_ID + _noDocIdIndex;
+      _noDocIdIndex++;
+      LOGGER.debug( "Creating document ID " + newId );
+      final DocumentID documentIDAnnotation = new DocumentID( jCas );
+      documentIDAnnotation.setDocumentID( newId );
+      documentIDAnnotation.addToIndexes();
+      return newId;
+   }
+
 }

Modified: 
ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipeline/HelloWorldBuildPipeRunner.java
URL: 
http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipeline/HelloWorldBuildPipeRunner.java?rev=1764579&r1=1764578&r2=1764579&view=diff
==============================================================================
--- 
ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipeline/HelloWorldBuildPipeRunner.java
 (original)
+++ 
ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipeline/HelloWorldBuildPipeRunner.java
 Thu Oct 13 04:27:23 2016
@@ -1,10 +1,13 @@
 package org.apache.ctakes.examples.pipeline;
 
 
-import org.apache.ctakes.clinicalpipeline.ClinicalPipelineFactory;
-import org.apache.ctakes.core.pipeline.EntityCollector;
+import 
org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator;
+import org.apache.ctakes.core.ae.SentenceDetector;
+import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
+import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
 import org.apache.ctakes.core.pipeline.PipelineBuilder;
 import org.apache.ctakes.examples.ae.ExampleHelloWorldAnnotator;
+import org.apache.ctakes.postagger.POSTagger;
 import org.apache.log4j.Logger;
 import org.apache.uima.UIMAException;
 
@@ -36,14 +39,18 @@ final public class HelloWorldBuildPipeRu
       try {
          PipelineBuilder builder = new PipelineBuilder();
          builder
-               // Add a simple pre-defined existing pipeline for Tokenization. 
 Could also add engines individually
-               .addDescription( 
ClinicalPipelineFactory.getTokenProcessingPipeline() )
+               // Add a simple pre-defined existing pipeline for Tokenization.
+               // Equivalent of 
ClinicalPipelineFactory.getTokenProcessingPipeline()
+               .add( SimpleSegmentAnnotator.class )
+               .add( SentenceDetector.class )
+               .add( TokenizerAnnotatorPTB.class )
+               .add( ContextDependentTokenizerAnnotator.class )
+               // The POSTagger has a -complex- startup, but it can create its 
own description to handle it
+               .addDescription( POSTagger.createAnnotatorDescription() )
                // add the simple Hello World Annotator
-               .add( ExampleHelloWorldAnnotator.class )
-               // Collect discovered entities for post-run information
-               .collectEntities();
+               .add( ExampleHelloWorldAnnotator.class );
          if ( args.length > 0 ) {
-            // Example to save the Aggregate descriptor to an xml file for 
external use such as the UIMA CVD/CPE
+            // Example to save the Aggregate descriptor to an xml file for 
external use such as the UIMA CVD
             builder.writeXMIs( args[ 0 ] );
          }
          // Run the pipeline with specified text
@@ -51,8 +58,6 @@ final public class HelloWorldBuildPipeRu
       } catch ( IOException | UIMAException multE ) {
          LOGGER.error( multE.getMessage() );
       }
-      //Print out the IdentifiedAnnotation objects
-      LOGGER.info( "\n" + EntityCollector.getInstance().toString() );
    }
 
 

Modified: 
ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipeline/HelloWorldReadPipeRunner.java
URL: 
http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipeline/HelloWorldReadPipeRunner.java?rev=1764579&r1=1764578&r2=1764579&view=diff
==============================================================================
--- 
ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipeline/HelloWorldReadPipeRunner.java
 (original)
+++ 
ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipeline/HelloWorldReadPipeRunner.java
 Thu Oct 13 04:27:23 2016
@@ -1,7 +1,6 @@
 package org.apache.ctakes.examples.pipeline;
 
 
-import org.apache.ctakes.core.pipeline.EntityCollector;
 import org.apache.ctakes.core.pipeline.PipelineBuilder;
 import org.apache.ctakes.core.pipeline.PipelineReader;
 import org.apache.log4j.Logger;
@@ -39,7 +38,7 @@ final public class HelloWorldReadPipeRun
          final PipelineReader reader = new PipelineReader( PIPELINE_FILE_PATH 
);
          PipelineBuilder builder = reader.getBuilder();
          if ( args.length > 0 ) {
-            // Example to save the Aggregate descriptor to an xml file for 
external use such as the UIMA CVD/CPE
+            // Example to save the Aggregate descriptor to an xml file for 
external use such as the UIMA CVD
             builder.writeXMIs( args[ 0 ] );
          }
          // Run the pipeline with specified text
@@ -47,8 +46,6 @@ final public class HelloWorldReadPipeRun
       } catch ( IOException | UIMAException multE ) {
          LOGGER.error( multE.getMessage() );
       }
-      // Log the IdentifiedAnnotation objects
-      LOGGER.info( "\n" + EntityCollector.getInstance().toString() );
    }
 
 

Modified: 
ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/HelloWorldPipeline.txt
URL: 
http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/HelloWorldPipeline.txt?rev=1764579&r1=1764578&r2=1764579&view=diff
==============================================================================
--- 
ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/HelloWorldPipeline.txt
 (original)
+++ 
ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/HelloWorldPipeline.txt
 Thu Oct 13 04:27:23 2016
@@ -10,6 +10,3 @@ addDescription POSTagger
 
 // add the simple Hello World Annotator
 add org.apache.ctakes.examples.ae.ExampleHelloWorldAnnotator
-
-// Collect discovered entities for post-run information
-collectEntities


Reply via email to