Author: rec
Date: Mon Feb  6 13:22:23 2017
New Revision: 1781897

URL: http://svn.apache.org/viewvc?rev=1781897&view=rev
Log:
[UIMA-5298] Life-cycle methods not called on all components instantiated by 
uimaFIT
- Scope exceptions more broadly to ensure that life-cycle events are called in 
more cases
- Add more documentation on invocation of life-cycle methods via uimaFIT

Modified:
    
uima/uimafit/trunk/uimafit-core/src/main/java/org/apache/uima/fit/pipeline/SimplePipeline.java

Modified: 
uima/uimafit/trunk/uimafit-core/src/main/java/org/apache/uima/fit/pipeline/SimplePipeline.java
URL: 
http://svn.apache.org/viewvc/uima/uimafit/trunk/uimafit-core/src/main/java/org/apache/uima/fit/pipeline/SimplePipeline.java?rev=1781897&r1=1781896&r2=1781897&view=diff
==============================================================================
--- 
uima/uimafit/trunk/uimafit-core/src/main/java/org/apache/uima/fit/pipeline/SimplePipeline.java
 (original)
+++ 
uima/uimafit/trunk/uimafit-core/src/main/java/org/apache/uima/fit/pipeline/SimplePipeline.java
 Mon Feb  6 13:22:23 2017
@@ -34,6 +34,9 @@ import org.apache.uima.analysis_engine.A
 import org.apache.uima.cas.CAS;
 import org.apache.uima.collection.CollectionReader;
 import org.apache.uima.collection.CollectionReaderDescription;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.factory.CollectionReaderFactory;
+import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.internal.ResourceManagerFactory;
 import org.apache.uima.fit.util.LifeCycleUtil;
 import org.apache.uima.jcas.JCas;
@@ -54,9 +57,11 @@ public final class SimplePipeline {
   /**
    * <p>
    * Run the CollectionReader and AnalysisEngines as a pipeline. After 
processing all CASes provided
-   * by the reader, the method calls {@link 
AnalysisEngine#collectionProcessComplete()
-   * collectionProcessComplete()} on the engines and {@link Resource#destroy() 
destroy()} on all
-   * engines.
+   * by the reader, the method calls the life-cycle methods
+   * ({@link AnalysisEngine#collectionProcessComplete() 
collectionProcessComplete()} on the engines
+   * and {@link Resource#destroy() destroy()}) on all engines. Note that the 
life-cycle methods are
+   * <b>NOT</b> called on the reader. As the reader was instantiated by the 
caller, it must also be
+   * managed (i.e. destroyed) by the caller.
    * </p>
    * <p>
    * Note that with this method, external resources cannot be shared between 
the reader and the
@@ -76,19 +81,20 @@ public final class SimplePipeline {
    */
   public static void runPipeline(final CollectionReader reader,
           final AnalysisEngineDescription... descs) throws UIMAException, 
IOException {
-    // Create AAE
-    final AnalysisEngineDescription aaeDesc = createEngineDescription(descs);
-
-    // Instantiate AAE
-    final AnalysisEngine aae = createEngine(aaeDesc);
-
-    // Create CAS from merged metadata
-    ResourceManager resMgr = ResourceManagerFactory.newResourceManager();
-    final CAS cas = CasCreationUtils.createCas(asList(reader.getMetaData(), 
aae.getMetaData()), 
-            null, resMgr);
-    reader.typeSystemInit(cas.getTypeSystem());
-
+    AnalysisEngine aae = null;
     try {
+      // Create AAE
+      final AnalysisEngineDescription aaeDesc = createEngineDescription(descs);
+  
+      // Instantiate AAE
+      aae = createEngine(aaeDesc);
+  
+      // Create CAS from merged metadata
+      ResourceManager resMgr = ResourceManagerFactory.newResourceManager();
+      final CAS cas = CasCreationUtils.createCas(asList(reader.getMetaData(), 
aae.getMetaData()), 
+              null, resMgr);
+      reader.typeSystemInit(cas.getTypeSystem());
+
       // Process
       while (reader.hasNext()) {
         reader.getNext(cas);
@@ -114,6 +120,22 @@ public final class SimplePipeline {
    * <p>
    * External resources can be shared between the reader and the analysis 
engines.
    * </p>
+   * <p>
+   * This method is suitable for the batch-processing of sets of documents 
where the overhead
+   * of instantiating the pipeline components does not significantly impact 
the overall runtime
+   * of the pipeline. If you need to avoid this overhead, e.g. because you 
wish to run a pipeline
+   * on individual documents, then you should not use this method. Instead, 
create a CAS using
+   * {@link JCasFactory}, create a reader instance using {@link 
CollectionReaderFactory#createReader},
+   * create an engine instance using {@link 
AnalysisEngineFactory#createEngine} and then use
+   * a loop to process the data, resetting the CAS after each step.
+   * </p>
+   * <pre><code>
+   *   while (reader.hasNext()) {
+   *     reader.getNext(cas);
+   *     engine.process(cas);
+   *     cas.reset();
+   *   }
+   * </code></pre>
    * 
    * @param readerDesc
    *          The CollectionReader that loads the documents into the CAS.
@@ -128,23 +150,26 @@ public final class SimplePipeline {
    */
   public static void runPipeline(final CollectionReaderDescription readerDesc,
           final AnalysisEngineDescription... descs) throws UIMAException, 
IOException {
-    ResourceManager resMgr = ResourceManagerFactory.newResourceManager();
     
-    // Create the components
-    final CollectionReader reader = 
UIMAFramework.produceCollectionReader(readerDesc, resMgr, null);
-
-    // Create AAE
-    final AnalysisEngineDescription aaeDesc = createEngineDescription(descs);
-
-    // Instantiate AAE
-    final AnalysisEngine aae = UIMAFramework.produceAnalysisEngine(aaeDesc, 
resMgr, null);
-
-    // Create CAS from merged metadata
-    final CAS cas = CasCreationUtils.createCas(asList(reader.getMetaData(), 
aae.getMetaData()),
-            null, resMgr);
-    reader.typeSystemInit(cas.getTypeSystem());
-
+    CollectionReader reader = null;
+    AnalysisEngine aae = null;
     try {
+      ResourceManager resMgr = ResourceManagerFactory.newResourceManager();
+      
+      // Create the components
+      reader = UIMAFramework.produceCollectionReader(readerDesc, resMgr, null);
+  
+      // Create AAE
+      final AnalysisEngineDescription aaeDesc = createEngineDescription(descs);
+  
+      // Instantiate AAE
+      aae = UIMAFramework.produceAnalysisEngine(aaeDesc, resMgr, null);
+  
+      // Create CAS from merged metadata
+      final CAS cas = CasCreationUtils.createCas(asList(reader.getMetaData(), 
aae.getMetaData()),
+              null, resMgr);
+      reader.typeSystemInit(cas.getTypeSystem());
+
       // Process
       while (reader.hasNext()) {
         reader.getNext(cas);
@@ -166,6 +191,9 @@ public final class SimplePipeline {
    * Provides a simple way to run a pipeline for a given collection reader and 
sequence of analysis
    * engines. After processing all CASes provided by the reader, the method 
calls
    * {@link AnalysisEngine#collectionProcessComplete() 
collectionProcessComplete()} on the engines.
+   * Note that {@link AnalysisEngine#destroy()} and {@link 
CollectionReader#destroy()} are
+   * <b>NOT</b> called. As the components were instantiated by the caller, 
they must also be managed
+   * (i.e. destroyed) by the caller.
    * </p>
    * <p>
    * External resources can only be shared between the reader and/or the 
analysis engines if the
@@ -222,12 +250,14 @@ public final class SimplePipeline {
    */
   public static void runPipeline(final CAS aCas, final 
AnalysisEngineDescription... aDescs)
           throws ResourceInitializationException, 
AnalysisEngineProcessException {
-    // Create aggregate AE
-    final AnalysisEngineDescription aaeDesc = createEngineDescription(aDescs);
-
-    // Instantiate
-    final AnalysisEngine aae = createEngine(aaeDesc);
+    AnalysisEngine aae = null;
     try {
+      // Create aggregate AE
+      final AnalysisEngineDescription aaeDesc = 
createEngineDescription(aDescs);
+  
+      // Instantiate
+      aae = createEngine(aaeDesc);
+      
       // Process
       aae.process(aCas);
 


Reply via email to