OPENNLP-966: Remove deprecated UIMA trainers

Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/f0020c40
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/f0020c40
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/f0020c40

Branch: refs/heads/master
Commit: f0020c407098873fbd5a369de4863e6b9adc592b
Parents: 15939e9
Author: Jörn Kottmann <jo...@apache.org>
Authored: Thu Jan 26 23:05:44 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Fri Jan 27 13:58:02 2017 +0100

----------------------------------------------------------------------
 opennlp-uima/descriptors/ChunkerTrainer.xml     | 143 ------
 .../descriptors/PersonNameFinderTrainer.xml     | 168 -------
 opennlp-uima/descriptors/PosTaggerTrainer.xml   | 116 -----
 .../descriptors/SentenceDetectorTrainer.xml     | 106 -----
 opennlp-uima/descriptors/TokenizerTrainer.xml   | 124 -----
 .../opennlp/uima/chunker/ChunkerTrainer.java    | 236 ----------
 .../uima/doccat/DocumentCategorizerTrainer.java | 162 -------
 .../uima/namefind/NameFinderTrainer.java        | 447 -------------------
 .../opennlp/uima/postag/POSTaggerTrainer.java   | 240 ----------
 .../sentdetect/SentenceDetectorTrainer.java     | 210 ---------
 .../opennlp/uima/tokenize/TokenizerTrainer.java | 294 ------------
 .../java/opennlp/uima/util/CasConsumerUtil.java | 411 -----------------
 .../opennlp/uima/util/SampleTraceStream.java    |  65 ---
 13 files changed, 2722 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/f0020c40/opennlp-uima/descriptors/ChunkerTrainer.xml
----------------------------------------------------------------------
diff --git a/opennlp-uima/descriptors/ChunkerTrainer.xml b/opennlp-uima/descriptors/ChunkerTrainer.xml
deleted file mode 100644
index fce9599..0000000
--- a/opennlp-uima/descriptors/ChunkerTrainer.xml
+++ /dev/null
@@ -1,143 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License.    
--->
-
-<casConsumerDescription xmlns="http://uima.apache.org/resourceSpecifier">
-       <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-       <implementationName>opennlp.uima.chunker.ChunkerTrainer</implementationName>
-       <processingResourceMetaData>
-               <name>POS Trainer</name>
-               <description></description>
-               <version>${pom.version}</version>
-               <vendor>Apache Software Foundation</vendor>
-               <configurationParameters>
-
-                       <configurationParameter>
-                               <name>opennlp.uima.ModelName</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-
-                       <configurationParameter>
-                               <name>opennlp.uima.SentenceType</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-
-                       <configurationParameter>
-                               <name>opennlp.uima.TokenType</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-
-                       <configurationParameter>
-                               <name>opennlp.uima.POSFeature</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-
-                       <configurationParameter>
-                               <name>opennlp.uima.Language</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-                       
-                       <configurationParameter>
-                               <name>opennlp.uima.ChunkType</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-       
-                       <configurationParameter>
-                               <name>opennlp.uima.ChunkTagFeature</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>       
-               </configurationParameters>
-
-               <configurationParameterSettings>
-                       <nameValuePair>
-                               <name>opennlp.uima.ModelName</name>
-                               <value>
-                                       <string>POS.bin</string>
-                               </value>
-                       </nameValuePair>
-
-                       <nameValuePair>
-                               <name>opennlp.uima.TokenType</name>
-                               <value>
-                                       <string>opennlp.uima.Token</string>
-                               </value>
-                       </nameValuePair>
-
-                       <nameValuePair>
-                               <name>opennlp.uima.SentenceType</name>
-                               <value>
-                                       <string>opennlp.uima.Sentence</string>
-                               </value>
-                       </nameValuePair>
-
-                       <nameValuePair>
-                               <name>opennlp.uima.POSFeature</name>
-                               <value>
-                                       <string>pos</string>
-                               </value>
-                       </nameValuePair>
-
-                       <nameValuePair>
-                               <name>opennlp.uima.Language</name>
-                               <value>
-                                       <string>en</string>
-                               </value>
-                       </nameValuePair>
-
-                       <nameValuePair>
-                               <name>opennlp.uima.ChunkType</name>
-                               <value>
-                                       <string>opennlp.uima.Chunk</string>
-                               </value>
-                       </nameValuePair>
-                       
-                       <nameValuePair>
-                               <name>opennlp.uima.ChunkTagFeature</name>
-                               <value>
-                                       <string>chunkType</string>
-                               </value>
-                       </nameValuePair>
-                       
-               </configurationParameterSettings>
-               
-               <typeSystemDescription />
-               <typePriorities />
-               <fsIndexCollection />
-               <capabilities />
-               <operationalProperties>
-                       <modifiesCas>false</modifiesCas>
-                       <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
-               </operationalProperties>
-       </processingResourceMetaData>
-</casConsumerDescription>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f0020c40/opennlp-uima/descriptors/PersonNameFinderTrainer.xml
----------------------------------------------------------------------
diff --git a/opennlp-uima/descriptors/PersonNameFinderTrainer.xml b/opennlp-uima/descriptors/PersonNameFinderTrainer.xml
deleted file mode 100644
index a7f1f8c..0000000
--- a/opennlp-uima/descriptors/PersonNameFinderTrainer.xml
+++ /dev/null
@@ -1,168 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License.    
--->
-
-<casConsumerDescription xmlns="http://uima.apache.org/resourceSpecifier">
-       <frameworkImplementation>org.apache.uima.java
-       </frameworkImplementation>
-       <implementationName>opennlp.uima.namefind.NameFinderTrainer</implementationName>
-       <processingResourceMetaData>
-               <name>Person Name Finder Trainer</name>
-               <description></description>
-               <version>${pom.version}</version>
-               <vendor>Apache Software Foundation</vendor>
-               <configurationParameters>
-                       <configurationParameter>
-                               <name>opennlp.uima.ModelName</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-                       
-                       <configurationParameter>
-                               <name>opennlp.uima.SentenceType</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-                       
-                       <configurationParameter>
-                               <name>opennlp.uima.TokenType</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-                       
-                       <configurationParameter>
-                               <name>opennlp.uima.NameType</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-
-                       <configurationParameter>
-                               <name>opennlp.uima.TrainingParamsFile</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>false</mandatory>
-                       </configurationParameter>
-                       
-                       <configurationParameter>
-                               <name>opennlp.uima.AdditionalTrainingDataFile</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>false</mandatory>
-                       </configurationParameter>
-                       
-                       <configurationParameter>
-                               <name>opennlp.uima.AdditionalTrainingDataEncoding</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>false</mandatory>
-                       </configurationParameter>
-                       
-                       <configurationParameter>
-                               <name>opennlp.uima.SampleTraceFile</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>false</mandatory>
-                       </configurationParameter>
-                       
-                       <configurationParameter>
-                               <name>opennlp.uima.SampleTraceFileEncoding</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>false</mandatory>
-                       </configurationParameter>
-                       
-                       <configurationParameter>
-                               <name>opennlp.uima.FeatureGeneratorFile</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>false</mandatory>
-                       </configurationParameter>
-                       
-                       <configurationParameter>
-                               <name>opennlp.uima.FeatureGeneratorResources</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>false</mandatory>
-                       </configurationParameter>
-                       
-                       <configurationParameter>
-                               <name>opennlp.uima.Language</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-               </configurationParameters>
-
-               <configurationParameterSettings>
-
-                       <nameValuePair>
-                               <name>opennlp.uima.ModelName</name>
-                               <value>
-                                       <string>Person.bin</string>
-                               </value>
-                       </nameValuePair>
-                       
-                       <nameValuePair>
-                               <name>opennlp.uima.TokenType</name>
-                               <value>
-                                       <string>opennlp.uima.Token</string>
-                               </value>
-                       </nameValuePair>
-                       
-                       <nameValuePair>
-                               <name>opennlp.uima.SentenceType</name>
-                               <value>
-                                       <string>uima.tcas.DocumentAnnotation
-                                       </string>
-                               </value>
-                       </nameValuePair>
-                       <nameValuePair>
-                               <name>opennlp.uima.NameType</name>
-                               <value>
-                                       <string>opennlp.uima.Person</string>
-                               </value>
-                       </nameValuePair>
-
-                       <nameValuePair>
-                               <name>opennlp.uima.Language</name>
-                               <value>
-                                       <string>en</string>
-                               </value>
-                       </nameValuePair>
-
-               </configurationParameterSettings>
-               <typeSystemDescription>
-                       <imports>
-                               <import location="TypeSystem.xml" />
-                       </imports>
-               </typeSystemDescription>
-               <typePriorities />
-               <fsIndexCollection />
-               <capabilities />
-               <operationalProperties>
-                       <modifiesCas>false</modifiesCas>
-                       <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
-               </operationalProperties>
-       </processingResourceMetaData>
-</casConsumerDescription>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f0020c40/opennlp-uima/descriptors/PosTaggerTrainer.xml
----------------------------------------------------------------------
diff --git a/opennlp-uima/descriptors/PosTaggerTrainer.xml b/opennlp-uima/descriptors/PosTaggerTrainer.xml
deleted file mode 100644
index 325c76e..0000000
--- a/opennlp-uima/descriptors/PosTaggerTrainer.xml
+++ /dev/null
@@ -1,116 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License.    
--->
-
-<casConsumerDescription xmlns="http://uima.apache.org/resourceSpecifier">
-       <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-       <implementationName>opennlp.uima.postag.POSTaggerTrainer</implementationName>
-       <processingResourceMetaData>
-               <name>POS Trainer</name>
-               <description></description>
-               <version>${pom.version}</version>
-               <vendor>Apache Software Foundation</vendor>
-               <configurationParameters>
-
-                       <configurationParameter>
-                               <name>opennlp.uima.ModelName</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-
-                       <configurationParameter>
-                               <name>opennlp.uima.SentenceType</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-
-                       <configurationParameter>
-                               <name>opennlp.uima.TokenType</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-
-                       <configurationParameter>
-                               <name>opennlp.uima.POSFeature</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-
-                       <configurationParameter>
-                               <name>opennlp.uima.Language</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-
-               </configurationParameters>
-
-               <configurationParameterSettings>
-                       <nameValuePair>
-                               <name>opennlp.uima.ModelName</name>
-                               <value>
-                                       <string>POS.bin</string>
-                               </value>
-                       </nameValuePair>
-
-                       <nameValuePair>
-                               <name>opennlp.uima.TokenType</name>
-                               <value>
-                                       <string>opennlp.uima.Token</string>
-                               </value>
-                       </nameValuePair>
-
-                       <nameValuePair>
-                               <name>opennlp.uima.SentenceType</name>
-                               <value>
-                                       <string>opennlp.uima.Sentence</string>
-                               </value>
-                       </nameValuePair>
-
-                       <nameValuePair>
-                               <name>opennlp.uima.POSFeature</name>
-                               <value>
-                                       <string>pos</string>
-                               </value>
-                       </nameValuePair>
-
-                       <nameValuePair>
-                               <name>opennlp.uima.Language</name>
-                               <value>
-                                       <string>en</string>
-                               </value>
-                       </nameValuePair>
-
-               </configurationParameterSettings>
-               
-               <typeSystemDescription />
-               <typePriorities />
-               <fsIndexCollection />
-               <capabilities />
-               <operationalProperties>
-                       <modifiesCas>false</modifiesCas>
-                       <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
-               </operationalProperties>
-       </processingResourceMetaData>
-</casConsumerDescription>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f0020c40/opennlp-uima/descriptors/SentenceDetectorTrainer.xml
----------------------------------------------------------------------
diff --git a/opennlp-uima/descriptors/SentenceDetectorTrainer.xml b/opennlp-uima/descriptors/SentenceDetectorTrainer.xml
deleted file mode 100644
index 1db008f..0000000
--- a/opennlp-uima/descriptors/SentenceDetectorTrainer.xml
+++ /dev/null
@@ -1,106 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License.    
--->
-
-<casConsumerDescription xmlns="http://uima.apache.org/resourceSpecifier">
-       <frameworkImplementation>org.apache.uima.java
-       </frameworkImplementation>
-       <implementationName>opennlp.uima.sentdetect.SentenceDetectorTrainer</implementationName>
-       <processingResourceMetaData>
-               <name>Sentence Detector Trainer</name>
-               <description></description>
-               <version>${pom.version}</version>
-               <vendor>Apache Software Foundation</vendor>
-
-               <configurationParameters>
-                       <configurationParameter>
-                               <name>opennlp.uima.ModelName</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-
-                       <configurationParameter>
-                               <name>opennlp.uima.SentenceType</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-
-                       <configurationParameter>
-                               <name>opennlp.uima.Language</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-                       
-                       <configurationParameter>
-                               <name>opennlp.uima.EOSChars</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>false</mandatory>
-                       </configurationParameter>
-                       <configurationParameter>
-                               <name>opennlp.uima.SampleTraceFile</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>false</mandatory>
-                       </configurationParameter>
-                       <configurationParameter>
-                               <name>opennlp.uima.SampleTraceFileEncoding</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>false</mandatory>
-                       </configurationParameter>                       
-               </configurationParameters>
-
-               <configurationParameterSettings>
-                       <nameValuePair>
-                               <name>opennlp.uima.ModelName</name>
-                               <value>
-                                       <string>SentDetect.bin</string>
-                               </value>
-                       </nameValuePair>
-
-                       <nameValuePair>
-                               <name>opennlp.uima.SentenceType</name>
-                               <value>
-                                       <string>opennlp.uima.Sentence</string>
-                               </value>
-                       </nameValuePair>
-
-                       <nameValuePair>
-                               <name>opennlp.uima.Language</name>
-                               <value>
-                                       <string>en</string>
-                               </value>
-                       </nameValuePair>
-
-               </configurationParameterSettings>
-               <typeSystemDescription />
-               <typePriorities />
-               <fsIndexCollection />
-               <capabilities />
-               <operationalProperties>
-                       <modifiesCas>false</modifiesCas>
-                       <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
-               </operationalProperties>
-       </processingResourceMetaData>
-</casConsumerDescription>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f0020c40/opennlp-uima/descriptors/TokenizerTrainer.xml
----------------------------------------------------------------------
diff --git a/opennlp-uima/descriptors/TokenizerTrainer.xml b/opennlp-uima/descriptors/TokenizerTrainer.xml
deleted file mode 100644
index 654f3df..0000000
--- a/opennlp-uima/descriptors/TokenizerTrainer.xml
+++ /dev/null
@@ -1,124 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License.    
--->
-
-<casConsumerDescription xmlns="http://uima.apache.org/resourceSpecifier">
-       <frameworkImplementation>org.apache.uima.java
-       </frameworkImplementation>
-       <implementationName>opennlp.uima.tokenize.TokenizerTrainer
-       </implementationName>
-       <processingResourceMetaData>
-               <name>TokenizerTrainer</name>
-               <description></description>
-               <version>${pom.version}</version>
-               <vendor>Apache Software Foundation</vendor>
-               <configurationParameters>
-                       <configurationParameter>
-                               <name>opennlp.uima.ModelName</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-                       <configurationParameter>
-                               <name>opennlp.uima.SentenceType</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-                       <configurationParameter>
-                               <name>opennlp.uima.TokenType</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-                       <configurationParameter>
-                               <name>opennlp.uima.tokenizer.IsSkipAlphaNumerics
-                               </name>
-                               <type>Boolean</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-                       <configurationParameter>
-                               <name>opennlp.uima.Language</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>true</mandatory>
-                       </configurationParameter>
-                       <configurationParameter>
-                               <name>opennlp.uima.SampleTraceFile</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>false</mandatory>
-                       </configurationParameter>
-                       <configurationParameter>
-                               <name>opennlp.uima.SampleTraceFileEncoding</name>
-                               <type>String</type>
-                               <multiValued>false</multiValued>
-                               <mandatory>false</mandatory>
-                       </configurationParameter>                       
-               </configurationParameters>
-               <configurationParameterSettings>
-                       <nameValuePair>
-                               <name>opennlp.uima.ModelName</name>
-                               <value>
-                                       <string>Tokens.bin</string>
-                               </value>
-                       </nameValuePair>
-                       <nameValuePair>
-                               <name>opennlp.uima.TokenType</name>
-                               <value>
-                                       <string>opennlp.uima.Token</string>
-                               </value>
-                       </nameValuePair>
-                       <nameValuePair>
-                               <name>opennlp.uima.SentenceType</name>
-                               <value>
-                                       <string>uima.tcas.DocumentAnnotation
-                                       </string>
-                               </value>
-                       </nameValuePair>
-                       <nameValuePair>
-                               <name>opennlp.uima.tokenizer.IsSkipAlphaNumerics
-                               </name>
-                               <value>
-                                       <boolean>false</boolean>
-                               </value>
-                       </nameValuePair>
-                       <nameValuePair>
-                               <name>opennlp.uima.Language</name>
-                               <value>
-                                       <string>en</string>
-                               </value>
-                       </nameValuePair>
-               </configurationParameterSettings>
-               <typeSystemDescription>
-                       <imports>
-                               <import location="TypeSystem.xml" />
-                       </imports>
-               </typeSystemDescription>
-               <typePriorities />
-               <fsIndexCollection />
-               <capabilities />
-               <operationalProperties>
-                       <modifiesCas>false</modifiesCas>
-                       <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
-               </operationalProperties>
-       </processingResourceMetaData>
-</casConsumerDescription>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f0020c40/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java
deleted file mode 100644
index 30b3f2f..0000000
--- a/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java
+++ /dev/null
@@ -1,236 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.uima.chunker;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.FSIndex;
-import org.apache.uima.cas.Feature;
-import org.apache.uima.cas.Type;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-import org.apache.uima.util.Level;
-import org.apache.uima.util.Logger;
-import org.apache.uima.util.ProcessTrace;
-
-import opennlp.tools.chunker.ChunkSample;
-import opennlp.tools.chunker.ChunkerFactory;
-import opennlp.tools.chunker.ChunkerME;
-import opennlp.tools.chunker.ChunkerModel;
-import opennlp.tools.ml.maxent.GIS;
-import opennlp.tools.util.ObjectStreamUtils;
-import opennlp.tools.util.model.ModelUtil;
-import opennlp.uima.util.CasConsumerUtil;
-import opennlp.uima.util.ContainingConstraint;
-import opennlp.uima.util.OpennlpUtil;
-import opennlp.uima.util.UimaUtil;
-
-/**
- * OpenNLP Chunker trainer.
- * <p>
- * Mandatory parameters
- * <table border=1>
- * <caption></caption>
- * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- * <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of the token type</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.POSFeature</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.ChunkType</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.ChunkTagFeature</td></tr>
- * </table>
- *
- * @deprecated will be removed after 1.7.1 release, there is no replacement
- */
-@Deprecated
-public class ChunkerTrainer extends CasConsumer_ImplBase {
-
-  private List<ChunkSample> mChunkSamples = new ArrayList<>();
-
-  private UimaContext mContext;
-
-  private String mModelName;
-
-  private Type mSentenceType;
-
-  private Type mTokenType;
-
-  private Feature mPOSFeature;
-
-  private Type mChunkType;
-
-  private Feature mChunkTagFeature;
-
-  private String language;
-
-  /**
-   * Initializes the current instance.
-   */
-  public void initialize() throws ResourceInitializationException {
-
-    super.initialize();
-
-    mContext = getUimaContext();
-
-    Logger mLogger = mContext.getLogger();
-
-    if (mLogger.isLoggable(Level.INFO)) {
-      mLogger.log(Level.INFO, "Initializing the OpenNLP Chunker Trainer.");
-    }
-
-    mModelName = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.MODEL_PARAMETER);
-
-    language = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.LANGUAGE_PARAMETER);
-  }
-
-  /**
-   * Initialize the current instance with the given type system.
-   */
-  public void typeSystemInit(TypeSystem typeSystem)
-      throws ResourceInitializationException {
-    String sentenceTypeName =
-        CasConsumerUtil.getRequiredStringParameter(mContext,
-            UimaUtil.SENTENCE_TYPE_PARAMETER);
-
-    mSentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName);
-
-    String chunkTypeName = CasConsumerUtil.getRequiredStringParameter(mContext,
-        Chunker.CHUNK_TYPE_PARAMETER);
-
-    mChunkType = CasConsumerUtil.getType(typeSystem, chunkTypeName);
-
-    String chunkTagFeature = CasConsumerUtil.getRequiredStringParameter(
-        mContext, Chunker.CHUNK_TAG_FEATURE_PARAMETER);
-
-    mChunkTagFeature = mChunkType.getFeatureByBaseName(chunkTagFeature);
-
-    CasConsumerUtil.checkFeatureType(mChunkTagFeature, CAS.TYPE_NAME_STRING);
-
-    String tokenTypeName = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.TOKEN_TYPE_PARAMETER);
-
-    mTokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName);
-
-    String posFeatureName = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.POS_FEATURE_PARAMETER);
-
-    mPOSFeature = mTokenType.getFeatureByBaseName(posFeatureName);
-
-    CasConsumerUtil.checkFeatureType(mPOSFeature, CAS.TYPE_NAME_STRING);
-  }
-
-  /**
-   * Process the given CAS object.
-   */
-  public void processCas(CAS cas) {
-
-    FSIndex<AnnotationFS> sentenceIndex = cas.getAnnotationIndex(mSentenceType);
-
-    for (AnnotationFS sentenceAnnotation : sentenceIndex) {
-      processSentence(cas, sentenceAnnotation);
-    }
-  }
-
-  private void processSentence(CAS tcas, AnnotationFS sentence) {
-    FSIndex<AnnotationFS> chunkIndex = tcas.getAnnotationIndex(mChunkType);
-
-    ContainingConstraint containingConstraint =
-        new ContainingConstraint(sentence);
-
-    Iterator<AnnotationFS> chunkIterator = tcas.createFilteredIterator(
-        chunkIndex.iterator(), containingConstraint);
-
-    while (chunkIterator.hasNext()) {
-      AnnotationFS chunkAnnotation = chunkIterator.next();
-      processChunk(tcas, chunkAnnotation);
-    }
-  }
-
-  private void processChunk(CAS tcas, AnnotationFS chunk) {
-
-    String chunkTag = chunk.getFeatureValueAsString(mChunkTagFeature);
-
-    FSIndex<AnnotationFS> tokenIndex = tcas.getAnnotationIndex(mTokenType);
-
-    ContainingConstraint containingConstraint =
-        new ContainingConstraint(chunk);
-
-    Iterator<AnnotationFS> tokenIterator = tcas.createFilteredIterator(tokenIndex.iterator(),
-        containingConstraint);
-
-    List<String> tokens = new ArrayList<>();
-    List<String> tags = new ArrayList<>();
-    List<String> chunkTags = new ArrayList<>();
-
-    while (tokenIterator.hasNext()) {
-      AnnotationFS tokenAnnotation = tokenIterator.next();
-
-      tokens.add(tokenAnnotation.getCoveredText().trim());
-      tags.add(tokenAnnotation.getFeatureValueAsString(mPOSFeature));
-      chunkTags.add(chunkTag);
-    }
-
-    mChunkSamples.add(new ChunkSample(tokens, tags, chunkTags));
-  }
-
-  /**
-   * Called if the processing is finished, this method
-   * does the training.
-   */
-  public void collectionProcessComplete(ProcessTrace trace)
-      throws ResourceProcessException, IOException {
-    GIS.PRINT_MESSAGES = false;
-
-    ChunkerModel chunkerModel = ChunkerME.train(language,
-        ObjectStreamUtils.createObjectStream(mChunkSamples),
-        ModelUtil.createDefaultTrainingParameters(), ChunkerFactory.create(null));
-
-    // dereference to allow garbage collection
-    mChunkSamples = null;
-
-    File modelFile = new File(getUimaContextAdmin().getResourceManager()
-        .getDataPath() + File.separatorChar + mModelName);
-
-    OpennlpUtil.serialize(chunkerModel, modelFile);
-  }
-
-  /**
-   * The trainer is not stateless.
-   */
-  public boolean isStateless() {
-    return false;
-  }
-
-  /**
-   * Releases allocated resources.
-   */
-  public void destroy() {
-    mChunkSamples = null;
-  }
-}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f0020c40/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java
deleted file mode 100644
index ca788d7..0000000
--- a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.uima.doccat;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.FSIndex;
-import org.apache.uima.cas.Feature;
-import org.apache.uima.cas.Type;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-import org.apache.uima.util.Level;
-import org.apache.uima.util.Logger;
-import org.apache.uima.util.ProcessTrace;
-
-import opennlp.tools.doccat.DoccatFactory;
-import opennlp.tools.doccat.DoccatModel;
-import opennlp.tools.doccat.DocumentCategorizerME;
-import opennlp.tools.doccat.DocumentSample;
-import opennlp.tools.ml.maxent.GIS;
-import opennlp.tools.util.ObjectStreamUtils;
-import opennlp.tools.util.TrainingParameters;
-import opennlp.uima.util.CasConsumerUtil;
-import opennlp.uima.util.OpennlpUtil;
-import opennlp.uima.util.UimaUtil;
-
-/**
- * OpenNLP NameFinder trainer.
- * <p>
- * Note: This class is still work in progress, and should not be used!
- *
- * @deprecated will be removed after 1.7.1 release, there is no replacement
- */
-@Deprecated
-
-public class DocumentCategorizerTrainer extends CasConsumer_ImplBase {
-
-  private UimaContext mContext;
-
-  private String mModelName;
-
-  private List<DocumentSample> documentSamples = new ArrayList<>();
-
-  private Type mCategoryType;
-
-  private Feature mCategoryFeature;
-
-  private String language;
-
-  public void initialize() throws ResourceInitializationException {
-
-    super.initialize();
-
-    mContext = getUimaContext();
-
-    Logger mLogger = mContext.getLogger();
-
-    if (mLogger.isLoggable(Level.INFO)) {
-      mLogger.log(Level.INFO, "Initializing the OpenNLP Doccat Trainer.");
-    }
-
-    mModelName = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.MODEL_PARAMETER);
-
-    language = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.LANGUAGE_PARAMETER);
-  }
-
-  public void typeSystemInit(TypeSystem typeSystem)
-      throws ResourceInitializationException {
-
-    String tokenTypeName = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.SENTENCE_TYPE_PARAMETER);
-
-    Type mTokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName);
-
-    String categoryTypeName = CasConsumerUtil.getRequiredStringParameter(mContext,
-        "opennlp.uima.doccat.CategoryType");
-
-    mCategoryType = CasConsumerUtil.getType(typeSystem, categoryTypeName);
-
-    // get feature name
-    String categoryFeatureName = CasConsumerUtil.getRequiredStringParameter(mContext,
-        "opennlp.uima.doccat.CategoryFeature");
-
-    mCategoryFeature = mCategoryType.getFeatureByBaseName(categoryFeatureName);
-  }
-
-  public void processCas(CAS cas) throws ResourceProcessException {
-
-    FSIndex categoryIndex = cas.getAnnotationIndex(mCategoryType);
-
-    if (categoryIndex.size() > 0) {
-      AnnotationFS categoryAnnotation =
-          (AnnotationFS) categoryIndex.iterator().next();
-
-      // add to event collection
-
-      DocumentSample sample = new DocumentSample(
-          categoryAnnotation.getStringValue(mCategoryFeature),
-          cas.getDocumentText());
-
-      documentSamples.add(sample);
-    }
-  }
-
-  public void collectionProcessComplete(ProcessTrace trace)
-      throws ResourceProcessException, IOException {
-
-    GIS.PRINT_MESSAGES = false;
-
-    TrainingParameters params = new TrainingParameters();
-    params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100));
-    params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(0));
-
-    DoccatModel categoryModel = DocumentCategorizerME.train(language,
-        ObjectStreamUtils.createObjectStream(documentSamples), params, new DoccatFactory());
-
-    File modelFile = new File(getUimaContextAdmin().getResourceManager()
-        .getDataPath() + File.separatorChar + mModelName);
-
-    OpennlpUtil.serialize(categoryModel, modelFile);
-  }
-
-  /**
-   * The trainer is not stateless.
-   */
-  public boolean isStateless() {
-    return false;
-  }
-
-  /**
-   * Destroys the current instance.
-   */
-  public void destroy() {
-    // dereference to allow garbage collection
-    documentSamples = null;
-  }
-}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f0020c40/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java
----------------------------------------------------------------------
diff --git a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java
deleted file mode 100644
index af00f58..0000000
--- a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java
+++ /dev/null
@@ -1,447 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.uima.namefind;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.Writer;
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.FSIndex;
-import org.apache.uima.cas.Type;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-import org.apache.uima.util.Level;
-import org.apache.uima.util.Logger;
-import org.apache.uima.util.ProcessTrace;
-
-import opennlp.tools.cmdline.namefind.TokenNameFinderTrainerTool;
-import opennlp.tools.ml.maxent.GIS;
-import opennlp.tools.namefind.BioCodec;
-import opennlp.tools.namefind.NameFinderME;
-import opennlp.tools.namefind.NameSample;
-import opennlp.tools.namefind.NameSampleDataStream;
-import opennlp.tools.namefind.TokenNameFinderFactory;
-import opennlp.tools.namefind.TokenNameFinderModel;
-import opennlp.tools.util.InputStreamFactory;
-import opennlp.tools.util.MarkableFileInputStreamFactory;
-import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.ObjectStreamUtils;
-import opennlp.tools.util.PlainTextByLineStream;
-import opennlp.tools.util.Span;
-import opennlp.tools.util.TrainingParameters;
-import opennlp.uima.util.CasConsumerUtil;
-import opennlp.uima.util.ContainingConstraint;
-import opennlp.uima.util.OpennlpUtil;
-import opennlp.uima.util.SampleTraceStream;
-import opennlp.uima.util.UimaUtil;
-
-/**
- * OpenNLP NameFinder trainer.
- * <p>
- * Mandatory parameters
- * <table border=1>
- *   <caption></caption>
- *   <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- *   <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.Language</td> <td>The language code</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of the token type</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.NameType</td> <td>The full name of the name type</td></tr>
- *  </table>
- *
- * Optional parameters
- * <table border=1>
- *   <caption></caption>
- *   <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- *   <tr><td>String</td> <td>opennlp.uima.opennlp.uima.TrainingParamsFile</td>
- *   <td>Training Parameters Properties file</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.FeatureGeneratorFile</td>
- *   <td>Feature Generator definition file which contain the feature generator configuration</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.FeatureGeneratorResources</td>
- *   <td>Feature Generator resources dictionary</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.AdditionalTrainingDataFile</td>
- *   <td>Training file which contains additional data in the OpenNLP format</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.AdditionalTrainingDataEncoding</td>
- *   <td>Encoding of the additional training data</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.SampleTraceFile</td>
- *   <td>All training samples are traced to this file</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.SampleTraceFileEncoding</td>
- *   <td>Encoding of the sample trace file</td></tr>
- * </table>
- * <p>
- *
- * @deprecated will be removed after 1.7.1 release, there is no replacement
- */
-@Deprecated
-
-public final class NameFinderTrainer extends CasConsumer_ImplBase {
-
-  private static final String FEATURE_GENERATOR_DEFINITION_FILE_PARAMETER =
-      "opennlp.uima.FeatureGeneratorFile";
-  private static final String FEATURE_GENERATOR_RESOURCES_PARAMETER =
-      "opennlp.uima.FeatureGeneratorResources";
-
-  private Logger logger;
-
-  private String modelPath;
-
-  private byte featureGeneratorDefinition[];
-
-  private File featureGeneratorResourceDir;
-
-  private String additionalTrainingDataFile;
-
-  private String additionalTrainingDataEncoding;
-
-  private File sampleTraceFile = null;
-
-  private String sampleTraceFileEncoding = null;
-
-  private Type sentenceType;
-
-  private Type tokenType;
-
-  private Type nameType;
-
-  private String language;
-
-  // TODO: Keeping all events in memory limits the size of the training corpus
-  // Possible solutions:
-  // - Write all events to disk
-  // - Directly start indexing with a blocking sample stream, the indexer will then write everything
-  //   to disk or could store the events much more space efficient in memory
-
-  private List<NameSample> nameFinderSamples = new ArrayList<>();
-  private TrainingParameters trainingParams;
-
-  /**
-   * Initializes the current instance.
-   */
-  public void initialize() throws ResourceInitializationException {
-
-    super.initialize();
-
-    logger = getUimaContext().getLogger();
-
-    if (logger.isLoggable(Level.INFO)) {
-      logger.log(Level.INFO, "Initializing the OpenNLP Name Trainer.");
-    }
-
-    modelPath = CasConsumerUtil.getRequiredStringParameter(getUimaContext(),
-        UimaUtil.MODEL_PARAMETER);
-
-    language = CasConsumerUtil.getRequiredStringParameter(getUimaContext(),
-        UimaUtil.LANGUAGE_PARAMETER);
-
-    trainingParams = OpennlpUtil.loadTrainingParams(CasConsumerUtil.getOptionalStringParameter(
-        getUimaContext(), UimaUtil.TRAINING_PARAMS_FILE_PARAMETER), true);
-
-    String featureGeneratorDefinitionFile = CasConsumerUtil.getOptionalStringParameter(
-        getUimaContext(), FEATURE_GENERATOR_DEFINITION_FILE_PARAMETER);
-
-    if (featureGeneratorDefinitionFile != null) {
-      try {
-        featureGeneratorDefinition = OpennlpUtil.loadBytes(new File(featureGeneratorDefinitionFile));
-      } catch (IOException e) {
-        throw new ResourceInitializationException(e);
-      }
-
-      String featureGeneratorResourcesDirName = CasConsumerUtil.getOptionalStringParameter(
-          getUimaContext(), FEATURE_GENERATOR_RESOURCES_PARAMETER);
-
-      if (featureGeneratorResourcesDirName != null) {
-        featureGeneratorResourceDir = new File(featureGeneratorResourcesDirName);
-      }
-    }
-
-    additionalTrainingDataFile = CasConsumerUtil.getOptionalStringParameter(
-        getUimaContext(), UimaUtil.ADDITIONAL_TRAINING_DATA_FILE);
-
-    // If the additional training data is specified, the encoding must be provided!
-    if (additionalTrainingDataFile != null) {
-      additionalTrainingDataEncoding = CasConsumerUtil.getRequiredStringParameter(
-          getUimaContext(), UimaUtil.ADDITIONAL_TRAINING_DATA_ENCODING);
-    }
-
-    String sampleTraceFileName = CasConsumerUtil.getOptionalStringParameter(
-        getUimaContext(), "opennlp.uima.SampleTraceFile");
-
-    if (sampleTraceFileName != null) {
-      sampleTraceFile = new File(getUimaContextAdmin().getResourceManager()
-          .getDataPath() + File.separatorChar + sampleTraceFileName);
-      sampleTraceFileEncoding = CasConsumerUtil.getRequiredStringParameter(
-          getUimaContext(), "opennlp.uima.SampleTraceFileEncoding");
-    }
-  }
-
-  /**
-   * Initialize the current instance with the given type system.
-   */
-  public void typeSystemInit(TypeSystem typeSystem)
-      throws ResourceInitializationException {
-
-    String sentenceTypeName =
-        CasConsumerUtil.getRequiredStringParameter(getUimaContext(),
-        UimaUtil.SENTENCE_TYPE_PARAMETER);
-
-    sentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName);
-
-    String tokenTypeName = CasConsumerUtil.getRequiredStringParameter(getUimaContext(),
-        UimaUtil.TOKEN_TYPE_PARAMETER);
-
-    tokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName);
-
-    String nameTypeName = CasConsumerUtil.getRequiredStringParameter(getUimaContext(),
-        NameFinder.NAME_TYPE_PARAMETER);
-
-    nameType = CasConsumerUtil.getType(typeSystem, nameTypeName);
-  }
-
-  /**
-   * Creates a {@link List} from an {@link Iterator}.
-   *
-   * @param <T>
-   * @param it
-   * @return
-   */
-  private static <T> List<T> iteratorToList(Iterator<T> it) {
-    List<T> list = new LinkedList<>();
-
-    while (it.hasNext()) {
-      list.add(it.next());
-    }
-
-    return list;
-  }
-
-  private static boolean isContaining(AnnotationFS annotation,
-      AnnotationFS containtedAnnotation) {
-    boolean isStartContaining = annotation.getBegin() <= containtedAnnotation.getBegin();
-    return isStartContaining && annotation.getEnd() >= containtedAnnotation.getEnd();
-
-  }
-
-  /**
-   * Creates the name spans out of a list of token annotations and a list of entity annotations.
-   * <p>
-   * The name spans for the name finder use a token index and not on a character index which
-   * is used by the entity annotations.
-   *
-   * @param tokenList
-   * @param entityAnnotations
-   * @return
-   */
-  private static Span[] createNames(List<AnnotationFS> tokenList, List<AnnotationFS> entityAnnotations) {
-
-    List<Span> nameList = new LinkedList<>();
-
-    AnnotationFS currentEntity = null;
-
-    int startIndex = -1;
-    int index = 0;
-    for (AnnotationFS token : tokenList) {
-      for (AnnotationFS entity : entityAnnotations) {
-
-        if (!isContaining(entity, token)) {
-          // ... end of an entity
-          if (currentEntity == entity) {
-            nameList.add(new Span(startIndex, index));
-
-            startIndex = -1;
-            currentEntity = null;
-            // break;
-          } else {
-            continue;
-          }
-        }
-
-        // is this token start of new entity
-        if (currentEntity == null && isContaining(entity, token)) {
-          startIndex = index;
-
-          currentEntity = entity;
-        }
-      }
-
-      index++;
-    }
-
-    if (currentEntity != null) {
-      Span name = new Span(startIndex, index);
-      nameList.add(name);
-    }
-
-    return nameList.toArray(new Span[nameList.size()]);
-  }
-
-  /*
-   * Process the given CAS object.
-   */
-  /**
-   * Process the given CAS object.
-   */
-  public void processCas(CAS cas) {
-    FSIndex<AnnotationFS> sentenceIndex = cas.getAnnotationIndex(sentenceType);
-
-    boolean isClearAdaptiveData = true;
-
-    for (AnnotationFS sentenceAnnotation : sentenceIndex) {
-      ContainingConstraint sentenceContainingConstraint = new 
ContainingConstraint(
-          sentenceAnnotation);
-
-      FSIndex<AnnotationFS> tokenAnnotations = 
cas.getAnnotationIndex(tokenType);
-
-      Iterator<AnnotationFS> containingTokens = 
cas.createFilteredIterator(tokenAnnotations
-          .iterator(), sentenceContainingConstraint);
-
-      FSIndex<AnnotationFS> allNames = cas.getAnnotationIndex(nameType);
-
-      Iterator<AnnotationFS> containingNames = 
cas.createFilteredIterator(allNames.iterator(),
-          sentenceContainingConstraint);
-
-      List<AnnotationFS> tokenList = iteratorToList(containingTokens);
-
-      Span names[] = createNames(tokenList, iteratorToList(containingNames));
-
-      // create token array
-      String tokenArray[] = new String[tokenList.size()];
-
-      for (int i = 0; i < tokenArray.length; i++) {
-        tokenArray[i] = tokenList.get(i).getCoveredText();
-      }
-
-      NameSample trainingSentence = new NameSample(tokenArray, names, null, 
isClearAdaptiveData);
-
-      if (trainingSentence.getSentence().length != 0) {
-        nameFinderSamples.add(trainingSentence);
-
-        if (isClearAdaptiveData) {
-          isClearAdaptiveData = false;
-        }
-      } else {
-        if (logger.isLoggable(Level.INFO)) {
-          logger.log(Level.INFO, "Sentence without tokens: " +
-              sentenceAnnotation.getCoveredText());
-        }
-      }
-    }
-  }
-
-  /**
-   * Called if the processing is finished, this method
-   * does the training.
-   */
-  public void collectionProcessComplete(ProcessTrace trace)
-      throws ResourceProcessException, IOException {
-
-    if (logger.isLoggable(Level.INFO)) {
-      logger.log(Level.INFO, "Collected " + nameFinderSamples.size() +
-          " name samples.");
-    }
-
-    GIS.PRINT_MESSAGES = false;
-
-    // create training stream ...
-    ObjectStream<NameSample> samples = 
ObjectStreamUtils.createObjectStream(nameFinderSamples);
-
-    Writer samplesOut = null;
-    TokenNameFinderModel nameModel;
-    try {
-      if (additionalTrainingDataFile != null) {
-
-        if (logger.isLoggable(Level.INFO)) {
-          logger.log(Level.INFO, "Using additional training data file: " + 
additionalTrainingDataFile);
-        }
-
-        InputStreamFactory additionalTrainingDataIn = new 
MarkableFileInputStreamFactory(
-            new File(additionalTrainingDataFile));
-        Charset additionalTrainingDataCharset = Charset
-            .forName(additionalTrainingDataEncoding);
-
-        ObjectStream<NameSample> additionalSamples = new NameSampleDataStream(
-            new PlainTextByLineStream(additionalTrainingDataIn,
-                additionalTrainingDataCharset));
-
-        samples = ObjectStreamUtils.createObjectStream(samples, 
additionalSamples);
-      }
-
-      if (sampleTraceFile != null) {
-        samplesOut = new OutputStreamWriter(new 
FileOutputStream(sampleTraceFile), sampleTraceFileEncoding);
-        samples = new SampleTraceStream<>(samples, samplesOut);
-      }
-
-      Map<String, Object> resourceMap;
-
-      if (featureGeneratorResourceDir != null) {
-        resourceMap = 
TokenNameFinderTrainerTool.loadResources(featureGeneratorResourceDir, null);
-      }
-      else {
-        resourceMap = Collections.emptyMap();
-      }
-
-      nameModel = NameFinderME.train(language, null, samples, trainingParams,
-          new TokenNameFinderFactory(featureGeneratorDefinition, resourceMap, 
new BioCodec()));
-    }
-    finally {
-
-      if (samplesOut != null) {
-        samplesOut.close();
-      }
-    }
-
-    // dereference to allow garbage collection
-    nameFinderSamples = null;
-
-    File modelFile = new File(getUimaContextAdmin().getResourceManager()
-        .getDataPath() + File.separatorChar + modelPath);
-
-    OpennlpUtil.serialize(nameModel, modelFile);
-
-    if (logger.isLoggable(Level.INFO)) {
-      logger.log(Level.INFO, "Model was written to: " + 
modelFile.getAbsolutePath());
-    }
-  }
-
-  /**
-   * The trainer is not stateless.
-   */
-  public boolean isStateless() {
-    return false;
-  }
-
-  /**
-   * Destroys the current instance.
-   */
-  public void destroy() {
-    // dereference to allow garbage collection
-    nameFinderSamples = null;
-  }
-}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f0020c40/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java
----------------------------------------------------------------------
diff --git 
a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java 
b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java
deleted file mode 100644
index be7651e..0000000
--- a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.uima.postag;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.FSIndex;
-import org.apache.uima.cas.Feature;
-import org.apache.uima.cas.Type;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-import org.apache.uima.util.Level;
-import org.apache.uima.util.Logger;
-import org.apache.uima.util.ProcessTrace;
-
-import opennlp.tools.ml.maxent.GIS;
-import opennlp.tools.postag.POSDictionary;
-import opennlp.tools.postag.POSModel;
-import opennlp.tools.postag.POSSample;
-import opennlp.tools.postag.POSTaggerFactory;
-import opennlp.tools.postag.POSTaggerME;
-import opennlp.tools.util.ObjectStreamUtils;
-import opennlp.tools.util.TrainingParameters;
-import opennlp.uima.util.AnnotatorUtil;
-import opennlp.uima.util.CasConsumerUtil;
-import opennlp.uima.util.ContainingConstraint;
-import opennlp.uima.util.OpennlpUtil;
-import opennlp.uima.util.UimaUtil;
-
-/**
- * OpenNLP POSTagger trainer.
- * <p>
- * Mandatory parameters
- * <table border=1>
- * <caption></caption>
- * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- * <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the 
model file</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of 
the sentence type</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of 
the token type</td></tr>
- * <tr><td>String</td> <td>pennlp.uima.POSFeature</td> <td>The name of the 
token pos feature,
- * the feature must be of type String</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.TagDictionaryName</td></tr>
- * </table>
- *
- * @deprecated will be removed after 1.7.1 release, there is no replacement
- */
-@Deprecated
-
-public class POSTaggerTrainer extends CasConsumer_ImplBase {
-
-  public static final String TAG_DICTIONARY_NAME = 
"opennlp.uima.TagDictionaryName";
-
-  private UimaContext mContext;
-
-  private Type mSentenceType;
-
-  private Type mTokenType;
-
-  private String mModelName;
-
-  private Feature mPOSFeature;
-
-  private Logger mLogger;
-
-  private List<POSSample> mPOSSamples = new ArrayList<>();
-
-  private String language;
-
-  private POSDictionary tagDictionary;
-
-  /**
-   * Initializes the current instance.
-   */
-  public void initialize() throws ResourceInitializationException {
-
-    super.initialize();
-
-    mContext = getUimaContext();
-
-    mLogger = mContext.getLogger();
-
-    if (mLogger.isLoggable(Level.INFO)) {
-      mLogger.log(Level.INFO, "Initializing the OpenNLP " +
-          "POSTagger trainer.");
-    }
-
-    mModelName = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.MODEL_PARAMETER);
-
-    language = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.LANGUAGE_PARAMETER);
-
-    String tagDictionaryName = 
CasConsumerUtil.getOptionalStringParameter(mContext,
-        TAG_DICTIONARY_NAME);
-
-    if (tagDictionaryName != null) {
-      try (InputStream dictIn = AnnotatorUtil.getResourceAsStream(mContext, 
tagDictionaryName)) {
-        tagDictionary = POSDictionary.create(dictIn);
-      } catch (final IOException e) {
-        // if this fails just print error message and continue
-        final String message = "IOException during tag dictionary reading, "
-            + "running without tag dictionary: " + e.getMessage();
-
-        if (this.mLogger.isLoggable(Level.WARNING)) {
-          this.mLogger.log(Level.WARNING, message);
-        }
-      }
-    }
-  }
-
-  /**
-   * Initialize the current instance with the given type system.
-   */
-  public void typeSystemInit(TypeSystem typeSystem)
-      throws ResourceInitializationException {
-    String sentenceTypeName = 
CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.SENTENCE_TYPE_PARAMETER);
-
-    if (mLogger.isLoggable(Level.INFO)) {
-      mLogger.log(Level.INFO, UimaUtil.SENTENCE_TYPE_PARAMETER + ": " +
-          sentenceTypeName);
-    }
-
-    mSentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName);
-
-    String tokenTypeName = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.TOKEN_TYPE_PARAMETER);
-
-    mTokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName);
-
-    String posFeatureName = 
CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.POS_FEATURE_PARAMETER);
-
-    mPOSFeature = mTokenType.getFeatureByBaseName(posFeatureName);
-  }
-
-  /**
-   * Process the given CAS object.
-   */
-  public void processCas(CAS cas) {
-
-    FSIndex<AnnotationFS> sentenceAnnotations = 
cas.getAnnotationIndex(mSentenceType);
-
-    for (AnnotationFS sentence : sentenceAnnotations) {
-      process(cas, sentence);
-    }
-  }
-
-  private void process(CAS tcas, AnnotationFS sentence) {
-
-    FSIndex<AnnotationFS> allTokens = tcas.getAnnotationIndex(mTokenType);
-
-    ContainingConstraint containingConstraint =
-        new ContainingConstraint(sentence);
-
-    List<String> tokens = new ArrayList<>();
-    List<String> tags = new ArrayList<>();
-
-    Iterator<AnnotationFS> containingTokens = tcas.createFilteredIterator(
-        allTokens.iterator(), containingConstraint);
-
-    while (containingTokens.hasNext()) {
-
-      AnnotationFS tokenAnnotation = containingTokens.next();
-
-      String tag = tokenAnnotation.getFeatureValueAsString(mPOSFeature);
-
-      tokens.add(tokenAnnotation.getCoveredText().trim());
-      tags.add(tag);
-    }
-
-    mPOSSamples.add(new POSSample(tokens, tags));
-  }
-
-  /**
-   * Called if the processing is finished, this method
-   * does the training.
-   */
-  public void collectionProcessComplete(ProcessTrace trace)
-      throws ResourceProcessException, IOException {
-
-    GIS.PRINT_MESSAGES = false;
-
-    TrainingParameters params = new TrainingParameters();
-    params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100));
-    params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(5));
-
-    POSModel posTaggerModel = POSTaggerME.train(language,
-        ObjectStreamUtils.createObjectStream(mPOSSamples),
-        params, new POSTaggerFactory(null, tagDictionary));
-
-    // dereference to allow garbage collection
-    mPOSSamples = null;
-
-    File modelFile = new File(getUimaContextAdmin().getResourceManager()
-        .getDataPath() + File.separatorChar + mModelName);
-
-    OpennlpUtil.serialize(posTaggerModel, modelFile);
-  }
-
-  /**
-   * The trainer is not stateless.
-   */
-  public boolean isStateless() {
-    return false;
-  }
-
-  /**
-   * Releases allocated resources.
-   */
-  public void destroy() {
-    // dereference to allow garbage collection
-    mPOSSamples = null;
-  }
-}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f0020c40/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java
----------------------------------------------------------------------
diff --git 
a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java
 
b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java
deleted file mode 100644
index 99600b8..0000000
--- 
a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.uima.sentdetect;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.Writer;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.FSIndex;
-import org.apache.uima.cas.Type;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-import org.apache.uima.util.Level;
-import org.apache.uima.util.Logger;
-import org.apache.uima.util.ProcessTrace;
-
-import opennlp.tools.ml.maxent.GIS;
-import opennlp.tools.sentdetect.SentenceDetectorFactory;
-import opennlp.tools.sentdetect.SentenceDetectorME;
-import opennlp.tools.sentdetect.SentenceModel;
-import opennlp.tools.sentdetect.SentenceSample;
-import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.ObjectStreamUtils;
-import opennlp.tools.util.Span;
-import opennlp.tools.util.TrainingParameters;
-import opennlp.tools.util.model.ModelUtil;
-import opennlp.uima.util.CasConsumerUtil;
-import opennlp.uima.util.OpennlpUtil;
-import opennlp.uima.util.SampleTraceStream;
-import opennlp.uima.util.UimaUtil;
-
-/**
- * OpenNLP SentenceDetector trainer.
- * <p>
- * Mandatory parameters
- * <table border=1>
- * <caption></caption>
- * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- * <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the 
model file</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of 
the sentence type</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.EOSChars</td>
- * <td>A string containing end-of-sentence characters</td></tr>
- * </table>
- *
- * @deprecated will be removed after 1.7.1 release, there is no replacement
- */
-@Deprecated
-public final class SentenceDetectorTrainer extends CasConsumer_ImplBase {
-
-  private List<SentenceSample> sentenceSamples = new ArrayList<>();
-
-  private Type mSentenceType;
-
-  private String mModelName;
-
-  private String language = "en";
-
-  private UimaContext mContext;
-
-  private String eosChars;
-
-  private File sampleTraceFile;
-
-  private String sampleTraceFileEncoding;
-
-  /**
-   * Initializes the current instance.
-   */
-  public void initialize() throws ResourceInitializationException {
-
-    super.initialize();
-
-    mContext = getUimaContext();
-
-    Logger mLogger = mContext.getLogger();
-
-    if (mLogger.isLoggable(Level.INFO)) {
-      mLogger.log(Level.INFO, "Initializing the OpenNLP SentenceDetector " +
-          "trainer.");
-    }
-
-    mModelName = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.MODEL_PARAMETER);
-
-    language = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.LANGUAGE_PARAMETER);
-
-    eosChars = CasConsumerUtil.getOptionalStringParameter(mContext, 
"opennlp.uima.EOSChars");
-
-
-    String sampleTraceFileName = CasConsumerUtil.getOptionalStringParameter(
-        getUimaContext(), "opennlp.uima.SampleTraceFile");
-
-    if (sampleTraceFileName != null) {
-      sampleTraceFile = new File(getUimaContextAdmin().getResourceManager()
-          .getDataPath() + File.separatorChar + sampleTraceFileName);
-      sampleTraceFileEncoding = CasConsumerUtil.getRequiredStringParameter(
-          getUimaContext(), "opennlp.uima.SampleTraceFileEncoding");
-    }
-  }
-
-  /**
-   * Initializes the current instance with the given type system.
-   */
-  public void typeSystemInit(TypeSystem typeSystem)
-      throws ResourceInitializationException {
-
-    String sentenceTypeName =
-        CasConsumerUtil.getRequiredStringParameter(mContext,
-            UimaUtil.SENTENCE_TYPE_PARAMETER);
-
-    mSentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName);
-  }
-
-  /**
-   * Process the given CAS object.
-   */
-  public void processCas(CAS cas) {
-
-    FSIndex<AnnotationFS> sentenceIndex = 
cas.getAnnotationIndex(mSentenceType);
-
-    Span[] sentSpans = new Span[sentenceIndex.size()];
-
-    int i = 0;
-    for (AnnotationFS sentenceAnnotation : sentenceIndex) {
-      sentSpans[i++] = new Span(sentenceAnnotation.getBegin(), 
sentenceAnnotation.getEnd());
-    }
-
-    // TODO: The line cleaning should be done more carefully
-    sentenceSamples.add(new SentenceSample(cas.getDocumentText().replace('\n', 
' '), sentSpans));
-  }
-
-  /**
-   * Called if the processing is finished, this method
-   * does the training.
-   */
-  public void collectionProcessComplete(ProcessTrace trace)
-      throws ResourceProcessException, IOException {
-    GIS.PRINT_MESSAGES = false;
-
-    char eos[] = null;
-    if (eosChars != null) {
-      eos = eosChars.toCharArray();
-    }
-
-    SentenceDetectorFactory sdFactory = SentenceDetectorFactory.create(
-        null, language, true, null, eos);
-
-    // TrainingParameters mlParams = ModelUtil.createTrainingParameters(100, 
5);
-    TrainingParameters mlParams = ModelUtil.createDefaultTrainingParameters();
-    ObjectStream<SentenceSample> samples = 
ObjectStreamUtils.createObjectStream(sentenceSamples);
-
-    Writer samplesOut;
-
-    if (sampleTraceFile != null) {
-      samplesOut = new OutputStreamWriter(new 
FileOutputStream(sampleTraceFile), sampleTraceFileEncoding);
-      samples = new SampleTraceStream<>(samples, samplesOut);
-    }
-
-    SentenceModel sentenceModel = SentenceDetectorME.train(language, samples,
-        sdFactory, mlParams);
-
-    // dereference to allow garbage collection
-    sentenceSamples = null;
-
-    File modelFile = new File(getUimaContextAdmin().getResourceManager()
-        .getDataPath() + File.separatorChar + mModelName);
-
-    OpennlpUtil.serialize(sentenceModel, modelFile);
-  }
-
-  /**
-   * The trainer is not stateless.
-   */
-  public boolean isStateless() {
-    return false;
-  }
-
-  /**
-   * Releases allocated resources.
-   */
-  public void destroy() {
-    // dereference to allow garbage collection
-    sentenceSamples = null;
-  }
-}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/f0020c40/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java
----------------------------------------------------------------------
diff --git 
a/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java 
b/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java
deleted file mode 100644
index 35f24a2..0000000
--- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java
+++ /dev/null
@@ -1,294 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.uima.tokenize;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.Writer;
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.FSIndex;
-import org.apache.uima.cas.Type;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-import org.apache.uima.util.Level;
-import org.apache.uima.util.Logger;
-import org.apache.uima.util.ProcessTrace;
-
-import opennlp.tools.ml.maxent.GIS;
-import opennlp.tools.tokenize.TokenSample;
-import opennlp.tools.tokenize.TokenSampleStream;
-import opennlp.tools.tokenize.TokenizerFactory;
-import opennlp.tools.tokenize.TokenizerME;
-import opennlp.tools.tokenize.TokenizerModel;
-import opennlp.tools.util.InputStreamFactory;
-import opennlp.tools.util.MarkableFileInputStreamFactory;
-import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.ObjectStreamUtils;
-import opennlp.tools.util.PlainTextByLineStream;
-import opennlp.tools.util.Span;
-import opennlp.tools.util.model.ModelUtil;
-import opennlp.uima.util.CasConsumerUtil;
-import opennlp.uima.util.ContainingConstraint;
-import opennlp.uima.util.OpennlpUtil;
-import opennlp.uima.util.SampleTraceStream;
-import opennlp.uima.util.UimaUtil;
-
-/**
- * OpenNLP Tokenizer trainer.
- * <p>
- * Mandatory parameters
- * <table border=1>
- * <caption></caption>
- * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- * <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the 
model file</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of 
the sentence type</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of 
the token type</td></tr>
- * </table>
- * <p>
- * Optional parameters
- * <table border=1>
- * <caption></caption>
- * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- * <tr><td>Boolean</td> 
<td>opennlp.uima.tokenizer.IsSkipAlphaNumerics</td></tr>
- * </table>
- *
- * @deprecated will be removed after 1.7.1 release, there is no replacement
- */
-@Deprecated
-
-public final class TokenizerTrainer extends CasConsumer_ImplBase {
-
-  private static final String IS_ALPHA_NUMERIC_OPTIMIZATION =
-      "opennlp.uima.tokenizer.IsAlphaNumericOptimization";
-
-  private List<TokenSample> tokenSamples = new ArrayList<>();
-
-  private UimaContext mContext;
-
-  private Type mSentenceType;
-
-  private Type mTokenType;
-
-  private String mModelName;
-
-  private String additionalTrainingDataFile;
-
-  private String additionalTrainingDataEncoding;
-
-  private String language;
-
-  private Boolean isSkipAlphaNumerics;
-
-  private Logger mLogger;
-
-  private String sampleTraceFileEncoding;
-
-  private File sampleTraceFile;
-
-  /**
-   * Initializes the current instance.
-   */
-  public void initialize() throws ResourceInitializationException {
-
-    super.initialize();
-
-    mContext = getUimaContext();
-
-    mLogger = mContext.getLogger();
-
-    if (mLogger.isLoggable(Level.INFO)) {
-      mLogger.log(Level.INFO, "Initializing the OpenNLP Tokenizer trainer.");
-    }
-
-    mModelName = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.MODEL_PARAMETER);
-
-    language = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.LANGUAGE_PARAMETER);
-
-    isSkipAlphaNumerics =
-        CasConsumerUtil.getOptionalBooleanParameter(
-            mContext, IS_ALPHA_NUMERIC_OPTIMIZATION);
-
-    if (isSkipAlphaNumerics == null) {
-      isSkipAlphaNumerics = false;
-    }
-
-    additionalTrainingDataFile = CasConsumerUtil.getOptionalStringParameter(
-        getUimaContext(), UimaUtil.ADDITIONAL_TRAINING_DATA_FILE);
-
-    // If the additional training data is specified, the encoding must be 
provided!
-    if (additionalTrainingDataFile != null) {
-      additionalTrainingDataEncoding = 
CasConsumerUtil.getRequiredStringParameter(
-          getUimaContext(), UimaUtil.ADDITIONAL_TRAINING_DATA_ENCODING);
-    }
-
-    String sampleTraceFileName = CasConsumerUtil.getOptionalStringParameter(
-        getUimaContext(), "opennlp.uima.SampleTraceFile");
-
-    if (sampleTraceFileName != null) {
-      sampleTraceFile = new File(getUimaContextAdmin().getResourceManager()
-          .getDataPath() + File.separatorChar + sampleTraceFileName);
-      sampleTraceFileEncoding = CasConsumerUtil.getRequiredStringParameter(
-          getUimaContext(), "opennlp.uima.SampleTraceFileEncoding");
-    }
-  }
-
-  /**
-   * Initialize the current instance with the given type system.
-   */
-  public void typeSystemInit(TypeSystem typeSystem)
-      throws ResourceInitializationException {
-
-    String sentenceTypeName = 
CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.SENTENCE_TYPE_PARAMETER);
-
-    mSentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName);
-
-    String tokenTypeName = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.TOKEN_TYPE_PARAMETER);
-
-    mTokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName);
-  }
-
-  /**
-   * Process the given CAS object.
-   */
-  public void processCas(CAS cas) {
-
-    FSIndex<AnnotationFS> sentenceAnnotations = 
cas.getAnnotationIndex(mSentenceType);
-
-    for (AnnotationFS sentence : sentenceAnnotations) {
-      process(cas, sentence);
-    }
-  }
-
-  private void process(CAS tcas, AnnotationFS sentence) {
-    FSIndex<AnnotationFS> allTokens = tcas.getAnnotationIndex(mTokenType);
-
-    ContainingConstraint containingConstraint =
-        new ContainingConstraint(sentence);
-
-    Iterator<AnnotationFS> containingTokens = tcas.createFilteredIterator(
-        allTokens.iterator(), containingConstraint);
-
-    List<Span> openNLPSpans = new LinkedList<>();
-
-    while (containingTokens.hasNext()) {
-      AnnotationFS tokenAnnotation = containingTokens.next();
-
-      openNLPSpans.add(new Span(tokenAnnotation.getBegin()
-          - sentence.getBegin(), tokenAnnotation.getEnd()
-          - sentence.getBegin()));
-    }
-
-    Span[] spans = openNLPSpans.toArray(new Span[openNLPSpans.size()]);
-
-    Arrays.sort(spans);
-
-    tokenSamples.add(new TokenSample(sentence.getCoveredText(), spans));
-  }
-
-  /**
-   * Called if the processing is finished, this method
-   * does the training.
-   */
-  public void collectionProcessComplete(ProcessTrace arg0)
-      throws ResourceProcessException, IOException {
-
-    if (mLogger.isLoggable(Level.INFO)) {
-      mLogger.log(Level.INFO, "Collected " + tokenSamples.size() +
-          " token samples.");
-    }
-
-    GIS.PRINT_MESSAGES = false;
-
-    ObjectStream<TokenSample> samples = 
ObjectStreamUtils.createObjectStream(tokenSamples);
-
-    // Write stream to disk ...
-    // if trace file
-    // serialize events ...
-
-    Writer samplesOut;
-    TokenizerModel tokenModel;
-
-    if (additionalTrainingDataFile != null) {
-
-      if (mLogger.isLoggable(Level.INFO)) {
-        mLogger.log(Level.INFO, "Using addional training data file: " + 
additionalTrainingDataFile);
-      }
-
-      InputStreamFactory additionalTrainingDataIn = new 
MarkableFileInputStreamFactory(
-          new File(additionalTrainingDataFile));
-
-      Charset additionalTrainingDataCharset = Charset
-          .forName(additionalTrainingDataEncoding);
-
-      ObjectStream<TokenSample> additionalSamples = new TokenSampleStream(
-          new PlainTextByLineStream(additionalTrainingDataIn,
-              additionalTrainingDataCharset));
-
-      samples = ObjectStreamUtils.createObjectStream(samples, 
additionalSamples);
-    }
-
-    if (sampleTraceFile != null) {
-      samplesOut = new OutputStreamWriter(new 
FileOutputStream(sampleTraceFile), sampleTraceFileEncoding);
-      samples = new SampleTraceStream<>(samples, samplesOut);
-    }
-
-    tokenModel = TokenizerME.train(samples,
-        TokenizerFactory.create(null, language, null, isSkipAlphaNumerics, 
null),
-        ModelUtil.createDefaultTrainingParameters());
-
-    // dereference to allow garbage collection
-    tokenSamples = null;
-
-    File modelFile = new File(getUimaContextAdmin().getResourceManager()
-        .getDataPath() + File.separatorChar + mModelName);
-
-    OpennlpUtil.serialize(tokenModel, modelFile);
-  }
-
-  /**
-   * The trainer is not stateless.
-   */
-  public boolean isStateless() {
-    return false;
-  }
-
-  /**
-   * Releases allocated resources.
-   */
-  public void destroy() {
-    // dereference to allow garbage collection
-    tokenSamples = null;
-  }
-}

Reply via email to