OPENNLP-622 Preparing to migrate morfologik-addon to main repository

Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/772f31ff
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/772f31ff
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/772f31ff

Branch: refs/heads/trunk
Commit: 772f31ffe764afb675670735be556796781bda8d
Parents: 0cced84
Author: William Colen <[email protected]>
Authored: Wed Nov 9 18:23:28 2016 -0200
Committer: William Colen <[email protected]>
Committed: Wed Nov 9 18:23:28 2016 -0200

----------------------------------------------------------------------
 bin/morfologik-addon                            |  20 --
 bin/morfologik-addon.bat                        |  21 --
 opennlp-morfologik-addon/bin/morfologik-addon   |  20 ++
 .../bin/morfologik-addon.bat                    |  21 ++
 opennlp-morfologik-addon/pom.xml                | 109 +++++++++
 .../src/main/assembly/bin.xml                   |  91 ++++++++
 .../src/main/assembly/src.xml                   |  39 ++++
 .../src/main/bin/morfologik-addon               |  35 +++
 .../src/main/bin/morfologik-addon.bat           |  47 ++++
 .../src/main/bin/opennlp-cp                     |  35 +++
 .../builder/MorfologikDictionayBuilder.java     | 103 +++++++++
 .../java/opennlp/morfologik/cmdline/CLI.java    | 164 +++++++++++++
 .../MorfologikDictionaryBuilderParams.java      |  57 +++++
 .../MorfologikDictionaryBuilderTool.java        |  62 +++++
 .../builder/XMLDictionaryToTableParams.java     |  45 ++++
 .../builder/XMLDictionaryToTableTool.java       | 127 ++++++++++
 .../lemmatizer/MorfologikLemmatizer.java        |  96 ++++++++
 .../tagdict/MorfologikPOSTaggerFactory.java     | 170 ++++++++++++++
 .../tagdict/MorfologikTagDictionary.java        |  90 ++++++++
 .../opennlp/morfologik/util/MorfologikUtil.java |  36 +++
 .../src/main/readme/LICENSE                     | 230 +++++++++++++++++++
 .../src/main/readme/MORFOLOGIK-LICENSE          |  28 +++
 opennlp-morfologik-addon/src/main/readme/NOTICE |  11 +
 .../builder/POSDictionayBuilderTest.java        |  58 +++++
 .../lemmatizer/MorfologikLemmatizerTest.java    |  35 +++
 .../tagdict/MorfologikTagDictionaryTest.java    |  78 +++++++
 .../tagdict/POSTaggerFactoryTest.java           |  88 +++++++
 .../src/test/resources/AnnotatedSentences.txt   | 136 +++++++++++
 .../src/test/resources/dictionaryWithLemma.info |  15 ++
 .../src/test/resources/dictionaryWithLemma.txt  |  11 +
 pom.xml                                         | 109 ---------
 src/main/assembly/bin.xml                       |  91 --------
 src/main/assembly/src.xml                       |  39 ----
 src/main/bin/morfologik-addon                   |  35 ---
 src/main/bin/morfologik-addon.bat               |  47 ----
 src/main/bin/opennlp-cp                         |  35 ---
 .../builder/MorfologikDictionayBuilder.java     | 103 ---------
 .../java/opennlp/morfologik/cmdline/CLI.java    | 164 -------------
 .../MorfologikDictionaryBuilderParams.java      |  57 -----
 .../MorfologikDictionaryBuilderTool.java        |  62 -----
 .../builder/XMLDictionaryToTableParams.java     |  45 ----
 .../builder/XMLDictionaryToTableTool.java       | 127 ----------
 .../lemmatizer/MorfologikLemmatizer.java        |  96 --------
 .../tagdict/MorfologikPOSTaggerFactory.java     | 170 --------------
 .../tagdict/MorfologikTagDictionary.java        |  90 --------
 .../opennlp/morfologik/util/MorfologikUtil.java |  36 ---
 src/main/readme/LICENSE                         | 230 -------------------
 src/main/readme/MORFOLOGIK-LICENSE              |  28 ---
 src/main/readme/NOTICE                          |  11 -
 .../builder/POSDictionayBuilderTest.java        |  58 -----
 .../lemmatizer/MorfologikLemmatizerTest.java    |  35 ---
 .../tagdict/MorfologikTagDictionaryTest.java    |  78 -------
 .../tagdict/POSTaggerFactoryTest.java           |  88 -------
 src/test/resources/AnnotatedSentences.txt       | 136 -----------
 src/test/resources/dictionaryWithLemma.info     |  15 --
 src/test/resources/dictionaryWithLemma.txt      |  11 -
 56 files changed, 2037 insertions(+), 2037 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/bin/morfologik-addon
----------------------------------------------------------------------
diff --git a/bin/morfologik-addon b/bin/morfologik-addon
deleted file mode 100755
index ccc635e..0000000
--- a/bin/morfologik-addon
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/sh
-
-#   Licensed to the Apache Software Foundation (ASF) under one
-#   or more contributor license agreements.  See the NOTICE file
-#   distributed with this work for additional information
-#   regarding copyright ownership.  The ASF licenses this file
-#   to you under the Apache License, Version 2.0 (the
-#   "License"); you may not use this file except in compliance
-#   with the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#   Unless required by applicable law or agreed to in writing,
-#   software distributed under the License is distributed on an
-#   #  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#   KIND, either express or implied.  See the License for the
-#   specific language governing permissions and limitations
-#   under the License.
-
-mvn -e -q exec:java "-Dexec.mainClass=opennlp.morfologik.cmdline.CLI" "-Dexec.args=$*"

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/bin/morfologik-addon.bat
----------------------------------------------------------------------
diff --git a/bin/morfologik-addon.bat b/bin/morfologik-addon.bat
deleted file mode 100644
index 26a4778..0000000
--- a/bin/morfologik-addon.bat
+++ /dev/null
@@ -1,21 +0,0 @@
-@ECHO OFF
-
-REM #   Licensed to the Apache Software Foundation (ASF) under one
-REM #   or more contributor license agreements.  See the NOTICE file
-REM #   distributed with this work for additional information
-REM #   regarding copyright ownership.  The ASF licenses this file
-REM #   to you under the Apache License, Version 2.0 (the
-REM #   "License"); you may not use this file except in compliance
-REM #   with the License.  You may obtain a copy of the License at
-REM #
-REM #    http://www.apache.org/licenses/LICENSE-2.0
-REM #
-REM #   Unless required by applicable law or agreed to in writing,
-REM #   software distributed under the License is distributed on an
-REM #   
-REM #  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-REM #   KIND, either express or implied.  See the License for the
-REM #   specific language governing permissions and limitations
-REM #   under the License.
-
-mvn -e -q exec:java "-Dexec.mainClass=opennlp.morfologik.cmdline.CLI" "-Dexec.args=%*"

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/bin/morfologik-addon
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/bin/morfologik-addon b/opennlp-morfologik-addon/bin/morfologik-addon
new file mode 100755
index 0000000..ccc635e
--- /dev/null
+++ b/opennlp-morfologik-addon/bin/morfologik-addon
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+#   Licensed to the Apache Software Foundation (ASF) under one
+#   or more contributor license agreements.  See the NOTICE file
+#   distributed with this work for additional information
+#   regarding copyright ownership.  The ASF licenses this file
+#   to you under the Apache License, Version 2.0 (the
+#   "License"); you may not use this file except in compliance
+#   with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing,
+#   software distributed under the License is distributed on an
+#   #  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#   KIND, either express or implied.  See the License for the
+#   specific language governing permissions and limitations
+#   under the License.
+
+mvn -e -q exec:java "-Dexec.mainClass=opennlp.morfologik.cmdline.CLI" "-Dexec.args=$*"

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/bin/morfologik-addon.bat
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/bin/morfologik-addon.bat b/opennlp-morfologik-addon/bin/morfologik-addon.bat
new file mode 100644
index 0000000..26a4778
--- /dev/null
+++ b/opennlp-morfologik-addon/bin/morfologik-addon.bat
@@ -0,0 +1,21 @@
+@ECHO OFF
+
+REM #   Licensed to the Apache Software Foundation (ASF) under one
+REM #   or more contributor license agreements.  See the NOTICE file
+REM #   distributed with this work for additional information
+REM #   regarding copyright ownership.  The ASF licenses this file
+REM #   to you under the Apache License, Version 2.0 (the
+REM #   "License"); you may not use this file except in compliance
+REM #   with the License.  You may obtain a copy of the License at
+REM #
+REM #    http://www.apache.org/licenses/LICENSE-2.0
+REM #
+REM #   Unless required by applicable law or agreed to in writing,
+REM #   software distributed under the License is distributed on an
+REM #   
+REM #  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+REM #   KIND, either express or implied.  See the License for the
+REM #   specific language governing permissions and limitations
+REM #   under the License.
+
+mvn -e -q exec:java "-Dexec.mainClass=opennlp.morfologik.cmdline.CLI" "-Dexec.args=%*"

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/pom.xml b/opennlp-morfologik-addon/pom.xml
new file mode 100644
index 0000000..56d0e47
--- /dev/null
+++ b/opennlp-morfologik-addon/pom.xml
@@ -0,0 +1,109 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+       <modelVersion>4.0.0</modelVersion>
+
+       <groupId>org.apache.opennlp</groupId>
+       <artifactId>morfologik-addon</artifactId>
+       <version>1.0-SNAPSHOT</version>
+       <packaging>jar</packaging>
+       <name>Morfologik Addon</name>
+
+       <url>http://maven.apache.org</url>
+       <build>
+               <plugins>
+                       <plugin>
+                               <groupId>org.apache.maven.plugins</groupId>
+                               <artifactId>maven-compiler-plugin</artifactId>
+                               <version>2.3.2</version>
+                               <configuration>
+                                       <source>1.7</source>
+                                       <target>1.7</target>
+                               </configuration>
+                       </plugin>
+                       <plugin>
+                               <artifactId>maven-assembly-plugin</artifactId>
+                               <executions>
+                                       <execution>
+                                               <id>bundle-project-sources</id>
+                                               <phase>package</phase>
+                                               <goals>
+                                                       <goal>single</goal>
+                                               </goals>
+                                               <configuration>
+                                                       <descriptors>
+                                                               <descriptor>src/main/assembly/bin.xml</descriptor>
+                                                               <descriptor>src/main/assembly/src.xml</descriptor>
+                                                       </descriptors>
+                                                       <!-- Tar package is only compatible with gnu tar,
+                                                            many files have more than 100 chars.
+                                                            Right now only javadoc files are too long.
+                                                        -->
+                                                        <tarLongFileMode>gnu</tarLongFileMode>
+                                                        
+                                                        <finalName>apache-opennlp-morfologik-addon-${project.version}</finalName>
+                                               </configuration>
+                                       </execution>
+                               </executions>
+                       </plugin>
+                       <plugin> 
+               <artifactId>maven-antrun-plugin</artifactId> 
+               <version>1.6</version> 
+               <executions> 
+                 <execution> 
+                   <id>generate checksums for binary artifacts</id> 
+                   <goals><goal>run</goal></goals> 
+                   <phase>verify</phase> 
+                   <configuration> 
+                     <target> 
+                       <checksum algorithm="sha1" format="MD5SUM"> 
+                         <fileset dir="${project.build.directory}"> 
+                           <include name="*.zip" /> 
+                           <include name="*.gz" /> 
+                         </fileset> 
+                       </checksum> 
+                       <checksum algorithm="md5" format="MD5SUM"> 
+                         <fileset dir="${project.build.directory}"> 
+                           <include name="*.zip" /> 
+                           <include name="*.gz" /> 
+                         </fileset> 
+                       </checksum> 
+                     </target> 
+                   </configuration> 
+                 </execution> 
+               </executions> 
+             </plugin>
+               </plugins>
+       </build>
+       <properties>
+               <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+       </properties>
+
+       <dependencies>
+               <dependency>
+                       <groupId>org.carrot2</groupId>
+                       <artifactId>morfologik-stemming</artifactId>
+                       <version>2.1.0</version>
+                       <scope>compile</scope>
+               </dependency>
+               <dependency>
+                       <groupId>org.carrot2</groupId>
+                       <artifactId>morfologik-tools</artifactId>
+                       <version>2.1.0</version>
+                       <scope>compile</scope>
+               </dependency>
+
+               <dependency>
+                       <groupId>org.apache.opennlp</groupId>
+                       <artifactId>opennlp-tools</artifactId>
+                       <version>1.6.0</version>
+               </dependency>
+
+               <dependency>
+                       <groupId>junit</groupId>
+                       <artifactId>junit</artifactId>
+                       <version>4.8.1</version>
+                       <scope>test</scope>
+               </dependency>
+
+       </dependencies>
+</project>

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/main/assembly/bin.xml
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/src/main/assembly/bin.xml b/opennlp-morfologik-addon/src/main/assembly/bin.xml
new file mode 100644
index 0000000..ab4f6da
--- /dev/null
+++ b/opennlp-morfologik-addon/src/main/assembly/bin.xml
@@ -0,0 +1,91 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.    
+-->
+
+<assembly>
+  <id>bin</id>
+  <formats>
+    <format>tar.gz</format>
+    <format>zip</format>
+    <format>dir</format>
+  </formats>
+  
+    <includeBaseDirectory>true</includeBaseDirectory>
+       <baseDirectory>/apache-opennlp-morfologik-addon-${project.version}</baseDirectory>
+  
+       <dependencySets>
+               <dependencySet>
+                       <scope>runtime</scope>
+                       <unpack>false</unpack>
+                       <useProjectArtifact>false</useProjectArtifact>
+                       <fileMode>644</fileMode>
+                       <directoryMode>755</directoryMode>
+                       <outputDirectory>lib</outputDirectory>
+                       <useTransitiveDependencies>true</useTransitiveDependencies>
+               </dependencySet>
+       </dependencySets>
+       
+       <fileSets>
+           <fileSet>
+               <directory>src/main/readme</directory>
+               <outputDirectory></outputDirectory>
+               <fileMode>644</fileMode>
+               <directoryMode>755</directoryMode>      
+           </fileSet>
+               
+           <fileSet>
+             <directory>.</directory>
+             <outputDirectory></outputDirectory>
+             <filtered>true</filtered>
+             <fileMode>644</fileMode>
+             <directoryMode>755</directoryMode> 
+             <includes>
+               <include>README</include>
+               <include>RELEASE_NOTES.html</include>
+             </includes>       
+           </fileSet>
+           
+           <fileSet>
+             <directory>target</directory>
+             <outputDirectory></outputDirectory>
+             <fileMode>644</fileMode>
+             <directoryMode>755</directoryMode> 
+             <includes>
+               <include>issuesFixed/**</include>      
+             </includes>       
+           </fileSet>
+           
+               <fileSet>
+                       <directory>src/main/bin</directory>
+                       <fileMode>755</fileMode>
+                       <directoryMode>755</directoryMode>
+                       <outputDirectory>bin</outputDirectory>
+               </fileSet>
+               
+                 <fileSet>
+                   <directory>target</directory>
+                   <outputDirectory>lib</outputDirectory>
+                   <includes>
+                     <include>morfologik-addon-*.jar</include>
+                   </includes>
+                 </fileSet>
+               
+       </fileSets>
+</assembly>

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/main/assembly/src.xml
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/src/main/assembly/src.xml b/opennlp-morfologik-addon/src/main/assembly/src.xml
new file mode 100644
index 0000000..cdcc9d3
--- /dev/null
+++ b/opennlp-morfologik-addon/src/main/assembly/src.xml
@@ -0,0 +1,39 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<assembly>
+  <id>src</id>
+  <formats>
+    <format>tar.gz</format>
+    <format>zip</format>
+  </formats>
+  
+  <baseDirectory>/apache-opennlp-${project.version}-src</baseDirectory>
+  
+  <fileSets>
+    <fileSet>
+      <directory>../</directory>
+      <outputDirectory></outputDirectory>
+      <excludes>
+        <exclude>**/target/**</exclude>
+        <exclude>**/.*/**</exclude>
+        <exclude>**/pom.xml.releaseBackup</exclude>
+        <exclude>**/release.properties</exclude>
+      </excludes>
+    </fileSet>
+  </fileSets>
+</assembly>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/main/bin/morfologik-addon
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/src/main/bin/morfologik-addon b/opennlp-morfologik-addon/src/main/bin/morfologik-addon
new file mode 100755
index 0000000..9b0faf9
--- /dev/null
+++ b/opennlp-morfologik-addon/src/main/bin/morfologik-addon
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+#   Licensed to the Apache Software Foundation (ASF) under one
+#   or more contributor license agreements.  See the NOTICE file
+#   distributed with this work for additional information
+#   regarding copyright ownership.  The ASF licenses this file
+#   to you under the Apache License, Version 2.0 (the
+#   "License"); you may not use this file except in compliance
+#   with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing,
+#   software distributed under the License is distributed on an
+#   #  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#   KIND, either express or implied.  See the License for the
+#   specific language governing permissions and limitations
+#   under the License.
+
+# Note:  Do not output anything in this script file, any output
+#        may be inadvertently placed in any output files if
+#        output redirection is used.
+
+if [ -z "$JAVACMD" ] ; then
+  if [ -n "$JAVA_HOME"  ] ; then
+    JAVACMD="$JAVA_HOME/bin/java"
+  else
+    JAVACMD="`which java`"
+  fi
+fi
+
+# Might fail if $0 is a link
+OPENNLP_HOME=`dirname "$0"`/..
+
+$JAVACMD -Xmx1024m -cp "lib/*" opennlp.morfologik.cmdline.CLI $@

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/main/bin/morfologik-addon.bat
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/src/main/bin/morfologik-addon.bat b/opennlp-morfologik-addon/src/main/bin/morfologik-addon.bat
new file mode 100644
index 0000000..aeec31f
--- /dev/null
+++ b/opennlp-morfologik-addon/src/main/bin/morfologik-addon.bat
@@ -0,0 +1,47 @@
+@ECHO off
+
+REM #   Licensed to the Apache Software Foundation (ASF) under one
+REM #   or more contributor license agreements.  See the NOTICE file
+REM #   distributed with this work for additional information
+REM #   regarding copyright ownership.  The ASF licenses this file
+REM #   to you under the Apache License, Version 2.0 (the
+REM #   "License"); you may not use this file except in compliance
+REM #   with the License.  You may obtain a copy of the License at
+REM #
+REM #    http://www.apache.org/licenses/LICENSE-2.0
+REM #
+REM #   Unless required by applicable law or agreed to in writing,
+REM #   software distributed under the License is distributed on an
+REM #   #  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+REM #   KIND, either express or implied.  See the License for the
+REM #   specific language governing permissions and limitations
+REM #   under the License.
+
+REM # Note:  Do not output anything in this script file, any output
+REM #        may be inadvertently placed in any output files if
+REM #        output redirection is used.
+SETLOCAL
+
+IF "%JAVA_CMD%" == "" (
+       IF "%JAVA_HOME%" == "" (
+               SET JAVA_CMD=java 
+       ) ELSE (
+               REM # Keep JAVA_HOME to short-name without spaces
+               FOR %%A IN ("%JAVA_HOME%") DO SET JAVA_CMD=%%~sfA\bin\java
+       )
+)
+
+REM #  Should work with Windows XP and greater.  If not, specify the path to where it is installed.
+IF "%OPENNLP_HOME%" == "" (
+       SET OPENNLP_HOME=%~sp0..
+) ELSE (
+       REM # Keep OPENNLP_HOME to short-name without spaces
+       FOR %%A IN ("%OPENNLP_HOME%") DO SET OPENNLP_HOME=%%~sfA
+)
+
+REM #  Get the library JAR file name (JIRA OPENNLP-554)
+FOR %%A IN ("%OPENNLP_HOME%\lib\*.jar") DO SET JAR_FILE=%%A
+
+%JAVA_CMD% -Xmx1024m -jar %JAR_FILE% %*
+
+ENDLOCAL
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/main/bin/opennlp-cp
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/src/main/bin/opennlp-cp b/opennlp-morfologik-addon/src/main/bin/opennlp-cp
new file mode 100755
index 0000000..dff0d12
--- /dev/null
+++ b/opennlp-morfologik-addon/src/main/bin/opennlp-cp
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+#   Licensed to the Apache Software Foundation (ASF) under one
+#   or more contributor license agreements.  See the NOTICE file
+#   distributed with this work for additional information
+#   regarding copyright ownership.  The ASF licenses this file
+#   to you under the Apache License, Version 2.0 (the
+#   "License"); you may not use this file except in compliance
+#   with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing,
+#   software distributed under the License is distributed on an
+#   #  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#   KIND, either express or implied.  See the License for the
+#   specific language governing permissions and limitations
+#   under the License.
+
+# Note:  Do not output anything in this script file, any output
+#        may be inadvertently placed in any output files if
+#        output redirection is used.
+
+if [ -z "$JAVACMD" ] ; then
+  if [ -n "$JAVA_HOME"  ] ; then
+    JAVACMD="$JAVA_HOME/bin/java"
+  else
+    JAVACMD="`which java`"
+  fi
+fi
+
+# Might fail if $0 is a link
+OPENNLP_HOME=`dirname "$0"`/..
+
+$JAVACMD -Xmx1024m -cp "lib/*" opennlp.tools.cmdline.CLI $@

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java
new file mode 100644
index 0000000..dbbca4d
--- /dev/null
+++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.morfologik.builder;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.file.Path;
+import java.util.Properties;
+
+import morfologik.stemming.DictionaryMetadata;
+import morfologik.stemming.EncoderType;
+import morfologik.tools.DictCompile;
+
+/**
+ * Utility class to build Morfologik dictionaries from a tab separated values
+ * file. The first column is the word, the second its lemma and the third a POS
+ * tag. If there is no lemma information leave the second column empty.
+ */
+public class MorfologikDictionayBuilder {
+
+  /**
+   * Compiles a morphological dictionary automaton by running {@link DictCompile}.
+   * 
+   * @param input
+   *          The input file (base,inflected,tag). An associated metadata
+   *          (*.info) file must exist.
+   * @param overwrite
+   *          Overwrite the output file if it exists.
+   * @param validate
+   *          Validate input to make sure it makes sense.
+   * @param acceptBom
+   *          Accept leading BOM bytes (UTF-8).
+   * @param acceptCr
+   *          Accept CR bytes in input sequences (\r).
+   * @param ignoreEmpty
+   *          Ignore empty lines in the input.
+   * @return the expected metadata location of {@code input}, with the
+   *         metadata file extension replaced by {@code .dict}
+   * @throws Exception propagated unchanged from {@code DictCompile.call()}
+   */
+  public Path build(Path input, boolean overwrite, boolean validate,
+      boolean acceptBom, boolean acceptCr, boolean ignoreEmpty)
+      throws Exception {
+
+    DictCompile compiler = new DictCompile(input, overwrite, validate,
+        acceptBom, acceptCr, ignoreEmpty);
+    compiler.call();
+
+    
+    Path metadataPath = DictionaryMetadata
+        .getExpectedMetadataLocation(input);
+    
+    return metadataPath.resolveSibling(
+        metadataPath.getFileName().toString().replaceAll(
+            "\\." + DictionaryMetadata.METADATA_FILE_EXTENSION + "$", 
".dict"));
+  }
+
+  /**
+   * Compiles a morphological dictionary automaton with default options:
+   * overwrite and validate enabled; acceptBom, acceptCr and ignoreEmpty
+   * disabled.
+   * 
+   * @param input
+   *          The input file (base,inflected,tag). An associated metadata
+   *          (*.info) file must exist.
+   * 
+   * @return the dictionary path, as computed by the six-argument overload
+   * @throws Exception propagated unchanged from the six-argument overload
+   */
+  public Path build(Path input) throws Exception {
+
+    return build(input, true, true, false, false, false);
+
+  }
+
+  Properties createProperties(Charset encoding, String separator,
+      EncoderType encoderType) throws FileNotFoundException, IOException {
+
+    Properties properties = new Properties();
+    properties.setProperty("fsa.dict.separator", separator);
+    properties.setProperty("fsa.dict.encoding", encoding.name());
+    properties.setProperty("fsa.dict.encoder", encoderType.name());
+
+    return properties;
+
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
new file mode 100644
index 0000000..f92d178
--- /dev/null
+++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.morfologik.cmdline;
+
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import opennlp.morfologik.cmdline.builder.MorfologikDictionaryBuilderTool;
+import opennlp.morfologik.cmdline.builder.XMLDictionaryToTableTool;
+import opennlp.tools.cmdline.BasicCmdLineTool;
+import opennlp.tools.cmdline.CmdLineTool;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.TypedCmdLineTool;
+import opennlp.tools.util.Version;
+
+/**
+ * Command line entry point of the OpenNLP Morfologik addon. It keeps a
+ * registry of the available tools, prints a usage overview when started
+ * without arguments and otherwise dispatches to the tool named by the first
+ * argument.
+ */
+public final class CLI {
+
+  public static final String CMD = "opennlp-morfologik-addon";
+
+  /** Registered tools keyed by tool name, in registration order. */
+  private static final Map<String, CmdLineTool> toolLookupMap;
+
+  static {
+    List<CmdLineTool> tools = new LinkedList<CmdLineTool>();
+
+    tools.add(new MorfologikDictionaryBuilderTool());
+    tools.add(new XMLDictionaryToTableTool());
+
+    Map<String, CmdLineTool> lookup = new LinkedHashMap<String, CmdLineTool>();
+    for (CmdLineTool tool : tools) {
+      lookup.put(tool.getName(), tool);
+    }
+
+    toolLookupMap = Collections.unmodifiableMap(lookup);
+  }
+
+  /**
+   * @return a set which contains all tool names
+   */
+  public static Set<String> getToolNames() {
+    return toolLookupMap.keySet();
+  }
+
+  /**
+   * Prints the version, the list of registered tools with their short
+   * descriptions and an example invocation to standard output.
+   */
+  private static void usage() {
+    System.out.print("OpenNLP Morfologik Addon "
+        + Version.currentVersion().toString() + ". ");
+    System.out.println("Usage: " + CMD + " TOOL");
+    System.out.println("where TOOL is one of:");
+
+    // width of the name column: longest tool name plus four spaces
+    int numberOfSpaces = -1;
+    for (String toolName : toolLookupMap.keySet()) {
+      if (toolName.length() > numberOfSpaces) {
+        numberOfSpaces = toolName.length();
+      }
+    }
+    numberOfSpaces = numberOfSpaces + 4;
+
+    for (CmdLineTool tool : toolLookupMap.values()) {
+
+      System.out.print("  " + tool.getName());
+
+      // the column is at least as wide as every name, so this is never negative
+      int padding = numberOfSpaces - tool.getName().length();
+      for (int i = 0; i < padding; i++) {
+        System.out.print(" ");
+      }
+
+      System.out.println(tool.getShortDescription());
+    }
+
+    System.out.println("All tools print help when invoked with help parameter");
+
+    // Show a tool that is actually registered; the previously hard-coded
+    // example name did not correspond to any tool added in the static block.
+    String exampleTool = toolLookupMap.keySet().iterator().next();
+    System.out.println("Example: " + CMD + " " + exampleTool + " help");
+  }
+
+  /**
+   * Runs the tool named by {@code args[0]} with the remaining arguments. The
+   * tool may be addressed as {@code NAME.FORMAT}; the part after the first
+   * dot is then passed on as the format name. Without arguments the usage
+   * overview is printed. The JVM is terminated via {@link System#exit(int)}
+   * after printing usage or help and when a tool raises a
+   * {@link TerminateToolException}.
+   *
+   * @param args
+   *          the tool name followed by the tool's own arguments
+   */
+  @SuppressWarnings("rawtypes")
+  public static void main(String[] args) {
+
+    if (args.length == 0) {
+      usage();
+      System.exit(0);
+    }
+
+    String[] toolArguments = new String[args.length - 1];
+    System.arraycopy(args, 1, toolArguments, 0, toolArguments.length);
+
+    String toolName = args[0];
+
+    // check for format
+    String formatName = StreamFactoryRegistry.DEFAULT_FORMAT;
+    int idx = toolName.indexOf(".");
+    if (-1 < idx) {
+      formatName = toolName.substring(idx + 1);
+      toolName = toolName.substring(0, idx);
+    }
+    CmdLineTool tool = toolLookupMap.get(toolName);
+
+    try {
+      if (null == tool) {
+        throw new TerminateToolException(1, "Tool " + toolName + " is not found.");
+      }
+
+      // print help when a tool that takes parameters got none, or on request
+      if ((0 == toolArguments.length && tool.hasParams()) ||
+          0 < toolArguments.length && "help".equals(toolArguments[0])) {
+        if (tool instanceof TypedCmdLineTool) {
+          System.out.println(((TypedCmdLineTool) tool).getHelp(formatName));
+        } else if (tool instanceof BasicCmdLineTool) {
+          System.out.println(tool.getHelp());
+        }
+
+        System.exit(0);
+      }
+
+      if (tool instanceof TypedCmdLineTool) {
+        ((TypedCmdLineTool) tool).run(formatName, toolArguments);
+      } else if (tool instanceof BasicCmdLineTool) {
+        if (-1 == idx) {
+          ((BasicCmdLineTool) tool).run(toolArguments);
+        } else {
+          throw new TerminateToolException(1, "Tool " + toolName + " does not support formats.");
+        }
+      } else {
+        throw new TerminateToolException(1, "Tool " + toolName + " is not supported.");
+      }
+    }
+    catch (TerminateToolException e) {
+
+      if (e.getMessage() != null) {
+        System.err.println(e.getMessage());
+      }
+
+      if (e.getCause() != null) {
+        System.err.println(e.getCause().getMessage());
+        e.getCause().printStackTrace(System.err);
+      }
+
+      System.exit(e.getCode());
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderParams.java
----------------------------------------------------------------------
diff --git 
a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderParams.java
 
b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderParams.java
new file mode 100644
index 0000000..5ea2e4f
--- /dev/null
+++ 
b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderParams.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.morfologik.cmdline.builder;
+
+import java.io.File;
+
+import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
+import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
+import opennlp.tools.cmdline.params.EncodingParameter;
+
+/**
+ * Command line parameters of the Morfologik dictionary builder tool. Each
+ * getter is described to the user through its {@link ParameterDescription};
+ * getters marked with {@link OptionalParameter} carry a default value.
+ */
+interface MorfologikDictionaryBuilderParams extends EncodingParameter {
+
+  /** @return the table file to compile */
+  @ParameterDescription(valueName = "in", description = "The input file (base,inflected,tag). An associated metadata (*.info) file must exist.")
+  File getInputFile();
+
+  /** @return whether leading BOM bytes are accepted; defaults to false */
+  @ParameterDescription(valueName = "true|false", description = "Accept leading BOM bytes (UTF-8).")
+  @OptionalParameter(defaultValue="false")
+  Boolean getAcceptBOM();
+
+  /** @return whether CR bytes are accepted in the input; defaults to false */
+  // The backslash is escaped so the help text shows the two characters "\r"
+  // rather than embedding an actual carriage return in the description.
+  @ParameterDescription(valueName = "true|false", description = "Accept CR bytes in input sequences (\\r).")
+  @OptionalParameter(defaultValue="false")
+  Boolean getAcceptCR();
+
+  /** @return the automaton serialization format name; defaults to FSA5 */
+  @ParameterDescription(valueName = "FSA5|CFSA2", description = "Automaton serialization format.")
+  @OptionalParameter(defaultValue="FSA5")
+  String getFormat();
+
+  /** @return whether empty input lines are ignored; defaults to false */
+  @ParameterDescription(valueName = "true|false", description = "Ignore empty lines in the input.")
+  @OptionalParameter(defaultValue="false")
+  Boolean getIgnoreEmpty();
+
+  /** @return whether an existing output file is overwritten; defaults to false */
+  @ParameterDescription(valueName = "true|false", description = "Overwrite the output file if it exists.")
+  @OptionalParameter(defaultValue="false")
+  Boolean getOverwrite();
+
+  /** @return whether the input is validated; defaults to false */
+  @ParameterDescription(valueName = "true|false", description = "Validate input to make sure it makes sense.")
+  @OptionalParameter(defaultValue="false")
+  Boolean getValidate();
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderTool.java
----------------------------------------------------------------------
diff --git 
a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderTool.java
 
b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderTool.java
new file mode 100644
index 0000000..eb9b51c
--- /dev/null
+++ 
b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderTool.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.morfologik.cmdline.builder;
+
+import java.io.File;
+import java.nio.file.Path;
+
+import morfologik.stemming.DictionaryMetadata;
+import opennlp.morfologik.builder.MorfologikDictionayBuilder;
+import opennlp.tools.cmdline.BasicCmdLineTool;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.TerminateToolException;
+
+/**
+ * Command line tool that compiles a dictionary table into a binary Morfologik
+ * dictionary by delegating to {@link MorfologikDictionayBuilder}.
+ */
+public class MorfologikDictionaryBuilderTool extends BasicCmdLineTool {
+
+  interface Params extends MorfologikDictionaryBuilderParams {
+  }
+
+  @Override
+  public String getShortDescription() {
+    return "builds a binary POS Dictionary using Morfologik";
+  }
+
+  @Override
+  public String getHelp() {
+    return getBasicHelp(Params.class);
+  }
+
+  /**
+   * Parses the arguments, checks that the input table and its expected
+   * metadata file pass the input-file check, and then builds the dictionary.
+   *
+   * @param args
+   *          the tool arguments as described by {@link Params}
+   * @throws TerminateToolException
+   *           with code -1 when the builder fails for any reason
+   */
+  @Override
+  public void run(String[] args) {
+    final Params params = validateAndParseParams(args, Params.class);
+    final File inputFile = params.getInputFile();
+
+    CmdLineUtil.checkInputFile("dictionary input file", inputFile);
+
+    // the metadata (.info) file has to sit where Morfologik expects it
+    final File metadataFile = DictionaryMetadata
+        .getExpectedMetadataLocation(inputFile.toPath()).toFile();
+    CmdLineUtil.checkInputFile("dictionary metadata (.info) input file",
+        metadataFile);
+
+    final MorfologikDictionayBuilder dictionaryBuilder =
+        new MorfologikDictionayBuilder();
+    try {
+      dictionaryBuilder.build(inputFile.toPath(), params.getOverwrite(),
+          params.getValidate(), params.getAcceptBOM(), params.getAcceptCR(),
+          params.getIgnoreEmpty());
+    } catch (Exception failure) {
+      throw new TerminateToolException(-1,
+          "Error while creating Morfologik POS Dictionay: "
+              + failure.getMessage(), failure);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java
----------------------------------------------------------------------
diff --git 
a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java
 
b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java
new file mode 100644
index 0000000..4ee8cd4
--- /dev/null
+++ 
b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.morfologik.cmdline.builder;
+
+import java.io.File;
+
+import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
+import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
+import opennlp.tools.cmdline.params.EncodingParameter;
+
+/**
+ * Command line parameters of the tool that converts an OpenNLP XML tag
+ * dictionary into a Morfologik input table.
+ */
+interface XMLDictionaryToTableParams extends EncodingParameter {
+
+  /** @return the OpenNLP XML tag dictionary to read */
+  @ParameterDescription(valueName = "in", description = "OpenNLP XML Tag Dictionary.")
+  File getInputFile();
+
+  /** @return the table file to write */
+  @ParameterDescription(valueName = "out", description = "Output for Morfologik (.info will be also created).")
+  File getOutputFile();
+
+  /** @return the column separator; defaults to a comma */
+  @ParameterDescription(valueName = "char", description = "Column separator (must be a single character)")
+  @OptionalParameter(defaultValue=",")
+  String getSeparator();
+
+  /** @return the encoder name written to the metadata; defaults to prefix */
+  @ParameterDescription(valueName = "value", description = "Type of lemma-inflected form encoding compression that precedes automaton construction. Allowed values: [suffix, infix, prefix, none].")
+  @OptionalParameter(defaultValue="prefix")
+  String getEncoder();
+
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java
----------------------------------------------------------------------
diff --git 
a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java
 
b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java
new file mode 100644
index 0000000..0e7f2d5
--- /dev/null
+++ 
b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.morfologik.cmdline.builder;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Iterator;
+import java.util.Properties;
+
+import morfologik.stemming.DictionaryMetadata;
+import opennlp.tools.cmdline.BasicCmdLineTool;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.postag.POSDictionary;
+
+public class XMLDictionaryToTableTool extends BasicCmdLineTool {
+
+  interface Params extends XMLDictionaryToTableParams {
+  }
+
+  private String SEPARATOR;
+
+  public String getShortDescription() {
+    return "reads an OpenNLP XML tag dictionary and outputs it in a tab 
separated file";
+  }
+
+  public String getHelp() {
+    return getBasicHelp(Params.class);
+  }
+
+  public void run(String[] args) {
+    Params params = validateAndParseParams(args, Params.class);
+
+    File dictInFile = params.getInputFile();
+    File dictOutFile = params.getOutputFile();
+    Charset encoding = params.getEncoding();
+    SEPARATOR = params.getSeparator();
+
+    CmdLineUtil.checkInputFile("dictionary input file", dictInFile);
+    CmdLineUtil.checkOutputFile("dictionary output file", dictOutFile);
+
+    POSDictionary tagDictionary = null;
+    try {
+      tagDictionary = POSDictionary.create(new FileInputStream(dictInFile));
+    } catch (IOException e) {
+      throw new TerminateToolException(-1,
+          "Error while loading XML POS Dictionay: " + e.getMessage(), e);
+    }
+    Iterator<String> iterator = tagDictionary.iterator();
+
+    try (BufferedWriter writer = Files.newBufferedWriter(dictOutFile.toPath(),
+        encoding)) {
+      while (iterator.hasNext()) {
+        String word = iterator.next();
+        for (String tag : tagDictionary.getTags(word)) {
+          if(valid(word,tag)) {
+            String entry = createEntry(word, tag);
+            writer.write(entry);
+            writer.newLine();
+          }
+        }
+      }
+      writer.close();
+      System.out.println("Created dictionary: " + dictOutFile.toPath());
+    } catch (IOException e) {
+      throw new TerminateToolException(-1, "Error while writing output: "
+          + e.getMessage(), e);
+    }
+    
+    Properties info = new Properties();
+    info.setProperty("fsa.dict.separator", SEPARATOR);
+    info.setProperty("fsa.dict.encoding", params.getEncoding().name());
+    info.setProperty("fsa.dict.encoder", params.getEncoder());
+    
+    Path metaPath = 
DictionaryMetadata.getExpectedMetadataLocation(dictOutFile.toPath());
+    
+    try {
+      info.store(Files.newOutputStream(metaPath), "Info file for FSA 
Morfologik dictionary.");
+    } catch (IOException e) {
+      throw new TerminateToolException(-1, "Error while writing metadata 
output: "
+          + e.getMessage(), e);
+    }
+    System.out.println("Created metadata: " + dictOutFile.toPath());
+    
+  }
+
+  private boolean valid(String word, String tag) {
+    if(word.contains(SEPARATOR) || tag.contains(SEPARATOR)) {
+      System.out
+          .println("Warn: invalid entry because contains separator - word: "
+              + word + " tag: " + tag);
+      return false;
+    }
+    
+    return true;
+  }
+
+  private String createEntry(String word, String tag) {
+    
+    String entry = "" + SEPARATOR +// base
+        word + SEPARATOR +
+        tag;
+        
+    return entry;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java
----------------------------------------------------------------------
diff --git 
a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java
 
b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java
new file mode 100644
index 0000000..2090ce5
--- /dev/null
+++ 
b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.morfologik.lemmatizer;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.IStemmer;
+import morfologik.stemming.WordData;
+import opennlp.tools.lemmatizer.DictionaryLemmatizer;
+
+/**
+ * A {@link DictionaryLemmatizer} that looks up lemmas in a Morfologik binary
+ * dictionary. Words are lower-cased before the lookup unless their tag is one
+ * of the {@link #constantTags}.
+ */
+public class MorfologikLemmatizer implements DictionaryLemmatizer {
+
+  private IStemmer dictLookup;
+
+  /** Tags whose words are looked up and returned without case changes. */
+  public final Set<String> constantTags = new HashSet<String>(Arrays.asList(
+      "NNP", "NP00000"));
+
+  /**
+   * Creates a lemmatizer reading the dictionary at the given path.
+   *
+   * @param dictionaryPath
+   *          location of the Morfologik dictionary
+   * @throws IllegalArgumentException
+   *           declared by this constructor; raised by the underlying
+   *           dictionary lookup
+   * @throws IOException
+   *           if the dictionary cannot be read
+   */
+  public MorfologikLemmatizer(Path dictionaryPath) throws IllegalArgumentException,
+      IOException {
+    dictLookup = new DictionaryLookup(Dictionary.read(dictionaryPath));
+  }
+
+  /**
+   * Looks the word up and maps every {@code [word, tag]} pair found to its
+   * stem.
+   */
+  private HashMap<List<String>, String> getLemmaTagsDict(String word) {
+    List<WordData> wdList = dictLookup.lookup(word);
+    HashMap<List<String>, String> dictMap = new HashMap<List<String>, String>();
+    for (WordData wd : wdList) {
+      List<String> wordLemmaTags = new ArrayList<String>();
+      wordLemmaTags.add(word);
+      wordLemmaTags.add(wd.getTag().toString());
+      dictMap.put(wordLemmaTags, wd.getStem().toString());
+    }
+    return dictMap;
+  }
+
+  /**
+   * Builds the {@code [word, tag]} key in the same form
+   * {@link #getDictMap(String, String)} uses: the word as given for constant
+   * tags, lower-cased otherwise.
+   */
+  private List<String> getDictKeys(String word, String postag) {
+    List<String> keys = new ArrayList<String>();
+    if (constantTags.contains(postag)) {
+      keys.addAll(Arrays.asList(word, postag));
+    } else {
+      keys.addAll(Arrays.asList(word.toLowerCase(), postag));
+    }
+    return keys;
+  }
+
+  /**
+   * Returns the lookup result for the word, which is lower-cased first unless
+   * the tag is a constant tag.
+   */
+  private HashMap<List<String>, String> getDictMap(String word, String postag) {
+    if (constantTags.contains(postag)) {
+      return this.getLemmaTagsDict(word);
+    }
+    return this.getLemmaTagsDict(word.toLowerCase());
+  }
+
+  /**
+   * Returns the lemma of the word for the given tag.
+   *
+   * @param word
+   *          the surface form
+   * @param postag
+   *          the part-of-speech tag of the word
+   * @return the stem found in the dictionary; otherwise the unchanged word if
+   *         the tag is a constant tag or the word equals its upper-cased
+   *         form; otherwise the lower-cased word
+   */
+  public String lemmatize(String word, String postag) {
+    List<String> keys = this.getDictKeys(word, postag);
+    HashMap<List<String>, String> dictMap = this.getDictMap(word, postag);
+
+    // lookup lemma as value of the map
+    String lemma = dictMap.get(keys);
+    if (lemma != null) {
+      return lemma;
+    }
+    if (constantTags.contains(postag)) {
+      return word;
+    }
+    // compare contents, not references: toUpperCase() is not guaranteed to
+    // return the same String instance when nothing changes
+    if (word.toUpperCase().equals(word)) {
+      return word;
+    }
+    return word.toLowerCase();
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
----------------------------------------------------------------------
diff --git 
a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
 
b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
new file mode 100644
index 0000000..93d6c61
--- /dev/null
+++ 
b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.morfologik.tagdict;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Map;
+
+import morfologik.stemming.DictionaryMetadata;
+import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.postag.POSTaggerFactory;
+import opennlp.tools.postag.TagDictionary;
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.model.ArtifactSerializer;
+import opennlp.tools.util.model.ModelUtil;
+
+/**
+ * A {@link POSTaggerFactory} whose tag dictionary is a Morfologik dictionary.
+ * The raw bytes of the dictionary and of its metadata file are kept as two
+ * byte-array artifacts, from which the dictionary is rebuilt on demand.
+ */
+public class MorfologikPOSTaggerFactory extends POSTaggerFactory {
+
+  private static final String MORFOLOGIK_POSDICT_SUF = "morfologik_dict";
+  private static final String MORFOLOGIK_DICT_INFO_SUF = "morfologik_info";
+
+  private static final String MORFOLOGIK_POSDICT = "tagdict."
+      + MORFOLOGIK_POSDICT_SUF;
+  private static final String MORFOLOGIK_DICT_INFO = "tagdict."
+      + MORFOLOGIK_DICT_INFO_SUF;
+
+  private TagDictionary dict;
+
+  // raw file contents captured by createTagDictionary(File)
+  private byte[] dictInfo;
+  private byte[] dictData;
+
+  public MorfologikPOSTaggerFactory() {
+  }
+
+  /**
+   * Reads a Morfologik dictionary and its metadata file, remembers their bytes
+   * for {@link #createArtifactMap()} and returns the resulting tag dictionary.
+   *
+   * @param dictionary
+   *          the binary dictionary file; its metadata file is expected at the
+   *          location reported by {@link DictionaryMetadata}
+   * @return the tag dictionary backed by the given files
+   * @throws FileNotFoundException
+   *           if the dictionary or its metadata file cannot be read
+   * @throws IOException
+   *           if reading either file fails
+   */
+  public TagDictionary createTagDictionary(File dictionary)
+      throws InvalidFormatException, FileNotFoundException, IOException {
+
+    if (!dictionary.canRead()) {
+      throw new FileNotFoundException("Could not read dictionary: " + dictionary.getAbsolutePath());
+    }
+
+    Path dictionaryMeta = DictionaryMetadata.getExpectedMetadataLocation(dictionary.toPath());
+
+    // handled separately: with no metadata path there is no file name to
+    // report, and dereferencing it would fail with a NullPointerException
+    if (dictionaryMeta == null) {
+      throw new FileNotFoundException("Could not read dictionary metadata for: "
+          + dictionary.getAbsolutePath());
+    }
+    if (!dictionaryMeta.toFile().canRead()) {
+      throw new FileNotFoundException("Could not read dictionary metadata: " + dictionaryMeta.getFileName());
+    }
+
+    this.dictData = Files.readAllBytes(dictionary.toPath());
+    this.dictInfo = Files.readAllBytes(dictionaryMeta);
+
+    return createMorfologikDictionary(dictData, dictInfo);
+
+  }
+
+
+  @Override
+  protected void init(Dictionary ngramDictionary, TagDictionary posDictionary) {
+    // the tag dictionary is kept by this class; the superclass receives null
+    super.init(ngramDictionary, null);
+    this.dict = posDictionary;
+  }
+
+  /**
+   * Returns the tag dictionary, building it from the artifacts of the
+   * artifact provider on first use when none has been set.
+   *
+   * @return the tag dictionary, or {@code null} if none was set and the
+   *         dictionary artifact is not available
+   */
+  @Override
+  public TagDictionary getTagDictionary() {
+    if (this.dict == null && artifactProvider != null) {
+      Object data = artifactProvider.getArtifact(MORFOLOGIK_POSDICT);
+      if (data != null) {
+        byte[] info = (byte[]) artifactProvider
+            .getArtifact(MORFOLOGIK_DICT_INFO);
+        if (info == null) {
+          // fail with a clear message rather than a bare NullPointerException
+          throw new RuntimeException(
+              "The Morfologik dictionary metadata artifact is missing.");
+        }
+
+        try {
+          this.dict = createMorfologikDictionary((byte[]) data, info);
+        } catch (IllegalArgumentException e) {
+          throw new RuntimeException(
+              "Could not load the dictionary files to Morfologik.", e);
+        } catch (IOException e) {
+          throw new RuntimeException(
+              "IO error while reading the Morfologik dictionary files.", e);
+        }
+      }
+    }
+
+    return this.dict;
+  }
+
+  @Override
+  public void setTagDictionary(TagDictionary dictionary) {
+    this.dict = dictionary;
+  }
+
+  @Override
+  public TagDictionary createEmptyTagDictionary() {
+    throw new UnsupportedOperationException(
+        "Morfologik POS Tagger factory does not support this operation");
+  }
+
+  @Override
+  public TagDictionary createTagDictionary(InputStream in)
+      throws InvalidFormatException, IOException {
+    throw new UnsupportedOperationException(
+        "Morfologik POS Tagger factory does not support this operation");
+  }
+
+  @Override
+  @SuppressWarnings("rawtypes")
+  public Map<String, ArtifactSerializer> createArtifactSerializersMap() {
+    Map<String, ArtifactSerializer> serializers = super
+        .createArtifactSerializersMap();
+
+    serializers.put(MORFOLOGIK_POSDICT_SUF, new ByteArraySerializer());
+    serializers.put(MORFOLOGIK_DICT_INFO_SUF, new ByteArraySerializer());
+
+    return serializers;
+  }
+
+  @Override
+  public Map<String, Object> createArtifactMap() {
+    Map<String, Object> artifactMap = super.createArtifactMap();
+    // Only publish the artifacts once a dictionary file has been read, so that
+    // ByteArraySerializer.serialize is never handed a null array to write.
+    if (this.dictData != null && this.dictInfo != null) {
+      artifactMap.put(MORFOLOGIK_POSDICT, this.dictData);
+      artifactMap.put(MORFOLOGIK_DICT_INFO, this.dictInfo);
+    }
+    return artifactMap;
+  }
+
+  /**
+   * Builds a tag dictionary from in-memory copies of the dictionary and
+   * metadata files.
+   */
+  private TagDictionary createMorfologikDictionary(byte[] data, byte[] info)
+      throws IOException {
+    morfologik.stemming.Dictionary morfologikDict = morfologik.stemming.Dictionary
+        .read(new ByteArrayInputStream(data), new ByteArrayInputStream(
+            info));
+    return new MorfologikTagDictionary(morfologikDict);
+  }
+
+  /** Serializer that stores an artifact as its raw bytes. */
+  static class ByteArraySerializer implements ArtifactSerializer<byte[]> {
+
+    public byte[] create(InputStream in) throws IOException,
+        InvalidFormatException {
+
+      return ModelUtil.read(in);
+    }
+
+    public void serialize(byte[] artifact, OutputStream out) throws IOException {
+      out.write(artifact);
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikTagDictionary.java
----------------------------------------------------------------------
diff --git 
a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikTagDictionary.java
 
b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikTagDictionary.java
new file mode 100644
index 0000000..b34ca2b
--- /dev/null
+++ 
b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikTagDictionary.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.morfologik.tagdict;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.IStemmer;
+import morfologik.stemming.WordData;
+import opennlp.tools.postag.TagDictionary;
+
+/**
+ * A POS Tagger dictionary implementation based on Morfologik binary
+ * dictionaries.
+ */
+public class MorfologikTagDictionary implements TagDictionary {
+
+  private final IStemmer dictLookup;
+  private final boolean isCaseSensitive;
+
+  /**
+   * Creates a case sensitive {@link MorfologikTagDictionary}.
+   *
+   * @param dict
+   *          a Morfologik FSA dictionary
+   * @throws IllegalArgumentException
+   *           if FSA's root node cannot be acquired (dictionary is empty).
+   * @throws IOException
+   *           declared by the signature; NOTE(review): nothing in this class
+   *           throws it directly - confirm whether it is still needed
+   */
+  public MorfologikTagDictionary(Dictionary dict)
+      throws IllegalArgumentException, IOException {
+    this(dict, true);
+  }
+
+  /**
+   * Creates a {@link MorfologikTagDictionary}.
+   *
+   * @param dict
+   *          a Morfologik FSA dictionary
+   * @param caseSensitive
+   *          if true it performs case sensitive lookup; if false the word is
+   *          lower-cased before the lookup
+   * @throws IllegalArgumentException
+   *           if FSA's root node cannot be acquired (dictionary is empty).
+   * @throws IOException
+   *           declared by the signature; NOTE(review): nothing in this class
+   *           throws it directly - confirm whether it is still needed
+   */
+  public MorfologikTagDictionary(Dictionary dict, boolean caseSensitive)
+      throws IllegalArgumentException, IOException {
+    this.dictLookup = new DictionaryLookup(dict);
+    this.isCaseSensitive = caseSensitive;
+  }
+
+  /**
+   * Looks up the tags recorded for a word.
+   *
+   * @param word
+   *          the word to look up; lower-cased first (using the JVM default
+   *          locale) when this dictionary is case insensitive
+   * @return the tags of all matching entries, or {@code null} when the word
+   *         has no entry or none of its entries carries a tag
+   */
+  @Override
+  public String[] getTags(String word) {
+    String key = isCaseSensitive ? word : word.toLowerCase();
+
+    List<WordData> entries = dictLookup.lookup(key);
+    if (entries == null || entries.isEmpty()) {
+      return null;
+    }
+
+    List<String> tags = new ArrayList<String>(entries.size());
+    for (WordData entry : entries) {
+      // An entry may have no tag data; skip it instead of failing with a
+      // NullPointerException on toString().
+      CharSequence tag = entry.getTag();
+      if (tag != null) {
+        tags.add(tag.toString());
+      }
+    }
+
+    return tags.isEmpty() ? null : tags.toArray(new String[tags.size()]);
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/util/MorfologikUtil.java
----------------------------------------------------------------------
diff --git 
a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/util/MorfologikUtil.java
 
b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/util/MorfologikUtil.java
new file mode 100644
index 0000000..bd4d1a4
--- /dev/null
+++ 
b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/util/MorfologikUtil.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.morfologik.util;
+
+import java.io.File;
+
+import morfologik.stemming.DictionaryMetadata;
+
+/**
+ * Helpers for locating the file that accompanies a Morfologik dictionary.
+ */
+public class MorfologikUtil {
+
+  /**
+   * Resolves the location Morfologik expects for the metadata of a
+   * dictionary file.
+   *
+   * @param dictFile
+   *          the dictionary file
+   * @return the result of
+   *         {@code DictionaryMetadata.getExpectedMetadataLocation} for the
+   *         given file, converted back to a {@link File}
+   */
+  public static File getExpectedPropertiesFile(File dictFile) {
+    File expected = DictionaryMetadata
+        .getExpectedMetadataLocation(dictFile.toPath()).toFile();
+    return expected;
+  }
+
+  /**
+   * Same as {@link #getExpectedPropertiesFile(File)}, taking the dictionary
+   * location as a path string.
+   *
+   * @param dictFile
+   *          path of the dictionary file
+   * @return the expected metadata file
+   */
+  public static File getExpectedPropertiesFile(String dictFile) {
+    return getExpectedPropertiesFile(new File(dictFile));
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/main/readme/LICENSE
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/src/main/readme/LICENSE 
b/opennlp-morfologik-addon/src/main/readme/LICENSE
new file mode 100644
index 0000000..576b4cf
--- /dev/null
+++ b/opennlp-morfologik-addon/src/main/readme/LICENSE
@@ -0,0 +1,230 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+The following license applies to the Snowball stemmers:
+        
+        Copyright (c) 2001, Dr Martin Porter
+        Copyright (c) 2002, Richard Boulton
+        All rights reserved.
+        
+        Redistribution and use in source and binary forms, with or without
+        modification, are permitted provided that the following conditions are 
met:
+        
+            * Redistributions of source code must retain the above copyright 
notice,
+            * this list of conditions and the following disclaimer.
+            * Redistributions in binary form must reproduce the above copyright
+            * notice, this list of conditions and the following disclaimer in 
the
+            * documentation and/or other materials provided with the 
distribution.
+            * Neither the name of the copyright holders nor the names of its 
contributors
+            * may be used to endorse or promote products derived from this 
software
+            * without specific prior written permission.
+        
+        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
"AS IS"
+        AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
THE
+        IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
PURPOSE ARE
+        DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
LIABLE
+        FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
CONSEQUENTIAL
+        DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 
GOODS OR
+        SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
HOWEVER
+        CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
LIABILITY,
+        OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/main/readme/MORFOLOGIK-LICENSE
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/src/main/readme/MORFOLOGIK-LICENSE 
b/opennlp-morfologik-addon/src/main/readme/MORFOLOGIK-LICENSE
new file mode 100644
index 0000000..0554010
--- /dev/null
+++ b/opennlp-morfologik-addon/src/main/readme/MORFOLOGIK-LICENSE
@@ -0,0 +1,28 @@
+Copyright (c) 2006 Dawid Weiss
+Copyright (c) 2007-2015 Dawid Weiss, Marcin Miłkowski
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without 
modification, 
+are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice, 
+    this list of conditions and the following disclaimer.
+    
+    * Redistributions in binary form must reproduce the above copyright 
notice, 
+    this list of conditions and the following disclaimer in the documentation 
+    and/or other materials provided with the distribution.
+    
+    * Neither the name of Morfologik nor the names of its contributors 
+    may be used to endorse or promote products derived from this software 
+    without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
AND 
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 
FOR 
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/main/readme/NOTICE
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/src/main/readme/NOTICE 
b/opennlp-morfologik-addon/src/main/readme/NOTICE
new file mode 100644
index 0000000..73fb1d7
--- /dev/null
+++ b/opennlp-morfologik-addon/src/main/readme/NOTICE
@@ -0,0 +1,11 @@
+Apache OpenNLP
+Copyright 2010, 2013 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
+
+The snowball stemmers in
+opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball
+were developed by Martin Porter and Richard Boulton.
+The full snowball package is available from
+http://snowball.tartarus.org/

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java
----------------------------------------------------------------------
diff --git 
a/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java
 
b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java
new file mode 100644
index 0000000..0a7ba48
--- /dev/null
+++ 
b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.morfologik.builder;
+
+import java.io.File;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+
+import junit.framework.TestCase;
+import morfologik.stemming.DictionaryMetadata;
+import opennlp.morfologik.lemmatizer.MorfologikLemmatizer;
+
+import org.junit.Test;
+
+/**
+ * Builds a Morfologik dictionary from the bundled
+ * {@code dictionaryWithLemma} resources and checks that a lemmatizer can be
+ * created from the result. Other tests reuse
+ * {@link #createMorfologikDictionary()} as a fixture.
+ */
+public class POSDictionayBuilderTest extends TestCase {
+
+  @Test
+  public void testBuildDictionary() throws Exception {
+
+    Path output = createMorfologikDictionary();
+
+    MorfologikLemmatizer ml = new MorfologikLemmatizer(output);
+
+    assertNotNull(ml);
+  }
+
+  /**
+   * Copies the test dictionary and its metadata file to a temporary location
+   * and runs {@link MorfologikDictionayBuilder} on it.
+   *
+   * @return the path returned by the builder
+   */
+  public static Path createMorfologikDictionary() throws Exception {
+    Path tabFilePath = File.createTempFile(
+        POSDictionayBuilderTest.class.getName(), ".txt").toPath();
+    // The metadata file has to sit where Morfologik expects it, relative to
+    // the dictionary file.
+    Path infoFilePath =
+        DictionaryMetadata.getExpectedMetadataLocation(tabFilePath);
+
+    copyResource("/dictionaryWithLemma.txt", tabFilePath);
+    copyResource("/dictionaryWithLemma.info", infoFilePath);
+
+    MorfologikDictionayBuilder builder = new MorfologikDictionayBuilder();
+
+    return builder.build(tabFilePath);
+  }
+
+  /**
+   * Copies a classpath resource to the target path, replacing any existing
+   * file. Files.copy does not close its input stream, so it is closed here.
+   */
+  private static void copyResource(String resource, Path target)
+      throws Exception {
+    try (java.io.InputStream in =
+        POSDictionayBuilderTest.class.getResourceAsStream(resource)) {
+      Files.copy(in, target, StandardCopyOption.REPLACE_EXISTING);
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java
----------------------------------------------------------------------
diff --git 
a/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java
 
b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java
new file mode 100644
index 0000000..6b7525e
--- /dev/null
+++ 
b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java
@@ -0,0 +1,35 @@
+package opennlp.morfologik.lemmatizer;
+
+import static org.junit.Assert.assertEquals;
+
+import java.nio.file.Path;
+
+import opennlp.morfologik.builder.POSDictionayBuilderTest;
+import opennlp.tools.lemmatizer.DictionaryLemmatizer;
+
+import org.junit.Test;
+
+/**
+ * Exercises {@link MorfologikLemmatizer} against the dictionary produced by
+ * {@link POSDictionayBuilderTest#createMorfologikDictionary()}.
+ */
+public class MorfologikLemmatizerTest {
+
+  @Test
+  public void testLemmatizeInsensitive() throws Exception {
+    DictionaryLemmatizer lemmatizer = createDictionary(false);
+
+    // Same surface form, different lemma depending on the POS tag.
+    assertEquals("casar", lemmatizer.lemmatize("casa", "V"));
+    assertEquals("casa", lemmatizer.lemmatize("casa", "NOUN"));
+
+    // The capitalized form tagged PROP is expected to yield a lower-case lemma.
+    assertEquals("casa", lemmatizer.lemmatize("Casa", "PROP"));
+
+  }
+
+  // NOTE(review): caseSensitive is accepted but never used; the lemmatizer is
+  // always created from the dictionary path alone - confirm whether a
+  // case-sensitivity option should be passed on.
+  private MorfologikLemmatizer createDictionary(boolean caseSensitive)
+      throws Exception {
+
+    return new MorfologikLemmatizer(
+        POSDictionayBuilderTest.createMorfologikDictionary());
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/772f31ff/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java
----------------------------------------------------------------------
diff --git 
a/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java
 
b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java
new file mode 100644
index 0000000..c6c9e04
--- /dev/null
+++ 
b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java
@@ -0,0 +1,78 @@
+package opennlp.morfologik.tagdict;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Arrays;
+import java.util.List;
+
+import morfologik.stemming.Dictionary;
+import opennlp.morfologik.builder.POSDictionayBuilderTest;
+import opennlp.tools.postag.TagDictionary;
+
+import org.junit.Test;
+
+/**
+ * Tests {@link MorfologikTagDictionary} lookups, in both case sensitive and
+ * case insensitive mode, against the dictionary produced by
+ * {@link POSDictionayBuilderTest#createMorfologikDictionary()}.
+ */
+public class MorfologikTagDictionaryTest {
+
+  @Test
+  public void testNoLemma() throws Exception {
+    MorfologikTagDictionary dict = createDictionary(false);
+
+    List<String> tags = Arrays.asList(dict.getTags("carro"));
+    assertEquals(1, tags.size());
+    assertTrue(tags.contains("NOUN"));
+
+  }
+
+  @Test
+  public void testPOSDictionaryInsensitive() throws Exception {
+    TagDictionary dict = createDictionary(false);
+
+    List<String> tags = Arrays.asList(dict.getTags("casa"));
+    assertEquals(2, tags.size());
+    assertTrue(tags.contains("NOUN"));
+    assertTrue(tags.contains("V"));
+
+    // Case insensitive lookup lower-cases the query, so "Casa" finds the
+    // same entries as "casa"; the proper-noun entry stored as "Casa" is
+    // not reachable in this mode.
+    tags = Arrays.asList(dict.getTags("Casa"));
+    assertEquals(2, tags.size());
+    assertTrue(tags.contains("NOUN"));
+    assertTrue(tags.contains("V"));
+
+  }
+
+  @Test
+  public void testPOSDictionarySensitive() throws Exception {
+    TagDictionary dict = createDictionary(true);
+
+    List<String> tags = Arrays.asList(dict.getTags("casa"));
+    assertEquals(2, tags.size());
+    assertTrue(tags.contains("NOUN"));
+    assertTrue(tags.contains("V"));
+
+    // Case sensitive lookup uses the query as written, so the capitalized
+    // "Casa" only matches the proper-noun entry.
+    tags = Arrays.asList(dict.getTags("Casa"));
+    assertEquals(1, tags.size());
+    assertTrue(tags.contains("PROP"));
+
+  }
+
+  /**
+   * Builds the test dictionary and wraps it in a tag dictionary with the
+   * requested case sensitivity.
+   */
+  private MorfologikTagDictionary createDictionary(boolean caseSensitive)
+      throws Exception {
+
+    Dictionary dic =
+        Dictionary.read(POSDictionayBuilderTest.createMorfologikDictionary());
+    return new MorfologikTagDictionary(dic, caseSensitive);
+  }
+
+}

Reply via email to