Remove morfologik because it was promoted to opennlp.git
Project: http://git-wip-us.apache.org/repos/asf/opennlp-addons/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp-addons/commit/e2f3db75 Tree: http://git-wip-us.apache.org/repos/asf/opennlp-addons/tree/e2f3db75 Diff: http://git-wip-us.apache.org/repos/asf/opennlp-addons/diff/e2f3db75 Branch: refs/heads/master Commit: e2f3db754bec2bae2f196dc8cfcfd23d2260f31d Parents: 9adc252 Author: Jörn Kottmann <[email protected]> Authored: Mon Apr 24 15:26:57 2017 +0200 Committer: Jörn Kottmann <[email protected]> Committed: Mon Apr 24 15:26:57 2017 +0200 ---------------------------------------------------------------------- morfologik-addon/bin/morfologik-addon | 20 -- morfologik-addon/bin/morfologik-addon.bat | 21 -- morfologik-addon/pom.xml | 109 --------- morfologik-addon/src/main/assembly/bin.xml | 91 -------- morfologik-addon/src/main/assembly/src.xml | 39 ---- morfologik-addon/src/main/bin/morfologik-addon | 35 --- .../src/main/bin/morfologik-addon.bat | 47 ---- morfologik-addon/src/main/bin/opennlp-cp | 35 --- .../builder/MorfologikDictionayBuilder.java | 103 --------- .../java/opennlp/morfologik/cmdline/CLI.java | 164 ------------- .../MorfologikDictionaryBuilderParams.java | 57 ----- .../MorfologikDictionaryBuilderTool.java | 62 ----- .../builder/XMLDictionaryToTableParams.java | 45 ---- .../builder/XMLDictionaryToTableTool.java | 127 ---------- .../lemmatizer/MorfologikLemmatizer.java | 96 -------- .../tagdict/MorfologikPOSTaggerFactory.java | 170 -------------- .../tagdict/MorfologikTagDictionary.java | 90 -------- .../opennlp/morfologik/util/MorfologikUtil.java | 36 --- morfologik-addon/src/main/readme/LICENSE | 230 ------------------- .../src/main/readme/MORFOLOGIK-LICENSE | 28 --- morfologik-addon/src/main/readme/NOTICE | 11 - .../builder/POSDictionayBuilderTest.java | 58 ----- .../lemmatizer/MorfologikLemmatizerTest.java | 35 --- .../tagdict/MorfologikTagDictionaryTest.java | 78 ------- .../tagdict/POSTaggerFactoryTest.java | 88 ------- 
.../src/test/resources/AnnotatedSentences.txt | 136 ----------- .../src/test/resources/dictionaryWithLemma.info | 15 -- .../src/test/resources/dictionaryWithLemma.txt | 11 - 28 files changed, 2037 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/bin/morfologik-addon ---------------------------------------------------------------------- diff --git a/morfologik-addon/bin/morfologik-addon b/morfologik-addon/bin/morfologik-addon deleted file mode 100755 index ccc635e..0000000 --- a/morfologik-addon/bin/morfologik-addon +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/sh - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -mvn -e -q exec:java "-Dexec.mainClass=opennlp.morfologik.cmdline.CLI" "-Dexec.args=$*" http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/bin/morfologik-addon.bat ---------------------------------------------------------------------- diff --git a/morfologik-addon/bin/morfologik-addon.bat b/morfologik-addon/bin/morfologik-addon.bat deleted file mode 100644 index 26a4778..0000000 --- a/morfologik-addon/bin/morfologik-addon.bat +++ /dev/null @@ -1,21 +0,0 @@ -@ECHO OFF - -REM # Licensed to the Apache Software Foundation (ASF) under one -REM # or more contributor license agreements. See the NOTICE file -REM # distributed with this work for additional information -REM # regarding copyright ownership. The ASF licenses this file -REM # to you under the Apache License, Version 2.0 (the -REM # "License"); you may not use this file except in compliance -REM # with the License. You may obtain a copy of the License at -REM # -REM # http://www.apache.org/licenses/LICENSE-2.0 -REM # -REM # Unless required by applicable law or agreed to in writing, -REM # software distributed under the License is distributed on an -REM # -REM # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -REM # KIND, either express or implied. See the License for the -REM # specific language governing permissions and limitations -REM # under the License. 
- -mvn -e -q exec:java "-Dexec.mainClass=opennlp.morfologik.cmdline.CLI" "-Dexec.args=%*" http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/pom.xml ---------------------------------------------------------------------- diff --git a/morfologik-addon/pom.xml b/morfologik-addon/pom.xml deleted file mode 100644 index 56d0e47..0000000 --- a/morfologik-addon/pom.xml +++ /dev/null @@ -1,109 +0,0 @@ -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - - <groupId>org.apache.opennlp</groupId> - <artifactId>morfologik-addon</artifactId> - <version>1.0-SNAPSHOT</version> - <packaging>jar</packaging> - <name>Morfologik Addon</name> - - <url>http://maven.apache.org</url> - <build> - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-compiler-plugin</artifactId> - <version>2.3.2</version> - <configuration> - <source>1.7</source> - <target>1.7</target> - </configuration> - </plugin> - <plugin> - <artifactId>maven-assembly-plugin</artifactId> - <executions> - <execution> - <id>bundle-project-sources</id> - <phase>package</phase> - <goals> - <goal>single</goal> - </goals> - <configuration> - <descriptors> - <descriptor>src/main/assembly/bin.xml</descriptor> - <descriptor>src/main/assembly/src.xml</descriptor> - </descriptors> - <!-- Tar package is only compatible with gnu tar, - many file have more than 100 chars. - Right now only javadoc files are too long. 
- --> - <tarLongFileMode>gnu</tarLongFileMode> - - <finalName>apache-opennlp-morfologik-addon-${project.version}</finalName> - </configuration> - </execution> - </executions> - </plugin> - <plugin> - <artifactId>maven-antrun-plugin</artifactId> - <version>1.6</version> - <executions> - <execution> - <id>generate checksums for binary artifacts</id> - <goals><goal>run</goal></goals> - <phase>verify</phase> - <configuration> - <target> - <checksum algorithm="sha1" format="MD5SUM"> - <fileset dir="${project.build.directory}"> - <include name="*.zip" /> - <include name="*.gz" /> - </fileset> - </checksum> - <checksum algorithm="md5" format="MD5SUM"> - <fileset dir="${project.build.directory}"> - <include name="*.zip" /> - <include name="*.gz" /> - </fileset> - </checksum> - </target> - </configuration> - </execution> - </executions> - </plugin> - </plugins> - </build> - <properties> - <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> - </properties> - - <dependencies> - <dependency> - <groupId>org.carrot2</groupId> - <artifactId>morfologik-stemming</artifactId> - <version>2.1.0</version> - <scope>compile</scope> - </dependency> - <dependency> - <groupId>org.carrot2</groupId> - <artifactId>morfologik-tools</artifactId> - <version>2.1.0</version> - <scope>compile</scope> - </dependency> - - <dependency> - <groupId>org.apache.opennlp</groupId> - <artifactId>opennlp-tools</artifactId> - <version>1.6.0</version> - </dependency> - - <dependency> - <groupId>junit</groupId> - <artifactId>junit</artifactId> - <version>4.8.1</version> - <scope>test</scope> - </dependency> - - </dependencies> -</project> http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/main/assembly/bin.xml ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/main/assembly/bin.xml b/morfologik-addon/src/main/assembly/bin.xml deleted file mode 100644 index ab4f6da..0000000 --- 
a/morfologik-addon/src/main/assembly/bin.xml +++ /dev/null @@ -1,91 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> - -<!-- - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. ---> - -<assembly> - <id>bin</id> - <formats> - <format>tar.gz</format> - <format>zip</format> - <format>dir</format> - </formats> - - <includeBaseDirectory>true</includeBaseDirectory> - <baseDirectory>/apache-opennlp-morfologik-addon-${project.version}</baseDirectory> - - <dependencySets> - <dependencySet> - <scope>runtime</scope> - <unpack>false</unpack> - <useProjectArtifact>false</useProjectArtifact> - <fileMode>644</fileMode> - <directoryMode>755</directoryMode> - <outputDirectory>lib</outputDirectory> - <useTransitiveDependencies>true</useTransitiveDependencies> - </dependencySet> - </dependencySets> - - <fileSets> - <fileSet> - <directory>src/main/readme</directory> - <outputDirectory></outputDirectory> - <fileMode>644</fileMode> - <directoryMode>755</directoryMode> - </fileSet> - - <fileSet> - <directory>.</directory> - <outputDirectory></outputDirectory> - <filtered>true</filtered> - <fileMode>644</fileMode> - <directoryMode>755</directoryMode> - <includes> - <include>README</include> - <include>RELEASE_NOTES.html</include> - </includes> - </fileSet> - - <fileSet> - 
<directory>target</directory> - <outputDirectory></outputDirectory> - <fileMode>644</fileMode> - <directoryMode>755</directoryMode> - <includes> - <include>issuesFixed/**</include> - </includes> - </fileSet> - - <fileSet> - <directory>src/main/bin</directory> - <fileMode>755</fileMode> - <directoryMode>755</directoryMode> - <outputDirectory>bin</outputDirectory> - </fileSet> - - <fileSet> - <directory>target</directory> - <outputDirectory>lib</outputDirectory> - <includes> - <include>morfologik-addon-*.jar</include> - </includes> - </fileSet> - - </fileSets> -</assembly> http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/main/assembly/src.xml ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/main/assembly/src.xml b/morfologik-addon/src/main/assembly/src.xml deleted file mode 100644 index cdcc9d3..0000000 --- a/morfologik-addon/src/main/assembly/src.xml +++ /dev/null @@ -1,39 +0,0 @@ -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
---> - -<assembly> - <id>src</id> - <formats> - <format>tar.gz</format> - <format>zip</format> - </formats> - - <baseDirectory>/apache-opennlp-${project.version}-src</baseDirectory> - - <fileSets> - <fileSet> - <directory>../</directory> - <outputDirectory></outputDirectory> - <excludes> - <exclude>**/target/**</exclude> - <exclude>**/.*/**</exclude> - <exclude>**/pom.xml.releaseBackup</exclude> - <exclude>**/release.properties</exclude> - </excludes> - </fileSet> - </fileSets> -</assembly> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/main/bin/morfologik-addon ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/main/bin/morfologik-addon b/morfologik-addon/src/main/bin/morfologik-addon deleted file mode 100755 index 9b0faf9..0000000 --- a/morfologik-addon/src/main/bin/morfologik-addon +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/sh - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Note: Do not output anything in this script file, any output -# may be inadvertantly placed in any output files if -# output redirection is used. 
- -if [ -z "$JAVACMD" ] ; then - if [ -n "$JAVA_HOME" ] ; then - JAVACMD="$JAVA_HOME/bin/java" - else - JAVACMD="`which java`" - fi -fi - -# Might fail if $0 is a link -OPENNLP_HOME=`dirname "$0"`/.. - -$JAVACMD -Xmx1024m -cp "lib/*" opennlp.morfologik.cmdline.CLI $@ http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/main/bin/morfologik-addon.bat ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/main/bin/morfologik-addon.bat b/morfologik-addon/src/main/bin/morfologik-addon.bat deleted file mode 100644 index aeec31f..0000000 --- a/morfologik-addon/src/main/bin/morfologik-addon.bat +++ /dev/null @@ -1,47 +0,0 @@ -@ECHO off - -REM # Licensed to the Apache Software Foundation (ASF) under one -REM # or more contributor license agreements. See the NOTICE file -REM # distributed with this work for additional information -REM # regarding copyright ownership. The ASF licenses this file -REM # to you under the Apache License, Version 2.0 (the -REM # "License"); you may not use this file except in compliance -REM # with the License. You may obtain a copy of the License at -REM # -REM # http://www.apache.org/licenses/LICENSE-2.0 -REM # -REM # Unless required by applicable law or agreed to in writing, -REM # software distributed under the License is distributed on an -REM # # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -REM # KIND, either express or implied. See the License for the -REM # specific language governing permissions and limitations -REM # under the License. - -REM # Note: Do not output anything in this script file, any output -REM # may be inadvertantly placed in any output files if -REM # output redirection is used. 
-SETLOCAL - -IF "%JAVA_CMD%" == "" ( - IF "%JAVA_HOME%" == "" ( - SET JAVA_CMD=java - ) ELSE ( - REM # Keep JAVA_HOME to short-name without spaces - FOR %%A IN ("%JAVA_HOME%") DO SET JAVA_CMD=%%~sfA\bin\java - ) -) - -REM # Should work with Windows XP and greater. If not, specify the path to where it is installed. -IF "%OPENNLP_HOME%" == "" ( - SET OPENNLP_HOME=%~sp0.. -) ELSE ( - REM # Keep OPENNLP_HOME to short-name without spaces - FOR %%A IN ("%OPENNLP_HOME%") DO SET OPENNLP_HOME=%%~sfA -) - -REM # Get the library JAR file name (JIRA OPENNLP-554) -FOR %%A IN ("%OPENNLP_HOME%\lib\*.jar") DO SET JAR_FILE=%%A - -%JAVA_CMD% -Xmx1024m -jar %JAR_FILE% %* - -ENDLOCAL \ No newline at end of file http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/main/bin/opennlp-cp ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/main/bin/opennlp-cp b/morfologik-addon/src/main/bin/opennlp-cp deleted file mode 100755 index dff0d12..0000000 --- a/morfologik-addon/src/main/bin/opennlp-cp +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/sh - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Note: Do not output anything in this script file, any output -# may be inadvertantly placed in any output files if -# output redirection is used. - -if [ -z "$JAVACMD" ] ; then - if [ -n "$JAVA_HOME" ] ; then - JAVACMD="$JAVA_HOME/bin/java" - else - JAVACMD="`which java`" - fi -fi - -# Might fail if $0 is a link -OPENNLP_HOME=`dirname "$0"`/.. - -$JAVACMD -Xmx1024m -cp "lib/*" opennlp.tools.cmdline.CLI $@ http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java b/morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java deleted file mode 100644 index dbbca4d..0000000 --- a/morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package opennlp.morfologik.builder; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.nio.charset.Charset; -import java.nio.file.Path; -import java.util.Properties; - -import morfologik.stemming.DictionaryMetadata; -import morfologik.stemming.EncoderType; -import morfologik.tools.DictCompile; - -/** - * Utility class to build Morfologik dictionaries from a tab separated values - * file. The first column is the word, the second its lemma and the third a POS - * tag. If there is no lemma information leave the second column empty. - */ -public class MorfologikDictionayBuilder { - - /** - * Helper to compile a morphological dictionary automaton. - * - * @param input - * The input file (base,inflected,tag). An associated metadata - * (*.info) file must exist. - * @param overwrite - * Overwrite the output file if it exists. - * @param validate - * Validate input to make sure it makes sense. - * @param acceptBom - * Accept leading BOM bytes (UTF-8). - * @param acceptCr - * Accept CR bytes in input sequences (\r). - * @param ignoreEmpty - * Ignore empty lines in the input. - * @return the dictionary path - * - * @throws Exception - */ - public Path build(Path input, boolean overwrite, boolean validate, - boolean acceptBom, boolean acceptCr, boolean ignoreEmpty) - throws Exception { - - DictCompile compiler = new DictCompile(input, overwrite, validate, - acceptBom, acceptCr, ignoreEmpty); - compiler.call(); - - - Path metadataPath = DictionaryMetadata - .getExpectedMetadataLocation(input); - - return metadataPath.resolveSibling( - metadataPath.getFileName().toString().replaceAll( - "\\." + DictionaryMetadata.METADATA_FILE_EXTENSION + "$", ".dict")); - } - - /** - * Helper to compile a morphological dictionary automaton using default - * parameters. - * - * @param input - * The input file (base,inflected,tag). An associated metadata - * (*.info) file must exist. 
- * - * @return the dictionary path - * - * @throws Exception - */ - public Path build(Path input) throws Exception { - - return build(input, true, true, false, false, false); - - } - - Properties createProperties(Charset encoding, String separator, - EncoderType encoderType) throws FileNotFoundException, IOException { - - Properties properties = new Properties(); - properties.setProperty("fsa.dict.separator", separator); - properties.setProperty("fsa.dict.encoding", encoding.name()); - properties.setProperty("fsa.dict.encoder", encoderType.name()); - - return properties; - - } -} http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java deleted file mode 100644 index f92d178..0000000 --- a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package opennlp.morfologik.cmdline; - -import java.util.Collections; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import opennlp.morfologik.cmdline.builder.MorfologikDictionaryBuilderTool; -import opennlp.morfologik.cmdline.builder.XMLDictionaryToTableTool; -import opennlp.tools.cmdline.BasicCmdLineTool; -import opennlp.tools.cmdline.CmdLineTool; -import opennlp.tools.cmdline.StreamFactoryRegistry; -import opennlp.tools.cmdline.TerminateToolException; -import opennlp.tools.cmdline.TypedCmdLineTool; -import opennlp.tools.util.Version; - -public final class CLI { - - public static final String CMD = "opennlp-morfologik-addon"; - - private static Map<String, CmdLineTool> toolLookupMap; - - static { - toolLookupMap = new LinkedHashMap<String, CmdLineTool>(); - - List<CmdLineTool> tools = new LinkedList<CmdLineTool>(); - - tools.add(new MorfologikDictionaryBuilderTool()); - tools.add(new XMLDictionaryToTableTool()); - - for (CmdLineTool tool : tools) { - toolLookupMap.put(tool.getName(), tool); - } - - toolLookupMap = Collections.unmodifiableMap(toolLookupMap); - } - - /** - * @return a set which contains all tool names - */ - public static Set<String> getToolNames() { - return toolLookupMap.keySet(); - } - - private static void usage() { - System.out.print("OpenNLP Morfologik Addon " - + Version.currentVersion().toString() + ". 
"); - System.out.println("Usage: " + CMD + " TOOL"); - System.out.println("where TOOL is one of:"); - - // distance of tool name from line start - int numberOfSpaces = -1; - for (String toolName : toolLookupMap.keySet()) { - if (toolName.length() > numberOfSpaces) { - numberOfSpaces = toolName.length(); - } - } - numberOfSpaces = numberOfSpaces + 4; - - for (CmdLineTool tool : toolLookupMap.values()) { - - System.out.print(" " + tool.getName()); - - for (int i = 0; i < Math.abs(tool.getName().length() - - numberOfSpaces); i++) { - System.out.print(" "); - } - - System.out.println(tool.getShortDescription()); - } - - System.out - .println("All tools print help when invoked with help parameter"); - System.out - .println("Example: opennlp-morfologik-addon POSDictionaryBuilder help"); - } - - - @SuppressWarnings("rawtypes") - public static void main(String[] args) { - - if (args.length == 0) { - usage(); - System.exit(0); - } - - String toolArguments[] = new String[args.length -1]; - System.arraycopy(args, 1, toolArguments, 0, toolArguments.length); - - String toolName = args[0]; - - //check for format - String formatName = StreamFactoryRegistry.DEFAULT_FORMAT; - int idx = toolName.indexOf("."); - if (-1 < idx) { - formatName = toolName.substring(idx + 1); - toolName = toolName.substring(0, idx); - } - CmdLineTool tool = toolLookupMap.get(toolName); - - try { - if (null == tool) { - throw new TerminateToolException(1, "Tool " + toolName + " is not found."); - } - - if ((0 == toolArguments.length && tool.hasParams()) || - 0 < toolArguments.length && "help".equals(toolArguments[0])) { - if (tool instanceof TypedCmdLineTool) { - System.out.println(((TypedCmdLineTool) tool).getHelp(formatName)); - } else if (tool instanceof BasicCmdLineTool) { - System.out.println(tool.getHelp()); - } - - System.exit(0); - } - - if (tool instanceof TypedCmdLineTool) { - ((TypedCmdLineTool) tool).run(formatName, toolArguments); - } else if (tool instanceof BasicCmdLineTool) { - if (-1 == 
idx) { - ((BasicCmdLineTool) tool).run(toolArguments); - } else { - throw new TerminateToolException(1, "Tool " + toolName + " does not support formats."); - } - } else { - throw new TerminateToolException(1, "Tool " + toolName + " is not supported."); - } - } - catch (TerminateToolException e) { - - if (e.getMessage() != null) { - System.err.println(e.getMessage()); - } - - if (e.getCause() != null) { - System.err.println(e.getCause().getMessage()); - e.getCause().printStackTrace(System.err); - } - - System.exit(e.getCode()); - } - } - - -} http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderParams.java ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderParams.java b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderParams.java deleted file mode 100644 index 5ea2e4f..0000000 --- a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderParams.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.morfologik.cmdline.builder; - -import java.io.File; - -import opennlp.tools.cmdline.ArgumentParser.OptionalParameter; -import opennlp.tools.cmdline.ArgumentParser.ParameterDescription; -import opennlp.tools.cmdline.params.EncodingParameter; - -/** - * Params for Dictionary tools. - */ -interface MorfologikDictionaryBuilderParams extends EncodingParameter { - - @ParameterDescription(valueName = "in", description = "The input file (base,inflected,tag). An associated metadata (*.info) file must exist.") - File getInputFile(); - - @ParameterDescription(valueName = "true|false", description = "Accept leading BOM bytes (UTF-8).") - @OptionalParameter(defaultValue="false") - Boolean getAcceptBOM(); - - @ParameterDescription(valueName = "true|false", description = "Accept CR bytes in input sequences (\r).") - @OptionalParameter(defaultValue="false") - Boolean getAcceptCR(); - - @ParameterDescription(valueName = "FSA5|CFSA2", description = "Automaton serialization format.") - @OptionalParameter(defaultValue="FSA5") - String getFormat(); - - @ParameterDescription(valueName = "true|false", description = "Ignore empty lines in the input.") - @OptionalParameter(defaultValue="false") - Boolean getIgnoreEmpty(); - - @ParameterDescription(valueName = "true|false", description = "Overwrite the output file if it exists.") - @OptionalParameter(defaultValue="false") - Boolean getOverwrite(); - - @ParameterDescription(valueName = "true|false", description = "Validate input to make sure it makes sense.") - @OptionalParameter(defaultValue="false") - Boolean getValidate(); -} http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderTool.java ---------------------------------------------------------------------- diff --git 
a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderTool.java b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderTool.java deleted file mode 100644 index eb9b51c..0000000 --- a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderTool.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package opennlp.morfologik.cmdline.builder; - -import java.io.File; -import java.nio.file.Path; - -import morfologik.stemming.DictionaryMetadata; -import opennlp.morfologik.builder.MorfologikDictionayBuilder; -import opennlp.tools.cmdline.BasicCmdLineTool; -import opennlp.tools.cmdline.CmdLineUtil; -import opennlp.tools.cmdline.TerminateToolException; - -public class MorfologikDictionaryBuilderTool extends BasicCmdLineTool { - - interface Params extends MorfologikDictionaryBuilderParams { - } - - public String getShortDescription() { - return "builds a binary POS Dictionary using Morfologik"; - } - - public String getHelp() { - return getBasicHelp(Params.class); - } - - public void run(String[] args) { - Params params = validateAndParseParams(args, Params.class); - - File dictInFile = params.getInputFile(); - - CmdLineUtil.checkInputFile("dictionary input file", dictInFile); - Path metadataPath = DictionaryMetadata.getExpectedMetadataLocation(dictInFile.toPath()); - CmdLineUtil.checkInputFile("dictionary metadata (.info) input file", metadataPath.toFile()); - - MorfologikDictionayBuilder builder = new MorfologikDictionayBuilder(); - try { - builder.build(dictInFile.toPath(), params.getOverwrite(), - params.getValidate(), params.getAcceptBOM(), params.getAcceptCR(), - params.getIgnoreEmpty()); - } catch (Exception e) { - throw new TerminateToolException(-1, - "Error while creating Morfologik POS Dictionay: " + e.getMessage(), e); - } - - } -} http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java deleted file mode 100644 index 4ee8cd4..0000000 --- 
a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.morfologik.cmdline.builder; - -import java.io.File; - -import opennlp.tools.cmdline.ArgumentParser.OptionalParameter; -import opennlp.tools.cmdline.ArgumentParser.ParameterDescription; -import opennlp.tools.cmdline.params.EncodingParameter; - -/** - * Params for Dictionary tools. - */ -interface XMLDictionaryToTableParams extends EncodingParameter { - - @ParameterDescription(valueName = "in", description = "OpenNLP XML Tag Dictionary.") - File getInputFile(); - - @ParameterDescription(valueName = "out", description = "Output for Morfologik (.info will be also created).") - File getOutputFile(); - - @ParameterDescription(valueName = "char", description = "Columm separator (must be a single character)") - @OptionalParameter(defaultValue=",") - String getSeparator(); - - @ParameterDescription(valueName = "value", description = " Type of lemma-inflected form encoding compression that precedes automaton construction. 
Allowed values: [suffix, infix, prefix, none].") - @OptionalParameter(defaultValue="prefix") - String getEncoder(); - -} http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java deleted file mode 100644 index 0e7f2d5..0000000 --- a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package opennlp.morfologik.cmdline.builder; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.nio.charset.Charset; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.Iterator; -import java.util.Properties; - -import morfologik.stemming.DictionaryMetadata; -import opennlp.tools.cmdline.BasicCmdLineTool; -import opennlp.tools.cmdline.CmdLineUtil; -import opennlp.tools.cmdline.TerminateToolException; -import opennlp.tools.postag.POSDictionary; - -public class XMLDictionaryToTableTool extends BasicCmdLineTool { - - interface Params extends XMLDictionaryToTableParams { - } - - private String SEPARATOR; - - public String getShortDescription() { - return "reads an OpenNLP XML tag dictionary and outputs it in a tab separated file"; - } - - public String getHelp() { - return getBasicHelp(Params.class); - } - - public void run(String[] args) { - Params params = validateAndParseParams(args, Params.class); - - File dictInFile = params.getInputFile(); - File dictOutFile = params.getOutputFile(); - Charset encoding = params.getEncoding(); - SEPARATOR = params.getSeparator(); - - CmdLineUtil.checkInputFile("dictionary input file", dictInFile); - CmdLineUtil.checkOutputFile("dictionary output file", dictOutFile); - - POSDictionary tagDictionary = null; - try { - tagDictionary = POSDictionary.create(new FileInputStream(dictInFile)); - } catch (IOException e) { - throw new TerminateToolException(-1, - "Error while loading XML POS Dictionay: " + e.getMessage(), e); - } - Iterator<String> iterator = tagDictionary.iterator(); - - try (BufferedWriter writer = Files.newBufferedWriter(dictOutFile.toPath(), - encoding)) { - while (iterator.hasNext()) { - String word = iterator.next(); - for (String tag : tagDictionary.getTags(word)) { - if(valid(word,tag)) { - String entry = createEntry(word, tag); - writer.write(entry); - writer.newLine(); - } - } - } - writer.close(); - 
System.out.println("Created dictionary: " + dictOutFile.toPath()); - } catch (IOException e) { - throw new TerminateToolException(-1, "Error while writing output: " - + e.getMessage(), e); - } - - Properties info = new Properties(); - info.setProperty("fsa.dict.separator", SEPARATOR); - info.setProperty("fsa.dict.encoding", params.getEncoding().name()); - info.setProperty("fsa.dict.encoder", params.getEncoder()); - - Path metaPath = DictionaryMetadata.getExpectedMetadataLocation(dictOutFile.toPath()); - - try { - info.store(Files.newOutputStream(metaPath), "Info file for FSA Morfologik dictionary."); - } catch (IOException e) { - throw new TerminateToolException(-1, "Error while writing metadata output: " - + e.getMessage(), e); - } - System.out.println("Created metadata: " + dictOutFile.toPath()); - - } - - private boolean valid(String word, String tag) { - if(word.contains(SEPARATOR) || tag.contains(SEPARATOR)) { - System.out - .println("Warn: invalid entry because contains separator - word: " - + word + " tag: " + tag); - return false; - } - - return true; - } - - private String createEntry(String word, String tag) { - - String entry = "" + SEPARATOR +// base - word + SEPARATOR + - tag; - - return entry; - } - -} http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java b/morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java deleted file mode 100644 index 2090ce5..0000000 --- a/morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.morfologik.lemmatizer; - -import java.io.IOException; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import morfologik.stemming.Dictionary; -import morfologik.stemming.DictionaryLookup; -import morfologik.stemming.IStemmer; -import morfologik.stemming.WordData; -import opennlp.tools.lemmatizer.DictionaryLemmatizer; - -public class MorfologikLemmatizer implements DictionaryLemmatizer { - - private IStemmer dictLookup; - public final Set<String> constantTags = new HashSet<String>(Arrays.asList( - "NNP", "NP00000")); - - public MorfologikLemmatizer(Path dictionaryPath) throws IllegalArgumentException, - IOException { - dictLookup = new DictionaryLookup(Dictionary.read(dictionaryPath)); - } - - private HashMap<List<String>, String> getLemmaTagsDict(String word) { - List<WordData> wdList = dictLookup.lookup(word); - HashMap<List<String>, String> dictMap = new HashMap<List<String>, String>(); - for (WordData wd : wdList) { - List<String> wordLemmaTags = new ArrayList<String>(); - wordLemmaTags.add(word); - wordLemmaTags.add(wd.getTag().toString()); - dictMap.put(wordLemmaTags, wd.getStem().toString()); - } - return dictMap; - } 
- - private List<String> getDictKeys(String word, String postag) { - List<String> keys = new ArrayList<String>(); - if (constantTags.contains(postag)) { - keys.addAll(Arrays.asList(word, postag)); - } else { - keys.addAll(Arrays.asList(word.toLowerCase(), postag)); - } - return keys; - } - - private HashMap<List<String>, String> getDictMap(String word, String postag) { - HashMap<List<String>, String> dictMap = new HashMap<List<String>, String>(); - - if (constantTags.contains(postag)) { - dictMap = this.getLemmaTagsDict(word); - } else { - dictMap = this.getLemmaTagsDict(word.toLowerCase()); - } - return dictMap; - } - - public String lemmatize(String word, String postag) { - String lemma = null; - List<String> keys = this.getDictKeys(word, postag); - HashMap<List<String>, String> dictMap = this.getDictMap(word, postag); - // lookup lemma as value of the map - String keyValue = dictMap.get(keys); - if (keyValue != null) { - lemma = keyValue; - } else if (keyValue == null && constantTags.contains(postag)) { - lemma = word; - } else if (keyValue == null && word.toUpperCase() == word) { - lemma = word; - } else { - lemma = word.toLowerCase(); - } - return lemma; - } -} http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java b/morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java deleted file mode 100644 index 93d6c61..0000000 --- a/morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.morfologik.tagdict; - -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.Map; - -import morfologik.stemming.DictionaryMetadata; -import opennlp.tools.dictionary.Dictionary; -import opennlp.tools.postag.POSTaggerFactory; -import opennlp.tools.postag.TagDictionary; -import opennlp.tools.util.InvalidFormatException; -import opennlp.tools.util.model.ArtifactSerializer; -import opennlp.tools.util.model.ModelUtil; - -public class MorfologikPOSTaggerFactory extends POSTaggerFactory { - - private static final String MORFOLOGIK_POSDICT_SUF = "morfologik_dict"; - private static final String MORFOLOGIK_DICT_INFO_SUF = "morfologik_info"; - - private static final String MORFOLOGIK_POSDICT = "tagdict." - + MORFOLOGIK_POSDICT_SUF; - private static final String MORFOLOGIK_DICT_INFO = "tagdict." 
- + MORFOLOGIK_DICT_INFO_SUF; - - private TagDictionary dict; - - private byte[] dictInfo; - private byte[] dictData; - - public MorfologikPOSTaggerFactory() { - } - - public TagDictionary createTagDictionary(File dictionary) - throws InvalidFormatException, FileNotFoundException, IOException { - - if(!dictionary.canRead()) { - throw new FileNotFoundException("Could not read dictionary: " + dictionary.getAbsolutePath()); - } - - Path dictionaryMeta = DictionaryMetadata.getExpectedMetadataLocation(dictionary.toPath()); - - if(dictionaryMeta == null || !dictionaryMeta.toFile().canRead()) { - throw new FileNotFoundException("Could not read dictionary metadata: " + dictionaryMeta.getFileName()); - } - - this.dictData = Files.readAllBytes(dictionary.toPath()); - this.dictInfo = Files.readAllBytes(dictionaryMeta); - - return createMorfologikDictionary(dictData, dictInfo); - - } - - - @Override - protected void init(Dictionary ngramDictionary, TagDictionary posDictionary) { - super.init(ngramDictionary, null); - this.dict = posDictionary; - } - - @Override - public TagDictionary getTagDictionary() { - if (this.dict == null) { - - if (artifactProvider != null) { - Object obj = artifactProvider.getArtifact(MORFOLOGIK_POSDICT); - if (obj != null) { - byte[] data = (byte[]) artifactProvider - .getArtifact(MORFOLOGIK_POSDICT); - byte[] info = (byte[]) artifactProvider - .getArtifact(MORFOLOGIK_DICT_INFO); - - try { - this.dict = createMorfologikDictionary(data, info); - } catch (IllegalArgumentException e) { - throw new RuntimeException( - "Could not load the dictionary files to Morfologik.", e); - } catch (IOException e) { - throw new RuntimeException( - "IO error while reading the Morfologik dictionary files.", e); - } - } - } - } - - return this.dict; - } - - @Override - public void setTagDictionary(TagDictionary dictionary) { - this.dict = dictionary; - } - - @Override - public TagDictionary createEmptyTagDictionary() { - throw new UnsupportedOperationException( - 
"Morfologik POS Tagger factory does not support this operation"); - } - - @Override - public TagDictionary createTagDictionary(InputStream in) - throws InvalidFormatException, IOException { - throw new UnsupportedOperationException( - "Morfologik POS Tagger factory does not support this operation"); - } - - @Override - @SuppressWarnings("rawtypes") - public Map<String, ArtifactSerializer> createArtifactSerializersMap() { - Map<String, ArtifactSerializer> serializers = super - .createArtifactSerializersMap(); - - serializers.put(MORFOLOGIK_POSDICT_SUF, new ByteArraySerializer()); - serializers.put(MORFOLOGIK_DICT_INFO_SUF, new ByteArraySerializer()); - - return serializers; - } - - @Override - public Map<String, Object> createArtifactMap() { - Map<String, Object> artifactMap = super.createArtifactMap(); - artifactMap.put(MORFOLOGIK_POSDICT, this.dictData); - artifactMap.put(MORFOLOGIK_DICT_INFO, this.dictInfo); - return artifactMap; - } - - private TagDictionary createMorfologikDictionary(byte[] data, byte[] info) - throws IOException { - morfologik.stemming.Dictionary dict = morfologik.stemming.Dictionary - .read(new ByteArrayInputStream(data), new ByteArrayInputStream( - info)); - return new MorfologikTagDictionary(dict); - } - - static class ByteArraySerializer implements ArtifactSerializer<byte[]> { - - public byte[] create(InputStream in) throws IOException, - InvalidFormatException { - - return ModelUtil.read(in); - } - - public void serialize(byte[] artifact, OutputStream out) throws IOException { - out.write(artifact); - } - } - -} http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikTagDictionary.java ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikTagDictionary.java b/morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikTagDictionary.java deleted file mode 
100644 index b34ca2b..0000000 --- a/morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikTagDictionary.java +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.morfologik.tagdict; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import morfologik.stemming.Dictionary; -import morfologik.stemming.DictionaryLookup; -import morfologik.stemming.IStemmer; -import morfologik.stemming.WordData; -import opennlp.tools.postag.TagDictionary; - -/** - * A POS Tagger dictionary implementation based on Morfologik binary - * dictionaries - */ -public class MorfologikTagDictionary implements TagDictionary { - - private IStemmer dictLookup; - private boolean isCaseSensitive; - - /** - * Creates a case sensitive {@link MorfologikTagDictionary} - * - * @param dict - * a Morfologik FSA dictionary - * @throws IllegalArgumentException - * if FSA's root node cannot be acquired (dictionary is empty). 
- * @throws IOException - * could not read dictionary from dictURL - */ - public MorfologikTagDictionary(Dictionary dict) - throws IllegalArgumentException, IOException { - this(dict, true); - } - - /** - * Creates MorfologikLemmatizer - * - * @param dict - * a Morfologik FSA dictionary - * @param caseSensitive - * if true it performs case sensitive lookup - * @throws IllegalArgumentException - * if FSA's root node cannot be acquired (dictionary is empty). - * @throws IOException - * could not read dictionary from dictURL - */ - public MorfologikTagDictionary(Dictionary dict, boolean caseSensitive) - throws IllegalArgumentException, IOException { - this.dictLookup = new DictionaryLookup(dict); - this.isCaseSensitive = caseSensitive; - } - - @Override - public String[] getTags(String word) { - if (!isCaseSensitive) { - word = word.toLowerCase(); - } - - List<WordData> data = dictLookup.lookup(word); - if (data != null && data.size() > 0) { - List<String> tags = new ArrayList<String>(data.size()); - for (int i = 0; i < data.size(); i++) { - tags.add(data.get(i).getTag().toString()); - } - if (tags.size() > 0) - return tags.toArray(new String[tags.size()]); - return null; - } - return null; - } -} http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/main/java/opennlp/morfologik/util/MorfologikUtil.java ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/main/java/opennlp/morfologik/util/MorfologikUtil.java b/morfologik-addon/src/main/java/opennlp/morfologik/util/MorfologikUtil.java deleted file mode 100644 index bd4d1a4..0000000 --- a/morfologik-addon/src/main/java/opennlp/morfologik/util/MorfologikUtil.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.morfologik.util; - -import java.io.File; - -import morfologik.stemming.DictionaryMetadata; - -public class MorfologikUtil { - - public static File getExpectedPropertiesFile(File dictFile) { - return DictionaryMetadata.getExpectedMetadataLocation(dictFile.toPath()) - .toFile(); - } - - public static File getExpectedPropertiesFile(String dictFile) { - File f = new File(dictFile); - return getExpectedPropertiesFile(f); - } - -} http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/main/readme/LICENSE ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/main/readme/LICENSE b/morfologik-addon/src/main/readme/LICENSE deleted file mode 100644 index 576b4cf..0000000 --- a/morfologik-addon/src/main/readme/LICENSE +++ /dev/null @@ -1,230 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - -The following license applies to the Snowball stemmers: - - Copyright (c) 2001, Dr Martin Porter - Copyright (c) 2002, Richard Boulton - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * Neither the name of the copyright holders nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE - FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/main/readme/MORFOLOGIK-LICENSE ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/main/readme/MORFOLOGIK-LICENSE b/morfologik-addon/src/main/readme/MORFOLOGIK-LICENSE deleted file mode 100644 index 0554010..0000000 --- a/morfologik-addon/src/main/readme/MORFOLOGIK-LICENSE +++ /dev/null @@ -1,28 +0,0 @@ -Copyright (c) 2006 Dawid Weiss -Copyright (c) 2007-2015 Dawid Weiss, Marcin Miłkowski -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. 
- - * Neither the name of Morfologik nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/main/readme/NOTICE ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/main/readme/NOTICE b/morfologik-addon/src/main/readme/NOTICE deleted file mode 100644 index 73fb1d7..0000000 --- a/morfologik-addon/src/main/readme/NOTICE +++ /dev/null @@ -1,11 +0,0 @@ -Apache OpenNLP -Copyright 2010, 2013 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - -The snowball stemmers in -opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball -were developed by Martin Porter and Richard Boulton. 
-The full snowball package is available from -http://snowball.tartarus.org/ http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java b/morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java deleted file mode 100644 index 0a7ba48..0000000 --- a/morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package opennlp.morfologik.builder; - -import java.io.File; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardCopyOption; - -import junit.framework.TestCase; -import morfologik.stemming.DictionaryMetadata; -import opennlp.morfologik.lemmatizer.MorfologikLemmatizer; - -import org.junit.Test; - -public class POSDictionayBuilderTest extends TestCase { - - @Test - public void testBuildDictionary() throws Exception { - - Path output = createMorfologikDictionary(); - - MorfologikLemmatizer ml = new MorfologikLemmatizer(output); - - assertNotNull(ml); - } - - public static Path createMorfologikDictionary() throws Exception { - Path tabFilePath = File.createTempFile( - POSDictionayBuilderTest.class.getName(), ".txt").toPath(); - Path infoFilePath = DictionaryMetadata.getExpectedMetadataLocation(tabFilePath); - - Files.copy(POSDictionayBuilderTest.class.getResourceAsStream( - "/dictionaryWithLemma.txt"), tabFilePath, StandardCopyOption.REPLACE_EXISTING); - Files.copy(POSDictionayBuilderTest.class.getResourceAsStream( - "/dictionaryWithLemma.info"), infoFilePath, StandardCopyOption.REPLACE_EXISTING); - - MorfologikDictionayBuilder builder = new MorfologikDictionayBuilder(); - - return builder.build(tabFilePath); - } - -} http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java b/morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java deleted file mode 100644 index 6b7525e..0000000 --- a/morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java +++ /dev/null @@ -1,35 +0,0 @@ -package opennlp.morfologik.lemmatizer; - -import static org.junit.Assert.assertEquals; - -import java.nio.file.Path; - 
-import opennlp.morfologik.builder.POSDictionayBuilderTest; -import opennlp.tools.lemmatizer.DictionaryLemmatizer; - -import org.junit.Test; - -public class MorfologikLemmatizerTest { - - @Test - public void testLemmatizeInsensitive() throws Exception { - DictionaryLemmatizer dict = createDictionary(false); - - assertEquals("casar", dict.lemmatize("casa", "V")); - assertEquals("casa", dict.lemmatize("casa", "NOUN")); - - assertEquals("casa", dict.lemmatize("Casa", "PROP")); - - } - - private MorfologikLemmatizer createDictionary(boolean caseSensitive) - throws Exception { - - Path output = POSDictionayBuilderTest.createMorfologikDictionary(); - - MorfologikLemmatizer ml = new MorfologikLemmatizer(output); - - return ml; - } - -} http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java b/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java deleted file mode 100644 index c6c9e04..0000000 --- a/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java +++ /dev/null @@ -1,78 +0,0 @@ -package opennlp.morfologik.tagdict; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.util.Arrays; -import java.util.List; - -import morfologik.stemming.Dictionary; -import opennlp.morfologik.builder.POSDictionayBuilderTest; -import opennlp.tools.postag.TagDictionary; - -import org.junit.Test; - -public class MorfologikTagDictionaryTest { - - @Test - public void testNoLemma() throws Exception { - MorfologikTagDictionary dict = createDictionary(false); - - List<String> tags = Arrays.asList(dict.getTags("carro")); - assertEquals(1, tags.size()); - assertTrue(tags.contains("NOUN")); - - } - - 
@Test - public void testPOSDictionaryInsensitive() throws Exception { - TagDictionary dict = createDictionary(false); - - List<String> tags = Arrays.asList(dict.getTags("casa")); - assertEquals(2, tags.size()); - assertTrue(tags.contains("NOUN")); - assertTrue(tags.contains("V")); - - // this is the behavior of case insensitive dictionary - // if we search it using case insensitive, Casa as a proper noun - // should be lower case in the dictionary - tags = Arrays.asList(dict.getTags("Casa")); - assertEquals(2, tags.size()); - assertTrue(tags.contains("NOUN")); - assertTrue(tags.contains("V")); - - } - - @Test - public void testPOSDictionarySensitive() throws Exception { - TagDictionary dict = createDictionary(true); - - List<String> tags = Arrays.asList(dict.getTags("casa")); - assertEquals(2, tags.size()); - assertTrue(tags.contains("NOUN")); - assertTrue(tags.contains("V")); - - // this is the behavior of case insensitive dictionary - // if we search it using case insensitive, Casa as a proper noun - // should be lower case in the dictionary - tags = Arrays.asList(dict.getTags("Casa")); - assertEquals(1, tags.size()); - assertTrue(tags.contains("PROP")); - - } - - private MorfologikTagDictionary createDictionary(boolean caseSensitive) - throws Exception { - return this.createDictionary(caseSensitive, null); - } - - private MorfologikTagDictionary createDictionary(boolean caseSensitive, - List<String> constant) throws Exception { - - Dictionary dic = Dictionary.read(POSDictionayBuilderTest.createMorfologikDictionary()); - MorfologikTagDictionary ml = new MorfologikTagDictionary(dic, caseSensitive); - - return ml; - } - -} http://git-wip-us.apache.org/repos/asf/opennlp-addons/blob/e2f3db75/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java ---------------------------------------------------------------------- diff --git a/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java 
b/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java deleted file mode 100644 index 7341a02..0000000 --- a/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.morfologik.tagdict; - -import static org.junit.Assert.*; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.nio.file.Path; - -import opennlp.morfologik.builder.POSDictionayBuilderTest; -import opennlp.tools.postag.POSModel; -import opennlp.tools.postag.POSSample; -import opennlp.tools.postag.POSTaggerFactory; -import opennlp.tools.postag.POSTaggerME; -import opennlp.tools.postag.TagDictionary; -import opennlp.tools.postag.WordTagSampleStream; -import opennlp.tools.util.ObjectStream; -import opennlp.tools.util.TrainingParameters; -import opennlp.tools.util.model.ModelType; - -import org.junit.Test; - -/** - * Tests for the {@link POSTaggerFactory} class. 
- */ -public class POSTaggerFactoryTest { - - private static ObjectStream<POSSample> createSampleStream() - throws IOException { - InputStream in = POSTaggerFactoryTest.class.getClassLoader() - .getResourceAsStream("AnnotatedSentences.txt"); - - return new WordTagSampleStream((new InputStreamReader(in))); - } - - static POSModel trainPOSModel(ModelType type, POSTaggerFactory factory) - throws IOException { - return POSTaggerME.train("en", createSampleStream(), - TrainingParameters.defaultParams(), factory); - } - - @Test - public void testPOSTaggerWithCustomFactory() throws Exception { - - Path dictionary = POSDictionayBuilderTest.createMorfologikDictionary(); - POSTaggerFactory inFactory = new MorfologikPOSTaggerFactory(); - TagDictionary inDict = inFactory.createTagDictionary(dictionary.toFile()); - inFactory.setTagDictionary(inDict); - - POSModel posModel = trainPOSModel(ModelType.MAXENT, inFactory); - - POSTaggerFactory factory = posModel.getFactory(); - assertTrue(factory.getTagDictionary() instanceof MorfologikTagDictionary); - - factory = null; - - ByteArrayOutputStream out = new ByteArrayOutputStream(); - posModel.serialize(out); - ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); - - POSModel fromSerialized = new POSModel(in); - - factory = fromSerialized.getFactory(); - assertTrue(factory.getTagDictionary() instanceof MorfologikTagDictionary); - - assertEquals(2, factory.getTagDictionary().getTags("casa").length); - } - -} \ No newline at end of file
