[lucenenet] 18/20: Ported Lucene.Net.Analysis.Morfologik + tests

nightowl888 Tue, 03 Dec 2019 06:04:32 -0800

This is an automated email from the ASF dual-hosted git repository.

nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git


commit 71fcc19af0010baba7f7ccea4d6c91e29addde89
Author: Shad Storhaug <[email protected]>
AuthorDate: Sat Nov 23 05:00:44 2019 +0700

    Ported Lucene.Net.Analysis.Morfologik + tests
---
 Lucene.Net.sln                                     |   16 +-
 build/Dependencies.props                           |    3 +
 .../publish-test-results-for-test-projects.yml     |   20 +
 build/build.ps1                                    |   12 +
 global.json                                        |    9 +-
 .../Lucene.Net.Analysis.Morfologik.csproj          |   68 ++
 .../Morfologik/MorfologikAnalyzer.cs               |   79 ++
 .../Morfologik/MorfologikFilter.cs                 |  183 +++
 .../Morfologik/MorfologikFilterFactory.cs          |  105 ++
 .../IMorphosyntacticTagsAttribute.cs               |   44 +
 .../MorphosyntacticTagsAttribute.cs                |  105 ++
 src/Lucene.Net.Analysis.Morfologik/Uk/README       |   11 +
 .../Uk/UkrainianMorfologikAnalyzer.cs              |  177 +++
 .../Uk/mapping_uk.txt                              |   19 +
 .../Uk/stopwords.txt                               | 1269 ++++++++++++++++++++
 src/Lucene.Net.Analysis.Morfologik/Uk/tagset.txt   |  170 +++
 .../Uk/ukrainian.dict                              |  Bin 0 -> 6929502 bytes
 .../Uk/ukrainian.info                              |   10 +
 .../Properties/AssemblyInfo.cs                     |    2 +
 .../Lucene.Net.Tests.Analysis.Morfologik.csproj    |   47 +
 .../Morfologik/TestMorfologikAnalyzer.cs           |  235 ++++
 .../Morfologik/TestMorfologikFilterFactory.cs      |  107 ++
 .../Morfologik/custom-dictionary.dict              |  Bin 0 -> 90 bytes
 .../Morfologik/custom-dictionary.info              |   24 +
 .../Morfologik/custom-dictionary.input             |    2 +
 .../Uk/TestUkrainianAnalyzer.cs                    |   92 ++
 26 files changed, 2804 insertions(+), 5 deletions(-)

diff --git a/Lucene.Net.sln b/Lucene.Net.sln
index 192f434..92655ec 100644
--- a/Lucene.Net.sln
+++ b/Lucene.Net.sln
@@ -193,9 +193,13 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = 
"Lucene.Net.Tests.TestFramew
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = 
"Lucene.Net.TestFramework.MSTest", 
"src\Lucene.Net.TestFramework.MSTest\Lucene.Net.TestFramework.MSTest.csproj", 
"{48520313-3B78-40D9-AE34-4864BFADF747}"
 EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = 
"Lucene.Net.Analysis.OpenNLP", 
"src\Lucene.Net.Analysis.OpenNLP\Lucene.Net.Analysis.OpenNLP.csproj", 
"{CC2CE069-5BBB-429E-8510-7C3FBA8069D5}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = 
"Lucene.Net.Analysis.OpenNLP", 
"src\Lucene.Net.Analysis.OpenNLP\Lucene.Net.Analysis.OpenNLP.csproj", 
"{CC2CE069-5BBB-429E-8510-7C3FBA8069D5}"
 EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = 
"Lucene.Net.Tests.Analysis.OpenNLP", 
"src\Lucene.Net.Tests.Analysis.OpenNLP\Lucene.Net.Tests.Analysis.OpenNLP.csproj",
 "{88D6D124-711D-4232-AD70-F22AB6AF9EA1}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = 
"Lucene.Net.Tests.Analysis.OpenNLP", 
"src\Lucene.Net.Tests.Analysis.OpenNLP\Lucene.Net.Tests.Analysis.OpenNLP.csproj",
 "{88D6D124-711D-4232-AD70-F22AB6AF9EA1}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = 
"Lucene.Net.Analysis.Morfologik", 
"src\Lucene.Net.Analysis.Morfologik\Lucene.Net.Analysis.Morfologik.csproj", 
"{17C7E54C-7A95-46A5-9905-90F68D349F3F}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = 
"Lucene.Net.Tests.Analysis.Morfologik", 
"src\Lucene.Net.Tests.Analysis.Morfologik\Lucene.Net.Tests.Analysis.Morfologik.csproj",
 "{435F91AD-8BA4-4376-904C-385A165C1AF0}"
 EndProject
 Global
        GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -451,6 +455,14 @@ Global
                {88D6D124-711D-4232-AD70-F22AB6AF9EA1}.Debug|Any CPU.Build.0 = 
Debug|Any CPU
                {88D6D124-711D-4232-AD70-F22AB6AF9EA1}.Release|Any 
CPU.ActiveCfg = Release|Any CPU
                {88D6D124-711D-4232-AD70-F22AB6AF9EA1}.Release|Any CPU.Build.0 
= Release|Any CPU
+               {17C7E54C-7A95-46A5-9905-90F68D349F3F}.Debug|Any CPU.ActiveCfg 
= Debug|Any CPU
+               {17C7E54C-7A95-46A5-9905-90F68D349F3F}.Debug|Any CPU.Build.0 = 
Debug|Any CPU
+               {17C7E54C-7A95-46A5-9905-90F68D349F3F}.Release|Any 
CPU.ActiveCfg = Release|Any CPU
+               {17C7E54C-7A95-46A5-9905-90F68D349F3F}.Release|Any CPU.Build.0 
= Release|Any CPU
+               {435F91AD-8BA4-4376-904C-385A165C1AF0}.Debug|Any CPU.ActiveCfg 
= Debug|Any CPU
+               {435F91AD-8BA4-4376-904C-385A165C1AF0}.Debug|Any CPU.Build.0 = 
Debug|Any CPU
+               {435F91AD-8BA4-4376-904C-385A165C1AF0}.Release|Any 
CPU.ActiveCfg = Release|Any CPU
+               {435F91AD-8BA4-4376-904C-385A165C1AF0}.Release|Any CPU.Build.0 
= Release|Any CPU
        EndGlobalSection
        GlobalSection(SolutionProperties) = preSolution
                HideSolutionNode = FALSE
diff --git a/build/Dependencies.props b/build/Dependencies.props
index 6852efd..c9be589 100644
--- a/build/Dependencies.props
+++ b/build/Dependencies.props
@@ -43,6 +43,9 @@
     <MicrosoftCSharpPackageVersion>4.4.0</MicrosoftCSharpPackageVersion>
     
<MicrosoftExtensionsDependencyModelPackageVersion>2.0.0</MicrosoftExtensionsDependencyModelPackageVersion>
     
<MicrosoftNETTestSdkPackageVersion>16.2.0</MicrosoftNETTestSdkPackageVersion>
+    <MorfologikFsaPackageVersion>2.1.6-alpha-0001</MorfologikFsaPackageVersion>
+    
<MorfologikPolishPackageVersion>$(MorfologikFsaPackageVersion)</MorfologikPolishPackageVersion>
+    
<MorfologikStemmingPackageVersion>$(MorfologikFsaPackageVersion)</MorfologikStemmingPackageVersion>
     
<MSTestTestFrameworkPackageVersion>2.0.0</MSTestTestFrameworkPackageVersion>
     
<MSTestTestAdapterPackageVersion>$(MSTestTestFrameworkPackageVersion)</MSTestTestAdapterPackageVersion>
     
<NETStandardLibrary16PackageVersion>1.6.1</NETStandardLibrary16PackageVersion>
diff --git a/build/azure-templates/publish-test-results-for-test-projects.yml 
b/build/azure-templates/publish-test-results-for-test-projects.yml
index 1b61c3c..cdc53aa 100644
--- a/build/azure-templates/publish-test-results-for-test-projects.yml
+++ b/build/azure-templates/publish-test-results-for-test-projects.yml
@@ -91,6 +91,26 @@ steps:
     testResultsArtifactName: '${{ parameters.testResultsArtifactName }}'
     testResultsFileName: '${{ parameters.testResultsFileName }}'
 
+# Special case: Morfologik doesn't support netcoreapp1.x, so its target frameworks are listed explicitly
+- template: publish-test-results.yml
+  parameters:
+    framework: 'netcoreapp2.1'
+    testProjectName: 'Lucene.Net.Tests.Analysis.Morfologik'
+    osName: '${{ parameters.osName }}'
+    testResultsFormat: '${{ parameters.testResultsFormat }}'
+    testResultsArtifactName: '${{ parameters.testResultsArtifactName }}'
+    testResultsFileName: '${{ parameters.testResultsFileName }}'
+
+- template: publish-test-results.yml
+  parameters:
+    framework: 'net451'
+    testProjectName: 'Lucene.Net.Tests.Analysis.Morfologik'
+    osName: '${{ parameters.osName }}'
+    testResultsFormat: '${{ parameters.testResultsFormat }}'
+    testResultsArtifactName: '${{ parameters.testResultsArtifactName }}'
+    testResultsFileName: '${{ parameters.testResultsFileName }}'
+
+
 - template: publish-test-results-for-target-frameworks.yml
   parameters:
     testProjectName: 'Lucene.Net.Tests._A-D'
diff --git a/build/build.ps1 b/build/build.ps1
index d9725b6..e31d1a1 100644
--- a/build/build.ps1
+++ b/build/build.ps1
@@ -220,6 +220,11 @@ task Publish -depends Compile -description "This task uses 
dotnet publish to pac
                                        continue
                                }
 
+                               # Special case - Morfologik doesn't support 
.NET Standard 1.x
+                               if 
($projectName.Contains("Tests.Analysis.Morfologik") -and 
($framework.StartsWith("netcoreapp1."))) {
+                                       continue
+                               }
+
                                $logPath = "$outDirectory/$framework"
                                $outputPath = "$logPath/$projectName"
 
@@ -304,6 +309,13 @@ task Test -depends InstallSDK, UpdateLocalSDKVersion, 
Restore -description "This
                                continue
                        }
 
+                       # Special case - Morfologik doesn't support .NET 
Standard 1.x
+                       if ($testName.Contains("Tests.Analysis.Morfologik") 
-and ($framework.StartsWith("netcoreapp1."))) {
+                               $totalProjects--
+                               $remainingProjects--
+                               continue
+                       }
+
                        Write-Host "  Next Project in Queue: $testName, 
Framework: $framework" -ForegroundColor Yellow
 
                        # Pause if we have queued too many parallel jobs
diff --git a/global.json b/global.json
index ce063d2..cccadab 100644
--- a/global.json
+++ b/global.json
@@ -1,3 +1,6 @@
-{
-    "sources": [ "src" ]
-}
\ No newline at end of file
+{
+  "sources": [ "src" ],
+  "sdk": {
+    "version": "2.2.300"
+  }
+}
diff --git 
a/src/Lucene.Net.Analysis.Morfologik/Lucene.Net.Analysis.Morfologik.csproj 
b/src/Lucene.Net.Analysis.Morfologik/Lucene.Net.Analysis.Morfologik.csproj
new file mode 100644
index 0000000..10b82ad
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Morfologik/Lucene.Net.Analysis.Morfologik.csproj
@@ -0,0 +1,68 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <Import Project="$(SolutionDir)build/NuGet.props" />
+
+  <PropertyGroup>
+    <TargetFrameworks>netstandard2.0</TargetFrameworks>
+    <TargetFrameworks 
Condition="$([MSBuild]::IsOsPlatform('Windows'))">$(TargetFrameworks);net451</TargetFrameworks>
+    <PackageTargetFallback Condition=" '$(TargetFramework)' == 
'netstandard1.6' ">$(PackageTargetFallback);dnxcore50</PackageTargetFallback>
+
+    <AssemblyTitle>Lucene.Net.Analysis.Morfologik</AssemblyTitle>
+    <RootNamespace>Lucene.Net.Analysis</RootNamespace>
+    <Description>Morfologik dictionary-driven stemming analyzer (Polish and Ukrainian) for the Lucene.Net full-text 
search engine library from The Apache Software Foundation.</Description>
+    <PackageTags>$(PackageTags);analysis;polish;ukrainian</PackageTags>
+    
<DocumentationFile>bin\$(Configuration)\$(TargetFramework)\$(AssemblyName).xml</DocumentationFile>
+    <NoWarn>$(NoWarn);1591;1573</NoWarn>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <None Remove="Uk\mapping_uk.txt" />
+    <None Remove="Uk\README" />
+    <None Remove="Uk\stopwords.txt" />
+    <None Remove="Uk\tagset.txt" />
+    <None Remove="Uk\ukrainian.dict" />
+    <None Remove="Uk\ukrainian.info" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <Compile Include="..\CommonAssemblyKeys.cs" 
Link="Properties\CommonAssemblyKeys.cs" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <EmbeddedResource Include="Uk\mapping_uk.txt" />
+    <EmbeddedResource Include="Uk\README" />
+    <EmbeddedResource Include="Uk\stopwords.txt" />
+    <EmbeddedResource Include="Uk\tagset.txt" />
+    <EmbeddedResource Include="Uk\ukrainian.dict" />
+    <EmbeddedResource Include="Uk\ukrainian.info" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <PackageReference Include="Morfologik.Fsa" 
Version="$(MorfologikFsaPackageVersion)" />
+    <PackageReference Include="Morfologik.Polish" 
Version="$(MorfologikPolishPackageVersion)" />
+    <PackageReference Include="Morfologik.Stemming" 
Version="$(MorfologikStemmingPackageVersion)" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference 
Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj" />
+    <ProjectReference Include="..\Lucene.Net\Lucene.Net.csproj" />
+  </ItemGroup>
+
+  <PropertyGroup Condition=" '$(TargetFramework)' == 'netstandard2.0' ">
+    <DebugType>portable</DebugType>
+  </PropertyGroup>
+
+  <PropertyGroup Condition=" '$(TargetFramework)' == 'netstandard1.6' ">
+    <DebugType>portable</DebugType>
+  </PropertyGroup>
+
+  <PropertyGroup Condition=" '$(TargetFramework)' == 'net451' ">
+    <DebugType>full</DebugType>
+  </PropertyGroup>
+
+  <ItemGroup Condition=" '$(TargetFramework)' == 'net451' ">
+    <Reference Include="System" />
+    <Reference Include="Microsoft.CSharp" />
+  </ItemGroup>
+
+</Project>
diff --git 
a/src/Lucene.Net.Analysis.Morfologik/Morfologik/MorfologikAnalyzer.cs 
b/src/Lucene.Net.Analysis.Morfologik/Morfologik/MorfologikAnalyzer.cs
new file mode 100644
index 0000000..9147da2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Morfologik/Morfologik/MorfologikAnalyzer.cs
@@ -0,0 +1,79 @@
+// Lucene version compatibility level 8.2.0
+using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Util;
+using Morfologik.Stemming;
+using Morfologik.Stemming.Polish;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Morfologik
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// <see cref="Analyzer"/> using Morfologik library.
+    /// <para/>
+    /// See: <a href="http://morfologik.blogspot.com/">Morfologik project 
page</a>
+    /// </summary>
+    /// <since>4.0.0</since>
+    public class MorfologikAnalyzer : Analyzer
+    {
+        private readonly Dictionary dictionary;
+        private readonly LuceneVersion version;
+
+        /// <summary>
+        /// Builds an analyzer with an explicit <see cref="Dictionary"/> 
resource.
+        /// <para/>
+        /// See: <a 
href="https://github.com/morfologik/">https://github.com/morfologik/</a>
+        /// </summary>
+        /// <param name="version">Lucene compatibility version</param>
+        /// <param name="dictionary">A prebuilt automaton with inflected and 
base word forms.</param>
+        public MorfologikAnalyzer(LuceneVersion version, Dictionary dictionary)
+        {
+            this.version = version;
+            this.dictionary = dictionary;
+        }
+
+        /// <summary>
+        /// Builds an analyzer with Morfologik's default Polish dictionary.
+        /// </summary>
+        /// <param name="version">Lucene compatibility version</param>
+        public MorfologikAnalyzer(LuceneVersion version)
+            : this(version, new PolishStemmer().Dictionary)
+        {
+        }
+
+        /// <summary>
+        /// Creates a <see cref="TokenStreamComponents"/>
+        /// which tokenizes all the text in the provided <paramref 
name="reader"/>.
+        /// </summary>
+        /// <param name="fieldName">Ignored field name.</param>
+        /// <param name="reader">Source of tokens.</param>
+        /// <returns>A <see cref="TokenStreamComponents"/>
+        /// built from a <see cref="StandardTokenizer"/> filtered with
+        /// <see cref="MorfologikFilter"/>.</returns>
+        protected override TokenStreamComponents CreateComponents(string 
fieldName, TextReader reader)
+        {
+            Tokenizer src = new StandardTokenizer(this.version, reader);
+
+            return new TokenStreamComponents(
+                src,
+                new MorfologikFilter(src, dictionary));
+        }
+
+    }
+}
diff --git a/src/Lucene.Net.Analysis.Morfologik/Morfologik/MorfologikFilter.cs 
b/src/Lucene.Net.Analysis.Morfologik/Morfologik/MorfologikFilter.cs
new file mode 100644
index 0000000..5562c8d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Morfologik/Morfologik/MorfologikFilter.cs
@@ -0,0 +1,183 @@
+// Lucene version compatibility level 8.2.0
+using Lucene.Net.Analysis.Morfologik.TokenAttributes;
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Morfologik.Stemming;
+using Morfologik.Stemming.Polish;
+using System;
+using System.Collections.Generic;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Morfologik
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// <see cref="TokenFilter"/> using Morfologik library to transform input 
tokens into lemma and
+    /// morphosyntactic (POS) tokens. Uses the Polish dictionary unless another <see cref="Dictionary"/> is supplied.
+    /// <para/>
+    /// MorfologikFilter contains a <see 
cref="MorphosyntacticTagsAttribute"/>, which provides morphosyntactic
+    /// annotations for produced lemmas. See the Morfologik documentation for 
details.
+    /// </summary>
+    public class MorfologikFilter : TokenFilter
+    {
+        private readonly ICharTermAttribute termAtt;
+        private readonly IMorphosyntacticTagsAttribute tagsAtt;
+        private readonly IPositionIncrementAttribute posIncrAtt;
+        private readonly IKeywordAttribute keywordAttr;
+
+        private readonly CharsRef scratch = new CharsRef();
+
+        private State current;
+        private readonly TokenStream input;
+        private readonly IStemmer stemmer;
+
+        private IList<WordData> lemmaList;
+        private readonly List<StringBuilder> tagsList = new 
List<StringBuilder>();
+
+        private int lemmaListIndex;
+
+        /// <summary>
+        /// Creates a filter with the default (Polish) dictionary.
+        /// </summary>
+        /// <param name="input">Input token stream.</param>
+        public MorfologikFilter(TokenStream input)
+            : this(input, new PolishStemmer().Dictionary)
+        {
+        }
+
+        /// <summary>
+        /// Creates a filter with a given dictionary.
+        /// </summary>
+        /// <param name="input">Input token stream.</param>
+        /// <param name="dict"><see cref="Dictionary"/> to use for 
stemming.</param>
+        public MorfologikFilter(TokenStream input, Dictionary dict)
+            : base(input)
+        {
+            this.termAtt = AddAttribute<ICharTermAttribute>();
+            this.tagsAtt = AddAttribute<IMorphosyntacticTagsAttribute>();
+            this.posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
+            this.keywordAttr = AddAttribute<IKeywordAttribute>();
+
+            this.input = input;
+            this.stemmer = new DictionaryLookup(dict);
+            this.lemmaList = new List<WordData>();
+        }
+
+        /// <summary>
+        /// A regex used to split a lemma's tag string into individual tags (on <c>+</c> or <c>|</c>).
+        /// </summary>
+        private readonly static Regex lemmaSplitter = new Regex("\\+|\\|", 
RegexOptions.Compiled);
+
+        private void PopNextLemma()
+        {
+            // One tag (concatenated) per lemma.
+            WordData lemma = lemmaList[lemmaListIndex++];
+            termAtt.SetEmpty().Append(lemma.GetStem().ToString());
+            var tag = lemma.GetTag();
+            if (tag != null)
+            {
+                string[] tags = lemmaSplitter.Split(tag.ToString());
+                for (int i = 0; i < tags.Length; i++)
+                {
+                    if (tagsList.Count <= i)
+                    {
+                        tagsList.Add(new StringBuilder());
+                    }
+                    StringBuilder buffer = tagsList[i];
+                    buffer.Length = 0;
+                    buffer.Append(tags[i]);
+                }
+                tagsAtt.Tags = tagsList.SubList(0, tags.Length);
+            }
+            else
+            {
+                tagsAtt.Tags = Collections.EmptyList<StringBuilder>();
+            }
+        }
+
+        /// <summary>
+        /// Lookup a given surface form of a token and update
+        /// <see cref="lemmaList"/> and <see cref="lemmaListIndex"/> 
accordingly.
+        /// </summary>
+        private bool LookupSurfaceForm(string token)
+        {
+            lemmaList = this.stemmer.Lookup(token);
+            lemmaListIndex = 0;
+            return lemmaList.Count > 0;
+        }
+
+        /// <summary>Retrieves the next token (possibly from the list of 
lemmas).</summary>
+        public override sealed bool IncrementToken()
+        {
+            if (lemmaListIndex < lemmaList.Count)
+            {
+                RestoreState(current);
+                posIncrAtt.PositionIncrement = 0;
+                PopNextLemma();
+                return true;
+            }
+            else if (this.input.IncrementToken())
+            {
+                if (!keywordAttr.IsKeyword &&
+                    (LookupSurfaceForm(termAtt.ToString()) || 
LookupSurfaceForm(ToLowercase(termAtt.ToString()))))
+                {
+                    current = CaptureState();
+                    PopNextLemma();
+                }
+                else
+                {
+                    tagsAtt.Clear();
+                }
+                return true;
+            }
+            else
+            {
+                return false;
+            }
+        }
+
+        /// <summary>Returns a lowercased copy of <paramref name="chs"/>, built code point by code point in the reusable scratch buffer.</summary>
+        private string ToLowercase(string chs)
+        {
+            int length = chs.Length;
+            scratch.Length = length;
+            scratch.Grow(length);
+
+            char[] buffer = scratch.Chars;
+            for (int i = 0; i < length;)
+            {
+                i += Character.ToChars(
+                    Character.ToLower(Character.CodePointAt(chs, i)), buffer, 
i);
+            }
+
+            return scratch.ToString();
+        }
+
+        /// <summary>Resets stems accumulator and hands over to 
superclass.</summary>
+        public override void Reset()
+        {
+            lemmaListIndex = 0;
+            lemmaList = new List<WordData>();
+            tagsList.Clear();
+            base.Reset();
+        }
+    }
+}
diff --git 
a/src/Lucene.Net.Analysis.Morfologik/Morfologik/MorfologikFilterFactory.cs 
b/src/Lucene.Net.Analysis.Morfologik/Morfologik/MorfologikFilterFactory.cs
new file mode 100644
index 0000000..5feef4a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Morfologik/Morfologik/MorfologikFilterFactory.cs
@@ -0,0 +1,105 @@
+// Lucene version compatibility level 8.2.0
+using Lucene.Net.Analysis.Util;
+using Morfologik.Stemming;
+using Morfologik.Stemming.Polish;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Morfologik
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Filter factory for <see cref="MorfologikFilter"/>.
+    /// <para/>
+    /// An explicit resource name of the dictionary (<c>".dict"</c>) can be 
+    /// provided via the <c>dictionary</c> attribute, as the example 
below demonstrates:
+    /// <code>
+    /// &lt;fieldType name="text_mylang" class="solr.TextField" 
positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.MorfologikFilterFactory" 
dictionary="mylang.dict" /&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;
+    /// </code>
+    /// <para/>
+    /// If the dictionary attribute is not provided, the Polish dictionary is 
loaded
+    /// and used by default.
+    /// <para/>
+    /// See: <a href="http://morfologik.blogspot.com/">Morfologik web site</a>
+    /// </summary>
+    /// <since>4.0.0</since>
+    public class MorfologikFilterFactory : TokenFilterFactory, 
IResourceLoaderAware
+    {
+        /// <summary>Dictionary resource attribute (should have <c>".dict"</c> 
suffix), loaded from <see cref="IResourceLoader"/>.</summary>
+        public const string DICTIONARY_ATTRIBUTE = "dictionary";
+
+        /// <summary><see cref="DICTIONARY_ATTRIBUTE"/> value passed to <see 
cref="Inform(IResourceLoader)"/>.</summary>
+        private readonly string resourceName;
+
+        /// <summary>Loaded <see cref="Dictionary"/>, initialized on <see 
cref="Inform(IResourceLoader)"/>.</summary>
+        private Dictionary dictionary;
+
+        /// <summary>Creates a new <see 
cref="MorfologikFilterFactory"/></summary>
+        public MorfologikFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            // Be specific about no-longer-supported dictionary attribute.
+            string DICTIONARY_RESOURCE_ATTRIBUTE = "dictionary-resource";
+            string dictionaryResource = Get(args, 
DICTIONARY_RESOURCE_ATTRIBUTE);
+            if (!string.IsNullOrEmpty(dictionaryResource))
+            {
+                throw new ArgumentException("The " + 
DICTIONARY_RESOURCE_ATTRIBUTE + " attribute is no "
+                    + "longer supported. Use the '" + DICTIONARY_ATTRIBUTE + 
"' attribute instead (see LUCENE-6833).");
+            }
+
+            resourceName = Get(args, DICTIONARY_ATTRIBUTE);
+
+            if (args.Count != 0)
+            {
+                throw new ArgumentException("Unknown parameters: " + args);
+            }
+        }
+
+        public virtual void Inform(IResourceLoader loader)
+        {
+            if (resourceName == null)
+            {
+                // Get the dictionary lazily, does not hold up memory.
+                this.dictionary = new PolishStemmer().Dictionary;
+            }
+            else
+            {
+                using (Stream dict = loader.OpenResource(resourceName))
+                using (Stream meta = 
loader.OpenResource(DictionaryMetadata.GetExpectedMetadataFileName(resourceName)))
+                {
+                    this.dictionary = Dictionary.Read(dict, meta);
+                }
+            }
+        }
+
+        public override TokenStream Create(TokenStream ts)
+        {
+            if (this.dictionary == null)
+                throw new ArgumentException("MorfologikFilterFactory was not 
fully initialized.");
+
+            return new MorfologikFilter(ts, dictionary);
+        }
+    }
+}
diff --git 
a/src/Lucene.Net.Analysis.Morfologik/Morfologik/TokenAttributes/IMorphosyntacticTagsAttribute.cs
 
b/src/Lucene.Net.Analysis.Morfologik/Morfologik/TokenAttributes/IMorphosyntacticTagsAttribute.cs
new file mode 100644
index 0000000..e537403
--- /dev/null
+++ 
b/src/Lucene.Net.Analysis.Morfologik/Morfologik/TokenAttributes/IMorphosyntacticTagsAttribute.cs
@@ -0,0 +1,44 @@
+// Lucene version compatibility level 8.2.0
+using Lucene.Net.Util;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Morfologik.TokenAttributes
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Morfologik provides morphosyntactic annotations for
+    /// surface forms. For the exact format and description of these,
+    /// see the project's documentation.
+    /// </summary>
+    public interface IMorphosyntacticTagsAttribute : IAttribute
+    {
+        /// <summary>
+        /// Gets or sets the POS tag of the term. A single word may have 
multiple POS tags,
+        /// depending on the interpretation (context disambiguation is 
typically needed
+        /// to determine which particular tag is appropriate).
+        /// <para/>
+        /// The default value (no-value) is null. Returns a list of POS tags 
corresponding to current lemma.
+        /// </summary>
+        IList<StringBuilder> Tags { get; set; }
+
+        /// <summary>Clear to default value.</summary>
+        void Clear();
+    }
+}
diff --git 
a/src/Lucene.Net.Analysis.Morfologik/Morfologik/TokenAttributes/MorphosyntacticTagsAttribute.cs
 
b/src/Lucene.Net.Analysis.Morfologik/Morfologik/TokenAttributes/MorphosyntacticTagsAttribute.cs
new file mode 100644
index 0000000..2e15a3d
--- /dev/null
+++ 
b/src/Lucene.Net.Analysis.Morfologik/Morfologik/TokenAttributes/MorphosyntacticTagsAttribute.cs
@@ -0,0 +1,105 @@
+// Lucene version compatibility level 8.2.0
+using Lucene.Net.Util;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Morfologik.TokenAttributes
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Morphosyntactic annotations for surface forms.
+    /// </summary>
+    /// <seealso cref="IMorphosyntacticTagsAttribute"/>
+    public class MorphosyntacticTagsAttribute : Attribute, 
IMorphosyntacticTagsAttribute
+#if FEATURE_CLONEABLE
+        , ICloneable
+#endif
+    {
+        /// <summary>Initializes this attribute with no tags</summary>
+        public MorphosyntacticTagsAttribute() { }
+
+        /// <summary>
+        /// A list of potential tag variants for the current token.
+        /// </summary>
+        private IList<StringBuilder> tags;
+
+        /// <summary>
+        /// Gets or sets the POS tags of the term. If you need a copy of this 
list, copy
+        /// its contents (and clone <see cref="StringBuilder"/>s) because it 
changes with
+        /// each new term to avoid unnecessary memory allocations.
+        /// </summary>
+        public virtual IList<StringBuilder> Tags
+        {
+            get => tags;
+            set => tags = value;
+        }
+
+
+        public override void Clear()
+        {
+            tags = null;
+        }
+
+
+        public override bool Equals(object other)
+        {
+            if (other is IMorphosyntacticTagsAttribute)
+            {
+                return Equal(this.Tags, 
((IMorphosyntacticTagsAttribute)other).Tags);
+            }
+            return false;
+        }
+
+        private bool Equal(object l1, object l2)
+        {
+            return l1 == null ? (l2 == null) : (l1.Equals(l2));
+        }
+
+        public override int GetHashCode()
+        {
+            return this.tags == null ? 0 : tags.GetHashCode();
+        }
+
+        public override void CopyTo(IAttribute target)
+        {
+            List<StringBuilder> cloned = null;
+            if (tags != null)
+            {
+                cloned = new List<StringBuilder>(tags.Count);
+                foreach (StringBuilder b in tags)
+                {
+                    cloned.Add(new StringBuilder(b.ToString()));
+                }
+            }
+            ((IMorphosyntacticTagsAttribute)target).Tags = cloned;
+        }
+
+        public override object Clone()
+        {
+            MorphosyntacticTagsAttribute cloned = new 
MorphosyntacticTagsAttribute();
+            this.CopyTo(cloned);
+            return cloned;
+        }
+
+        public override void ReflectWith(IAttributeReflector reflector)
+        {
+            reflector.Reflect(typeof(MorphosyntacticTagsAttribute), "tags", 
tags);
+        }
+    }
+}
diff --git a/src/Lucene.Net.Analysis.Morfologik/Uk/README 
b/src/Lucene.Net.Analysis.Morfologik/Uk/README
new file mode 100644
index 0000000..f11d845
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Morfologik/Uk/README
@@ -0,0 +1,11 @@
+ukrainian.dict is a binary dictionary file for morphological analysis with the fsa_morph program
+(see 
http://www.eti.pg.gda.pl/katedry/kiw/pracownicy/Jan.Daciuk/personal/fsa.html).
+
+See tagset.txt for description of the tags.
+
+This dictionary is currently under development and is based on dict_uk project 
(https://github.com/brown-uk/dict_uk)
+
+Note: to better fit the full-text search model, this dictionary has all word forms in lower case but keeps lemmas for proper nouns in upper case.
+Also letter ґ was normalized to г.
+
+Licensed under GPL/LGPL, CC BY-NC-SA 4.0, and Apache License 2.0.
diff --git 
a/src/Lucene.Net.Analysis.Morfologik/Uk/UkrainianMorfologikAnalyzer.cs 
b/src/Lucene.Net.Analysis.Morfologik/Uk/UkrainianMorfologikAnalyzer.cs
new file mode 100644
index 0000000..e38e974
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Morfologik/Uk/UkrainianMorfologikAnalyzer.cs
@@ -0,0 +1,177 @@
+// Lucene version compatibility level 8.2.0
+using Lucene.Net.Analysis.CharFilters;
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.Morfologik;
+using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Morfologik.Stemming;
+using System;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Uk
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// A dictionary-based <see cref="Analyzer"/> for Ukrainian.
+    /// <para/>
+    /// The character-normalization map and the Ukrainian dictionary are built once
+    /// and shared by every instance and every token stream.
+    /// </summary>
+    /// <since>6.2.0</since>
+    public sealed class UkrainianMorfologikAnalyzer : StopwordAnalyzerBase
+    {
+        private readonly CharArraySet stemExclusionSet;
+
+        /// <summary>File containing default Ukrainian stopwords.</summary>
+        public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+        /// <summary>
+        /// Character normalization applied before tokenization. Built once because
+        /// its content is fixed and <see cref="InitReader(string, TextReader)"/> is
+        /// invoked for every reader that gets analyzed.
+        /// </summary>
+        private static readonly NormalizeCharMap NORMALIZER_MAP = BuildNormalizerMap();
+
+        /// <summary>
+        /// The embedded Ukrainian dictionary, loaded on first use and then reused so
+        /// the resource is not re-read for every <see cref="TokenStreamComponents"/>.
+        /// </summary>
+        private static readonly Lazy<Dictionary> DICTIONARY = new Lazy<Dictionary>(() => LoadDictionary());
+
+        /// <summary>
+        /// Returns an unmodifiable instance of the default stop words set.
+        /// </summary>
+        /// <returns>Default stop words set.</returns>
+        public static CharArraySet DefaultStopSet => DefaultSetHolder.DEFAULT_STOP_SET;
+
+        /// <summary>
+        /// Atomically loads the <see cref="DEFAULT_STOP_SET"/> in a lazy fashion once the outer class
+        /// accesses the static final set the first time.
+        /// </summary>
+        private static class DefaultSetHolder
+        {
+            internal static readonly CharArraySet DEFAULT_STOP_SET = LoadDefaultSet(); // LUCENENET: Avoid static constructors (see https://github.com/apache/lucenenet/pull/224#issuecomment-469284006)
+
+            private static CharArraySet LoadDefaultSet()
+            {
+                try
+                {
+                    return WordlistLoader.GetSnowballWordSet(IOUtils.GetDecodingReader(typeof(UkrainianMorfologikAnalyzer),
+                        DEFAULT_STOPWORD_FILE, Encoding.UTF8),
+#pragma warning disable 612, 618
+                        LuceneVersion.LUCENE_CURRENT);
+#pragma warning restore 612, 618
+                }
+                catch (IOException ex)
+                {
+                    // The default set should always be present because it is embedded
+                    // in the assembly. Keep the original failure as the inner exception
+                    // so the root cause is not lost.
+                    throw new Exception("Unable to load default stopword set", ex);
+                }
+            }
+        }
+
+
+        /// <summary>
+        /// Builds an analyzer with the default stop words: <see cref="DEFAULT_STOPWORD_FILE"/>.
+        /// </summary>
+        /// <param name="matchVersion"><see cref="LuceneVersion"/> to match.</param>
+        public UkrainianMorfologikAnalyzer(LuceneVersion matchVersion)
+            : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+        {
+        }
+
+        /// <summary>
+        /// Builds an analyzer with the given stop words.
+        /// </summary>
+        /// <param name="matchVersion"><see cref="LuceneVersion"/> to match.</param>
+        /// <param name="stopwords">A stopword set.</param>
+        public UkrainianMorfologikAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords)
+            : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+        {
+        }
+
+        /// <summary>
+        /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+        /// provided this analyzer will add a <see cref="SetKeywordMarkerFilter"/> before
+        /// stemming.
+        /// </summary>
+        /// <param name="matchVersion"><see cref="LuceneVersion"/> to match.</param>
+        /// <param name="stopwords">A stopword set.</param>
+        /// <param name="stemExclusionSet">A set of terms not to be stemmed.</param>
+        public UkrainianMorfologikAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet)
+            : base(matchVersion, stopwords)
+        {
+            this.stemExclusionSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stemExclusionSet));
+        }
+
+        /// <summary>
+        /// Wraps <paramref name="reader"/> in a <see cref="MappingCharFilter"/> that
+        /// unifies apostrophe variants, strips the combining acute accent and the soft
+        /// hyphen, and maps ґ/Ґ to г/Г.
+        /// </summary>
+        /// <param name="fieldName">Name of the field being analyzed (not used here).</param>
+        /// <param name="reader">The original character source.</param>
+        /// <returns>The normalizing reader.</returns>
+        protected override TextReader InitReader(string fieldName, TextReader reader)
+        {
+            return new MappingCharFilter(NORMALIZER_MAP, reader);
+        }
+
+        /// <summary>
+        /// Builds the character map used by <see cref="InitReader(string, TextReader)"/>.
+        /// </summary>
+        private static NormalizeCharMap BuildNormalizerMap()
+        {
+            NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
+            // different apostrophes
+            builder.Add("\u2019", "'");
+            builder.Add("\u2018", "'");
+            builder.Add("\u02BC", "'");
+            builder.Add("`", "'");
+            builder.Add("´", "'");
+            // ignored characters
+            builder.Add("\u0301", "");
+            builder.Add("\u00AD", "");
+            builder.Add("ґ", "г");
+            builder.Add("Ґ", "Г");
+
+            return builder.Build();
+        }
+
+        /// <summary>
+        /// Creates a <see cref="TokenStreamComponents"/>
+        /// which tokenizes all the text in the provided <see cref="TextReader"/>.
+        /// </summary>
+        /// <param name="fieldName">Name of the field being analyzed (not used here).</param>
+        /// <param name="reader">The character source to tokenize.</param>
+        /// <returns>A <see cref="TokenStreamComponents"/> built from a <see cref="StandardTokenizer"/>
+        /// filtered with <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>, <see cref="SetKeywordMarkerFilter"/>
+        /// if a stem exclusion set is provided and <see cref="MorfologikFilter"/> on the Ukrainian dictionary.</returns>
+        protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+        {
+            Tokenizer source = new StandardTokenizer(m_matchVersion, reader);
+            TokenStream result = new LowerCaseFilter(m_matchVersion, source);
+            result = new StopFilter(m_matchVersion, result, m_stopwords);
+
+            if (stemExclusionSet.Count > 0)
+            {
+                result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+            }
+
+            result = new MorfologikFilter(result, GetDictionary());
+            return new TokenStreamComponents(source, result);
+        }
+
+        /// <summary>
+        /// Returns the shared Ukrainian dictionary, loading it on the first call.
+        /// </summary>
+        private static Dictionary GetDictionary()
+        {
+            return DICTIONARY.Value;
+        }
+
+        /// <summary>
+        /// Reads the dictionary and its metadata from the resources embedded in this assembly.
+        /// </summary>
+        private static Dictionary LoadDictionary()
+        {
+            try
+            {
+                Type type = typeof(UkrainianMorfologikAnalyzer);
+                // LUCENENET NOTE: In Lucene, this was downloaded from Maven as a dependency
+                // (see https://search.maven.org/search?q=a:morfologik-ukrainian-search). However, we are embedding the file in .NET.
+                // Since it doesn't appear to be updated frequently, this should be okay.
+                string dictFile = "ukrainian.dict";
+                using (var dictStream = type.Assembly.FindAndGetManifestResourceStream(type, dictFile))
+                using (var metadataStream = type.Assembly.FindAndGetManifestResourceStream(type, DictionaryMetadata.GetExpectedMetadataFileName(dictFile)))
+                {
+                    return Dictionary.Read(dictStream, metadataStream);
+                }
+            }
+            catch (IOException e)
+            {
+                throw new Exception(e.ToString(), e);
+            }
+        }
+    }
+}
diff --git a/src/Lucene.Net.Analysis.Morfologik/Uk/mapping_uk.txt 
b/src/Lucene.Net.Analysis.Morfologik/Uk/mapping_uk.txt
new file mode 100644
index 0000000..1142604
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Morfologik/Uk/mapping_uk.txt
@@ -0,0 +1,19 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# This map normalizes some characters used in Ukrainian text
+"\u2019" => "'"
+"\u02BC" => "'"
+
+# Remove accent
+"\u0301" => ""
diff --git a/src/Lucene.Net.Analysis.Morfologik/Uk/stopwords.txt 
b/src/Lucene.Net.Analysis.Morfologik/Uk/stopwords.txt
new file mode 100644
index 0000000..651776b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Morfologik/Uk/stopwords.txt
@@ -0,0 +1,1269 @@
+а
+аби
+абиде
+абиколи
+абикуди
+абихто
+абикого
+абикому
+абиким
+абичий
+абичийого
+абичиєму
+абичийому
+абичиїм
+абичия
+абичиєї
+абичиїй
+абичию
+абичиєю
+абичиє
+абичиї
+абичиїх
+абичиїми
+абищо
+абичого
+абичому
+абичим
+абиякий
+абиякого
+абиякому
+абияким
+абиякім
+абияка
+абиякої
+абиякій
+абияку
+абиякою
+абияке
+абиякі
+абияких
+абиякими
+або
+абощо
+авжеж
+авось
+ага
+адже
+аж
+ажень
+але
+амінь
+ані
+аніде
+аніж
+анізащо
+анікогісінько
+аніколи
+аніскільки
+аніхто
+анікого
+анікому
+аніким
+анічогісінько
+аніщо
+анічого
+анічому
+анічим
+аніякий
+аніякого
+аніякому
+аніяким
+аніякім
+аніяка
+аніякої
+аніякій
+аніяку
+аніякою
+аніяке
+аніякі
+аніяких
+аніякими
+аніякісенький
+аніякісенького
+аніякісенькому
+аніякісеньким
+аніякісенькім
+аніякісенька
+аніякісенької
+аніякісенькій
+аніякісеньку
+аніякісенькою
+аніякісеньке
+аніякісенькі
+аніякісеньких
+аніякісенькими
+аніякісінький
+аніякісінького
+аніякісінькому
+аніякісіньким
+аніякісінькім
+аніякісінька
+аніякісінької
+аніякісінькій
+аніякісіньку
+аніякісінькою
+аніякісіньке
+аніякісінькі
+аніякісіньких
+аніякісінькими
+ану
+ато
+атож
+ач
+ачей
+аякже
+б
+ба
+багато
+багатьох
+багатьом
+багатьма
+без
+би
+біля
+бо
+бодай
+бути
+будь
+будьмо
+будьте
+є
+єси
+суть
+буду
+будеш
+буде
+будем
+будемо
+будете
+будуть
+був
+була
+було
+були
+буцім
+буцімто
+в
+ваш
+вашого
+вашому
+вашим
+вашім
+ваша
+вашої
+вашій
+вашу
+вашою
+ваше
+ваші
+ваших
+вашими
+ввесь
+всього
+всьому
+всім
+вся
+всієї
+всій
+всю
+всією
+все
+всі
+всіх
+всіма
+вві
+весь
+вздовж
+ви
+вас
+вам
+вами
+ві
+від
+відколи
+відповідно
+відтепер
+відтоді
+він
+його
+нього
+йому
+ним
+нім
+ньому
+власне
+властиво
+внаслідок
+вона
+її
+неї
+їй
+нею
+ній
+вони
+їх
+них
+їм
+ними
+воно
+вподовж
+впоперек
+впродовж
+всілякий
+всілякого
+всілякому
+всіляким
+всілякім
+всіляка
+всілякої
+всілякій
+всіляку
+всілякою
+всіляке
+всілякі
+всіляких
+всілякими
+вслід
+всупереч
+всюди
+всякий
+всякого
+всякому
+всяким
+всякім
+всяка
+всякої
+всякій
+всяку
+всякою
+всяке
+всякі
+всяких
+всякими
+всяк
+втім
+гаразд
+ге
+геть
+де
+дедалі
+деінде
+декілька
+декількох
+декільком
+декількома
+деколи
+декотрий
+декотрого
+декотрому
+декотрим
+декотрім
+декотра
+декотрої
+декотрій
+декотру
+декотрою
+декотре
+декотрі
+декотрих
+декотрими
+десь
+дехто
+декого
+декому
+деким
+декім
+дечий
+дечийого
+дечиєму
+дечийому
+дечиїм
+дечия
+дечиєї
+дечиїй
+дечию
+дечиєю
+дечиє
+дечиї
+дечиїх
+дечиїми
+дещо
+дечого
+дечому
+дечим
+дечім
+деякий
+деякого
+деякому
+деяким
+деякім
+деяка
+деякої
+деякій
+деяку
+деякою
+деяке
+деякі
+деяких
+деякими
+для
+до
+довкола
+доки
+допіру
+допоки
+досі
+дотепер
+доти
+еге
+ж
+же
+жодний
+жодного
+жодному
+жодним
+жоднім
+жодна
+жодної
+жодній
+жодну
+жодною
+жодне
+жодні
+жодних
+жодними
+жоден
+жоднісінький
+жоднісінького
+жоднісінькому
+жоднісіньким
+жоднісінькім
+жоднісінька
+жоднісінької
+жоднісінькій
+жоднісіньку
+жоднісінькою
+жоднісіньке
+жоднісінькі
+жоднісіньких
+жоднісінькими
+з
+за
+завгодно
+завдяки
+завжди
+завше
+задля
+залежно
+замість
+заради
+зараз
+зате
+зверху
+звідки
+звідкилясь
+звідкись
+звідкіль
+звідкіля
+звідкілясь
+звідси
+звідсіль
+звідсіля
+звідти
+звідтіль
+звідтіля
+звідусіль
+звідусюди
+звідціля
+здовж
+ззаду
+зі
+зо
+зсередини
+ич
+і
+ібн
+із
+ізсередини
+інакше
+інакший
+інакшого
+інакшому
+інакшим
+інакшім
+інакша
+інакшої
+інакшій
+інакшу
+інакшою
+інакші
+інакших
+інакшими
+інколи
+іноді
+інше
+іншого
+іншому
+іншим
+інший
+іншім
+інша
+іншої
+іншій
+іншу
+іншою
+інші
+інших
+іншими
+іще
+їхній
+їхнього
+їхньому
+їхнім
+їхня
+їхньої
+їхню
+їхньою
+їхнє
+їхні
+їхніх
+їхніми
+й
+кілька
+кількох
+кільком
+кількома
+кінець
+кожний
+кожного
+кожному
+кожним
+кожнім
+кожна
+кожної
+кожній
+кожну
+кожною
+кожне
+кожні
+кожних
+кожними
+кожен
+кожнісінький
+кожнісінького
+кожнісінькому
+кожнісіньким
+кожнісінькім
+кожнісінька
+кожнісінької
+кожнісінькій
+кожнісіньку
+кожнісінькою
+кожнісіньке
+кожнісінькі
+кожнісіньких
+кожнісінькими
+коли
+колись
+коло
+котрий
+котрого
+котрому
+котрим
+котрім
+котра
+котрої
+котрій
+котру
+котрою
+котре
+котрі
+котрих
+котрими
+котрийсь
+котрогось
+котромусь
+котримось
+котримсь
+котрімсь
+котрась
+котроїсь
+котрійсь
+котрусь
+котроюсь
+котресь
+котрісь
+котрихось
+котрихсь
+котримись
+край
+крізь
+крім
+круг
+кругом
+куди
+кудись
+кудою
+ледве
+ледь
+лиш
+лише
+лишень
+мерсі
+ми
+нас
+нам
+нами
+між
+мій
+мого
+моєму
+моїм
+моя
+моєї
+моїй
+мою
+моєю
+моє
+мої
+моїх
+моїми
+мов
+мовби
+мовбито
+могти
+можіть
+можу
+можеш
+може
+можем
+можемо
+можете
+можуть
+міг
+могла
+могло
+могли
+можна
+на
+навіть
+навіщо
+навіщось
+навколо
+навкруг
+навпаки
+навперейми
+навпроти
+над
+наді
+надо
+наперед
+напередодні
+наперекір
+напереріз
+наприкінці
+напроти
+насеред
+насупроти
+нате
+наче
+начеб
+начебто
+наш
+нашого
+нашому
+нашим
+нашім
+наша
+нашої
+нашій
+нашу
+нашою
+наше
+наші
+наших
+нашими
+не
+неабищо
+неабичого
+неабичому
+неабичим
+небагато
+небагатьох
+небагатьом
+небагатьма
+невважаючи
+невже
+незважаючи
+немов
+немовби
+немовбито
+неначе
+неначебто
+нехай
+нижче
+ні
+ніби
+нібито
+ніде
+ніж
+нізащо
+нізвідки
+нізвідкіля
+ніким
+нікогісінько
+нікого
+ніколи
+нікому
+нікотрий
+нікотрого
+нікотрому
+нікотрим
+нікотрім
+нікотра
+нікотрої
+нікотрій
+нікотру
+нікотрою
+нікотре
+нікотрі
+нікотрих
+нікотрими
+нікуди
+нінащо
+ніскільки
+ніхто
+нічий
+нічийого
+нічиєму
+нічийому
+нічиїм
+нічия
+нічиєї
+нічиїй
+нічию
+нічиєю
+нічиє
+нічиї
+нічиїх
+нічиїми
+нічийний
+нічийного
+нічийному
+нічийним
+нічийнім
+нічийна
+нічийної
+нічийній
+нічийну
+нічийною
+нічийне
+нічийні
+нічийних
+нічийними
+нічим
+нічого
+нічому
+ніщо
+ніяк
+ніякий
+ніякого
+ніякому
+ніяким
+ніякім
+ніяка
+ніякої
+ніякій
+ніяку
+ніякою
+ніяке
+ніякі
+ніяких
+ніякими
+ніякісінький
+ніякісінького
+ніякісінькому
+ніякісіньким
+ніякісінькім
+ніякісінька
+ніякісінької
+ніякісінькій
+ніякісіньку
+ніякісінькою
+ніякісіньке
+ніякісінькі
+ніякісіньких
+ніякісінькими
+но
+ну
+нумо
+нумте
+о
+об
+обабіч
+обік
+обіч
+од
+один
+одного
+одному
+одним
+однім
+одна
+однієї
+одної
+одній
+одну
+однією
+одною
+одне
+одно
+одні
+одних
+одними
+однак
+одначе
+окрай
+окрім
+округ
+округи
+он
+онде
+онно
+оно
+опісля
+опріч
+осе
+осісьо
+оскільки
+ось
+от
+отак
+отакий
+отакого
+отакому
+отаким
+отакім
+отака
+отакої
+отакій
+отаку
+отакою
+отаке
+отакі
+отаких
+отакими
+отакісінький
+отакісінького
+отакісінькому
+отакісіньким
+отакісінькім
+отакісінька
+отакісінької
+отакісінькій
+отакісіньку
+отакісінькою
+отакісіньке
+отакісінькі
+отакісіньких
+отакісінькими
+отам
+отже
+ото
+отож
+отой
+отого
+отому
+отим
+отім
+ота
+отієї
+отої
+отій
+оту
+отією
+отою
+оте
+оті
+отих
+отими
+отсе
+оттак
+отто
+отут
+оце
+оцей
+оцього
+оцьому
+оцим
+оцім
+оця
+оцієї
+оцій
+оцю
+оцією
+оці
+оцих
+оцими
+пак
+перед
+перетакий
+перетакого
+перетакому
+перетаким
+перетакім
+перетака
+перетакої
+перетакій
+перетаку
+перетакою
+перетаке
+перетакі
+перетаких
+перетакими
+під
+підо
+після
+по
+побік
+побіч
+поблизу
+поверх
+повз
+повздовж
+повсюди
+повсюдно
+подекуди
+подеякий
+подеякого
+подеякому
+подеяким
+подеякім
+подеяка
+подеякої
+подеякій
+подеяку
+подеякою
+подеяке
+подеякі
+подеяких
+подеякими
+подовж
+поза
+позад
+позаду
+позатой
+позатого
+позатому
+позатим
+позатім
+позата
+позатієї
+позатої
+позатій
+позату
+позатією
+позатою
+позате
+позаті
+позатих
+позатими
+позаяк
+поздовж
+поки
+покіль
+покрай
+поміж
+понад
+понадо
+понижче
+поперед
+попереду
+поперек
+попід
+попліч
+попри
+попросту
+поруч
+поряд
+посеред
+посередині
+потім
+поуз
+прецінь
+при
+притому
+причім
+причому
+про
+проміж
+просто
+проте
+проти
+протягом
+раз
+раніше
+сам
+самого
+самому
+самим
+самім
+сама
+самої
+самій
+саму
+самою
+саме
+само
+сами
+самі
+самих
+самими
+самий
+свій
+свого
+своєму
+своїм
+своя
+своєї
+своїй
+свою
+своєю
+своє
+свої
+своїх
+своїми
+се
+себе
+собі
+собою
+себто
+серед
+сиріч
+скільки
+скількох
+скільком
+скількома
+скількись
+скількохось
+скількохсь
+скількомось
+скількомсь
+скількомась
+скрізь
+спереду
+справді
+стільки
+стількох
+стільком
+стількома
+супроти
+супротив
+сюди
+сякий
+сякого
+сякому
+сяким
+сякім
+сяка
+сякої
+сякій
+сяку
+сякою
+сяке
+сякі
+сяких
+сякими
+та
+так
+такенний
+такенного
+такенному
+такенним
+такеннім
+такенна
+такенної
+такенній
+такенну
+такенною
+такенне
+такенні
+такенних
+такенними
+таки
+такий
+такого
+такому
+таким
+такім
+така
+такої
+такій
+таку
+такою
+таке
+такі
+таких
+такими
+такісінький
+такісінького
+такісінькому
+такісіньким
+такісінькім
+такісінька
+такісінької
+такісінькій
+такісіньку
+такісінькою
+такісіньке
+такісінькі
+такісіньких
+такісінькими
+також
+там
+тамки
+тамтой
+тамтого
+тамтому
+тамтим
+тамтім
+тамта
+тамтієї
+тамтої
+тамтій
+тамту
+тамтією
+тамтою
+тамте
+тамті
+тамтих
+тамтими
+твій
+твого
+твоєму
+твоїм
+твоя
+твоєї
+твоїй
+твою
+твоєю
+твоє
+твої
+твоїх
+твоїми
+те
+того
+тому
+тим
+тім
+теє
+теж
+тепер
+теперечки
+ти
+тебе
+тобі
+тобою
+тільки
+то
+тобто
+тоді
+тож
+той
+тієї
+тої
+тій
+ту
+тією
+тою
+ті
+тих
+тими
+тощо
+туди
+тудою
+тут
+тутеньки
+тутечки
+тутки
+у
+ув
+увесь
+усього
+усьому
+усім
+уся
+усієї
+усій
+усю
+усією
+усе
+усі
+усіх
+усіма
+уві
+угу
+уздовж
+унаслідок
+уподовж
+упоперек
+упродовж
+усілякий
+усілякого
+усілякому
+усіляким
+усілякім
+усіляка
+усілякої
+усілякій
+усіляку
+усілякою
+усіляке
+усілякі
+усіляких
+усілякими
+услід
+усупереч
+усюди
+усякий
+усякого
+усякому
+усяким
+усякім
+усяка
+усякої
+усякій
+усяку
+усякою
+усяке
+усякі
+усяких
+усякими
+усяк
+утім
+хай
+хіба
+хоч
+хоча
+хто
+кого
+кому
+ким
+кім
+хтось
+когось
+комусь
+кимось
+кимсь
+кімось
+кімсь
+це
+цього
+цьому
+цим
+цім
+цебто
+цей
+ця
+цієї
+цій
+цю
+цією
+ці
+цих
+цими
+чень
+через
+чи
+чий
+чийого
+чиєму
+чийому
+чиїм
+чия
+чиєї
+чиїй
+чию
+чиєю
+чиє
+чиї
+чиїх
+чиїми
+чийсь
+чийогось
+чиємусь
+чийомусь
+чиїмось
+чиїмсь
+чиясь
+чиєїсь
+чиїйсь
+чиюсь
+чиєюсь
+чиєсь
+чиїсь
+чиїхось
+чиїхсь
+чиїмись
+чому
+чомусь
+чортзна
+шляхом
+ще
+що
+чого
+віщо
+чим
+чім
+щоб
+щоби
+щодо
+щойно
+щоправда
+щось
+чогось
+віщось
+чимось
+чимсь
+чімось
+чімсь
+я
+мене
+мені
+мною
+як
+якби
+який
+якого
+якому
+яким
+якім
+яка
+якої
+якій
+яку
+якою
+яке
+які
+яких
+якими
+якийсь
+якогось
+якомусь
+якимось
+якимсь
+якімсь
+якась
+якоїсь
+якійсь
+якусь
+якоюсь
+якесь
+якісь
+якихось
+якихсь
+якимись
+якось
+якраз
+якщо
\ No newline at end of file
diff --git a/src/Lucene.Net.Analysis.Morfologik/Uk/tagset.txt 
b/src/Lucene.Net.Analysis.Morfologik/Uk/tagset.txt
new file mode 100644
index 0000000..bc5371e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Morfologik/Uk/tagset.txt
@@ -0,0 +1,170 @@
+Теги:
+
+[КЛ] - ключ леми (тег, який розрізняє різні леми з омонімів)
+
+noun    іменник
+    [КЛ] anim       істота
+    [КЛ] fname      ім'я
+    [КЛ] lname      прізвище
+    [КЛ] patr       по батькові
+    [КЛ] inanim     неістота
+    [КЛ] unanim     невизначена категорія істота/неістота (бактерія)
+         prop       власна назва
+
+verb    дієслово
+    [КЛ] imperf/perf недоконаний вид
+    [КЛ] perf доконаний вид
+    [КЛ] rev  зворотна форма (дієслова) (тег є неявним ключем, оскільки лема 
на -ся завжди відрізняється від прямого дієслова)
+
+    inf інфінітив
+    futr  майбутній час
+    past  минулий час
+    pres  теперішній час
+    impr    наказова форма
+    impers безособова форма
+
+    1       1-а особа
+    2       2-а особа
+    3       3-а особа
+
+    TODO: Ще немає:
+        tran    перехідне
+        intran  неперехідне
+
+
+adj     прикметник
+    compb    базова форма
+    compr    порівняльна форма
+    super    найвища форма
+    short    короткі форми прикметників
+
+    adjp    дієприкметник: (:&adjp - лише дієприкметник; :&_adjp - 
дієприкметник і прикметник)
+        actv   активний
+        pasv   пасивний
+        imperf недоконаний вид
+        perf   доконаний вид
+
+    (past/pres є в коментарях сирців для більшості дієприкметників, але наразі 
не використовується)
+
+adj/adjp:
+    v_zna:rinanim   знахідний для неістот (лише ч.р.)
+    v_zna:ranim     знахідний для істот (лише ч.р.)
+    uncontr         нестягнені
+
+adv     прислівник
+    compb    базова форма
+    compr    порівняльна форма
+    super    найвища форма
+
+advp    дієприслівник
+    [КЛ] perf
+    [КЛ] imperf
+
+prep    прийменник
+    Вимагає відмінка:
+        rv_rod
+        rv_dav
+        rv_zna
+        rv_oru
+        rv_mis
+
+conj    сполучник
+    subord підрядний
+    coord сурядний
+
+part    частка
+
+intj    вигук
+
+numr    числівник
+
+foreign невідмінювані запозичені слова невизначеної частини мови (Альгемайне, 
Юнайтед тощо)
+
+noninfl     невідмінювані частини (най-най, брутто, екстра...)
+
+
+Спільні для noun/adj/adjp:
+    Відмінки:
+        v_naz   називний
+        v_rod   родовий
+        v_dav   давальний
+        v_zna   знахідний
+        v_oru   орудний
+        v_mis   місцевий
+        v_kly   кличний
+        nv    не відмінюється
+        np    без множини (TODO: проставлено не всюди)
+        ns    без однини (TODO: проставлено не всюди)
+
+
+
+Спільні для noun/adj/adjp/verb
+    p  множина
+    s  однина
+
+    Рід:
+        m  чоловічий
+        f  жіночий
+        n  середній
+
+
+Додаткові теги:
+
+    abbr  абревіатура
+    bad   покруч
+    subst просторічна форма
+    rare  рідковживане/діалектичне/застаріле
+    coll  розмовне слово/розмовна форма
+    slang сленг
+    alt   альтернативне написання (не за чинним правописом)
+
+    :xp[1-9] омоніми, що відрізняються парадигмою відмінювання (напр. бар - 
р.в. бару, бар - р.в. бара)
+    # в коментарях також :xv[1-9] омоніми, що відрізняються семантично (напр. 
глупий (дурний, має вищий ступінь глупіший) і глупий - глупа ніч, без 
порівняльних форм)
+
+
+    v-u   паралельні форми на в-/у- (для правил милозвучності, вимкнено за 
уставою)
+
+
+Додаткові теги класів слів (після &):
+     &adjp — слова, що є дієприкметниками
+     &_adjp — слова, що є і прикметниками і дієприкметниками
+[КЛ] &pron - наразі всі займенники мають теги відповідних частин мови 
(noun/adj/adv), але всі мають додатковий тег &pron
+        (тег &pron разом з наступним класифікатором стає ключем леми)
+     &numr - слова, що є порядковими числівниками
+     &_numr - слова, що є і прикметниками і порядковими числівниками або і 
іменниками і кількісними числівниками
+     &insert - може бути вставним словом
+     &predic - може бути предикативом
+
+
+Теги займенників:
+    pers  особовий
+    refl  зворотний
+    pos   присвійний
+    dem   вказівний
+    def   означальний
+    int   питальний
+    rel   відносний
+    neg   заперечний
+    ind   неозначений
+    gen   узагальнювальний
+    emph  підсилювальний
+
+
+
+Динамічні теги (відсутні в словнику, їх проставляє модуль тегування LT):
+    number - число
+    date - дата
+    time - час
+
+
+Теги, яких немає, але які теоретично нескладно додати:
+    noun:
+        common gender
+    verb:
+        dual form (imperf+perf)
+    adj:
+        qualitative (має порівняльні форми) / relative (не має порівняльних)
+    adjp:
+        past/pres
+    advp:
+        past/pres
diff --git a/src/Lucene.Net.Analysis.Morfologik/Uk/ukrainian.dict 
b/src/Lucene.Net.Analysis.Morfologik/Uk/ukrainian.dict
new file mode 100644
index 0000000..49b1655
Binary files /dev/null and 
b/src/Lucene.Net.Analysis.Morfologik/Uk/ukrainian.dict differ
diff --git a/src/Lucene.Net.Analysis.Morfologik/Uk/ukrainian.info 
b/src/Lucene.Net.Analysis.Morfologik/Uk/ukrainian.info
new file mode 100644
index 0000000..b5331be
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Morfologik/Uk/ukrainian.info
@@ -0,0 +1,10 @@
+#
+# Dictionary properties.
+#
+
+fsa.dict.separator=+
+fsa.dict.encoding=utf-8
+
+fsa.dict.encoder=SUFFIX
+
+fsa.dict.speller.ignore-diacritics=false
diff --git a/src/Lucene.Net.TestFramework.NUnit/Properties/AssemblyInfo.cs 
b/src/Lucene.Net.TestFramework.NUnit/Properties/AssemblyInfo.cs
index fe8b490..942dec1 100644
--- a/src/Lucene.Net.TestFramework.NUnit/Properties/AssemblyInfo.cs
+++ b/src/Lucene.Net.TestFramework.NUnit/Properties/AssemblyInfo.cs
@@ -54,6 +54,8 @@ using System.Runtime.InteropServices;
 [assembly: InternalsVisibleTo("Lucene.Net.Tests._U-Z, PublicKey=" + 
AssemblyKeys.PublicKey)]
 [assembly: InternalsVisibleTo("Lucene.Net.Tests.Analysis.Common, PublicKey=" + 
AssemblyKeys.PublicKey)]
 [assembly: InternalsVisibleTo("Lucene.Net.Tests.Analysis.Kuromoji, PublicKey=" 
+ AssemblyKeys.PublicKey)]
+[assembly: InternalsVisibleTo("Lucene.Net.Tests.Analysis.Morfologik, 
PublicKey=" + AssemblyKeys.PublicKey)]
+[assembly: InternalsVisibleTo("Lucene.Net.Tests.Analysis.OpenNLP, PublicKey=" 
+ AssemblyKeys.PublicKey)]
 [assembly: InternalsVisibleTo("Lucene.Net.Tests.Analysis.Phonetic, PublicKey=" 
+ AssemblyKeys.PublicKey)]
 [assembly: InternalsVisibleTo("Lucene.Net.Tests.Analysis.SmartCn, PublicKey=" 
+ AssemblyKeys.PublicKey)]
 [assembly: InternalsVisibleTo("Lucene.Net.Tests.Analysis.Stempel, PublicKey=" 
+ AssemblyKeys.PublicKey)]
diff --git 
a/src/Lucene.Net.Tests.Analysis.Morfologik/Lucene.Net.Tests.Analysis.Morfologik.csproj
 
b/src/Lucene.Net.Tests.Analysis.Morfologik/Lucene.Net.Tests.Analysis.Morfologik.csproj
new file mode 100644
index 0000000..0005bb8
--- /dev/null
+++ 
b/src/Lucene.Net.Tests.Analysis.Morfologik/Lucene.Net.Tests.Analysis.Morfologik.csproj
@@ -0,0 +1,47 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <Import Project="$(SolutionDir)TestTargetFramework.props" />
+
+  <PropertyGroup>
+    <TargetFrameworks>netcoreapp2.1;net451</TargetFrameworks>
+
+    <AssemblyTitle>Lucene.Net.Tests.Analysis.Morfologik</AssemblyTitle>
+    <RootNamespace>Lucene.Net.Analysis</RootNamespace>
+    <GenerateRuntimeConfigurationFiles>true</GenerateRuntimeConfigurationFiles>
+    <RuntimeIdentifiers>win7-x86;win7-x64</RuntimeIdentifiers>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <None Remove="Morfologik\custom-dictionary.dict" />
+    <None Remove="Morfologik\custom-dictionary.info" />
+    <None Remove="Morfologik\custom-dictionary.input" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <EmbeddedResource Include="Morfologik\custom-dictionary.dict" />
+    <EmbeddedResource Include="Morfologik\custom-dictionary.info" />
+    <EmbeddedResource Include="Morfologik\custom-dictionary.input" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference 
Include="..\Lucene.Net.Analysis.Morfologik\Lucene.Net.Analysis.Morfologik.csproj"
 />
+    <ProjectReference 
Include="..\Lucene.Net.TestFramework.NUnit\Lucene.Net.TestFramework.NUnit.csproj"
 />
+  </ItemGroup>
+
+  <Import Project="$(SolutionDir)build/TestReferences.Common.targets" />
+
+  <PropertyGroup Condition=" '$(TargetFramework)' == 'netcoreapp2.1' ">
+    <DefineConstants>$(DefineConstants);NETSTANDARD</DefineConstants>
+    <DebugType>portable</DebugType>
+  </PropertyGroup>
+
+  <PropertyGroup Condition=" '$(TargetFramework)' == 'net451' ">
+    <DebugType>full</DebugType>
+  </PropertyGroup>
+
+  <ItemGroup Condition=" '$(TargetFramework)' == 'net451' ">
+    <Reference Include="System" />
+    <Reference Include="Microsoft.CSharp" />
+  </ItemGroup>
+
+</Project>
diff --git 
a/src/Lucene.Net.Tests.Analysis.Morfologik/Morfologik/TestMorfologikAnalyzer.cs 
b/src/Lucene.Net.Tests.Analysis.Morfologik/Morfologik/TestMorfologikAnalyzer.cs
new file mode 100644
index 0000000..96faa72
--- /dev/null
+++ 
b/src/Lucene.Net.Tests.Analysis.Morfologik/Morfologik/TestMorfologikAnalyzer.cs
@@ -0,0 +1,235 @@
+// Lucene version compatibility level 8.2.0
+using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.Morfologik.TokenAttributes;
+using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using NUnit.Framework;
+using System;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Morfologik
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests for <see cref="MorfologikAnalyzer"/> and <see cref="MorfologikFilter"/>.
+    /// <para/>
+    /// TODO: The tests below rely on the order of returned lemmas, which is probably not good.
+    /// </summary>
+    public class TestMorfologikAnalyzer : BaseTokenStreamTestCase
+    {
+        /// <summary>
+        /// Creates the analyzer under test. The caller owns the returned instance and must dispose it.
+        /// </summary>
+        private Analyzer getTestAnalyzer()
+        {
+            return new MorfologikAnalyzer(TEST_VERSION_CURRENT);
+        }
+
+        /// <summary>Test stemming of single tokens with Morfologik library.</summary>
+        [Test]
+        public void TestSingleTokens()
+        {
+            // "using" guarantees the analyzer is disposed even when an assertion throws.
+            using (Analyzer a = getTestAnalyzer())
+            {
+                AssertAnalyzesTo(a, "a", new String[] { "a" });
+                AssertAnalyzesTo(a, "liście", new String[] { "liście", "liść", "list", "lista" });
+                AssertAnalyzesTo(a, "danych", new String[] { "dany", "dana", "dane", "dać" });
+                AssertAnalyzesTo(a, "ęóąśłżźćń", new String[] { "ęóąśłżźćń" });
+            }
+        }
+
+        /// <summary>Test stemming of multiple tokens and proper term metrics.</summary>
+        [Test]
+        public void TestMultipleTokens()
+        {
+            using (Analyzer a = getTestAnalyzer())
+            {
+                // The three int arrays are, in order: start offsets, end offsets, position increments.
+                AssertAnalyzesTo(
+                    a,
+                    "liście danych",
+                    new String[] { "liście", "liść", "list", "lista", "dany", "dana", "dane", "dać" },
+                    new int[] { 0, 0, 0, 0, 7, 7, 7, 7 },
+                    new int[] { 6, 6, 6, 6, 13, 13, 13, 13 },
+                    new int[] { 1, 0, 0, 0, 1, 0, 0, 0 });
+
+                AssertAnalyzesTo(
+                    a,
+                    "T. Gl\u00FCcksberg",
+                    new String[] { "tom", "tona", "Gl\u00FCcksberg" },
+                    new int[] { 0, 0, 3 },
+                    new int[] { 1, 1, 13 },
+                    new int[] { 1, 0, 1 });
+            }
+        }
+
+        /// <summary>
+        /// Debugging aid: writes every token produced for <paramref name="input"/> together with its
+        /// morphosyntactic tags to the console. Not called by any test in this class.
+        /// </summary>
+        private void dumpTokens(String input)
+        {
+            using (Analyzer a = getTestAnalyzer())
+            using (TokenStream ts = a.GetTokenStream("dummy", input))
+            {
+                ts.Reset();
+
+                IMorphosyntacticTagsAttribute attribute = ts.GetAttribute<IMorphosyntacticTagsAttribute>();
+                ICharTermAttribute charTerm = ts.GetAttribute<ICharTermAttribute>();
+                while (ts.IncrementToken())
+                {
+                    Console.WriteLine(charTerm.ToString() + " => " + Collections.ToString(attribute.Tags));
+                }
+                ts.End();
+            }
+        }
+
+        /// <summary>Test reuse of MorfologikFilter with leftover stems.</summary>
+        [Test]
+        public void TestLeftoverStems()
+        {
+            using (Analyzer a = getTestAnalyzer())
+            {
+                // Consume only the first token of the first stream, leaving its remaining lemmas unread.
+                using (TokenStream ts_1 = a.GetTokenStream("dummy", "liście"))
+                {
+                    ICharTermAttribute termAtt_1 = ts_1.GetAttribute<ICharTermAttribute>();
+                    ts_1.Reset();
+                    assertTrue(ts_1.IncrementToken());
+                    assertEquals("first stream", "liście", termAtt_1.ToString());
+                    ts_1.End();
+                }
+
+                // The second stream from the same analyzer must start with its own first lemma.
+                using (TokenStream ts_2 = a.GetTokenStream("dummy", "danych"))
+                {
+                    ICharTermAttribute termAtt_2 = ts_2.GetAttribute<ICharTermAttribute>();
+                    ts_2.Reset();
+                    assertTrue(ts_2.IncrementToken());
+                    assertEquals("second stream", "dany", termAtt_2.ToString());
+                    ts_2.End();
+                }
+            }
+        }
+
+        /// <summary>Test stemming of mixed-case tokens.</summary>
+        [Test]
+        public void TestCase()
+        {
+            using (Analyzer a = getTestAnalyzer())
+            {
+                AssertAnalyzesTo(a, "AGD", new String[] { "AGD", "artykuły gospodarstwa domowego" });
+                AssertAnalyzesTo(a, "agd", new String[] { "artykuły gospodarstwa domowego" });
+
+                AssertAnalyzesTo(a, "Poznania", new String[] { "Poznań" });
+                AssertAnalyzesTo(a, "poznania", new String[] { "poznanie", "poznać" });
+
+                AssertAnalyzesTo(a, "Aarona", new String[] { "Aaron" });
+                AssertAnalyzesTo(a, "aarona", new String[] { "aarona" });
+
+                AssertAnalyzesTo(a, "Liście", new String[] { "liście", "liść", "list", "lista" });
+            }
+        }
+
+        /// <summary>
+        /// Advances <paramref name="ts"/> by one token and asserts that its term text equals
+        /// <paramref name="term"/> and that its set of morphosyntactic tags equals <paramref name="tags"/>
+        /// (compared as sorted sets, so tag order does not matter).
+        /// </summary>
+        private void assertPOSToken(TokenStream ts, String term, params String[] tags)
+        {
+            // Fail right here if the stream is exhausted instead of comparing stale attribute state.
+            assertTrue(ts.IncrementToken());
+            assertEquals(term, ts.GetAttribute<ICharTermAttribute>().ToString());
+
+            TreeSet<String> actual = new TreeSet<String>();
+            TreeSet<String> expected = new TreeSet<String>();
+            foreach (StringBuilder b in ts.GetAttribute<IMorphosyntacticTagsAttribute>().Tags)
+            {
+                actual.add(b.toString());
+            }
+            foreach (String s in tags)
+            {
+                expected.add(s);
+            }
+
+            if (!expected.equals(actual))
+            {
+                Console.WriteLine("Expected:\n" + expected);
+                Console.WriteLine("Actual:\n" + actual);
+                assertEquals(expected, actual);
+            }
+        }
+
+        /// <summary>Test morphosyntactic annotations.</summary>
+        [Test]
+        public void TestPOSAttribute()
+        {
+            using (Analyzer a = getTestAnalyzer())
+            using (TokenStream ts = a.GetTokenStream("dummy", "liście"))
+            {
+                ts.Reset();
+                assertPOSToken(ts, "liście",
+                  "subst:sg:acc:n2",
+                  "subst:sg:nom:n2",
+                  "subst:sg:voc:n2");
+
+                assertPOSToken(ts, "liść",
+                  "subst:pl:acc:m3",
+                  "subst:pl:nom:m3",
+                  "subst:pl:voc:m3");
+
+                assertPOSToken(ts, "list",
+                  "subst:sg:loc:m3",
+                  "subst:sg:voc:m3");
+
+                assertPOSToken(ts, "lista",
+                  "subst:sg:dat:f",
+                  "subst:sg:loc:f");
+                ts.End();
+            }
+        }
+
+        /// <summary>
+        /// Analyzer whose chain is StandardTokenizer, then a SetKeywordMarkerFilter that marks
+        /// "liście" as a keyword, then MorfologikFilter.
+        /// </summary>
+        private class MockMorfologikAnalyzer : MorfologikAnalyzer
+        {
+            public MockMorfologikAnalyzer()
+                : base(TEST_VERSION_CURRENT)
+            { }
+
+            protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, false);
+                keywords.add("liście");
+
+                Tokenizer src = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+                TokenStream result = new SetKeywordMarkerFilter(src, keywords);
+                result = new MorfologikFilter(result);
+
+                return new TokenStreamComponents(src, result);
+            }
+        }
+
+        /// <summary>
+        /// Test that a token marked as a keyword ("liście") is emitted once, without lemmas,
+        /// while the unmarked token ("danych") is still expanded into its lemmas.
+        /// </summary>
+        [Test]
+        public void TestKeywordAttrTokens()
+        {
+            using (Analyzer a = new MockMorfologikAnalyzer())
+            {
+                AssertAnalyzesTo(
+                  a,
+                      "liście danych",
+                      new String[] { "liście", "dany", "dana", "dane", "dać" },
+                      new int[] { 0, 7, 7, 7, 7 },
+                      new int[] { 6, 13, 13, 13, 13 },
+                      new int[] { 1, 1, 0, 0, 0 });
+            }
+        }
+
+        /// <summary>Blast some random strings through the analyzer.</summary>
+        [Test]
+        public void TestRandom()
+        {
+            using (Analyzer a = getTestAnalyzer())
+            {
+                CheckRandomData(Random, a, 1000 * RANDOM_MULTIPLIER);
+            }
+        }
+    }
+}
diff --git 
a/src/Lucene.Net.Tests.Analysis.Morfologik/Morfologik/TestMorfologikFilterFactory.cs
 
b/src/Lucene.Net.Tests.Analysis.Morfologik/Morfologik/TestMorfologikFilterFactory.cs
new file mode 100644
index 0000000..7a8449e
--- /dev/null
+++ 
b/src/Lucene.Net.Tests.Analysis.Morfologik/Morfologik/TestMorfologikFilterFactory.cs
@@ -0,0 +1,107 @@
+// Lucene version compatibility level 8.2.0
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Morfologik
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Exercises <see cref="MorfologikFilterFactory"/>: the default dictionary, an explicitly
+    /// configured dictionary, a missing dictionary resource, and unknown arguments.
+    /// </summary>
+    public class TestMorfologikFilterFactory : BaseTokenStreamTestCase
+    {
+        /// <summary>
+        /// Resource loader whose every member throws <see cref="NotSupportedException"/>.
+        /// </summary>
+        private class ForbidResourcesLoader : IResourceLoader
+        {
+            public Stream OpenResource(string resource)
+            {
+                throw new NotSupportedException();
+            }
+
+            public T NewInstance<T>(string cname)
+            {
+                throw new NotSupportedException();
+            }
+
+            public Type FindType(string cname)
+            {
+                throw new NotSupportedException();
+            }
+        }
+
+        /// <summary>
+        /// With no arguments and a loader that refuses every request, the factory still
+        /// produces a filter that lemmatizes "rowery bilety" to "rower", "bilet".
+        /// </summary>
+        [Test]
+        public void TestDefaultDictionary()
+        {
+            MorfologikFilterFactory filterFactory = new MorfologikFilterFactory(Collections.EmptyMap<string, string>());
+            filterFactory.Inform(new ForbidResourcesLoader());
+
+            // Stands in for the Java test's whitespaceMockTokenizer(reader).
+            TokenStream tokens = new MockTokenizer(new StringReader("rowery bilety"));
+            tokens = filterFactory.Create(tokens);
+
+            AssertTokenStreamContents(tokens, new string[] { "rower", "bilet" });
+        }
+
+        /// <summary>
+        /// A dictionary named through <c>DICTIONARY_ATTRIBUTE</c> is opened via the resource
+        /// loader and used for lemmatization.
+        /// </summary>
+        [Test]
+        public void TestExplicitDictionary()
+        {
+            IDictionary<string, string> args = new HashMap<string, string>();
+            args[MorfologikFilterFactory.DICTIONARY_ATTRIBUTE] = "custom-dictionary.dict";
+
+            MorfologikFilterFactory filterFactory = new MorfologikFilterFactory(args);
+            filterFactory.Inform(new ClasspathResourceLoader(typeof(TestMorfologikFilterFactory)));
+
+            // Stands in for the Java test's whitespaceMockTokenizer(reader).
+            TokenStream tokens = new MockTokenizer(new StringReader("inflected1 inflected2"));
+            tokens = filterFactory.Create(tokens);
+
+            AssertTokenStreamContents(tokens, new string[] { "lemma1", "lemma2" });
+        }
+
+        /// <summary>
+        /// Naming a dictionary resource that does not exist makes the factory fail with an
+        /// <see cref="IOException"/> whose message contains "Resource not found".
+        /// </summary>
+        [Test]
+        public void TestMissingDictionary()
+        {
+            IResourceLoader resources = new ClasspathResourceLoader(typeof(TestMorfologikFilterFactory));
+
+            IOException thrown = NUnit.Framework.Assert.Throws<IOException>(() =>
+            {
+                IDictionary<string, string> args = new HashMap<string, string>();
+                args[MorfologikFilterFactory.DICTIONARY_ATTRIBUTE] = "missing-dictionary-resource.dict";
+
+                MorfologikFilterFactory filterFactory = new MorfologikFilterFactory(args);
+                filterFactory.Inform(resources);
+            });
+
+            assertTrue(thrown.Message.Contains("Resource not found"));
+        }
+
+        /// <summary>Test that bogus arguments result in exception.</summary>
+        [Test]
+        public void TestBogusArguments()
+        {
+            ArgumentException thrown = NUnit.Framework.Assert.Throws<ArgumentException>(() =>
+            {
+                HashMap<string, string> args = new HashMap<string, string>();
+                args["bogusArg"] = "bogusValue";
+                new MorfologikFilterFactory(args);
+            });
+
+            assertTrue(thrown.Message.Contains("Unknown parameters"));
+        }
+    }
+}
diff --git 
a/src/Lucene.Net.Tests.Analysis.Morfologik/Morfologik/custom-dictionary.dict 
b/src/Lucene.Net.Tests.Analysis.Morfologik/Morfologik/custom-dictionary.dict
new file mode 100644
index 0000000..e157303
Binary files /dev/null and 
b/src/Lucene.Net.Tests.Analysis.Morfologik/Morfologik/custom-dictionary.dict 
differ
diff --git 
a/src/Lucene.Net.Tests.Analysis.Morfologik/Morfologik/custom-dictionary.info 
b/src/Lucene.Net.Tests.Analysis.Morfologik/Morfologik/custom-dictionary.info
new file mode 100644
index 0000000..53796c0
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Morfologik/Morfologik/custom-dictionary.info
@@ -0,0 +1,24 @@
+#
+# An example stemming dictionary file for Morfologik filter.
+#
+# Compile with Morfologik-stemming, see
+# https://github.com/morfologik/morfologik-stemming/wiki/Examples
+#
+
+# Author of the dictionary.
+fsa.dict.author=Acme Inc.
+
+# Date the dictionary data was assembled (not compilation time!).
+fsa.dict.created=2015/10/08 09:16:00
+
+# The license for the dictionary data.
+fsa.dict.license=ASL 2.0
+
+# Character encoding inside the automaton (and input file).
+fsa.dict.encoding=UTF-8
+
+# field separator (lemma;inflected;tag)
+fsa.dict.separator=;
+
+# type of base/lemma compression encoding before automaton compression.
+fsa.dict.encoder=INFIX
\ No newline at end of file
diff --git 
a/src/Lucene.Net.Tests.Analysis.Morfologik/Morfologik/custom-dictionary.input 
b/src/Lucene.Net.Tests.Analysis.Morfologik/Morfologik/custom-dictionary.input
new file mode 100644
index 0000000..b6e07f9
--- /dev/null
+++ 
b/src/Lucene.Net.Tests.Analysis.Morfologik/Morfologik/custom-dictionary.input
@@ -0,0 +1,2 @@
+lemma1;inflected1;tag1
+lemma2;inflected2;tag2
\ No newline at end of file
diff --git 
a/src/Lucene.Net.Tests.Analysis.Morfologik/Uk/TestUkrainianAnalyzer.cs 
b/src/Lucene.Net.Tests.Analysis.Morfologik/Uk/TestUkrainianAnalyzer.cs
new file mode 100644
index 0000000..5e8e414
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Morfologik/Uk/TestUkrainianAnalyzer.cs
@@ -0,0 +1,92 @@
+// Lucene version compatibility level 8.2.0
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Uk
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Test case for <see cref="UkrainianMorfologikAnalyzer"/>.
+    /// </summary>
+    public class TestUkrainianAnalyzer : BaseTokenStreamTestCase
+    {
+        /// <summary>
+        /// Check that <see cref="UkrainianMorfologikAnalyzer"/> doesn't discard any numbers.
+        /// </summary>
+        [Test]
+        public void TestDigitsInUkrainianCharset()
+        {
+            // "using" guarantees the analyzer is disposed even when an assertion throws.
+            using (UkrainianMorfologikAnalyzer ra = new UkrainianMorfologikAnalyzer(TEST_VERSION_CURRENT))
+            {
+                AssertAnalyzesTo(ra, "text 1000", new String[] { "text", "1000" });
+            }
+        }
+
+        /// <summary>
+        /// Analyzes a full sentence and checks the resulting lemmas; the expected output
+        /// omits "Ця", "у", "свою", "по" and "за".
+        /// </summary>
+        [Test]
+        public void TestReusableTokenStream()
+        {
+            using (Analyzer a = new UkrainianMorfologikAnalyzer(TEST_VERSION_CURRENT))
+            {
+                AssertAnalyzesTo(a, "Ця п'єса, у свою чергу, рухається по емоційно-напруженому колу за ритм-енд-блюзом.",
+                                     new String[] { "п'єса", "черга", "рухатися", "емоційно", "напружений", "кола", "коло", "кіл", "ритм", "енд", "блюз" });
+            }
+        }
+
+        /// <summary>
+        /// Checks that apostrophe variants (U+02BC, U+2019, U+2018, backtick) and a combining
+        /// acute accent (U+0301) all yield the same lemma as the plain form.
+        /// </summary>
+        [Test]
+        public void TestSpecialCharsTokenStream()
+        {
+            using (Analyzer a = new UkrainianMorfologikAnalyzer(TEST_VERSION_CURRENT))
+            {
+                AssertAnalyzesTo(a, "м'яса м'я\u0301са м\u02BCяса м\u2019яса м\u2018яса м`яса",
+                         new String[] { "м'ясо", "м'ясо", "м'ясо", "м'ясо", "м'ясо", "м'ясо" });
+            }
+        }
+
+        /// <summary>Checks lemmatization of capitalized words (proper names).</summary>
+        [Test]
+        public void TestCapsTokenStream()
+        {
+            using (Analyzer a = new UkrainianMorfologikAnalyzer(TEST_VERSION_CURRENT))
+            {
+                AssertAnalyzesTo(a, "Цих Чайковського і Ґете.",
+                         new String[] { "Чайковське", "Чайковський", "Гете" });
+            }
+        }
+
+        /// <summary>Checks that "Ґ" and "Г" spellings of the same word produce the same term.</summary>
+        [Test]
+        public void TestCharNormalization()
+        {
+            using (Analyzer a = new UkrainianMorfologikAnalyzer(TEST_VERSION_CURRENT))
+            {
+                AssertAnalyzesTo(a, "Ґюмрі та Гюмрі.",
+                         new String[] { "Гюмрі", "Гюмрі" });
+            }
+        }
+
+        /// <summary>Analyzes a sample sentence and checks the full list of produced lemmas.</summary>
+        [Test]
+        public void TestSampleSentence()
+        {
+            using (Analyzer a = new UkrainianMorfologikAnalyzer(TEST_VERSION_CURRENT))
+            {
+                AssertAnalyzesTo(a, "Це — проект генерування словника з тегами частин мови для української мови.",
+                         new String[] { "проект", "генерування", "словник", "тег", "частина", "мова", "українська", "український", "Українська", "мова" });
+            }
+        }
+
+        /// <summary>Blast some random strings through the analyzer.</summary>
+        [Test]
+        public void TestRandomStrings()
+        {
+            using (Analyzer analyzer = new UkrainianMorfologikAnalyzer(TEST_VERSION_CURRENT))
+            {
+                CheckRandomData(Random, analyzer, 1000 * RANDOM_MULTIPLIER);
+            }
+        }
+    }
+}

[lucenenet] 18/20: Ported Lucene.Net.Analysis.Morfologik + tests

Reply via email to