[06/15] lucenenet git commit: Added Lucene.Net.Analysis.Phonetic + tests. Rather than porting over the entire commons-codec library, only the language features were ported and added to this library.

nightowl888 Tue, 27 Jun 2017 13:34:21 -0700

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/DoubleMetaphoneFilterTest.cs
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Tests.Analysis.Phonetic/DoubleMetaphoneFilterTest.cs 
b/src/Lucene.Net.Tests.Analysis.Phonetic/DoubleMetaphoneFilterTest.cs
new file mode 100644
index 0000000..07e7f66
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/DoubleMetaphoneFilterTest.cs
@@ -0,0 +1,111 @@
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests for <see cref="DoubleMetaphoneFilter"/>.
+    /// </summary>
+    public class DoubleMetaphoneFilterTest : BaseTokenStreamTestCase
+    {
+        /// <summary>
+        /// Splits <paramref name="text"/> on whitespace and wraps the resulting
+        /// tokens in a <see cref="DoubleMetaphoneFilter"/>.
+        /// </summary>
+        private TokenStream NewFilter(string text, int maxCodeLength, bool inject)
+        {
+            TokenStream tokens = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(text));
+            return new DoubleMetaphoneFilter(tokens, maxCodeLength, inject);
+        }
+
+        /// <summary>
+        /// Builds an analyzer that chains a whitespace <see cref="MockTokenizer"/>
+        /// into a <see cref="DoubleMetaphoneFilter"/>.
+        /// </summary>
+        private Analyzer NewMockAnalyzer(int maxCodeLength, bool inject)
+        {
+            return Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+                TokenStream sink = new DoubleMetaphoneFilter(source, maxCodeLength, inject);
+                return new TokenStreamComponents(source, sink);
+            });
+        }
+
+        [Test]
+        public void TestSize4FalseInject()
+        {
+            string[] expected = { "ANTR" };
+            AssertTokenStreamContents(NewFilter("international", 4, false), expected);
+        }
+
+        [Test]
+        public void TestSize4TrueInject()
+        {
+            string[] expected = { "international", "ANTR" };
+            AssertTokenStreamContents(NewFilter("international", 4, true), expected);
+        }
+
+        [Test]
+        public void TestAlternateInjectFalse()
+        {
+            string[] expected = { "KSSK", "KXFS" };
+            AssertTokenStreamContents(NewFilter("Kuczewski", 4, false), expected);
+        }
+
+        [Test]
+        public void TestSize8FalseInject()
+        {
+            string[] expected = { "ANTRNXNL" };
+            AssertTokenStreamContents(NewFilter("international", 8, false), expected);
+        }
+
+        [Test]
+        public void TestNonConvertableStringsWithInject()
+        {
+            string[] expected = { "12345", "#$%@#^%&" };
+            AssertTokenStreamContents(NewFilter("12345 #$%@#^%&", 8, true), expected);
+        }
+
+        [Test]
+        public void TestNonConvertableStringsWithoutInject()
+        {
+            string[] passThrough = { "12345", "#$%@#^%&" };
+            AssertTokenStreamContents(NewFilter("12345 #$%@#^%&", 8, false), passThrough);
+
+            // should have something after the stream
+            string[] withEncodedTail = { "12345", "#$%@#^%&", "HL" };
+            AssertTokenStreamContents(NewFilter("12345 #$%@#^%& hello", 8, false), withEncodedTail);
+        }
+
+        [Test]
+        public void TestRandom()
+        {
+            // One code length is drawn up front and shared by both analyzers.
+            int codeLen = TestUtil.NextInt(Random(), 1, 8);
+
+            Analyzer withoutInject = NewMockAnalyzer(codeLen, false);
+            CheckRandomData(Random(), withoutInject, 1000 * RANDOM_MULTIPLIER);
+
+            Analyzer withInject = NewMockAnalyzer(codeLen, true);
+            CheckRandomData(Random(), withInject, 1000 * RANDOM_MULTIPLIER);
+        }
+
+        [Test]
+        public void TestEmptyTerm()
+        {
+            Analyzer keywordAnalyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                Tokenizer source = new KeywordTokenizer(reader);
+                // The inject flag is drawn each time the components are created.
+                TokenStream sink = new DoubleMetaphoneFilter(source, 8, Random().nextBoolean());
+                return new TokenStreamComponents(source, sink);
+            });
+
+            CheckOneTerm(keywordAnalyzer, "", "");
+        }
+    }
+}


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/BeiderMorseEncoderTest.cs
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/BeiderMorseEncoderTest.cs 
b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/BeiderMorseEncoderTest.cs
new file mode 100644
index 0000000..bd3681b
--- /dev/null
+++ 
b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/BeiderMorseEncoderTest.cs
@@ -0,0 +1,255 @@
+using NUnit.Framework;
+using System;
+using System.Globalization;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests <see cref="BeiderMorseEncoder"/>.
+    /// </summary>
+    public class BeiderMorseEncoderTest : StringEncoderAbstractTest<BeiderMorseEncoder>
+    {
+        private static readonly char[] TEST_CHARS = new char[] { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'o', 'u' };
+
+        /// <summary>
+        /// Fails (using <paramref name="value"/> as the message) when encoding
+        /// <paramref name="value"/> yields the empty string.
+        /// </summary>
+        private void AssertNotEmpty(BeiderMorseEncoder bmpm, string value)
+        {
+            Assert.False(bmpm.Encode(value).Equals(""), value);
+        }
+
+        /// <summary>
+        /// Creates an encoder configured with <c>NameType.GENERIC</c> and <c>RuleType.APPROX</c>.
+        /// </summary>
+        private BeiderMorseEncoder CreateGenericApproxEncoder()
+        {
+            BeiderMorseEncoder encoder = new BeiderMorseEncoder();
+            encoder.NameType = NameType.GENERIC;
+            encoder.RuleType = RuleType.APPROX;
+            return encoder;
+        }
+
+        protected override BeiderMorseEncoder CreateStringEncoder()
+        {
+            return new BeiderMorseEncoder();
+        }
+
+        /// <summary>
+        /// Tests we do not blow up.
+        /// </summary>
+        [Test]
+        public void TestAllChars()
+        {
+            BeiderMorseEncoder bmpm = CreateGenericApproxEncoder();
+            // Strict '<' on purpose: with '<=' the increment would wrap past
+            // char.MaxValue and the loop would never terminate.
+            for (char c = char.MinValue; c < char.MaxValue; c++)
+            {
+                bmpm.Encode(c.ToString());
+            }
+        }
+
+        [Test]
+        public void TestAsciiEncodeNotEmpty1Letter()
+        {
+            BeiderMorseEncoder bmpm = CreateGenericApproxEncoder();
+            for (char c = 'a'; c <= 'z'; c++)
+            {
+                string value = c.ToString();
+                string valueU = value.ToUpperInvariant();
+                AssertNotEmpty(bmpm, value);
+                AssertNotEmpty(bmpm, valueU);
+            }
+        }
+
+        [Test]
+        public void TestAsciiEncodeNotEmpty2Letters()
+        {
+            BeiderMorseEncoder bmpm = CreateGenericApproxEncoder();
+            for (char c1 = 'a'; c1 <= 'z'; c1++)
+            {
+                for (char c2 = 'a'; c2 <= 'z'; c2++)
+                {
+                    string value = new string(new char[] { c1, c2 });
+                    string valueU = value.ToUpperInvariant();
+                    AssertNotEmpty(bmpm, value);
+                    AssertNotEmpty(bmpm, valueU);
+                }
+            }
+        }
+
+        [Test]
+        public void TestEncodeAtzNotEmpty()
+        {
+            BeiderMorseEncoder bmpm = CreateGenericApproxEncoder();
+            // The names below are "ácz", "átz", "Ignácz", "Ignátz", "Ignác" written with \u escapes.
+            string[] names = { "\u00e1cz", "\u00e1tz", "Ign\u00e1cz", "Ign\u00e1tz", "Ign\u00e1c" };
+            foreach (string name in names)
+            {
+                AssertNotEmpty(bmpm, name);
+            }
+        }
+
+        /// <summary>
+        /// Tests https://issues.apache.org/jira/browse/CODEC-125?focusedCommentId=13071566&amp;page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-13071566
+        /// </summary>
+        [Test]
+        public void TestEncodeGna()
+        {
+            BeiderMorseEncoder bmpm = CreateGenericApproxEncoder();
+            bmpm.Encode("gna");
+        }
+
+        // Java original: @Test(expected = IllegalArgumentException.class)
+        [Test]
+        public void TestInvalidLangIllegalArgumentException()
+        {
+            Assert.Throws<ArgumentException>(() => Rule.GetInstance(NameType.GENERIC, RuleType.APPROX, "noSuchLanguage"));
+        }
+
+        // Java original: @Test(expected = IllegalStateException.class)
+        [Test]
+        public void TestInvalidLangIllegalStateException()
+        {
+            Assert.Throws<InvalidOperationException>(() => Lang.LoadFromResource("thisIsAMadeUpResourceName", Languages.GetInstance(NameType.GENERIC)));
+        }
+
+        // Java original: @Test(expected = IllegalArgumentException.class)
+        [Test]
+        public void TestInvalidLanguageIllegalArgumentException()
+        {
+            Assert.Throws<ArgumentException>(() => Languages.GetInstance("thereIsNoSuchLanguage"));
+        }
+
+        // Java original: @Test(timeout = 10000L) - no timeout is enforced here.
+        [Test]
+        public void TestLongestEnglishSurname()
+        {
+            BeiderMorseEncoder bmpm = CreateGenericApproxEncoder();
+            bmpm.Encode("MacGhilleseatheanaich");
+        }
+
+        // Java original: @Test(expected = IndexOutOfBoundsException.class)
+        [Test]
+        public void TestNegativeIndexForRuleMatchIndexOutOfBoundsException()
+        {
+            Assert.Throws<ArgumentOutOfRangeException>(() =>
+            {
+                Rule r = new Rule("a", "", "", new Phoneme("", Languages.ANY_LANGUAGE));
+                r.PatternAndContextMatches("bob", -1);
+            });
+        }
+
+        /// <summary>
+        /// Encodes a long markup-like phrase with the phoneme cap set to 10 and checks
+        /// that the result is non-empty and has at most 10 '|'-separated entries.
+        /// </summary>
+        [Test]
+        public void TestOOM()
+        {
+            string phrase = "200697900'-->&#1913348150;</  bceaeef >aadaabcf\"aedfbff<!--\'-->?>cae"
+                       + "cfaaa><?&#<!--</script>&lang&fc;aadeaf?>>&bdquo<    cc =\"abff\"    /></   afe  >"
+                       + "<script><!-- f(';<    cf aefbeef = \"bfabadcf\" ebbfeedd = fccabeb >";
+
+            BeiderMorseEncoder encoder = new BeiderMorseEncoder();
+            encoder.NameType = NameType.GENERIC;
+            encoder.RuleType = RuleType.EXACT;
+            encoder.SetMaxPhonemes(10);
+
+            string phonemes = encoder.Encode(phrase);
+            Assert.True(phonemes.Length > 0);
+
+            string[] phonemeArr = new Regex("\\|").Split(phonemes);
+            Assert.True(phonemeArr.Length <= 10);
+        }
+
+        [Test]
+        public void TestSetConcat()
+        {
+            BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
+            bmpm.IsConcat = false;
+            Assert.False(bmpm.IsConcat, "Should be able to set concat to false");
+        }
+
+        [Test]
+        public void TestSetNameTypeAsh()
+        {
+            BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
+            bmpm.NameType = NameType.ASHKENAZI;
+            Assert.AreEqual(NameType.ASHKENAZI, bmpm.NameType, "Name type should have been set to ash");
+        }
+
+        [Test]
+        public void TestSetRuleTypeExact()
+        {
+            BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
+            bmpm.RuleType = RuleType.EXACT;
+            Assert.AreEqual(RuleType.EXACT, bmpm.RuleType, "Rule type should have been set to exact");
+        }
+
+        // Java original: @Test(expected = IllegalArgumentException.class)
+        [Test]
+        public void TestSetRuleTypeToRulesIllegalArgumentException()
+        {
+            Assert.Throws<ArgumentException>(() =>
+            {
+                BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
+                bmpm.RuleType = RuleType.RULES;
+            });
+        }
+
+        /// <summary>
+        /// (Un)luckily, the worst performing test because of the data in <see cref="TEST_CHARS"/>.
+        /// Encodes a string that grows by one character per iteration, cycling through
+        /// <see cref="TEST_CHARS"/>, for 40 iterations.
+        /// </summary>
+        // Java original: @Test(timeout = 20000L) - no timeout is enforced here.
+        [Test]
+        public void TestSpeedCheck()
+        {
+            BeiderMorseEncoder bmpm = this.CreateGenericApproxEncoder();
+            StringBuilder stringBuffer = new StringBuilder();
+            stringBuffer.Append(TEST_CHARS[0]);
+            for (int i = 0, j = 1; i < 40; i++, j++)
+            {
+                if (j == TEST_CHARS.Length)
+                {
+                    j = 0; // wrap around to the first test character
+                }
+                bmpm.Encode(stringBuffer.ToString());
+                stringBuffer.Append(TEST_CHARS[j]);
+            }
+        }
+
+        /// <summary>
+        /// Encodes every non-empty prefix of a long run-together English phrase.
+        /// </summary>
+        [Test]
+        public void TestSpeedCheck2()
+        {
+            BeiderMorseEncoder bmpm = this.CreateGenericApproxEncoder();
+            string phrase = "ItstheendoftheworldasweknowitandIfeelfine";
+
+            for (int i = 1; i <= phrase.Length; i++)
+            {
+                bmpm.Encode(phrase.Substring(0, i));
+            }
+        }
+
+        /// <summary>
+        /// Encodes every non-empty prefix of the lowercase alphabet repeated twice.
+        /// </summary>
+        [Test]
+        public void TestSpeedCheck3()
+        {
+            BeiderMorseEncoder bmpm = this.CreateGenericApproxEncoder();
+            string phrase = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
+
+            for (int i = 1; i <= phrase.Length; i++)
+            {
+                bmpm.Encode(phrase.Substring(0, i));
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/CacheSubSequencePerformanceTest.cs
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/CacheSubSequencePerformanceTest.cs
 
b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/CacheSubSequencePerformanceTest.cs
new file mode 100644
index 0000000..45e9513
--- /dev/null
+++ 
b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/CacheSubSequencePerformanceTest.cs
@@ -0,0 +1,138 @@
+using Lucene.Net.Attributes;
+using Lucene.Net.Support;
+using NUnit.Framework;
+using System;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Rough timing comparison of taking every sub-range of a short input via
+    /// <see cref="string"/>, <see cref="StringBuilder"/> and a caching
+    /// <see cref="ICharSequence"/> wrapper. Results are only written to the console;
+    /// nothing is asserted.
+    /// </summary>
+    public class CacheSubSequencePerformanceTest
+    {
+        [Test, LongRunningTest]
+        public void Test()
+        {
+            //int times = 10000000;
+            int times = 100000; // LUCENENET: 10 million times would take several minutes to run - decreasing to 100,000
+            Console.WriteLine("Test with String : ");
+            Test("Angelo", times);
+            Console.WriteLine("Test with StringBuilder : ");
+            Test(new StringBuilder("Angelo"), times);
+            // The cached runs go through ICharSequence.SubSequence so that the cache is
+            // actually exercised. Calling ToString() on the wrapper first would time a
+            // plain string instead.
+            Console.WriteLine("Test with cached String : ");
+            Test(CacheSubSequence("Angelo"), times);
+            Console.WriteLine("Test with cached StringBuilder : ");
+            Test(CacheSubSequence(new StringBuilder("Angelo")), times);
+        }
+
+        /// <summary>
+        /// Milliseconds elapsed since <paramref name="beginTicks"/>, a value
+        /// previously read from <c>DateTime.UtcNow.Ticks</c>.
+        /// </summary>
+        private static long ElapsedMillis(long beginTicks)
+        {
+            // Ticks are 100 ns units; convert so the printed "millis" label is true.
+            return (DateTime.UtcNow.Ticks - beginTicks) / TimeSpan.TicksPerMillisecond;
+        }
+
+        /// <summary>Runs the string scan <paramref name="times"/> times and prints the elapsed milliseconds.</summary>
+        private void Test(string input, int times)
+        {
+            long beginTicks = DateTime.UtcNow.Ticks;
+            for (int i = 0; i < times; i++)
+            {
+                Test(input);
+            }
+            Console.WriteLine(ElapsedMillis(beginTicks) + " millis");
+        }
+
+        /// <summary>Runs the <see cref="StringBuilder"/> scan <paramref name="times"/> times and prints the elapsed milliseconds.</summary>
+        private void Test(StringBuilder input, int times)
+        {
+            long beginTicks = DateTime.UtcNow.Ticks;
+            for (int i = 0; i < times; i++)
+            {
+                Test(input);
+            }
+            Console.WriteLine(ElapsedMillis(beginTicks) + " millis");
+        }
+
+        /// <summary>Runs the <see cref="ICharSequence"/> scan <paramref name="times"/> times and prints the elapsed milliseconds.</summary>
+        private void Test(ICharSequence input, int times)
+        {
+            long beginTicks = DateTime.UtcNow.Ticks;
+            for (int i = 0; i < times; i++)
+            {
+                Test(input);
+            }
+            Console.WriteLine(ElapsedMillis(beginTicks) + " millis");
+        }
+
+        /// <summary>Takes every substring [i, j) of <paramref name="input"/>, including the empty ones.</summary>
+        private void Test(string input)
+        {
+            for (int i = 0; i < input.Length; i++)
+            {
+                for (int j = i; j <= input.Length; j++)
+                {
+                    input.Substring(i, (j - i));
+                }
+            }
+        }
+
+        /// <summary>Takes every substring [i, j) of <paramref name="input"/>, including the empty ones.</summary>
+        private void Test(StringBuilder input)
+        {
+            for (int i = 0; i < input.Length; i++)
+            {
+                for (int j = i; j <= input.Length; j++)
+                {
+                    input.ToString(i, (j - i));
+                }
+            }
+        }
+
+        /// <summary>Takes every sub-sequence [i, j) of <paramref name="input"/>, including the empty ones.</summary>
+        private void Test(ICharSequence input)
+        {
+            for (int i = 0; i < input.Length; i++)
+            {
+                for (int j = i; j <= input.Length; j++)
+                {
+                    input.SubSequence(i, j);
+                }
+            }
+        }
+
+        /// <summary>
+        /// <see cref="ICharSequence"/> over a string that remembers each non-empty
+        /// sub-sequence it has produced in a table indexed by [start][end - 1].
+        /// </summary>
+        private class CachedCharSequence : ICharSequence
+        {
+            private readonly string[][] cache;
+            private readonly string cached;
+
+            public CachedCharSequence(string[][] cache, string cached)
+            {
+                this.cache = cache;
+                this.cached = cached;
+            }
+
+            public char this[int index]
+            {
+                get
+                {
+                    return cached[index];
+                }
+            }
+
+            public int Length
+            {
+                get
+                {
+                    return cached.Length;
+                }
+            }
+
+            public ICharSequence SubSequence(int start, int end)
+            {
+                // Empty ranges are not cached.
+                if (start == end)
+                {
+                    return "".ToCharSequence();
+                }
+                string res = cache[start][end - 1];
+                if (res == null)
+                {
+                    res = cached.Substring(start, end - start);
+                    cache[start][end - 1] = res;
+                }
+                return res.ToCharSequence();
+            }
+
+            /// <summary>
+            /// Returns the wrapped text instead of the default type name.
+            /// </summary>
+            public override string ToString()
+            {
+                return cached;
+            }
+        }
+
+        /// <summary>Wraps <paramref name="cached"/> with a Length x Length sub-sequence cache.</summary>
+        private ICharSequence CacheSubSequence(string cached)
+        {
+            string[][] cache = Support.RectangularArrays.ReturnRectangularArray<string>(cached.Length, cached.Length);
+            return new CachedCharSequence(cache, cached);
+        }
+
+        /// <summary>Wraps a snapshot of <paramref name="cached"/> with a Length x Length sub-sequence cache.</summary>
+        private ICharSequence CacheSubSequence(StringBuilder cached)
+        {
+            string[][] cache = Support.RectangularArrays.ReturnRectangularArray<string>(cached.Length, cached.Length);
+            return new CachedCharSequence(cache, cached.ToString());
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/LanguageGuessingTest.cs
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/LanguageGuessingTest.cs 
b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/LanguageGuessingTest.cs
new file mode 100644
index 0000000..d50c6f7
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/LanguageGuessingTest.cs
@@ -0,0 +1,84 @@
+using Lucene.Net.Support;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests guessLanguages API.
+    /// <para/>
+    /// since 1.6
+    /// </summary>
+    public class LanguageGuessingTest
+    {
+        private const string EXACT = "exact";
+        private const string ONE_OF = "one of";
+
+        /// <summary>
+        /// Test cases as { name, expected language, exactness } triples, consumed by
+        /// <see cref="TestLanguageGuessing"/> through <c>TestCaseSource</c>.
+        /// Non-ASCII names are written with \u escapes; the trailing comment shows the text.
+        /// </summary>
+        public static List<object[]> Values = Arrays.AsList(new object[][] {
+                new object[] { "Renault", "french", EXACT },
+                new object[] { "Mickiewicz", "polish", EXACT },
+                new object[] { "Thompson", "english", ONE_OF }, // this also hits german and greeklatin
+                new object[] { "Nu\u00f1ez", "spanish", EXACT }, // Nuñez
+                new object[] { "Carvalho", "portuguese", EXACT },
+                new object[] { "\u010capek", "czech", EXACT }, // Čapek
+                new object[] { "Sjneijder", "dutch", EXACT },
+                new object[] { "Klausewitz", "german", EXACT },
+                new object[] { "K\u00fc\u00e7\u00fck", "turkish", EXACT }, // Küçük
+                new object[] { "Giacometti", "italian", EXACT },
+                new object[] { "Nagy", "hungarian", EXACT },
+                new object[] { "Ceau\u015fescu", "romanian", EXACT }, // Ceauşescu
+                new object[] { "Angelopoulos", "greeklatin", EXACT },
+                new object[] { "\u0391\u03b3\u03b3\u03b5\u03bb\u03cc\u03c0\u03bf\u03c5\u03bb\u03bf\u03c2", "greek", EXACT }, // Αγγελόπουλος
+                new object[] { "\u041f\u0443\u0448\u043a\u0438\u043d", "cyrillic", EXACT }, // Пушкин
+                new object[] { "\u05db\u05d4\u05df", "hebrew", EXACT }, // כהן
+                new object[] { "\u00e1cz", "any", EXACT }, // ácz
+                new object[] { "\u00e1tz", "any", EXACT } // átz
+        });
+
+        private readonly Lang lang = Lang.GetInstance(NameType.GENERIC);
+
+        /// <summary>
+        /// Asserts that the languages guessed for <paramref name="name"/> contain
+        /// <paramref name="language"/>.
+        /// </summary>
+        /// <param name="name">The name whose language is guessed.</param>
+        /// <param name="language">The language that must be among the guesses.</param>
+        /// <param name="exactness">Carried by the test data but not checked by this test.</param>
+        [Test]
+        [TestCaseSource("Values")]
+        public void TestLanguageGuessing(String name, String language, String exactness)
+        {
+            LanguageSet guesses = this.lang.GuessLanguages(name);
+
+            Assert.True(guesses.Contains(language),
+                "language predicted for name '" + name + "' is wrong: " + guesses + " should contain '" + language + "'"
+                    );
+
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEnginePerformanceTest.cs
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEnginePerformanceTest.cs
 
b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEnginePerformanceTest.cs
new file mode 100644
index 0000000..7b8b400
--- /dev/null
+++ 
b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEnginePerformanceTest.cs
@@ -0,0 +1,141 @@
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /**
+     * Tests performance for {@link PhoneticEngine}.
+     * <p>
+     * See <a 
href="https://issues.apache.org/jira/browse/CODEC-174">[CODEC-174] Improve 
performance of Beider Morse
+     * encoder</a>.
+     * </p>
+     * <p>
+     * Results for November 7, 2013, project SVN revision 1539678.
+     * </p>
+     * <p>
+     * Environment:
+     * </p>
+     * <ul>
+     * <li>java version "1.7.0_45"</li>
+     * <li>Java(TM) SE Runtime Environment (build 1.7.0_45-b18)</li>
+     * <li>Java HotSpot(TM) 64-Bit Server VM (build 24.45-b08, mixed mode)</li>
+     * <li>OS name: "windows 7", version: "6.1", arch: "amd64", family: 
"windows")</li>
+     * </ul>
+     * <ol>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 33,039 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 32,297 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 32,857 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': <b>31,561 
millis.</b></li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 32,665 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 32,215 
millis.</li>
+     * </ol>
+     * <p>
+     * On this file's revision 1539678, with patch <a
+     * 
href="https://issues.apache.org/jira/secure/attachment/12611963/CODEC-174-change-rules-storage-to-Map.patch"
+     * >CODEC-174-change-rules-storage-to-Map</a>:
+     * </p>
+     * <ol>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 18,196 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,858 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,644 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': <b>13,591 
millis.</b></li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,861 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,696 
millis.</li>
+     * </ol>
+     * <p>
+     * Patch applied, committed revision 1539783.
+     * </p>
+     * <p>
+     * On this file's revision 1539783, with patch <a
+     * 
href="https://issues.apache.org/jira/secure/attachment/12611962/CODEC-174-delete-subsequence-cache.patch"
+     * >CODEC-174-delete-subsequence-cache.patch</a>:
+     * </p>
+     * <ol>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,547 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': <b>13,501 
millis.</b></li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,528 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 17,110 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,910 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 16,969 
millis.</li>
+     * </ol>
+     * <p>
+     * Patch not applied.
+     * </p>
+     * <p>
+     * On this file's revision 1539787, with patch <a
+     * 
href="https://issues.apache.org/jira/secure/attachment/12612178/CODEC-174-reuse-set-in-PhonemeBuilder.patch"
+     * >CODEC-174-reuse-set-in-PhonemeBuilder.patch</a>:
+     * </p>
+     * <ol>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,724 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,451 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,742 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': <b>13,186 
millis.</b></li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,600 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 16,405 
millis.</li>
+     * </ol>
+     * <p>
+     * Patch applied, committed revision 1539788.
+     * </p>
+     * <p>
+     * Before patch 
https://issues.apache.org/jira/secure/attachment/12613371/CODEC-174-refactor-restrictTo-method-in-SomeLanguages.patch
+     * </p>
+     * <ol>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,133 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,064 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': <b>12,838 
millis.</b></li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 12,970 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,122 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,293 
millis.</li>
+     * </ol>
+     * <p>
+     * After patch 
https://issues.apache.org/jira/secure/attachment/12613371/CODEC-174-refactor-restrictTo-method-in-SomeLanguages.patch
+     * </p>
+     * <ol>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 11,576 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 11,506 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 11,361 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': <b>11,142 
millis.</b></li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 11,430 
millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 11,297 
millis.</li>
+     * </ol>
+     * <p>
+     * Patch applied, committed revision 1541234.
+     * </p>
+     */
+    public class PhoneticEnginePerformanceTest
+    {
+        private static readonly int LOOP = 80000;
+
+        /// <summary>
+        /// Encodes the same input <see cref="LOOP"/> times with a GENERIC/APPROX engine
+        /// and prints the elapsed wall-clock time in milliseconds. Nothing is asserted.
+        /// </summary>
+        [Test]
+        public void Test()
+        {
+            PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, true);
+            String input = "Angelo";
+            long startTicks = DateTime.UtcNow.Ticks;
+            for (int i = 0; i < LOOP; i++)
+            {
+                engine.Encode(input);
+            }
+            // Ticks are 100 ns units; convert so the reported figure really is milliseconds.
+            long totalMillis = (DateTime.UtcNow.Ticks - startTicks) / TimeSpan.TicksPerMillisecond;
+            Console.WriteLine(String.Format("Time for encoding {0} times the input '{1}': {2} millis.", LOOP, input, totalMillis));
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEngineRegressionTest.cs
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEngineRegressionTest.cs
 
b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEngineRegressionTest.cs
new file mode 100644
index 0000000..cb9a40d
--- /dev/null
+++ 
b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEngineRegressionTest.cs
@@ -0,0 +1,234 @@
+ï»¿using Lucene.Net.Support;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests <see cref="PhoneticEngine"/> and <see cref="LanguageSet"/> in 
ways very similar to code found in solr-3.6.0.
+    /// <para/>
+    /// since 1.7
+    /// </summary>
+    public class PhoneticEngineRegressionTest
+    {
+        /// <summary>
+        /// Checks the encodings produced for the GENERIC name type (set explicitly in the first
+        /// group, left to the helper's default afterwards) for each combination of concat
+        /// (true/false) and rule type (EXACT/APPROX), before and after a language set is supplied.
+        /// </summary>
+        [Test]
+        public void TestSolrGENERIC()
+        {
+            IDictionary<String, String> args;
+
+            // NUnit's Assert.AreEqual takes (expected, actual); the literal goes first so that
+            // failure messages label the two values correctly.
+
+            // concat is true, ruleType is EXACT
+            args = new SortedDictionary<String, String>();
+            args.Put("nameType", "GENERIC");
+            Assert.AreEqual("agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo", Encode(args, true, "Angelo"));
+            args.Put("ruleType", "EXACT");
+            Assert.AreEqual("anZelo|andZelo|angelo|anhelo|anjelo|anxelo", Encode(args, true, "Angelo"));
+            Assert.AreEqual("(anZelo|andZelo|angelo|anhelo|anjelo|anxelo)-(danZelo|dandZelo|dangelo|danhelo|danjelo|danxelo)", Encode(args, true, "D'Angelo"));
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual("andZelo|angelo|anxelo", Encode(args, true, "Angelo"));
+            Assert.AreEqual("", Encode(args, true, "1234"));
+
+            // concat is false, ruleType is EXACT
+            args = new SortedDictionary<String, String>();
+            Assert.AreEqual("agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo", Encode(args, false, "Angelo"));
+            args.Put("ruleType", "EXACT");
+            Assert.AreEqual("anZelo|andZelo|angelo|anhelo|anjelo|anxelo", Encode(args, false, "Angelo"));
+            Assert.AreEqual("(anZelo|andZelo|angelo|anhelo|anjelo|anxelo)-(danZelo|dandZelo|dangelo|danhelo|danjelo|danxelo)", Encode(args, false, "D'Angelo"));
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual("andZelo|angelo|anxelo", Encode(args, false, "Angelo"));
+            Assert.AreEqual("", Encode(args, false, "1234"));
+
+            // concat is true, ruleType is APPROX
+            args = new SortedDictionary<String, String>();
+            Assert.AreEqual("agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo", Encode(args, true, "Angelo"));
+            args.Put("ruleType", "APPROX");
+            Assert.AreEqual("agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo", Encode(args, true, "Angelo"));
+            Assert.AreEqual("(agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo)-(dagilo|dangilo|daniilo|danilo|danxilo|danzilo|dogilo|dongilo|doniilo|donilo|donxilo|donzilo)", Encode(args, true, "D'Angelo"));
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual("angilo|anxilo|anzilo|ongilo|onxilo|onzilo", Encode(args, true, "Angelo"));
+            Assert.AreEqual("", Encode(args, true, "1234"));
+
+            // concat is false, ruleType is APPROX
+            args = new SortedDictionary<String, String>();
+            Assert.AreEqual("agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo", Encode(args, false, "Angelo"));
+            args.Put("ruleType", "APPROX");
+            Assert.AreEqual("agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo", Encode(args, false, "Angelo"));
+            Assert.AreEqual("(agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo)-(dagilo|dangilo|daniilo|danilo|danxilo|danzilo|dogilo|dongilo|doniilo|donilo|donxilo|donzilo)", Encode(args, false, "D'Angelo"));
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual("angilo|anxilo|anzilo|ongilo|onxilo|onzilo", Encode(args, false, "Angelo"));
+            Assert.AreEqual("", Encode(args, false, "1234"));
+        }
+
+        /// <summary>
+        /// Checks the encodings produced for the ASHKENAZI name type for each combination of
+        /// concat (true/false) and rule type (EXACT/APPROX), before and after a language set
+        /// is supplied.
+        /// </summary>
+        [Test]
+        public void TestSolrASHKENAZI()
+        {
+            IDictionary<String, String> args;
+
+            // NUnit's Assert.AreEqual takes (expected, actual); the literal goes first so that
+            // failure messages label the two values correctly.
+
+            // concat is true, ruleType is EXACT
+            args = new SortedDictionary<String, String>();
+            args.Put("nameType", "ASHKENAZI");
+            Assert.AreEqual("AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO", Encode(args, true, "Angelo"));
+            args.Put("ruleType", "EXACT");
+            Assert.AreEqual("andZelo|angelo|anhelo|anxelo", Encode(args, true, "Angelo"));
+            Assert.AreEqual("dandZelo|dangelo|danhelo|danxelo", Encode(args, true, "D'Angelo"));
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual("angelo|anxelo", Encode(args, true, "Angelo"));
+            Assert.AreEqual("", Encode(args, true, "1234"));
+
+            // concat is false, ruleType is EXACT
+            args = new SortedDictionary<String, String>();
+            args.Put("nameType", "ASHKENAZI");
+            Assert.AreEqual("AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO", Encode(args, false, "Angelo"));
+            args.Put("ruleType", "EXACT");
+            Assert.AreEqual("andZelo|angelo|anhelo|anxelo", Encode(args, false, "Angelo"));
+            Assert.AreEqual("dandZelo|dangelo|danhelo|danxelo", Encode(args, false, "D'Angelo"));
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual("angelo|anxelo", Encode(args, false, "Angelo"));
+            Assert.AreEqual("", Encode(args, false, "1234"));
+
+            // concat is true, ruleType is APPROX
+            args = new SortedDictionary<String, String>();
+            args.Put("nameType", "ASHKENAZI");
+            Assert.AreEqual("AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO", Encode(args, true, "Angelo"));
+            args.Put("ruleType", "APPROX");
+            Assert.AreEqual("AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO", Encode(args, true, "Angelo"));
+            Assert.AreEqual("dAnElO|dAnSelO|dAngElO|dAngzelO|dAnkselO|dAnzelO", Encode(args, true, "D'Angelo"));
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual("AnSelO|AngElO|AngzelO|AnkselO", Encode(args, true, "Angelo"));
+            Assert.AreEqual("", Encode(args, true, "1234"));
+
+            // concat is false, ruleType is APPROX
+            args = new SortedDictionary<String, String>();
+            args.Put("nameType", "ASHKENAZI");
+            Assert.AreEqual("AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO", Encode(args, false, "Angelo"));
+            args.Put("ruleType", "APPROX");
+            Assert.AreEqual("AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO", Encode(args, false, "Angelo"));
+            Assert.AreEqual("dAnElO|dAnSelO|dAngElO|dAngzelO|dAnkselO|dAnzelO", Encode(args, false, "D'Angelo"));
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual("AnSelO|AngElO|AngzelO|AnkselO", Encode(args, false, "Angelo"));
+            Assert.AreEqual("", Encode(args, false, "1234"));
+        }
+
+        /// <summary>
+        /// Checks the encodings produced for the SEPHARDIC name type for each combination of
+        /// concat (true/false) and rule type (EXACT/APPROX), before and after a language set
+        /// is supplied.
+        /// </summary>
+        [Test]
+        public void TestSolrSEPHARDIC()
+        {
+            IDictionary<String, String> args;
+
+            // NUnit's Assert.AreEqual takes (expected, actual); the literal goes first so that
+            // failure messages label the two values correctly.
+
+            // concat is true, ruleType is EXACT
+            args = new SortedDictionary<String, String>();
+            args.Put("nameType", "SEPHARDIC");
+            Assert.AreEqual("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu", Encode(args, true, "Angelo"));
+            args.Put("ruleType", "EXACT");
+            Assert.AreEqual("anZelo|andZelo|anxelo", Encode(args, true, "Angelo"));
+            Assert.AreEqual("anZelo|andZelo|anxelo", Encode(args, true, "D'Angelo"));
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual("andZelo|anxelo", Encode(args, true, "Angelo"));
+            Assert.AreEqual("", Encode(args, true, "1234"));
+
+            // concat is false, ruleType is EXACT
+            args = new SortedDictionary<String, String>();
+            args.Put("nameType", "SEPHARDIC");
+            Assert.AreEqual("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu", Encode(args, false, "Angelo"));
+            args.Put("ruleType", "EXACT");
+            Assert.AreEqual("anZelo|andZelo|anxelo", Encode(args, false, "Angelo"));
+            Assert.AreEqual("danZelo|dandZelo|danxelo", Encode(args, false, "D'Angelo"));
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual("andZelo|anxelo", Encode(args, false, "Angelo"));
+            Assert.AreEqual("", Encode(args, false, "1234"));
+
+            // concat is true, ruleType is APPROX
+            args = new SortedDictionary<String, String>();
+            args.Put("nameType", "SEPHARDIC");
+            Assert.AreEqual("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu", Encode(args, true, "Angelo"));
+            args.Put("ruleType", "APPROX");
+            Assert.AreEqual("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu", Encode(args, true, "Angelo"));
+            Assert.AreEqual("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu", Encode(args, true, "D'Angelo"));
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu", Encode(args, true, "Angelo"));
+            Assert.AreEqual("", Encode(args, true, "1234"));
+
+            // concat is false, ruleType is APPROX
+            args = new SortedDictionary<String, String>();
+            args.Put("nameType", "SEPHARDIC");
+            Assert.AreEqual("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu", Encode(args, false, "Angelo"));
+            args.Put("ruleType", "APPROX");
+            Assert.AreEqual("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu", Encode(args, false, "Angelo"));
+            Assert.AreEqual("danhila|danhilu|danzila|danzilu|nhila|nhilu|nzila|nzilu", Encode(args, false, "D'Angelo"));
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu", Encode(args, false, "Angelo"));
+            Assert.AreEqual("", Encode(args, false, "1234"));
+        }
+
+        /// <summary>
+        /// Encodes <paramref name="input"/> with a <see cref="PhoneticEngine"/> configured from
+        /// <paramref name="args"/>. This code is similar in style to code found in Solr:
+        /// solr/core/src/java/org/apache/solr/analysis/BeiderMorseFilterFactory.java
+        /// <para/>
+        /// Making a test out of it to protect Solr from possible future
+        /// regressions in Commons-Codec.
+        /// </summary>
+        /// <param name="args">Optional settings: "nameType" (GENERIC when absent), "ruleType"
+        /// (APPROX when absent) and "languageSet" (a comma-separated list; when absent or "auto"
+        /// the single-argument Encode overload is used).</param>
+        /// <param name="concat">Passed unchanged to the <see cref="PhoneticEngine"/> constructor.</param>
+        /// <param name="input">The text to encode.</param>
+        /// <returns>The value returned by the engine's Encode call.</returns>
+        private static String Encode(IDictionary<String, String> args, bool concat, String input)
+        {
+            // PhoneticEngine = NameType + RuleType + concat
+            // we use common-codec's defaults: GENERIC + APPROX + true
+            String nameTypeArg;
+            args.TryGetValue("nameType", out nameTypeArg);
+            NameType nameType = (nameTypeArg == null)
+                ? NameType.GENERIC
+                : (NameType)Enum.Parse(typeof(NameType), nameTypeArg, true);
+
+            String ruleTypeArg;
+            args.TryGetValue("ruleType", out ruleTypeArg);
+            RuleType ruleType = (ruleTypeArg == null)
+                ? RuleType.APPROX
+                : (RuleType)Enum.Parse(typeof(RuleType), ruleTypeArg, true);
+
+            PhoneticEngine engine = new PhoneticEngine(nameType, ruleType, concat);
+
+            /*
+                org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java (lines 96-98) does this:
+
+                encoded = (languages == null)
+                    ? engine.encode(termAtt.toString())
+                    : engine.encode(termAtt.toString(), languages);
+
+                Hence our approach, below:
+            */
+
+            // LanguageSet: defaults to automagic, otherwise a comma-separated list.
+            String languageSetArg;
+            args.TryGetValue("languageSet", out languageSetArg);
+            if (languageSetArg == null || languageSetArg == "auto")
+            {
+                return engine.Encode(input);
+            }
+
+            // The split array is already enumerable, so it can seed the set directly.
+            String[] languages = languageSetArg.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
+            LanguageSet languageSet = LanguageSet.From(new HashSet<String>(languages));
+            return engine.Encode(input, languageSet);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEngineTest.cs
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEngineTest.cs 
b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEngineTest.cs
new file mode 100644
index 0000000..281fc45
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEngineTest.cs
@@ -0,0 +1,89 @@
+ï»¿using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Parameterized tests for <see cref="PhoneticEngine"/>. Each row of <see cref="Values"/>
+    /// supplies a name, its expected encoding, the engine configuration and the maximum
+    /// number of phonemes.
+    /// </summary>
+    public class PhoneticEngineTest
+    {
+        private static readonly int TEN = 10;
+
+        // Row layout: name, expected encoding, name type, rule type, concat, maxPhonemes.
+        public static List<Object[]> Values = new List<object[]>
+        {
+            new Object[] { "Renault", "rinD|rinDlt|rina|rinalt|rino|rinolt|rinu|rinult", NameType.GENERIC, RuleType.APPROX, true, TEN },
+            new Object[] { "Renault", "rYnDlt|rYnalt|rYnult|rinDlt|rinalt|rinult", NameType.ASHKENAZI, RuleType.APPROX, true, TEN },
+            new Object[] { "Renault", "rYnDlt", NameType.ASHKENAZI, RuleType.APPROX, true, 1 },
+            new Object[] { "Renault", "rinDlt", NameType.SEPHARDIC, RuleType.APPROX, true, TEN },
+            new Object[] { "SntJohn-Smith", "sntjonsmit", NameType.GENERIC, RuleType.EXACT, true, TEN },
+            new Object[] { "d'ortley", "(ortlaj|ortlej)-(dortlaj|dortlej)", NameType.GENERIC, RuleType.EXACT, true, TEN },
+            new Object[]
+            {
+                "van helsing",
+                "(elSink|elsink|helSink|helsink|helzink|xelsink)-(banhelsink|fanhelsink|fanhelzink|vanhelsink|vanhelzink|vanjelsink)",
+                NameType.GENERIC,
+                RuleType.EXACT,
+                false,
+                TEN
+            }
+        };
+
+        /// <summary>
+        /// Encodes <paramref name="name"/> and checks both the exact result and that no
+        /// '|'-separated alternative list is longer than <paramref name="maxPhonemes"/>.
+        /// </summary>
+        // Ported from a JUnit test declared with @Test(timeout = 10000L); no timeout is applied here.
+        [Test]
+        [TestCaseSource("Values")]
+        public void TestEncode(String name, String phoneticExpected, NameType nameType,
+                                      RuleType ruleType, bool concat, int maxPhonemes)
+        {
+            String phoneticActual = new PhoneticEngine(nameType, ruleType, concat, maxPhonemes).Encode(name);
+
+            Assert.AreEqual(phoneticExpected, phoneticActual, "phoneme incorrect");
+
+            // With concat the whole result is counted as one list; otherwise every
+            // '-'-separated word is counted on its own.
+            String[] parts = concat
+                ? new String[] { phoneticActual }
+                : phoneticActual.Split(new string[] { "-" }, StringSplitOptions.RemoveEmptyEntries);
+
+            foreach (String part in parts)
+            {
+                Assert.True(part.Split('|').Length <= maxPhonemes);
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/RuleTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/RuleTest.cs 
b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/RuleTest.cs
new file mode 100644
index 0000000..fd2e8a2
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/RuleTest.cs
@@ -0,0 +1,163 @@
+ï»¿using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests Rule.
+    /// <para/>
+    /// since 1.6
+    /// </summary>
+    public class RuleTest
+    {
+        //    private static class NegativeIntegerBaseMatcher : 
BaseMatcher<Integer> {
+        //        @Override
+        //    public void describeTo(final Description description)
+        //    {
+        //        description.appendText("value should be negative");
+        //    }
+
+        //    @Override
+        //    public boolean matches(final Object item)
+        //    {
+        //        return ((Integer)item).intValue() < 0;
+        //    }
+        //}
+
+        /// <summary>
+        /// Builds two rows of <see cref="Phoneme"/> instances from fixed word lists, each
+        /// created with <c>Languages.NO_LANGUAGES</c>. The comparison tests rely on every
+        /// row being listed in ascending order.
+        /// </summary>
+        private Phoneme[][] MakePhonemes()
+        {
+            String[][] wordRows =
+            {
+                new string[] { "rinD", "rinDlt", "rina", "rinalt", "rino", "rinolt", "rinu", "rinult" },
+                new string[] { "dortlaj", "dortlej", "ortlaj", "ortlej", "ortlej-dortlaj" }
+            };
+
+            Phoneme[][] result = new Phoneme[wordRows.Length][];
+            for (int row = 0; row < wordRows.Length; row++)
+            {
+                String[] rowWords = wordRows[row];
+                Phoneme[] rowPhonemes = new Phoneme[rowWords.Length];
+                for (int col = 0; col < rowWords.Length; col++)
+                {
+                    rowPhonemes[col] = new Phoneme(rowWords[col], Languages.NO_LANGUAGES);
+                }
+                result[row] = rowPhonemes;
+            }
+
+            return result;
+        }
+
+        /// <summary>
+        /// Within each row from <c>MakePhonemes</c>, comparing a phoneme with any phoneme
+        /// that follows it must give a negative result.
+        /// </summary>
+        [Test]
+        public void TestPhonemeComparedToLaterIsNegative()
+        {
+            foreach (Phoneme[] row in MakePhonemes())
+            {
+                for (int earlier = 0; earlier < row.Length; earlier++)
+                {
+                    for (int later = earlier + 1; later < row.Length; later++)
+                    {
+                        Phoneme left = row[earlier];
+                        Phoneme right = row[later];
+                        int comparison = Phoneme.COMPARER.Compare(left, right);
+
+                        String message = "Comparing " + left.GetPhonemeText() + " to " + right.GetPhonemeText() + " should be negative";
+                        Assert.True(comparison < 0, message);
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Every phoneme from <c>MakePhonemes</c> must compare as equal (zero) to itself.
+        /// </summary>
+        [Test]
+        public void TestPhonemeComparedToSelfIsZero()
+        {
+            foreach (Phoneme[] row in MakePhonemes())
+            {
+                for (int i = 0; i < row.Length; i++)
+                {
+                    Phoneme phoneme = row[i];
+                    int comparison = Phoneme.COMPARER.Compare(phoneme, phoneme);
+                    String message = "Phoneme compared to itself should be zero: " + phoneme.GetPhonemeText();
+
+                    Assert.AreEqual(0, comparison, message);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Checks character access on simple phonemes, then character access and every
+        /// contiguous sub-sequence on phonemes built by joining two others.
+        /// </summary>
+        [Test]
+        public void TestSubSequenceWorks()
+        {
+            // AppendableCharSequence is private to Rule. We can only make it through a Phoneme.
+
+            Phoneme a = new Phoneme("a", null);
+            Phoneme b = new Phoneme("b", null);
+            Phoneme cd = new Phoneme("cd", null);
+            Phoneme ef = new Phoneme("ef", null);
+            Phoneme ghi = new Phoneme("ghi", null);
+            Phoneme jkl = new Phoneme("jkl", null);
+
+            AssertPhonemeChars("a", a);
+            AssertPhonemeChars("b", b);
+            AssertPhonemeChars("cd", cd);
+            AssertPhonemeChars("ef", ef);
+            AssertPhonemeChars("ghi", ghi);
+            AssertPhonemeChars("jkl", jkl);
+
+            Phoneme a_b = new Phoneme(a, b);
+            AssertPhonemeChars("ab", a_b);
+            AssertPhonemeSubSequences("ab", a_b);
+
+            Phoneme cd_ef = new Phoneme(cd, ef);
+            AssertPhonemeChars("cdef", cd_ef);
+            AssertPhonemeSubSequences("cdef", cd_ef);
+
+            var test = new Phoneme(a, b);
+            Phoneme a_b_cd = new Phoneme(test, cd);
+            AssertPhonemeChars("abcd", a_b_cd);
+            AssertPhonemeSubSequences("abcd", a_b_cd);
+        }
+
+        // Asserts that the phoneme text has the expected character at every index of 'expected'.
+        private static void AssertPhonemeChars(String expected, Phoneme phoneme)
+        {
+            for (int index = 0; index < expected.Length; index++)
+            {
+                Assert.AreEqual(expected[index], phoneme.GetPhonemeText()[index]);
+            }
+        }
+
+        // Asserts that every non-empty [start, end) slice of the phoneme text equals the same slice of 'expected'.
+        private static void AssertPhonemeSubSequences(String expected, Phoneme phoneme)
+        {
+            for (int start = 0; start < expected.Length; start++)
+            {
+                for (int end = start + 1; end <= expected.Length; end++)
+                {
+                    Assert.AreEqual(expected.Substring(start, end - start), phoneme.GetPhonemeText().Substring(start, end - start));
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Caverphone1Test.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Caverphone1Test.cs 
b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Caverphone1Test.cs
new file mode 100644
index 0000000..9112ed4
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Caverphone1Test.cs
@@ -0,0 +1,109 @@
+ï»¿using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests the <see cref="Caverphone1"/> encoder.
+    /// </summary>
+    public class Caverphone1Test : StringEncoderAbstractTest<Caverphone1>
+    {
+        /// <summary>
+        /// Supplies the encoder instance used by the inherited checks.
+        /// </summary>
+        protected override Caverphone1 CreateStringEncoder()
+        {
+            Caverphone1 encoder = new Caverphone1();
+            return encoder;
+        }
+
+        /// <summary>
+        /// Tests example adapted from version 2.0
+        /// http://caversham.otago.ac.nz/files/working/ctp150804.pdf
+        /// <para/>
+        /// AT1111 words: add, aid, at, art, eat, earth, head, hit, hot, hold, hard, heart, it, out, old
+        /// </summary>
+        [Test]
+        public void TestCaverphoneRevisitedCommonCodeAT1111()
+        {
+            String[] words =
+            {
+                "add", "aid", "at", "art", "eat",
+                "earth", "head", "hit", "hot", "hold",
+                "hard", "heart", "it", "out", "old"
+            };
+
+            this.CheckEncodingVariations("AT1111", words);
+        }
+
+        /// <summary>
+        /// Checks the encodings of inputs that end in "mb".
+        /// </summary>
+        [Test]
+        public void TestEndMb()
+        {
+            this.CheckEncodings(new String[][]
+            {
+                new string[] { "mb", "M11111" },
+                new string[] { "mbmb", "MPM111" }
+            });
+        }
+
+        /// <summary>
+        /// Tests some examples from version 2.0
+        /// http://caversham.otago.ac.nz/files/working/ctp150804.pdf
+        /// </summary>
+        [Test]
+        public void TestIsCaverphoneEquals()
+        {
+            Caverphone1 caverphone = new Caverphone1();
+
+            bool peterVsStevenson = caverphone.IsEncodeEqual("Peter", "Stevenson");
+            Assert.False(peterVsStevenson, "Caverphone encodings should not be equal");
+
+            bool peterVsPeady = caverphone.IsEncodeEqual("Peter", "Peady");
+            Assert.True(peterVsPeady, "Caverphone encodings should be equal");
+        }
+
+        /// <summary>
+        /// Tests example from
+        /// http://caversham.otago.ac.nz/files/working/ctp060902.pdf
+        /// </summary>
+        [Test]
+        public void TestSpecificationV1Examples()
+        {
+            this.CheckEncodings(new String[][]
+            {
+                new string[] { "David", "TFT111" },
+                new string[] { "Whittle", "WTL111" }
+            });
+        }
+
+        /// <summary>
+        /// Tests examples from http://en.wikipedia.org/wiki/Caverphone
+        /// </summary>
+        [Test]
+        public void TestWikipediaExamples()
+        {
+            this.CheckEncodings(new String[][]
+            {
+                new string[] { "Lee", "L11111" },
+                new string[] { "Thompson", "TMPSN1" }
+            });
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Caverphone2Test
 .cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Caverphone2Test 
.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Caverphone2Test .cs
new file mode 100644
index 0000000..4ec1daa
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Caverphone2Test .cs       
@@ -0,0 +1,375 @@
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests Caverphone2.
+    /// </summary>
+    public class Caverphone2Test : StringEncoderAbstractTest<Caverphone2>
+    {
+        /// <summary>
+        /// Returns a new <see cref="Caverphone2"/> encoder.
+        /// </summary>
+        protected override Caverphone2 CreateStringEncoder()
+        {
+            Caverphone2 encoder = new Caverphone2();
+            return encoder;
+        }
+
+        /// <summary>
+        /// See http://caversham.otago.ac.nz/files/working/ctp150804.pdf
+        /// <para/>
+        /// Every word below is expected to encode to AT11111111.
+        /// </summary>
+        [Test]
+        public void TestCaverphoneRevisitedCommonCodeAT11111111()
+        {
+            string[] words =
+            {
+                "add", "aid", "at", "art", "eat",
+                "earth", "head", "hit", "hot", "hold",
+                "hard", "heart", "it", "out", "old"
+            };
+            this.CheckEncodingVariations("AT11111111", words);
+        }
+
+        /// <summary>
+        /// See http://caversham.otago.ac.nz/files/working/ctp150804.pdf
+        /// </summary>
+        [Test]
+        public void TestCaverphoneRevisitedExamples()
+        {
+            // Each pair is { input, expected code }.
+            string[] stevenson = { "Stevenson", "STFNSN1111" };
+            string[] peter = { "Peter", "PTA1111111" };
+            this.CheckEncodings(new string[][] { stevenson, peter });
+        }
+
+        /// <summary>
+        /// See http://caversham.otago.ac.nz/files/working/ctp150804.pdf
+        /// <para/>
+        /// Every name below is expected to encode to KLN1111111.
+        /// </summary>
+        [Test]
+        public void TestCaverphoneRevisitedRandomNameKLN1111111()
+        {
+            string[] names =
+            {
+                "Cailean", "Calan", "Calen", "Callahan", "Callan", "Callean",
+                "Carleen", "Carlen", "Carlene", "Carlin", "Carline", "Carlyn",
+                "Carlynn", "Carlynne", "Charlean", "Charleen", "Charlene", "Charline",
+                "Cherlyn", "Chirlin", "Clein", "Cleon", "Cline", "Cohleen",
+                "Colan", "Coleen", "Colene", "Colin", "Colleen", "Collen",
+                "Collin", "Colline", "Colon", "Cullan", "Cullen", "Cullin",
+                "Gaelan", "Galan", "Galen", "Garlan", "Garlen", "Gaulin",
+                "Gayleen", "Gaylene", "Giliane", "Gillan", "Gillian", "Glen",
+                "Glenn", "Glyn", "Glynn", "Gollin", "Gorlin", "Kalin",
+                "Karlan", "Karleen", "Karlen", "Karlene", "Karlin", "Karlyn",
+                "Kaylyn", "Keelin", "Kellen", "Kellene", "Kellyann", "Kellyn",
+                "Khalin", "Kilan", "Kilian", "Killen", "Killian", "Killion",
+                "Klein", "Kleon", "Kline", "Koerlin", "Kylen", "Kylynn",
+                "Quillan", "Quillon", "Qulllon", "Xylon"
+            };
+            this.CheckEncodingVariations("KLN1111111", names);
+        }
+
+        /// <summary>
+        /// See http://caversham.otago.ac.nz/files/working/ctp150804.pdf
+        /// <para/>
+        /// Every name below is expected to encode to TN11111111.
+        /// </summary>
+        [Test]
+        public void TestCaverphoneRevisitedRandomNameTN11111111()
+        {
+            string[] names =
+            {
+                "Dan", "Dane", "Dann", "Darn", "Daune", "Dawn",
+                "Ddene", "Dean", "Deane", "Deanne", "DeeAnn", "Deeann",
+                "Deeanne", "Deeyn", "Den", "Dene", "Denn", "Deonne",
+                "Diahann", "Dian", "Diane", "Diann", "Dianne", "Diannne",
+                "Dine", "Dion", "Dione", "Dionne", "Doane", "Doehne",
+                "Don", "Donn", "Doone", "Dorn", "Down", "Downe",
+                "Duane", "Dun", "Dunn", "Duyne", "Dyan", "Dyane",
+                "Dyann", "Dyanne", "Dyun", "Tan", "Tann", "Teahan",
+                "Ten", "Tenn", "Terhune", "Thain", "Thaine", "Thane",
+                "Thanh", "Thayne", "Theone", "Thin", "Thorn", "Thorne",
+                "Thun", "Thynne", "Tien", "Tine", "Tjon", "Town",
+                "Towne", "Turne", "Tyne"
+            };
+            this.CheckEncodingVariations("TN11111111", names);
+        }
+
+        /// <summary>
+        /// See http://caversham.otago.ac.nz/files/working/ctp150804.pdf
+        /// <para/>
+        /// Every name below is expected to encode to TTA1111111.
+        /// </summary>
+        [Test]
+        public void TestCaverphoneRevisitedRandomNameTTA1111111()
+        {
+            string[] names =
+            {
+                "Darda", "Datha", "Dedie", "Deedee", "Deerdre", "Deidre",
+                "Deirdre", "Detta", "Didi", "Didier", "Dido", "Dierdre",
+                "Dieter", "Dita", "Ditter", "Dodi", "Dodie", "Dody",
+                "Doherty", "Dorthea", "Dorthy", "Doti", "Dotti", "Dottie",
+                "Dotty", "Doty", "Doughty", "Douty", "Dowdell", "Duthie",
+                "Tada", "Taddeo", "Tadeo", "Tadio", "Tati", "Teador",
+                "Tedda", "Tedder", "Teddi", "Teddie", "Teddy", "Tedi",
+                "Tedie", "Teeter", "Teodoor", "Teodor", "Terti", "Theda",
+                "Theodor", "Theodore", "Theta", "Thilda", "Thordia", "Tilda",
+                "Tildi", "Tildie", "Tildy", "Tita", "Tito", "Tjader",
+                "Toddie", "Toddy", "Torto", "Tuddor", "Tudor", "Turtle",
+                "Tuttle", "Tutto"
+            };
+            this.CheckEncodingVariations("TTA1111111", names);
+        }
+
+        /// <summary>
+        /// See http://caversham.otago.ac.nz/files/working/ctp150804.pdf
+        /// </summary>
+        [Test]
+        public void TestCaverphoneRevisitedRandomWords()
+        {
+            string[] rtaWords = { "rather", "ready", "writer" };
+            string[] apaWords = { "able", "appear" };
+
+            this.CheckEncodingVariations("RTA1111111", rtaWords);
+            this.CheckEncoding("SSA1111111", "social");
+            this.CheckEncodingVariations("APA1111111", apaWords);
+        }
+
+        /// <summary>
+        /// Checks the expected codes for the inputs "mb" and "mbmb".
+        /// </summary>
+        [Test]
+        public void TestEndMb()
+        {
+            // Each pair is { input, expected code }.
+            string[] single = { "mb", "M111111111" };
+            string[] doubled = { "mbmb", "MPM1111111" };
+            this.CheckEncodings(new string[][] { single, doubled });
+        }
+
+        /// <summary>
+        /// Caverphone Revisited: checks <c>IsEncodeEqual</c> for one name pair expected
+        /// to differ and one expected to match.
+        /// </summary>
+        [Test]
+        public void TestIsCaverphoneEquals()
+        {
+            var encoder = new Caverphone2();
+
+            bool peterVsStevenson = encoder.IsEncodeEqual("Peter", "Stevenson");
+            Assert.False(peterVsStevenson, "Caverphone encodings should not be equal");
+
+            bool peterVsPeady = encoder.IsEncodeEqual("Peter", "Peady");
+            Assert.True(peterVsPeady, "Caverphone encodings should be equal");
+        }
+
+        /// <summary>
+        /// Checks one representative input for each expected code used elsewhere in this fixture.
+        /// </summary>
+        [Test]
+        public void TestSpecificationExamples()
+        {
+            // Each pair is { input, expected code }.
+            var examples = new string[][]
+            {
+                new[] { "Peter", "PTA1111111" },
+                new[] { "ready", "RTA1111111" },
+                new[] { "social", "SSA1111111" },
+                new[] { "able", "APA1111111" },
+                new[] { "Tedder", "TTA1111111" },
+                new[] { "Karleen", "KLN1111111" },
+                new[] { "Dyun", "TN11111111" }
+            };
+            this.CheckEncodings(examples);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/ColognePhoneticTest.cs
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/ColognePhoneticTest.cs 
b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/ColognePhoneticTest.cs
new file mode 100644
index 0000000..46b14ff
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/ColognePhoneticTest.cs
@@ -0,0 +1,171 @@
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests the <see cref="ColognePhonetic"/> class.
+    /// </summary>
+    public class ColognePhoneticTest : 
StringEncoderAbstractTest<ColognePhonetic>
+    {
+        /// <summary>
+        /// Returns a new <see cref="ColognePhonetic"/> encoder.
+        /// </summary>
+        protected override ColognePhonetic CreateStringEncoder()
+        {
+            ColognePhonetic encoder = new ColognePhonetic();
+            return encoder;
+        }
+
+        /// <summary>
+        /// "Aabjoe" is expected to encode to "01".
+        /// </summary>
+        [Test]
+        public void TestAabjoe()
+        {
+            const string expectedCode = "01";
+            this.CheckEncoding(expectedCode, "Aabjoe");
+        }
+
+        /// <summary>
+        /// "Aaclan" is expected to encode to "0856".
+        /// </summary>
+        [Test]
+        public void TestAaclan()
+        {
+            const string expectedCode = "0856";
+            this.CheckEncoding(expectedCode, "Aaclan");
+        }
+
+        /// <summary>
+        /// Tests [CODEC-122]: "Aychlmajr" is expected to encode to "04567".
+        /// </summary>
+        [Test]
+        public void TestAychlmajrForCodec122()
+        {
+            const string expectedCode = "04567";
+            this.CheckEncoding(expectedCode, "Aychlmajr");
+        }
+
+        /// <summary>
+        /// Checks the expected codes for very short inputs (single letters and short clusters).
+        /// </summary>
+        [Test]
+        public void TestEdgeCases()
+        {
+            // Each pair is { input, expected code }.
+            var cases = new string[][]
+            {
+                new[] { "a", "0" },
+                new[] { "e", "0" },
+                new[] { "i", "0" },
+                new[] { "o", "0" },
+                new[] { "u", "0" },
+                new[] { "\u00E4", "0" }, // a-umlaut
+                new[] { "\u00F6", "0" }, // o-umlaut
+                new[] { "\u00FC", "0" }, // u-umlaut
+                new[] { "aa", "0" },
+                new[] { "ha", "0" },
+                new[] { "h", "" },
+                new[] { "aha", "0" },
+                new[] { "b", "1" },
+                new[] { "p", "1" },
+                new[] { "ph", "3" },
+                new[] { "f", "3" },
+                new[] { "v", "3" },
+                new[] { "w", "3" },
+                new[] { "g", "4" },
+                new[] { "k", "4" },
+                new[] { "q", "4" },
+                new[] { "x", "48" },
+                new[] { "ax", "048" },
+                new[] { "cx", "48" },
+                new[] { "l", "5" },
+                new[] { "cl", "45" },
+                new[] { "acl", "085" },
+                new[] { "mn", "6" },
+                new[] { "r", "7" }
+            };
+            this.CheckEncodings(cases);
+        }
+
+        /// <summary>
+        /// Checks the expected Cologne phonetic codes for a set of example words.
+        /// Each pair is { input, expected code }.
+        /// </summary>
+        [Test]
+        public void TestExamples()
+        {
+            String[][] data = {
+            new string[] { "m\u00DCller", "657"}, // mÜller - why upper case U-umlaut?
+            new string[] { "schmidt", "862"},
+            new string[] { "schneider", "8627"},
+            new string[] { "fischer", "387"},
+            new string[] { "weber", "317"},
+            new string[] { "wagner", "3467"},
+            new string[] { "becker", "147"},
+            new string[] { "hoffmann", "0366"},
+            new string[] { "sch\u00C4fer", "837"}, // schÄfer - why upper case A-umlaut ?
+            new string[] { "Breschnew", "17863"},
+            new string[] { "Wikipedia", "3412"},
+            new string[] { "peter", "127"},
+            new string[] { "pharma", "376"},
+            new string[] { "m\u00f6nchengladbach", "664645214"}, // mönchengladbach
+            new string[] { "deutsch", "28"},
+            new string[] { "deutz", "28"},
+            new string[] { "hamburg", "06174"},
+            new string[] { "hannover", "0637"},
+            new string[] { "christstollen", "478256"},
+            new string[] { "Xanthippe", "48621"},
+            new string[] { "Zacharias", "8478"},
+            new string[] { "Holzbau", "0581"},
+            new string[] { "matsch", "68"},
+            new string[] { "matz", "68"},
+            new string[] { "Arbeitsamt", "071862"},
+            new string[] { "Eberhard", "01772"},
+            new string[] { "Eberhardt", "01772"},
+            new string[] { "heithabu", "021"}
+            };
+            this.CheckEncodings(data);
+        }
+
+        /// <summary>
+        /// Checks the expected codes for two hyphenated inputs.
+        /// </summary>
+        [Test]
+        public void TestHyphen()
+        {
+            // Each pair is { input, expected code }.
+            string[] bergischGladbach = { "bergisch-gladbach", "174845214" };
+            string[] muellerLuedenscheidt = { "M\u00fcller-L\u00fcdenscheidt", "65752682" }; // Müller-Lüdenscheidt
+            this.CheckEncodings(new string[][] { bergischGladbach, muellerLuedenscheidt });
+        }
+
+        /// <summary>
+        /// Verifies that both words of each pair are reported as phonetically equal
+        /// by <c>IsEncodeEqual</c>.
+        /// </summary>
+        [Test]
+        public void TestIsEncodeEquals()
+        {
+            String[][] data = {
+            // The first pair used to be {"Meyer", "Müller"}. The other tests in this fixture
+            // expect "67" for Meyer and "657" for Müller, so that pair can never be equal;
+            // it went unnoticed because the result of IsEncodeEqual was discarded.
+            new string[] {"Muller", "M\u00fcller"}, // Müller
+            new string[] {"Meyer", "Mayr"},
+            new string[] {"house", "house"},
+            new string[] {"House", "house"},
+            new string[] {"Haus", "house"},
+            new string[] {"ganz", "Gans"},
+            new string[] {"ganz", "G\u00e4nse"}, // Gänse
+            new string[] {"Miyagi", "Miyako"}};
+            foreach (String[] element in data)
+            {
+                // Assert on the result so that a mismatch actually fails the test.
+                bool encodeEqual = this.StringEncoder.IsEncodeEqual(element[1], element[0]);
+                Assert.True(encodeEqual, element[1] + " != " + element[0]);
+            }
+        }
+
+        /// <summary>
+        /// Every spelling below is expected to encode to "65".
+        /// </summary>
+        [Test]
+        public void TestVariationsMella()
+        {
+            this.CheckEncodingVariations(
+                "65",
+                new[] { "mella", "milah", "moulla", "mellah", "muehle", "mule" });
+        }
+
+        /// <summary>
+        /// Every spelling below is expected to encode to "67".
+        /// </summary>
+        [Test]
+        public void TestVariationsMeyer()
+        {
+            this.CheckEncodingVariations(
+                "67",
+                new[] { "Meier", "Maier", "Mair", "Meyer", "Meyr", "Mejer", "Major" });
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/DaitchMokotoffSoundexTest.cs
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/DaitchMokotoffSoundexTest.cs 
b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/DaitchMokotoffSoundexTest.cs
new file mode 100644
index 0000000..84bb5d3
--- /dev/null
+++ 
b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/DaitchMokotoffSoundexTest.cs
@@ -0,0 +1,176 @@
+// commons-codec version compatibility level: 1.10
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests <see cref="DaitchMokotoffSoundex"/>.
+    /// <para/>
+    /// since 1.10
+    /// </summary>
+    public class DaitchMokotoffSoundexTest : 
StringEncoderAbstractTest<DaitchMokotoffSoundex>
+    {
+        /// <summary>
+        /// Returns a new <see cref="DaitchMokotoffSoundex"/> encoder.
+        /// </summary>
+        protected override DaitchMokotoffSoundex CreateStringEncoder()
+        {
+            DaitchMokotoffSoundex encoder = new DaitchMokotoffSoundex();
+            return encoder;
+        }
+
+        /// <summary>
+        /// Shorthand for calling <c>GetSoundex</c> on the encoder under test.
+        /// </summary>
+        private string GetSoundex(string source)
+        {
+            string soundex = StringEncoder.GetSoundex(source);
+            return soundex;
+        }
+
+        /// <summary>
+        /// Shorthand for calling <c>Encode</c> on the encoder under test.
+        /// </summary>
+        private string Encode(string source)
+        {
+            string encoded = StringEncoder.Encode(source);
+            return encoded;
+        }
+
+        /// <summary>
+        /// An accented spelling and its plain-ASCII counterpart are expected to
+        /// produce the same soundex code.
+        /// </summary>
+        [Test]
+        public void TestAccentedCharacterFolding()
+        {
+            // Non-ASCII characters are written as \u escapes so the literals survive
+            // any re-encoding of this file.
+            Assert.AreEqual("294795", GetSoundex("Stra\u00DFburg")); // Straßburg (sharp s)
+            Assert.AreEqual("294795", GetSoundex("Strasburg"));
+
+            Assert.AreEqual("095600", GetSoundex("\u00C9regon")); // Éregon (E-acute)
+            Assert.AreEqual("095600", GetSoundex("Eregon"));
+        }
+
+        /// <summary>
+        /// Checks the expected soundex codes for two inputs whose adjacent letter
+        /// groups would otherwise yield a repeated code digit.
+        /// </summary>
+        [Test]
+        public void TestAdjacentCodes()
+        {
+            // AKSSOL splits as A-KS-S-O-L:
+            //   0-54-4---8 -> wrong
+            //   0-54-----8 -> correct
+            string akssol = GetSoundex("AKSSOL");
+            Assert.AreEqual("054800", akssol);
+
+            // GERSCHFELD splits as G-E-RS-CH-F-E-L-D:
+            //   5--4/94-5/4-7-8-3 -> wrong
+            //   5--4/94-5/--7-8-3 -> correct
+            string gerschfeld = GetSoundex("GERSCHFELD");
+            Assert.AreEqual("547830|545783|594783|594578", gerschfeld);
+        }
+
+        /// <summary>
+        /// Same names as the second group in <c>TestSoundexBasic</c>, but through
+        /// <c>Encode</c>, which is expected to return a single code without branching.
+        /// </summary>
+        [Test]
+        public void TestEncodeBasic()
+        {
+            // Each pair is { expected code, input }.
+            var cases = new string[][]
+            {
+                new[] { "097400", "AUERBACH" },
+                new[] { "097400", "OHRBACH" },
+                new[] { "874400", "LIPSHITZ" },
+                new[] { "874400", "LIPPSZYC" },
+                new[] { "876450", "LEWINSKY" },
+                new[] { "876450", "LEVINSKI" },
+                new[] { "486740", "SZLAMAWICZ" },
+                new[] { "486740", "SHLAMOVITZ" }
+            };
+            foreach (string[] pair in cases)
+            {
+                Assert.AreEqual(pair[0], Encode(pair[1]));
+            }
+        }
+
+        /// <summary>
+        /// "OBrien" with an apostrophe at any position is expected to encode to "079600".
+        /// </summary>
+        [Test]
+        public void TestEncodeIgnoreApostrophes()
+        {
+            string[] variants =
+            {
+                "OBrien", "'OBrien", "O'Brien", "OB'rien",
+                "OBr'ien", "OBri'en", "OBrie'n", "OBrien'"
+            };
+            this.CheckEncodingVariations("079600", variants);
+        }
+
+        /// <summary>
+        /// Test data from http://www.myatt.demon.co.uk/sxalg.htm
+        /// <para/>
+        /// "KINGSMITH" with a hyphen at any position is expected to encode to "565463".
+        /// </summary>
+        [Test]
+        public void TestEncodeIgnoreHyphens()
+        {
+            string[] variants =
+            {
+                "KINGSMITH", "-KINGSMITH", "K-INGSMITH", "KI-NGSMITH",
+                "KIN-GSMITH", "KING-SMITH", "KINGS-MITH", "KINGSM-ITH",
+                "KINGSMI-TH", "KINGSMIT-H", "KINGSMITH-"
+            };
+            this.CheckEncodingVariations("565463", variants);
+        }
+
+        /// <summary>
+        /// Surrounding whitespace is expected not to change the encoded value.
+        /// </summary>
+        [Test]
+        public void TestEncodeIgnoreTrimmable()
+        {
+            string padded = Encode(" \t\n\r Washington \t\n\r ");
+            Assert.AreEqual("746536", padded);
+
+            string plain = Encode("Washington");
+            Assert.AreEqual("746536", plain);
+        }
+
+        /// <summary>
+        /// Examples from http://www.jewishgen.org/infofiles/soundex.html
+        /// </summary>
+        [Test]
+        public void TestSoundexBasic()
+        {
+            // Each pair is { expected soundex, input }; '|' separates alternative codes.
+            var cases = new string[][]
+            {
+                new[] { "583600", "GOLDEN" },
+                new[] { "087930", "Alpert" },
+                new[] { "791900", "Breuer" },
+                new[] { "579000", "Haber" },
+                new[] { "665600", "Mannheim" },
+                new[] { "664000", "Mintz" },
+                new[] { "370000", "Topf" },
+                new[] { "586660", "Kleinmann" },
+                new[] { "769600", "Ben Aron" },
+
+                new[] { "097400|097500", "AUERBACH" },
+                new[] { "097400|097500", "OHRBACH" },
+                new[] { "874400", "LIPSHITZ" },
+                new[] { "874400|874500", "LIPPSZYC" },
+                new[] { "876450", "LEWINSKY" },
+                new[] { "876450", "LEVINSKI" },
+                new[] { "486740", "SZLAMAWICZ" },
+                new[] { "486740", "SHLAMOVITZ" }
+            };
+            foreach (string[] pair in cases)
+            {
+                Assert.AreEqual(pair[0], GetSoundex(pair[1]));
+            }
+        }
+
+        /// <summary>
+        /// Examples from http://www.avotaynu.com/soundex.htm
+        /// </summary>
+        [Test]
+        public void TestSoundexBasic2()
+        {
+            // Each pair is { expected soundex, input }; '|' separates alternative codes.
+            var cases = new string[][]
+            {
+                new[] { "467000|567000", "Ceniow" },
+                new[] { "467000", "Tsenyuv" },
+                new[] { "587400|587500", "Holubica" },
+                new[] { "587400", "Golubitsa" },
+                new[] { "746480|794648", "Przemysl" },
+                new[] { "746480", "Pshemeshil" },
+                new[] { "944744|944745|944754|944755|945744|945745|945754|945755", "Rosochowaciec" },
+                new[] { "945744", "Rosokhovatsets" }
+            };
+            foreach (string[] pair in cases)
+            {
+                Assert.AreEqual(pair[0], GetSoundex(pair[1]));
+            }
+        }
+
+        /// <summary>
+        /// Examples from http://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex
+        /// </summary>
+        [Test]
+        public void TestSoundexBasic3()
+        {
+            // Each pair is { expected soundex, input }; '|' separates alternative codes.
+            var cases = new string[][]
+            {
+                new[] { "734000|739400", "Peters" },
+                new[] { "734600|739460", "Peterson" },
+                new[] { "645740", "Moskowitz" },
+                new[] { "645740", "Moskovitz" },
+                new[] { "154600|145460|454600|445460", "Jackson" },
+                new[] { "154654|154645|154644|145465|145464|454654|454645|454644|445465|445464", "Jackson-Jackson" }
+            };
+            foreach (string[] pair in cases)
+            {
+                Assert.AreEqual(pair[0], GetSoundex(pair[1]));
+            }
+        }
+
+        /// <summary>
+        /// Both Romanian variants of the letter t (with cedilla and with comma below)
+        /// are expected to produce the same soundex alternatives.
+        /// </summary>
+        [Test]
+        public void TestSpecialRomanianCharacters()
+        {
+            // Non-ASCII characters are written as \u escapes so the literals survive
+            // any re-encoding of this file.
+            Assert.AreEqual("364000|464000", GetSoundex("\u0163amas")); // t-cedilla (U+0163)
+            Assert.AreEqual("364000|464000", GetSoundex("\u021Bamas")); // t-comma (U+021B)
+        }
+    }
+}

[06/15] lucenenet git commit: Added Lucene.Net.Analysis.Phonetic + tests. Rather than porting over the entire commons-codec library, only the language features were ported and added to this library.

Reply via email to