[13/15] lucenenet git commit: Added Lucene.Net.Analysis.Phonetic + tests. Rather than porting over the entire commons-codec library, only the language features were ported and added to this library.

nightowl888 Tue, 27 Jun 2017 13:34:35 -0700

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/PhoneticEngine.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/PhoneticEngine.cs 
b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/PhoneticEngine.cs
new file mode 100644
index 0000000..3cf5c7a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/PhoneticEngine.cs
@@ -0,0 +1,578 @@
+// commons-codec version compatibility level: 1.9
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Converts words into potential phonetic representations.
+    /// </summary>
+    /// <remarks>
+    /// This is a two-stage process. Firstly, the word is converted into a phonetic representation that takes
+    /// into account the likely source language. Next, this phonetic representation is converted into a
+    /// pan-European 'average' representation, allowing comparison between different versions of essentially
+    /// the same word from different languages.
+    /// <para/>
+    /// This class is intentionally immutable and thread-safe.
+    /// If you wish to alter the settings for a PhoneticEngine, you
+    /// must make a new one with the updated settings.
+    /// <para/>
+    /// Ported from phoneticengine.php
+    /// <para/>
+    /// since 1.6
+    /// </remarks>
+    public class PhoneticEngine
+    {
+        internal Regex WHITESPACE = new Regex("\\s+", RegexOptions.Compiled);
+
+        /// <summary>
+        /// Utility for manipulating a set of phonemes as they are being built up. Not intended for use outside
+        /// this package, and probably not outside the <see cref="PhoneticEngine"/> class.
+        /// <para/>
+        /// since 1.6
+        /// </summary>
+        internal sealed class PhonemeBuilder
+        {
+            /// <summary>
+            /// An empty builder where all phonemes must come from some set of languages. This will contain a single
+            /// phoneme of zero characters. This can then be appended to. This should be the only way to create a new
+            /// phoneme from scratch.
+            /// </summary>
+            /// <param name="languages">The set of languages.</param>
+            /// <returns>A new, empty phoneme builder.</returns>
+            public static PhonemeBuilder Empty(LanguageSet languages)
+            {
+                return new PhonemeBuilder(new Phoneme("", languages));
+            }
+
+            // Phonemes currently held by this builder, in insertion order.
+            private readonly IList<Phoneme> phonemes;
+
+            /// <summary>
+            /// Creates a builder that holds exactly one phoneme.
+            /// </summary>
+            /// <param name="phoneme">The initial phoneme.</param>
+            private PhonemeBuilder(Phoneme phoneme)
+            {
+                // LUCENENET NOTE: LinkedHashSet cares about insertion order - in .NET, we can just use List<T> for that
+                this.phonemes = new List<Phoneme>();
+                this.phonemes.Add(phoneme);
+            }
+
+            /// <summary>
+            /// Creates a builder around an existing list. The list is stored as-is (no copy is made),
+            /// so later calls to <c>Append</c> and <c>Apply</c> modify the list that was passed in.
+            /// </summary>
+            /// <param name="phonemes">The list of phonemes to wrap.</param>
+            internal PhonemeBuilder(IList<Phoneme> phonemes)
+            {
+                this.phonemes = phonemes;
+            }
+
+            /// <summary>
+            /// Appends <paramref name="str"/> to every phoneme held by this builder.
+            /// No new builder is created; the return value of <c>Phoneme.Append</c> is discarded,
+            /// so this relies on that call updating the phoneme in place.
+            /// </summary>
+            /// <param name="str">The characters to append to the phonemes.</param>
+            public void Append(ICharSequence str)
+            {
+                foreach (Phoneme ph in this.phonemes)
+                {
+                    ph.Append(str.ToString());
+                }
+            }
+
+            /// <summary>
+            /// Appends <paramref name="str"/> to every phoneme held by this builder.
+            /// No new builder is created; the return value of <c>Phoneme.Append</c> is discarded,
+            /// so this relies on that call updating the phoneme in place.
+            /// </summary>
+            /// <param name="str">The characters to append to the phonemes.</param>
+            // LUCENENET specific
+            public void Append(string str)
+            {
+                foreach (Phoneme ph in this.phonemes)
+                {
+                    ph.Append(str);
+                }
+            }
+
+            /// <summary>
+            /// Appends the current content of <paramref name="str"/> to every phoneme held by this builder.
+            /// No new builder is created; the return value of <c>Phoneme.Append</c> is discarded,
+            /// so this relies on that call updating the phoneme in place.
+            /// </summary>
+            /// <param name="str">The characters to append to the phonemes.</param>
+            // LUCENENET specific
+            public void Append(StringBuilder str)
+            {
+                foreach (Phoneme ph in this.phonemes)
+                {
+                    ph.Append(str.ToString());
+                }
+            }
+
+            /// <summary>
+            /// Applies the given phoneme expression to all phonemes in this phoneme builder.
+            /// <para/>
+            /// This will lengthen phonemes that have compatible language sets to the expression, and drop those that are
+            /// incompatible. The builder's phoneme list is replaced in place with the result.
+            /// </summary>
+            /// <param name="phonemeExpr">The expression to apply.</param>
+            /// <param name="maxPhonemes">The maximum number of phonemes to build up.</param>
+            public void Apply(IPhonemeExpr phonemeExpr, int maxPhonemes)
+            {
+                // LUCENENET NOTE: LinkedHashSet cares about insertion order - in .NET, we can just use List<T> for that
+                IList<Phoneme> newPhonemes = new List<Phoneme>(maxPhonemes);
+
+                // Cross every current phoneme with every phoneme of the expression, keeping only the
+                // pairs whose language sets still overlap, until maxPhonemes candidates were collected.
+                foreach (Phoneme left in this.phonemes)
+                {
+                    foreach (Phoneme right in phonemeExpr.Phonemes)
+                    {
+                        LanguageSet languages = left.Languages.RestrictTo(right.Languages);
+                        if (!languages.IsEmpty)
+                        {
+                            Phoneme join = new Phoneme(left, right, languages);
+                            if (newPhonemes.Count < maxPhonemes)
+                            {
+                                newPhonemes.Add(join);
+                                if (newPhonemes.Count >= maxPhonemes)
+                                {
+                                    // emulates the labeled "break EXPR" of the Java original
+                                    goto EXPR_break;
+                                }
+                            }
+                        }
+                    }
+                }
+                EXPR_break: { }
+
+                this.phonemes.Clear();
+                // LUCENENET: We need to filter out any duplicates, since we converted from LinkedHashSet
+                // to List. The filter is an explicit loop so that it does not depend on a lazily
+                // evaluated query observing this.phonemes while AddRange is still filling it.
+                foreach (Phoneme candidate in newPhonemes)
+                {
+                    if (!this.phonemes.Any(existing => existing.Equals(candidate)))
+                    {
+                        this.phonemes.Add(candidate);
+                    }
+                }
+            }
+
+            /// <summary>
+            /// Gets underlying phoneme set. Please don't mutate.
+            /// </summary>
+            public IList<Phoneme> Phonemes
+            {
+                get { return this.phonemes; }
+            }
+
+            /// <summary>
+            /// Stringifies the phoneme set. This produces a single string of the strings of each phoneme,
+            /// joined with a pipe. This is explicitly provided in place of <see cref="object.ToString()"/> as it is a potentially
+            /// expensive operation, which should be avoided when debugging.
+            /// </summary>
+            /// <returns>The stringified phoneme set.</returns>
+            public string MakeString()
+            {
+                StringBuilder sb = new StringBuilder();
+
+                foreach (Phoneme ph in this.phonemes)
+                {
+                    // the separator goes in front of every phoneme except the first non-empty output
+                    if (sb.Length > 0)
+                    {
+                        sb.Append("|");
+                    }
+                    sb.Append(ph.GetPhonemeText());
+                }
+
+                return sb.ToString();
+            }
+        }
+
+        /// <summary>
+        /// A function closure capturing the application of a list of rules to an input sequence at a particular offset.
+        /// After invocation, the values <c>i</c> and <c>found</c> are updated. <c>i</c> points to the
+        /// index of the next char in <c>input</c> that must be processed next (the input up to that index having been
+        /// processed already), and <c>found</c> indicates if a matching rule was found or not. In the case where a
+        /// matching rule was found, the phonemes held by <c>phonemeBuilder</c> are updated by the matching rule.
+        /// <para/>
+        /// Although this class is not thread-safe (it has mutable unprotected fields), it is not shared between threads
+        /// as it is constructed as needed by the calling methods.
+        /// <para/>
+        /// since 1.6
+        /// </summary>
+        private sealed class RulesApplication
+        {
+            private readonly IDictionary<string, IList<Rule>> finalRules;
+            private readonly string input;
+
+            private PhonemeBuilder phonemeBuilder;
+            private int i;
+            private readonly int maxPhonemes;
+            private bool found;
+
+            /// <summary>
+            /// Captures everything needed to apply one rule at position <paramref name="i"/>.
+            /// </summary>
+            /// <param name="finalRules">Rules to try, keyed by the one-character string they are looked up with.</param>
+            /// <param name="input">The text the rules are matched against.</param>
+            /// <param name="phonemeBuilder">The builder a matching rule is applied to.</param>
+            /// <param name="i">Index into <paramref name="input"/> at which matching starts.</param>
+            /// <param name="maxPhonemes">Upper bound handed to <c>PhonemeBuilder.Apply</c>.</param>
+            /// <exception cref="ArgumentNullException"><paramref name="finalRules"/> is <c>null</c>.</exception>
+            public RulesApplication(IDictionary<string, IList<Rule>> finalRules, string input,
+                                    PhonemeBuilder phonemeBuilder, int i, int maxPhonemes)
+            {
+                if (finalRules == null)
+                {
+                    // The single-string constructor of ArgumentNullException takes the parameter NAME,
+                    // so the (paramName, message) overload is needed to carry a message.
+                    throw new ArgumentNullException("finalRules", "The finalRules argument must not be null");
+                }
+                this.finalRules = finalRules;
+                this.phonemeBuilder = phonemeBuilder;
+                this.input = input;
+                this.i = i;
+                this.maxPhonemes = maxPhonemes;
+            }
+
+            /// <summary>
+            /// Gets the index of the next character of the input to process.
+            /// </summary>
+            public int I
+            {
+                get { return this.i; }
+            }
+
+            /// <summary>
+            /// Gets the phoneme builder this application operates on.
+            /// </summary>
+            public PhonemeBuilder PhonemeBuilder
+            {
+                get { return this.phonemeBuilder; }
+            }
+
+            /// <summary>
+            /// Invokes the rules. Loops over the rules list, stopping at the first one that has a matching context
+            /// and pattern. Then applies this rule to the phoneme builder to produce updated phonemes. If there was no
+            /// match, <c>i</c> is advanced by one; appending the unmatched character (or not) is left to the caller.
+            /// </summary>
+            /// <returns><c>this</c></returns>
+            public RulesApplication Invoke()
+            {
+                this.found = false;
+                int patternLength = 1;
+                IList<Rule> rules;
+                // candidate rules are looked up by the single character at the current position
+                if (this.finalRules.TryGetValue(input.Substring(i, patternLength), out rules) && rules != null)
+                {
+                    foreach (Rule rule in rules)
+                    {
+                        string pattern = rule.Pattern;
+                        patternLength = pattern.Length;
+                        if (rule.PatternAndContextMatches(this.input, this.i))
+                        {
+                            this.phonemeBuilder.Apply(rule.Phoneme, maxPhonemes);
+                            this.found = true;
+                            break;
+                        }
+                    }
+                }
+
+                if (!this.found)
+                {
+                    // patternLength may hold the length of the last rule that was tried; with no match
+                    // only a single character is consumed
+                    patternLength = 1;
+                }
+
+                this.i += patternLength;
+                return this;
+            }
+
+            /// <summary>
+            /// Gets whether the last call to <see cref="Invoke()"/> found a matching rule.
+            /// </summary>
+            public bool IsFound
+            {
+                get { return this.found; }
+            }
+        }
+
+        // Name prefixes (particles such as "de" or "van") per name type. Encode() either strips
+        // them from the word list or splits the input on them, depending on the name type.
+        private static readonly IDictionary<NameType, ISet<string>> NAME_PREFIXES = new Dictionary<NameType, ISet<string>>();
+
+        /// <summary>
+        /// Fills <c>NAME_PREFIXES</c> with one read-only prefix set per <see cref="Bm.NameType"/>.
+        /// </summary>
+        static PhoneticEngine()
+        {
+            // NOTE: the element order below is deliberately left untouched. Encode() returns on the
+            // first prefix that matches while enumerating the GENERIC set, so a change of enumeration
+            // order could change results (presumably enumeration follows insertion order - verify
+            // before reordering).
+            HashSet<string> ashkenaziPrefixes = new HashSet<string>()
+            {
+                "bar", "ben", "da", "de", "van", "von"
+            };
+            HashSet<string> sephardicPrefixes = new HashSet<string>()
+            {
+                "al", "el", "da", "dal", "de", "del", "dela", "de la",
+                "della", "des", "di", "do", "dos", "du", "van", "von"
+            };
+            HashSet<string> genericPrefixes = new HashSet<string>()
+            {
+                "da", "dal", "de", "del", "dela", "de la", "della",
+                "des", "di", "do", "dos", "du", "van", "von"
+            };
+
+            NAME_PREFIXES[NameType.ASHKENAZI] = Collections.UnmodifiableSet(ashkenaziPrefixes);
+            NAME_PREFIXES[NameType.SEPHARDIC] = Collections.UnmodifiableSet(sephardicPrefixes);
+            NAME_PREFIXES[NameType.GENERIC] = Collections.UnmodifiableSet(genericPrefixes);
+        }
+
+        /// <summary>
+        /// Joins some strings with an internal separator.
+        /// </summary>
+        /// <param name="strings">Strings to join.</param>
+        /// <param name="sep">String to separate them with.</param>
+        /// <returns>A single string consisting of each element of <paramref name="strings"/> interleaved by <paramref name="sep"/>;
+        /// an empty string when <paramref name="strings"/> has no elements.</returns>
+        private static string Join(IEnumerable<string> strings, string sep)
+        {
+            // The framework routine does exactly what the former hand-written enumerator loop did:
+            // no leading or trailing separator, and null elements contribute nothing.
+            return string.Join(sep, strings);
+        }
+
+        private static readonly int DEFAULT_MAX_PHONEMES = 20;
+
+        private readonly Lang lang;
+
+        private readonly NameType nameType;
+
+        private readonly RuleType ruleType;
+
+        private readonly bool concat;
+
+        private readonly int maxPhonemes;
+
+        /// <summary>
+        /// Creates a phonetic engine that uses the default phoneme limit (<c>DEFAULT_MAX_PHONEMES</c>).
+        /// </summary>
+        /// <param name="nameType">The type of names it will use.</param>
+        /// <param name="ruleType">The type of rules it will apply.</param>
+        /// <param name="concat">If it will concatenate multiple encodings.</param>
+        public PhoneticEngine(NameType nameType, RuleType ruleType, bool concat)
+            : this(nameType, ruleType, concat, DEFAULT_MAX_PHONEMES)
+        {
+        }
+
+        /// <summary>
+        /// Creates a phonetic engine with an explicit phoneme limit.
+        /// <para/>
+        /// since 1.7
+        /// </summary>
+        /// <param name="nameType">The type of names it will use.</param>
+        /// <param name="ruleType">The type of rules it will apply.</param>
+        /// <param name="concat">If it will concatenate multiple encodings.</param>
+        /// <param name="maxPhonemes">The maximum number of phonemes that will be handled.</param>
+        /// <exception cref="ArgumentException"><paramref name="ruleType"/> is <c>RuleType.RULES</c>.</exception>
+        public PhoneticEngine(NameType nameType, RuleType ruleType, bool concat,
+                              int maxPhonemes)
+        {
+            // RULES is passed internally by Encode() for the first rule pass; it is rejected here
+            // as the engine's own rule type.
+            if (ruleType == RuleType.RULES)
+            {
+                throw new ArgumentException("ruleType must not be " + RuleType.RULES);
+            }
+
+            this.lang = Lang.GetInstance(nameType);
+            this.maxPhonemes = maxPhonemes;
+            this.concat = concat;
+            this.ruleType = ruleType;
+            this.nameType = nameType;
+        }
+
+        /// <summary>
+        /// Applies the final rules to convert from a language-specific phonetic representation to a
+        /// language-independent representation.
+        /// </summary>
+        /// <param name="phonemeBuilder">The current phonemes.</param>
+        /// <param name="finalRules">The final rules to apply.</param>
+        /// <returns>The resulting phonemes; <paramref name="phonemeBuilder"/> itself when
+        /// <paramref name="finalRules"/> is empty.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="finalRules"/> is <c>null</c>.</exception>
+        private PhonemeBuilder ApplyFinalRules(PhonemeBuilder phonemeBuilder,
+                                               IDictionary<string, IList<Rule>> finalRules)
+        {
+            if (finalRules == null)
+            {
+                // The single-string constructor of ArgumentNullException takes the parameter NAME,
+                // so the (paramName, message) overload is needed to carry a message.
+                throw new ArgumentNullException("finalRules", "finalRules can not be null");
+            }
+            if (finalRules.Count == 0)
+            {
+                return phonemeBuilder;
+            }
+
+            // results of all phonemes are merged into one set ordered by Phoneme.COMPARER
+            ISet<Phoneme> phonemes = new SortedSet<Phoneme>(Phoneme.COMPARER);
+
+            foreach (Phoneme phoneme in phonemeBuilder.Phonemes)
+            {
+                PhonemeBuilder subBuilder = PhonemeBuilder.Empty(phoneme.Languages);
+                string phonemeText = phoneme.GetPhonemeText();
+
+                // no increment clause: each rule application reports the next index to continue at
+                for (int i = 0; i < phonemeText.Length;)
+                {
+                    RulesApplication rulesApplication =
+                            new RulesApplication(finalRules, phonemeText, subBuilder, i, maxPhonemes).Invoke();
+                    bool found = rulesApplication.IsFound;
+                    subBuilder = rulesApplication.PhonemeBuilder;
+
+                    if (!found)
+                    {
+                        // not found, appending as-is
+                        subBuilder.Append(phonemeText.Substring(i, 1));
+                    }
+
+                    i = rulesApplication.I;
+                }
+
+                phonemes.UnionWith(subBuilder.Phonemes);
+            }
+
+            return new PhonemeBuilder(phonemes.ToList());
+        }
+
+        /// <summary>
+        /// Encodes a string to its phonetic representation, letting the engine's
+        /// language-guessing rules pick the candidate languages for <paramref name="input"/>.
+        /// </summary>
+        /// <param name="input">The string to encode.</param>
+        /// <returns>The encoding of the input.</returns>
+        public virtual string Encode(string input)
+        {
+            return Encode(input, this.lang.GuessLanguages(input));
+        }
+
+        /// <summary>
+        /// Encodes an input string into an output phonetic representation, given a set of possible origin languages.
+        /// </summary>
+        /// <param name="input">String to phoneticise; a string with dashes or spaces separating each word.</param>
+        /// <param name="languageSet">The set of possible origin languages of <paramref name="input"/>.</param>
+        /// <returns>A phonetic representation of the input; a string containing '-'-separated phonetic representations of the input.
+        /// An empty string is returned when no word is left to encode after name prefixes were removed.</returns>
+        public virtual string Encode(string input, LanguageSet languageSet)
+        {
+            IDictionary<string, IList<Rule>> rules = Rule.GetInstanceMap(this.nameType, RuleType.RULES, languageSet);
+            // rules common across many (all) languages
+            IDictionary<string, IList<Rule>> finalRules1 = Rule.GetInstanceMap(this.nameType, this.ruleType, "common");
+            // rules that apply to a specific language that may be ambiguous or wrong if applied to other languages
+            IDictionary<string, IList<Rule>> finalRules2 = Rule.GetInstanceMap(this.nameType, this.ruleType, languageSet);
+
+            // tidy the input
+            // lower-casing uses the invariant culture so that the result does not depend on the current locale
+            input = input.ToLowerInvariant().Replace('-', ' ').Trim();
+
+            if (this.nameType == NameType.GENERIC)
+            {
+                if (input.StartsWith("d'", StringComparison.Ordinal))
+                { // check for d'
+                    string remainder = input.Substring(2);
+                    string combined = "d" + remainder;
+                    return "(" + Encode(remainder) + ")-(" + Encode(combined) + ")";
+                }
+                foreach (string l in NAME_PREFIXES[this.nameType])
+                {
+                    // handle generic prefixes
+                    if (input.StartsWith(l + " ", StringComparison.Ordinal))
+                    {
+                        // check for any prefix in the words list
+                        string remainder = input.Substring(l.Length + 1); // input without the prefix
+                        string combined = l + remainder; // input with prefix without space
+                        return "(" + Encode(remainder) + ")-(" + Encode(combined) + ")";
+                    }
+                }
+            }
+
+            IList<string> words = WHITESPACE.Split(input).ToList();
+            IList<string> words2 = new List<string>();
+
+            // special-case handling of word prefixes based upon the name type
+            switch (this.nameType)
+            {
+                case NameType.SEPHARDIC:
+                    foreach (string aWord in words)
+                    {
+                        string[] parts = aWord.Split(new char[] { '\'' }, StringSplitOptions.RemoveEmptyEntries);
+                        // RemoveEmptyEntries produces an empty array for "" (e.g. empty input) and for
+                        // a word made only of apostrophes; indexing it would throw, so fall back to "".
+                        string lastPart = parts.Length > 0 ? parts[parts.Length - 1] : string.Empty;
+                        words2.Add(lastPart);
+                    }
+                    words2.RemoveAll(NAME_PREFIXES[this.nameType]);
+                    break;
+                case NameType.ASHKENAZI:
+                    words2.AddRange(words);
+                    words2.RemoveAll(NAME_PREFIXES[this.nameType]);
+                    break;
+                case NameType.GENERIC:
+                    words2.AddRange(words);
+                    break;
+                default:
+                    throw new InvalidOperationException("Unreachable case: " + this.nameType);
+            }
+
+            if (this.concat)
+            {
+                // concat mode enabled
+                input = Join(words2, " ");
+            }
+            else if (words2.Count == 1)
+            {
+                // not a multi-word name
+                // NOTE(review): this reads from 'words', not 'words2', mirroring the original
+                // Java line kept below - confirm that this is intended.
+                //input = words.iterator().next();
+                input = words.FirstOrDefault();
+            }
+            else
+            {
+                // encode each word in a multi-word name separately (normally used for approx matches)
+                StringBuilder result = new StringBuilder();
+                foreach (string word in words2)
+                {
+                    result.Append("-").Append(Encode(word));
+                }
+                if (result.Length == 0)
+                {
+                    // every word was removed as a name prefix, so there is nothing to encode;
+                    // ToString(1, -1) below would throw ArgumentOutOfRangeException
+                    return string.Empty;
+                }
+                // return the result without the leading "-"
+                return result.ToString(1, result.Length - 1);
+            }
+
+            PhonemeBuilder phonemeBuilder = PhonemeBuilder.Empty(languageSet);
+
+            // loop over each char in the input - we will handle the increment manually
+            for (int i = 0; i < input.Length;)
+            {
+                RulesApplication rulesApplication =
+                        new RulesApplication(rules, input, phonemeBuilder, i, maxPhonemes).Invoke();
+                i = rulesApplication.I;
+                phonemeBuilder = rulesApplication.PhonemeBuilder;
+            }
+
+            // Apply the general rules
+            phonemeBuilder = ApplyFinalRules(phonemeBuilder, finalRules1);
+            // Apply the language-specific rules
+            phonemeBuilder = ApplyFinalRules(phonemeBuilder, finalRules2);
+
+            return phonemeBuilder.MakeString();
+        }
+
+        /// <summary>
+        /// The language guessing rules this engine obtained for its name type at construction.
+        /// </summary>
+        public virtual Lang Lang
+        {
+            get
+            {
+                return lang;
+            }
+        }
+
+        /// <summary>
+        /// The <see cref="Bm.NameType"/> this engine was constructed with.
+        /// </summary>
+        public virtual NameType NameType
+        {
+            get
+            {
+                return nameType;
+            }
+        }
+
+        /// <summary>
+        /// The <see cref="Bm.RuleType"/> this engine was constructed with.
+        /// </summary>
+        public virtual RuleType RuleType
+        {
+            get
+            {
+                return ruleType;
+            }
+        }
+
+        /// <summary>
+        /// Whether multiple phonetic encodings are concatenated or just the first one is kept.
+        /// <c>true</c> if multiple phonetic encodings are returned, <c>false</c> if just the first is.
+        /// </summary>
+        public virtual bool IsConcat
+        {
+            get
+            {
+                return concat;
+            }
+        }
+
+        /// <summary>
+        /// The maximum number of phonemes the engine will calculate for a given input.
+        /// <para/>
+        /// since 1.7
+        /// </summary>
+        public virtual int MaxPhonemes
+        {
+            get
+            {
+                return maxPhonemes;
+            }
+        }
+    }
+}


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ResourceConstants.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ResourceConstants.cs 
b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ResourceConstants.cs
new file mode 100644
index 0000000..c70d404
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ResourceConstants.cs
@@ -0,0 +1,37 @@
+// commons-codec version compatibility level: 1.9
+using System.Text;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Shared constants for processing resource files: the comment markers and the text encoding.
+    /// <para/>
+    /// All members are static and read-only.
+    /// <para/>
+    /// since 1.6
+    /// </summary>
+    internal class ResourceConstants
+    {
+        /// <summary>Text encoding constant (UTF-8).</summary>
+        public static readonly Encoding ENCODING = Encoding.UTF8;
+
+        /// <summary>Single-line comment marker.</summary>
+        public static readonly string CMT = "//";
+
+        /// <summary>Opening marker of an extended (block) comment.</summary>
+        public static readonly string EXT_CMT_START = "/*";
+
+        /// <summary>Closing marker of an extended (block) comment.</summary>
+        public static readonly string EXT_CMT_END = "*/";
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Rule.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Rule.cs 
b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Rule.cs
new file mode 100644
index 0000000..52f3d9a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Rule.cs
@@ -0,0 +1,1069 @@
+// commons-codec version compatibility level: 1.9
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Reflection;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// A phoneme rule.
+    /// </summary>
+    /// <remarks>
+    /// Rules have a pattern, left context, right context, output phoneme, set 
of languages for which they apply
+    /// and a logical flag indicating if all languages must be in play. A rule 
matches if:
+    /// <list type="bullet">
+    ///     <item><description>the pattern matches at the current 
position</description></item>
+    ///     <item><description>the string up until the beginning of the 
pattern matches the left context</description></item>
+    ///     <item><description>the string from the end of the pattern matches 
the right context</description></item>
+    ///     <item><description>logical is ALL and all languages are in scope; 
or</description></item>
+    ///     <item><description>logical is any other value and at least one 
language is in scope</description></item>
+    /// </list>
+    /// <para/>
+    /// Rules are typically generated by parsing rules resources. In normal 
use, there will be no need for the user
+    /// to explicitly construct their own.
+    /// <para/>
+    /// Rules are immutable and thread-safe.
+    /// <para/>
+    /// <b>Rules resources</b>
+    /// <para/>
+    /// Rules are typically loaded from resource files. These are UTF-8 
encoded text files. They are systematically
+    /// named following the pattern:
+    /// <c>Lucene.Net.Analysis.Phonetic.Language.Bm.<see 
cref="NameType"/>_<see cref="RuleType"/>_[language].txt</c>
+    /// <para/>
+    /// The format of these resources is the following:
+    /// <list type="table">
+    ///     <item>
+    ///         <term>Rules:</term>
+    ///         <description>
+    ///             whitespace separated, double-quoted strings. There should 
be 4 columns to each row, and these
+    ///             will be interpreted as:
+    ///             <list type="number">
+    ///                 <item><description>pattern</description></item>
+    ///                 <item><description>left context</description></item>
+    ///                 <item><description>right context</description></item>
+    ///                 <item><description>phoneme</description></item>
+    ///             </list>
+    ///         </description>
+    ///     </item>
+    ///     <item>
+    ///         <term>End-of-line comments:</term>
+    ///         <description>Any occurrence of '//' will cause all text 
following on that line to be discarded as a comment.</description>
+    ///     </item>
+    ///     <item>
+    ///         <term>Multi-line comments:</term>
+    ///         <description>Any line starting with '/*' will start multi-line 
commenting mode. This will skip all content until a line ending in '*' and '/' 
is found.</description>
+    ///     </item>
+    ///     <item>
+    ///         <term>Blank lines:</term>
+    ///         <description>All blank lines will be skipped.</description>
+    ///     </item>
+    /// </list>
+    /// <para/>
+    /// since 1.6
+    /// </remarks>
+    public class Rule
+    {
+        // Pre-compiled splitters shared by the parsing helpers. They are readonly so the
+        // cached instances cannot be replaced after type initialization.
+        private static readonly Regex PIPE = new Regex("[|]", RegexOptions.Compiled);        // splits phoneme alternatives
+        private static readonly Regex WHITESPACE = new Regex("\\s+", RegexOptions.Compiled); // splits the columns of a rule line
+        private static readonly Regex PLUS = new Regex("[+]", RegexOptions.Compiled);        // splits a phoneme's language list
+
+        /// <summary>
+        /// An <see cref="IRPattern"/> that accepts every input, whatever its content.
+        /// </summary>
+        private class AllStringsRMatcher : IRPattern
+        {
+            public bool IsMatch(StringBuilder input) { return true; }
+
+            public bool IsMatch(string input) { return true; }
+
+            public bool IsMatch(ICharSequence input) { return true; }
+        }
+
+        public static readonly IRPattern ALL_STRINGS_RMATCHER = new 
AllStringsRMatcher(); // shared matcher that accepts every input
+
+
+        public static readonly string ALL = "ALL"; // NOTE(review): presumably the "ALL" logical flag mentioned in the class remarks - confirm
+
+        private static readonly string DOUBLE_QUOTE = "\""; // quote character removed by StripQuotes
+
+        private static readonly string HASH_INCLUDE = "#include"; // prefix of an include line in a rules resource
+
+        private static readonly IDictionary<NameType, IDictionary<RuleType, 
IDictionary<string, IDictionary<string, IList<Rule>>>>> RULES =
+                new Dictionary<NameType, IDictionary<RuleType, 
IDictionary<string, IDictionary<string, IList<Rule>>>>>(); // name type -> rule type -> language -> first pattern character -> rules; filled by the static constructor
+
+        /// <summary>
+        /// Parses the rules resource of every <see cref="NameType"/>, <see cref="RuleType"/> and language
+        /// combination and stores the results in <c>RULES</c>.
+        /// </summary>
+        static Rule()
+        {
+            foreach (NameType nameType in Enum.GetValues(typeof(NameType)))
+            {
+                IDictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>> byRuleType =
+                    new Dictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>>();
+
+                foreach (RuleType ruleType in Enum.GetValues(typeof(RuleType)))
+                {
+                    IDictionary<string, IDictionary<string, IList<Rule>>> byLanguage =
+                        new Dictionary<string, IDictionary<string, IList<Rule>>>();
+
+                    Languages languages = Languages.GetInstance(nameType);
+                    foreach (string language in languages.GetLanguages())
+                    {
+                        try
+                        {
+                            byLanguage[language] = ParseRules(
+                                CreateScanner(nameType, ruleType, language),
+                                CreateResourceName(nameType, ruleType, language));
+                        }
+                        catch (InvalidOperationException e)
+                        {
+                            // Re-wrap so the failing resource is named in the message.
+                            throw new InvalidOperationException("Problem processing " + CreateResourceName(nameType, ruleType, language), e);
+                        }
+                    }
+
+                    // Every rule type except RULES also loads a "common" resource.
+                    if (ruleType != RuleType.RULES)
+                    {
+                        byLanguage["common"] = ParseRules(
+                            CreateScanner(nameType, ruleType, "common"),
+                            CreateResourceName(nameType, ruleType, "common"));
+                    }
+
+                    byRuleType[ruleType] = Collections.UnmodifiableMap(byLanguage);
+                }
+
+                RULES[nameType] = Collections.UnmodifiableMap(byRuleType);
+            }
+        }
+
+        /// <summary>
+        /// Tells whether <paramref name="input"/> occurs anywhere in <paramref name="chars"/>.
+        /// </summary>
+        private static bool Contains(ICharSequence chars, char input)
+        {
+            int index = 0;
+            while (index < chars.Length)
+            {
+                if (chars[index] == input)
+                {
+                    return true;
+                }
+                index++;
+            }
+            return false;
+        }
+        /// <summary>
+        /// Tells whether <paramref name="input"/> occurs anywhere in <paramref name="chars"/>.
+        /// </summary>
+        private static bool Contains(string chars, char input)
+        {
+            // The char overload of IndexOf is an ordinal search, same as the manual scan.
+            return chars.IndexOf(input) >= 0;
+        }
+        /// <summary>
+        /// Tells whether <paramref name="input"/> occurs anywhere in <paramref name="chars"/>.
+        /// </summary>
+        private static bool Contains(StringBuilder chars, char input)
+        {
+            int index = 0;
+            while (index < chars.Length)
+            {
+                if (chars[index] == input)
+                {
+                    return true;
+                }
+                index++;
+            }
+            return false;
+        }
+
+        /// <summary>
+        /// Builds the resource file name <c>[nameType]_[ruleType]_[lang].txt</c>.
+        /// </summary>
+        private static string CreateResourceName(NameType nameType, RuleType rt, string lang)
+        {
+            return nameType.GetName() + "_" + rt.GetName() + "_" + lang + ".txt";
+        }
+
+        /// <summary>
+        /// Opens a reader over the embedded rules resource for the given name type, rule type and language.
+        /// </summary>
+        /// <exception cref="ArgumentException">If the resource cannot be found.</exception>
+        private static TextReader CreateScanner(NameType nameType, RuleType rt, string lang)
+        {
+            string resourceName = CreateResourceName(nameType, rt, lang);
+            Assembly assembly = typeof(Languages).GetTypeInfo().Assembly;
+            Stream stream = assembly.FindAndGetManifestResourceStream(typeof(Languages), resourceName);
+
+            if (stream != null)
+            {
+                return new StreamReader(stream, ResourceConstants.ENCODING);
+            }
+
+            throw new ArgumentException("Unable to load resource: " + resourceName);
+        }
+
+        /// <summary>
+        /// Opens a reader over the embedded resource named <c>[lang].txt</c>.
+        /// </summary>
+        /// <exception cref="ArgumentException">If the resource cannot be found.</exception>
+        private static TextReader CreateScanner(string lang)
+        {
+            string resourceName = lang + ".txt";
+            Assembly assembly = typeof(Languages).GetTypeInfo().Assembly;
+            Stream stream = assembly.FindAndGetManifestResourceStream(typeof(Languages), resourceName);
+
+            if (stream != null)
+            {
+                return new StreamReader(stream, ResourceConstants.ENCODING);
+            }
+
+            throw new ArgumentException("Unable to load resource: " + resourceName);
+        }
+
+        /// <summary>
+        /// Tells whether the last characters of <paramref name="input"/> equal <paramref name="suffix"/>.
+        /// </summary>
+        private static bool EndsWith(ICharSequence input, string suffix)
+        {
+            // Index in input where the suffix would have to begin.
+            int offset = input.Length - suffix.Length;
+            if (offset < 0)
+            {
+                return false;
+            }
+            for (int k = 0; k < suffix.Length; k++)
+            {
+                if (input[offset + k] != suffix[k])
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        /// <summary>
+        /// Tells whether the last characters of <paramref name="input"/> equal <paramref name="suffix"/>.
+        /// </summary>
+        private static bool EndsWith(string input, string suffix)
+        {
+            // Index in input where the suffix would have to begin.
+            int offset = input.Length - suffix.Length;
+            if (offset < 0)
+            {
+                return false;
+            }
+            for (int k = 0; k < suffix.Length; k++)
+            {
+                if (input[offset + k] != suffix[k])
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        /// <summary>
+        /// Tells whether the last characters of <paramref name="input"/> equal <paramref name="suffix"/>.
+        /// </summary>
+        private static bool EndsWith(StringBuilder input, string suffix)
+        {
+            // Index in input where the suffix would have to begin.
+            int offset = input.Length - suffix.Length;
+            if (offset < 0)
+            {
+                return false;
+            }
+            for (int k = 0; k < suffix.Length; k++)
+            {
+                if (input[offset + k] != suffix[k])
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        /// <summary>
+        /// Gets the rules for a combination of name type, rule type and languages as one flat list.
+        /// </summary>
+        /// <param name="nameType">The <see cref="NameType"/> to consider.</param>
+        /// <param name="rt">The <see cref="RuleType"/> to consider.</param>
+        /// <param name="langs">The set of languages to consider.</param>
+        /// <returns>A list of <see cref="Rule"/>s that apply.</returns>
+        public static IList<Rule> GetInstance(NameType nameType, RuleType rt,
+                                     LanguageSet langs)
+        {
+            List<Rule> combined = new List<Rule>();
+            // Concatenate the per-key lists of the rule map in enumeration order.
+            foreach (IList<Rule> group in GetInstanceMap(nameType, rt, langs).Values)
+            {
+                combined.AddRange(group);
+            }
+            return combined;
+        }
+
+        /// <summary>
+        /// Gets the rules for a combination of name type, rule type and one language.
+        /// </summary>
+        /// <param name="nameType">The <see cref="NameType"/> to consider.</param>
+        /// <param name="rt">The <see cref="RuleType"/> to consider.</param>
+        /// <param name="lang">The language to consider.</param>
+        /// <returns>A list of <see cref="Rule"/>s that apply.</returns>
+        public static IList<Rule> GetInstance(NameType nameType, RuleType rt, string lang)
+        {
+            // Wrap the single language in a set and delegate to the LanguageSet overload.
+            HashSet<string> single = new HashSet<string>();
+            single.Add(lang);
+            return GetInstance(nameType, rt, LanguageSet.From(single));
+        }
+
+        /// <summary>
+        /// Gets the rule map for a combination of name type, rule type and languages.
+        /// <para/>
+        /// since 1.9
+        /// </summary>
+        /// <param name="nameType">The <see cref="NameType"/> to consider.</param>
+        /// <param name="rt">The <see cref="RuleType"/> to consider.</param>
+        /// <param name="langs">The set of languages to consider.</param>
+        /// <returns>A map containing all <see cref="Rule"/>s that apply, grouped by the first character of the rule pattern.</returns>
+        public static IDictionary<string, IList<Rule>> GetInstanceMap(NameType nameType, RuleType rt,
+                                                             LanguageSet langs)
+        {
+            // A singleton set selects that language's rules; anything else uses Languages.ANY.
+            if (langs.IsSingleton)
+            {
+                return GetInstanceMap(nameType, rt, langs.GetAny());
+            }
+            return GetInstanceMap(nameType, rt, Languages.ANY);
+        }
+
+        /// <summary>
+        /// Gets the rule map for a combination of name type, rule type and one language.
+        /// <para/>
+        /// since 1.9
+        /// </summary>
+        /// <param name="nameType">The <see cref="NameType"/> to consider.</param>
+        /// <param name="rt">The <see cref="RuleType"/> to consider.</param>
+        /// <param name="lang">The language to consider.</param>
+        /// <returns>A map containing all <see cref="Rule"/>s that apply, grouped by the first character of the rule pattern.</returns>
+        /// <exception cref="ArgumentException">If no rules are registered for the combination.</exception>
+        public static IDictionary<string, IList<Rule>> GetInstanceMap(NameType nameType, RuleType rt,
+                                                             string lang)
+        {
+            IDictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>> byRuleType;
+            IDictionary<string, IDictionary<string, IList<Rule>>> byLanguage;
+            IDictionary<string, IList<Rule>> rules = null;
+
+            // Walk the three nested maps; a missing or null entry at any level means "not found".
+            bool found =
+                RULES.TryGetValue(nameType, out byRuleType) && byRuleType != null &&
+                byRuleType.TryGetValue(rt, out byLanguage) && byLanguage != null &&
+                byLanguage.TryGetValue(lang, out rules) && rules != null;
+
+            if (!found)
+            {
+                throw new ArgumentException(string.Format("No rules found for {0}, {1}, {2}.",
+                                                   nameType.GetName(), rt.GetName(), lang));
+            }
+
+            return rules;
+        }
+
+        /// <summary>
+        /// Parses a single phoneme of the form <c>text</c> or <c>text[lang1+lang2]</c>.
+        /// </summary>
+        /// <param name="ph">The phoneme text, optionally followed by a bracketed, '+'-separated language list.</param>
+        /// <returns>
+        /// A <see cref="Phoneme"/> restricted to the listed languages, or to
+        /// <c>Languages.ANY_LANGUAGE</c> when no list is present.
+        /// </returns>
+        /// <exception cref="ArgumentException">If <paramref name="ph"/> contains '[' but does not end in ']'.</exception>
+        private static Phoneme ParsePhoneme(string ph)
+        {
+            // The char overload is an ordinal search; the string overload would be culture-sensitive.
+            int open = ph.IndexOf('[');
+            if (open < 0)
+            {
+                return new Phoneme(ph, Languages.ANY_LANGUAGE);
+            }
+
+            if (!ph.EndsWith("]", StringComparison.Ordinal))
+            {
+                throw new ArgumentException("Phoneme expression contains a '[' but does not end in ']'");
+            }
+
+            string before = ph.Substring(0, open);
+            // Text strictly between '[' and the closing ']'.
+            string input = ph.Substring(open + 1, (ph.Length - 1) - (open + 1));
+            ISet<string> langs = new HashSet<string>(PLUS.Split(input));
+
+            return new Phoneme(before, LanguageSet.From(langs));
+        }
+
+        /// <summary>
+        /// Parses a phoneme expression: either a single phoneme or a parenthesised,
+        /// '|'-separated list of alternatives.
+        /// </summary>
+        /// <exception cref="ArgumentException">If the expression starts with '(' but does not end with ')'.</exception>
+        private static IPhonemeExpr ParsePhonemeExpr(string ph)
+        {
+            if (!ph.StartsWith("(", StringComparison.Ordinal))
+            {
+                // no brackets: a plain phoneme
+                return ParsePhoneme(ph);
+            }
+
+            if (!ph.EndsWith(")", StringComparison.Ordinal))
+            {
+                throw new ArgumentException("Phoneme starts with '(' so must end with ')'");
+            }
+
+            string inner = ph.Substring(1, ph.Length - 2);
+            IList<Phoneme> alternatives = new List<Phoneme>();
+            foreach (string alternative in PIPE.Split(inner))
+            {
+                alternatives.Add(ParsePhoneme(alternative));
+            }
+
+            // A leading or trailing '|' adds one more empty alternative valid for any language.
+            bool hasEdgePipe = inner.StartsWith("|", StringComparison.Ordinal)
+                || inner.EndsWith("|", StringComparison.Ordinal);
+            if (hasEdgePipe)
+            {
+                alternatives.Add(new Phoneme("", Languages.ANY_LANGUAGE));
+            }
+
+            return new PhonemeList(alternatives);
+        }
+
+        /// <summary>
+        /// A <see cref="Rule"/> that remembers the line and location it was parsed from
+        /// and reports them from <see cref="ToString()"/>.
+        /// </summary>
+        private class RuleAnonymousHelper : Rule
+        {
+            private readonly int myLine;
+            private readonly string loc;
+
+            public RuleAnonymousHelper(string pat, string lCon, string rCon, IPhonemeExpr ph, int cLine, string location)
+                : base(pat, lCon, rCon, ph)
+            {
+                myLine = cLine;
+                loc = location;
+            }
+
+            public override string ToString()
+            {
+                return "Rule" + "{line=" + myLine + ", loc='" + loc + '\'' + '}';
+            }
+        }
+
+        /// <summary>
+        /// Parses a rules resource into a map of rules keyed by the first character of each rule's pattern.
+        /// </summary>
+        /// <remarks>
+        /// Blank lines, <c>//</c> end-of-line comments and multi-line comments are skipped.
+        /// A <c>#include</c> line merges in the rules parsed from the named resource.
+        /// All marker checks are ordinal, because the markers are file syntax rather than linguistic text.
+        /// </remarks>
+        /// <param name="reader">The reader supplying the resource text. It is disposed before this method returns.</param>
+        /// <param name="location">A description of the resource, used in error messages.</param>
+        /// <returns>The parsed rules, grouped by the first character of their pattern.</returns>
+        /// <exception cref="ArgumentException">If an include line contains a space or a rule line does not have 4 columns.</exception>
+        /// <exception cref="InvalidOperationException">If building a rule from a 4-column line raises an <see cref="ArgumentException"/>.</exception>
+        private static IDictionary<string, IList<Rule>> ParseRules(TextReader reader, string location)
+        {
+            IDictionary<string, IList<Rule>> lines = new HashMap<string, IList<Rule>>();
+            int currentLine = 0;
+
+            bool inMultilineComment = false;
+            string rawLine;
+            try
+            {
+                while ((rawLine = reader.ReadLine()) != null)
+                {
+                    currentLine++;
+                    string line = rawLine;
+
+                    if (inMultilineComment)
+                    {
+                        // Stay in comment mode until a line ends with the closing marker.
+                        if (line.EndsWith(ResourceConstants.EXT_CMT_END, StringComparison.Ordinal))
+                        {
+                            inMultilineComment = false;
+                        }
+                        continue;
+                    }
+
+                    if (line.StartsWith(ResourceConstants.EXT_CMT_START, StringComparison.Ordinal))
+                    {
+                        inMultilineComment = true;
+                        continue;
+                    }
+
+                    // discard comments
+                    int cmtI = line.IndexOf(ResourceConstants.CMT, StringComparison.Ordinal);
+                    if (cmtI >= 0)
+                    {
+                        line = line.Substring(0, cmtI);
+                    }
+
+                    // trim leading-trailing whitespace
+                    line = line.Trim();
+
+                    if (line.Length == 0)
+                    {
+                        continue; // empty lines can be safely skipped
+                    }
+
+                    if (line.StartsWith(HASH_INCLUDE, StringComparison.Ordinal))
+                    {
+                        // include statement
+                        string incl = line.Substring(HASH_INCLUDE.Length).Trim();
+                        if (incl.Contains(" "))
+                        {
+                            throw new ArgumentException("Malformed import statement '" + rawLine + "' in " +
+                                                               location);
+                        }
+
+                        lines.PutAll(ParseRules(CreateScanner(incl), location + "->" + incl));
+                        continue;
+                    }
+
+                    // rule
+                    string[] parts = WHITESPACE.Split(line);
+                    if (parts.Length != 4)
+                    {
+                        throw new ArgumentException("Malformed rule statement split into " + parts.Length +
+                                                           " parts: " + rawLine + " in " + location);
+                    }
+
+                    try
+                    {
+                        string pat = StripQuotes(parts[0]);
+                        string lCon = StripQuotes(parts[1]);
+                        string rCon = StripQuotes(parts[2]);
+                        IPhonemeExpr ph = ParsePhonemeExpr(StripQuotes(parts[3]));
+                        Rule r = new RuleAnonymousHelper(pat, lCon, rCon, ph, currentLine, location);
+
+                        // Rules are grouped under the first character of their pattern.
+                        string patternKey = r.pattern.Substring(0, 1);
+                        IList<Rule> rules;
+                        if (!lines.TryGetValue(patternKey, out rules) || rules == null)
+                        {
+                            rules = new List<Rule>();
+                            lines[patternKey] = rules;
+                        }
+                        rules.Add(r);
+                    }
+                    catch (ArgumentException e)
+                    {
+                        throw new InvalidOperationException("Problem parsing line '" + currentLine + "' in " +
+                                                        location, e);
+                    }
+                }
+            }
+            finally
+            {
+                reader.Dispose();
+            }
+
+            return lines;
+        }
+
+        /// <summary>
+        /// An <see cref="IRPattern"/> assembled from three delegates, one per supported input type.
+        /// </summary>
+        private class RPatternHelper : IRPattern
+        {
+            private readonly Func<StringBuilder, bool> builderMatcher;
+            private readonly Func<string, bool> stringMatcher;
+            private readonly Func<ICharSequence, bool> sequenceMatcher;
+
+            public RPatternHelper(Func<StringBuilder, bool> isMatchSB, Func<string, bool> isMatchStr, Func<ICharSequence, bool> isMatchCS)
+            {
+                builderMatcher = isMatchSB;
+                stringMatcher = isMatchStr;
+                sequenceMatcher = isMatchCS;
+            }
+
+            public bool IsMatch(StringBuilder input)
+            {
+                return builderMatcher(input);
+            }
+
+            public bool IsMatch(string input)
+            {
+                return stringMatcher(input);
+            }
+
+            public bool IsMatch(ICharSequence input)
+            {
+                return sequenceMatcher(input);
+            }
+        }
+
+        /// <summary>
+        /// Attempts to compile the regex into direct string ops, falling back to <see cref="Regex"/> in the worst case.
+        /// </summary>
+        /// <remarks>
+        /// Recognised shortcuts are: an anchored literal (exact, prefix or suffix match), an empty
+        /// pattern (matches everything or only the empty input), and a single character class such as
+        /// <c>[abc]</c> or <c>[^abc]</c> anchored at the start and/or end. Anything else is handed to <see cref="Regex"/>.
+        /// </remarks>
+        /// <param name="regex">The regular expression to compile.</param>
+        /// <returns>An <see cref="IRPattern"/> that will match this regex.</returns>
+        private static IRPattern GetPattern(string regex)
+        {
+            // "^" and "$" are regex syntax, so they are tested ordinally rather than by culture.
+            bool startsWith = regex.StartsWith("^", StringComparison.Ordinal);
+            bool endsWith = regex.EndsWith("$", StringComparison.Ordinal);
+            int contentStart = startsWith ? 1 : 0;
+            int contentEnd = endsWith ? regex.Length - 1 : regex.Length;
+            string content = regex.Substring(contentStart, contentEnd - contentStart);
+            bool boxes = content.Contains("[");
+
+            if (!boxes)
+            {
+                if (startsWith && endsWith)
+                {
+                    // exact match
+                    if (content.Length == 0)
+                    {
+                        // empty
+                        return new RPatternHelper(
+                            isMatchSB: (input) => input.Length == 0,
+                            isMatchStr: (input) => input.Length == 0,
+                            isMatchCS: (input) => input.Length == 0);
+                    }
+                    else
+                    {
+                        // Compare characters for the non-string inputs. StringBuilder.Equals(object)
+                        // given a string is reference equality and would never match.
+                        return new RPatternHelper(
+                            isMatchSB: (input) => input.Length == content.Length && StartsWith(input, content),
+                            isMatchStr: (input) => input.Equals(content),
+                            isMatchCS: (input) => input.Length == content.Length && StartsWith(input, content));
+                    }
+                }
+                else if ((startsWith || endsWith) && content.Length == 0)
+                {
+                    // matches every string
+                    return ALL_STRINGS_RMATCHER;
+                }
+                else if (startsWith)
+                {
+                    // matches from start
+                    return new RPatternHelper(
+                        isMatchSB: (input) => StartsWith(input, content),
+                        isMatchStr: (input) => StartsWith(input, content),
+                        isMatchCS: (input) => StartsWith(input, content));
+                }
+                else if (endsWith)
+                {
+                    // matches from end
+                    return new RPatternHelper(
+                        isMatchSB: (input) => EndsWith(input, content),
+                        isMatchStr: (input) => EndsWith(input, content),
+                        isMatchCS: (input) => EndsWith(input, content));
+                }
+            }
+            else
+            {
+                bool startsWithBox = content.StartsWith("[", StringComparison.Ordinal);
+                bool endsWithBox = content.EndsWith("]", StringComparison.Ordinal);
+
+                if (startsWithBox && endsWithBox)
+                {
+                    string boxContent = content.Substring(1, (content.Length - 1) - 1);
+                    if (!boxContent.Contains("["))
+                    {
+                        // box containing alternatives
+                        bool negate = boxContent.StartsWith("^", StringComparison.Ordinal);
+                        if (negate)
+                        {
+                            boxContent = boxContent.Substring(1);
+                        }
+                        // Copies captured by the lambdas below.
+                        string bContent = boxContent;
+                        bool shouldMatch = !negate;
+
+                        if (startsWith && endsWith)
+                        {
+                            // exact match
+                            return new RPatternHelper(
+                                isMatchSB: (input) => input.Length == 1 && Contains(bContent, input[0]) == shouldMatch,
+                                isMatchStr: (input) => input.Length == 1 && Contains(bContent, input[0]) == shouldMatch,
+                                isMatchCS: (input) => input.Length == 1 && Contains(bContent, input[0]) == shouldMatch);
+                        }
+                        else if (startsWith)
+                        {
+                            // first char
+                            return new RPatternHelper(
+                                isMatchSB: (input) => input.Length > 0 && Contains(bContent, input[0]) == shouldMatch,
+                                isMatchStr: (input) => input.Length > 0 && Contains(bContent, input[0]) == shouldMatch,
+                                isMatchCS: (input) => input.Length > 0 && Contains(bContent, input[0]) == shouldMatch);
+                        }
+                        else if (endsWith)
+                        {
+                            // last char
+                            return new RPatternHelper(
+                                isMatchSB: (input) => input.Length > 0 && Contains(bContent, input[input.Length - 1]) == shouldMatch,
+                                isMatchStr: (input) => input.Length > 0 && Contains(bContent, input[input.Length - 1]) == shouldMatch,
+                                isMatchCS: (input) => input.Length > 0 && Contains(bContent, input[input.Length - 1]) == shouldMatch);
+                        }
+                    }
+                }
+            }
+
+            // No shortcut applied: fall back to a real regular expression.
+            Regex pattern = new Regex(regex, RegexOptions.Compiled);
+
+            return new RPatternHelper(
+                isMatchSB: (input) => pattern.IsMatch(input.ToString()),
+                isMatchStr: (input) => pattern.IsMatch(input),
+                isMatchCS: (input) => pattern.IsMatch(input.ToString()));
+        }
+
+        /// <summary>
+        /// Tells whether the first characters of <paramref name="input"/> equal <paramref name="prefix"/>.
+        /// </summary>
+        private static bool StartsWith(ICharSequence input, string prefix)
+        {
+            int prefixLength = prefix.Length;
+            if (input.Length < prefixLength)
+            {
+                return false;
+            }
+            int index = 0;
+            while (index < prefixLength)
+            {
+                if (input[index] != prefix[index])
+                {
+                    return false;
+                }
+                index++;
+            }
+            return true;
+        }
+
+        /// <summary>
+        /// Tells whether the first characters of <paramref name="input"/> equal <paramref name="prefix"/>.
+        /// </summary>
+        private static bool StartsWith(string input, string prefix)
+        {
+            int prefixLength = prefix.Length;
+            if (input.Length < prefixLength)
+            {
+                return false;
+            }
+            int index = 0;
+            while (index < prefixLength)
+            {
+                if (input[index] != prefix[index])
+                {
+                    return false;
+                }
+                index++;
+            }
+            return true;
+        }
+
+        /// <summary>
+        /// Tells whether the first characters of <paramref name="input"/> equal <paramref name="prefix"/>.
+        /// </summary>
+        private static bool StartsWith(StringBuilder input, string prefix)
+        {
+            int prefixLength = prefix.Length;
+            if (input.Length < prefixLength)
+            {
+                return false;
+            }
+            int index = 0;
+            while (index < prefixLength)
+            {
+                if (input[index] != prefix[index])
+                {
+                    return false;
+                }
+                index++;
+            }
+            return true;
+        }
+
+        /// <summary>
+        /// Removes one leading and one trailing double quote from <paramref name="str"/>, where present.
+        /// </summary>
+        private static string StripQuotes(string str)
+        {
+            string withoutLeading = str.StartsWith(DOUBLE_QUOTE, StringComparison.Ordinal)
+                ? str.Substring(1)
+                : str;
+
+            // The trailing check runs on the already-shortened text, so a lone quote yields "".
+            return withoutLeading.EndsWith(DOUBLE_QUOTE, StringComparison.Ordinal)
+                ? withoutLeading.Substring(0, withoutLeading.Length - 1)
+                : withoutLeading;
+        }
+
+        // Left-context matcher, obtained from GetPattern(lContext + "$") in the constructor.
+        private readonly IRPattern lContext;
+
+        // The literal text this rule applies to.
+        private readonly string pattern;
+
+        // The phoneme expression produced when the rule matches.
+        private readonly IPhonemeExpr phoneme;
+
+        // Right-context matcher, obtained from GetPattern("^" + rContext) in the constructor.
+        private readonly IRPattern rContext;
+
+        /// <summary>
+        /// Creates a new rule. The left context is anchored with a trailing <c>$</c> and the
+        /// right context with a leading <c>^</c> before being turned into matchers.
+        /// </summary>
+        /// <param name="pattern">The pattern.</param>
+        /// <param name="lContext">The left context.</param>
+        /// <param name="rContext">The right context.</param>
+        /// <param name="phoneme">The resulting phoneme.</param>
+        public Rule(string pattern, string lContext, string rContext, IPhonemeExpr phoneme)
+        {
+            IRPattern leftMatcher = GetPattern(lContext + "$");
+            IRPattern rightMatcher = GetPattern("^" + rContext);
+
+            this.pattern = pattern;
+            this.phoneme = phoneme;
+            this.lContext = leftMatcher;
+            this.rContext = rightMatcher;
+        }
+
+        /// <summary>
+        /// Gets the left context pattern: a regular expression that must match
+        /// the input to the left of the pattern.
+        /// </summary>
+        public virtual IRPattern LContext
+        {
+            get
+            {
+                return lContext;
+            }
+        }
+
+        /// <summary>
+        /// Gets the pattern: a string literal that must match exactly.
+        /// </summary>
+        public virtual string Pattern
+        {
+            get
+            {
+                return pattern;
+            }
+        }
+
+        /// <summary>
+        /// Gets the phoneme associated with a match of this rule's pattern.
+        /// </summary>
+        public virtual IPhonemeExpr Phoneme
+        {
+            get
+            {
+                return phoneme;
+            }
+        }
+
+        /// <summary>
+        /// Gets the right context pattern: a regular expression that must match
+        /// the input to the right of the pattern.
+        /// </summary>
+        public virtual IRPattern RContext
+        {
+            get
+            {
+                return rContext;
+            }
+        }
+
+        /// <summary>
+        /// Decides if the pattern and context match the input starting at a position. It is a match if the
+        /// <see cref="LContext"/> matches <paramref name="input"/> up to <paramref name="i"/>,
+        /// <see cref="Pattern"/> matches at <paramref name="i"/> and <see cref="RContext"/> matches from
+        /// the end of the match of <see cref="Pattern"/> to the end of <paramref name="input"/>.
+        /// </summary>
+        /// <param name="input">The input <see cref="ICharSequence"/>.</param>
+        /// <param name="i">The int position within the input.</param>
+        /// <returns><c>true</c> if the pattern and left/right context match, <c>false</c> otherwise.</returns>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is negative.</exception>
+        public virtual bool PatternAndContextMatches(ICharSequence input, int i)
+        {
+            if (i < 0)
+            {
+                // Use the (paramName, message) overload: the single-string constructor
+                // interprets its argument as the parameter name, not the message.
+                throw new ArgumentOutOfRangeException("i", "Can not match pattern at negative indexes");
+            }
+
+            int patternLength = this.pattern.Length;
+            int ipl = i + patternLength;
+
+            if (ipl > input.Length)
+            {
+                // not enough room for the pattern to match
+                return false;
+            }
+
+            // Evaluate the pattern, right context and left context in that order,
+            // failing early as soon as one of them does not match.
+            // NOTE(review): the literal check relies on the sub-sequence's Equals accepting a string - confirm.
+            if (!input.SubSequence(i, ipl).Equals(this.pattern))
+            {
+                return false;
+            }
+            if (!this.rContext.IsMatch(input.SubSequence(ipl, input.Length)))
+            {
+                return false;
+            }
+            return this.lContext.IsMatch(input.SubSequence(0, i));
+        }
+
+        /// <summary>
+        /// Decides if the pattern and context match the input starting at a position. It is a match if the
+        /// <see cref="LContext"/> matches <paramref name="input"/> up to <paramref name="i"/>,
+        /// <see cref="Pattern"/> matches at <paramref name="i"/> and <see cref="RContext"/> matches from
+        /// the end of the match of <see cref="Pattern"/> to the end of <paramref name="input"/>.
+        /// </summary>
+        /// <param name="input">The input <see cref="string"/>.</param>
+        /// <param name="i">The int position within the input.</param>
+        /// <returns><c>true</c> if the pattern and left/right context match, <c>false</c> otherwise.</returns>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is negative.</exception>
+        // LUCENENET specific
+        public virtual bool PatternAndContextMatches(string input, int i)
+        {
+            if (i < 0)
+            {
+                // Use the (paramName, message) overload: the single-string constructor
+                // interprets its argument as the parameter name, not the message.
+                throw new ArgumentOutOfRangeException("i", "Can not match pattern at negative indexes");
+            }
+
+            int patternLength = this.pattern.Length;
+            int ipl = i + patternLength;
+
+            if (ipl > input.Length)
+            {
+                // not enough room for the pattern to match
+                return false;
+            }
+
+            // Evaluate the pattern, right context and left context in that order,
+            // failing early as soon as one of them does not match.
+            // The literal check compares in place (ordinal) instead of allocating a substring.
+            if (string.CompareOrdinal(input, i, this.pattern, 0, patternLength) != 0)
+            {
+                return false;
+            }
+            if (!this.rContext.IsMatch(input.Substring(ipl)))
+            {
+                return false;
+            }
+            return this.lContext.IsMatch(input.Substring(0, i));
+        }
+
+        /// <summary>
+        /// Decides if the pattern and context match the input starting at a position. It is a match if the
+        /// <see cref="LContext"/> matches <paramref name="input"/> up to <paramref name="i"/>,
+        /// <see cref="Pattern"/> matches at <paramref name="i"/> and <see cref="RContext"/> matches from
+        /// the end of the match of <see cref="Pattern"/> to the end of <paramref name="input"/>.
+        /// </summary>
+        /// <param name="input">The input <see cref="StringBuilder"/>.</param>
+        /// <param name="i">The int position within the input.</param>
+        /// <returns><c>true</c> if the pattern and left/right context match, <c>false</c> otherwise.</returns>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is negative.</exception>
+        // LUCENENET specific
+        public virtual bool PatternAndContextMatches(StringBuilder input, int i)
+        {
+            if (i < 0)
+            {
+                // Use the (paramName, message) overload: the single-string constructor
+                // interprets its argument as the parameter name, not the message.
+                throw new ArgumentOutOfRangeException("i", "Can not match pattern at negative indexes");
+            }
+
+            int patternLength = this.pattern.Length;
+            int ipl = i + patternLength;
+
+            if (ipl > input.Length)
+            {
+                // not enough room for the pattern to match
+                return false;
+            }
+
+            // Evaluate the pattern, right context and left context in that order,
+            // failing early as soon as one of them does not match.
+            if (!input.ToString(i, patternLength).Equals(this.pattern))
+            {
+                return false;
+            }
+            if (!this.rContext.IsMatch(input.ToString(ipl, input.Length - ipl)))
+            {
+                return false;
+            }
+            return this.lContext.IsMatch(input.ToString(0, i));
+        }
+
+    }
+
+    /// <summary>
+    /// A single phoneme: a piece of phoneme text together with the <see cref="LanguageSet"/>
+    /// it is associated with. The text is held in a private <see cref="StringBuilder"/> copy
+    /// and can be extended through <see cref="Append(string)"/>.
+    /// </summary>
+    public sealed class Phoneme : IPhonemeExpr
+    {
+        /// <summary>
+        /// Orders phonemes by their text, comparing character codes position by position.
+        /// When one text is a prefix of the other, the shorter text sorts first.
+        /// </summary>
+        private class PhonemeComparer : IComparer<Phoneme>
+        {
+            public int Compare(Phoneme o1, Phoneme o2)
+            {
+                StringBuilder left = o1.phonemeText;
+                StringBuilder right = o2.phonemeText;
+                int shared = Math.Min(left.Length, right.Length);
+
+                // the first differing character decides the order
+                for (int pos = 0; pos < shared; pos++)
+                {
+                    int diff = left[pos] - right[pos];
+                    if (diff != 0)
+                    {
+                        return diff;
+                    }
+                }
+
+                // all shared positions are equal: the longer text sorts last
+                if (left.Length > right.Length)
+                {
+                    return +1;
+                }
+                if (left.Length < right.Length)
+                {
+                    return -1;
+                }
+                return 0;
+            }
+        }
+
+        /// <summary>Shared comparer that orders phonemes by their text.</summary>
+        public static readonly IComparer<Phoneme> COMPARER = new PhonemeComparer();
+        private readonly StringBuilder phonemeText;
+        private readonly LanguageSet languages;
+
+        /// <summary>Creates a phoneme from a string and a language set.</summary>
+        public Phoneme(string phonemeText, LanguageSet languages)
+        {
+            this.languages = languages;
+            this.phonemeText = new StringBuilder(phonemeText);
+        }
+
+        /// <summary>Creates a phoneme from a copy of the given builder's content and a language set.</summary>
+        public Phoneme(StringBuilder phonemeText, LanguageSet languages)
+        {
+            this.languages = languages;
+            this.phonemeText = new StringBuilder(phonemeText.ToString());
+        }
+
+        /// <summary>Creates a phoneme from the text of a character sequence and a language set.</summary>
+        public Phoneme(ICharSequence phonemeText, LanguageSet languages)
+        {
+            this.languages = languages;
+            this.phonemeText = new StringBuilder(phonemeText.ToString());
+        }
+
+        /// <summary>
+        /// Creates a phoneme whose text is the left text followed by the right text,
+        /// keeping the language set of <paramref name="phonemeLeft"/>.
+        /// </summary>
+        public Phoneme(Phoneme phonemeLeft, Phoneme phonemeRight)
+            : this(phonemeLeft.phonemeText, phonemeLeft.languages)
+        {
+            phonemeText.Append(phonemeRight.phonemeText);
+        }
+
+        /// <summary>
+        /// Creates a phoneme whose text is the left text followed by the right text,
+        /// using the supplied <paramref name="languages"/>.
+        /// </summary>
+        public Phoneme(Phoneme phonemeLeft, Phoneme phonemeRight, LanguageSet languages)
+            : this(phonemeLeft.phonemeText, languages)
+        {
+            phonemeText.Append(phonemeRight.phonemeText);
+        }
+
+        /// <summary>Appends <paramref name="str"/> to this phoneme's text and returns this instance.</summary>
+        public Phoneme Append(string str)
+        {
+            phonemeText.Append(str);
+            return this;
+        }
+
+        /// <summary>Gets the language set this phoneme was created with.</summary>
+        public LanguageSet Languages
+        {
+            get
+            {
+                return languages;
+            }
+        }
+
+        /// <summary>Gets a newly allocated one-element list containing only this phoneme.</summary>
+        public IList<Phoneme> Phonemes
+        {
+            get
+            {
+                IList<Phoneme> self = new Phoneme[] { this };
+                return self;
+            }
+        }
+
+        /// <summary>Returns the current phoneme text as a string.</summary>
+        public string GetPhonemeText()
+        {
+            return phonemeText.ToString();
+        }
+
+        /// <summary>
+        /// Returns a new phoneme whose text is this text followed by the text of <paramref name="right"/>
+        /// and whose languages are this phoneme's languages restricted to those of <paramref name="right"/>.
+        /// </summary>
+        [Obsolete("since 1.9")]
+        public Phoneme Join(Phoneme right)
+        {
+            string joinedText = phonemeText.ToString() + right.phonemeText.ToString();
+            LanguageSet restricted = languages.RestrictTo(right.Languages);
+            return new Phoneme(joinedText, restricted);
+        }
+    }
+
+    /// <summary>
+    /// An expression that exposes a list of <see cref="Phoneme"/> instances.
+    /// </summary>
+    public interface IPhonemeExpr
+    {
+        /// <summary>
+        /// Gets the phonemes of this expression.
+        /// </summary>
+        IList<Phoneme> Phonemes { get; }
+    }
+
+    /// <summary>
+    /// An <see cref="IPhonemeExpr"/> backed by a caller-supplied list of phonemes.
+    /// The list is stored and returned as-is, without copying.
+    /// </summary>
+    public sealed class PhonemeList : IPhonemeExpr
+    {
+        private readonly IList<Phoneme> phonemes;
+
+        /// <summary>Wraps the given list.</summary>
+        /// <param name="phonemes">The phonemes to expose.</param>
+        public PhonemeList(IList<Phoneme> phonemes)
+        {
+            this.phonemes = phonemes;
+        }
+
+        /// <summary>Gets the list passed to the constructor.</summary>
+        public IList<Phoneme> Phonemes
+        {
+            get
+            {
+                return phonemes;
+            }
+        }
+    }
+
+    /// <summary>
+    /// A minimal wrapper around the functionality of <see cref="Rule"/> Pattern that we use, to allow for alternate implementations.
+    /// One <c>IsMatch</c> overload is declared per supported input type.
+    /// </summary>
+    public interface IRPattern
+    {
+        /// <summary>Returns whether this pattern matches the given <see cref="ICharSequence"/>.</summary>
+        bool IsMatch(ICharSequence input);
+        /// <summary>Returns whether this pattern matches the given <see cref="string"/>.</summary>
+        bool IsMatch(string input);
+        /// <summary>Returns whether this pattern matches the given <see cref="StringBuilder"/>.</summary>
+        bool IsMatch(StringBuilder input);
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/RuleType.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/RuleType.cs 
b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/RuleType.cs
new file mode 100644
index 0000000..ff3af97
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/RuleType.cs
@@ -0,0 +1,68 @@
+ï»¿// commons-codec version compatibility level: 1.9
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Types of rule.
+    /// <para/>
+    /// since 1.6
+    /// </summary>
+    public enum RuleType
+    {
+        /// <summary>
+        /// Approximate rules, which will lead to the largest number of phonetic interpretations.
+        /// </summary>
+        APPROX,
+
+        /// <summary>
+        /// Exact rules, which will lead to a minimum number of phonetic interpretations.
+        /// </summary>
+        EXACT,
+
+        /// <summary>
+        /// For internal use only. Please use <see cref="APPROX"/> or <see cref="EXACT"/>.
+        /// </summary>
+        RULES
+    }
+
+    /// <summary>
+    /// Extension methods for <see cref="RuleType"/>.
+    /// </summary>
+    public static class RuleTypeExtensions
+    {
+        /// <summary>
+        /// Gets the lower-case name of a rule type: <c>"approx"</c>, <c>"exact"</c> or <c>"rules"</c>.
+        /// </summary>
+        /// <param name="ruleType">The <see cref="RuleType"/>.</param>
+        /// <returns>The rule name.</returns>
+        /// <exception cref="ArgumentException"><paramref name="ruleType"/> is not a defined value.</exception>
+        public static string GetName(this RuleType ruleType)
+        {
+            if (ruleType == RuleType.APPROX)
+            {
+                return "approx";
+            }
+            if (ruleType == RuleType.EXACT)
+            {
+                return "exact";
+            }
+            if (ruleType == RuleType.RULES)
+            {
+                return "rules";
+            }
+
+            throw new ArgumentException("Invalid ruleType");
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_any.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_any.txt 
b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_any.txt
new file mode 100644
index 0000000..3f4f4c9
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_any.txt
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// CONSONANTS
+"ph"    ""  ""  "f" // foreign
+"sh"    ""  ""  "S" // foreign
+"kh"    ""  ""  "x" // foreign
+
+"gli"   ""  ""  "(gli|l[italian])"
+"gni"   ""  ""  "(gni|ni[italian+french])"
+"gn"    ""  "[aeou]"    "(n[italian+french]|nj[italian+french]|gn)
+"gh"    ""  ""  "g" // It + translit. from Arabic
+"dh"    ""  ""  "d" // translit. from Arabic
+"bh"    ""  ""  "d" // translit. from Arabic
+"th"    ""  ""  "t" // translit. from Arabic
+"lh"    ""  ""  "l" // Port
+"nh"    ""  ""  "nj" // Port
+
+"ig"    "[aeiou]"   ""  "(ig|tS[spanish])"
+"ix"    "[aeiou]"   ""  "S" // Sp
+"tx"    ""  ""  "tS" // Sp
+"tj"    ""  "$"  "tS" // Sp
+"tj"    ""  ""  "dZ" // Sp
+"tg"    ""  ""  "(tg|dZ[spanish])"
+
+"gi"    ""  "[aeou]"    "dZ" // Italian
+"g" ""  "y" "Z" // French
+"gg"    ""  "[ei]"  "(gZ[portuguese+french]|dZ[italian+spanish]|x[spanish])"
+"g" ""  "[ei]"  "(Z[portuguese+french]|dZ[italian+spanish]|x[spanish])"
+
+"guy"   ""  ""  "gi"
+"gue"   ""  "$" "(k[french]|ge)"
+"gu"    ""  "[ei]"  "(g|gv") // not It
+"gu"    ""  "[ao]"  "gv" // not It
+
+"ñ" ""  ""  "(n|nj)"
+"ny"    ""  ""  "nj"
+
+"sc"    ""  "[ei]"  "(s|S[italian])"
+"sç"    ""  "[aeiou]"   "s" // not It
+"ss"    ""  ""  "s"
+"ç" ""  ""  "s"   // not It
+
+"ch"    ""  "[ei]"  "(k[italian]|S[portuguese+french]|tS[spanish]|dZ[spanish])"
+"ch"    ""  ""  "(S|tS[spanish]|dZ[spanish])"
+
+"ci"    ""  "[aeou]"    "(tS[italian]|si)"
+"cc"   ""      "[eiyéèê]"   "(tS[italian]|ks[portuguese+french+spanish])"
+"c"    ""      "[eiyéèê]"   "(tS[italian]|s[portuguese+french+spanish])"
+   //array("c" ""      "[aou]" "(k|C[".($portuguese+$spanish)."])" // "C" means that the actual letter could be "ç" (cedille omitted)
+
+"s"    "^"     ""      "s"
+"s"    "[aáuiíoóeéêy]"    "[aáuiíoóeéêy]"    "(s[spanish]|z[portuguese+french+italian])"
+"s"    ""      "[dglmnrv]"     "(z|Z[portuguese])"
+
+"z"    ""      "$"     "(s|ts[italian]|S[portuguese])" // ts It, s/S/Z Port, s in Sp, z Fr
+"z"    ""      "[bdgv]"        "(z|dz[italian]|Z[portuguese])" // dz It, Z/z Port, z Sp & Fr
+"z"    ""      "[ptckf]"       "(s|ts[italian]|S[portuguese])" // ts It, s/S/z Port, z/s Sp
+"z"    ""      ""      "(z|dz[italian]|ts[italian]|s[spanish])" // ts/dz It, z Port & Fr, z/s Sp
+
+"que"  ""      "$"     "(k[french]|ke)"
+"qu"   ""      "[eiu]" "k"
+"qu"   ""      "[ao]"  "(kv|k)" // k is It
+
+"ex"   ""      "[aáuiíoóeéêy]"    "(ez[portuguese]|eS[portuguese]|eks|egz)"
+"ex"   ""      "[cs]"  "(e[portuguese]|ek)"
+
+"m"    ""      "[cdglnrst]"    "(m|n[portuguese])"
+"m"    ""      "[bfpv]"        "(m|n[portuguese+spanish])"
+"m"    ""      "$"     "(m|n[portuguese])"
+
+"b"    "^"     ""      "(b|V[spanish])"
+"v"    "^"     ""      "(v|B[spanish])"
+
+ // VOWELS
+"eau"  ""      ""      "o" // Fr
+
+"ouh"  ""      "[aioe]"        "(v[french]|uh)"
+"uh"   ""      "[aioe]"        "(v|uh)"
+"ou"   ""      "[aioe]"        "v" // french
+"uo"   ""      ""      "(vo|o)"
+"u"    ""      "[aie]" "v"
+
+"i"    "[aáuoóeéê]"        ""      "j"
+"i"    ""      "[aeou]"        "j"
+"y"    "[aáuiíoóeéê]"     ""      "j"
+"y"    ""      "[aeiíou]"     "j"
+"e"    ""      "$"     "(e|E[$french])"
+
+"ão"  ""      ""      "(au|an)" // Port
+"ãe"  ""      ""      "(aj|an)" // Port
+"ãi"  ""      ""      "(aj|an)" // Port
+"õe"  ""      ""      "(oj|on)" // Port
+"où"  ""      ""      "u" // Fr
+"ou"   ""      ""      "(ou|u[french])"
+
+"â"   ""      ""      "a" // Port & Fr
+"à"   ""      ""      "a" // Port
+"á"   ""      ""      "a" // Port & Sp
+"ã"   ""      ""      "(a|an)" // Port
+"é"   ""      ""      "e"
+"ê"   ""      ""      "e" // Port & Fr
+"è"   ""      ""      "e" // Sp & Fr & It
+"í"   ""      ""      "i" // Port & Sp
+"î"   ""      ""      "i" // Fr
+"ô"   ""      ""      "o" // Port & Fr
+"ó"   ""      ""      "o" // Port & Sp & It
+"õ"   ""      ""      "(o|on)" // Port
+"ò"   ""      ""      "o"  // Sp & It
+"ú"   ""      ""      "u" // Port & Sp
+"ü"   ""      ""      "u" // Port & Sp
+
+ // LATIN ALPHABET
+"a"    ""      ""      "a"
+"b"    ""      ""      "(b|v[spanish])"
+"c"    ""      ""      "k"
+"d"    ""      ""      "d"
+"e"    ""      ""      "e"
+"f"    ""      ""      "f"
+"g"    ""      ""      "g"
+"h"    ""      ""      "h"
+"i"    ""      ""      "i"
+"j"    ""      ""      "(x[spanish]|Z)" // not It
+"k"    ""      ""      "k"
+"l"    ""      ""      "l"
+"m"    ""      ""      "m"
+"n"    ""      ""      "n"
+"o"    ""      ""      "o"
+"p"    ""      ""      "p"
+"q"    ""      ""      "k"
+"r"    ""      ""      "r"
+"s"    ""      ""      "(s|S[portuguese])"
+"t"    ""      ""      "t"
+"u"    ""      ""      "u"
+"v"    ""      ""      "(v|b[spanish])"
+"w"    ""      ""      "v"    // foreign
+"x"    ""      ""      "(ks|gz|S[portuguese+spanish])"   // S/ks Port & Sp, gz Sp, It only ks
+"y"    ""      ""      "i"
+"z"    ""      ""      "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_common.txt 
b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_common.txt
new file mode 100644
index 0000000..e95a756
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_common.txt
@@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include ash_exact_approx_common
+
+// REGRESSIVE ASSIMILATION OF CONSONANTS
+"n"    ""      "[bp]"  "m" 
+
+// PECULIARITY OF "h" 
+"h"    ""      ""      "" 
+"H"    ""      ""      "(x|)" 
+
+// POLISH OGONEK IMPOSSIBLE
+"F" "" "[bdgkpstvzZ]h" "e"
+"F" "" "[bdgkpstvzZ]x" "e"
+"B" "" "[bdgkpstvzZ]h" "a"
+"B" "" "[bdgkpstvzZ]x" "a"
+
+// "e" and "i" ARE TO BE OMITTED BEFORE (SYLLABIC) n & l: Halperin=Halpern; Frankel = Frankl, Finkelstein = Finklstein
+"e" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"i" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"E" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"I" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"F" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"Q" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"Y" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+
+"e" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"i" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"E" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"I" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"F" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"Q" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"Y" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+
+"lEs"  ""      ""      "(lEs|lz)"  // Applebaum < Appelbaum (English + blend English-something forms as Finklestein)
+"lE"   "[bdfgkmnprStvzZ]"      ""      "(lE|l)"  // Applebaum < Appelbaum (English + blend English-something forms as Finklestein)
+
+// SIMPLIFICATION: (TRIPHTHONGS & DIPHTHONGS) -> ONE GENERIC DIPHTHONG "D"
+"aue"  ""      ""      "D"
+"oue"  ""      ""      "D"
+    
+"AvE"  ""      ""      "(D|AvE)"
+"Ave"  ""      ""      "(D|Ave)"
+"avE"  ""      ""      "(D|avE)"
+"ave"  ""      ""      "(D|ave)"
+    
+"OvE"  ""      ""      "(D|OvE)"
+"Ove"  ""      ""      "(D|Ove)"
+"ovE"  ""      ""      "(D|ovE)"
+"ove"  ""      ""      "(D|ove)"
+    
+"ea"   ""      ""      "(D|ea)"
+"EA"   ""      ""      "(D|EA)"
+"Ea"   ""      ""      "(D|Ea)"
+"eA"   ""      ""      "(D|eA)"
+             
+"aji"  ""      ""      "D"
+"ajI"  ""      ""      "D"
+"aje"  ""      ""      "D"
+"ajE"  ""      ""      "D"
+    
+"Aji"  ""      ""      "D"
+"AjI"  ""      ""      "D"
+"Aje"  ""      ""      "D"
+"AjE"  ""      ""      "D"
+    
+"oji"  ""      ""      "D"
+"ojI"  ""      ""      "D"
+"oje"  ""      ""      "D"
+"ojE"  ""      ""      "D"
+    
+"Oji"  ""      ""      "D"
+"OjI"  ""      ""      "D"
+"Oje"  ""      ""      "D"
+"OjE"  ""      ""      "D"
+    
+"eji"  ""      ""      "D"
+"ejI"  ""      ""      "D"
+"eje"  ""      ""      "D"
+"ejE"  ""      ""      "D"
+    
+"Eji"  ""      ""      "D"
+"EjI"  ""      ""      "D"
+"Eje"  ""      ""      "D"
+"EjE"  ""      ""      "D"
+    
+"uji"  ""      ""      "D"
+"ujI"  ""      ""      "D"
+"uje"  ""      ""      "D"
+"ujE"  ""      ""      "D"
+    
+"Uji"  ""      ""      "D"
+"UjI"  ""      ""      "D"
+"Uje"  ""      ""      "D"
+"UjE"  ""      ""      "D"
+        
+"iji"  ""      ""      "D"
+"ijI"  ""      ""      "D"
+"ije"  ""      ""      "D"
+"ijE"  ""      ""      "D"
+    
+"Iji"  ""      ""      "D"
+"IjI"  ""      ""      "D"
+"Ije"  ""      ""      "D"
+"IjE"  ""      ""      "D"
+                         
+"aja"  ""      ""      "D"
+"ajA"  ""      ""      "D"
+"ajo"  ""      ""      "D"
+"ajO"  ""      ""      "D"
+"aju"  ""      ""      "D"
+"ajU"  ""      ""      "D"
+    
+"Aja"  ""      ""      "D"
+"AjA"  ""      ""      "D"
+"Ajo"  ""      ""      "D"
+"AjO"  ""      ""      "D"
+"Aju"  ""      ""      "D"
+"AjU"  ""      ""      "D"
+    
+"oja"  ""      ""      "D"
+"ojA"  ""      ""      "D"
+"ojo"  ""      ""      "D"
+"ojO"  ""      ""      "D"
+"Aju"  ""      ""      "D"
+"AjU"  ""      ""      "D"
+    
+"Oja"  ""      ""      "D"
+"OjA"  ""      ""      "D"
+"Ojo"  ""      ""      "D"
+"OjO"  ""      ""      "D"
+"Aju"  ""      ""      "D"
+"AjU"  ""      ""      "D"
+    
+"eja"  ""      ""      "D"
+"ejA"  ""      ""      "D"
+"ejo"  ""      ""      "D"
+"ejO"  ""      ""      "D"
+"Aju"  ""      ""      "D"
+"AjU"  ""      ""      "D"
+    
+"Eja"  ""      ""      "D"
+"EjA"  ""      ""      "D"
+"Ejo"  ""      ""      "D"
+"EjO"  ""      ""      "D"
+"Aju"  ""      ""      "D"
+"AjU"  ""      ""      "D"
+    
+"uja"  ""      ""      "D"
+"ujA"  ""      ""      "D"
+"ujo"  ""      ""      "D"
+"ujO"  ""      ""      "D"
+"Aju"  ""      ""      "D"
+"AjU"  ""      ""      "D"
+        
+"Uja"  ""      ""      "D"
+"UjA"  ""      ""      "D"
+"Ujo"  ""      ""      "D"
+"UjO"  ""      ""      "D"
+"Aju"  ""      ""      "D"
+"AjU"  ""      ""      "D"
+        
+"ija"  ""      ""      "D"
+"ijA"  ""      ""      "D"
+"ijo"  ""      ""      "D"
+"ijO"  ""      ""      "D"
+"Aju"  ""      ""      "D"
+"AjU"  ""      ""      "D"
+    
+"Ija"  ""      ""      "D"
+"IjA"  ""      ""      "D"
+"Ijo"  ""      ""      "D"
+"IjO"  ""      ""      "D"                         
+"Aju"  ""      ""      "D"
+"AjU"  ""      ""      "D"
+                         
+"j"    ""      ""      "i"                         
+                         
+// lander = lender = länder 
+"lYndEr"       ""      "$"     "lYnder" 
+"lander"       ""      "$"     "lYnder" 
+"lAndEr"       ""      "$"     "lYnder" 
+"lAnder"       ""      "$"     "lYnder" 
+"landEr"       ""      "$"     "lYnder" 
+"lender"       ""      "$"     "lYnder" 
+"lEndEr"       ""      "$"     "lYnder" 
+"lendEr"       ""      "$"     "lYnder" 
+"lEnder"       ""      "$"     "lYnder" 
+             
+// CONSONANTS {z & Z; s & S} are approximately interchangeable
+"s" "" "[rmnl]" "z"
+"S" "" "[rmnl]" "z"
+"s" "[rmnl]" "" "z"
+"S" "[rmnl]" "" "z"
+    
+"dS" "" "$" "S"
+"dZ" "" "$" "S"
+"Z" "" "$" "S"
+"S" "" "$" "(S|s)"
+"z" "" "$" "(S|s)"
+    
+"S" "" "" "s"
+"dZ" "" "" "z"
+"Z" "" "" "z"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_cyrillic.txt
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_cyrillic.txt 
b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_cyrillic.txt
new file mode 100644
index 0000000..4210173
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_cyrillic.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include ash_approx_russian
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_english.txt
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_english.txt 
b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_english.txt
new file mode 100644
index 0000000..84d8174
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_english.txt
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// VOWELS
+"I" "" "[^aEIeiou]e" "(Q|i|D)" // like in "five"
+"I" "" "$" "i"
+"I" "[aEIeiou]" "" "i"
+"I" "" "[^k]$" "i"
+"Ik" "[lr]" "$" "(ik|Qk)"
+"Ik" "" "$" "ik"
+"sIts" "" "$" "(sits|sQts)"
+"Its" "" "$" "its"
+"I" "" "" "(i|Q)" 
+    
+"lE" "[bdfgkmnprsStvzZ]" "" "(il|li|lY)"  // Applebaum < Appelbaum
+         
+"au" "" "" "(D|a|u)"
+"ou" "" "" "(D|o|u)"
+"ai" "" "" "(D|a|i)"
+"oi" "" "" "(D|o|i)"
+"ui" "" "" "(D|u|i)"
+        
+"E" "D[^aeiEIou]" "" "(i|)" // Weinberg, Shaneberg (shaneberg/shejneberg) --> shejnberg
+"e" "D[^aeiEIou]" "" "(i|)" 
+
+"e" "" "" "i"
+"E" "" "[fklmnprsStv]$" "i"
+"E" "" "ts$" "i"
+"E" "[DaoiEuQY]" "" "i"
+"E" "" "[aoQY]" "i"
+"E" "" "" "(Y|i)"
+      
+"a" "" "" "(a|o)"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_french.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_french.txt 
b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_french.txt
new file mode 100644
index 0000000..fa8ee99
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_french.txt
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"I" "" "$" "i"
+"I" "[aEIeiou]" "" "i"
+"I" "" "[^k]$" "i"
+"Ik" "[lr]" "$" "(ik|Qk)"
+"Ik" "" "$" "ik"
+"sIts" "" "$" "(sits|sQts)"
+"Its" "" "$" "its"
+"I" "" "" "(i|Q)" 
+
+"au" "" "" "(D|a|u)"
+"ou" "" "" "(D|o|u)"
+"ai" "" "" "(D|a|i)"
+"oi" "" "" "(D|o|i)"
+"ui" "" "" "(D|u|i)"
+      
+"a" "" "" "(a|o)" 
+"e" "" "" "i" 
+    
+"E" "" "[fklmnprsStv]$" "i"
+"E" "" "ts$" "i"
+"E" "[aoiuQ]" "" "i"
+"E" "" "[aoQ]" "i"
+"E" "" "" "(Y|i)"
\ No newline at end of file

[13/15] lucenenet git commit: Added Lucene.Net.Analysis.Phonetic + tests. Rather than porting over the entire commons-codec library, only the language features were ported and added to this library.

Reply via email to