http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_german.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_german.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_german.txt new file mode 100644 index 0000000..78cc0f8 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_german.txt @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +"I" "" "$" "i" +"I" "[aeiAEIOUouQY]" "" "i" +"I" "" "[^k]$" "i" +"Ik" "[lr]" "$" "(ik|Qk)" +"Ik" "" "$" "ik" +"sIts" "" "$" "(sits|sQts)" +"Its" "" "$" "its" +"I" "" "" "(Q|i)" + +"AU" "" "" "(D|a|u)" +"aU" "" "" "(D|a|u)" +"Au" "" "" "(D|a|u)" +"au" "" "" "(D|a|u)" +"ou" "" "" "(D|o|u)" +"OU" "" "" "(D|o|u)" +"oU" "" "" "(D|o|u)" +"Ou" "" "" "(D|o|u)" +"ai" "" "" "(D|a|i)" +"Ai" "" "" "(D|a|i)" +"oi" "" "" "(D|o|i)" +"Oi" "" "" "(D|o|i)" +"ui" "" "" "(D|u|i)" +"Ui" "" "" "(D|u|i)" + +"e" "" "" "i" + +"E" "" "[fklmnprst]$" "i" +"E" "" "ts$" "i" +"E" "" "$" "i" +"E" "[DaoAOUiuQY]" "" "i" +"E" "" "[aoAOQY]" "i" +"E" "" "" "(Y|i)" + +"O" "" "$" "o" +"O" "" "[fklmnprst]$" "o" +"O" "" "ts$" "o" +"O" "[aoAOUeiuQY]" "" "o" +"O" "" "" "(o|Y)" + +"a" "" "" "(a|o)" + +"A" "" "$" "(a|o)" +"A" "" "[fklmnprst]$" "(a|o)" +"A" "" "ts$" "(a|o)" +"A" "[aoeOUiuQY]" "" "(a|o)" +"A" "" "" "(a|o|Y)" + +"U" "" "$" "u" +"U" "[DaoiuUQY]" "" "u" +"U" "" "[^k]$" "u" +"Uk" "[lr]" "$" "(uk|Qk)" +"Uk" "" "$" "uk" +"sUts" "" "$" "(suts|sQts)" +"Uts" "" "$" "uts" +"U" "" "" "(u|Q)"
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_hebrew.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_hebrew.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_hebrew.txt new file mode 100644 index 0000000..0990004 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_hebrew.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// empty \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_hungarian.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_hungarian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_hungarian.txt new file mode 100644 index 0000000..bb950fb --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_hungarian.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include ash_approx_french \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_polish.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_polish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_polish.txt new file mode 100644 index 0000000..7f49817 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_polish.txt @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +"aiB" "" "[bp]" "(D|Dm)" +"oiB" "" "[bp]" "(D|Dm)" +"uiB" "" "[bp]" "(D|Dm)" +"eiB" "" "[bp]" "(D|Dm)" +"EiB" "" "[bp]" "(D|Dm)" +"iiB" "" "[bp]" "(D|Dm)" +"IiB" "" "[bp]" "(D|Dm)" + +"aiB" "" "[dgkstvz]" "(D|Dn)" +"oiB" "" "[dgkstvz]" "(D|Dn)" +"uiB" "" "[dgkstvz]" "(D|Dn)" +"eiB" "" "[dgkstvz]" "(D|Dn)" +"EiB" "" "[dgkstvz]" "(D|Dn)" +"iiB" "" "[dgkstvz]" "(D|Dn)" +"IiB" "" "[dgkstvz]" "(D|Dn)" + +"B" "" "[bp]" "(o|om|im)" +"B" "" "[dgkstvz]" "(o|on|in)" +"B" "" "" "o" + +"aiF" "" "[bp]" "(D|Dm)" +"oiF" "" "[bp]" "(D|Dm)" +"uiF" "" "[bp]" "(D|Dm)" +"eiF" "" "[bp]" "(D|Dm)" +"EiF" "" "[bp]" "(D|Dm)" +"iiF" "" "[bp]" "(D|Dm)" +"IiF" "" "[bp]" "(D|Dm)" + +"aiF" "" "[dgkstvz]" "(D|Dn)" +"oiF" "" "[dgkstvz]" "(D|Dn)" +"uiF" "" "[dgkstvz]" "(D|Dn)" +"eiF" "" "[dgkstvz]" "(D|Dn)" +"EiF" "" "[dgkstvz]" "(D|Dn)" +"iiF" "" "[dgkstvz]" "(D|Dn)" +"IiF" "" "[dgkstvz]" "(D|Dn)" + +"F" "" "[bp]" "(i|im|om)" +"F" "" "[dgkstvz]" "(i|in|on)" +"F" "" "" "i" + +"P" "" "" "(o|u)" + +"I" "" "$" "i" +"I" "" "[^k]$" "i" +"Ik" "[lr]" "$" "(ik|Qk)" +"Ik" "" "$" "ik" +"sIts" "" "$" "(sits|sQts)" +"Its" "" "$" "its" +"I" "[aeiAEBFIou]" "" "i" +"I" "" "" "(i|Q)" + +"au" "" "" "(D|a|u)" +"ou" "" "" "(D|o|u)" +"ai" "" "" "(D|a|i)" +"oi" "" "" "(D|o|i)" +"ui" "" "" "(D|u|i)" + +"a" "" "" "(a|o)" +"e" "" "" "i" + +"E" "" "[fklmnprst]$" "i" +"E" "" "ts$" "i" +"E" "" "$" "i" +"E" "[DaoiuQ]" "" "i" +"E" "" "[aoQ]" "i" +"E" "" "" "(Y|i)" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_romanian.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_romanian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_romanian.txt new file mode 100644 index 0000000..295debf --- /dev/null +++ 
b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_romanian.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include ash_approx_polish \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_russian.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_russian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_russian.txt new file mode 100644 index 0000000..46d6a8c --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_russian.txt @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +"I" "" "$" "i" +"I" "" "[^k]$" "i" +"Ik" "[lr]" "$" "(ik|Qk)" +"Ik" "" "$" "ik" +"sIts" "" "$" "(sits|sQts)" +"Its" "" "$" "its" +"I" "[aeiEIou]" "" "i" +"I" "" "" "(i|Q)" + +"au" "" "" "(D|a|u)" +"ou" "" "" "(D|o|u)" +"ai" "" "" "(D|a|i)" +"oi" "" "" "(D|o|i)" +"ui" "" "" "(D|u|i)" + +"om" "" "[bp]" "(om|im)" +"on" "" "[dgkstvz]" "(on|in)" +"em" "" "[bp]" "(im|om)" +"en" "" "[dgkstvz]" "(in|on)" +"Em" "" "[bp]" "(im|Ym|om)" +"En" "" "[dgkstvz]" "(in|Yn|on)" + +"a" "" "" "(a|o)" +"e" "" "" "i" + +"E" "" "[fklmnprsStv]$" "i" +"E" "" "ts$" "i" +"E" "[DaoiuQ]" "" "i" +"E" "" "[aoQ]" "i" +"E" "" "" "(Y|i)" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_spanish.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_spanish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_spanish.txt new file mode 100644 index 0000000..bb950fb --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_spanish.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include ash_approx_french \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_any.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_any.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_any.txt new file mode 100644 index 0000000..e6abc2d --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_any.txt @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// These rules are applied after the word has been transliterated into the phonetic alphabet +// These rules are substitution rules within the phonetic character space rather than mapping rules + +// format of each entry rule in the table +// (pattern, left context, right context, phonetic) +// where +// pattern is a sequence of characters that might appear after a word has been transliterated into phonetic alphabet +// left context is the context that precedes the pattern +// right context is the context that follows the pattern +// phonetic is the result that this rule generates +// +// note that both left context and right context can be regular expressions +// ex: left context of ^ would mean start of word +// right context of $ means end of word +// +// match occurs if all of the following are true: +// portion of word matches the pattern +// that portion satisfies the context + +// A, E, I, O, P, U should create variants, but a, e, i, o, u should not create any new variant +// Q = ü ; Y = ä = ö + + +"A" "" "" "a" +"B" "" "" "a" + +"E" "" "" "e" +"F" "" "" "e" + +"I" "" "" "i" +"O" "" "" "o" +"P" "" "" "o" +"U" "" "" "u" + +"J" "" "" "l" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_approx_common.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_approx_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_approx_common.txt new file mode 100644 index 0000000..0a8d121 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_approx_common.txt @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Ashkenazic + +"h" "" "$" "" +// VOICED - UNVOICED CONSONANTS +"b" "" "[fktSs]" "p" +"b" "" "p" "" +"b" "" "$" "p" +"p" "" "[gdZz]" "b" +"p" "" "b" "" + +"v" "" "[pktSs]" "f" +"v" "" "f" "" +"v" "" "$" "f" +"f" "" "[bgdZz]" "v" +"f" "" "v" "" + +"g" "" "[pftSs]" "k" +"g" "" "k" "" +"g" "" "$" "k" +"k" "" "[bdZz]" "g" +"k" "" "g" "" + +"d" "" "[pfkSs]" "t" +"d" "" "t" "" +"d" "" "$" "t" +"t" "" "[bgZz]" "d" +"t" "" "d" "" + +"s" "" "dZ" "" +"s" "" "tS" "" + +"z" "" "[pfkSt]" "s" +"z" "" "[sSzZ]" "" +"s" "" "[sSzZ]" "" +"Z" "" "[sSzZ]" "" +"S" "" "[sSzZ]" "" + +// SIMPLIFICATION OF CONSONANT CLUSTERS + +"jnm" "" "" "jm" + +// DOUBLE --> SINGLE + +"ji" "^" "" "i" +"jI" "^" "" "I" + +"a" "" "[aAB]" "" +"a" "[AB]" "" "" +"A" "" "A" "" +"B" "" "B" "" + +"b" "" "b" "" +"d" "" "d" "" +"f" "" "f" "" +"g" "" "g" "" +"k" "" "k" "" +"l" "" "l" "" +"m" "" "m" "" +"n" "" "n" "" +"p" "" "p" "" +"r" "" "r" "" +"t" "" "t" "" +"v" "" "v" "" +"z" "" "z" "" + \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_common.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_common.txt new file mode 100644 index 0000000..7e6ff95 --- /dev/null +++ 
b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_common.txt @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include ash_exact_approx_common + +"H" "" "" "h" + +// VOICED - UNVOICED CONSONANTS + +"s" "[^t]" "[bgZd]" "z" +"Z" "" "[pfkst]" "S" +"Z" "" "$" "S" +"S" "" "[bgzd]" "Z" +"z" "" "$" "s" + +"ji" "[aAoOeEiIuU]" "" "j" +"jI" "[aAoOeEiIuU]" "" "j" +"je" "[aAoOeEiIuU]" "" "j" +"jE" "[aAoOeEiIuU]" "" "j" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_cyrillic.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_cyrillic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_cyrillic.txt new file mode 100644 index 0000000..d309ead --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_cyrillic.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include ash_exact_russian \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_english.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_english.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_english.txt new file mode 100644 index 0000000..d309ead --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_english.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include ash_exact_russian \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_french.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_french.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_french.txt new file mode 100644 index 0000000..d309ead --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_french.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include ash_exact_russian \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_german.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_german.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_german.txt new file mode 100644 index 0000000..a60f8cc --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_german.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include ash_exact_any \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_hebrew.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_hebrew.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_hebrew.txt new file mode 100644 index 0000000..0990004 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_hebrew.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// empty \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_hungarian.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_hungarian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_hungarian.txt new file mode 100644 index 0000000..d309ead --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_hungarian.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include ash_exact_russian \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_polish.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_polish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_polish.txt new file mode 100644 index 0000000..ba32ce7 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_polish.txt @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +"B" "" "" "a" +"F" "" "" "e" +"P" "" "" "o" + +"E" "" "" "e" +"I" "" "" "i" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_romanian.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_romanian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_romanian.txt new file mode 100644 index 0000000..d309ead --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_romanian.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include ash_exact_russian \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_russian.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_russian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_russian.txt new file mode 100644 index 0000000..fc9f14d --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_russian.txt @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +"E" "" "" "e" +"I" "" "" "i" \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_spanish.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_spanish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_spanish.txt new file mode 100644 index 0000000..d309ead --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_spanish.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include ash_exact_russian \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_hebrew_common.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_hebrew_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_hebrew_common.txt new file mode 100644 index 0000000..b1c6501 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_hebrew_common.txt @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include ash_exact_approx_common + +"ts" "" "" "C" // to avoid confusing Gutes [=guts] and Guts [=guc] +"tS" "" "" "C" // same reason +"S" "" "" "s" +"p" "" "" "f" +"b" "^" "" "b" +"b" "" "" "(b|v)" + +"ja" "" "" "i" +"jA" "" "" "i" +"jB" "" "" "i" +"je" "" "" "i" +"jE" "" "" "i" +"jF" "" "" "i" +"aj" "" "" "i" +"Aj" "" "" "i" +"Bj" "" "" "i" +"Fj" "" "" "i" +"I" "" "" "i" +"Q" "" "" "i" +"j" "" "" "i" + +"a" "^" "" "1" +"A" "^" "" "1" +"B" "^" "" "1" +"e" "^" "" "1" +"E" "^" "" "1" +"F" "^" "" "1" +"Y" "^" "" "1" + +"a" "" "$" "1" +"A" "" "$" "1" +"B" "" "$" "1" +"e" "" "$" "1" +"E" "" "$" "1" +"F" "" "$" "1" +"Y" "" "$" "1" + +"a" "" "" "" +"A" "" "" "" +"B" "" "" "" +"e" "" "" "" +"E" "" "" "" +"F" "" "" "" +"Y" "" "" "" + +"oj" "^" "" "(u|vi)" +"Oj" "^" "" "(u|vi)" +"uj" "^" "" "(u|vi)" +"Uj" "^" "" "(u|vi)" + +"oj" "" "" "u" +"Oj" "" "" "u" +"uj" "" "" "u" +"Uj" "" "" "u" + +"ou" "^" "" "(u|v|1)" +"o" "^" "" "(u|v|1)" +"O" "^" "" "(u|v|1)" +"U" "^" "" "(u|v|1)" +"u" "^" "" "(u|v|1)" + +"o" "" "$" "(u|1)" +"O" "" "$" "(u|1)" +"u" "" "$" "(u|1)" +"U" "" "$" "(u|1)" + +"ou" "" "" "u" +"o" "" "" "u" +"O" "" "" "u" +"U" "" "" "u" + +"VV" "" "" "u" // alef/ayin + vov from ruleshebrew +"V" "" "" "v" // tsvey-vov from ruleshebrew; only Ashkenazic +"L" "^" "" "1" // alef/ayin from ruleshebrew +"L" "" "$" "1" // alef/ayin from ruleshebrew +"L" "" "" " " // alef/ayin from ruleshebrew +"WW" "^" "" "(vi|u)" // vav-yod from ruleshebrew +"WW" "" "" "u" // vav-yod from ruleshebrew +"W" "^" "" "(u|v)" // vav from ruleshebrew +"W" "" "" "u" // vav from ruleshebrew + + //"g" "" "" "(g|Z)" + //"z" "" "" "(z|Z)" + //"d" "" "" "(d|dZ)" + +"TB" "" "$" "(t|s)" // tav from ruleshebrew; only Ashkenazic +"TB" "" "" "t" // tav from ruleshebrew; only Ashkenazic +"T" "" "" "t" // tet from ruleshebrew + + //"k" "" "" "(k|x)" + //"x" "" "" "(k|x)" +"K" "" "" "k" // kof and initial kaf from ruleshebrew +"X" "" "" "x" // khet and final kaf from ruleshebrew + +"H" "^" "" "(x|1)" +"H" "" 
"$" "(x|1)" +"H" "" "" "(x|)" +"h" "^" "" "1" +"h" "" "" "" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_languages.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_languages.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_languages.txt new file mode 100644 index 0000000..8c84c51 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_languages.txt @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +any +cyrillic +english +french +german +hebrew +hungarian +polish +romanian +russian +spanish http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_any.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_any.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_any.txt new file mode 100644 index 0000000..9960ada --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_any.txt @@ -0,0 +1,332 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +//ASHKENAZIC + +// CONVERTING FEMININE TO MASCULINE +"yna" "" "$" "(in[russian]|ina)" +"ina" "" "$" "(in[russian]|ina)" +"liova" "" "$" "(lof[russian]|lef[russian]|lova)" +"lova" "" "$" "(lof[russian]|lef[russian]|lova)" +"ova" "" "$" "(of[russian]|ova)" +"eva" "" "$" "(ef[russian]|eva)" +"aia" "" "$" "(aja|i[russian])" +"aja" "" "$" "(aja|i[russian])" +"aya" "" "$" "(aja|i[russian])" + +"lowa" "" "$" "(lova|lof[polish]|l[polish]|el[polish])" +"kowa" "" "$" "(kova|kof[polish]|k[polish]|ek[polish])" +"owa" "" "$" "(ova|of[polish]|)" +"lowna" "" "$" "(lovna|levna|l[polish]|el[polish])" +"kowna" "" "$" "(kovna|k[polish]|ek[polish])" +"owna" "" "$" "(ovna|[polish])" +"lówna" "" "$" "(l|el[polish])" // polish +"kówna" "" "$" "(k|ek[polish])" // polish +"ówna" "" "$" "" // polish + +"a" "" "$" "(a|i[polish])" + +// CONSONANTS (integrated: German, Polish, Russian, Romanian and English) + +"rh" "^" "" "r" +"ssch" "" "" "S" +"chsch" "" "" "xS" +"tsch" "" "" "tS" + +"sch" "" "[ei]" "(sk[romanian]|S|StS[russian])" // german +"sch" "" "" "(S|StS[russian])" // german + +"ssh" "" "" "S" + +"sh" "" "[äöü]" "sh" // german +"sh" "" "[aeiou]" "(S[russian+english]|sh)" +"sh" "" "" "S" // russian+english + +"kh" "" "" "(x[russian+english]|kh)" + +"chs" "" "" "(ks[german]|xs|tSs[russian+english])" + + // French "ch" is currently disabled + //array("ch" "" "[ei]" "(x|tS|k[romanian]|S[french])" + //array("ch" "" "" "(x|tS[russian+english]|S[french])" + +"ch" "" "[ei]" "(x|k[romanian]|tS[russian+english])" +"ch" "" "" "(x|tS[russian+english])" + +"ck" "" "" "(k|tsk[polish])" + +"czy" "" "" "tSi" +"cze" "" "[bcdgkpstwzż]" "(tSe|tSF)" +"ciewicz" "" "" "(tsevitS|tSevitS)" +"siewicz" "" "" "(sevitS|SevitS)" +"ziewicz" "" "" "(zevitS|ZevitS)" +"riewicz" "" "" "rjevitS" +"diewicz" "" "" "djevitS" +"tiewicz" "" "" "tjevitS" +"iewicz" "" "" "evitS" +"ewicz" "" "" "evitS" +"owicz" "" "" "ovitS" +"icz" "" "" "itS" +"cz" "" "" "tS" // Polish + +"cia" "" "[bcdgkpstwzż]" "(tSB[polish]|tsB)" 
+"cia" "" "" "(tSa[polish]|tsa)" +"cią" "" "[bp]" "(tSom[polish]|tsom)" +"cią" "" "" "(tSon[polish]|tson)" +"cię" "" "[bp]" "(tSem[polish]|tsem)" +"cię" "" "" "(tSen[polish]|tsen)" +"cie" "" "[bcdgkpstwzż]" "(tSF[polish]|tsF)" +"cie" "" "" "(tSe[polish]|tse)" +"cio" "" "" "(tSo[polish]|tso)" +"ciu" "" "" "(tSu[polish]|tsu)" + +"ci" "" "$" "(tsi[polish]|tSi[polish+romanian]|tS[romanian]|si)" +"ci" "" "" "(tsi[polish]|tSi[polish+romanian]|si)" +"ce" "" "[bcdgkpstwzż]" "(tsF[polish]|tSe[polish+romanian]|se)" +"ce" "" "" "(tSe[polish+romanian]|tse[polish]|se)" +"cy" "" "" "(si|tsi[polish])" + +"ssz" "" "" "S" // Polish +"sz" "" "" "S" // Polish; actually could also be Hungarian /s/, disabled here + +"ssp" "" "" "(Sp[german]|sp)" +"sp" "" "" "(Sp[german]|sp)" +"sst" "" "" "(St[german]|st)" +"st" "" "" "(St[german]|st)" +"ss" "" "" "s" + +"sia" "" "[bcdgkpstwzż]" "(SB[polish]|sB[polish]|sja)" +"sia" "" "" "(Sa[polish]|sja)" +"sią" "" "[bp]" "(Som[polish]|som)" +"sią" "" "" "(Son[polish]|son)" +"się" "" "[bp]" "(Sem[polish]|sem)" +"się" "" "" "(Sen[polish]|sen)" +"sie" "" "[bcdgkpstwzż]" "(SF[polish]|sF|zi[german])" +"sie" "" "" "(se|Se[polish]|zi[german])" +"sio" "" "" "(So[polish]|so)" +"siu" "" "" "(Su[polish]|sju)" +"si" "" "" "(Si[polish]|si|zi[german])" +"s" "" "[aeiouäöë]" "(s|z[german])" + +"gue" "" "" "ge" +"gui" "" "" "gi" +"guy" "" "" "gi" +"gh" "" "[ei]" "(g[romanian]|gh)" + +"gauz" "" "$" "haus" +"gaus" "" "$" "haus" +"gol'ts" "" "$" "holts" +"golts" "" "$" "holts" +"gol'tz" "" "$" "holts" +"goltz" "" "" "holts" +"gol'ts" "^" "" "holts" +"golts" "^" "" "holts" +"gol'tz" "^" "" "holts" +"goltz" "^" "" "holts" +"gendler" "" "$" "hendler" +"gejmer" "" "$" "hajmer" +"gejm" "" "$" "hajm" +"geymer" "" "$" "hajmer" +"geym" "" "$" "hajm" +"geimer" "" "$" "hajmer" +"geim" "" "$" "hajm" +"gof" "" "$" "hof" + +"ger" "" "$" "ger" +"gen" "" "$" "gen" +"gin" "" "$" "gin" + +"gie" "" "$" "(ge|gi[german]|ji[french])" +"gie" "" "" "ge" +"ge" "[yaeiou]" "" 
"(gE|xe[spanish]|dZe[english+romanian])" +"gi" "[yaeiou]" "" "(gI|xi[spanish]|dZi[english+romanian])" +"ge" "" "" "(gE|dZe[english+romanian]|hE[russian]|xe[spanish])" +"gi" "" "" "(gI|dZi[english+romanian]|hI[russian]|xi[spanish])" +"gy" "" "[aeouáéóúüöőű]" "(gi|dj[hungarian])" +"gy" "" "" "(gi|d[hungarian])" +"g" "[jyaeiou]" "[aouyei]" "g" +"g" "" "[aouei]" "(g|h[russian])" + +"ej" "" "" "(aj|eZ[french+romanian]|ex[spanish])" +"ej" "" "" "aj" + +"ly" "" "[au]" "l" +"li" "" "[au]" "l" +"lj" "" "[au]" "l" +"lio" "" "" "(lo|le[russian])" +"lyo" "" "" "(lo|le[russian])" +"ll" "" "" "(l|J[spanish])" + +"j" "" "[aoeiuy]" "(j|dZ[english]|x[spanish]|Z[french+romanian])" +"j" "" "" "(j|x[spanish])" + +"pf" "" "" "(pf|p|f)" +"ph" "" "" "(ph|f)" + +"qu" "" "" "(kv[german]|k)" + +"rze" "t" "" "(Se[polish]|re)" // polish +"rze" "" "" "(rze|rtsE[german]|Ze[polish]|re[polish]|rZe[polish])" +"rzy" "t" "" "(Si[polish]|ri)" // polish +"rzy" "" "" "(Zi[polish]|ri[polish]|rZi)" +"rz" "t" "" "(S[polish]|r)" // polish +"rz" "" "" "(rz|rts[german]|Z[polish]|r[polish]|rZ[polish])" // polish + +"tz" "" "$" "(ts|tS[english+german])" +"tz" "^" "" "(ts|tS[english+german])" +"tz" "" "" "(ts[english+german+russian]|tz)" + +"zh" "" "" "(Z|zh[polish]|tsh[german])" + +"zia" "" "[bcdgkpstwzż]" "(ZB[polish]|zB[polish]|zja)" +"zia" "" "" "(Za[polish]|zja)" +"zią" "" "[bp]" "(Zom[polish]|zom)" +"zią" "" "" "(Zon[polish]|zon)" +"zię" "" "[bp]" "(Zem[polish]|zem)" +"zię" "" "" "(Zen[polish]|zen)" +"zie" "" "[bcdgkpstwzż]" "(ZF[polish]|zF[polish]|ze|tsi[german])" +"zie" "" "" "(ze|Ze[polish]|tsi[german])" +"zio" "" "" "(Zo[polish]|zo)" +"ziu" "" "" "(Zu[polish]|zju)" +"zi" "" "" "(Zi[polish]|zi|tsi[german])" + +"thal" "" "$" "tal" +"th" "^" "" "t" +"th" "" "[aeiou]" "(t[german]|th)" +"th" "" "" "t" // german +"vogel" "" "" "(vogel|fogel[german])" +"v" "^" "" "(v|f[german])" + +"h" "[aeiouyäöü]" "" "" //german +"h" "" "" "(h|x[romanian+polish])" +"h" "^" "" "(h|H[english+german])" // H can 
be exact "h" or approximate "kh" + + // VOWELS +"yi" "^" "" "i" + + //"e" "" "$" "(e|)" // French & English rule disabled except for final -ine +"e" "in" "$" "(e|[french])" + +"ii" "" "$" "i" // russian +"iy" "" "$" "i" // russian +"yy" "" "$" "i" // russian +"yi" "" "$" "i" // russian +"yj" "" "$" "i" // russian +"ij" "" "$" "i" // russian + +"aue" "" "" "aue" +"oue" "" "" "oue" + +"au" "" "" "(au|o[french])" +"ou" "" "" "(ou|u[french])" + +"ue" "" "" "(Q|uje[russian])" +"ae" "" "" "(Y[german]|aje[russian]|ae)" +"oe" "" "" "(Y[german]|oje[russian]|oe)" +"ee" "" "" "(i[english]|aje[russian]|e)" + +"ei" "" "" "aj" +"ey" "" "" "aj" +"eu" "" "" "(aj[german]|oj[german]|eu)" + +"i" "[aou]" "" "j" +"y" "[aou]" "" "j" + +"ie" "" "[bcdgkpstwzż]" "(i[german]|e[polish]|ije[russian]|je)" +"ie" "" "" "(i[german]|e[polish]|ije[russian]|je)" +"ye" "" "" "(je|ije[russian])" + +"i" "" "[au]" "j" +"y" "" "[au]" "j" +"io" "" "" "(jo|e[russian])" +"yo" "" "" "(jo|e[russian])" + +"ea" "" "" "(ea|ja[romanian])" +"e" "^" "" "(e|je[russian])" +"oo" "" "" "(u[english]|o)" +"uu" "" "" "u" + +// LANGUAGE SPECIFIC CHARACTERS +"ć" "" "" "(tS[polish]|ts)" // polish +"ł" "" "" "l" // polish +"ń" "" "" "n" // polish +"ñ" "" "" "(n|nj[spanish])" +"ś" "" "" "(S[polish]|s)" // polish +"ş" "" "" "S" // romanian +"ţ" "" "" "ts" // romanian +"ż" "" "" "Z" // polish +"ź" "" "" "(Z[polish]|z)" // polish + +"où" "" "" "u" // french + +"ą" "" "[bp]" "om" // polish +"ą" "" "" "on" // polish +"ä" "" "" "Y" // german +"á" "" "" "a" // hungarian +"ă" "" "" "(e[romanian]|a)" //romanian +"à" "" "" "a" // french +"â" "" "" "a" //french+romanian +"é" "" "" "e" +"è" "" "" "e" // french +"ê" "" "" "e" // french +"ę" "" "[bp]" "em" // polish +"ę" "" "" "en" // polish +"í" "" "" "i" +"î" "" "" "i" +"ö" "" "" "Y" +"ő" "" "" "Y" // hungarian +"ó" "" "" "(u[polish]|o)" +"ű" "" "" "Q" +"ü" "" "" "Q" +"ú" "" "" "u" +"ű" "" "" "Q" // hungarian + +"ß" "" "" "s" // german +"'" "" "" "" +"\"" "" "" "" + +"a" "" 
"[bcdgkpstwzż]" "(A|B[polish])" +"e" "" "[bcdgkpstwzż]" "(E|F[polish])" +"o" "" "[bcćdgklłmnńrsśtwzźż]" "(O|P[polish])" + + // LATIN ALPHABET +"a" "" "" "A" +"b" "" "" "b" +"c" "" "" "(k|ts[polish])" +"d" "" "" "d" +"e" "" "" "E" +"f" "" "" "f" +"g" "" "" "g" +"h" "" "" "h" +"i" "" "" "I" +"j" "" "" "j" +"k" "" "" "k" +"l" "" "" "l" +"m" "" "" "m" +"n" "" "" "n" +"o" "" "" "O" +"p" "" "" "p" +"q" "" "" "k" +"r" "" "" "r" +"s" "" "" "s" +"t" "" "" "t" +"u" "" "" "U" +"v" "" "" "v" +"w" "" "" "v" // English disabled +"x" "" "" "ks" +"y" "" "" "i" +"z" "" "" "(ts[german]|z)" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_cyrillic.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_cyrillic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_cyrillic.txt new file mode 100644 index 0000000..d262587 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_cyrillic.txt @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +"ÑÑ" "" "" "tsa" +"ÑÑ" "" "" "tsu" +"Ñиа" "" "" "tsa" +"Ñие" "" "" "tse" +"Ñио" "" "" "tso" +"ÑиÑ" "" "" "tsu" +"Ñие" "" "" "se" +"Ñио" "" "" "so" +"зие" "" "" "ze" +"зио" "" "" "zo" + +"гаÑз" "" "$" "haus" +"гаÑÑ" "" "$" "haus" +"голÑÑ" "" "$" "holts" +"геймеÑ" "" "$" "hajmer" +"гейм" "" "$" "hajm" +"гоÑ" "" "$" "hof" +"геÑ" "" "$" "ger" +"ген" "" "$" "gen" +"гин" "" "$" "gin" +"г" "(й|Ñ|Ñ|Ñ|Ñ|а|е|о|и|Ñ)" "(а|е|о|и|Ñ)" "g" +"г" "" "(а|е|о|и|Ñ)" "(g|h)" + +"лÑ" "" "" "la" +"лÑ" "" "" "lu" +"лÑ" "" "" "(le|lo)" +"лио" "" "" "(le|lo)" +"ле" "" "" "(lE|lo)" + +"ийе" "" "" "je" +"ие" "" "" "je" +"Ñйе" "" "" "je" +"Ñе" "" "" "je" +"ий" "" "(а|о|Ñ)" "j" +"Ñй" "" "(а|о|Ñ)" "j" + +"ий" "" "$" "i" +"Ñй" "" "$" "i" + +"Ñ" "" "" "(e|jo)" + +"ей" "^" "" "(jaj|aj)" +"е" "(а|е|о|Ñ)" "" "je" +"е" "^" "" "je" +"Ñй" "" "" "aj" +"ей" "" "" "aj" + +"аÑе" "" "" "aue" +"аÑÑ" "" "" "aue" + +"а" "" "" "a" +"б" "" "" "b" +"в" "" "" "v" +"г" "" "" "g" +"д" "" "" "d" +"е" "" "" "E" +"ж" "" "" "Z" +"з" "" "" "z" +"и" "" "" "I" +"й" "" "" "j" +"к" "" "" "k" +"л" "" "" "l" +"м" "" "" "m" +"н" "" "" "n" +"о" "" "" "o" +"п" "" "" "p" +"Ñ" "" "" "r" +"Ñ" "" "Ñ" "" +"Ñ" "" "" "s" +"Ñ" "" "" "t" +"Ñ" "" "" "u" +"Ñ" "" "" "f" +"Ñ " "" "" "x" +"Ñ" "" "" "ts" +"Ñ" "" "" "tS" +"Ñ" "" "" "S" +"Ñ" "" "" "StS" +"Ñ" "" "" "" +"Ñ" "" "" "I" +"Ñ" "" "" "" +"Ñ" "" "" "E" +"Ñ" "" "" "ju" +"Ñ" "" "" "ja" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_english.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_english.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_english.txt new file mode 100644 index 0000000..f84e53f --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_english.txt @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// CONSONANTS +"tch" "" "" "tS" +"ch" "" "" "(tS|x)" +"ck" "" "" "k" +"cc" "" "[iey]" "ks" // success, accent +"c" "" "c" "" +"c" "" "[iey]" "s" // circle +"c" "" "" "k" // candy +"gh" "^" "" "g" // ghost +"gh" "" "" "(g|f|w)" // burgh | tough | bough +"gn" "" "" "(gn|n)" +"g" "" "[iey]" "(g|dZ)" // get, gem, giant, gigabyte +// "th" "" "" "(6|8|t)" +"th" "" "" "t" +"kh" "" "" "x" +"ph" "" "" "f" +"sch" "" "" "(S|sk)" +"sh" "" "" "S" +"who" "^" "" "hu" +"wh" "^" "" "w" + +"h" "" "$" "" // hard to find an example that isn't in a name +"h" "" "[^aeiou]" "" // hard to find an example that isn't in a name +"h" "^" "" "H" +"h" "" "" "h" + +"j" "" "" "dZ" +"kn" "^" "" "n" // knight +"mb" "" "$" "m" +"ng" "" "$" "(N|ng)" +"pn" "^" "" "(pn|n)" +"ps" "^" "" "(ps|s)" +"qu" "" "" "kw" +"q" "" "" "k" +"tia" "" "" "(So|Sa)" +"tio" "" "" "So" +"wr" "^" "" "r" +"w" "" "" "(w|v)" // the variant "v" is for spellings coming from German/Polish +"x" "^" "" "z" +"x" "" "" "ks" + +// VOWELS +"y" "^" "" "j" +"y" "^" "[aeiouy]" "j" +"yi" "^" "" "i" +"aue" "" "" "aue" +"oue" "" "" "(aue|oue)" +"ai" "" "" "(aj|e)" // rain | said +"ay" "" "" "aj" +"a" "" "[^aeiou]e" "aj" // plane (actually "ej") +"a" "" "" "(e|o|a)" // hat | call | part +"ei" "" "" "(aj|i)" // weigh | receive +"ey" "" "" "(aj|i)" // 
hey | barley +"ear" "" "" "ia" // tear +"ea" "" "" "(i|e)" // reason | treasure +"ee" "" "" "i" // between +"e" "" "[^aeiou]e" "i" // meter +"e" "" "$" "(|E)" // blame, badge +"e" "" "" "E" // bed +"ie" "" "" "i" // believe +"i" "" "[^aeiou]e" "aj" // five +"i" "" "" "I" // hit -- Morse disagrees, feels it should go to I +"oa" "" "" "ou" // toad +"oi" "" "" "oj" // join +"oo" "" "" "u" // food +"ou" "" "" "(u|ou)" // through | tough | could +"oy" "" "" "oj" // boy +"o" "" "[^aeiou]e" "ou" // rode +"o" "" "" "(o|a)" // hot -- Morse disagrees, feels it should go to 9 +"u" "" "[^aeiou]e" "(ju|u)" // cute | flute +"u" "" "r" "(e|u)" // turn -- Morse disagrees, feels it should go to E +"u" "" "" "(u|a)" // put +"y" "" "" "i" + +// TRIVIAL +"b" "" "" "b" +"d" "" "" "d" +"f" "" "" "f" +"g" "" "" "g" +"k" "" "" "k" +"l" "" "" "l" +"m" "" "" "m" +"n" "" "" "n" +"p" "" "" "p" +"r" "" "" "r" +"s" "" "" "s" +"t" "" "" "t" +"v" "" "" "v" +"z" "" "" "z" + http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_french.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_french.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_french.txt new file mode 100644 index 0000000..668645f --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_french.txt @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Ashkenazic + +// CONSONANTS +"kh" "" "" "x" // foreign +"ph" "" "" "f" + +"ç" "" "" "s" +"x" "" "" "ks" +"ch" "" "" "S" +"c" "" "[eiyéèê]" "s" +"c" "" "" "k" +"gn" "" "" "(n|gn)" +"g" "" "[eiy]" "Z" +"gue" "" "$" "k" +"gu" "" "[eiy]" "g" + //array("aill" "" "e" "aj" // non Jewish + //array("ll" "" "e" "(l|j)" // non Jewish +"que" "" "$" "k" +"qu" "" "" "k" +"q" "" "" "k" +"s" "[aeiouyéèê]" "[aeiouyéèê]" "z" +"h" "[bdgt]" "" "" // translit from Arabic +"h" "" "$" "" // foreign +"j" "" "" "Z" +"w" "" "" "v" +"ouh" "" "[aioe]" "(v|uh)" +"ou" "" "[aeio]" "v" +"uo" "" "" "(vo|o)" +"u" "" "[aeio]" "v" + +// VOWELS +"aue" "" "" "aue" +"eau" "" "" "o" + //array("au" "" "" "(o|au)" // non Jewish +"ai" "" "" "aj" // [e] is non Jewish +"ay" "" "" "aj" // [e] is non Jewish +"é" "" "" "e" +"ê" "" "" "e" +"è" "" "" "e" +"à " "" "" "a" +"â" "" "" "a" +"où" "" "" "u" +"ou" "" "" "u" +"oi" "" "" "oj" // [ua] is non Jewish +"ei" "" "" "aj" // [e] is non Jewish +"ey" "" "" "aj" // [e] non Jewish + //array("eu" "" "" "(e|o)" // non Jewish +"y" "[ou]" "" "j" +"e" "" "$" "(e|)" +"i" "" "[aou]" "j" +"y" "" "[aoeu]" "j" +"y" "" "" "i" + + // TRIVIAL +"a" "" "" "a" +"b" "" "" "b" +"d" "" "" "d" +"e" "" "" "E" // only Ashkenazic +"f" "" "" "f" +"g" "" "" "g" +"h" "" "" "h" +"i" "" "" "I" // only Ashkenazic +"k" "" "" "k" +"l" "" "" "l" +"m" "" "" "m" +"n" "" "" "n" +"o" "" "" "o" +"p" "" "" "p" +"r" "" "" "r" +"s" "" "" "s" +"t" "" "" "t" +"u" "" "" "u" +"v" "" "" "v" +"z" "" "" "z" 
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_german.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_german.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_german.txt new file mode 100644 index 0000000..72eef9d --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_german.txt @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Ashkenazic + +// CONSONANTS +"ziu" "" "" "tsu" +"zia" "" "" "tsa" +"zio" "" "" "tso" + +"ssch" "" "" "S" +"chsch" "" "" "xS" +"ewitsch" "" "$" "evitS" +"owitsch" "" "$" "ovitS" +"evitsch" "" "$" "evitS" +"ovitsch" "" "$" "ovitS" +"witsch" "" "$" "vitS" +"vitsch" "" "$" "vitS" +"sch" "" "" "S" + +"chs" "" "" "ks" +"ch" "" "" "x" +"ck" "" "" "k" +"c" "" "[eiy]" "ts" + +"sp" "^" "" "Sp" +"st" "^" "" "St" +"ssp" "" "" "(Sp|sp)" +"sp" "" "" "(Sp|sp)" +"sst" "" "" "(St|st)" +"st" "" "" "(St|st)" +"pf" "" "" "(pf|p|f)" +"ph" "" "" "(ph|f)" +"qu" "" "" "kv" + +"ewitz" "" "$" "(evits|evitS)" +"ewiz" "" "$" "(evits|evitS)" +"evitz" "" "$" "(evits|evitS)" +"eviz" "" "$" "(evits|evitS)" +"owitz" "" "$" "(ovits|ovitS)" +"owiz" "" "$" "(ovits|ovitS)" +"ovitz" "" "$" "(ovits|ovitS)" +"oviz" "" "$" "(ovits|ovitS)" +"witz" "" "$" "(vits|vitS)" +"wiz" "" "$" "(vits|vitS)" +"vitz" "" "$" "(vits|vitS)" +"viz" "" "$" "(vits|vitS)" +"tz" "" "" "ts" + +"thal" "" "$" "tal" +"th" "^" "" "t" +"th" "" "[äöüaeiou]" "(t|th)" +"th" "" "" "t" +"rh" "^" "" "r" +"h" "[aeiouyäöü]" "" "" +"h" "^" "" "H" + +"ss" "" "" "s" +"s" "" "[äöüaeiouy]" "(z|s)" +"s" "[aeiouyäöüj]" "[aeiouyäöü]" "z" +"Ã" "" "" "s" + + // VOWELS +"ij" "" "$" "i" +"aue" "" "" "aue" +"ue" "" "" "Q" +"ae" "" "" "Y" +"oe" "" "" "Y" +"ü" "" "" "Q" +"ä" "" "" "Y" +"ö" "" "" "Y" +"ei" "" "" "aj" +"ey" "" "" "aj" +"eu" "" "" "(aj|oj)" +"i" "[aou]" "" "j" +"y" "[aou]" "" "j" +"ie" "" "" "I" +"i" "" "[aou]" "j" +"y" "" "[aoeu]" "j" + + // FOREIGN LETTERs +"ñ" "" "" "n" +"ã" "" "" "a" +"Å" "" "" "o" +"ű" "" "" "u" +"ç" "" "" "s" + + // ALPHABET +"a" "" "" "A" +"b" "" "" "b" +"c" "" "" "k" +"d" "" "" "d" +"e" "" "" "E" +"f" "" "" "f" +"g" "" "" "g" +"h" "" "" "h" +"i" "" "" "I" +"j" "" "" "j" +"k" "" "" "k" +"l" "" "" "l" +"m" "" "" "m" +"n" "" "" "n" +"o" "" "" "O" +"p" "" "" "p" +"q" "" "" "k" +"r" "" "" "r" +"s" "" "" "s" +"t" "" "" "t" +"u" "" "" "U" +"v" "" "" "(f|v)" +"w" "" "" "v" +"x" "" "" "ks" +"y" "" "" "i" +"z" "" "" 
"ts" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_hebrew.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_hebrew.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_hebrew.txt new file mode 100644 index 0000000..4c59503 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_hebrew.txt @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Ashkenazic + +"××" "" "" "i" +"×¢×" "" "" "i" +"×¢×" "" "" "VV" +"××" "" "" "VV" + +"×׳" "" "" "Z" +"×׳" "" "" "dZ" + +"×" "" "" "L" +"×" "" "" "b" +"×" "" "" "g" +"×" "" "" "d" + +"×" "^" "" "1" +"×" "" "$" "1" +"×" "" "" "" + +"××" "" "" "V" +"××" "" "" "WW" +"×" "" "" "W" +"×" "" "" "z" +"×" "" "" "X" +"×" "" "" "T" +"××" "" "" "i" +"×" "" "" "i" +"×" "" "" "X" +"×" "^" "" "K" +"×" "" "" "k" +"×" "" "" "l" +"×" "" "" "m" +"×" "" "" "m" +"×" "" "" "n" +"× " "" "" "n" +"ס" "" "" "s" +"×¢" "" "" "L" +"×£" "" "" "f" +"פ" "" "" "f" +"×¥" "" "" "C" +"צ" "" "" "C" +"×§" "" "" "K" +"ר" "" "" "r" +"ש" "" "" "s" +"ת" "" "" "TB" // only Ashkenazic http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_hungarian.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_hungarian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_hungarian.txt new file mode 100644 index 0000000..1e6f047 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_hungarian.txt @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// ASHKENAZIC + +// CONSONANTS +"sz" "" "" "s" +"zs" "" "" "Z" +"cs" "" "" "tS" + +"ay" "" "" "(oj|aj)" +"ai" "" "" "(oj|aj)" +"aj" "" "" "(oj|aj)" + +"ei" "" "" "aj" // German element +"ey" "" "" "aj" // German element + +"y" "[áo]" "" "j" +"i" "[áo]" "" "j" +"ee" "" "" "(aj|e)" // actually ej +"ely" "" "" "(aj|eli)" // actually ej +"ly" "" "" "(j|li)" +"gy" "" "[aeouáéóúüöÅű]" "dj" +"gy" "" "" "(d|gi)" +"ny" "" "[aeouáéóúüöÅű]" "nj" +"ny" "" "" "(n|ni)" +"ty" "" "[aeouáéóúüöÅű]" "tj" +"ty" "" "" "(t|ti)" + +"qu" "" "" "(ku|kv)" +"h" "" "$" "" + +// VOWELS +"á" "" "" "a" +"é" "" "" "e" +"Ã" "" "" "i" +"ó" "" "" "o" +"ö" "" "" "Y" +"Å" "" "" "Y" +"ú" "" "" "u" +"ü" "" "" "Q" +"ű" "" "" "Q" + +// LATIN ALPHABET +"a" "" "" "a" +"b" "" "" "b" +"c" "" "" "ts" +"d" "" "" "d" +"e" "" "" "E" +"f" "" "" "f" +"g" "" "" "g" +"h" "" "" "h" +"i" "" "" "I" +"j" "" "" "j" +"k" "" "" "k" +"l" "" "" "l" +"m" "" "" "m" +"n" "" "" "n" +"o" "" "" "o" +"p" "" "" "p" +"q" "" "" "k" +"r" "" "" "r" +"s" "" "" "(S|s)" +"t" "" "" "t" +"u" "" "" "u" +"v" "" "" "v" +"w" "" "" "v" +"x" "" "" "ks" +"y" "" "" "i" +"z" "" "" "z" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_polish.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_polish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_polish.txt new file mode 100644 index 0000000..59a87dd --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_polish.txt @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Ashkenazic + +// CONVERTING FEMININE TO MASCULINE +"ska" "" "$" "ski" +"cka" "" "$" "tski" +"lowa" "" "$" "(lova|lof|l|el)" +"kowa" "" "$" "(kova|kof|k|ek)" +"owa" "" "$" "(ova|of|)" +"lowna" "" "$" "(lovna|levna|l|el)" +"kowna" "" "$" "(kovna|k|ek)" +"owna" "" "$" "(ovna|)" +"lówna" "" "$" "(l|el)" +"kówna" "" "$" "(k|ek)" +"ówna" "" "$" "" +"a" "" "$" "(a|i)" + + // CONSONANTS +"czy" "" "" "tSi" +"cze" "" "[bcdgkpstwzż]" "(tSe|tSF)" +"ciewicz" "" "" "(tsevitS|tSevitS)" +"siewicz" "" "" "(sevitS|SevitS)" +"ziewicz" "" "" "(zevitS|ZevitS)" +"riewicz" "" "" "rjevitS" +"diewicz" "" "" "djevitS" +"tiewicz" "" "" "tjevitS" +"iewicz" "" "" "evitS" +"ewicz" "" "" "evitS" +"owicz" "" "" "ovitS" +"icz" "" "" "itS" +"cz" "" "" "tS" +"ch" "" "" "x" + +"cia" "" "[bcdgkpstwzż]" "(tSB|tsB)" +"cia" "" "" "(tSa|tsa)" +"cią" "" "[bp]" "(tSom|tsom)" +"cią" "" "" "(tSon|tson)" +"cię" "" "[bp]" "(tSem|tsem)" +"cię" "" "" "(tSen|tsen)" +"cie" "" "[bcdgkpstwzż]" "(tSF|tsF)" +"cie" "" "" "(tSe|tse)" +"cio" "" "" "(tSo|tso)" +"ciu" "" "" "(tSu|tsu)" +"ci" "" "" "(tSi|tsI)" +"ć" "" "" "(tS|ts)" + +"ssz" "" "" "S" +"sz" "" "" "S" +"sia" "" "[bcdgkpstwzż]" "(SB|sB|sja)" +"sia" "" "" "(Sa|sja)" +"sią" "" "[bp]" "(Som|som)" +"sią" "" "" "(Son|son)" +"się" "" "[bp]" "(Sem|sem)" +"się" "" "" "(Sen|sen)" +"sie" "" "[bcdgkpstwzż]" "(SF|sF|se)" +"sie" "" "" "(Se|se)" +"sio" "" "" "(So|so)" +"siu" "" "" 
"(Su|sju)" +"si" "" "" "(Si|sI)" +"ś" "" "" "(S|s)" + +"zia" "" "[bcdgkpstwzż]" "(ZB|zB|zja)" +"zia" "" "" "(Za|zja)" +"zią" "" "[bp]" "(Zom|zom)" +"zią" "" "" "(Zon|zon)" +"zię" "" "[bp]" "(Zem|zem)" +"zię" "" "" "(Zen|zen)" +"zie" "" "[bcdgkpstwzż]" "(ZF|zF)" +"zie" "" "" "(Ze|ze)" +"zio" "" "" "(Zo|zo)" +"ziu" "" "" "(Zu|zju)" +"zi" "" "" "(Zi|zI)" + +"że" "" "[bcdgkpstwzż]" "(Ze|ZF)" +"że" "" "[bcdgkpstwzż]" "(Ze|ZF|ze|zF)" +"że" "" "" "Ze" +"źe" "" "" "(Ze|ze)" +"ży" "" "" "Zi" +"źi" "" "" "(Zi|zi)" +"ż" "" "" "Z" +"ź" "" "" "(Z|z)" + +"rze" "t" "" "(Se|re)" +"rze" "" "" "(Ze|re|rZe)" +"rzy" "t" "" "(Si|ri)" +"rzy" "" "" "(Zi|ri|rZi)" +"rz" "t" "" "(S|r)" +"rz" "" "" "(Z|r|rZ)" + +"lio" "" "" "(lo|le)" +"ł" "" "" "l" +"ń" "" "" "n" +"qu" "" "" "k" +"s" "" "s" "" + + // VOWELS +"ó" "" "" "(u|o)" +"ą" "" "[bp]" "om" +"ę" "" "[bp]" "em" +"ą" "" "" "on" +"ę" "" "" "en" + +"ije" "" "" "je" +"yje" "" "" "je" +"iie" "" "" "je" +"yie" "" "" "je" +"iye" "" "" "je" +"yye" "" "" "je" + +"ij" "" "[aou]" "j" +"yj" "" "[aou]" "j" +"ii" "" "[aou]" "j" +"yi" "" "[aou]" "j" +"iy" "" "[aou]" "j" +"yy" "" "[aou]" "j" + +"rie" "" "" "rje" +"die" "" "" "dje" +"tie" "" "" "tje" +"ie" "" "[bcdgkpstwzż]" "F" +"ie" "" "" "e" + +"aue" "" "" "aue" +"au" "" "" "au" + +"ei" "" "" "aj" +"ey" "" "" "aj" +"ej" "" "" "aj" + +"ai" "" "" "aj" +"ay" "" "" "aj" +"aj" "" "" "aj" + +"i" "[ou]" "" "j" +"y" "[ou]" "" "j" +"i" "" "[aou]" "j" +"y" "" "[aeou]" "j" + +"a" "" "[bcdgkpstwzż]" "B" +"e" "" "[bcdgkpstwzż]" "(E|F)" +"o" "" "[bcćdgklłmnńrsśtwzźż]" "P" + +// ALPHABET +"a" "" "" "a" +"b" "" "" "b" +"c" "" "" "ts" +"d" "" "" "d" +"e" "" "" "E" +"f" "" "" "f" +"g" "" "" "g" +"h" "" "" "(h|x)" +"i" "" "" "I" +"j" "" "" "j" +"k" "" "" "k" +"l" "" "" "l" +"m" "" "" "m" +"n" "" "" "n" +"o" "" "" "o" +"p" "" "" "p" +"q" "" "" "k" +"r" "" "" "r" +"s" "" "" "s" +"t" "" "" "t" +"u" "" "" "u" +"v" "" "" "v" +"w" "" "" "v" +"x" "" "" "ks" +"y" "" "" "I" +"z" "" "" "z" 
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_romanian.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_romanian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_romanian.txt new file mode 100644 index 0000000..f53e262 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_romanian.txt @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +"j" "" "" "Z" + +"ce" "" "" "tSe" +"ci" "" "" "(tSi|tS)" +"ch" "" "[ei]" "k" +"ch" "" "" "x" // foreign +"c" "" "" "k" + +"gi" "" "" "(dZi|dZ)" +"g" "" "[ei]" "dZ" +"gh" "" "" "g" + +"ei" "" "" "aj" +"i" "[aou]" "" "j" +"i" "" "[aeou]" "j" +"ţ" "" "" "ts" +"ş" "" "" "S" +"h" "" "" "(x|h)" + +"qu" "" "" "k" +"q" "" "" "k" +"w" "" "" "v" +"x" "" "" "ks" +"y" "" "" "i" + +"î" "" "" "i" +"ea" "" "" "ja" +"ă" "" "" "(e|a)" +"aue" "" "" "aue" + +"a" "" "" "a" +"b" "" "" "b" +"d" "" "" "d" +"e" "" "" "E" +"f" "" "" "f" +"g" "" "" "g" +"i" "" "" "I" +"k" "" "" "k" +"l" "" "" "l" +"m" "" "" "m" +"n" "" "" "n" +"o" "" "" "o" +"p" "" "" "p" +"r" "" "" "r" +"s" "" "" "s" +"t" "" "" "t" +"u" "" "" "u" +"v" "" "" "v" +"z" "" "" "z" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_russian.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_russian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_russian.txt new file mode 100644 index 0000000..817b2c3 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_russian.txt @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +// CONVERTING FEMININE TO MASCULINE +"yna" "" "$" "(in|ina)" +"ina" "" "$" "(in|ina)" +"liova" "" "$" "(lof|lef)" +"lova" "" "$" "(lof|lef|lova)" +"ova" "" "$" "(of|ova)" +"eva" "" "$" "(ef|ova)" +"aia" "" "$" "(aja|i)" +"aja" "" "$" "(aja|i)" +"aya" "" "$" "(aja|i)" + + //SPECIFIC CONSONANTS +"tsya" "" "" "tsa" +"tsyu" "" "" "tsu" +"tsia" "" "" "tsa" +"tsie" "" "" "tse" +"tsio" "" "" "tso" +"tsye" "" "" "tse" +"tsyo" "" "" "tso" +"tsiu" "" "" "tsu" +"sie" "" "" "se" +"sio" "" "" "so" +"zie" "" "" "ze" +"zio" "" "" "zo" +"sye" "" "" "se" +"syo" "" "" "so" +"zye" "" "" "ze" +"zyo" "" "" "zo" + +"gauz" "" "$" "haus" +"gaus" "" "$" "haus" +"gol'ts" "" "$" "holts" +"golts" "" "$" "holts" +"gol'tz" "" "$" "holts" +"goltz" "" "$" "holts" +"gejmer" "" "$" "hajmer" +"gejm" "" "$" "hajm" +"geimer" "" "$" "hajmer" +"geim" "" "$" "hajm" +"geymer" "" "$" "hajmer" +"geym" "" "$" "hajm" +"gendler" "" "$" "hendler" +"gof" "" "$" "hof" +"gojf" "" "$" "hojf" +"goyf" "" "$" "hojf" +"goif" "" "$" "hojf" +"ger" "" "$" "ger" +"gen" "" "$" "gen" +"gin" "" "$" "gin" +"gg" "" "" "g" +"g" "[jaeoiuy]" "[aeoiu]" "g" +"g" "" "[aeoiu]" "(g|h)" + +"kh" "" "" "x" +"ch" "" "" "(tS|x)" // in DJSRE the rule is simpler:"ch" "" "" "tS"); +"sch" "" "" "(StS|S)" +"ssh" "" "" "S" +"sh" "" "" "S" +"zh" "" "" "Z" +"tz" "" "$" "ts" // not in DJSRE +"tz" "" "" "(ts|tz)" // not in DJSRE +"c" "" "[iey]" "s" // not in DJSRE +"c" "" "" "k" // not in DJSRE +"qu" "" "" "(kv|k)" // not in DJSRE +"q" "" "" "k" // not in DJSRE +"s" "" "s" "" + +"w" "" "" "v" // not in DJSRE +"x" "" "" "ks" // not in DJSRE + + //SPECIFIC VOWELS +"lya" "" "" "la" +"lyu" "" "" "lu" +"lia" "" "" "la" // not in DJSRE +"liu" "" "" "lu" // not in DJSRE +"lja" "" "" "la" // not in DJSRE +"lju" "" "" "lu" // not in DJSRE +"le" "" "" "(lo|lE)" //not in DJSRE +"lyo" "" "" "(lo|le)" //not in DJSRE +"lio" "" "" "(lo|le)" + +"ije" 
"" "" "je" +"ie" "" "" "je" +"iye" "" "" "je" +"iie" "" "" "je" +"yje" "" "" "je" +"ye" "" "" "je" +"yye" "" "" "je" +"yie" "" "" "je" + +"ij" "" "[aou]" "j" +"iy" "" "[aou]" "j" +"ii" "" "[aou]" "j" +"yj" "" "[aou]" "j" +"yy" "" "[aou]" "j" +"yi" "" "[aou]" "j" + +"io" "" "" "(jo|e)" +"i" "" "[au]" "j" +"i" "[aou]" "" "j" // not in DJSRE +"ei" "" "" "aj" // not in DJSRE +"ey" "" "" "aj" // not in DJSRE +"ej" "" "" "aj" +"yo" "" "" "(jo|e)" //not in DJSRE +"y" "" "[au]" "j" +"y" "[aiou]" "" "j" // not in DJSRE + +"ii" "" "$" "i" // not in DJSRE +"iy" "" "$" "i" // not in DJSRE +"yy" "" "$" "i" // not in DJSRE +"yi" "" "$" "i" // not in DJSRE +"yj" "" "$" "i" +"ij" "" "$" "i" + +"e" "^" "" "(je|E)" // in DJSRE the rule is simpler:"e" "^" "" "je"); +"ee" "" "" "(aje|i)" // in DJSRE the rule is simpler:"ee" "" "" "(eje|aje)"); +"e" "[aou]" "" "je" +"y" "" "" "I" +"oo" "" "" "(oo|u)" // not in DJSRE +"'" "" "" "" +"\"" "" "" "" + +"aue" "" "" "aue" + +// TRIVIAL +"a" "" "" "a" +"b" "" "" "b" +"d" "" "" "d" +"e" "" "" "E" +"f" "" "" "f" +"g" "" "" "g" +"h" "" "" "h" // not in DJSRE +"i" "" "" "I" +"j" "" "" "j" +"k" "" "" "k" +"l" "" "" "l" +"m" "" "" "m" +"n" "" "" "n" +"o" "" "" "o" +"p" "" "" "p" +"r" "" "" "r" +"s" "" "" "s" +"t" "" "" "t" +"u" "" "" "u" +"v" "" "" "v" +"z" "" "" "z" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_spanish.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_spanish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_spanish.txt new file mode 100644 index 0000000..03dc04a --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_spanish.txt @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Ashkenazic = Argentina + +// CONSONANTS +"ñ" "" "" "(n|nj)" + +"ch" "" "" "(tS|dZ)" // dZ is typical for Argentina +"h" "[bdgt]" "" "" // translit. from Arabic +"h" "" "$" "" // foreign + +"j" "" "" "x" +"x" "" "" "ks" +"ll" "" "" "(l|Z)" // Z is typical for Argentina, only Ashkenazic +"w" "" "" "v" // foreign words + +"v" "" "" "(b|v)" +"b" "" "" "(b|v)" +"m" "" "[bpvf]" "(m|n)" + +"c" "" "[ei]" "s" +"c" "" "" "k" + +"z" "" "" "(z|s)" // as "c" before "e" or "i", in Spain it is like unvoiced English "th" + +"gu" "" "[ei]" "(g|gv)" // "gv" because "u" can actually be "ü" +"g" "" "[ei]" "(x|g)" // "g" only for foreign words + +"qu" "" "" "k" +"q" "" "" "k" + +"uo" "" "" "(vo|o)" +"u" "" "[aei]" "v" + +"y" "" "" "(i|j|S|Z)" // S or Z are peculiar to South America; only Ashkenazic + + // VOWELS +"ü" "" "" "v" +"á" "" "" "a" +"é" "" "" "e" +"í" "" "" "i" +"ó" "" "" "o" +"ú" "" "" "u" + + // TRIVIAL +"a" "" "" "a" +"d" "" "" "d" +"e" "" "" "E" // Only Ashkenazic +"f" "" "" "f" +"g" "" "" "g" +"h" "" "" "h" +"i" "" "" "I" // Only Ashkenazic +"k" "" "" "k" +"l" "" "" "l" +"m" "" "" "m" +"n" "" "" "n" +"o" "" "" "o" +"p" "" "" "p" +"r" "" "" "r" +"s" "" "" "s" +"t" "" "" "t" +"u" "" "" "u" 
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_any.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_any.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_any.txt new file mode 100644 index 0000000..6627aac --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_any.txt @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// GENERIC +// A, E, I, O, P, U should create variants, but a, e, i, o, u should not create any new variant +// Q = ü ; Y = ä = ö +// EE = final "e" (english or french) + +// VOWELS + // "ALL" DIPHTHONGS are interchangeable BETWEEN THEM and with monophthongs of which they are composed ("D" means "diphthong") + // {a,o} are totally interchangeable if non-stressed; in German "a/o" can actually be from "ä/ö" (that are equivalent to "e") + // {i,e} are interchangeable if non-stressed, while in German "u" can actually be from "ü" (that is equivalent to "i") + +"mb" "" "" "(mb|b[greeklatin])" +"mp" "" "" "(mp|b[greeklatin])" +"ng" "" "" "(ng|g[greeklatin])" + +"B" "" "" "(b|v[spanish])" +"V" "" "" "(v|b[spanish])" + + // French word-final and word-part-final letters +"t" "" "$" "(t|[french])" +"g" "n" "$" "(g|[french])" +"k" "n" "$" "(k|[french])" +"p" "" "$" "(p|[french])" +"r" "[Ee]" "$" "(r|[french])" +"s" "" "$" "(s|[french])" +"t" "[aeiouAEIOU]" "[^aeiouAEIOU]" "(t|[french])" // Petitjean +"s" "[aeiouAEIOU]" "[^aeiouAEIOU]" "(s|[french])" // Groslot, Grosleau + //array("p" "[aeiouAEIOU]" "[^aeiouAEIOU]" "(p|[$french])" + +"I" "[aeiouAEIBFOUQY]" "" "i" +"I" "" "[^aeiouAEBFIOU]e" "(Q[german]|i|D[english])" // "line" +"I" "" "$" "i" +"I" "" "[^k]$" "i" +"Ik" "[lr]" "$" "(ik|Qk[german])" +"Ik" "" "$" "ik" +"sIts" "" "$" "(sits|sQts[german])" +"Its" "" "$" "its" +"I" "" "" "(Q[german]|i)" + +"lEE" "[bdfgkmnprsStvzZ]" "" "(li|il[english])" // Apple = Appel +"rEE" "[bdfgkmnprsStvzZ]" "" "(ri|ir[english])" +"lE" "[bdfgkmnprsStvzZ]" "" "(li|il[english]|lY[german])" // Applebaum < Appelbaum +"rE" "[bdfgkmnprsStvzZ]" "" "(ri|ir[english]|rY[german])" + +"ea" "" "" "(D|a|i)" + +"au" "" "" "(D|a|u)" +"ou" "" "" "(D|o|u)" +"eu" "" "" "(D|e|u)" + +"ai" "" "" "(D|a|i)" +"Ai" "" "" "(D|a|i)" +"oi" "" "" "(D|o|i)" +"Oi" "" "" "(D|o|i)" +"ui" "" "" "(D|u|i)" +"Ui" "" "" "(D|u|i)" +"ei" "" "" "(D|i)" +"Ei" "" "" "(D|i)" + +"iA" "" "$" "(ia|io)" +"iA" "" "" "(ia|io|iY[german])" 
+"A" "" "[^aeiouAEBFIOU]e" "(a|o|Y[german]|D[english])" // "plane" + + +"E" "i[^aeiouAEIOU]" "" "(i|Y[german]|[english])" // Wineberg (vineberg/vajneberg) --> vajnberg +"E" "a[^aeiouAEIOU]" "" "(i|Y[german]|[english])" // Shaneberg (shaneberg/shejneberg) --> shejnberg + +"E" "" "[fklmnprst]$" "i" +"E" "" "ts$" "i" +"E" "" "$" "i" +"E" "[DaoiuAOIUQY]" "" "i" +"E" "" "[aoAOQY]" "i" +"E" "" "" "(i|Y[$german])" + +"P" "" "" "(o|u)" + +"O" "" "[fklmnprstv]$" "o" +"O" "" "ts$" "o" +"O" "" "$" "o" +"O" "[oeiuQY]" "" "o" +"O" "" "" "(o|Y[$german])" +"O" "" "" "o" + +"A" "" "[fklmnprst]$" "(a|o)" +"A" "" "ts$" "(a|o)" +"A" "" "$" "(a|o)" +"A" "[oeiuQY]" "" "(a|o)" +"A" "" "" "(a|o|Y[$german])" +"A" "" "" "(a|o)" + +"U" "" "$" "u" +"U" "[DoiuQY]" "" "u" +"U" "" "[^k]$" "u" +"Uk" "[lr]" "$" "(uk|Qk[german])" +"Uk" "" "$" "uk" +"sUts" "" "$" "(suts|sQts[german])" +"Uts" "" "$" "uts" +"U" "" "" "(u|Q[german])" +"U" "" "" "u" + +"e" "" "[fklmnprstv]$" "i" +"e" "" "ts$" "i" +"e" "" "$" "i" +"e" "[DaoiuAOIUQY]" "" "i" +"e" "" "[aoAOQY]" "i" +"e" "" "" "(i|Y[german])" + +"a" "" "" "(a|o)" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_arabic.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_arabic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_arabic.txt new file mode 100644 index 0000000..a8cad65 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_arabic.txt @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +"j1" "" "" "(ja|je|jo|ju|j)" +"1" "" "" "(a|e|i|o|u|)" +"u" "" "" "(o|u)" +"i" "" "" "(i|e)" +"p" "" "$" "p" +"p" "" "" "(p|b)"
