http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_common.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_common.txt new file mode 100644 index 0000000..fa0096a --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_common.txt @@ -0,0 +1,223 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// GENERIC + +#include gen_exact_approx_common + +// DUTCH +"van" "^" "[bp]" "(vam|)" +"van" "^" "" "(van|)" + +// REGRESSIVE ASSIMILATION OF CONSONANTS +"n" "" "[bp]" "m" + +// PECULIARITY OF "h" +"h" "" "" "" +"H" "" "" "(x|)" + +// "e" and "i" ARE TO BE OMITTED BEFORE (SYLLABIC) n & l: Halperin=Halpern; Frankel = Frankl, Finkelstein = Finklstein +// but Andersen & Anderson should match +"sen" "[rmnl]" "$" "(zn|zon)" +"sen" "" "$" "(sn|son)" +"sEn" "[rmnl]" "$" "(zn|zon)" +"sEn" "" "$" "(sn|son)" + +"e" "[bdfgklmnprsStvzZ]" "[ln]$" "" +"i" "[bdfgklmnprsStvzZ]" "[ln]$" "" +"E" "[bdfgklmnprsStvzZ]" "[ln]$" "" +"I" "[bdfgklmnprsStvzZ]" "[ln]$" "" +"Q" "[bdfgklmnprsStvzZ]" "[ln]$" "" +"Y" "[bdfgklmnprsStvzZ]" "[ln]$" "" + +"e" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" "" +"i" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" "" +"E" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" "" +"I" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" "" +"Q" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" "" +"Y" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" "" + +"lEs" "" "" "(lEs|lz)" // Applebaum < Appelbaum (English + blend English-something forms as Finklestein) +"lE" "[bdfgkmnprStvzZ]" "" "(lE|l)" // Applebaum < Appelbaum (English + blend English-something forms as Finklestein) + +// SIMPLIFICATION: (TRIPHTHONGS & DIPHTHONGS) -> ONE GENERIC DIPHTHONG "D" +"aue" "" "" "D" +"oue" "" "" "D" + +"AvE" "" "" "(D|AvE)" +"Ave" "" "" "(D|Ave)" +"avE" "" "" "(D|avE)" +"ave" "" "" "(D|ave)" + +"OvE" "" "" "(D|OvE)" +"Ove" "" "" "(D|Ove)" +"ovE" "" "" "(D|ovE)" +"ove" "" "" "(D|ove)" + +"ea" "" "" "(D|ea)" +"EA" "" "" "(D|EA)" +"Ea" "" "" "(D|Ea)" +"eA" "" "" "(D|eA)" + +"aji" "" "" "D" +"ajI" "" "" "D" +"aje" "" "" "D" +"ajE" "" "" "D" + +"Aji" "" "" "D" +"AjI" "" "" "D" +"Aje" "" "" "D" +"AjE" "" "" "D" + +"oji" "" "" "D" +"ojI" "" "" "D" +"oje" "" "" "D" +"ojE" "" "" "D" + +"Oji" "" "" "D" +"OjI" "" "" "D" +"Oje" "" "" "D" +"OjE" "" "" "D" + +"eji" "" "" "D" +"ejI" "" "" "D" +"eje" "" "" "D" +"ejE" "" "" "D" + +"Eji" "" "" "D" +"EjI" "" "" "D" +"Eje" "" "" "D" +"EjE" "" "" "D" + +"uji" "" "" "D" +"ujI" "" "" "D" +"uje" "" "" "D" +"ujE" "" "" "D" + +"Uji" "" "" "D" +"UjI" "" "" "D" +"Uje" "" "" "D" +"UjE" "" "" "D" + +"iji" "" "" "D" +"ijI" "" "" "D" +"ije" "" "" "D" +"ijE" "" "" "D" + +"Iji" "" "" "D" +"IjI" "" "" "D" +"Ije" "" "" "D" +"IjE" "" "" "D" + +"aja" "" "" "D" +"ajA" "" "" "D" +"ajo" "" "" "D" +"ajO" "" "" "D" +"aju" "" "" "D" +"ajU" "" "" "D" + +"Aja" "" "" "D" +"AjA" "" "" "D" +"Ajo" "" "" "D" +"AjO" "" "" "D" +"Aju" "" "" "D" +"AjU" "" "" "D" + +"oja" "" "" "D" +"ojA" "" "" "D" +"ojo" "" "" "D" +"ojO" "" "" "D" +"Aju" "" "" "D" +"AjU" "" "" "D" + +"Oja" "" "" "D" +"OjA" "" "" "D" +"Ojo" "" "" "D" +"OjO" "" "" "D" +"Aju" "" "" "D" +"AjU" "" "" "D" + +"eja" "" "" "D" +"ejA" "" "" "D" +"ejo" "" "" "D" +"ejO" "" "" "D" +"Aju" "" "" "D" +"AjU" "" "" "D" + +"Eja" "" "" "D" +"EjA" "" "" "D" +"Ejo" "" "" "D" +"EjO" "" "" "D" +"Aju" "" "" "D" +"AjU" "" "" "D" + +"uja" "" "" "D" +"ujA" "" "" "D" +"ujo" "" "" "D" +"ujO" "" "" "D" +"Aju" "" "" "D" +"AjU" "" "" "D" + +"Uja" "" "" "D" +"UjA" "" "" "D" +"Ujo" "" "" "D" +"UjO" "" "" "D" +"Aju" "" "" "D" +"AjU" "" "" "D" + +"ija" "" "" "D" +"ijA" "" "" "D" +"ijo" "" "" "D" +"ijO" "" "" "D" +"Aju" "" "" "D" +"AjU" "" "" "D" + +"Ija" "" "" "D" +"IjA" "" "" "D" +"Ijo" "" "" "D" +"IjO" "" "" "D" +"Aju" "" "" "D" +"AjU" "" "" "D" + +"j" "" "" "i" + +// lander = lender = länder +"lYndEr" "" "$" "lYnder" +"lander" "" "$" "lYnder" +"lAndEr" "" "$" "lYnder" +"lAnder" "" "$" "lYnder" +"landEr" "" "$" "lYnder" +"lender" "" "$" "lYnder" +"lEndEr" "" "$" "lYnder" +"lendEr" "" "$" "lYnder" +"lEnder" "" "$" "lYnder" + +// CONSONANTS {z & Z; s & S} are approximately interchangeable +"s" "" "[rmnl]" "z" +"S" "" "[rmnl]" "z" +"s" "[rmnl]" "" "z" +"S" "[rmnl]" "" "z" + +"dS" "" "$" "S" +"dZ" "" "$" "S" +"Z" "" "$" "S" +"S" "" "$" "(S|s)" +"z" "" "$" "(S|s)" + +"S" "" "" "s" +"dZ" "" "" "z" +"Z" "" "" "z"
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_cyrillic.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_cyrillic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_cyrillic.txt new file mode 100644 index 0000000..d470aa8 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_cyrillic.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include gen_approx_russian \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_czech.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_czech.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_czech.txt new file mode 100644 index 0000000..b542861 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_czech.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include gen_approx_french \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_dutch.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_dutch.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_dutch.txt new file mode 100644 index 0000000..b542861 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_dutch.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include gen_approx_french \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_english.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_english.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_english.txt new file mode 100644 index 0000000..84d8174 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_english.txt @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// VOWELS +"I" "" "[^aEIeiou]e" "(Q|i|D)" // like in "five" +"I" "" "$" "i" +"I" "[aEIeiou]" "" "i" +"I" "" "[^k]$" "i" +"Ik" "[lr]" "$" "(ik|Qk)" +"Ik" "" "$" "ik" +"sIts" "" "$" "(sits|sQts)" +"Its" "" "$" "its" +"I" "" "" "(i|Q)" + +"lE" "[bdfgkmnprsStvzZ]" "" "(il|li|lY)" // Applebaum < Appelbaum + +"au" "" "" "(D|a|u)" +"ou" "" "" "(D|o|u)" +"ai" "" "" "(D|a|i)" +"oi" "" "" "(D|o|i)" +"ui" "" "" "(D|u|i)" + +"E" "D[^aeiEIou]" "" "(i|)" // Weinberg, Shaneberg (shaneberg/shejneberg) --> shejnberg +"e" "D[^aeiEIou]" "" "(i|)" + +"e" "" "" "i" +"E" "" "[fklmnprsStv]$" "i" +"E" "" "ts$" "i" +"E" "[DaoiEuQY]" "" "i" +"E" "" "[aoQY]" "i" +"E" "" "" "(Y|i)" + +"a" "" "" "(a|o)" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_french.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_french.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_french.txt new file mode 100644 index 0000000..93a4980 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_french.txt @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +"au" "" "" "(D|a|u)" +"ou" "" "" "(D|o|u)" +"ai" "" "" "(D|a|i)" +"oi" "" "" "(D|o|i)" +"ui" "" "" "(D|u|i)" + +"a" "" "" "(a|o)" +"e" "" "" "i" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_german.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_german.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_german.txt new file mode 100644 index 0000000..14a5db7 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_german.txt @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +"I" "" "$" "i" +"I" "[aeiAEIOUouQY]" "" "i" +"I" "" "[^k]$" "i" +"Ik" "[lr]" "$" "(ik|Qk)" +"Ik" "" "$" "ik" +"sIts" "" "$" "(sits|sQts)" +"Its" "" "$" "its" +"I" "" "" "(Q|i)" + +"AU" "" "" "(D|a|u)" +"aU" "" "" "(D|a|u)" +"Au" "" "" "(D|a|u)" +"au" "" "" "(D|a|u)" +"ou" "" "" "(D|o|u)" +"OU" "" "" "(D|o|u)" +"oU" "" "" "(D|o|u)" +"Ou" "" "" "(D|o|u)" +"ai" "" "" "(D|a|i)" +"Ai" "" "" "(D|a|i)" +"oi" "" "" "(D|o|i)" +"Oi" "" "" "(D|o|i)" +"ui" "" "" "(D|u|i)" +"Ui" "" "" "(D|u|i)" + +"e" "" "" "i" + +"E" "" "[fklmnprst]$" "i" +"E" "" "ts$" "i" +"E" "" "$" "i" +"E" "[DaoAOUiuQY]" "" "i" +"E" "" "[aoAOQY]" "i" +"E" "" "" "(Y|i)" + +"O" "" "$" "o" +"O" "" "[fklmnprst]$" "o" +"O" "" "ts$" "o" +"O" "[aoAOUeiuQY]" "" "o" +"O" "" "" "(o|Y)" + +"a" "" "" "(a|o)" + +"A" "" "$" "(a|o)" +"A" "" "[fklmnprst]$" "(a|o)" +"A" "" "ts$" "(a|o)" +"A" "[aoeOUiuQY]" "" "(a|o)" +"A" "" "" "(a|o|Y)" + +"U" "" "$" "u" +"U" "[DaoiuUQY]" "" "u" +"U" "" "[^k]$" "u" +"Uk" "[lr]" "$" "(uk|Qk)" +"Uk" "" "$" "uk" +"sUts" "" "$" "(suts|sQts)" +"Uts" "" "$" "uts" +"U" "" "" "(u|Q)" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greek.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greek.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greek.txt new file mode 100644 index 0000000..b542861 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greek.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include gen_approx_french \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greeklatin.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greeklatin.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greeklatin.txt new file mode 100644 index 0000000..e492b97 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greeklatin.txt @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include gen_approx_french + +"N" "" "" "" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hebrew.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hebrew.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hebrew.txt new file mode 100644 index 0000000..0990004 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hebrew.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// empty \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hungarian.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hungarian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hungarian.txt new file mode 100644 index 0000000..46ebf29 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hungarian.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include gen_approx_french http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_italian.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_italian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_italian.txt new file mode 100644 index 0000000..46ebf29 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_italian.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include gen_approx_french http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_polish.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_polish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_polish.txt new file mode 100644 index 0000000..ce577af --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_polish.txt @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +"aiB" "" "[bp]" "(D|Dm)" +"oiB" "" "[bp]" "(D|Dm)" +"uiB" "" "[bp]" "(D|Dm)" +"eiB" "" "[bp]" "(D|Dm)" +"EiB" "" "[bp]" "(D|Dm)" +"iiB" "" "[bp]" "(D|Dm)" +"IiB" "" "[bp]" "(D|Dm)" + +"aiB" "" "[dgkstvz]" "(D|Dn)" +"oiB" "" "[dgkstvz]" "(D|Dn)" +"uiB" "" "[dgkstvz]" "(D|Dn)" +"eiB" "" "[dgkstvz]" "(D|Dn)" +"EiB" "" "[dgkstvz]" "(D|Dn)" +"iiB" "" "[dgkstvz]" "(D|Dn)" +"IiB" "" "[dgkstvz]" "(D|Dn)" + +"B" "" "[bp]" "(o|om|im)" +"B" "" "[dgkstvz]" "(o|on|in)" +"B" "" "" "o" + +"aiF" "" "[bp]" "(D|Dm)" +"oiF" "" "[bp]" "(D|Dm)" +"uiF" "" "[bp]" "(D|Dm)" +"eiF" "" "[bp]" "(D|Dm)" +"EiF" "" "[bp]" "(D|Dm)" +"iiF" "" "[bp]" "(D|Dm)" +"IiF" "" "[bp]" "(D|Dm)" + +"aiF" "" "[dgkstvz]" "(D|Dn)" +"oiF" "" "[dgkstvz]" "(D|Dn)" +"uiF" "" "[dgkstvz]" "(D|Dn)" +"eiF" "" "[dgkstvz]" "(D|Dn)" +"EiF" "" "[dgkstvz]" "(D|Dn)" +"iiF" "" "[dgkstvz]" "(D|Dn)" +"IiF" "" "[dgkstvz]" "(D|Dn)" + +"F" "" "[bp]" "(i|im|om)" +"F" "" "[dgkstvz]" "(i|in|on)" +"F" "" "" "i" + +"P" "" "" "(o|u)" + +"I" "" "$" "i" +"I" "" "[^k]$" "i" +"Ik" "[lr]" "$" "(ik|Qk)" +"Ik" "" "$" "ik" +"sIts" "" "$" "(sits|sQts)" +"Its" "" "$" "its" +"I" "[aeiAEBFIou]" "" "i" +"I" "" "" "(i|Q)" + +"au" "" "" "(D|a|u)" +"ou" "" "" "(D|o|u)" +"ai" "" "" "(D|a|i)" +"oi" "" "" "(D|o|i)" +"ui" "" "" "(D|u|i)" + +"a" "" "" "(a|o)" +"e" "" "" "i" + +"E" "" "[fklmnprst]$" "i" +"E" "" "ts$" "i" +"E" "" "$" "i" +"E" "[DaoiuQ]" "" "i" +"E" "" "[aoQ]" "i" +"E" "" "" "(Y|i)" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_portuguese.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_portuguese.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_portuguese.txt new file mode 100644 index 0000000..b542861 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_portuguese.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include gen_approx_french \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_romanian.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_romanian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_romanian.txt new file mode 100644 index 0000000..f5c5894 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_romanian.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include gen_approx_polish \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_russian.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_russian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_russian.txt new file mode 100644 index 0000000..9138487 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_russian.txt @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// VOWELS +"I" "" "$" "i" +"I" "" "[^k]$" "i" +"Ik" "[lr]" "$" "(ik|Qk)" +"Ik" "" "$" "ik" +"sIts" "" "$" "(sits|sQts)" +"Its" "" "$" "its" +"I" "[aeiEIou]" "" "i" +"I" "" "" "(i|Q)" + +"au" "" "" "(D|a|u)" +"ou" "" "" "(D|o|u)" +"ai" "" "" "(D|a|i)" +"oi" "" "" "(D|o|i)" +"ui" "" "" "(D|u|i)" + +"om" "" "[bp]" "(om|im)" +"on" "" "[dgkstvz]" "(on|in)" +"em" "" "[bp]" "(im|om)" +"en" "" "[dgkstvz]" "(in|on)" +"Em" "" "[bp]" "(im|Ym|om)" +"En" "" "[dgkstvz]" "(in|Yn|on)" + +"a" "" "" "(a|o)" +"e" "" "" "i" + +"E" "" "[fklmnprsStv]$" "i" +"E" "" "ts$" "i" +"E" "[DaoiuQ]" "" "i" +"E" "" "[aoQ]" "i" +"E" "" "" "(Y|i)" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_spanish.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_spanish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_spanish.txt new file mode 100644 index 0000000..fb3e661 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_spanish.txt @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include gen_approx_french + +"B" "" "" "(b|v)" +"V" "" "" "(b|v)" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_turkish.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_turkish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_turkish.txt new file mode 100644 index 0000000..b542861 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_turkish.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include gen_approx_french \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_any.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_any.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_any.txt new file mode 100644 index 0000000..28fafb9 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_any.txt @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// GENERAL + // A, E, I, O, P, U should create variants, + // EE = final "e" (english & french) + // V, B from Spanish + // but a, e, i, o, u should not create any new variant +"EE" "" "$" "e" + +"A" "" "" "a" +"E" "" "" "e" +"I" "" "" "i" +"O" "" "" "o" +"P" "" "" "o" +"U" "" "" "u" + +"B" "" "" "b" +"V" "" "" "v" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_approx_common.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_approx_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_approx_common.txt new file mode 100644 index 0000000..1093912 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_approx_common.txt @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// GENERAL +"h" "" "$" "" + +// VOICED - UNVOICED CONSONANTS +"b" "" "[fktSs]" "p" +"b" "" "p" "" +"b" "" "$" "p" +"p" "" "[vgdZz]" "b" // Ashk: "v" excluded (everythere) +"p" "" "b" "" + +"v" "" "[pktSs]" "f" +"v" "" "f" "" +"v" "" "$" "f" +"f" "" "[vbgdZz]" "v" +"f" "" "v" "" + +"g" "" "[pftSs]" "k" +"g" "" "k" "" +"g" "" "$" "k" +"k" "" "[vbdZz]" "g" +"k" "" "g" "" + +"d" "" "[pfkSs]" "t" +"d" "" "t" "" +"d" "" "$" "t" +"t" "" "[vbgZz]" "d" +"t" "" "d" "" + +"s" "" "dZ" "" +"s" "" "tS" "" + +"z" "" "[pfkSt]" "s" +"z" "" "[sSzZ]" "" +"s" "" "[sSzZ]" "" +"Z" "" "[sSzZ]" "" +"S" "" "[sSzZ]" "" + +// SIMPLIFICATION OF CONSONANT CLUSTERS +"jnm" "" "" "jm" + +// DOUBLE --> SINGLE +"ji" "^" "" "i" +"jI" "^" "" "I" + +"a" "" "[aA]" "" +"a" "A" "" "" +"A" "" "A" "" + +"b" "" "b" "" +"d" "" "d" "" +"f" "" "f" "" +"g" "" "g" "" +"j" "" "j" "" +"k" "" "k" "" +"l" "" "l" "" +"m" "" "m" "" +"n" "" "n" "" +"p" "" "p" "" +"r" "" "r" "" +"t" "" "t" "" +"v" "" "v" "" +"z" "" "z" "" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_arabic.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_arabic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_arabic.txt new file mode 100644 index 0000000..4f2ead1 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_arabic.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +"l" "" "" "" \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_common.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_common.txt new file mode 100644 index 0000000..742fc71 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_common.txt @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include gen_exact_approx_common + +"H" "" "" "" + +// VOICED - UNVOICED CONSONANTS +"s" "[^t]" "[bgZd]" "z" +"Z" "" "[pfkst]" "S" +"Z" "" "$" "S" +"S" "" "[bgzd]" "Z" +"z" "" "$" "s" + +"ji" "[aAoOeEiIuU]" "" "j" +"jI" "[aAoOeEiIuU]" "" "j" +"je" "[aAoOeEiIuU]" "" "j" +"jE" "[aAoOeEiIuU]" "" "j" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_cyrillic.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_cyrillic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_cyrillic.txt new file mode 100644 index 0000000..474f61b --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_cyrillic.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include gen_exact_russian \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_czech.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_czech.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_czech.txt new file mode 100644 index 0000000..474f61b --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_czech.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include gen_exact_russian \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_dutch.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_dutch.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_dutch.txt new file mode 100644 index 0000000..0990004 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_dutch.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// empty \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_english.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_english.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_english.txt new file mode 100644 index 0000000..474f61b --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_english.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include gen_exact_russian \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_french.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_french.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_french.txt new file mode 100644 index 0000000..0990004 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_french.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// empty \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_german.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_german.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_german.txt new file mode 100644 index 0000000..7a648f2 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_german.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include gen_exact_any \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greek.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greek.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greek.txt new file mode 100644 index 0000000..0990004 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greek.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// empty \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greeklatin.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greeklatin.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greeklatin.txt new file mode 100644 index 0000000..325ff34 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greeklatin.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +"N" "" "" "n" \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hebrew.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hebrew.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hebrew.txt new file mode 100644 index 0000000..0990004 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hebrew.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// empty \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hungarian.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hungarian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hungarian.txt new file mode 100644 index 0000000..0990004 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hungarian.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// empty \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_italian.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_italian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_italian.txt new file mode 100644 index 0000000..0990004 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_italian.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// empty \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_polish.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_polish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_polish.txt new file mode 100644 index 0000000..babed2a --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_polish.txt @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +"B" "" "" "a" +"F" "" "" "e" +"P" "" "" "o" + +"E" "" "" "e" +"I" "" "" "i" \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_portuguese.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_portuguese.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_portuguese.txt new file mode 100644 index 0000000..0990004 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_portuguese.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// empty \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_romanian.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_romanian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_romanian.txt new file mode 100644 index 0000000..0990004 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_romanian.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// empty \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_russian.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_russian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_russian.txt new file mode 100644 index 0000000..0a016e0 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_russian.txt @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +"E" "" "" "e" +"I" "" "" "i" \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_spanish.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_spanish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_spanish.txt new file mode 100644 index 0000000..e555114 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_spanish.txt @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +"B" "" "" "b" +"V" "" "" "v" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_turkish.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_turkish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_turkish.txt new file mode 100644 index 0000000..0990004 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_turkish.txt @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// empty \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_hebrew_common.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_hebrew_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_hebrew_common.txt new file mode 100644 index 0000000..2ae2d9d --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_hebrew_common.txt @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include gen_exact_approx_common + +"ts" "" "" "C" // for not confusion Gutes [=guts] and Guts [=guc] +"tS" "" "" "C" // same reason +"S" "" "" "s" +"p" "" "" "f" +"b" "^" "" "b" +"b" "" "" "(b|v)" + +"ja" "" "" "i" +"jA" "" "" "i" +"je" "" "" "i" +"jE" "" "" "i" +"aj" "" "" "i" +"Aj" "" "" "i" +"I" "" "" "i" +"j" "" "" "i" + +"a" "^" "" "1" +"A" "^" "" "1" +"e" "^" "" "1" +"E" "^" "" "1" +"Y" "^" "" "1" + +"a" "" "$" "1" +"A" "" "$" "1" +"e" "" "$" "1" +"E" "" "$" "1" +"Y" "" "$" "1" + +"a" "" "" "" +"A" "" "" "" +"e" "" "" "" +"E" "" "" "" +"Y" "" "" "" + +"oj" "^" "" "(u|vi)" +"Oj" "^" "" "(u|vi)" +"uj" "^" "" "(u|vi)" +"Uj" "^" "" "(u|vi)" + +"oj" "" "" "u" +"Oj" "" "" "u" +"uj" "" "" "u" +"Uj" "" "" "u" + +"ou" "^" "" "(u|v|1)" +"o" "^" "" "(u|v|1)" +"O" "^" "" "(u|v|1)" +"U" "^" "" "(u|v|1)" +"u" "^" "" "(u|v|1)" + +"o" "" "$" "(u|1)" +"O" "" "$" "(u|1)" +"u" "" "$" "(u|1)" +"U" "" "$" "(u|1)" + +"ou" "" "" "u" +"o" "" "" "u" +"O" "" "" "u" +"U" "" "" "u" + +"VV" "" "" "u" // alef/ayin + vov from ruleshebrew +"V" "" "" "v" // tsvey-vov from ruleshebrew;; only Ashkenazic +"L" "^" "" "1" // alef/ayin from ruleshebrew +"L" "" "$" "1" // alef/ayin from ruleshebrew +"L" "" "" " " // alef/ayin from ruleshebrew +"WW" "^" "" "(vi|u)" // vav-yod from ruleshebrew +"WW" "" "" "u" // vav-yod from ruleshebrew +"W" "^" "" "(u|v)" // vav from ruleshebrew +"W" "" "" "u" // vav from ruleshebrew + + //"g" "" "" "(g|Z)" + //"z" "" "" "(z|Z)" + //"d" "" "" "(d|dZ)" + +"TB" "" "$" "(t|s)" // tav from ruleshebrew; only Ashkenazic +"TB" "" "" "t" // tav from ruleshebrew; only Ashkenazic +"T" "" "" "t" // tet from ruleshebrew + + //"k" "" "" "(k|x)" + //"x" "" "" "(k|x)" +"K" "" "" "k" // kof and initial kaf from ruleshebrew +"X" "" "" "x" // khet and final kaf from ruleshebrew + +"H" "^" "" "(x|1)" +"H" "" "$" "(x|1)" +"H" "" "" "(x|)" +"h" "^" "" "1" +"h" "" "" "" \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_languages.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_languages.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_languages.txt new file mode 100644 index 0000000..50f1118 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_languages.txt @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +any +arabic +cyrillic +czech +dutch +english +french +german +greek +greeklatin +hebrew +hungarian +italian +polish +portuguese +romanian +russian +spanish +turkish http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_any.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_any.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_any.txt new file mode 100644 index 0000000..57bb939 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_any.txt @@ -0,0 +1,367 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + // format of each entry rule in the table + // (pattern, left context, right context, phonetic) + // where + // pattern is a sequence of characters that might appear in the word to be transliterated + // left context is the context that precedes the pattern + // right context is the context that follows the pattern + // phonetic is the result that this rule generates + // + // note that both left context and right context can be regular expressions + // ex: left context of ^ would mean start of word + // left context of [aeiouy] means following a vowel + // right context of [^aeiouy] means preceding a consonant + // right context of e$ means preceding a final e + +//GENERIC + +// CONVERTING FEMININE TO MASCULINE +"yna" "" "$" "(in[russian]|ina)" +"ina" "" "$" "(in[russian]|ina)" +"liova" "" "$" "(lova|lof[russian]|lef[russian])" +"lova" "" "$" "(lova|lof[russian]|lef[russian]|l[czech]|el[czech])" +"kova" "" "$" "(kova|kof[russian]|k[czech]|ek[czech])" +"ova" "" "$" "(ova|of[russian]|[czech])" +"ová" "" "$" "(ova|[czech])" +"eva" "" "$" "(eva|ef[russian])" +"aia" "" "$" "(aja|i[russian])" +"aja" "" "$" "(aja|i[russian])" +"aya" "" "$" "(aja|i[russian])" + +"lowa" "" "$" "(lova|lof[polish]|l[polish]|el[polish])" +"kowa" "" "$" "(kova|kof[polish]|k[polish]|ek[polish])" +"owa" "" "$" "(ova|of[polish]|)" +"lowna" "" "$" "(lovna|levna|l[polish]|el[polish])" +"kowna" "" "$" "(kovna|k[polish]|ek[polish])" +"owna" "" "$" "(ovna|[polish])" +"lówna" "" "$" "(l|el)" // polish +"kówna" "" "$" "(k|ek)" // polish +"ówna" "" "$" "" // polish +"á" "" "$" "(a|i[czech])" +"a" "" "$" "(a|i[polish+czech])" + +// CONSONANTS +"pf" "" "" "(pf|p|f)" +"que" "" "$" "(k[french]|ke|kve)" +"qu" "" "" "(kv|k)" + +"m" "" "[bfpv]" "(m|n)" +"m" "[aeiouy]" "[aeiouy]" "m" +"m" "[aeiouy]" "" "(m|n[french+portuguese])" // nasal + +"ly" "" "[au]" "l" +"li" "" "[au]" "l" +"lio" "" "" "(lo|le[russian])" +"lyo" "" "" "(lo|le[russian])" + //array("ll" "" "" "(l|J[spanish])" // Disabled Argentinian rule +"lt" "u" "$" "(lt|[french])" + +"v" "^" "" "(v|f[german]|b[spanish])" + +"ex" "" "[aáuiÃoóeéêy]" "(ez[portuguese]|eS[portuguese]|eks|egz)" +"ex" "" "[cs]" "(e[portuguese]|ek)" +"x" "u" "$" "(ks|[french])" + +"ck" "" "" "(k|tsk[polish+czech])" +"cz" "" "" "(tS|tsz[czech])" // Polish + + //Proceccing of "h" in various combinations +"rh" "^" "" "r" +"dh" "^" "" "d" +"bh" "^" "" "b" + +"ph" "" "" "(ph|f)" +"kh" "" "" "(x[russian+english]|kh)" + +"lh" "" "" "(lh|l[portuguese])" +"nh" "" "" "(nh|nj[portuguese])" + +"ssch" "" "" "S" // german +"chsch" "" "" "xS" // german +"tsch" "" "" "tS" // german + + ///"desch" "^" "" "deS" + ///"desh" "^" "" "(dES|de[french])" + ///"des" "^" "[^aeiouy]" "(dEs|de[french])" + +"sch" "[aeiouy]" "[ei]" "(S|StS[russian]|sk[romanian+italian])" +"sch" "[aeiouy]" "" "(S|StS[russian])" +"sch" "" "[ei]" "(sk[romanian+italian]|S|StS[russian])" +"sch" "" "" "(S|StS[russian])" +"ssh" "" "" "S" + +"sh" "" "[äöü]" "sh" // german +"sh" "" "[aeiou]" "(S[russian+english]|sh)" +"sh" "" "" "S" + +"zh" "" "" "(Z[english+russian]|zh|tsh[german])" + +"chs" "" "" "(ks[german]|xs|tSs[russian+english])" +"ch" "" "[ei]" "(x|tS[spanish+english+russian]|k[romanian+italian]|S[portuguese+french])" +"ch" "" "" "(x|tS[spanish+english+russian]|S[portuguese+french])" + +"th" "^" "" "t" // english+german+greeklatin +"th" "" "[äöüaeiou]" "(t[english+german+greeklatin]|th)" +"th" "" "" "t" // english+german+greeklatin + +"gh" "" "[ei]" "(g[romanian+italian+greeklatin]|gh)" + +"ouh" "" "[aioe]" "(v[french]|uh)" +"uh" "" "[aioe]" "(v|uh)" +"h" "." "$" "" // match h at the end of words, but not as a single letter +"h" "[aeiouyäöü]" "" "" // german +"h" "^" "" "(h|x[romanian+greeklatin]|H[english+romanian+polish+french+portuguese+italian+spanish])" + + //Processing of "ci" "ce" & "cy" +"cia" "" "" "(tSa[polish]|tsa)" // Polish +"ciÄ " "" "[bp]" "(tSom|tsom)" // Polish +"ciÄ " "" "" "(tSon[polish]|tson)" // Polish +"ciÄ" "" "[bp]" "(tSem[polish]|tsem)" // Polish +"ciÄ" "" "" "(tSen[polish]|tsen)" // Polish +"cie" "" "" "(tSe[polish]|tse)" // Polish +"cio" "" "" "(tSo[polish]|tso)" // Polish +"ciu" "" "" "(tSu[polish]|tsu)" // Polish + +"sci" "" "$" "(Si[italian]|stsi[polish+czech]|dZi[turkish]|tSi[polish+romanian]|tS[romanian]|si)" +"sc" "" "[ei]" "(S[italian]|sts[polish+czech]|dZ[turkish]|tS[polish+romanian]|s)" +"ci" "" "$" "(tsi[polish+czech]|dZi[turkish]|tSi[polish+romanian]|tS[romanian]|si)" +"cy" "" "" "(si|tsi[polish])" +"c" "" "[ei]" "(ts[polish+czech]|dZ[turkish]|tS[polish+romanian]|k[greeklatin]|s)" + + //Processing of "s" +"sç" "" "[aeiou]" "(s|stS[turkish])" +"ssz" "" "" "S" // polish +"sz" "^" "" "(S|s[hungarian])" // polish +"sz" "" "$" "(S|s[hungarian])" // polish +"sz" "" "" "(S|s[hungarian]|sts[german])" // polish +"ssp" "" "" "(Sp[german]|sp)" +"sp" "" "" "(Sp[german]|sp)" +"sst" "" "" "(St[german]|st)" +"st" "" "" "(St[german]|st)" +"ss" "" "" "s" +"sj" "^" "" "S" // dutch +"sj" "" "$" "S" // dutch +"sj" "" "" "(sj|S[dutch]|sx[spanish]|sZ[romanian+turkish])" + +"sia" "" "" "(Sa[polish]|sa[polish]|sja)" +"siÄ " "" "[bp]" "(Som[polish]|som)" // polish +"siÄ " "" "" "(Son[polish]|son)" // polish +"siÄ" "" "[bp]" "(Sem[polish]|sem)" // polish +"siÄ" "" "" "(Sen[polish]|sen)" // polish +"sie" "" "" "(se|sje|Se[polish]|zi[german])" + +"sio" "" "" "(So[polish]|so)" +"siu" "" "" "(Su[polish]|sju)" + +"si" "[äöëaáuiÃoóeéêy]" "" "(Si[polish]|si|zi[portuguese+french+italian+german])" +"si" "" "" "(Si[polish]|si|zi[german])" +"s" "[aáuiÃoóeéêy]" "[aáuÃoóeéêy]" "(s|z[portuguese+french+italian+german])" +"s" "" "[aeouäöë]" "(s|z[german])" +"s" "[aeiouy]" "[dglmnrv]" "(s|z|Z[portuguese]|[french])" // Groslot +"s" "" "[dglmnrv]" "(s|z|Z[portuguese])" + + //Processing of "g" +"gue" "" "$" "(k[french]|gve)" // portuguese+spanish +"gu" "" "[ei]" "(g[french]|gv[portuguese+spanish])" // portuguese+spanish +"gu" "" "[ao]" "gv" // portuguese+spanish +"guy" "" "" "gi" // french + +"gli" "" "" "(glI|l[italian])" +"gni" "" "" "(gnI|ni[italian+french])" +"gn" "" "[aeou]" "(n[italian+french]|nj[italian+french]|gn)" + +"ggie" "" "" "(je[greeklatin]|dZe)" // dZ is Italian +"ggi" "" "[aou]" "(j[greeklatin]|dZ)" // dZ is Italian + +"ggi" "[yaeiou]" "[aou]" "(gI|dZ[italian]|j[greeklatin])" +"gge" "[yaeiou]" "" "(gE|xe[spanish]|gZe[portuguese+french]|dZe[english+romanian+italian+spanish]|je[greeklatin])" +"ggi" "[yaeiou]" "" "(gI|xi[spanish]|gZi[portuguese+french]|dZi[english+romanian+italian+spanish]|i[greeklatin])" +"ggi" "" "[aou]" "(gI|dZ[italian]|j[greeklatin])" + +"gie" "" "$" "(ge|gi[german]|ji[french]|dZe[italian])" +"gie" "" "" "(ge|gi[german]|dZe[italian]|je[greeklatin])" +"gi" "" "[aou]" "(i[greeklatin]|dZ)" // dZ is Italian + +"ge" "[yaeiou]" "" "(gE|xe[spanish]|Ze[portuguese+french]|dZe[english+romanian+italian+spanish])" +"gi" "[yaeiou]" "" "(gI|xi[spanish]|Zi[portuguese+french]|dZi[english+romanian+italian+spanish])" +"ge" "" "" "(gE|xe[spanish]|hE[russian]|je[greeklatin]|Ze[portuguese+french]|dZe[english+romanian+italian+spanish])" +"gi" "" "" "(gI|xi[spanish]|hI[russian]|i[greeklatin]|Zi[portuguese+french]|dZi[english+romanian+italian+spanish])" +"gy" "" "[aeouáéóúüöÅű]" "(gi|dj[hungarian])" +"gy" "" "" "(gi|d[hungarian])" +"g" "[yaeiou]" "[aouyei]" "g" +"g" "" "[aouei]" "(g|h[russian])" + + //Processing of "j" +"ij" "" "" "(i|ej[dutch]|ix[spanish]|iZ[french+romanian+turkish+portuguese])" +"j" "" "[aoeiuy]" "(j|dZ[english]|x[spanish]|Z[french+romanian+turkish+portuguese])" + + //Processing of "z" +"rz" "t" "" "(S[polish]|r)" // polish +"rz" "" "" "(rz|rts[german]|Z[polish]|r[polish]|rZ[polish])" + +"tz" "" "$" "(ts|tS[english+german])" +"tz" "^" "" "(ts[english+german+russian]|tS[english+german])" +"tz" "" "" "(ts[english+german+russian]|tz)" + +"zia" "" "[bcdgkpstwzż]" "(Za[polish]|za[polish]|zja)" +"zia" "" "" "(Za[polish]|zja)" +"ziÄ " "" "[bp]" "(Zom[polish]|zom)" // polish +"ziÄ " "" "" "(Zon[polish]|zon)" // polish +"ziÄ" "" "[bp]" "(Zem[polish]|zem)" // polish +"ziÄ" "" "" "(Zen[polish]|zen)" // polish +"zie" "" "[bcdgkpstwzż]" "(Ze[polish]|ze[polish]|ze|tsi[german])" +"zie" "" "" "(ze|Ze[polish]|tsi[german])" +"zio" "" "" "(Zo[polish]|zo)" +"ziu" "" "" "(Zu[polish]|zju)" +"zi" "" "" "(Zi[polish]|zi|tsi[german]|dzi[italian]|tsi[italian]|si[spanish])" + +"z" "" "$" "(s|ts[german]|ts[italian]|S[portuguese])" // ts It, s/S/Z Port, s in Sp, z Fr +"z" "" "[bdgv]" "(z|dz[italian]|Z[portuguese])" // dz It, Z/z Port, z Sp & Fr +"z" "" "[ptckf]" "(s|ts[italian]|S[portuguese])" // ts It, s/S/z Port, z/s Sp + + // VOWELS +"aue" "" "" "aue" +"oue" "" "" "(oue|ve[french])" +"eau" "" "" "o" // French + +"ae" "" "" "(Y[german]|aje[russian]|ae)" +"ai" "" "" "aj" +"au" "" "" "(au|o[french])" +"ay" "" "" "aj" +"ão" "" "" "(au|an)" // Port +"ãe" "" "" "(aj|an)" // Port +"ãi" "" "" "(aj|an)" // Port +"ea" "" "" "(ea|ja[romanian])" +"ee" "" "" "(i[english]|aje[russian]|e)" +"ei" "" "" "(aj|ej)" +"eu" "" "" "(eu|Yj[german]|ej[german]|oj[german]|Y[dutch])" +"ey" "" "" "(aj|ej)" +"ia" "" "" "ja" +"ie" "" "" "(i[german]|e[polish]|ije[russian]|Q[dutch]|je)" +"ii" "" "$" "i" // russian +"io" "" "" "(jo|e[russian])" +"iu" "" "" "ju" +"iy" "" "$" "i" // russian +"oe" "" "" "(Y[german]|oje[russian]|u[dutch]|oe)" +"oi" "" "" "oj" +"oo" "" "" "(u[english]|o)" +"ou" "" "" "(ou|u[french+greeklatin]|au[dutch])" +"où" "" "" "u" // french +"oy" "" "" "oj" +"õe" "" "" "(oj|on)" // Port +"ua" "" "" "va" +"ue" "" "" "(Q[german]|uje[russian]|ve)" +"ui" "" "" "(uj|vi|Y[dutch])" +"uu" "" "" "(u|Q[dutch])" +"uo" "" "" "(vo|o)" +"uy" "" "" "uj" +"ya" "" "" "ja" +"ye" "" "" "(je|ije[russian])" +"yi" "^" "" "i" +"yi" "" "$" "i" // russian +"yo" "" "" "(jo|e[russian])" +"yu" "" "" "ju" +"yy" "" "$" "i" // russian + +"i" "[áóéê]" "" "j" +"y" "[áóéê]" "" "j" + +"e" "^" "" "(e|je[russian])" +"e" "" "$" "(e|EE[english+french])" + +// LANGUAGE SPECIFIC CHARACTERS +"Ä " "" "[bp]" "om" // polish +"Ä " "" "" "on" // polish +"ä" "" "" "Y" +"á" "" "" "a" // Port & Sp +"à " "" "" "a" +"â" "" "" "a" +"ã" "" "" "(a|an)" // Port +"Ä" "" "" "(e[romanian]|a)" // romanian +"Ä" "" "" "tS" // czech +"Ä" "" "" "(tS[polish]|ts)" // polish +"ç" "" "" "(s|tS[turkish])" +"Ä" "" "" "(d|dj[czech])" +"Ä" "" "[bp]" "em" // polish +"Ä" "" "" "en" // polish +"é" "" "" "e" +"è" "" "" "e" +"ê" "" "" "e" +"Ä" "" "" "(e|je[czech])" +"Ä" "" "" "" // turkish +"Ã" "" "" "i" +"î" "" "" "i" +"ı" "" "" "(i|e[turkish]|[turkish])" +"Å" "" "" "l" +"Å" "" "" "(n|nj[polish])" // polish +"ñ" "" "" "(n|nj[spanish])" +"ó" "" "" "(u[polish]|o)" +"ô" "" "" "o" // Port & Fr +"õ" "" "" "(o|on[portuguese]|Y[hungarian])" +"ò" "" "" "o" // Sp & It +"ö" "" "" "Y" +"Å" "" "" "(r|rZ[czech])" +"Å" "" "" "(S[polish]|s)" +"Å" "" "" "S" // romanian+turkish +"Å¡" "" "" "S" // czech +"Å£" "" "" "ts" // romanian +"Å¥" "" "" "(t|tj[czech])" +"ű" "" "" "Q" // hungarian +"ü" "" "" "(Q|u[portuguese+spanish])" +"ú" "" "" "u" +"ů" "" "" "u" // czech +"ù" "" "" "u" // french +"ý" "" "" "i" // czech +"ż" "" "" "Z" // polish +"ź" "" "" "(Z[polish]|z)" + +"Ã" "" "" "s" // german +"'" "" "" "" // russian +"\"" "" "" "" // russian + +"o" "" "[bcÄdgklÅmnÅrsÅtwzźż]" "(O|P[polish])" + + // LATIN ALPHABET +"a" "" "" "A" +"b" "" "" "B" +"c" "" "" "(k|ts[polish+czech]|dZ[turkish])" +"d" "" "" "d" +"e" "" "" "E" +"f" "" "" "f" + //array("g" "" "" "(g|x[dutch])" // Dutch sound disabled +"g" "" "" "g" +"h" "" "" "(h|x[romanian]|H[french+portuguese+italian+spanish])" +"i" "" "" "I" +"j" "" "" "(j|x[spanish]|Z[french+romanian+turkish+portuguese])" +"k" "" "" "k" +"l" "" "" "l" +"m" "" "" "m" +"n" "" "" "n" +"o" "" "" "O" +"p" "" "" "p" +"q" "" "" "k" +"r" "" "" "r" +"s" "" "" "(s|S[portuguese])" +"t" "" "" "t" +"u" "" "" "U" +"v" "" "" "V" +"w" "" "" "(v|w[english+dutch])" +"x" "" "" "(ks|gz|S[portuguese+spanish])" // S/ks Port & Sp, gz Sp, It only ks +"y" "" "" "i" +"z" "" "" "(z|ts[german]|dz[italian]|ts[italian]|s[spanish])" // ts/dz It, z Port & Fr, z/s Sp http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_arabic.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_arabic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_arabic.txt new file mode 100644 index 0000000..00f85e8 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_arabic.txt @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +"ا" "" "" "a" // alif isol & init + +"ب" "" "" "b1" // ba' isol + +"ت" "" "" "t1" // ta' isol + +"Ø«" "" "" "t1" // tha' isol + +"ج" "" "" "(dZ1|Z1)" // jim isol + +"Ø" "" "" "(h1|1)" // h.a' isol + +"Ø®" "" "" "x1" // kha' isol + +"د" "" "" "d1" // dal isol & init + +"ذ" "" "" "d1" // dhal isol & init + +"ر" "" "" "r1" // dhal isol & init + +"ز" "" "" "z1" // za' isol & init + +"س" "" "" "s1" // sin isol + +"Ø´" "" "" "S1" // shin isol + +"ص" "" "" "s1" // s.ad isol + +"ض" "" "" "d1" // d.ad isol + +"Ø·" "" "" "t1" // t.a' isol + +"ظ" "" "" "z1" // z.a' isol + +"ع" "" "" "(h1|1)" // ayin isol + +"غ" "" "" "g1" // ghayin isol + +"Ù" "" "" "f1" // fa' isol + +"Ù" "" "" "k1" // qaf isol + +"Ù" "" "" "k1" // kaf isol + +"Ù" "" "" "l1" // lam isol + +"Ù " "" "" "m1" // mim isol + +"Ù" "" "" "n1" // nun isol + +"Ù" "" "" "(h1|1)" // h isol + +"Ù" "" "" "(u|v1)" // waw, isol + init + + +"Ùâ" "" "" "(i|j1)" // ya' isol http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_cyrillic.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_cyrillic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_cyrillic.txt new file mode 100644 index 0000000..6237de4 --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_cyrillic.txt @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// GENERAL +"ÑÑ" "" "" "tsa" +"ÑÑ" "" "" "tsu" +"Ñиа" "" "" "tsa" +"Ñие" "" "" "tse" +"Ñио" "" "" "tso" +"ÑиÑ" "" "" "tsu" +"Ñие" "" "" "se" +"Ñио" "" "" "so" +"зие" "" "" "ze" +"зио" "" "" "zo" +"Ñ" "" "Ñ" "" + +"гаÑз" "" "$" "haus" +"гаÑÑ" "" "$" "haus" +"голÑÑ" "" "$" "holts" +"геймеÑ" "" "$" "(hejmer|hajmer)" +"гейм" "" "$" "(hejm|hajm)" +"гоÑ" "" "$" "hof" +"геÑ" "" "$" "ger" +"ген" "" "$" "gen" +"гин" "" "$" "gin" +"г" "(й|Ñ|Ñ|Ñ|Ñ|а|е|о|и|Ñ)" "(а|е|о|и|Ñ)" "g" +"г" "" "(а|е|о|и|Ñ)" "(g|h)" + +"лÑ" "" "" "la" +"лÑ" "" "" "lu" +"лÑ" "" "" "(le|lo)" +"лио" "" "" "(le|lo)" +"ле" "" "" "(lE|lo)" + +"ийе" "" "" "je" +"ие" "" "" "je" +"Ñйе" "" "" "je" +"Ñе" "" "" "je" +"ий" "" "(а|о|Ñ)" "j" +"Ñй" "" "(а|о|Ñ)" "j" +"ий" "" "$" "i" +"Ñй" "" "$" "i" + +"ей" "^" "" "(jej|ej)" +"е" "(а|е|о|Ñ)" "" "je" +"е" "^" "" "je" +"Ñй" "" "" "ej" +"ей" "" "" "ej" + +"аÑе" "" "" "aue" +"аÑÑ" "" "" "aue" + +"а" "" "" "a" +"б" "" "" "b" +"в" "" "" "v" +"г" "" "" "g" +"д" "" "" "d" +"е" "" "" "E" +"Ñ" "" "" "(e|jo)" +"ж" "" "" "Z" +"з" "" "" "z" +"и" "" "" "I" +"й" "" "" "j" +"к" "" "" "k" +"л" "" "" "l" +"м" "" "" "m" +"н" "" "" "n" +"о" "" "" "o" +"п" "" "" "p" +"Ñ" "" "" "r" +"Ñ" "" "" "s" +"Ñ" "" "" "t" +"Ñ" "" "" "u" +"Ñ" "" "" "f" +"Ñ " "" "" "x" +"Ñ" "" "" "ts" +"Ñ" "" "" "tS" +"Ñ" "" "" "S" +"Ñ" "" "" "StS" +"Ñ" "" "" "" +"Ñ" "" "" "I" +"Ñ" "" "" "" +"Ñ" "" "" "E" +"Ñ" "" "" "ju" +"Ñ" "" "" "ja" http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_czech.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_czech.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_czech.txt new file mode 100644 index 0000000..bc7a79c --- /dev/null +++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_czech.txt @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +"ch" "" "" "x" +"qu" "" "" "(k|kv)" +"aue" "" "" "aue" +"ei" "" "" "(ej|aj)" +"i" "[aou]" "" "j" +"i" "" "[aeou]" "j" + +"Ä" "" "" "tS" +"Å¡" "" "" "S" +"Å" "" "" "n" +"Å¥" "" "" "(t|tj)" +"Ä" "" "" "(d|dj)" +"Å" "" "" "(r|rZ)" + +"á" "" "" "a" +"é" "" "" "e" +"Ã" "" "" "i" +"ó" "" "" "o" +"ú" "" "" "u" +"ý" "" "" "i" +"Ä" "" "" "(e|je)" +"ů" "" "" "u" + +// LATIN ALPHABET +"a" "" "" "a" +"b" "" "" "b" +"c" "" "" "ts" +"d" "" "" "d" +"e" "" "" "E" +"f" "" "" "f" +"g" "" "" "g" +"h" "" "" "(h|g)" +"i" "" "" "I" +"j" "" "" "j" +"k" "" "" "k" +"l" "" "" "l" +"m" "" "" "m" +"n" "" "" "n" +"o" "" "" "o" +"p" "" "" "p" +"q" "" "" "(k|kv)" +"r" "" "" "r" +"s" "" "" "s" +"t" "" "" "t" +"u" "" "" "u" +"v" "" "" "v" +"w" "" "" "v" +"x" "" "" "ks" +"y" "" "" "i" +"z" "" "" "z"
