http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/Tools/UnknownDictionaryTest.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/Tools/UnknownDictionaryTest.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/Tools/UnknownDictionaryTest.cs new file mode 100644 index 0000000..a4940f5 --- /dev/null +++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/Tools/UnknownDictionaryTest.cs @@ -0,0 +1,93 @@ +using Lucene.Net.Util; +using NUnit.Framework; +using System; + +namespace Lucene.Net.Analysis.Ja.Util +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/

    /// <summary>
    /// Tests <c>UnknownDictionaryWriter.PutCharacterCategory</c> and
    /// <c>UnknownDictionaryWriter.Put</c>: calls the tests expect to be rejected
    /// must throw, and the remaining calls must complete without throwing.
    /// </summary>
    public class UnknownDictionaryTest : LuceneTestCase
    {
        public static readonly string FILENAME = "unk-tokeninfo-dict.obj";

        /// <summary>
        /// Expects <c>PutCharacterCategory</c> to throw for the two rejected
        /// (index, name) pairs and to accept the five pairs registered afterwards.
        /// </summary>
        [Test]
        public void TestPutCharacterCategory()
        {
            UnknownDictionaryWriter unkDic = new UnknownDictionaryWriter(10 * 1024 * 1024);

            // Presumably rejected because "DUMMY_NAME" is not a known category name.
            ExpectException(() => unkDic.PutCharacterCategory(0, "DUMMY_NAME"));

            // Presumably rejected because of the negative index.
            ExpectException(() => unkDic.PutCharacterCategory(-1, "KATAKANA"));

            unkDic.PutCharacterCategory(0, "DEFAULT");
            unkDic.PutCharacterCategory(1, "GREEK");
            unkDic.PutCharacterCategory(2, "HIRAGANA");
            unkDic.PutCharacterCategory(3, "KATAKANA");
            unkDic.PutCharacterCategory(4, "KANJI");
        }

        /// <summary>
        /// Expects <c>Put</c> to throw for an entry with a missing numeric column
        /// and to accept three complete entries once their categories are registered.
        /// </summary>
        [Test]
        public void TestPut()
        {
            UnknownDictionaryWriter unkDic = new UnknownDictionaryWriter(10 * 1024 * 1024);

            // This entry has one numeric column fewer than the entries accepted below.
            // Parsing stays inside the guarded call, exactly as in the original try block.
            ExpectException(() => unkDic.Put(CSVUtil.Parse("KANJI,1285,11426,名詞,一般,*,*,*,*,*,*,*")));

            // NOTE(review): the Japanese columns were restored from mojibake
            // ("åè©,ä¸è¬" is UTF-8 "名詞,一般" decoded as Latin-1) - confirm against upstream.
            string entry1 = "ALPHA,1285,1285,13398,名詞,一般,*,*,*,*,*,*,*";
            string entry2 = "HIRAGANA,1285,1285,13069,名詞,一般,*,*,*,*,*,*,*";
            string entry3 = "KANJI,1285,1285,11426,名詞,一般,*,*,*,*,*,*,*";

            unkDic.PutCharacterCategory(0, "ALPHA");
            unkDic.PutCharacterCategory(1, "HIRAGANA");
            unkDic.PutCharacterCategory(2, "KANJI");

            unkDic.Put(CSVUtil.Parse(entry1));
            unkDic.Put(CSVUtil.Parse(entry2));
            unkDic.Put(CSVUtil.Parse(entry3));
        }

        /// <summary>
        /// Runs <paramref name="action"/> and fails the current test unless it throws.
        /// </summary>
        /// <param name="action">The call that is expected to throw an exception.</param>
        private void ExpectException(Action action)
        {
            bool threw = false;
            try
            {
                action();
            }
            catch (Exception)
            {
                threw = true;
            }

            // fail() is deliberately called outside the try block. Assuming it reports
            // the failure by throwing (as NUnit's Assert.Fail does), calling it inside
            // the try would let the catch-all handler swallow it, and the test could
            // never fail.
            if (!threw)
            {
                fail();
            }
        }
    }
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/Util/TestToStringUtil.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/Util/TestToStringUtil.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/Util/TestToStringUtil.cs new file mode 100644 index 0000000..2922b27 --- /dev/null +++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/Util/TestToStringUtil.cs @@ -0,0 +1,121 @@ +using Lucene.Net.Util; +using NUnit.Framework; +using System; +using System.Collections.Generic; + +namespace Lucene.Net.Analysis.Ja.Util +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/

    /// <summary>
    /// Tests <c>ToStringUtil.GetPOSTranslation</c> and
    /// <c>ToStringUtil.GetRomanization</c> against fixed expected strings.
    /// </summary>
    /// <remarks>
    /// NOTE(review): every Japanese literal in this class was reconstructed from
    /// mojibake (UTF-8 decoded as Latin-1 with some bytes lost), using the surviving
    /// bytes together with the expected romanization - confirm against upstream.
    /// </remarks>
    public class TestToStringUtil : LuceneTestCase
    {
        /// <summary>
        /// Checks the English translation of one part-of-speech tag.
        /// </summary>
        [Test]
        public void TestPOS()
        {
            assertEquals("noun-suffix-verbal", ToStringUtil.GetPOSTranslation("名詞-接尾-サ変接続"));
        }

        /// <summary>
        /// Checks the romanization of five katakana words that contain the long-vowel mark.
        /// </summary>
        [Test]
        public void TestHepburn()
        {
            assertEquals("majan", ToStringUtil.GetRomanization("マージャン"));
            assertEquals("uroncha", ToStringUtil.GetRomanization("ウーロンチャ"));
            assertEquals("chahan", ToStringUtil.GetRomanization("チャーハン"));
            assertEquals("chashu", ToStringUtil.GetRomanization("チャーシュー"));
            assertEquals("shumai", ToStringUtil.GetRomanization("シューマイ"));
        }

        // see http://en.wikipedia.org/wiki/Hepburn_romanization,
        // but this isn't even thorough or really probably what we want!
        /// <summary>
        /// Checks that each katakana key in the table romanizes to its paired value.
        /// </summary>
        [Test]
        public void TestHepburnTable()
        {
            IDictionary<string, string> table = new Dictionary<string, string>()
            {
                { "ア", "a" }, { "イ", "i" }, { "ウ", "u" }, { "エ", "e" }, { "オ", "o" },
                { "カ", "ka" }, { "キ", "ki" }, { "ク", "ku" }, { "ケ", "ke" }, { "コ", "ko" },
                { "サ", "sa" }, { "シ", "shi" }, { "ス", "su" }, { "セ", "se" }, { "ソ", "so" },
                { "タ", "ta" }, { "チ", "chi" }, { "ツ", "tsu" }, { "テ", "te" }, { "ト", "to" },
                { "ナ", "na" }, { "ニ", "ni" }, { "ヌ", "nu" }, { "ネ", "ne" }, { "ノ", "no" },
                { "ハ", "ha" }, { "ヒ", "hi" }, { "フ", "fu" }, { "ヘ", "he" }, { "ホ", "ho" },
                { "マ", "ma" }, { "ミ", "mi" }, { "ム", "mu" }, { "メ", "me" }, { "モ", "mo" },
                { "ヤ", "ya" }, { "ユ", "yu" }, { "ヨ", "yo" },
                { "ラ", "ra" }, { "リ", "ri" }, { "ル", "ru" }, { "レ", "re" }, { "ロ", "ro" },
                { "ワ", "wa" }, { "ヰ", "i" }, { "ヱ", "e" }, { "ヲ", "o" },
                { "ン", "n" },
                { "ガ", "ga" }, { "ギ", "gi" }, { "グ", "gu" }, { "ゲ", "ge" }, { "ゴ", "go" },
                { "ザ", "za" }, { "ジ", "ji" }, { "ズ", "zu" }, { "ゼ", "ze" }, { "ゾ", "zo" },
                { "ダ", "da" }, { "ヂ", "ji" }, { "ヅ", "zu" }, { "デ", "de" }, { "ド", "do" },
                { "バ", "ba" }, { "ビ", "bi" }, { "ブ", "bu" }, { "ベ", "be" }, { "ボ", "bo" },
                { "パ", "pa" }, { "ピ", "pi" }, { "プ", "pu" }, { "ペ", "pe" }, { "ポ", "po" },

                { "キャ", "kya" }, { "キュ", "kyu" }, { "キョ", "kyo" },
                { "シャ", "sha" }, { "シュ", "shu" }, { "ショ", "sho" },
                { "チャ", "cha" }, { "チュ", "chu" }, { "チョ", "cho" },
                { "ニャ", "nya" }, { "ニュ", "nyu" }, { "ニョ", "nyo" },
                { "ヒャ", "hya" }, { "ヒュ", "hyu" }, { "ヒョ", "hyo" },
                { "ミャ", "mya" }, { "ミュ", "myu" }, { "ミョ", "myo" },
                { "リャ", "rya" }, { "リュ", "ryu" }, { "リョ", "ryo" },
                { "ギャ", "gya" }, { "ギュ", "gyu" }, { "ギョ", "gyo" },
                { "ジャ", "ja" }, { "ジュ", "ju" }, { "ジョ", "jo" },
                { "ヂャ", "ja" }, { "ヂュ", "ju" }, { "ヂョ", "jo" },
                { "ビャ", "bya" }, { "ビュ", "byu" }, { "ビョ", "byo" },
                { "ピャ", "pya" }, { "ピュ", "pyu" }, { "ピョ", "pyo" },

                { "イィ", "yi" }, { "イェ", "ye" },
                { "ウァ", "wa" }, { "ウィ", "wi" }, { "ウゥ", "wu" }, { "ウェ", "we" }, { "ウォ", "wo" },
                { "ウュ", "wyu" },
                // TODO: really should be vu
                { "ヴァ", "va" }, { "ヴィ", "vi" }, { "ヴ", "v" }, { "ヴェ", "ve" }, { "ヴォ", "vo" },
                { "ヴャ", "vya" }, { "ヴュ", "vyu" }, { "ヴィェ", "vye" }, { "ヴョ", "vyo" },
                { "キェ", "kye" },
                { "ギェ", "gye" },
                { "クァ", "kwa" }, { "クィ", "kwi" }, { "クェ", "kwe" }, { "クォ", "kwo" },
                { "クヮ", "kwa" },
                { "グァ", "gwa" }, { "グィ", "gwi" }, { "グェ", "gwe" }, { "グォ", "gwo" },
                { "グヮ", "gwa" },
                { "シェ", "she" },
                { "ジェ", "je" },
                { "スィ", "si" },
                { "ズィ", "zi" },
                { "チェ", "che" },
                { "ツァ", "tsa" }, { "ツィ", "tsi" }, { "ツェ", "tse" }, { "ツォ", "tso" },
                { "ツュ", "tsyu" },
                { "ティ", "ti" }, { "トゥ", "tu" },
                { "テュ", "tyu" },
                { "ディ", "di" }, { "ドゥ", "du" },
                { "デュ", "dyu" },
                { "ニェ", "nye" },
                { "ヒェ", "hye" },
                { "ビェ", "bye" },
                { "ピェ", "pye" },
                { "ファ", "fa" }, { "フィ", "fi" }, { "フェ", "fe" }, { "フォ", "fo" },
                { "フャ", "fya" }, { "フュ", "fyu" }, { "フィェ", "fye" }, { "フョ", "fyo" },
                { "ホゥ", "hu" },
                { "ミェ", "mye" },
                { "リェ", "rye" },
                // NOTE(review): the second character of these five keys is reconstructed as
                // the standalone semi-voiced sound mark U+309C - confirm against upstream.
                { "ラ゜", "la" }, { "リ゜", "li" }, { "ル゜", "lu" }, { "レ゜", "le" }, { "ロ゜", "lo" },
                { "ヷ", "va" }, { "ヸ", "vi" }, { "ヹ", "ve" }, { "ヺ", "vo" },
            };

            // Iterate the pairs directly instead of looking each key up again.
            // The key doubles as the assertion message so a failure names the input.
            foreach (KeyValuePair<string, string> entry in table)
            {
                assertEquals(entry.Key, entry.Value, ToStringUtil.GetRomanization(entry.Key));
            }
        }
    }
}
