http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
deleted file mode 100644
index 2fc1356..0000000
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
+++ /dev/null
@@ -1,303 +0,0 @@
-using Lucene.Net.Analysis.TokenAttributes;
-using Lucene.Net.Support;
-using Lucene.Net.Util;
-using NUnit.Framework;
-using System.IO;
-
-namespace Lucene.Net.Analysis.NGram
-{
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements. See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License. You may obtain a copy of the License at
-     *
-     *     http://www.apache.org/licenses/LICENSE-2.0
-     *
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    /// <summary>
-    /// Tests <seealso cref="NGramTokenizer"/> for correctness.
-    /// </summary>
-    public class NGramTokenizerTest : BaseTokenStreamTestCase
-    {
-        private StringReader input;
-
-        public override void SetUp()
-        {
-            base.SetUp();
-            input = new StringReader("abcde");
-        }
-
-        [Test]
-        public virtual void TestInvalidInput()
-        {
-            bool gotException = false;
-            try
-            {
-                new NGramTokenizer(TEST_VERSION_CURRENT, input, 2, 1);
-            }
-            catch (System.ArgumentException)
-            {
-                gotException = true;
-            }
-            assertTrue(gotException);
-        }
-
-        [Test]
-        public virtual void TestInvalidInput2()
-        {
-            bool gotException = false;
-            try
-            {
-                new NGramTokenizer(TEST_VERSION_CURRENT, input, 0, 1);
-            }
-            catch (System.ArgumentException)
-            {
-                gotException = true;
-            }
-            assertTrue(gotException);
-        }
-
-        [Test]
-        public virtual void TestUnigrams()
-        {
-            NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 1, 1);
-            AssertTokenStreamContents(tokenizer, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5); // abcde
-        }
-
-        [Test]
-        public virtual void TestBigrams()
-        {
-            NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 2, 2);
-            AssertTokenStreamContents(tokenizer, new string[] { "ab", "bc", "cd", "de" }, new int[] { 0, 1, 2, 3 }, new int[] { 2, 3, 4, 5 }, 5); // abcde
-        }
-
-        [Test]
-        public virtual void TestNgrams()
-        {
-            NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3);
-            AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc", "b", "bc", "bcd", "c", "cd", "cde", "d", "de", "e" }, new int[] { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4 }, new int[] { 1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5, 5 }, null, null, null, 5, false); // abcde
-        }
-
-        [Test]
-        public virtual void TestOversizedNgrams()
-        {
-            NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 6, 7);
-            AssertTokenStreamContents(tokenizer, new string[0], new int[0], new int[0], 5); // abcde
-        }
-
-        [Test]
-        public virtual void TestReset()
-        {
-            NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 1, 1);
-            AssertTokenStreamContents(tokenizer, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5); // abcde
-            tokenizer.SetReader(new StringReader("abcde"));
-            AssertTokenStreamContents(tokenizer, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5); // abcde
-        }
-
-        /// <summary>
-        /// blast some random strings through the analyzer </summary>
-        [Test]
-        public virtual void TestRandomStrings()
-        {
-            for (int i = 0; i < 10; i++)
-            {
-                int min = TestUtil.NextInt(Random(), 2, 10);
-                int max = TestUtil.NextInt(Random(), min, 20);
-                Analyzer a = new AnalyzerAnonymousInnerClassHelper(this, min, max);
-                CheckRandomData(Random(), a, 200 * RANDOM_MULTIPLIER, 20);
-                CheckRandomData(Random(), a, 10 * RANDOM_MULTIPLIER, 1027);
-            }
-        }
-
-        private class AnalyzerAnonymousInnerClassHelper : Analyzer
-        {
-            private readonly NGramTokenizerTest outerInstance;
-
-            private int min;
-            private int max;
-
-            public AnalyzerAnonymousInnerClassHelper(NGramTokenizerTest outerInstance, int min, int max)
-            {
-                this.outerInstance = outerInstance;
-                this.min = min;
-                this.max = max;
-            }
-
-            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
-            {
-                Tokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, reader, min, max);
-                return new TokenStreamComponents(tokenizer, tokenizer);
-            }
-        }
-
-        private static void TestNGrams(int minGram, int maxGram, int length, string nonTokenChars)
-        {
-            //string s = RandomStrings.randomAsciiOfLength(Random(), length);
-            string s = TestUtil.RandomAnalysisString(Random(), length, true);
-            TestNGrams(minGram, maxGram, s, nonTokenChars);
-        }
-
-        private static void TestNGrams(int minGram, int maxGram, string s, string nonTokenChars)
-        {
-            TestNGrams(minGram, maxGram, s, nonTokenChars, false);
-        }
-
-        internal static int[] toCodePoints(string s)
-        {
-            int[] codePoints = new int[Character.CodePointCount(s, 0, s.Length)];
-            for (int i = 0, j = 0; i < s.Length; ++j)
-            {
-                codePoints[j] = Character.CodePointAt(s, i);
-                i += Character.CharCount(codePoints[j]);
-            }
-            return codePoints;
-        }
-
-        internal static bool isTokenChar(string nonTokenChars, int codePoint)
-        {
-            for (int i = 0; i < nonTokenChars.Length;)
-            {
-                int cp = char.ConvertToUtf32(nonTokenChars, i);
-                if (cp == codePoint)
-                {
-                    return false;
-                }
-                i += Character.CharCount(cp);
-            }
-            return true;
-        }
-
-        internal static void TestNGrams(int minGram, int maxGram, string s, string nonTokenChars, bool edgesOnly)
-        {
-            // convert the string to code points
-            int[] codePoints = toCodePoints(s);
-            int[] offsets = new int[codePoints.Length + 1];
-            for (int i = 0; i < codePoints.Length; ++i)
-            {
-                offsets[i + 1] = offsets[i] + Character.CharCount(codePoints[i]);
-            }
-            TokenStream grams = new NGramTokenizerAnonymousInnerClassHelper(TEST_VERSION_CURRENT, new StringReader(s), minGram, maxGram, edgesOnly, nonTokenChars);
-            ICharTermAttribute termAtt = grams.AddAttribute<ICharTermAttribute>();
-            IPositionIncrementAttribute posIncAtt = grams.AddAttribute<IPositionIncrementAttribute>();
-            IPositionLengthAttribute posLenAtt = grams.AddAttribute<IPositionLengthAttribute>();
-            IOffsetAttribute offsetAtt = grams.AddAttribute<IOffsetAttribute>();
-            grams.Reset();
-            for (int start = 0; start < codePoints.Length; ++start)
-            {
-                for (int end = start + minGram; end <= start + maxGram && end <= codePoints.Length; ++end)
-                {
-                    if (edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1]))
-                    {
-                        // not on an edge
-                        goto nextGramContinue;
-                    }
-                    for (int j = start; j < end; ++j)
-                    {
-                        if (!isTokenChar(nonTokenChars, codePoints[j]))
-                        {
-                            goto nextGramContinue;
-                        }
-                    }
-                    assertTrue(grams.IncrementToken());
-                    assertArrayEquals(Arrays.CopyOfRange(codePoints, start, end), toCodePoints(termAtt.ToString()));
-                    assertEquals(1, posIncAtt.PositionIncrement);
-                    assertEquals(1, posLenAtt.PositionLength);
-                    assertEquals(offsets[start], offsetAtt.StartOffset);
-                    assertEquals(offsets[end], offsetAtt.EndOffset);
-                    nextGramContinue:;
-                }
-                //nextGramBreak:;
-            }
-            assertFalse(grams.IncrementToken());
-            grams.End();
-            assertEquals(s.Length, offsetAtt.StartOffset);
-            assertEquals(s.Length, offsetAtt.EndOffset);
-        }
-
-        private class NGramTokenizerAnonymousInnerClassHelper : NGramTokenizer
-        {
-            private string nonTokenChars;
-
-            public NGramTokenizerAnonymousInnerClassHelper(LuceneVersion TEST_VERSION_CURRENT, StringReader java, int minGram, int maxGram, bool edgesOnly, string nonTokenChars)
-                : base(TEST_VERSION_CURRENT, java, minGram, maxGram, edgesOnly)
-            {
-                this.nonTokenChars = nonTokenChars;
-            }
-
-            protected override bool IsTokenChar(int chr)
-            {
-                return nonTokenChars.IndexOf((char)chr) < 0;
-            }
-        }
-
-        [Test]
-        public virtual void TestLargeInput()
-        {
-            // test sliding
-            int minGram = TestUtil.NextInt(Random(), 1, 100);
-            int maxGram = TestUtil.NextInt(Random(), minGram, 100);
-            TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 3 * 1024, 4 * 1024), "");
-        }
-
-        [Test]
-        public virtual void TestLargeMaxGram()
-        {
-            // test sliding with maxGram > 1024
-            int minGram = TestUtil.NextInt(Random(), 1290, 1300);
-            int maxGram = TestUtil.NextInt(Random(), minGram, 1300);
-            TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 3 * 1024, 4 * 1024), "");
-        }
-
-        [Test]
-        public virtual void TestPreTokenization()
-        {
-            int minGram = TestUtil.NextInt(Random(), 1, 100);
-            int maxGram = TestUtil.NextInt(Random(), minGram, 100);
-            TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 0, 4 * 1024), "a");
-        }
-
-        [Test]
-        public virtual void TestHeavyPreTokenization()
-        {
-            int minGram = TestUtil.NextInt(Random(), 1, 100);
-            int maxGram = TestUtil.NextInt(Random(), minGram, 100);
-            TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 0, 4 * 1024), "abcdef");
-        }
-
-        [Test]
-        public virtual void TestFewTokenChars()
-        {
-            char[] chrs = new char[TestUtil.NextInt(Random(), 4000, 5000)];
-            Arrays.Fill(chrs, ' ');
-            for (int i = 0; i < chrs.Length; ++i)
-            {
-                if (Random().NextDouble() < 0.1)
-                {
-                    chrs[i] = 'a';
-                }
-            }
-            int minGram = TestUtil.NextInt(Random(), 1, 2);
-            int maxGram = TestUtil.NextInt(Random(), minGram, 2);
-            TestNGrams(minGram, maxGram, new string(chrs), " ");
-        }
-
-        [Test]
-        public virtual void TestFullUTF8Range()
-        {
-            int minGram = TestUtil.NextInt(Random(), 1, 100);
-            int maxGram = TestUtil.NextInt(Random(), minGram, 100);
-            string s = TestUtil.RandomUnicodeString(Random(), 4 * 1024);
-            TestNGrams(minGram, maxGram, s, "");
-            TestNGrams(minGram, maxGram, s, "abcdef");
-        }
-    }
-}
\ No newline at end of file
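
For reference, the enumeration these tests verify — every run of minGram to maxGram consecutive code points, sliding forward one code point at a time — can be sketched in plain C# with no Lucene dependencies. This is only an illustration of the expected token sequence, not Lucene.NET's implementation; the names NGramSketch, ToCodePoints, and NGrams are invented for this sketch:

    using System;
    using System.Collections.Generic;

    static class NGramSketch
    {
        // Convert a string to an array of Unicode code points, so a
        // surrogate pair counts as one position (mirroring the test's
        // toCodePoints helper).
        static int[] ToCodePoints(string s)
        {
            var result = new List<int>();
            for (int i = 0; i < s.Length; )
            {
                int cp = char.ConvertToUtf32(s, i);
                result.Add(cp);
                i += char.IsSurrogatePair(s, i) ? 2 : 1;
            }
            return result.ToArray();
        }

        // Yield every n-gram whose length lies in [minGram, maxGram],
        // in the same order as the nested start/end loops in TestNGrams.
        static IEnumerable<string> NGrams(string s, int minGram, int maxGram)
        {
            int[] cps = ToCodePoints(s);
            for (int start = 0; start < cps.Length; ++start)
            {
                for (int end = start + minGram; end <= start + maxGram && end <= cps.Length; ++end)
                {
                    var sb = new System.Text.StringBuilder();
                    for (int j = start; j < end; ++j)
                    {
                        sb.Append(char.ConvertFromUtf32(cps[j]));
                    }
                    yield return sb.ToString();
                }
            }
        }

        static void Main()
        {
            // Matches TestNgrams above: "abcde" with minGram=1, maxGram=3
            // yields a, ab, abc, b, bc, bcd, c, cd, cde, d, de, e.
            Console.WriteLine(string.Join(", ", NGrams("abcde", 1, 3)));
        }
    }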
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/TestNGramFilters.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/TestNGramFilters.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/TestNGramFilters.cs
deleted file mode 100644
index c0683a6..0000000
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/TestNGramFilters.cs
+++ /dev/null
@@ -1,196 +0,0 @@
-using Lucene.Net.Analysis.Util;
-using Lucene.Net.Util;
-using NUnit.Framework;
-using System.IO;
-using Reader = System.IO.TextReader;
-
-namespace Lucene.Net.Analysis.NGram
-{
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements. See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License. You may obtain a copy of the License at
-     *
-     *     http://www.apache.org/licenses/LICENSE-2.0
-     *
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    /// <summary>
-    /// Simple tests to ensure the NGram filter factories are working.
-    /// </summary>
-    public class TestNGramFilters : BaseTokenStreamFactoryTestCase
-    {
-        /// <summary>
-        /// Test NGramTokenizerFactory
-        /// </summary>
-        [Test]
-        public virtual void TestNGramTokenizer()
-        {
-            Reader reader = new StringReader("test");
-            TokenStream stream = TokenizerFactory("NGram").Create(reader);
-            AssertTokenStreamContents(stream, new string[] { "t", "te", "e", "es", "s", "st", "t" });
-        }
-
-        /// <summary>
-        /// Test NGramTokenizerFactory with min and max gram options
-        /// </summary>
-        [Test]
-        public virtual void TestNGramTokenizer2()
-        {
-            Reader reader = new StringReader("test");
-            TokenStream stream = TokenizerFactory("NGram", "minGramSize", "2", "maxGramSize", "3").Create(reader);
-            AssertTokenStreamContents(stream, new string[] { "te", "tes", "es", "est", "st" });
-        }
-
-        /// <summary>
-        /// Test the NGramFilterFactory
-        /// </summary>
-        [Test]
-        public virtual void TestNGramFilter()
-        {
-            Reader reader = new StringReader("test");
-            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-            stream = TokenFilterFactory("NGram").Create(stream);
-            AssertTokenStreamContents(stream, new string[] { "t", "te", "e", "es", "s", "st", "t" });
-        }
-
-        /// <summary>
-        /// Test the NGramFilterFactory with min and max gram options
-        /// </summary>
-        [Test]
-        public virtual void TestNGramFilter2()
-        {
-            Reader reader = new StringReader("test");
-            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-            stream = TokenFilterFactory("NGram", "minGramSize", "2", "maxGramSize", "3").Create(stream);
-            AssertTokenStreamContents(stream, new string[] { "te", "tes", "es", "est", "st" });
-        }
-
-        /// <summary>
-        /// Test EdgeNGramTokenizerFactory
-        /// </summary>
-        [Test]
-        public virtual void TestEdgeNGramTokenizer()
-        {
-            Reader reader = new StringReader("test");
-            TokenStream stream = TokenizerFactory("EdgeNGram").Create(reader);
-            AssertTokenStreamContents(stream, new string[] { "t" });
-        }
-
-        /// <summary>
-        /// Test EdgeNGramTokenizerFactory with min and max gram size
-        /// </summary>
-        [Test]
-        public virtual void TestEdgeNGramTokenizer2()
-        {
-            Reader reader = new StringReader("test");
-            TokenStream stream = TokenizerFactory("EdgeNGram", "minGramSize", "1", "maxGramSize", "2").Create(reader);
-            AssertTokenStreamContents(stream, new string[] { "t", "te" });
-        }
-
-        /// <summary>
-        /// Test EdgeNGramTokenizerFactory with side option
-        /// </summary>
-        [Test]
-        public virtual void TestEdgeNGramTokenizer3()
-        {
-            Reader reader = new StringReader("ready");
-#pragma warning disable 612, 618
-            TokenStream stream = TokenizerFactory("EdgeNGram", LuceneVersion.LUCENE_43, "side", "back").Create(reader);
-#pragma warning restore 612, 618
-            AssertTokenStreamContents(stream, new string[] { "y" });
-        }
-
-        /// <summary>
-        /// Test EdgeNGramFilterFactory
-        /// </summary>
-        [Test]
-        public virtual void TestEdgeNGramFilter()
-        {
-            Reader reader = new StringReader("test");
-            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-            stream = TokenFilterFactory("EdgeNGram").Create(stream);
-            AssertTokenStreamContents(stream, new string[] { "t" });
-        }
-
-        /// <summary>
-        /// Test EdgeNGramFilterFactory with min and max gram size
-        /// </summary>
-        [Test]
-        public virtual void TestEdgeNGramFilter2()
-        {
-            Reader reader = new StringReader("test");
-            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-            stream = TokenFilterFactory("EdgeNGram", "minGramSize", "1", "maxGramSize", "2").Create(stream);
-            AssertTokenStreamContents(stream, new string[] { "t", "te" });
-        }
-
-        /// <summary>
-        /// Test EdgeNGramFilterFactory with side option
-        /// </summary>
-        [Test]
-        public virtual void TestEdgeNGramFilter3()
-        {
-            Reader reader = new StringReader("ready");
-            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-#pragma warning disable 612, 618
-            stream = TokenFilterFactory("EdgeNGram", LuceneVersion.LUCENE_43, "side", "back").Create(stream);
-#pragma warning restore 612, 618
-            AssertTokenStreamContents(stream, new string[] { "y" });
-        }
-
-        /// <summary>
-        /// Test that bogus arguments result in exception </summary>
-        [Test]
-        public virtual void TestBogusArguments()
-        {
-            try
-            {
-                TokenizerFactory("NGram", "bogusArg", "bogusValue");
-                fail();
-            }
-            catch (System.ArgumentException expected)
-            {
-                assertTrue(expected.Message.Contains("Unknown parameters"));
-            }
-
-            try
-            {
-                TokenizerFactory("EdgeNGram", "bogusArg", "bogusValue");
-                fail();
-            }
-            catch (System.ArgumentException expected)
-            {
-                assertTrue(expected.Message.Contains("Unknown parameters"));
-            }
-
-            try
-            {
-                TokenFilterFactory("NGram", "bogusArg", "bogusValue");
-                fail();
-            }
-            catch (System.ArgumentException expected)
-            {
-                assertTrue(expected.Message.Contains("Unknown parameters"));
-            }
-
-            try
-            {
-                TokenFilterFactory("EdgeNGram", "bogusArg", "bogusValue");
-                fail();
-            }
-            catch (System.ArgumentException expected)
-            {
-                assertTrue(expected.Message.Contains("Unknown parameters"));
-            }
-        }
-    }
-}
\ No newline at end of file
