(lucenenet) branch master updated: Respect ignoreCase flag in CommonGramsFilterFactory (#781)

nightowl888 Mon, 11 Nov 2024 09:53:42 -0800

This is an automated email from the ASF dual-hosted git repository.

nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git



The following commit(s) were added to refs/heads/master by this push:
     new de280b0f6 Respect ignoreCase flag in CommonGramsFilterFactory (#781)
de280b0f6 is described below

commit de280b0f692ad9f2ab24d9a1f7e280a59b32263f
Author: H.H.Chen <[email protected]>
AuthorDate: Tue Nov 12 01:53:31 2024 +0800

    Respect ignoreCase flag in CommonGramsFilterFactory (#781)
    
    * Respect ignoreCase flag in CommonGramsFilterFactory
    
    * Add LUCENENET-specific backport comment
    
    * Use GetType instead of typeof for resource loader
    
    ---------
    
    Co-authored-by: Paul Irwin <[email protected]>
---
 .../CommonGrams/CommonGramsFilterFactory.cs        |  7 ++++---
 .../Commongrams/TestCommonGramsFilterFactory.cs    | 22 ++++++++++++++++++++--
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
index 560b3a8c2..670151703 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
@@ -36,7 +36,7 @@ namespace Lucene.Net.Analysis.CommonGrams
     /// </summary>
     public class CommonGramsFilterFactory : TokenFilterFactory, IResourceLoaderAware
     {
-        // TODO: shared base class for Stop/Keep/CommonGrams? 
+        // TODO: shared base class for Stop/Keep/CommonGrams?
         private CharArraySet commonWords;
         private readonly string commonWordFiles;
         private readonly string format;
@@ -71,7 +71,8 @@ namespace Lucene.Net.Analysis.CommonGrams
             }
             else
             {
-                commonWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+                // LUCENENET-specific: backported ignoreCase fix from Lucene 8.10.0 (lucene#188, LUCENE-10008)
+                commonWords = new CharArraySet(m_luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
             }
         }
 
@@ -85,4 +86,4 @@ namespace Lucene.Net.Analysis.CommonGrams
             return commonGrams;
         }
     }
-}
\ No newline at end of file
+}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs
index 9655b6bd0..063fc1b22 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs
@@ -28,7 +28,7 @@ namespace Lucene.Net.Analysis.CommonGrams
     /// Tests pretty much copied from StopFilterFactoryTest We use the test files
     /// used by the StopFilterFactoryTest TODO: consider creating separate test files
     /// so this won't break if stop filter test files change
-    /// 
+    ///
     /// </summary>
     public class TestCommonGramsFilterFactory : BaseTokenStreamFactoryTestCase
     {
@@ -79,6 +79,24 @@ namespace Lucene.Net.Analysis.CommonGrams
             AssertTokenStreamContents(stream, new string[] { "testing", "testing_the", "the", "the_factory", "factory" });
         }
 
+        // LUCENENET-specific: backported ignoreCase fix from Lucene 8.10.0 (lucene#188, LUCENE-10008)
+        [Test]
+        public void TestIgnoreCase()
+        {
+            IResourceLoader loader = new ClasspathResourceLoader(GetType());
+            CommonGramsFilterFactory factory =
+                (CommonGramsFilterFactory)
+                TokenFilterFactory("CommonGrams", TEST_VERSION_CURRENT, loader, "ignoreCase", "true");
+            CharArraySet words = factory.CommonWords;
+            assertTrue("words is null and it shouldn't be", words != null);
+            assertTrue(words.contains("the"));
+            assertTrue(words.contains("The"));
+            Tokenizer tokenizer = new MockTokenizer(new StringReader("testing The factory"),MockTokenizer.WHITESPACE, false);
+            TokenStream stream = factory.Create(tokenizer);
+            AssertTokenStreamContents(
+                stream, new string[] {"testing", "testing_The", "The", "The_factory", "factory"});
+        }
+
         /// <summary>
         /// Test that bogus arguments result in exception </summary>
         [Test]
@@ -95,4 +113,4 @@ namespace Lucene.Net.Analysis.CommonGrams
             }
         }
     }
-}
\ No newline at end of file
+}

(lucenenet) branch master updated: Respect ignoreCase flag in CommonGramsFilterFactory (#781)

Reply via email to