This is an automated email from the ASF dual-hosted git repository.
nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git
The following commit(s) were added to refs/heads/master by this push:
new de280b0f6 Respect ignoreCase flag in CommonGramsFilterFactory (#781)
de280b0f6 is described below
commit de280b0f692ad9f2ab24d9a1f7e280a59b32263f
Author: H.H.Chen <[email protected]>
AuthorDate: Tue Nov 12 01:53:31 2024 +0800
Respect ignoreCase flag in CommonGramsFilterFactory (#781)
* Respect ignoreCase flag in CommonGramsFilterFactory
* Add LUCENENET-specific backport comment
* Use GetType instead of typeof for resource loader
---------
Co-authored-by: Paul Irwin <[email protected]>
---
.../CommonGrams/CommonGramsFilterFactory.cs | 7 ++++---
.../Commongrams/TestCommonGramsFilterFactory.cs | 22 ++++++++++++++++++++--
2 files changed, 24 insertions(+), 5 deletions(-)
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
index 560b3a8c2..670151703 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
@@ -36,7 +36,7 @@ namespace Lucene.Net.Analysis.CommonGrams
/// </summary>
public class CommonGramsFilterFactory : TokenFilterFactory, IResourceLoaderAware
{
- // TODO: shared base class for Stop/Keep/CommonGrams?
+ // TODO: shared base class for Stop/Keep/CommonGrams?
private CharArraySet commonWords;
private readonly string commonWordFiles;
private readonly string format;
@@ -71,7 +71,8 @@ namespace Lucene.Net.Analysis.CommonGrams
}
else
{
- commonWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+ // LUCENENET-specific: backported ignoreCase fix from Lucene 8.10.0 (lucene#188, LUCENE-10008)
+ commonWords = new CharArraySet(m_luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
}
}
@@ -85,4 +86,4 @@ namespace Lucene.Net.Analysis.CommonGrams
return commonGrams;
}
}
-}
\ No newline at end of file
+}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs
index 9655b6bd0..063fc1b22 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs
@@ -28,7 +28,7 @@ namespace Lucene.Net.Analysis.CommonGrams
/// Tests pretty much copied from StopFilterFactoryTest We use the test files
/// used by the StopFilterFactoryTest TODO: consider creating separate test files
/// so this won't break if stop filter test files change
- ///
+ ///
/// </summary>
public class TestCommonGramsFilterFactory : BaseTokenStreamFactoryTestCase
{
@@ -79,6 +79,24 @@ namespace Lucene.Net.Analysis.CommonGrams
AssertTokenStreamContents(stream, new string[] { "testing", "testing_the", "the", "the_factory", "factory" });
}
+ // LUCENENET-specific: backported ignoreCase fix from Lucene 8.10.0 (lucene#188, LUCENE-10008)
+ [Test]
+ public void TestIgnoreCase()
+ {
+ IResourceLoader loader = new ClasspathResourceLoader(GetType());
+ CommonGramsFilterFactory factory =
+ (CommonGramsFilterFactory)
+ TokenFilterFactory("CommonGrams", TEST_VERSION_CURRENT, loader, "ignoreCase", "true");
+ CharArraySet words = factory.CommonWords;
+ assertTrue("words is null and it shouldn't be", words != null);
+ assertTrue(words.contains("the"));
+ assertTrue(words.contains("The"));
+ Tokenizer tokenizer = new MockTokenizer(new StringReader("testing The factory"),MockTokenizer.WHITESPACE, false);
+ TokenStream stream = factory.Create(tokenizer);
+ AssertTokenStreamContents(
+ stream, new string[] {"testing", "testing_The", "The", "The_factory", "factory"});
+ }
+
/// <summary>
/// Test that bogus arguments result in exception </summary>
[Test]
@@ -95,4 +113,4 @@ namespace Lucene.Net.Analysis.CommonGrams
}
}
}
-}
\ No newline at end of file
+}