NehanPathan commented on code in PR #1154: URL: https://github.com/apache/lucenenet/pull/1154#discussion_r2058039821
########## src/Lucene.Net.Tests.Analysis.SmartCn/DictionaryTests.cs: ########## @@ -0,0 +1,72 @@ +using Lucene.Net.Util; +using Lucene.Net.Analysis.Cn.Smart.Hhmm; +using Lucene.Net.Attributes; +using NUnit.Framework; +using System; +using System.IO; +using System.Reflection; + + +[TestFixture] +[LuceneNetSpecific] +public class DictionaryTests : LuceneTestCase Review Comment: @NightOwl888 ```` ### ✅ Update: All tests are now passing! Thanks for your guidance — the issues were indeed tied to: - ✅ **Incorrect namespace**: Changing it to end in `.Hhmm` resolved the resource loading issue. - ✅ **Missing `OneTimeTearDown()`**: Added this method to reset `ANALYSIS_DATA_DIR`, ensuring no side effects from static fields across test runs. 🛠 I also zipped the required dictionary files into `custom-dictionary-input.zip`: - ✔️ Included my **custom `bigramdict.dct`** - ✔️ Replaced the broken `coredict.dct` with the **original one from LUCENE-1629** The zip is placed in the same directory as `TestBuildDictionary.cs` to keep it simple and compact. --- ### ✅ Test Summary | Test | Description | Status | |----------------------------|-------------------------------------------------|----------| | `TestBigramDictionary()` | Loads from .dct, then falls back to .mem | ✅ Passes | | `TestWordDictionary()` | Loads from .dct, then .mem fallback confirmed | ✅ Passes | | `Teardown` | Resets static config for isolation | ✅ Clean | --- ### 🔍 Code (with inline comments) ```csharp using J2N; using Lucene.Net.Analysis.Cn.Smart; using Lucene.Net.Analysis.Cn.Smart.Hhmm; using Lucene.Net.Attributes; using Lucene.Net.Util; using Assert = Lucene.Net.TestFramework.Assert; using NUnit.Framework; using System; using System.IO; namespace Lucene.Net.Analysis.Cn.Smart.Hhmm { /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
* The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ [LuceneNetSpecific] public class TestBuildDictionary : LuceneTestCase { private DirectoryInfo tempDir; public override void OneTimeSetUp() { base.OneTimeSetUp(); // Create temp dir and extract the zipped test dictionary tempDir = CreateTempDir("smartcn-data"); AnalyzerProfile.ANALYSIS_DATA_DIR = tempDir.FullName; using (var zipFileStream = typeof(TestBuildDictionary).FindAndGetManifestResourceStream("custom-dictionary-input.zip")) { TestUtil.Unzip(zipFileStream, tempDir); } } public override void OneTimeTearDown() { // Ensure test state is cleaned up for other tests AnalyzerProfile.ANALYSIS_DATA_DIR = null; base.OneTimeTearDown(); } [Test] public void TestBigramDictionary() { // First load: build from .dct BigramDictionary bigramDict = BigramDictionary.GetInstance(); CheckBigramDictionary(bigramDict); // Confirm .mem file created string memFile = Path.Combine(tempDir.FullName, "bigramdict.mem"); Assert.IsTrue(File.Exists(memFile), "Memory file should be created after first load"); // Delete .dct to force fallback string dictFile = Path.Combine(tempDir.FullName, "bigramdict.dct"); if (File.Exists(dictFile)) File.Delete(dictFile); // Second load: should read from .mem bigramDict = BigramDictionary.GetInstance(); CheckBigramDictionary(bigramDict); } private void CheckBigramDictionary(BigramDictionary bigramDict) { Assert.AreEqual(10, bigramDict.GetFrequency("啊hello".AsSpan()), "Frequency for '啊hello' is 
incorrect."); Assert.AreEqual(20, bigramDict.GetFrequency("阿world".AsSpan()), "Frequency for '阿world' is incorrect."); } [Test] public void TestWordDictionary() { // First load: build from .dct WordDictionary wordDict = WordDictionary.GetInstance(); CheckWordDictionary(wordDict); // Confirm .mem file created string memFile = Path.Combine(tempDir.FullName, "coredict.mem"); Assert.IsTrue(File.Exists(memFile), "Memory file should be created after first load"); // Delete .dct to force fallback string dictFile = Path.Combine(tempDir.FullName, "coredict.dct"); if (File.Exists(dictFile)) File.Delete(dictFile); // Second load: should read from .mem wordDict = WordDictionary.GetInstance(); CheckWordDictionary(wordDict); } private void CheckWordDictionary(WordDictionary wordDict) { Assert.AreEqual(30, wordDict.GetFrequency("尼".ToCharArray()), "Frequency for '尼' is incorrect."); Assert.AreEqual(0, wordDict.GetFrequency("missing".ToCharArray()), "Expected frequency 0 for unknown word."); } } } ``` --- ### 👀 Questions - Would you prefer I keep the **inline comments**? I wrote them to help future contributors understand each step, but happy to clean them up. - Do you recommend adding more **edge cases or assertion types**? - Any feedback on the **structure or naming** before I finalize the PR? --- Thanks again for all the help — this was a rewarding debugging experience! 🙌 ```` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@lucenenet.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org