NehanPathan commented on code in PR #1154:
URL: https://github.com/apache/lucenenet/pull/1154#discussion_r2058868756


##########
src/Lucene.Net.Tests.Analysis.SmartCn/Hhmm/TestBuildDictionary.cs:
##########
@@ -0,0 +1,110 @@
+using J2N;
+using Lucene.Net.Analysis.Cn.Smart;
+using Lucene.Net.Analysis.Cn.Smart.Hhmm;
+using Lucene.Net.Attributes;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.IO;
+using Assert = Lucene.Net.TestFramework.Assert;
+
+namespace Lucene.Net.Analysis.Cn.Smart.Hhmm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    [LuceneNetSpecific]
+    public class TestBuildDictionary : LuceneTestCase
+    {
+        private DirectoryInfo tempDir;
+
+        public override void OneTimeSetUp()
+        {
+            base.OneTimeSetUp();
+            tempDir = CreateTempDir("smartcn-data");
+            AnalyzerProfile.ANALYSIS_DATA_DIR = tempDir.FullName;
+            using (var zipFileStream = 
typeof(TestBuildDictionary).FindAndGetManifestResourceStream("custom-dictionary-input.zip"))
+            {
+                TestUtil.Unzip(zipFileStream, tempDir);
+            }
+        }
+
+        public override void OneTimeTearDown()
+        {
+            AnalyzerProfile.ANALYSIS_DATA_DIR = null; // Ensure this test data 
is not loaded for other tests
+            base.OneTimeTearDown();
+        }
+
+        [Test]
+        public void TestBigramDictionary()
+        {
+            // First test - builds and loads dictionary from .dict file
+            BigramDictionary bigramDict = BigramDictionary.GetInstance();
+            CheckBigramDictionary(bigramDict);
+            
+            // Ensure .mem file was created
+            string memFile = System.IO.Path.Combine(tempDir.FullName,  
"bigramdict.mem");
+            Assert.IsTrue(File.Exists(memFile), "Memory file should be created 
after first load");
+            
+            // Delete the original .dict file
+            string dictFile = System.IO.Path.Combine(tempDir.FullName,  
"bigramdict.dct");
+            if (File.Exists(dictFile))
+            {
+                File.Delete(dictFile);
+            }
+            
+            // Second test - should load from .mem file now
+            bigramDict = BigramDictionary.GetInstance();
+            CheckBigramDictionary(bigramDict);
+        }
+        
+        private static void CheckBigramDictionary(BigramDictionary bigramDict)
+        {
+            Assert.AreEqual(10, bigramDict.GetFrequency("啊hello".AsSpan()), 
"Frequency for '啊hello' is incorrect.");
+            Assert.AreEqual(20, bigramDict.GetFrequency("阿world".AsSpan()), 
"Frequency for '阿world' is incorrect.");
+        }
+
+        [Test]
+        public void TestWordDictionary()
+        {
+            // First test - builds and loads dictionary from .dict file
+            WordDictionary wordDict = WordDictionary.GetInstance();
+            CheckWordDictionary(wordDict);
+            
+            // Ensure .mem file was created
+            string memFile = System.IO.Path.Combine(tempDir.FullName, 
"coredict.mem");
+            Assert.IsTrue(File.Exists(memFile), "Memory file should be created 
after first load");
+            
+            // Delete the original .dict file
+            string dictFile = System.IO.Path.Combine(tempDir.FullName, 
"coredict.dct");
+            if (File.Exists(dictFile))

Review Comment:
   [@NightOwl888 ]
   ---
   
   Hi Shad,
   
   I’ve made the changes you requested and added some clarifications:
   
   - The condition `if (length > 0 && dctFile.Position + length <= 
dctFile.Length)` has been kept as is, as it ensures that the length does not 
cause any out-of-bounds reads. The check for `dctFile.Position + length <= 
dctFile.Length` is important because it verifies that we are not attempting to 
read beyond the end of the file. While we could remove it, this check is there 
to ensure the stability of the read operation, and I recommend keeping it 
unless there is a specific reason to remove it.
     
   - The `MAX_VALID_LENGTH` limitation (set to 1000) was introduced to restrict 
excessively large reads, which could potentially cause issues with memory 
consumption. However, I understand this value isn't part of the upstream logic. 
If this limitation is unnecessary for now, it can be removed. That said, if we 
encounter scenarios in the future where controlling the maximum length becomes 
important, we can reintroduce it at that point.
   
   - Updated the comments to correctly reference the `intBuffer` array, as it 
was restored in the code.
   - Moved the `tmpword` declaration below the buffer as per your suggestion.
   - Replaced the `if` block for checking the existence of the `.dct` file 
with an assert to ensure the test fails if the file is missing in the temp 
directory.
   
   Let me know if you’d like to make any further changes or if you have any 
additional feedback!
   
   
   ---
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@lucenenet.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to