Copilot commented on code in PR #1154: URL: https://github.com/apache/lucenenet/pull/1154#discussion_r2286827222
##########
src/Lucene.Net.Analysis.SmartCn/Hhmm/BigramDictionary.cs:
##########
@@ -286,37 +303,37 @@ public virtual void LoadFromFile(string dctFilePath)
 int j = 0;
 while (j < cnt)
 {
- dctFile.Read(intBuffer, 0, intBuffer.Length);
- buffer[0] = ByteBuffer.Wrap(intBuffer).SetOrder(ByteOrder.LittleEndian)
- .GetInt32();// frequency
- dctFile.Read(intBuffer, 0, intBuffer.Length);
- buffer[1] = ByteBuffer.Wrap(intBuffer).SetOrder(ByteOrder.LittleEndian)
- .GetInt32();// length
- dctFile.Read(intBuffer, 0, intBuffer.Length);
- // buffer[2] = ByteBuffer.wrap(intBuffer).order(
- // ByteOrder.LITTLE_ENDIAN).getInt();// handle
+ // LUCENENET: Use BinaryReader to decode little endian instead of ByteBuffer, since this is the default in .NET
+ buffer[0] = reader.ReadInt32(); // frequency
+ buffer[1] = reader.ReadInt32(); // length
+ buffer[2] = reader.ReadInt32(); // Skip handle value (unused)
 length = buffer[1];
- if (length > 0)
+ if (length > 0 && dctFile.Position + length <= dctFile.Length)

Review Comment:
The condition `dctFile.Position + length <= dctFile.Length` may cause issues because `dctFile.Position` returns the current position in the underlying stream, but `BinaryReader` may have buffered data. This could lead to incorrect length validation. Consider using `reader.BaseStream.Position` instead of `dctFile.Position` for consistency.
```suggestion
if (length > 0 && reader.BaseStream.Position + length <= dctFile.Length)
```

##########
src/Lucene.Net.Analysis.SmartCn/Hhmm/BigramDictionary.cs:
##########
@@ -286,37 +303,37 @@ public virtual void LoadFromFile(string dctFilePath)
 int j = 0;
 while (j < cnt)
 {
- dctFile.Read(intBuffer, 0, intBuffer.Length);
- buffer[0] = ByteBuffer.Wrap(intBuffer).SetOrder(ByteOrder.LittleEndian)
- .GetInt32();// frequency
- dctFile.Read(intBuffer, 0, intBuffer.Length);
- buffer[1] = ByteBuffer.Wrap(intBuffer).SetOrder(ByteOrder.LittleEndian)
- .GetInt32();// length
- dctFile.Read(intBuffer, 0, intBuffer.Length);
- // buffer[2] = ByteBuffer.wrap(intBuffer).order(
- // ByteOrder.LITTLE_ENDIAN).getInt();// handle
+ // LUCENENET: Use BinaryReader to decode little endian instead of ByteBuffer, since this is the default in .NET
+ buffer[0] = reader.ReadInt32(); // frequency
+ buffer[1] = reader.ReadInt32(); // length
+ buffer[2] = reader.ReadInt32(); // Skip handle value (unused)

Review Comment:
Reading a value just to discard it is inefficient. Consider using `reader.BaseStream.Seek(4, SeekOrigin.Current)` to skip 4 bytes instead of reading the unused handle value.
```suggestion
reader.BaseStream.Seek(4, SeekOrigin.Current); // Skip handle value (unused)
```

##########
src/Lucene.Net.Analysis.SmartCn/Hhmm/AbstractDictionary.cs:
##########
@@ -33,7 +33,9 @@ namespace Lucene.Net.Analysis.Cn.Smart.Hhmm
 internal abstract class AbstractDictionary
 {
 // LUCENENET specific: cached GB2312 encoding to avoid repeated calls to Encoding.GetEncoding("GB2312")
- protected static readonly Encoding gb2312Encoding = Encoding.GetEncoding("GB2312");
+ protected static readonly Encoding gb2312Encoding = Encoding.GetEncoding("GB2312",
+ EncoderFallback.ExceptionFallback, DecoderFallback.ExceptionFallback);

Review Comment:
Adding `EncoderFallback.ExceptionFallback` and `DecoderFallback.ExceptionFallback` changes the behavior to throw exceptions on invalid characters instead of using replacement characters. This is a breaking change that could cause existing code to fail where it previously worked with fallback behavior. Consider documenting this change or using a separate encoding instance for stricter validation.
```suggestion
protected static readonly Encoding gb2312Encoding = Encoding.GetEncoding("GB2312");
```

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@lucenenet.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org