This is an automated email from the ASF dual-hosted git repository. nightowl888 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/lucenenet.git
commit e8d9d7b9439c160b7fdaab4f2d99e6e0ec12aa42 Author: Shad Storhaug <[email protected]> AuthorDate: Wed Oct 26 21:36:25 2022 +0700 PERFORMANCE: Lucene.Net.Analysis.CharFilters.HTMLStripCharFilter: Removed allocation during parse of hexadecimal number by using J2N.Numerics.Int32 to specify index and length. Also added a CharArrayFormatter struct to defer the allocation of constructing a string until after an assertion failure. --- .../Analysis/CharFilter/HTMLStripCharFilter.cs | 14 ++++---- src/Lucene.Net/Support/Text/CharArrayFormatter.cs | 42 ++++++++++++++++++++++ 2 files changed, 49 insertions(+), 7 deletions(-) diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs index 9f371c984..e1103fdc9 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs @@ -2,6 +2,7 @@ using J2N; using Lucene.Net.Analysis.Util; using Lucene.Net.Diagnostics; +using Lucene.Net.Support.Text; using Lucene.Net.Util; using System; using System.Collections.Generic; @@ -31371,10 +31372,11 @@ namespace Lucene.Net.Analysis.CharFilters inputSegment.Write(zzBuffer, zzStartRead, matchLength); if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars - string decimalCharRef = YyText(); - if (!int.TryParse(decimalCharRef, NumberStyles.Integer, CultureInfo.InvariantCulture, out int codePoint)) + // LUCENENET: Originally, we got the value of YyText(), which allocates..so we can eliminate the allocation + // by grabbing the values YyText() converts to a string: new string(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); + if (!J2N.Numerics.Int32.TryParse(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead, radix: 10, out int codePoint)) { - if (Debugging.AssertsEnabled) Debugging.Assert(false, "Exception parsing code point '{0}'", decimalCharRef); + if (Debugging.AssertsEnabled) Debugging.Assert(false, "Exception parsing code point '{0}'", new CharArrayFormatter(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead)); } if (codePoint <= 0x10FFFF) { @@ -31625,11 +31627,9 @@ namespace Lucene.Net.Analysis.CharFilters inputSegment.Write(zzBuffer, zzStartRead, matchLength); if (matchLength <= 6) { // 10FFFF: max 6 hex chars - string hexCharRef - = new string(zzBuffer, zzStartRead + 1, matchLength - 1); - if (!int.TryParse(hexCharRef, NumberStyles.HexNumber, CultureInfo.InvariantCulture, out int codePoint)) + if (!J2N.Numerics.Int32.TryParse(zzBuffer, zzStartRead + 1, matchLength - 1, radix: 16, out int codePoint)) { - if (Debugging.AssertsEnabled) Debugging.Assert(false, "Exception parsing hex code point '{0}'", hexCharRef); + if (Debugging.AssertsEnabled) Debugging.Assert(false, "Exception parsing hex code point '{0}'", new CharArrayFormatter(zzBuffer, zzStartRead + 1, matchLength - 1)); } if (codePoint <= 0x10FFFF) { diff --git a/src/Lucene.Net/Support/Text/CharArrayFormatter.cs b/src/Lucene.Net/Support/Text/CharArrayFormatter.cs new file mode 100644 index 000000000..43673aa8e --- /dev/null +++ b/src/Lucene.Net/Support/Text/CharArrayFormatter.cs @@ -0,0 +1,42 @@ +namespace Lucene.Net.Support.Text +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// <summary> + /// LUCENENET specific simple formatter to pass a value to + /// <see cref="Lucene.Net.Diagnostics.Debugging.Assert{T0}(bool, string, T0)"/> + /// in order to defer allocating until the assert fails. + /// </summary> + internal struct CharArrayFormatter + { + private char[] value; + private int startIndex; + private int length; + public CharArrayFormatter(char[] value, int startIndex, int length) + { + this.value = value; + this.startIndex = startIndex; + this.length = length; + } + + public override string ToString() + { + return new string(value, startIndex, length); + } + } +}
