This is an automated email from the ASF dual-hosted git repository.

nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git

commit e8d9d7b9439c160b7fdaab4f2d99e6e0ec12aa42
Author: Shad Storhaug <[email protected]>
AuthorDate: Wed Oct 26 21:36:25 2022 +0700

    PERFORMANCE: Lucene.Net.Analysis.CharFilters.HTMLStripCharFilter: Removed 
string allocations when parsing decimal and hexadecimal character references 
by using J2N.Numerics.Int32.TryParse with a start index and length on the 
buffer. Also added a CharArrayFormatter struct to defer constructing the 
message string until an assertion actually fails.
---
 .../Analysis/CharFilter/HTMLStripCharFilter.cs     | 14 ++++----
 src/Lucene.Net/Support/Text/CharArrayFormatter.cs  | 42 ++++++++++++++++++++++
 2 files changed, 49 insertions(+), 7 deletions(-)

diff --git 
a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs 
b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
index 9f371c984..e1103fdc9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
@@ -2,6 +2,7 @@
 using J2N;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Diagnostics;
+using Lucene.Net.Support.Text;
 using Lucene.Net.Util;
 using System;
 using System.Collections.Generic;
@@ -31371,10 +31372,11 @@ namespace Lucene.Net.Analysis.CharFilters
                             inputSegment.Write(zzBuffer, zzStartRead, 
matchLength);
                             if (matchLength <= 7)
                             { // 0x10FFFF = 1114111: max 7 decimal chars
-                                string decimalCharRef = YyText();
-                                if (!int.TryParse(decimalCharRef, 
NumberStyles.Integer, CultureInfo.InvariantCulture, out int codePoint))
+                                // LUCENENET: Originally, we got the value of 
YyText(), which allocates. We can eliminate that allocation
+                                // by parsing directly from the values YyText() converts to 
a string: new string(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
+                                if (!J2N.Numerics.Int32.TryParse(zzBuffer, 
zzStartRead, zzMarkedPos - zzStartRead, radix: 10, out int codePoint))
                                 {
-                                    if (Debugging.AssertsEnabled) 
Debugging.Assert(false, "Exception parsing code point '{0}'", decimalCharRef);
+                                    if (Debugging.AssertsEnabled) 
Debugging.Assert(false, "Exception parsing code point '{0}'", new 
CharArrayFormatter(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead));
                                 }
                                 if (codePoint <= 0x10FFFF)
                                 {
@@ -31625,11 +31627,9 @@ namespace Lucene.Net.Analysis.CharFilters
                             inputSegment.Write(zzBuffer, zzStartRead, 
matchLength);
                             if (matchLength <= 6)
                             { // 10FFFF: max 6 hex chars
-                                string hexCharRef
-                                    = new string(zzBuffer, zzStartRead + 1, 
matchLength - 1);
-                                if (!int.TryParse(hexCharRef, 
NumberStyles.HexNumber, CultureInfo.InvariantCulture, out int codePoint))
+                                if (!J2N.Numerics.Int32.TryParse(zzBuffer, 
zzStartRead + 1, matchLength - 1, radix: 16, out int codePoint))
                                 {
-                                    if (Debugging.AssertsEnabled) 
Debugging.Assert(false, "Exception parsing hex code point '{0}'", hexCharRef);
+                                    if (Debugging.AssertsEnabled) 
Debugging.Assert(false, "Exception parsing hex code point '{0}'", new 
CharArrayFormatter(zzBuffer, zzStartRead + 1, matchLength - 1));
                                 }
                                 if (codePoint <= 0x10FFFF)
                                 {
diff --git a/src/Lucene.Net/Support/Text/CharArrayFormatter.cs 
b/src/Lucene.Net/Support/Text/CharArrayFormatter.cs
new file mode 100644
index 000000000..43673aa8e
--- /dev/null
+++ b/src/Lucene.Net/Support/Text/CharArrayFormatter.cs
@@ -0,0 +1,42 @@
+namespace Lucene.Net.Support.Text
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// LUCENENET specific simple formatter to pass a value to
+    /// <see cref="Lucene.Net.Diagnostics.Debugging.Assert{T0}(bool, string, 
T0)"/>
+    /// in order to defer allocating until the assert fails.
+    /// </summary>
+    internal struct CharArrayFormatter
+    {
+        private char[] value;
+        private int startIndex;
+        private int length;
+        public CharArrayFormatter(char[] value, int startIndex, int length)
+        {
+            this.value = value;
+            this.startIndex = startIndex;
+            this.length = length;
+        }
+
+        public override string ToString()
+        {
+            return new string(value, startIndex, length);
+        }
+    }
+}

Reply via email to