Lucene.Net.Benchmark: Created a simple English number formatter to spell out numbers into words. Since we don't need localization, this is a sufficient replacement for the ICU RuleBasedNumberFormat.
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/1cfbd8b7 Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/1cfbd8b7 Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/1cfbd8b7 Branch: refs/heads/master Commit: 1cfbd8b7c35c7f1ae2bd616b44d752eef4d7d180 Parents: a60c5ef Author: Shad Storhaug <s...@shadstorhaug.com> Authored: Tue Aug 1 21:11:54 2017 +0700 Committer: Shad Storhaug <s...@shadstorhaug.com> Committed: Wed Aug 2 09:55:15 2017 +0700 ---------------------------------------------------------------------- .../ByTask/Feeds/LongToEnglishContentSource.cs | 6 +- .../ByTask/Feeds/LongToEnglishQueryMaker.cs | 4 +- .../Lucene.Net.Benchmark.csproj | 1 + .../Support/EnglishNumberFormatExtensions.cs | 186 +++++++++++++++++++ .../Lucene.Net.Tests.Benchmark.csproj | 1 + .../TestEnglishNumberFormatExtensions.cs | 38 ++++ 6 files changed, 231 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1cfbd8b7/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishContentSource.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishContentSource.cs b/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishContentSource.cs index fadab82..7c407a2 100644 --- a/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishContentSource.cs +++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishContentSource.cs @@ -1,4 +1,5 @@ -using System; +using Lucene.Net.Support; +using System; using System.Globalization; namespace Lucene.Net.Benchmarks.ByTask.Feeds @@ -55,8 +56,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds } } - // LUCENENET TODO: Rules based number formatting...(from ICU) - docData.Body = curCounter.ToString(); //rnbf.format(curCounter); + docData.Body = curCounter.ToWords(); //rnbf.format(curCounter); docData.Name = 
"doc_" + curCounter.ToString(CultureInfo.InvariantCulture); docData.Title = "title_" + curCounter.ToString(CultureInfo.InvariantCulture); docData.SetDate(new DateTime()); http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1cfbd8b7/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishQueryMaker.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishQueryMaker.cs b/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishQueryMaker.cs index f565eb8..78ac924 100644 --- a/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishQueryMaker.cs +++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishQueryMaker.cs @@ -4,6 +4,7 @@ using Lucene.Net.Benchmarks.ByTask.Tasks; using Lucene.Net.Benchmarks.ByTask.Utils; using Lucene.Net.QueryParsers.Classic; using Lucene.Net.Search; +using Lucene.Net.Support; using Lucene.Net.Util; using System; @@ -48,9 +49,8 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds { lock (this) { - // LUCENENET TODO: Rules based number formatter (from ICU) //return parser.Parse("" + rnbf.format(GetNextCounter()) + ""); - return m_parser.Parse(GetNextCounter().ToString()); + return m_parser.Parse(GetNextCounter().ToWords()); } } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1cfbd8b7/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj b/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj index 0241099..f00cd18 100644 --- a/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj +++ b/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj @@ -159,6 +159,7 @@ <Compile Include="Quality\Utils\QualityQueriesFinder.cs" /> <Compile Include="Quality\Utils\SimpleQQParser.cs" /> <Compile Include="Quality\Utils\SubmissionReport.cs" /> + <Compile Include="Support\EnglishNumberFormatExtensions.cs" /> <Compile Include="Utils\ExtractReuters.cs" /> <Compile 
using System;
using System.Text;

namespace Lucene.Net.Support
{
    /// <summary>
    /// Extension methods to spell out numbers into English.
    /// <para/>
    /// Inspiration: https://stackoverflow.com/a/2601001
    /// </summary>
    public static class EnglishNumberFormatExtensions
    {
        private const long Quadrillion = Trillion * 1000;
        private const long Trillion = Billion * 1000;
        private const long Billion = Million * 1000;
        private const long Million = Thousand * 1000;
        private const long Thousand = Hundred * 10;
        private const long Hundred = 100;

        // Scale values in descending order, paired index-for-index with ScaleNames.
        private static readonly ulong[] ScaleValues =
        {
            (ulong)Quadrillion, (ulong)Trillion, (ulong)Billion,
            (ulong)Million, (ulong)Thousand, (ulong)Hundred
        };

        private static readonly string[] ScaleNames =
        {
            "quadrillion", "trillion", "billion", "million", "thousand", "hundred"
        };

        // Indexed by the tens digit; slots 0 and 1 are never read because
        // values below twenty are taken from Ones.
        private static readonly string[] Tens =
        {
            "", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"
        };

        // Indexed by the value itself (1..19); slot 0 is never read because
        // zero is handled by the public entry point.
        private static readonly string[] Ones =
        {
            "", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten",
            "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen",
            "eighteen", "nineteen"
        };

        /// <summary>
        /// Returns the spelled-out English words for the provided <paramref name="value"/>.
        /// </summary>
        /// <param name="value">The number to spell out.</param>
        /// <returns>The English words, e.g. <c>"twenty-one"</c> for 21.</returns>
        public static string ToWords(this int value)
        {
            return ToWords((long)value);
        }

        /// <summary>
        /// Returns the spelled-out English words for the provided <paramref name="value"/>.
        /// <para/>
        /// Zero yields <c>"zero"</c>; negative values are prefixed with <c>"negative "</c>.
        /// The largest scale word used is "quadrillion", so values of one quintillion or
        /// more are expressed as thousands of quadrillions.
        /// </summary>
        /// <param name="value">The number to spell out. Every <see cref="long"/> value,
        /// including <see cref="long.MinValue"/>, is supported.</param>
        /// <returns>The English words for <paramref name="value"/>.</returns>
        public static string ToWords(this long value)
        {
            if (value == 0)
            {
                return "zero";
            }

            StringBuilder builder = new StringBuilder();
            ulong magnitude;

            if (value < 0)
            {
                builder.Append("negative ");

                // Math.Abs(long.MinValue) throws OverflowException because the magnitude
                // does not fit in a long. Negating (value + 1) is always representable,
                // and the final + 1 is performed in unsigned arithmetic.
                magnitude = (ulong)(-(value + 1)) + 1UL;
            }
            else
            {
                magnitude = (ulong)value;
            }

            AppendWords(magnitude, builder);
            return builder.ToString();
        }

        /// <summary>
        /// Appends the words for a strictly positive <paramref name="value"/> to
        /// <paramref name="builder"/>.
        /// </summary>
        private static void AppendWords(ulong value, StringBuilder builder)
        {
            // Peel off each scale from largest to smallest. The count of each scale is
            // itself spelled out recursively (it is always at least one).
            for (int i = 0; i < ScaleValues.Length; i++)
            {
                ulong scale = ScaleValues[i];
                if (value >= scale)
                {
                    ulong unit = value / scale;
                    value -= unit * scale;

                    AppendWords(unit, builder);
                    builder.Append(' ').Append(ScaleNames[i]);
                    if (value > 0)
                    {
                        builder.Append(' ');
                    }
                }
            }

            // At this point value is below one hundred.
            if (value >= 20)
            {
                builder.Append(Tens[(int)(value / 10)]);
                value %= 10;
                if (value > 0)
                {
                    builder.Append('-');
                }
            }

            if (value > 0)
            {
                builder.Append(Ones[(int)value]);
            }
        }
    }
}
using Lucene.Net.Attributes;
using NUnit.Framework;

namespace Lucene.Net.Support
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Tests for the <c>ToWords()</c> number spell-out extension methods.
    /// </summary>
    [TestFixture]
    public class TestEnglishNumberFormatExtensions
    {
        /// <summary>
        /// Checks the spelled-out form of representative values: zero, a negative value,
        /// a teen, exact scale values, and multi-group values for both the
        /// <see cref="int"/> and <see cref="long"/> overloads.
        /// </summary>
        [Test, LuceneNetSpecific]
        public void TestToWords()
        {
            // Special-cased values: zero and the "negative" prefix.
            Assert.AreEqual("zero", 0.ToWords());
            Assert.AreEqual("negative twenty-one", (-21).ToWords());

            // Values below twenty have their own words and take no hyphen.
            Assert.AreEqual("thirteen", 13.ToWords());

            // Exact scale values must not leave a trailing separator.
            Assert.AreEqual("one hundred", 100.ToWords());
            Assert.AreEqual("one thousand", 1000.ToWords());
            Assert.AreEqual("one million", 1000000.ToWords());

            Assert.AreEqual("twenty-one", 21.ToWords());
            Assert.AreEqual("one thousand two hundred thirty-four", 1234.ToWords());
            Assert.AreEqual("six million four hundred ninety-one thousand three hundred forty-eight", 6491348.ToWords());
            Assert.AreEqual("one hundred thirty", 130.ToWords());
            Assert.AreEqual("one hundred thirty-seven", 137.ToWords());
            Assert.AreEqual("seven hundred forty-nine million one hundred thirty-two thousand one hundred forty-six", 749132146.ToWords());

            // This literal exceeds int.MaxValue, so it exercises the long overload.
            Assert.AreEqual("nine hundred ninety-nine billion seven hundred forty-nine million one hundred thirty-two thousand one hundred forty-six", 999749132146.ToWords());
        }
    }
}