[
https://issues.apache.org/jira/browse/LUCENE-10350?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Greg Miller resolved LUCENE-10350.
----------------------------------
Fix Version/s: 9.1
10.0 (main)
Resolution: Fixed
> Avoid some null checking for FastTaxonomyFacetCounts#countAll()
> ---------------------------------------------------------------
>
> Key: LUCENE-10350
> URL: https://issues.apache.org/jira/browse/LUCENE-10350
> Project: Lucene - Core
> Issue Type: Improvement
> Reporter: Feng Guo
> Priority: Major
> Fix For: 9.1, 10.0 (main)
>
> Time Spent: 3h 10m
> Remaining Estimate: 0h
>
> I find that
> {{org.apache.lucene.facet.taxonomy.IntTaxonomyFacets#increment()}} is using
> about 2% of the CPU time in luceneutil. The call could probably be replaced with
> {{values[doc]++}}, since {{#countAll}} will never use the hashTable.
> Two changes:
> # No need to check whether liveDocs is null again and again.
> # Call {{values[doc]++}} instead of {{#increment}}, since {{#countAll}} will
> never use the hashTable.
> *Benchmark* (baseline is the newest main, including LUCENE-10346)
> {code:java}
> Task QPS baseline StdDev QPS
> my_modified_version StdDev Pct diff p-value
> IntNRQ 128.51 (27.8%) 120.13
> (27.4%) -6.5% ( -48% - 67%) 0.455
> PKLookup 232.55 (5.0%) 226.26
> (4.2%) -2.7% ( -11% - 6%) 0.065
> Wildcard 178.54 (5.5%) 175.13
> (5.7%) -1.9% ( -12% - 9%) 0.283
> BrowseMonthSSDVFacets 16.37 (6.9%) 16.13
> (4.6%) -1.5% ( -12% - 10%) 0.422
> HighPhrase 211.52 (3.7%) 209.59
> (3.3%) -0.9% ( -7% - 6%) 0.414
> MedPhrase 239.31 (3.2%) 237.14
> (2.5%) -0.9% ( -6% - 4%) 0.311
> HighSloppyPhrase 33.08 (3.3%) 32.79
> (3.5%) -0.9% ( -7% - 6%) 0.407
> Prefix3 171.63 (7.5%) 170.33
> (8.3%) -0.8% ( -15% - 16%) 0.762
> Respell 80.21 (3.3%) 79.74
> (2.7%) -0.6% ( -6% - 5%) 0.530
> LowPhrase 26.21 (3.6%) 26.05
> (2.5%) -0.6% ( -6% - 5%) 0.549
> LowSloppyPhrase 165.34 (2.4%) 164.47
> (2.7%) -0.5% ( -5% - 4%) 0.516
> OrHighNotLow 1984.04 (3.9%) 1974.07
> (5.2%) -0.5% ( -9% - 8%) 0.730
> OrHighMed 93.69 (4.2%) 93.23
> (4.1%) -0.5% ( -8% - 8%) 0.711
> MedSpanNear 12.19 (3.6%) 12.14
> (4.0%) -0.3% ( -7% - 7%) 0.777
> Fuzzy2 98.86 (3.0%) 98.56
> (2.6%) -0.3% ( -5% - 5%) 0.735
> HighTerm 2284.28 (4.3%) 2277.92
> (3.4%) -0.3% ( -7% - 7%) 0.819
> BrowseDayOfYearSSDVFacets 14.65 (4.8%) 14.61
> (4.0%) -0.3% ( -8% - 8%) 0.844
> LowSpanNear 101.85 (1.7%) 101.58
> (2.0%) -0.3% ( -3% - 3%) 0.662
> BrowseRandomLabelSSDVFacets 11.04 (5.4%) 11.02
> (7.2%) -0.2% ( -12% - 13%) 0.902
> OrHighHigh 39.59 (4.2%) 39.49
> (4.1%) -0.2% ( -8% - 8%) 0.859
> Fuzzy1 84.27 (3.1%) 84.11
> (2.3%) -0.2% ( -5% - 5%) 0.826
> AndHighMed 94.85 (5.1%) 94.77
> (6.9%) -0.1% ( -11% - 12%) 0.969
> HighTermDayOfYearSort 179.66 (17.0%) 179.56
> (12.8%) -0.1% ( -25% - 35%) 0.991
> LowTerm 2016.63 (3.5%) 2015.71
> (3.9%) -0.0% ( -7% - 7%) 0.969
> AndHighLow 1011.34 (4.1%) 1011.05
> (5.3%) -0.0% ( -9% - 9%) 0.985
> HighTermTitleBDVSort 121.48 (14.4%) 121.49
> (15.9%) 0.0% ( -26% - 35%) 0.998
> MedTerm 2239.73 (4.6%) 2245.65
> (3.1%) 0.3% ( -7% - 8%) 0.830
> AndHighHigh 102.09 (3.1%) 102.48
> (5.3%) 0.4% ( -7% - 9%) 0.778
> OrNotHighLow 1113.23 (2.3%) 1117.98
> (2.4%) 0.4% ( -4% - 5%) 0.568
> HighSpanNear 1.92 (4.7%) 1.93
> (5.4%) 0.5% ( -9% - 11%) 0.738
> OrHighNotMed 1322.20 (4.3%) 1330.58
> (3.1%) 0.6% ( -6% - 8%) 0.592
> AndHighMedDayTaxoFacets 65.82 (1.8%) 66.30
> (2.5%) 0.7% ( -3% - 5%) 0.295
> OrNotHighMed 1262.49 (3.0%) 1272.12
> (3.8%) 0.8% ( -5% - 7%) 0.480
> MedTermDayTaxoFacets 52.07 (4.7%) 52.54
> (6.9%) 0.9% ( -10% - 13%) 0.628
> OrNotHighHigh 944.56 (3.7%) 953.87
> (3.0%) 1.0% ( -5% - 7%) 0.352
> MedSloppyPhrase 64.28 (5.4%) 64.92
> (4.7%) 1.0% ( -8% - 11%) 0.531
> OrHighLow 921.30 (2.8%) 930.66
> (2.6%) 1.0% ( -4% - 6%) 0.232
> AndHighHighDayTaxoFacets 23.67 (3.4%) 23.93
> (4.2%) 1.1% ( -6% - 9%) 0.380
> OrHighNotHigh 1186.72 (3.3%) 1202.71
> (3.6%) 1.3% ( -5% - 8%) 0.222
> HighTermMonthSort 160.65 (14.7%) 164.05
> (14.0%) 2.1% ( -23% - 36%) 0.641
> OrHighMedDayTaxoFacets 15.46 (8.0%) 15.82
> (9.0%) 2.3% ( -13% - 21%) 0.393
> LowIntervalsOrdered 67.72 (6.2%) 69.70
> (7.8%) 2.9% ( -10% - 17%) 0.188
> TermDTSort 140.38 (14.3%) 144.53
> (15.1%) 3.0% ( -23% - 37%) 0.525
> MedIntervalsOrdered 30.74 (7.2%) 31.79
> (8.9%) 3.4% ( -11% - 21%) 0.186
> HighIntervalsOrdered 23.08 (9.6%) 24.19
> (11.4%) 4.8% ( -14% - 28%) 0.151
> BrowseRandomLabelTaxoFacets 12.83 (10.3%) 15.91
> (56.9%) 24.0% ( -39% - 101%) 0.064
> BrowseDateTaxoFacets 14.28 (13.0%) 18.66
> (68.0%) 30.7% ( -44% - 128%) 0.047
> BrowseDayOfYearTaxoFacets 14.37 (13.1%) 18.92
> (70.0%) 31.7% ( -45% - 132%) 0.047
> BrowseMonthTaxoFacets 16.23 (12.6%) 24.57
> (66.4%) 51.4% ( -24% - 149%) 0.001
> {code}
> baseline
> {code:java}
> 5.48% 23030
> org.apache.lucene.codecs.lucene90.Lucene90PostingsReader$EverythingEnum#nextPosition()
> 4.31% 18110
> org.apache.lucene.queries.spans.NearSpansOrdered#stretchToOrder()
> 3.68% 15450
> org.apache.lucene.util.packed.DirectReader$DirectPackedReader20#get()
> 3.65% 15362
> org.apache.lucene.queries.spans.TermSpans#nextStartPosition()
> 3.23% 13569
> org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts#countOneSegment()
> 2.66% 11187
> org.apache.lucene.queries.spans.SpanScorer#setFreqCurrentDoc()
> 2.62% 11023
> org.apache.lucene.codecs.lucene90.Lucene90PostingsReader$EverythingEnum#skipPositions()
> 2.15% 9056
> org.apache.lucene.queries.spans.NearSpansOrdered#nextStartPosition()
> 2.13% 8934
> org.apache.lucene.codecs.lucene90.Lucene90DocValuesProducer$DenseNumericDocValues#nextDoc()
> 1.86% 7818
> org.apache.lucene.facet.taxonomy.IntTaxonomyFacets#increment()
> 1.80% 7552
> org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts#countAll()
> 1.67% 7024 jdk.internal.misc.Unsafe#convEndian()
> 1.63% 6860
> org.apache.lucene.codecs.lucene90.Lucene90DocValuesProducer$20#ordValue()
> 1.56% 6576
> org.apache.lucene.util.packed.DirectReader$DirectPackedReader4#get()
> 1.54% 6461 java.nio.Buffer#checkIndex()
> 1.45% 6113 org.apache.lucene.search.ConjunctionDISI#doNext()
> 1.41% 5947
> org.apache.lucene.codecs.lucene90.Lucene90DocValuesProducer$4#longValue()
> 1.33% 5590
> org.apache.lucene.store.ByteBufferGuard#ensureValid()
> 1.28% 5377
> org.apache.lucene.queries.intervals.OrderedIntervalsSource$OrderedIntervalIterator#nextInterval()
> 1.25% 5273
> org.apache.lucene.queries.spans.NearSpansOrdered#twoPhaseCurrentDocMatches()
> 1.16% 4877
> org.apache.lucene.codecs.lucene90.Lucene90PostingsReader$EverythingEnum#advance()
> 1.16% 4868
> org.apache.lucene.util.packed.DirectReader$DirectPackedReader12#get()
> 1.15% 4855
> org.apache.lucene.queries.spans.TermSpans#endPosition()
> 1.15% 4852 java.nio.Buffer#scope()
> 1.15% 4838
> org.apache.lucene.search.similarities.BM25Similarity$BM25Scorer#score()
> 1.14% 4775 java.nio.DirectByteBuffer#ix()
> 1.13% 4735
> org.apache.lucene.queries.spans.NearSpansOrdered#advancePosition()
> 1.01% 4229 org.apache.lucene.store.ByteBufferGuard#getByte()
> 1.00% 4223
> org.apache.lucene.codecs.lucene90.Lucene90PostingsReader$BlockImpactsPostingsEnum#advance()
> 0.97% 4065
> jdk.internal.misc.ScopedMemoryAccess#getByteInternal()
> {code}
> candidate
> {code:java}
> 5.15% 21244
> org.apache.lucene.codecs.lucene90.Lucene90PostingsReader$EverythingEnum#nextPosition()
> 4.85% 19998
> org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts#countAll()
> 3.78% 15561
> org.apache.lucene.util.packed.DirectReader$DirectPackedReader20#get()
> 3.74% 15406
> org.apache.lucene.queries.spans.NearSpansOrdered#stretchToOrder()
> 3.41% 14066
> org.apache.lucene.queries.spans.TermSpans#nextStartPosition()
> 3.27% 13463
> org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts#countOneSegment()
> 2.88% 11859
> org.apache.lucene.codecs.lucene90.Lucene90PostingsReader$EverythingEnum#skipPositions()
> 2.75% 11352
> org.apache.lucene.queries.spans.SpanScorer#setFreqCurrentDoc()
> 2.04% 8424
> org.apache.lucene.queries.spans.NearSpansOrdered#nextStartPosition()
> 1.72% 7102
> org.apache.lucene.codecs.lucene90.Lucene90DocValuesProducer$20#ordValue()
> 1.69% 6967 jdk.internal.misc.Unsafe#convEndian()
> 1.57% 6485
> org.apache.lucene.util.packed.DirectReader$DirectPackedReader4#get()
> 1.43% 5878 java.nio.Buffer#checkIndex()
> 1.41% 5813 org.apache.lucene.search.ConjunctionDISI#doNext()
> 1.34% 5535
> org.apache.lucene.codecs.lucene90.Lucene90PostingsReader$EverythingEnum#advance()
> 1.28% 5269
> org.apache.lucene.store.ByteBufferGuard#ensureValid()
> 1.24% 5122
> org.apache.lucene.codecs.lucene90.Lucene90DocValuesProducer$4#longValue()
> 1.21% 4992
> jdk.internal.misc.ScopedMemoryAccess#getByteInternal()
> 1.21% 4981
> org.apache.lucene.queries.intervals.OrderedIntervalsSource$OrderedIntervalIterator#nextInterval()
> 1.17% 4809 java.nio.DirectByteBuffer#ix()
> 1.12% 4628
> org.apache.lucene.queries.spans.NearSpansOrdered#advancePosition()
> 1.12% 4601
> org.apache.lucene.search.similarities.BM25Similarity$BM25Scorer#score()
> 1.11% 4585 org.apache.lucene.store.ByteBufferGuard#getByte()
> 1.11% 4575
> org.apache.lucene.util.packed.DirectReader$DirectPackedReader12#get()
> 1.07% 4417
> org.apache.lucene.codecs.lucene90.ForUtil#expand8()
> 1.05% 4332 java.nio.Buffer#scope()
> 1.02% 4195
> org.apache.lucene.queries.spans.NearSpansOrdered#twoPhaseCurrentDocMatches()
> 1.01% 4150
> org.apache.lucene.codecs.lucene90.Lucene90DocValuesProducer$20#nextDoc()
> 0.99% 4101
> org.apache.lucene.queries.spans.TermSpans#endPosition()
> 0.99% 4065
> org.apache.lucene.codecs.lucene90.Lucene90PostingsReader$BlockImpactsPostingsEnum#advance()
> {code}
>
>
--
This message was sent by Atlassian Jira
(v8.20.1#820001)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]