This is an automated email from the ASF dual-hosted git repository.
laimis pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git
The following commit(s) were added to refs/heads/master by this push:
new e2ba448cd BREAKING: Analyzing infix suggester virtual method fix (#848)
e2ba448cd is described below
commit e2ba448cdb7061a76c59794311c61f0b79c62e42
Author: Laimonas Simutis <[email protected]>
AuthorDate: Thu May 4 06:57:56 2023 -0700
BREAKING: Analyzing infix suggester virtual method fix (#848)
---
.../Suggest/Analyzing/AnalyzingInfixSuggester.cs | 59 ++++++++++-------
...lyzingInfixSuggesterIndexWriterConfigFactory.cs | 74 ++++++++++++++++++++++
2 files changed, 109 insertions(+), 24 deletions(-)
diff --git
a/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingInfixSuggester.cs
b/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingInfixSuggester.cs
index b964ed797..7a62cee32 100644
--- a/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingInfixSuggester.cs
+++ b/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingInfixSuggester.cs
@@ -1,7 +1,6 @@
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.NGram;
using Lucene.Net.Analysis.TokenAttributes;
-using Lucene.Net.Codecs.Lucene46;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Index.Sorter;
@@ -92,6 +91,8 @@ namespace Lucene.Net.Search.Suggest.Analyzing
private readonly Directory dir;
internal readonly int minPrefixChars;
private readonly bool commitOnBuild;
+ // LUCENENET specific - index writer config factory for extending
classes
+ private readonly IAnalyzingInfixSuggesterIndexWriterConfigFactory
indexWriterConfigFactory;
/// <summary>
/// Used for ongoing NRT additions/updates. </summary>
@@ -163,47 +164,57 @@ namespace Lucene.Net.Search.Suggest.Analyzing
// LUCENENET specific - LUCENE-5889, a 4.11.0 feature. (Code moved
from other constructor to here.)
public AnalyzingInfixSuggester(LuceneVersion matchVersion, Directory
dir, Analyzer indexAnalyzer,
Analyzer queryAnalyzer, int minPrefixChars, bool commitOnBuild)
+ : this(new AnalyzingInfixSuggesterIndexWriterConfigFactory(SORT),
matchVersion, dir, indexAnalyzer, queryAnalyzer, minPrefixChars, commitOnBuild)
+ {
+ }
+
+ /// <summary>
+ /// Create a new instance, loading from a previously built
+ /// <see cref="AnalyzingInfixSuggester"/> directory, if it exists.
This directory must be
+ /// private to the infix suggester (i.e., not an external
+ /// Lucene index). Note that <see cref="Dispose()"/>
+ /// will also dispose the provided directory.
+ /// </summary>
+ /// <param name="minPrefixChars"> Minimum number of leading characters
+ /// before <see cref="PrefixQuery"/> is used (default 4).
+ /// Prefixes shorter than this are indexed as character
+ /// ngrams (increasing index size but making lookups
+ /// faster). </param>
+ /// <param name="commitOnBuild"> Call commit after the index has
finished building. This
+ /// would persist the suggester index to disk and future instances of
this suggester can
+ /// use this pre-built dictionary. </param>
+ /// <param name="indexWriterConfigFactory"> Factory for creating the
<see cref="IndexWriterConfig"/>. </param>
+ // LUCENENET specific - added indexWriterConfigFactory parameter to
allow for customizing the index writer config.
+ public
AnalyzingInfixSuggester(IAnalyzingInfixSuggesterIndexWriterConfigFactory
indexWriterConfigFactory, LuceneVersion matchVersion,
+ Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int
minPrefixChars, bool commitOnBuild)
{
if (minPrefixChars < 0)
{
throw new ArgumentOutOfRangeException(nameof(minPrefixChars),
"minPrefixChars must be >= 0; got: " + minPrefixChars);// LUCENENET specific -
changed from IllegalArgumentException to ArgumentOutOfRangeException (.NET
convention)
}
+ if (indexWriterConfigFactory is null) throw new
ArgumentNullException(nameof(indexWriterConfigFactory));
+
this.m_queryAnalyzer = queryAnalyzer;
this.m_indexAnalyzer = indexAnalyzer;
this.matchVersion = matchVersion;
this.dir = dir;
this.minPrefixChars = minPrefixChars;
this.commitOnBuild = commitOnBuild;
+ this.indexWriterConfigFactory = indexWriterConfigFactory;
if (DirectoryReader.IndexExists(dir))
{
// Already built; open it:
- writer = new IndexWriter(dir,
GetIndexWriterConfig(matchVersion, GetGramAnalyzer(), OpenMode.APPEND));
+ var config = indexWriterConfigFactory.Get(matchVersion,
GetGramAnalyzer(), OpenMode.APPEND);
+ writer = new IndexWriter(dir, config);
m_searcherMgr = new SearcherManager(writer, true, null);
}
}
- /// <summary>
- /// Override this to customize index settings, e.g. which
- /// codec to use.
- /// </summary>
- protected internal virtual IndexWriterConfig
GetIndexWriterConfig(LuceneVersion matchVersion,
- Analyzer indexAnalyzer, OpenMode openMode)
- {
- IndexWriterConfig iwc = new IndexWriterConfig(matchVersion,
indexAnalyzer)
- {
- Codec = new Lucene46Codec(),
- OpenMode = openMode
- };
-
- // This way all merged segments will be sorted at
- // merge time, allow for per-segment early termination
- // when those segments are searched:
- iwc.MergePolicy = new SortingMergePolicy(iwc.MergePolicy, SORT);
-
- return iwc;
- }
+ /// LUCENENET specific - moved IndexWriterConfig GetIndexWriterConfig
to
+ /// <see cref="AnalyzingInfixSuggesterIndexWriterConfigFactory"/> class
+ /// to allow for customizing the index writer config.
/// <summary>
/// Subclass can override to choose a specific
@@ -234,7 +245,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
{
// First pass: build a temporary normal Lucene index,
// just indexing the suggestions as they iterate:
- writer = new IndexWriter(dir,
GetIndexWriterConfig(matchVersion, GetGramAnalyzer(), OpenMode.CREATE));
+ writer = new IndexWriter(dir,
indexWriterConfigFactory.Get(matchVersion, GetGramAnalyzer(), OpenMode.CREATE));
//long t0 = System.nanoTime();
// TODO: use threads?
@@ -339,7 +350,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
m_searcherMgr.Dispose();
m_searcherMgr = null;
}
- writer = new IndexWriter(dir,
GetIndexWriterConfig(matchVersion, GetGramAnalyzer(), OpenMode.CREATE));
+ writer = new IndexWriter(dir,
indexWriterConfigFactory.Get(matchVersion, GetGramAnalyzer(), OpenMode.CREATE));
m_searcherMgr = new SearcherManager(writer, true, null);
}
}
diff --git
a/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingInfixSuggesterIndexWriterConfigFactory.cs
b/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingInfixSuggesterIndexWriterConfigFactory.cs
new file mode 100644
index 000000000..f541e79ea
--- /dev/null
+++
b/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingInfixSuggesterIndexWriterConfigFactory.cs
@@ -0,0 +1,74 @@
+using Lucene.Net.Analysis;
+using Lucene.Net.Codecs.Lucene46;
+using Lucene.Net.Index;
+using Lucene.Net.Index.Sorter;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Search.Suggest.Analyzing
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Generic interface that can be used to customize the index writer to
+ /// be used by <see cref="AnalyzingInfixSuggester"/>.
+ /// <para/>
+ /// This interface is specific to Lucene.NET, where factory classes are used to allow customization
+ /// as opposed to making virtual method calls from the constructor.
+ /// </summary>
+ public interface IAnalyzingInfixSuggesterIndexWriterConfigFactory
+ {
+ IndexWriterConfig Get(LuceneVersion matchVersion, Analyzer
indexAnalyzer, OpenMode openMode);
+ }
+
+ /// <summary>
+ /// Default <see cref="IndexWriterConfig"/> factory for <see
cref="AnalyzingInfixSuggester"/>.
+ /// <para/>
+ /// </summary>
+ public class AnalyzingInfixSuggesterIndexWriterConfigFactory :
IAnalyzingInfixSuggesterIndexWriterConfigFactory
+ {
+ private Sort sort;
+
+ /// <summary>
+ /// Creates a new config factory that uses the given <see cref="Sort"/> in the sorting merge policy.
+ /// </summary>
+ public AnalyzingInfixSuggesterIndexWriterConfigFactory(Sort sort)
+ {
+ this.sort = sort;
+ }
+
+ /// <summary>
+ /// Override this to customize index settings, e.g. which
+ /// codec to use.
+ /// </summary>
+ public virtual IndexWriterConfig Get(LuceneVersion matchVersion,
Analyzer indexAnalyzer, OpenMode openMode)
+ {
+ IndexWriterConfig iwc = new IndexWriterConfig(matchVersion,
indexAnalyzer)
+ {
+ Codec = new Lucene46Codec(),
+ OpenMode = openMode
+ };
+
+ // This way all merged segments will be sorted at
+ // merge time, allow for per-segment early termination
+ // when those segments are searched:
+ iwc.MergePolicy = new SortingMergePolicy(iwc.MergePolicy, sort);
+
+ return iwc;
+ }
+ }
+}
\ No newline at end of file