This is an automated email from the ASF dual-hosted git repository.
mkhl pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new 05b66b9669d SOLR-16682: transfer MLT Component queries via {!bool}
(#1260)
05b66b9669d is described below
commit 05b66b9669de064ef9b64733200b8916136a3dba
Author: Mikhail Khludnev <[email protected]>
AuthorDate: Tue Feb 28 16:24:55 2023 +0300
SOLR-16682: transfer MLT Component queries via {!bool} (#1260)
---------
Co-authored-by: David Smiley <[email protected]>
---
solr/CHANGES.txt | 3 +
.../apache/solr/handler/MoreLikeThisHandler.java | 93 ++++++---------
.../handler/component/MoreLikeThisComponent.java | 126 +++++++++++++--------
.../component/DistributedMLTComponentTest.java | 28 ++++-
4 files changed, 142 insertions(+), 108 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 8b8fd7cceef..97ff54c8134 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -225,6 +225,9 @@ Bug Fixes
* SOLR-16679: Fix solr.jetty.ssl.verifyClientHostName logging (Kevin Risden)
+* SOLR-16682: MoreLikeThis Component fails with SyntaxError: Cannot parse if
document terms contain symbols from query parser syntax
+ (Mikhail Khludnev)
+
Build
---------------------
* Upgrade forbiddenapis to 3.4 (Uwe Schindler)
diff --git
a/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
b/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
index 2192aa9cabe..47ca8e3108b 100644
--- a/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
@@ -52,7 +52,6 @@ import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.request.SimpleFacets;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
-import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
@@ -130,8 +129,6 @@ public class MoreLikeThisHandler extends RequestHandlerBase
{
// Hold on to the interesting terms if relevant
TermStyle termStyle =
TermStyle.get(params.get(MoreLikeThisParams.INTERESTING_TERMS));
- List<InterestingTerm> interesting =
- (termStyle == TermStyle.NONE) ? null : new
ArrayList<>(mlt.mlt.getMaxQueryTerms());
DocListAndSet mltDocs = null;
@@ -159,7 +156,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase
{
// Find documents MoreLikeThis - either with a reader or a query
//
--------------------------------------------------------------------------------
if (reader != null) {
- mltDocs = mlt.getMoreLikeThis(reader, start, rows, filters,
interesting, flags);
+ mltDocs = mlt.getMoreLikeThis(reader, start, rows, filters, flags);
} else if (q != null) {
// Matching options
boolean includeMatch =
params.getBool(MoreLikeThisParams.MATCH_INCLUDE, true);
@@ -177,7 +174,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase
{
if (iterator.hasNext()) {
// do a MoreLikeThis query for each document in results
int id = iterator.nextDoc();
- mltDocs = mlt.getMoreLikeThis(id, start, rows, filters,
interesting, flags);
+ mltDocs = mlt.getMoreLikeThis(id, start, rows, filters, flags);
}
} else {
throw new SolrException(
@@ -195,7 +192,9 @@ public class MoreLikeThisHandler extends RequestHandlerBase
{
}
rsp.addResponse(mltDocs.docList);
- if (interesting != null) {
+ if (termStyle != TermStyle.NONE) {
+ final List<InterestingTerm> interesting =
+ mlt.getInterestingTerms(mlt.getBoostedMLTQuery(),
mlt.mlt.getMaxQueryTerms());
if (termStyle == TermStyle.DETAILS) {
NamedList<Float> it = new NamedList<>();
for (InterestingTerm t : interesting) {
@@ -351,14 +350,14 @@ public class MoreLikeThisHandler extends
RequestHandlerBase {
}
private Query rawMLTQuery;
- private Query boostedMLTQuery;
+ private BooleanQuery boostedMLTQuery;
private BooleanQuery realMLTQuery;
public Query getRawMLTQuery() {
return rawMLTQuery;
}
- public Query getBoostedMLTQuery() {
+ public BooleanQuery getBoostedMLTQuery() {
return boostedMLTQuery;
}
@@ -366,7 +365,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase
{
return realMLTQuery;
}
- private Query getBoostedQuery(Query mltquery) {
+ private BooleanQuery getBoostedQuery(Query mltquery) {
BooleanQuery boostedQuery = (BooleanQuery) mltquery;
if (boostFields.size() > 0) {
BooleanQuery.Builder newQ = new BooleanQuery.Builder();
@@ -392,18 +391,13 @@ public class MoreLikeThisHandler extends
RequestHandlerBase {
}
public DocListAndSet getMoreLikeThis(
- int id, int start, int rows, List<Query> filters,
List<InterestingTerm> terms, int flags)
- throws IOException {
+ int id, int start, int rows, List<Query> filters, int flags) throws
IOException {
Document doc = reader.document(id);
- rawMLTQuery = mlt.like(id);
- boostedMLTQuery = getBoostedQuery(rawMLTQuery);
- if (terms != null) {
- fillInterestingTermsFromMLTQuery(boostedMLTQuery, terms);
- }
+ final Query boostedQuery = getBoostedMLTQuery(id);
// exclude current document from results
BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
- realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
+ realMLTQuery.add(boostedQuery, BooleanClause.Occur.MUST);
realMLTQuery.add(
new TermQuery(
new Term(
@@ -423,14 +417,15 @@ public class MoreLikeThisHandler extends
RequestHandlerBase {
return results;
}
+ /** Sets {@link #boostedMLTQuery} and returns it */
+ public BooleanQuery getBoostedMLTQuery(int docNum) throws IOException {
+ rawMLTQuery = mlt.like(docNum);
+ boostedMLTQuery = getBoostedQuery(rawMLTQuery);
+ return boostedMLTQuery;
+ }
+
public DocListAndSet getMoreLikeThis(
- Reader reader,
- int start,
- int rows,
- List<Query> filters,
- List<InterestingTerm> terms,
- int flags)
- throws IOException {
+ Reader reader, int start, int rows, List<Query> filters, int flags)
throws IOException {
// SOLR-5351: if only check against a single field, use the reader
directly. Otherwise we
// repeat the stream's content for multiple fields so that query terms
can be pulled from any
// of those fields.
@@ -450,14 +445,9 @@ public class MoreLikeThisHandler extends
RequestHandlerBase {
for (String field : fields) {
multifieldDoc.put(field, streamValue);
}
-
rawMLTQuery = mlt.like(multifieldDoc);
}
-
boostedMLTQuery = getBoostedQuery(rawMLTQuery);
- if (terms != null) {
- fillInterestingTermsFromMLTQuery(boostedMLTQuery, terms);
- }
DocListAndSet results = new DocListAndSet();
if (this.needDocSet) {
results = searcher.getDocListAndSet(boostedMLTQuery, filters, null,
start, rows, flags);
@@ -466,37 +456,19 @@ public class MoreLikeThisHandler extends
RequestHandlerBase {
}
return results;
}
-
- public NamedList<BooleanQuery> getMoreLikeTheseQuery(DocList docs) throws
IOException {
- IndexSchema schema = searcher.getSchema();
- NamedList<BooleanQuery> result = new NamedList<>();
- DocIterator iterator = docs.iterator();
- while (iterator.hasNext()) {
- int id = iterator.nextDoc();
- String uniqueId = schema.printableUniqueKey(reader.document(id));
-
- BooleanQuery mltquery = (BooleanQuery) mlt.like(id);
- if (mltquery.clauses().size() == 0) {
- return result;
- }
- mltquery = (BooleanQuery) getBoostedQuery(mltquery);
-
- // exclude current document from results
- BooleanQuery.Builder mltQuery = new BooleanQuery.Builder();
- mltQuery.add(mltquery, BooleanClause.Occur.MUST);
-
- mltQuery.add(
- new TermQuery(new Term(uniqueKeyField.getName(), uniqueId)),
- BooleanClause.Occur.MUST_NOT);
- result.add(uniqueId, mltQuery.build());
- }
-
- return result;
- }
-
- private void fillInterestingTermsFromMLTQuery(Query query,
List<InterestingTerm> terms) {
- Collection<BooleanClause> clauses = ((BooleanQuery) query).clauses();
+ /**
+ * Yields terms with boosts from the boosted MLT query.
+ *
+ * @param maxTerms how many terms to return, a negative value means all
terms are returned
+ */
+ public List<InterestingTerm> getInterestingTerms(BooleanQuery
boostedMLTQuery, int maxTerms) {
+ assert boostedMLTQuery != null : "strictly expecting it's set";
+ Collection<BooleanClause> clauses = boostedMLTQuery.clauses();
+ List<InterestingTerm> output = new ArrayList<>(maxTerms < 0 ?
clauses.size() : maxTerms);
for (BooleanClause o : clauses) {
+ if (maxTerms > -1 && output.size() >= maxTerms) {
+ break;
+ }
Query q = o.getQuery();
float boost = 1f;
if (q instanceof BoostQuery) {
@@ -507,10 +479,11 @@ public class MoreLikeThisHandler extends
RequestHandlerBase {
InterestingTerm it = new InterestingTerm();
it.boost = boost;
it.term = ((TermQuery) q).getTerm();
- terms.add(it);
+ output.add(it);
}
// alternatively we could use
// mltquery.extractTerms( terms );
+ return output;
}
public MoreLikeThis getMoreLikeThis() {
diff --git
a/solr/core/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java
b/solr/core/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java
index 0f220e0d99a..e272c646116 100644
---
a/solr/core/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java
+++
b/solr/core/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java
@@ -23,13 +23,16 @@ import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
-import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
-import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.CharsRefBuilder;
+import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.CommonParams;
@@ -95,24 +98,24 @@ public class MoreLikeThisComponent extends SearchComponent {
rb.rsp.add("moreLikeThis", new NamedList<DocList>());
return;
}
-
MoreLikeThisHandler.MoreLikeThisHelper mlt =
new MoreLikeThisHandler.MoreLikeThisHelper(params, searcher);
-
- NamedList<BooleanQuery> bQuery =
mlt.getMoreLikeTheseQuery(rb.getResults().docList);
-
- NamedList<String> temp = new NamedList<>();
- Iterator<Entry<String, BooleanQuery>> idToQueryIt =
bQuery.iterator();
-
- while (idToQueryIt.hasNext()) {
- Entry<String, BooleanQuery> idToQuery = idToQueryIt.next();
- String s = idToQuery.getValue().toString();
-
- log.debug("MLT Query:{}", s);
- temp.add(idToQuery.getKey(), idToQuery.getValue().toString());
+ NamedList<NamedList<?>> mltQueryByDocKey = new NamedList<>();
+ for (DocIterator results = rb.getResults().docList.iterator();
results.hasNext(); ) {
+ int docId = results.nextDoc();
+ final List<MoreLikeThisHandler.InterestingTerm> interestingTerms =
+ mlt.getInterestingTerms(mlt.getBoostedMLTQuery(docId), -1);
+ if (interestingTerms.isEmpty()) {
+ continue;
+ }
+ final String uniqueKey =
rb.req.getSchema().getUniqueKeyField().getName();
+ final Document document = rb.req.getSearcher().doc(docId);
+ final String uniqueVal =
rb.req.getSchema().printableUniqueKey(document);
+ final NamedList<String> mltQ =
+ mltViaQueryParams(rb.req.getSchema(), interestingTerms,
uniqueKey, uniqueVal);
+ mltQueryByDocKey.add(uniqueVal, mltQ);
}
-
- rb.rsp.add("moreLikeThis", temp);
+ rb.rsp.add("moreLikeThis", mltQueryByDocKey);
} else {
NamedList<DocList> sim =
getMoreLikeThese(rb, rb.req.getSearcher(),
rb.getResults().docList, flags);
@@ -127,6 +130,53 @@ public class MoreLikeThisComponent extends SearchComponent
{
}
}
+ private static NamedList<String> mltViaQueryParams(
+ IndexSchema schema,
+ List<MoreLikeThisHandler.InterestingTerm> terms,
+ String uniqueField,
+ String uniqueVal) {
+ final NamedList<String> mltQ = new NamedList<>();
+ StringBuilder q = new StringBuilder("{!bool");
+ q.append(" must_not=$");
+ int cnt = 0;
+ String param = "mltq" + (cnt++);
+ q.append(param);
+ mltQ.add(param, "{!field f=" + uniqueField + "}" + uniqueVal);
+ final StringBuilder reuseStr = new StringBuilder();
+ final CharsRefBuilder reuseChar = new CharsRefBuilder();
+ for (MoreLikeThisHandler.InterestingTerm term : terms) {
+ param = "mltq" + (cnt++);
+ q.append(" should=$");
+ q.append(param);
+ mltQ.add(param, toParserParam(schema, term.term, term.boost, reuseStr,
reuseChar));
+ }
+ q.append("}");
+ mltQ.add(CommonParams.Q, q.toString());
+ return mltQ;
+ }
+
+ private static String toParserParam(
+ IndexSchema schema,
+ Term term1,
+ float boost,
+ StringBuilder reuseStr,
+ CharsRefBuilder reuseChar) {
+ reuseStr.setLength(0);
+ if (boost != 1f) {
+ reuseStr.append("{!boost b=");
+ reuseStr.append(boost);
+ reuseStr.append("}");
+ }
+ final String field = term1.field();
+ final CharsRef val =
+ schema.getField(field).getType().indexedToReadable(term1.bytes(),
reuseChar);
+ reuseStr.append("{!term f=");
+ reuseStr.append(ClientUtils.encodeLocalParamVal(field));
+ reuseStr.append("}");
+ reuseStr.append(val);
+ return reuseStr.toString();
+ }
+
@Override
public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0
@@ -139,17 +189,18 @@ public class MoreLikeThisComponent extends
SearchComponent {
// This should only happen in case of using shards.tolerant=true.
Omit this ShardResponse
continue;
}
- NamedList<?> moreLikeThisReponse =
- (NamedList<?>)
r.getSolrResponse().getResponse().get("moreLikeThis");
+ @SuppressWarnings("unchecked")
+ NamedList<NamedList<String>> moreLikeThisReponse =
+ (NamedList<NamedList<String>>)
r.getSolrResponse().getResponse().get("moreLikeThis");
if (log.isDebugEnabled()) {
log.debug("ShardRequest.response.shard: {}", r.getShard());
}
if (moreLikeThisReponse != null) {
- for (Entry<String, ?> entry : moreLikeThisReponse) {
+ for (Entry<String, NamedList<String>> entry : moreLikeThisReponse) {
if (log.isDebugEnabled()) {
log.debug("id: '{}' Query: '{}'", entry.getKey(),
entry.getValue());
}
- ShardRequest s = buildShardQuery(rb, (String) entry.getValue(),
entry.getKey());
+ ShardRequest s = buildShardQuery(rb, entry.getValue(),
entry.getKey());
rb.addRequest(this, s);
}
}
@@ -309,7 +360,7 @@ public class MoreLikeThisComponent extends SearchComponent {
return result;
}
- ShardRequest buildShardQuery(ResponseBuilder rb, String q, String key) {
+ ShardRequest buildShardQuery(ResponseBuilder rb, NamedList<String> q, String
key) {
ShardRequest s = new ShardRequest();
s.params = new ModifiableSolrParams(rb.req.getParams());
s.purpose |= ShardRequest.PURPOSE_GET_MLT_RESULTS;
@@ -337,24 +388,9 @@ public class MoreLikeThisComponent extends SearchComponent
{
s.params.set(CommonParams.FL, "score," + id);
s.params.set(SORT, "score desc");
// MLT Query is submitted as normal query to shards.
- s.params.set(CommonParams.Q, q);
-
- return s;
- }
-
- ShardRequest buildMLTQuery(ResponseBuilder rb, String q) {
- ShardRequest s = new ShardRequest();
- s.params = new ModifiableSolrParams();
-
- s.params.set(CommonParams.START, 0);
-
- String id = rb.req.getSchema().getUniqueKeyField().getName();
-
- s.params.set(CommonParams.FL, "score," + id);
- // MLT Query is submitted as normal query to shards.
- s.params.set(CommonParams.Q, q);
+ s.params.remove(CommonParams.Q);
+ q.forEach((k, v) -> s.params.add(k, v));
- s.shards = ShardRequest.ALL_SHARDS;
return s;
}
@@ -375,12 +411,8 @@ public class MoreLikeThisComponent extends SearchComponent
{
SimpleOrderedMap<Object> interestingTermsResponse = null;
MoreLikeThisParams.TermStyle interestingTermsConfig =
MoreLikeThisParams.TermStyle.get(p.get(MoreLikeThisParams.INTERESTING_TERMS));
- List<MoreLikeThisHandler.InterestingTerm> interestingTerms =
- (interestingTermsConfig == MoreLikeThisParams.TermStyle.NONE)
- ? null
- : new ArrayList<>(mltHelper.getMoreLikeThis().getMaxQueryTerms());
- if (interestingTerms != null) {
+ if (interestingTermsConfig != MoreLikeThisParams.TermStyle.NONE) {
interestingTermsResponse = new SimpleOrderedMap<>();
}
@@ -388,8 +420,7 @@ public class MoreLikeThisComponent extends SearchComponent {
int id = iterator.nextDoc();
int rows = p.getInt(MoreLikeThisParams.DOC_COUNT, 5);
- DocListAndSet similarDocuments =
- mltHelper.getMoreLikeThis(id, 0, rows, null, interestingTerms,
flags);
+ DocListAndSet similarDocuments = mltHelper.getMoreLikeThis(id, 0, rows,
null, flags);
String name = schema.printableUniqueKey(searcher.doc(id));
mltResponse.add(name, similarDocuments.docList);
@@ -410,6 +441,9 @@ public class MoreLikeThisComponent extends SearchComponent {
}
if (interestingTermsResponse != null) {
+ List<MoreLikeThisHandler.InterestingTerm> interestingTerms =
+ mltHelper.getInterestingTerms(
+ mltHelper.getBoostedMLTQuery(),
mltHelper.getMoreLikeThis().getMaxQueryTerms());
if (interestingTermsConfig == MoreLikeThisParams.TermStyle.DETAILS) {
SimpleOrderedMap<Float> interestingTermsWithScore = new
SimpleOrderedMap<>();
for (MoreLikeThisHandler.InterestingTerm interestingTerm :
interestingTerms) {
diff --git
a/solr/core/src/test/org/apache/solr/handler/component/DistributedMLTComponentTest.java
b/solr/core/src/test/org/apache/solr/handler/component/DistributedMLTComponentTest.java
index bf803e3b2b1..2d5de7e3bd3 100644
---
a/solr/core/src/test/org/apache/solr/handler/component/DistributedMLTComponentTest.java
+++
b/solr/core/src/test/org/apache/solr/handler/component/DistributedMLTComponentTest.java
@@ -91,7 +91,7 @@ public class DistributedMLTComponentTest extends
BaseDistributedSearchTestCase {
id,
"9",
"lowerfilt",
- "The quick red fox jumped over the lazy big and large brown dogs.",
+ "The quick red:fox jumped over the lazy big and large brown dogs.",
"lowerfilt1",
"x");
index(id, "10", "lowerfilt", "blue", "lowerfilt1", "x");
@@ -100,7 +100,7 @@ public class DistributedMLTComponentTest extends
BaseDistributedSearchTestCase {
id,
"13",
"lowerfilt",
- "The quote red fox jumped over the lazy brown dogs.",
+ "The quote RED)FOX jumped over the lazy brown dogs.",
"lowerfilt1",
"y");
index(
@@ -389,5 +389,29 @@ public class DistributedMLTComponentTest extends
BaseDistributedSearchTestCase {
Long actual = ((SolrDocumentList) entry.getValue()).getNumFound();
assertEquals("MLT mismatch for id=" + key, expected, actual);
}
+ // test boost mlt.qf
+ query(
+ "q",
+ "lowerfilt:moon",
+ "fl",
+ id,
+ MoreLikeThisParams.MIN_TERM_FREQ,
+ 2,
+ MoreLikeThisParams.MIN_DOC_FREQ,
+ 1,
+ "sort",
+ "id_i1 desc",
+ "mlt",
+ "true",
+ "mlt.fl",
+ "lowerfilt1,lowerfilt",
+ "mlt.qf",
+ "lowerfilt1^1.2 lowerfilt^3.4",
+ "qt",
+ requestHandlerName,
+ "shards.qt",
+ requestHandlerName,
+ "mlt.count",
+ "20");
}
}