This is an automated email from the ASF dual-hosted git repository.
abenedetti pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new d249593e5af [SOLR-17726] Fix CloudMLTQParser to support copyField in qf (#3328)
d249593e5af is described below
commit d249593e5affaa9795bc7c9c6e2218e31203eee4
Author: Ilaria Petreti <[email protected]>
AuthorDate: Mon Jun 30 10:30:49 2025 +0200
[SOLR-17726] Fix CloudMLTQParser to support copyField in qf (#3328)
using copy field source for more like this + tests
---------
Co-authored-by: Alessandro Benedetti <[email protected]>
---
solr/CHANGES.txt | 2 +
.../apache/solr/search/mlt/CloudMLTQParser.java | 20 +++-
.../solr/configsets/cloud-dynamic/conf/schema.xml | 11 ++-
.../solr/search/mlt/CloudMLTQParserTest.java | 108 +++++++++++++++++++++
4 files changed, 138 insertions(+), 3 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 5e715f65ea4..db12f5cb3f0 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -325,6 +325,8 @@ Bug Fixes
* SOLR-17790: Allow the -j or --jettyconfig option to start with a dash (-).
(Houston Putman)
+* SOLR-17726: MoreLikeThis to support copy-fields (Ilaria Petreti via Alessandro Benedetti)
+
Dependency Upgrades
---------------------
* SOLR-17471: Upgrade Lucene to 9.12.1. (Pierre Salagnac, Christine Poerschke)
diff --git a/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java b/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
index 90ff5949138..6cdd3f9b9a2 100644
--- a/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
+++ b/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
@@ -67,7 +67,8 @@ public class CloudMLTQParser extends SimpleMLTQParser {
Map<String, Collection<Object>> filteredDocument = new HashMap<>();
for (String field : moreLikeThis.getFieldNames()) {
- Collection<Object> fieldValues = doc.getFieldValues(field);
+ Collection<Object> fieldValues = getFieldValuesIncludingCopyField(doc,
field);
+
if (fieldValues != null) {
Collection<Object> values = new ArrayList<>();
for (Object val : fieldValues) {
@@ -110,4 +111,21 @@ public class CloudMLTQParser extends SimpleMLTQParser {
return (SolrDocument) response.get("doc");
}
+
+ private Collection<Object> getFieldValuesIncludingCopyField(SolrDocument
doc, String field) {
+ Collection<Object> fieldValues = doc.getFieldValues(field);
+ if (fieldValues != null) return fieldValues;
+ // Fields created using copyField are not included in documents returned by RealTime Get.
+ // So if a copyField destination is used in the MLT query (qf), we need to get the values
+ // from its source field instead. If there are multiple source fields, their values must be
+ // combined.
+ Collection<Object> combinedValues = new ArrayList<>();
+ for (String sourceField : req.getSchema().getCopySources(field)) {
+ Collection<Object> sourceValues = doc.getFieldValues(sourceField);
+ if (sourceValues != null) {
+ combinedValues.addAll(sourceValues);
+ }
+ }
+ return combinedValues.isEmpty() ? null : combinedValues;
+ }
}
diff --git a/solr/core/src/test-files/solr/configsets/cloud-dynamic/conf/schema.xml b/solr/core/src/test-files/solr/configsets/cloud-dynamic/conf/schema.xml
index 58e6405959a..5c26be38b8a 100644
--- a/solr/core/src/test-files/solr/configsets/cloud-dynamic/conf/schema.xml
+++ b/solr/core/src/test-files/solr/configsets/cloud-dynamic/conf/schema.xml
@@ -224,11 +224,18 @@
<field name="uniq3" type="string" indexed="true" stored="true"/>
<field name="nouniq" type="string" indexed="true" stored="true"
multiValued="true"/>
+ <field name="payload" type="sortable_binary" indexed="false"
+ stored="true" multiValued="false"/>
+ <!-- to test copyField in MLT qf -->
<field name="copyfield_source" type="string" indexed="true" stored="true"
multiValued="true"/>
+ <field name="copyfield_dest" type="nametext" indexed="true" stored="true"
multiValued="true"/>
+ <copyField source="copyfield_source" dest="copyfield_dest" />
- <field name="payload" type="sortable_binary" indexed="false"
- stored="true" multiValued="false"/>
+ <field name="copyfield_source_2" type="string" indexed="true" stored="true"
multiValued="true"/>
+ <field name="copyfield_dest_multiple_sources" type="nametext" indexed="true"
stored="true" multiValued="true"/>
+ <copyField source="copyfield_source" dest="copyfield_dest_multiple_sources"
/>
+ <copyField source="copyfield_source_2"
dest="copyfield_dest_multiple_sources" />
<!-- for versioning -->
<field name="_version_" type="long" indexed="true" stored="true"/>
diff --git a/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java b/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
index 0ccf8862969..aa413cd37eb 100644
--- a/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
+++ b/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
@@ -27,6 +27,7 @@ import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.NamedList;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@@ -50,6 +51,8 @@ public class CloudMLTQParserTest extends SolrCloudTestCase {
String id = "id";
String FIELD1 = "lowerfilt_u";
String FIELD2 = "lowerfilt1_u";
+ String FIELD3 = "copyfield_source";
+ String FIELD4 = "copyfield_source_2";
new UpdateRequest()
.add(sdoc(id, "1", FIELD1, "toyota"))
@@ -119,6 +122,9 @@ public class CloudMLTQParserTest extends SolrCloudTestCase {
"The slim red fox jumped over the lazy brown dogs.",
FIELD2,
"yellow white black"))
+ .add(sdoc(id, "33", FIELD3, "hard rock", FIELD4, "instrumental
version"))
+ .add(sdoc(id, "34", FIELD3, "hard rock", FIELD4, "instrumental
version"))
+ .add(sdoc(id, "35", FIELD3, "pop rock"))
.commit(client, COLLECTION);
}
@@ -339,4 +345,106 @@ public class CloudMLTQParserTest extends
SolrCloudTestCase {
.getSolrClient()
.query(COLLECTION, new SolrQuery("{!mlt
qf=lowerfilt_u}999999")));
}
+
+ @Test
+ public void testUsesACopyFieldInQf_shouldReturnExpectResults() throws
Exception {
+ // Verifies that when a copyField destination is used in the qf parameter, the field values are
+ // correctly retrieved from the source field(s) and the MLT query returns the expected results.
+ QueryResponse queryResponse =
+ cluster
+ .getSolrClient()
+ .query(COLLECTION, new SolrQuery("{!mlt qf=copyfield_dest mindf=0
mintf=1}33"));
+ SolrDocumentList solrDocuments = queryResponse.getResults();
+ int[] expectedIds = new int[] {34, 35};
+ int[] actualIds = new int[solrDocuments.size()];
+ int i = 0;
+ for (SolrDocument solrDocument : solrDocuments) {
+ actualIds[i++] =
Integer.parseInt(String.valueOf(solrDocument.getFieldValue("id")));
+ }
+
+ Arrays.sort(actualIds);
+ Arrays.sort(expectedIds);
+ assertArrayEquals(expectedIds, actualIds);
+ }
+
+ @Test
+ public void testUsesACopyFieldInQf_shouldGenerateNonEmptyQuery() throws
Exception {
+ // Verifies that the MLT query correctly uses the content of the source field(s) when a
+ // copyField destination is specified in the qf parameter.
+ QueryResponse queryResponse =
+ cluster
+ .getSolrClient()
+ .query(
+ COLLECTION,
+ new SolrQuery("{!mlt qf=copyfield_dest mindf=0
mintf=1}33").setShowDebugInfo(true));
+
+ NamedList<?> debugInfo = (NamedList<?>)
queryResponse.getResponse().get("debug");
+ // Extract the parsed query string
+ String parsedQuery = (String) debugInfo.get("parsedquery_toString");
+ // Assert it matches the expected query string
+ assertEquals("+(copyfield_dest:rock copyfield_dest:hard) -id:33",
parsedQuery);
+ // Assert it is not the incorrect fallback
+ assertNotEquals("+() -id:33", parsedQuery);
+ }
+
+ @Test
+ public void testCopyFieldSourceMissing_shouldReturnNoResults() throws
Exception {
+ // Ensures that no results are returned when the copyField source field is missing in the source
+ // document.
+ QueryResponse queryResponse =
+ cluster
+ .getSolrClient()
+ .query(COLLECTION, new SolrQuery("{!mlt qf=copyfield_dest mindf=0
mintf=1}30"));
+ SolrDocumentList solrDocuments = queryResponse.getResults();
+ assertEquals("Expected no results if source field is missing", 0,
solrDocuments.size());
+ }
+
+ @Test
+ public void testCopyFieldDestinMultipleSources_shouldReturnExpectResults()
throws Exception {
+ // Validates that when multiple source fields map to a single copyField destination, their
+ // values are correctly combined and expected results are returned.
+ QueryResponse queryResponse =
+ cluster
+ .getSolrClient()
+ .query(
+ COLLECTION,
+ new SolrQuery("{!mlt qf=copyfield_dest_multiple_sources
mindf=0 mintf=1}33"));
+ SolrDocumentList solrDocuments = queryResponse.getResults();
+ int[] expectedIds = new int[] {34, 35};
+ int[] actualIds = new int[solrDocuments.size()];
+ int i = 0;
+ for (SolrDocument solrDocument : solrDocuments) {
+ actualIds[i++] =
Integer.parseInt(String.valueOf(solrDocument.getFieldValue("id")));
+ }
+
+ Arrays.sort(actualIds);
+ Arrays.sort(expectedIds);
+ assertArrayEquals(expectedIds, actualIds);
+ }
+
+ @Test
+ public void
+
testCopyFieldDestinationMultipleSources_shouldGenerateQueryUsingMultipleSourcesValues()
+ throws Exception {
+ // Validates that when multiple source fields map to a single copyField destination, their
+ // values are
+ // correctly combined and the resulting MLT query is properly constructed.
+ QueryResponse queryResponse =
+ cluster
+ .getSolrClient()
+ .query(
+ COLLECTION,
+ new SolrQuery("{!mlt qf=copyfield_dest_multiple_sources
mindf=0 mintf=1}33")
+ .setShowDebugInfo(true));
+
+ NamedList<?> debugInfo = (NamedList<?>)
queryResponse.getResponse().get("debug");
+ // Extract the parsed query string
+ String parsedQuery = (String) debugInfo.get("parsedquery_toString");
+ // Assert it matches the expected query string
+ assertEquals(
+ "+(copyfield_dest_multiple_sources:rock
copyfield_dest_multiple_sources:version copyfield_dest_multiple_sources:hard
copyfield_dest_multiple_sources:instrumental) -id:33",
+ parsedQuery);
+ // Assert it is not the incorrect fallback
+ assertNotEquals("+() -id:33", parsedQuery);
+ }
}