This is an automated email from the ASF dual-hosted git repository.

abenedetti pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new d249593e5af [SOLR-17726] Fix CloudMLTQParser to support copyField in 
qf (#3328)
d249593e5af is described below

commit d249593e5affaa9795bc7c9c6e2218e31203eee4
Author: Ilaria Petreti <[email protected]>
AuthorDate: Mon Jun 30 10:30:49 2025 +0200

    [SOLR-17726] Fix CloudMLTQParser to support copyField in qf (#3328)
    
    using copy field source for more like this + tests
    ---------
    
    Co-authored-by: Alessandro Benedetti <[email protected]>
---
 solr/CHANGES.txt                                   |   2 +
 .../apache/solr/search/mlt/CloudMLTQParser.java    |  20 +++-
 .../solr/configsets/cloud-dynamic/conf/schema.xml  |  11 ++-
 .../solr/search/mlt/CloudMLTQParserTest.java       | 108 +++++++++++++++++++++
 4 files changed, 138 insertions(+), 3 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 5e715f65ea4..db12f5cb3f0 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -325,6 +325,8 @@ Bug Fixes
 
 * SOLR-17790: Allow the -j or --jettyconfig option to start with a dash (-). 
(Houston Putman)
 
+* SOLR-17726: MoreLikeThis to support copy-fields (Ilaria Petreti via 
Alessandro Benedetti)
+
 Dependency Upgrades
 ---------------------
 * SOLR-17471: Upgrade Lucene to 9.12.1. (Pierre Salagnac, Christine Poerschke)
diff --git a/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java 
b/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
index 90ff5949138..6cdd3f9b9a2 100644
--- a/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
+++ b/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
@@ -67,7 +67,8 @@ public class CloudMLTQParser extends SimpleMLTQParser {
     Map<String, Collection<Object>> filteredDocument = new HashMap<>();
 
     for (String field : moreLikeThis.getFieldNames()) {
-      Collection<Object> fieldValues = doc.getFieldValues(field);
+      Collection<Object> fieldValues = getFieldValuesIncludingCopyField(doc, 
field);
+
       if (fieldValues != null) {
         Collection<Object> values = new ArrayList<>();
         for (Object val : fieldValues) {
@@ -110,4 +111,21 @@ public class CloudMLTQParser extends SimpleMLTQParser {
 
     return (SolrDocument) response.get("doc");
   }
+
+  private Collection<Object> getFieldValuesIncludingCopyField(SolrDocument 
doc, String field) {
+    Collection<Object> fieldValues = doc.getFieldValues(field);
+    if (fieldValues != null) return fieldValues;
+    // Fields created using copyField are not included in documents returned 
by RealTime Get.
+    // So if a copyField destination is used in the MLT query (qf), we need to 
get the values
+    // from its source field instead. If there are multiple source fields, 
their values must be
+    // combined.
+    Collection<Object> combinedValues = new ArrayList<>();
+    for (String sourceField : req.getSchema().getCopySources(field)) {
+      Collection<Object> sourceValues = doc.getFieldValues(sourceField);
+      if (sourceValues != null) {
+        combinedValues.addAll(sourceValues);
+      }
+    }
+    return combinedValues.isEmpty() ? null : combinedValues;
+  }
 }
diff --git 
a/solr/core/src/test-files/solr/configsets/cloud-dynamic/conf/schema.xml 
b/solr/core/src/test-files/solr/configsets/cloud-dynamic/conf/schema.xml
index 58e6405959a..5c26be38b8a 100644
--- a/solr/core/src/test-files/solr/configsets/cloud-dynamic/conf/schema.xml
+++ b/solr/core/src/test-files/solr/configsets/cloud-dynamic/conf/schema.xml
@@ -224,11 +224,18 @@
   <field name="uniq3" type="string" indexed="true" stored="true"/>
   <field name="nouniq" type="string" indexed="true" stored="true" 
multiValued="true"/>
 
+  <field name="payload" type="sortable_binary" indexed="false"
+         stored="true" multiValued="false"/>
 
+  <!-- to test copyField in MLT qf -->
   <field name="copyfield_source" type="string" indexed="true" stored="true" 
multiValued="true"/>
+  <field name="copyfield_dest" type="nametext" indexed="true" stored="true" 
multiValued="true"/>
+  <copyField source="copyfield_source" dest="copyfield_dest" />
 
-  <field name="payload" type="sortable_binary" indexed="false"
-         stored="true" multiValued="false"/>
+  <field name="copyfield_source_2" type="string" indexed="true" stored="true" 
multiValued="true"/>
+  <field name="copyfield_dest_multiple_sources" type="nametext" indexed="true" 
stored="true" multiValued="true"/>
+  <copyField source="copyfield_source" dest="copyfield_dest_multiple_sources" 
/>
+  <copyField source="copyfield_source_2" 
dest="copyfield_dest_multiple_sources" />
 
   <!-- for versioning -->
   <field name="_version_" type="long" indexed="true" stored="true"/>
diff --git 
a/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java 
b/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
index 0ccf8862969..aa413cd37eb 100644
--- a/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
+++ b/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
@@ -27,6 +27,7 @@ import org.apache.solr.cloud.SolrCloudTestCase;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.NamedList;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -50,6 +51,8 @@ public class CloudMLTQParserTest extends SolrCloudTestCase {
     String id = "id";
     String FIELD1 = "lowerfilt_u";
     String FIELD2 = "lowerfilt1_u";
+    String FIELD3 = "copyfield_source";
+    String FIELD4 = "copyfield_source_2";
 
     new UpdateRequest()
         .add(sdoc(id, "1", FIELD1, "toyota"))
@@ -119,6 +122,9 @@ public class CloudMLTQParserTest extends SolrCloudTestCase {
                 "The slim red fox jumped over the lazy brown dogs.",
                 FIELD2,
                 "yellow white black"))
+        .add(sdoc(id, "33", FIELD3, "hard rock", FIELD4, "instrumental 
version"))
+        .add(sdoc(id, "34", FIELD3, "hard rock", FIELD4, "instrumental 
version"))
+        .add(sdoc(id, "35", FIELD3, "pop rock"))
         .commit(client, COLLECTION);
   }
 
@@ -339,4 +345,106 @@ public class CloudMLTQParserTest extends 
SolrCloudTestCase {
                 .getSolrClient()
                 .query(COLLECTION, new SolrQuery("{!mlt 
qf=lowerfilt_u}999999")));
   }
+
+  @Test
+  public void testUsesACopyFieldInQf_shouldReturnExpectResults() throws 
Exception {
+    // Verifies that when a copyField destination is used in the qf parameter, 
the field values are
+    // correctly retrieved from the source field(s) and the MLT query returns 
the expected results.
+    QueryResponse queryResponse =
+        cluster
+            .getSolrClient()
+            .query(COLLECTION, new SolrQuery("{!mlt qf=copyfield_dest mindf=0 
mintf=1}33"));
+    SolrDocumentList solrDocuments = queryResponse.getResults();
+    int[] expectedIds = new int[] {34, 35};
+    int[] actualIds = new int[solrDocuments.size()];
+    int i = 0;
+    for (SolrDocument solrDocument : solrDocuments) {
+      actualIds[i++] = 
Integer.parseInt(String.valueOf(solrDocument.getFieldValue("id")));
+    }
+
+    Arrays.sort(actualIds);
+    Arrays.sort(expectedIds);
+    assertArrayEquals(expectedIds, actualIds);
+  }
+
+  @Test
+  public void testUsesACopyFieldInQf_shouldGenerateNonEmptyQuery() throws 
Exception {
+    // Verifies that the MLT query correctly uses the content of the source 
field(s) when a
+    // copyField destination is specified in the qf parameter.
+    QueryResponse queryResponse =
+        cluster
+            .getSolrClient()
+            .query(
+                COLLECTION,
+                new SolrQuery("{!mlt qf=copyfield_dest mindf=0 
mintf=1}33").setShowDebugInfo(true));
+
+    NamedList<?> debugInfo = (NamedList<?>) 
queryResponse.getResponse().get("debug");
+    // Extract the parsed query string
+    String parsedQuery = (String) debugInfo.get("parsedquery_toString");
+    // Assert it matches the expected query string
+    assertEquals("+(copyfield_dest:rock copyfield_dest:hard) -id:33", 
parsedQuery);
+    // Assert it is not the incorrect fallback
+    assertNotEquals("+() -id:33", parsedQuery);
+  }
+
+  @Test
+  public void testCopyFieldSourceMissing_shouldReturnNoResults() throws 
Exception {
+    // Ensures that no results are returned when the copyField source field is 
missing in the source
+    // document.
+    QueryResponse queryResponse =
+        cluster
+            .getSolrClient()
+            .query(COLLECTION, new SolrQuery("{!mlt qf=copyfield_dest mindf=0 
mintf=1}30"));
+    SolrDocumentList solrDocuments = queryResponse.getResults();
+    assertEquals("Expected no results if source field is missing", 0, 
solrDocuments.size());
+  }
+
+  @Test
+  public void testCopyFieldDestinMultipleSources_shouldReturnExpectResults() 
throws Exception {
+    // Validates that when multiple source fields map to a single copyField 
destination, their
+    // values are correctly combined and expected results are returned.
+    QueryResponse queryResponse =
+        cluster
+            .getSolrClient()
+            .query(
+                COLLECTION,
+                new SolrQuery("{!mlt qf=copyfield_dest_multiple_sources 
mindf=0 mintf=1}33"));
+    SolrDocumentList solrDocuments = queryResponse.getResults();
+    int[] expectedIds = new int[] {34, 35};
+    int[] actualIds = new int[solrDocuments.size()];
+    int i = 0;
+    for (SolrDocument solrDocument : solrDocuments) {
+      actualIds[i++] = 
Integer.parseInt(String.valueOf(solrDocument.getFieldValue("id")));
+    }
+
+    Arrays.sort(actualIds);
+    Arrays.sort(expectedIds);
+    assertArrayEquals(expectedIds, actualIds);
+  }
+
+  @Test
+  public void
+      
testCopyFieldDestinationMultipleSources_shouldGenerateQueryUsingMultipleSourcesValues()
+          throws Exception {
+    // Validates that when multiple source fields map to a single copyField
+    // destination, their values are correctly combined and the resulting MLT
+    // query is properly constructed.
+    QueryResponse queryResponse =
+        cluster
+            .getSolrClient()
+            .query(
+                COLLECTION,
+                new SolrQuery("{!mlt qf=copyfield_dest_multiple_sources 
mindf=0 mintf=1}33")
+                    .setShowDebugInfo(true));
+
+    NamedList<?> debugInfo = (NamedList<?>) 
queryResponse.getResponse().get("debug");
+    // Extract the parsed query string
+    String parsedQuery = (String) debugInfo.get("parsedquery_toString");
+    // Assert it matches the expected query string
+    assertEquals(
+        "+(copyfield_dest_multiple_sources:rock 
copyfield_dest_multiple_sources:version copyfield_dest_multiple_sources:hard 
copyfield_dest_multiple_sources:instrumental) -id:33",
+        parsedQuery);
+    // Assert it is not the incorrect fallback
+    assertNotEquals("+() -id:33", parsedQuery);
+  }
 }

Reply via email to