This is an automated email from the ASF dual-hosted git repository. abenedetti pushed a commit to branch branch_9x in repository https://gitbox.apache.org/repos/asf/solr.git
commit c790b54104ab38fb5b297bc43ffcd223e7eaf5f9 Author: Ilaria Petreti <[email protected]> AuthorDate: Mon Jun 30 10:30:49 2025 +0200 [SOLR-17726] Fix CloudMLTQParser to support copyField in qf (#3328) using copy field source for more like this + tests --------- Co-authored-by: Alessandro Benedetti <[email protected]> (cherry picked from commit d249593e5affaa9795bc7c9c6e2218e31203eee4) --- solr/CHANGES.txt | 2 + .../apache/solr/search/mlt/CloudMLTQParser.java | 20 +++- .../solr/configsets/cloud-dynamic/conf/schema.xml | 11 ++- .../solr/search/mlt/CloudMLTQParserTest.java | 108 +++++++++++++++++++++ 4 files changed, 138 insertions(+), 3 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 4cba2dc4920..f8364b40eac 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -130,6 +130,8 @@ Bug Fixes * SOLR-17790: Allow the -j or --jettyconfig option to start with a dash (-). (Houston Putman) +* SOLR-17726: MoreLikeThis to support copy-fields (Ilaria Petreti via Alessandro Benedetti) + Dependency Upgrades --------------------- * SOLR-17471: Upgrade Lucene to 9.12.1. (Pierre Salagnac, Christine Poerschke) diff --git a/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java b/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java index 90ff5949138..6cdd3f9b9a2 100644 --- a/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java +++ b/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java @@ -67,7 +67,8 @@ public class CloudMLTQParser extends SimpleMLTQParser { Map<String, Collection<Object>> filteredDocument = new HashMap<>(); for (String field : moreLikeThis.getFieldNames()) { - Collection<Object> fieldValues = doc.getFieldValues(field); + Collection<Object> fieldValues = getFieldValuesIncludingCopyField(doc, field); + if (fieldValues != null) { Collection<Object> values = new ArrayList<>(); for (Object val : fieldValues) { @@ -110,4 +111,21 @@ public class CloudMLTQParser extends SimpleMLTQParser { return (SolrDocument) response.get("doc"); } + + private Collection<Object> getFieldValuesIncludingCopyField(SolrDocument doc, String field) { + Collection<Object> fieldValues = doc.getFieldValues(field); + if (fieldValues != null) return fieldValues; + // Fields created using copyField are not included in documents returned by RealTime Get. + // So if a copyField destination is used in the MLT query (qf), we need to get the values + // from its source field instead. If there are multiple source fields, their values must be + // combined. + Collection<Object> combinedValues = new ArrayList<>(); + for (String sourceField : req.getSchema().getCopySources(field)) { + Collection<Object> sourceValues = doc.getFieldValues(sourceField); + if (sourceValues != null) { + combinedValues.addAll(sourceValues); + } + } + return combinedValues.isEmpty() ? null : combinedValues; + } } diff --git a/solr/core/src/test-files/solr/configsets/cloud-dynamic/conf/schema.xml b/solr/core/src/test-files/solr/configsets/cloud-dynamic/conf/schema.xml index 58e6405959a..5c26be38b8a 100644 --- a/solr/core/src/test-files/solr/configsets/cloud-dynamic/conf/schema.xml +++ b/solr/core/src/test-files/solr/configsets/cloud-dynamic/conf/schema.xml @@ -224,11 +224,18 @@ <field name="uniq3" type="string" indexed="true" stored="true"/> <field name="nouniq" type="string" indexed="true" stored="true" multiValued="true"/> + <field name="payload" type="sortable_binary" indexed="false" + stored="true" multiValued="false"/> + <!-- to test copyField in MLT qf --> <field name="copyfield_source" type="string" indexed="true" stored="true" multiValued="true"/> + <field name="copyfield_dest" type="nametext" indexed="true" stored="true" multiValued="true"/> + <copyField source="copyfield_source" dest="copyfield_dest" /> - <field name="payload" type="sortable_binary" indexed="false" - stored="true" multiValued="false"/> + <field name="copyfield_source_2" type="string" indexed="true" stored="true" multiValued="true"/> + <field name="copyfield_dest_multiple_sources" type="nametext" indexed="true" stored="true" multiValued="true"/> + <copyField source="copyfield_source" dest="copyfield_dest_multiple_sources" /> + <copyField source="copyfield_source_2" dest="copyfield_dest_multiple_sources" /> <!-- for versioning --> <field name="_version_" type="long" indexed="true" stored="true"/> diff --git a/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java b/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java index 5912465f883..da87e218dbd 100644 --- a/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java +++ b/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java @@ -27,6 +27,7 @@ import org.apache.solr.cloud.SolrCloudTestCase; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.NamedList; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -50,6 +51,8 @@ public class CloudMLTQParserTest extends SolrCloudTestCase { String id = "id"; String FIELD1 = "lowerfilt_u"; String FIELD2 = "lowerfilt1_u"; + String FIELD3 = "copyfield_source"; + String FIELD4 = "copyfield_source_2"; new UpdateRequest() .add(sdoc(id, "1", FIELD1, "toyota")) @@ -119,6 +122,9 @@ public class CloudMLTQParserTest extends SolrCloudTestCase { "The slim red fox jumped over the lazy brown dogs.", FIELD2, "yellow white black")) + .add(sdoc(id, "33", FIELD3, "hard rock", FIELD4, "instrumental version")) + .add(sdoc(id, "34", FIELD3, "hard rock", FIELD4, "instrumental version")) + .add(sdoc(id, "35", FIELD3, "pop rock")) .commit(client, COLLECTION); } @@ -340,4 +346,106 @@ public class CloudMLTQParserTest extends SolrCloudTestCase { .getSolrClient() .query(COLLECTION, new SolrQuery("{!mlt qf=lowerfilt_u}999999"))); } + + @Test + public void testUsesACopyFieldInQf_shouldReturnExpectResults() throws Exception { + // Verifies that when a copyField destination is used in the qf parameter, the field values are + // correctly retrieved from the source field(s) and the MLT query returns the expected results. + QueryResponse queryResponse = + cluster + .getSolrClient() + .query(COLLECTION, new SolrQuery("{!mlt qf=copyfield_dest mindf=0 mintf=1}33")); + SolrDocumentList solrDocuments = queryResponse.getResults(); + int[] expectedIds = new int[] {34, 35}; + int[] actualIds = new int[solrDocuments.size()]; + int i = 0; + for (SolrDocument solrDocument : solrDocuments) { + actualIds[i++] = Integer.parseInt(String.valueOf(solrDocument.getFieldValue("id"))); + } + + Arrays.sort(actualIds); + Arrays.sort(expectedIds); + assertArrayEquals(expectedIds, actualIds); + } + + @Test + public void testUsesACopyFieldInQf_shouldGenerateNonEmptyQuery() throws Exception { + // Verifies that the MLT query correctly uses the content of the source field(s) when a + // copyField destination is specified in the qf parameter. + QueryResponse queryResponse = + cluster + .getSolrClient() + .query( + COLLECTION, + new SolrQuery("{!mlt qf=copyfield_dest mindf=0 mintf=1}33").setShowDebugInfo(true)); + + NamedList<?> debugInfo = (NamedList<?>) queryResponse.getResponse().get("debug"); + // Extract the parsed query string + String parsedQuery = (String) debugInfo.get("parsedquery_toString"); + // Assert it matches the expected query string + assertEquals("+(copyfield_dest:rock copyfield_dest:hard) -id:33", parsedQuery); + // Assert it is not the incorrect fallback + assertNotEquals("+() -id:33", parsedQuery); + } + + @Test + public void testCopyFieldSourceMissing_shouldReturnNoResults() throws Exception { + // Ensures that no results are returned when the copyField source field is missing in the source + // document. + QueryResponse queryResponse = + cluster + .getSolrClient() + .query(COLLECTION, new SolrQuery("{!mlt qf=copyfield_dest mindf=0 mintf=1}30")); + SolrDocumentList solrDocuments = queryResponse.getResults(); + assertEquals("Expected no results if source field is missing", 0, solrDocuments.size()); + } + + @Test + public void testCopyFieldDestinMultipleSources_shouldReturnExpectResults() throws Exception { + // Validates that when multiple source fields map to a single copyField destination, their + // values are correctly combined and expected results are returned. + QueryResponse queryResponse = + cluster + .getSolrClient() + .query( + COLLECTION, + new SolrQuery("{!mlt qf=copyfield_dest_multiple_sources mindf=0 mintf=1}33")); + SolrDocumentList solrDocuments = queryResponse.getResults(); + int[] expectedIds = new int[] {34, 35}; + int[] actualIds = new int[solrDocuments.size()]; + int i = 0; + for (SolrDocument solrDocument : solrDocuments) { + actualIds[i++] = Integer.parseInt(String.valueOf(solrDocument.getFieldValue("id"))); + } + + Arrays.sort(actualIds); + Arrays.sort(expectedIds); + assertArrayEquals(expectedIds, actualIds); + } + + @Test + public void + testCopyFieldDestinationMultipleSources_shouldGenerateQueryUsingMultipleSourcesValues() + throws Exception { + // Validates that when multiple source fields map to a single copyField destination, their + // values are + // correctly combined and the resulting MLT query is properly constructed. + QueryResponse queryResponse = + cluster + .getSolrClient() + .query( + COLLECTION, + new SolrQuery("{!mlt qf=copyfield_dest_multiple_sources mindf=0 mintf=1}33") + .setShowDebugInfo(true)); + + NamedList<?> debugInfo = (NamedList<?>) queryResponse.getResponse().get("debug"); + // Extract the parsed query string + String parsedQuery = (String) debugInfo.get("parsedquery_toString"); + // Assert it matches the expected query string + assertEquals( + "+(copyfield_dest_multiple_sources:rock copyfield_dest_multiple_sources:version copyfield_dest_multiple_sources:hard copyfield_dest_multiple_sources:instrumental) -id:33", + parsedQuery); + // Assert it is not the incorrect fallback + assertNotEquals("+() -id:33", parsedQuery); + } }
