squallsama commented on code in PR #4402:
URL: https://github.com/apache/solr/pull/4402#discussion_r3267697310


##########
solr/core/src/java/org/apache/solr/handler/component/MatchedQueriesComponent.java:
##########
@@ -0,0 +1,175 @@
+package org.apache.solr.handler.component;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Matches;
+import org.apache.lucene.search.NamedMatches;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.Weight;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.search.DocIterator;
+import org.apache.solr.search.DocList;
+import org.apache.solr.search.SolrIndexSearcher;
+
+/**
+ * Search component that enriches the response with named-match information for each document in the
+ * top-N hits.
+ *
+ * <p>Activation: Add {@code matched_queries=true} (or {@code mq=true}) to the request.
+ *
+ * <p>Output: - per-doc: each hit gets a "matched_queries": ["name1","name2"] field - response
+ * section: "matched_queries_summary": { "name1": {"count": 5, "docIds": ["id1","id2"]}, "name2":
+ * {"count": 2, "docIds": ["id3"]} }
+ *
+ * <p>Implementation: We use the {@link Weight#matches(LeafReaderContext, int)} API which performs a
+ * separate, post-search pass over each requested document. {@link NamedMatches} become identifiable
+ * through {@link NamedMatches#findNamedMatches(Matches)} on the returned Matches tree.
+ * ScoreMode.COMPLETE_NO_SCORES is used for the matches Weight because matching does not need
+ * scoring and this lets Lucene skip score computation entirely for this pass.
+ */
+public class MatchedQueriesComponent extends SearchComponent {
+
+  public static final String COMPONENT_NAME = "matched_queries";
+  public static final String PARAM_ENABLE = "matched_queries";
+  public static final String PARAM_ENABLE_SHORT = "mq";
+
+  @Override
+  public void prepare(ResponseBuilder rb) {
+    // nothing to prepare
+  }
+
+  @Override
+  public void process(ResponseBuilder rb) throws IOException {
+    if (!isEnabled(rb)) {
+      return;
+    }
+
+    DocList docList = rb.getResults() == null ? null : rb.getResults().docList;
+    if (docList == null || docList.size() == 0) {
+      return;
+    }
+
+    Query query = rb.getQuery();
+    if (query == null) {
+      return;
+    }
+
+    SolrIndexSearcher searcher = rb.req.getSearcher();
+    // schema's unique key field — used to populate docIds in the summary
+    String idField = 
rb.req.getCore().getLatestSchema().getUniqueKeyField().getName();
+
+    // Build a Weight for matching only (no scoring needed)
+    Query rewritten = searcher.rewrite(query);
+    Weight matchesWeight = searcher.createWeight(rewritten, 
ScoreMode.COMPLETE_NO_SCORES, 1.0f);
+
+    // Collect: per global doc id → ordered set of names
+    Map<Integer, Set<String>> perDocNames = new LinkedHashMap<>();
+    // Collect: per name → list of global doc ids (preserves document order)
+    Map<String, List<Integer>> perNameDocs = new LinkedHashMap<>();
+    // Cache unique-key values: each matching doc's stored id field is read exactly once here
+    // and reused in both output loops below, avoiding redundant stored-field access.
+    Map<Integer, String> idCache = new LinkedHashMap<>();

Review Comment:
   Thanks, you are right that ordering is not needed here - migrated to
   IntObjectHashMap.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to