cpoerschke commented on code in PR #2382:
URL: https://github.com/apache/solr/pull/2382#discussion_r1840617435


##########
solr/modules/monitor/src/java/org/apache/solr/monitor/search/ReverseSearchComponent.java:
##########
@@ -0,0 +1,207 @@
+/*
+ *
+ *  * Licensed to the Apache Software Foundation (ASF) under one or more
+ *  * contributor license agreements.  See the NOTICE file distributed with
+ *  * this work for additional information regarding copyright ownership.
+ *  * The ASF licenses this file to You under the Apache License, Version 2.0
+ *  * (the "License"); you may not use this file except in compliance with
+ *  * the License.  You may obtain a copy of the License at
+ *  *
+ *  *     http://www.apache.org/licenses/LICENSE-2.0
+ *  *
+ *  * Unless required by applicable law or agreed to in writing, software
+ *  * distributed under the License is distributed on an "AS IS" BASIS,
+ *  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  * See the License for the specific language governing permissions and
+ *  * limitations under the License.
+ *
+ */
+
+package org.apache.solr.monitor.search;
+
+import static org.apache.solr.monitor.MonitorConstants.MONITOR_DOCUMENTS_KEY;
+import static 
org.apache.solr.monitor.search.PresearcherFactory.DEFAULT_ALIAS_PREFIX;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.UUID;
+import java.util.function.BiPredicate;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.monitor.DocumentBatchVisitor;
+import org.apache.lucene.monitor.MonitorFields;
+import org.apache.lucene.monitor.Presearcher;
+import org.apache.lucene.monitor.QueryDecomposer;
+import org.apache.lucene.monitor.TermFilteredPresearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.util.BytesRef;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.component.QueryComponent;
+import org.apache.solr.handler.component.ResponseBuilder;
+import org.apache.solr.handler.loader.JsonLoader;
+import org.apache.solr.monitor.AliasingPresearcher;
+import org.apache.solr.monitor.SolrMonitorQueryDecoder;
+import org.apache.solr.monitor.cache.MonitorQueryCache;
+import org.apache.solr.monitor.cache.SharedMonitorCache;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.update.DocumentBuilder;
+import org.apache.solr.util.SolrPluginUtils;
+import org.apache.solr.util.plugin.SolrCoreAware;
+
+public class ReverseSearchComponent extends QueryComponent implements 
SolrCoreAware {
+
+  public static final String COMPONENT_NAME = "reverseSearch";
+
+  private static final String SOLR_MONITOR_CACHE_NAME_KEY = 
"solrMonitorCacheName";
+  private static final String SOLR_MONITOR_CACHE_NAME_DEFAULT = 
"solrMonitorCache";
+  private String solrMonitorCacheName = SOLR_MONITOR_CACHE_NAME_DEFAULT;
+
+  private QueryDecomposer queryDecomposer;
+  private Presearcher presearcher;
+  private final SolrMatcherSinkFactory solrMatcherSinkFactory = new 
SolrMatcherSinkFactory();
+  private PresearcherFactory.PresearcherParameters presearcherParameters;
+
+  @Override
+  public void init(NamedList<?> args) { // reads this component's configuration from args
+    super.init(args);
+    Object solrMonitorCacheName = args.remove(SOLR_MONITOR_CACHE_NAME_KEY); // taken out of args before the rest is handed to invokeSetters below
+    if (solrMonitorCacheName != null) {
+      this.solrMonitorCacheName = (String) solrMonitorCacheName; // replaces SOLR_MONITOR_CACHE_NAME_DEFAULT when a name is configured
+    }
+    presearcherParameters = new PresearcherFactory.PresearcherParameters();
+    SolrPluginUtils.invokeSetters(presearcherParameters, args); // NOTE(review): presumably applies the remaining args to matching setters on presearcherParameters - confirm
+  }
+
+  @Override
+  public void prepare(ResponseBuilder rb) { // replaces the request's query with match-all and appends two monitor filters to rb
+    var req = rb.req;
+    var documentBatch = documentBatch(req);
+    var matcherSink = solrMatcherSinkFactory.build(documentBatch, 
rb.req.getContext());
+    Query preFilterQuery = presearcher.buildQuery(documentBatch.get(), 
getTermAcceptor(rb.req)); // query built by the presearcher from the document batch
+    List<Query> mutableFilters =
+        
Optional.ofNullable(rb.getFilters()).map(ArrayList::new).orElseGet(ArrayList::new); // copy of the existing filters, or a new empty list when rb has none
+    rb.setQuery(new MatchAllDocsQuery()); // the main query matches everything; the filters added below do the narrowing
+    mutableFilters.add(preFilterQuery);
+    var searcher = req.getSearcher();
+    MonitorQueryCache solrMonitorCache =
+        (SharedMonitorCache) searcher.getCache(this.solrMonitorCacheName); // NOTE(review): downcast with no null or type check visible in this method - confirm the named cache is always a SharedMonitorCache
+    SolrMonitorQueryDecoder queryDecoder = new 
SolrMonitorQueryDecoder(req.getCore());
+    mutableFilters.add(
+        new MonitorPostFilter(
+            new SolrMonitorQueryCollector.CollectorContext(
+                solrMonitorCache, queryDecoder, matcherSink)));
+    rb.setFilters(mutableFilters); // installs the extended filter list on the response builder
+  }
+
+  @SuppressWarnings({"unchecked"})
+  private DocumentBatchVisitor documentBatch(SolrQueryRequest req) {
+    Object jsonParams = req.getJSON().get("params");
+    if (!(jsonParams instanceof Map)) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "need 
params");
+    }
+    var paramMap = (Map<?, ?>) jsonParams;
+    var documents = paramMap.get(MONITOR_DOCUMENTS_KEY);
+    if (!(documents instanceof List)) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "need 
documents list");
+    }
+    List<Document> luceneDocs = new ArrayList<>();
+    for (var document : (List<?>) documents) {
+      if (!(document instanceof Map)
+          || !((Map<?, ?>) document).keySet().stream().allMatch(key -> key 
instanceof String)) {
+        throw new SolrException(
+            SolrException.ErrorCode.BAD_REQUEST, "document needs to be a 
string-keyed map");
+      }
+      var docAsMap = (Map<Object, Object>) document;
+      docAsMap.putIfAbsent(
+          req.getSchema().getUniqueKeyField().getName(), 
UUID.randomUUID().toString());

Review Comment:
   My "would be simpler" thoughts were something along these lines:
    * "the key field must always be supplied" is clear to the user (and for the 
code)
    * "the key field must never be supplied" is clear to the user (and for the 
code)
    * "if the key field is not (fully) supplied it will be auto-generated" 
raises a bunch of questions:
      * will the auto-generation influence the results -- hopefully not the 
results directly, but perhaps the ordering of the results?
      * what if the auto-generation (for missing key fields) clashes with 
supplied key field values?
      * can the auto-generation support my particular key field type, 
e.g. composite routing? -- 
https://solr.apache.org/guide/solr/latest/deployment-guide/solrcloud-shards-indexing.html#document-routing
      * in a sharded setup, could different shards auto-generate the same key 
(or could a key auto-generated on one shard clash with a supplied key on 
another shard)? Unlikely in practice, though.
   
   Perhaps a middle ground could be _"the key field must be supplied for all 
documents or for none of the documents, and if it's not supplied then ... will 
be used to auto-generate and that will only work for ... fields, if you use 
another key field type then you must supply the keys"_ or something like that.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to