[solr] branch main updated: DocRouter: strengthen abstraction (#1215)

dsmiley Mon, 09 Jan 2023 14:38:41 -0800

This is an automated email from the ASF dual-hosted git repository.

dsmiley pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git



The following commit(s) were added to refs/heads/main by this push:
     new d2c4788065b DocRouter: strengthen abstraction (#1215)
d2c4788065b is described below

commit d2c4788065bcdbcc923240b0e01747d1b8eb2ee1
Author: David Smiley <[email protected]>
AuthorDate: Mon Jan 9 17:38:22 2023 -0500

    DocRouter: strengthen abstraction (#1215)
    
    Background: DocRouter is an abstraction with 3 implementations; it isn't 
pluggable.  There were a number of spots, especially in the split code, that 
made assumptions about how CompositeIdRouter parses IDs.
    
    Strengthen the separation of concerns so that the split code can do its 
job, delegating to the DocRouter (specifically a CompositeIdRouter) on how to 
parse doc IDs.  It also makes CompositeIdRouter more extensible, but doesn't 
add subclasses or pluggability.
---
 .../solr/cloud/api/collections/MigrateCmd.java     |  5 +-
 .../org/apache/solr/handler/admin/SplitOp.java     | 63 ++++++++++----------
 .../org/apache/solr/update/SolrIndexSplitter.java  | 22 +++----
 .../processor/DistributedZkUpdateProcessor.java    |  3 +-
 .../solr/handler/admin/SplitHandlerTest.java       |  2 +-
 .../solr/common/cloud/CompositeIdRouter.java       | 68 ++++++++++++++++++++--
 .../org/apache/solr/common/cloud/DocRouter.java    |  8 +--
 7 files changed, 112 insertions(+), 59 deletions(-)

diff --git 
a/solr/core/src/java/org/apache/solr/cloud/api/collections/MigrateCmd.java 
b/solr/core/src/java/org/apache/solr/cloud/api/collections/MigrateCmd.java
index 70c45027ae4..4ee59d13ca9 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/MigrateCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/MigrateCmd.java
@@ -53,7 +53,6 @@ import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.Utils;
 import org.apache.solr.handler.component.ShardHandler;
-import org.apache.solr.update.SolrIndexSplitter;
 import org.apache.solr.util.TimeOut;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -253,7 +252,7 @@ public class MigrateCmd implements 
CollApiCmds.CollectionApiCommand {
             SHARD_ID_PROP,
             sourceSlice.getName(),
             "routeKey",
-            SolrIndexSplitter.getRouteKey(splitKey) + "!",
+            sourceRouter.getRouteKeyNoSuffix(splitKey) + "!",
             "range",
             splitRange.toString(),
             "targetCollection",
@@ -283,7 +282,7 @@ public class MigrateCmd implements 
CollApiCmds.CollectionApiCommand {
       sourceSlice = sourceCollection.getSlice(sourceSlice.getName());
       Map<String, RoutingRule> rules = sourceSlice.getRoutingRules();
       if (rules != null) {
-        RoutingRule rule = rules.get(SolrIndexSplitter.getRouteKey(splitKey) + 
"!");
+        RoutingRule rule = 
rules.get(sourceRouter.getRouteKeyNoSuffix(splitKey) + "!");
         if (rule != null && rule.getRouteRanges().contains(splitRange)) {
           added = true;
           break;
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/SplitOp.java 
b/solr/core/src/java/org/apache/solr/handler/admin/SplitOp.java
index 646b19e2dc2..3b66794c5d5 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/SplitOp.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/SplitOp.java
@@ -263,8 +263,9 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
         DocCollection collection = clusterState.getCollection(collectionName);
         String sliceName = 
parentCore.getCoreDescriptor().getCloudDescriptor().getShardId();
         Slice slice = collection.getSlice(sliceName);
-        DocRouter router =
-            collection.getRouter() != null ? collection.getRouter() : 
DocRouter.DEFAULT;
+        CompositeIdRouter router =
+            (CompositeIdRouter)
+                (collection.getRouter() != null ? collection.getRouter() : 
DocRouter.DEFAULT);
         DocRouter.Range currentRange = slice.getRange();
 
         Object routerObj =
@@ -354,7 +355,10 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
    * Returns a list of range counts sorted by the range lower bound
    */
   static Collection<RangeCount> getHashHistogram(
-      SolrIndexSearcher searcher, String prefixField, DocRouter router, 
DocCollection collection)
+      SolrIndexSearcher searcher,
+      String prefixField,
+      CompositeIdRouter router,
+      DocCollection collection)
       throws IOException {
     RTimer timer = new RTimer();
     TreeMap<DocRouter.Range, RangeCount> counts = new TreeMap<>();
@@ -374,14 +378,18 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
     while ((term = termsEnum.next()) != null) {
       numPrefixes++;
 
-      String termStr = term.utf8ToString();
-      int firstSep = termStr.indexOf(CompositeIdRouter.SEPARATOR);
       // truncate to first separator since we don't support multiple levels 
currently
       // NOTE: this does not currently work for tri-level composite ids since 
the number of bits
       // allocated to the first ID is 16 for a 2 part id and 8 for a 3 part id!
-      if (firstSep != termStr.length() - 1 && firstSep > 0) {
-        numTriLevel++;
-        termStr = termStr.substring(0, firstSep + 1);
+      String termStr;
+      int routeKeyLen = router.getRouteKeyWithSeparator(term.bytes, 
term.offset, term.length);
+      if (routeKeyLen == 0 || routeKeyLen == term.length) {
+        termStr = term.utf8ToString();
+      } else {
+        int prevLen = term.length;
+        term.length = routeKeyLen;
+        termStr = term.utf8ToString();
+        term.length = prevLen; // restore    (Question: must we do this?)
       }
 
       DocRouter.Range range = router.getSearchRangeSingle(termStr, null, 
collection);
@@ -418,7 +426,10 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
    * (i.e. the terms are full IDs, not just prefixes)
    */
   static Collection<RangeCount> getHashHistogramFromId(
-      SolrIndexSearcher searcher, String idField, DocRouter router, 
DocCollection collection)
+      SolrIndexSearcher searcher,
+      String idField,
+      CompositeIdRouter router,
+      DocCollection collection)
       throws IOException {
     RTimer timer = new RTimer();
 
@@ -433,9 +444,8 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
     int numCollisions = 0;
     long sumBuckets = 0;
 
-    byte sep = (byte) CompositeIdRouter.SEPARATOR.charAt(0);
     TermsEnum termsEnum = terms.iterator();
-    BytesRef currPrefix = new BytesRef(); // prefix of the previous "id" term
+    BytesRef currPrefix = new BytesRef(); // prefix of the previous "id" term 
WITH SEPARATOR
     int bucketCount = 0; // count of the number of docs in the current bucket
 
     // We're going to iterate over all terms, so do the minimum amount of work 
per term.
@@ -445,7 +455,9 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
       BytesRef term = termsEnum.next();
 
       // compare to current prefix bucket and see if this new term shares the 
same prefix
-      if (term != null && term.length >= currPrefix.length && 
currPrefix.length > 0) {
+      if (term != null && currPrefix.length > 0) {
+        // since currPrefix includes the trailing separator, we can assume 
startsWith is a
+        // sufficient test
         if (StringHelper.startsWith(term, currPrefix)) {
           bucketCount++; // use 1 since we are dealing with unique ids
           continue;
@@ -474,25 +486,16 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
       // if the current term is null, we ran out of values
       if (term == null) break;
 
-      // find the new prefix (if any)
-
-      // resize if needed
-      if (currPrefix.length < term.length) {
-        currPrefix.bytes = new byte[term.length + 10];
-      }
-
-      // Copy the bytes up to and including the separator, and set the length 
if the separator is
-      // found. If there was no separator, then length remains 0 and it's the 
indicator that we have
-      // no prefix bucket
-      currPrefix.length = 0;
-      for (int i = 0; i < term.length; i++) {
-        byte b = term.bytes[i + term.offset];
-        currPrefix.bytes[i] = b;
-        if (b == sep) {
-          currPrefix.length = i + 1;
-          bucketCount++;
-          break;
+      // find the new prefix (if any), with trailing separator
+      currPrefix.length = router.getRouteKeyWithSeparator(term.bytes, 
term.offset, term.length);
+      if (currPrefix.length > 0) {
+        // resize if needed
+        if (currPrefix.length > currPrefix.bytes.length) {
+          currPrefix.bytes = new byte[currPrefix.length + 10];
         }
+        System.arraycopy(term.bytes, term.offset, currPrefix.bytes, 0, 
currPrefix.length);
+
+        bucketCount++;
       }
     }
 
diff --git a/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java 
b/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java
index a285f79e73f..61ba505ddd5 100644
--- a/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java
+++ b/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java
@@ -133,7 +133,8 @@ public class SolrIndexSplitter {
     if (cmd.splitKey == null) {
       splitKey = null;
     } else {
-      splitKey = getRouteKey(cmd.splitKey);
+      checkRouterSupportsSplitKey(hashRouter, cmd.splitKey);
+      splitKey = ((CompositeIdRouter) 
hashRouter).getRouteKeyNoSuffix(cmd.splitKey);
     }
     if (cmd.cores == null) {
       this.splitMethod = SplitMethod.REWRITE;
@@ -647,6 +648,7 @@ public class SolrIndexSplitter {
       AtomicInteger currentPartition,
       boolean delete)
       throws IOException {
+    checkRouterSupportsSplitKey(hashRouter, splitKey);
     LeafReader reader = readerContext.reader();
     FixedBitSet[] docSets = new FixedBitSet[numPieces];
     for (int i = 0; i < docSets.length; i++) {
@@ -689,8 +691,7 @@ public class SolrIndexSplitter {
       String idString = idRef.toString();
 
       if (splitKey != null) {
-        // todo have composite routers support these kind of things instead
-        String part1 = getRouteKey(idString);
+        String part1 = ((CompositeIdRouter) 
hashRouter).getRouteKeyNoSuffix(idString);
         if (part1 == null) continue;
         if (!splitKey.equals(part1)) {
           continue;
@@ -765,18 +766,11 @@ public class SolrIndexSplitter {
     return docSets;
   }
 
-  public static String getRouteKey(String idString) {
-    int idx = idString.indexOf(CompositeIdRouter.SEPARATOR);
-    if (idx <= 0) return null;
-    String part1 = idString.substring(0, idx);
-    int commaIdx = part1.indexOf(CompositeIdRouter.bitsSeparator);
-    if (commaIdx > 0 && commaIdx + 1 < part1.length()) {
-      char ch = part1.charAt(commaIdx + 1);
-      if (ch >= '0' && ch <= '9') {
-        part1 = part1.substring(0, commaIdx);
-      }
+  private static void checkRouterSupportsSplitKey(HashBasedRouter hashRouter, 
String splitKey) {
+    if (splitKey != null && !(hashRouter instanceof CompositeIdRouter)) {
+      throw new IllegalStateException(
+          "splitKey isn't supported for router " + hashRouter.getClass());
     }
-    return part1;
   }
 
   // change livedocs on the reader to delete those docs we don't want
diff --git 
a/solr/core/src/java/org/apache/solr/update/processor/DistributedZkUpdateProcessor.java
 
b/solr/core/src/java/org/apache/solr/update/processor/DistributedZkUpdateProcessor.java
index fbe89388192..c1eee8572c2 100644
--- 
a/solr/core/src/java/org/apache/solr/update/processor/DistributedZkUpdateProcessor.java
+++ 
b/solr/core/src/java/org/apache/solr/update/processor/DistributedZkUpdateProcessor.java
@@ -67,7 +67,6 @@ import org.apache.solr.update.DeleteUpdateCommand;
 import org.apache.solr.update.MergeIndexesCommand;
 import org.apache.solr.update.RollbackUpdateCommand;
 import org.apache.solr.update.SolrCmdDistributor;
-import org.apache.solr.update.SolrIndexSplitter;
 import org.apache.solr.update.UpdateCommand;
 import org.apache.solr.util.TestInjection;
 import org.apache.zookeeper.KeeperException;
@@ -1067,7 +1066,7 @@ public class DistributedZkUpdateProcessor extends 
DistributedUpdateProcessor {
           return nodes;
         }
 
-        String routeKey = SolrIndexSplitter.getRouteKey(id);
+        String routeKey = compositeIdRouter.getRouteKeyNoSuffix(id);
         if (routeKey != null) {
           RoutingRule rule = routingRules.get(routeKey + "!");
           if (rule != null) {
diff --git 
a/solr/core/src/test/org/apache/solr/handler/admin/SplitHandlerTest.java 
b/solr/core/src/test/org/apache/solr/handler/admin/SplitHandlerTest.java
index f3f1f69322f..011f5fe3885 100644
--- a/solr/core/src/test/org/apache/solr/handler/admin/SplitHandlerTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/admin/SplitHandlerTest.java
@@ -239,7 +239,7 @@ public class SplitHandlerTest extends SolrTestCaseJ4 {
 
     String prefixField = "id_prefix_s";
     String idField = "id";
-    DocRouter router = new CompositeIdRouter();
+    CompositeIdRouter router = new CompositeIdRouter();
 
     for (int i = 0; i < 100; i++) {
       SolrQueryRequest req = req("myquery");
diff --git 
a/solr/solrj/src/java/org/apache/solr/common/cloud/CompositeIdRouter.java 
b/solr/solrj/src/java/org/apache/solr/common/cloud/CompositeIdRouter.java
index dd0b6669195..9094ddd84e8 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/CompositeIdRouter.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/CompositeIdRouter.java
@@ -81,12 +81,57 @@ import org.apache.solr.common.util.Hash;
 public class CompositeIdRouter extends HashBasedRouter {
   public static final String NAME = "compositeId";
 
-  public static final String SEPARATOR = "!";
+  // TODO standardize naming: routeKey (probably) or shardKey or key; pick one.
+
+  /**
+   * This character separates a composite ID into a leading route key and the 
rest.
+   *
+   * <p>Importantly, it's also used at the end of a provided route key 
parameter (which appears in
+   * many places) to designate a hash range which translates to a list of 
slices. If a route key
+   * does not end with this character, then semantically the key points to a 
single slice that holds
+   * a doc with that ID.
+   */
+  public static final char SEPARATOR = '!';
 
   // separator used to optionally specify number of bits to allocate toward 
first part.
   public static final int bitsSeparator = '/';
   private int bits = 16;
 
+  /**
+   * Parse out the route key from {@code id} up to and including the {@link 
#SEPARATOR}, returning
+   * its length. If no route key is detected then 0 is returned.
+   */
+  public int getRouteKeyWithSeparator(byte[] id, int idOffset, int idLength) {
+    final byte SEPARATOR_BYTE = (byte) CompositeIdRouter.SEPARATOR;
+    for (int i = 0; i < idLength; i++) {
+      byte b = id[idOffset + i];
+      if (b == SEPARATOR_BYTE) {
+        return i + 1;
+      }
+    }
+    return 0;
+  }
+
+  /**
+   * Parse out the route key from {@code id}. It will not have the "bits" 
suffix or separator, if
+   * present. If there is no route key found then null is returned.
+   */
+  public String getRouteKeyNoSuffix(String id) {
+    int idx = id.indexOf(SEPARATOR);
+    if (idx <= 0) {
+      return null;
+    }
+    String part1 = id.substring(0, idx);
+    int commaIdx = part1.indexOf(bitsSeparator);
+    if (commaIdx > 0 && commaIdx + 1 < part1.length()) {
+      char ch = part1.charAt(commaIdx + 1);
+      if (ch >= '0' && ch <= '9') {
+        part1 = part1.substring(0, commaIdx);
+      }
+    }
+    return part1;
+  }
+
   @Override
   public int sliceHash(
       String id, SolrInputDocument doc, SolrParams params, DocCollection 
collection) {
@@ -132,6 +177,8 @@ public class CompositeIdRouter extends HashBasedRouter {
    * @return Range for given routeKey
    */
   public Range keyHashRange(String routeKey) {
+    routeKey = preprocessRouteKey(routeKey);
+
     if (routeKey.indexOf(SEPARATOR) < 0) {
       int hash = sliceHash(routeKey, null, null, null);
       return new Range(hash, hash);
@@ -147,6 +194,8 @@ public class CompositeIdRouter extends HashBasedRouter {
       return fullRange();
     }
 
+    shardKey = preprocessRouteKey(shardKey);
+
     if (shardKey.indexOf(SEPARATOR) < 0) {
       // shardKey is a simple id, so don't do a range
       int hash = Hash.murmurhash3_x86_32(shardKey, 0, shardKey.length(), 0);
@@ -164,7 +213,8 @@ public class CompositeIdRouter extends HashBasedRouter {
       // TODO: this may need modification in the future when shard splitting 
could cause an overlap
       return collection.getActiveSlices();
     }
-    String id = shardKey;
+
+    String id = preprocessRouteKey(shardKey);
 
     if (shardKey.indexOf(SEPARATOR) < 0) {
       // shardKey is a simple id, so don't do a range
@@ -185,6 +235,14 @@ public class CompositeIdRouter extends HashBasedRouter {
     return targetSlices;
   }
 
+  /**
+   * Methods accepting a route key (shard key) can have this input 
preprocessed by a subclass before
+   * further analysis.
+   */
+  protected String preprocessRouteKey(String shardKey) {
+    return shardKey;
+  }
+
   @Override
   public String getName() {
     return NAME;
@@ -266,7 +324,7 @@ public class CompositeIdRouter extends HashBasedRouter {
   }
 
   /** Helper class to calculate parts, masks etc for an id. */
-  static class KeyParser {
+  protected static class KeyParser {
     String key;
     int[] numBits;
     int[] hashes;
@@ -333,7 +391,7 @@ public class CompositeIdRouter extends HashBasedRouter {
       masks = getMasks();
     }
 
-    Range getRange() {
+    public Range getRange() {
       int lowerBound;
       int upperBound;
 
@@ -395,7 +453,7 @@ public class CompositeIdRouter extends HashBasedRouter {
       return masks;
     }
 
-    int getHash() {
+    public int getHash() {
       int result = hashes[0] & masks[0];
 
       for (int i = 1; i < pieces; i++) result = result | (hashes[i] & 
masks[i]);
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/DocRouter.java 
b/solr/solrj/src/java/org/apache/solr/common/cloud/DocRouter.java
index af0ebc66f12..490a0a21eda 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/DocRouter.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/DocRouter.java
@@ -39,7 +39,7 @@ import org.noggit.JSONWriter;
  */
 public abstract class DocRouter {
   public static final String DEFAULT_NAME = CompositeIdRouter.NAME;
-  public static final DocRouter DEFAULT = new CompositeIdRouter();
+  public static final DocRouter DEFAULT;
 
   public static DocRouter getDocRouter(String routerName) {
     DocRouter router = routerMap.get(routerName);
@@ -79,11 +79,11 @@ public abstract class DocRouter {
     // to "plain" if it doesn't have any properties.
     routerMap.put(null, plain); // back compat with 4.0
     routerMap.put(PlainIdRouter.NAME, plain);
-    routerMap.put(
-        CompositeIdRouter.NAME,
-        DEFAULT_NAME.equals(CompositeIdRouter.NAME) ? DEFAULT : new 
CompositeIdRouter());
+    routerMap.put(CompositeIdRouter.NAME, new CompositeIdRouter());
     routerMap.put(ImplicitDocRouter.NAME, new ImplicitDocRouter());
     // NOTE: careful that the map keys (the static .NAME members) are filled 
in by making them final
+
+    DEFAULT = routerMap.get(DEFAULT_NAME);
   }
 
   // Hash ranges can't currently "wrap" - i.e. max must be greater or equal to 
min.

[solr] branch main updated: DocRouter: strengthen abstraction (#1215)

Reply via email to