[GitHub] [lucene-solr] eribeiro commented on a change in pull request #864: SOLR-13101 : Shared storage support in SolrCloud

2019-09-12 Thread GitBox
URL: https://github.com/apache/lucene-solr/pull/864#discussion_r324030543
 
 

 ##
 File path: 
solr/core/src/java/org/apache/solr/store/blob/client/LocalStorageClient.java
 ##
 @@ -0,0 +1,259 @@
+package org.apache.solr.store.blob.client;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintWriter;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.StandardCopyOption;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Class that handles reads and writes of solr blob files to the local file 
system.
+ */
+public class LocalStorageClient implements CoreStorageClient {
+  
+  /** The directory on the local file system where blobs will be stored. */
+  public static final String BLOB_STORE_LOCAL_FS_ROOT_DIR_PROPERTY = 
"blob.local.dir";
+  
+  private final String blobStoreRootDir = 
System.getProperty(BLOB_STORE_LOCAL_FS_ROOT_DIR_PROPERTY, 
"/tmp/BlobStoreLocal/");
+
+  public LocalStorageClient() throws IOException {
+File rootDir = new File(blobStoreRootDir);
+rootDir.mkdirs(); // Might create the directory... or not
+if (!rootDir.isDirectory()) {
+  throw new IOException("Can't create local Blob root directory " + 
rootDir.getAbsolutePath());
+}
+  }
+
+  private File getCoreRootDir(String blobName) {
+return new File(BlobClientUtils.concatenatePaths(blobStoreRootDir, 
blobName));
+  }
+
+  @Override
+  public String pushStream(String blobName, InputStream is, long 
contentLength, String fileNamePrefix) throws BlobException {
+try {
+  createCoreStorage(blobName);
+  String blobPath = createNewNonExistingBlob(blobName, fileNamePrefix);
+
+  Files.copy(is, Paths.get(getBlobAbsolutePath(blobPath)), 
StandardCopyOption.REPLACE_EXISTING);
+
+  assert new File(getBlobAbsolutePath(blobPath)).length() == contentLength;
+
+  return blobPath;
+} catch (Exception ex) {
+  throw new BlobException(ex);
+}
+  }
+
+  /**
+   * Picks a unique name for a new blob for the given core.
+   * The current implementation creates a file, but eventually we'll just pick a random blob name and delegate to S3...
+   * @return the blob file name, including the "path" part of the name
+   */
+  private String createNewNonExistingBlob(String blobName, String 
fileNamePrefix) throws BlobException {
+try {
+  String blobPath = BlobClientUtils.generateNewBlobCorePath(blobName, 
fileNamePrefix);
+  final File blobFile = new File(getBlobAbsolutePath(blobPath));
+  if (blobFile.exists()) {
+// Not expecting this ever to happen. In theory we could just do 
"continue" here to try a new
+// name. For now throwing an exception to make sure we don't run into 
this...
+// continue;
+throw new IllegalStateException("The random file name chosen using 
UUID already exists. Very worrying! " + blobFile.getAbsolutePath());
+  }
+
+  return blobPath;
+} catch (Exception ex) {
+  throw new BlobException(ex);
+}
+  }
+
+  @Override
+  public InputStream pullStream(String blobPath) throws BlobException {
+try {
+  File blobFile = new File(getBlobAbsolutePath(blobPath));
+  return new FileInputStream(blobFile);
+} catch (Exception ex) {
+  throw new BlobException(ex);
+}
+  }
+
+  @Override
+  public void pushCoreMetadata(String sharedStoreName, String 
blobCoreMetadataName, BlobCoreMetadata bcm) throws BlobException {
+try {
+  createCoreStorage(sharedStoreName);
+  ToFromJson<BlobCoreMetadata> converter = new ToFromJson<>();
+  String json = converter.toJson(bcm);
+
+  // Constant path under which the core metadata is stored in the Blob 
store (the only blob stored under a constant path!)
+  String blobMetadataPath = 
getBlobAbsolutePath(getBlobMetadataName(sharedStoreName, blobCoreMetadataName));
+  final File blobMetadataFile = new File(blobMetadataPath); 
+
+  // Writing to the file is assumed to be atomic, i.e. the file cannot be observed midway through the write.
+  // That might not hold here, but it should be the case with a real S3 implementation.
+  try (PrintWriter out = new PrintWriter(blobMetadataFile)){
+out.println(json);
+  }  
+} catch (Exception ex) {
+  throw new BlobException(ex);
+}
+  }
+
+  @Override
+  public BlobCoreMetadata pullCoreMetadata(String sharedStoreName, String 
blobCoreMetadataName) throws BlobException {
+try {
+  if (!coreMetadataExists(sharedStoreName, blobCoreMetadataName)) {
+return null;
+  }
+  
+  String blobMetadataPath = 
getBlobAbsolutePath(getBlobMetadataName(sharedStoreName, blobCoreMetadataName));
+  File blobMetadataFile = new File(blobMetadataPath); 
+  
+  String json = 
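The metadata write above is only assumed to be atomic. On a local file system it can be made genuinely atomic by writing to a temp file and renaming it into place; a minimal sketch (not part of the PR, names are illustrative):

```java
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;

final class AtomicWriteSketch {
  // Writes json to target so readers never observe a half-written file:
  // write to a temp file in the same directory, then rename into place.
  static void writeAtomically(Path target, String json) throws IOException {
    Path tmp = Files.createTempFile(target.getParent(), "bcm-", ".tmp");
    try {
      Files.write(tmp, json.getBytes(StandardCharsets.UTF_8));
      // An ATOMIC_MOVE rename within one file system replaces the target in a
      // single step on POSIX file systems (support is platform-dependent).
      Files.move(tmp, target, StandardCopyOption.ATOMIC_MOVE);
    } finally {
      Files.deleteIfExists(tmp); // no-op when the move succeeded
    }
  }
}
```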

[GitHub] [lucene-solr] eribeiro commented on a change in pull request #864: SOLR-13101 : Shared storage support in SolrCloud

2019-09-12 Thread GitBox
URL: https://github.com/apache/lucene-solr/pull/864#discussion_r324027378
 
 

 ##
 File path: 
solr/core/src/java/org/apache/solr/store/blob/client/BlobCoreMetadataBuilder.java
 ##
 @@ -0,0 +1,95 @@
+package org.apache.solr.store.blob.client;
+
+import java.util.*;
 
 Review comment:
   We usually avoid wildcard imports in Apache projects, iirc. 
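   For illustration, the explicit form spells out each type the file uses; which types `BlobCoreMetadataBuilder` actually needs is an assumption here:

   ```java
   // Explicit imports make the class's dependencies visible at a glance and keep
   // precommit/import-order checks happy. (The exact set needed is an assumption.)
   import java.util.HashSet;
   import java.util.Map;
   import java.util.Set;
   ```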





[GitHub] [lucene-solr] eribeiro commented on a change in pull request #864: SOLR-13101 : Shared storage support in SolrCloud

2019-09-12 Thread GitBox
URL: https://github.com/apache/lucene-solr/pull/864#discussion_r323982385
 
 

 ##
 File path: 
solr/core/src/java/org/apache/solr/store/blob/process/CorePullTask.java
 ##
 @@ -0,0 +1,452 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.store.blob.process;
+
+import java.io.File;
+import java.lang.invoke.MethodHandles;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.solr.cloud.ZkController;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.core.CoreContainer;
+import org.apache.solr.core.CoreDescriptor;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.store.blob.client.BlobCoreMetadata;
+import org.apache.solr.store.blob.client.CoreStorageClient;
+import org.apache.solr.store.blob.metadata.CorePushPull;
+import org.apache.solr.store.blob.metadata.ServerSideMetadata;
+import org.apache.solr.store.blob.metadata.SharedStoreResolutionUtil;
+import 
org.apache.solr.store.blob.metadata.SharedStoreResolutionUtil.SharedMetadataResolutionResult;
+import org.apache.solr.store.blob.process.CorePullerFeeder.PullCoreInfo;
+import org.apache.solr.store.blob.provider.BlobStorageProvider;
+import org.apache.solr.store.blob.util.BlobStoreUtils;
+import org.apache.solr.store.blob.util.DeduplicatingList;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Throwables;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+
+/**
+ * Code for pulling updates for a specific core from the Blob store. See {@link CorePushTask} for the push version of this.
+ */
+public class CorePullTask implements DeduplicatingList.Deduplicatable {
+  private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  /**
+   * Minimum delay between two pull retries for a given core. Setting this higher than the push retry delay reduces the
+   * noise we get from a flood of queries for a stale core.
+   * 
+   * TODO: make configurable
+   */
+  private static final long MIN_RETRY_DELAY_MS = 2;
+
+  /** Cores currently being pulled and timestamp of pull start (to identify 
stuck ones in logs) */
+  private static final HashMap<String, Long> pullsInFlight = Maps.newHashMap();
+
+  /** Cores unknown locally that got created as part of the pull process but 
for which no data has been pulled yet
+   * from Blob store. If we ignore this transitory state, these cores can be 
accessed locally and simply look empty.
+   * We'd rather treat threads attempting to access such cores like threads 
attempting to access an unknown core and
+   * do a pull (or more likely wait for an ongoing pull to finish).
+   *
+   * When this lock has to be taken as well as {@link #pullsInFlight}, then 
{@link #pullsInFlight} has to be taken first.
+   * Reading this set implies acquiring the monitor of the set (as if 
@GuardedBy("itself")), but writing to the set
+   * additionally implies holding the {@link #pullsInFlight} lock. This guarantees that while {@link #pullsInFlight}
+   * is held, no element in the set is changing.
+   */
+  private static final Set<String> coresCreatedNotPulledYet = Sets.newHashSet();
+
+  private final CoreContainer coreContainer;
+  private final PullCoreInfo pullCoreInfo;
+  private final long queuedTimeMs;
+  private int attempts;
+  private long lastAttemptTimestamp;
+  private final PullCoreCallback callback;
+
+  CorePullTask(CoreContainer coreContainer, PullCoreInfo pullCoreInfo, 
PullCoreCallback callback) {
+this(coreContainer, pullCoreInfo, System.currentTimeMillis(), 0, 0L, 
callback);
+  }
+
+  private CorePullTask(CoreContainer coreContainer, PullCoreInfo pullCoreInfo, 
long queuedTimeMs, int attempts,
+  long lastAttemptTimestamp, PullCoreCallback callback) {
+this.coreContainer = coreContainer;
+this.pullCoreInfo = pullCoreInfo;
+this.queuedTimeMs = queuedTimeMs;
+this.attempts = attempts;
+this.lastAttemptTimestamp = lastAttemptTimestamp;
+this.callback = callback;
+  }
+
+  
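A sketch of the lock-ordering rule that javadoc describes, assuming the two static fields quoted above; the method names are illustrative, not the PR's:

```java
// Writes take pullsInFlight first, then the set's own monitor.
static void markCreatedNotPulledYet(String coreName) {
  synchronized (pullsInFlight) {              // outer lock, always acquired first
    synchronized (coresCreatedNotPulledYet) { // inner lock: the set's monitor
      coresCreatedNotPulledYet.add(coreName);
    }
  }
}

// Reads only need the set's monitor, since writers always hold it too.
static boolean isCreatedNotPulledYet(String coreName) {
  synchronized (coresCreatedNotPulledYet) {
    return coresCreatedNotPulledYet.contains(coreName);
  }
}
```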

[GitHub] [lucene-solr] eribeiro commented on a change in pull request #864: SOLR-13101 : Shared storage support in SolrCloud

2019-09-12 Thread GitBox
URL: https://github.com/apache/lucene-solr/pull/864#discussion_r324026158
 
 

 ##
 File path: 
solr/core/src/java/org/apache/solr/store/blob/client/BlobCoreMetadata.java
 ##
 @@ -0,0 +1,284 @@
+package org.apache.solr.store.blob.client;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.UUID;
+
+/**
+ * Object defining the metadata stored in the blob store for a Shared Collection shard, and its builders.
+ * This metadata includes all actual segment files as well as the segments_N 
file of the commit point.
+ * 
+ * This object is serialized to/from Json and stored in the blob store as a 
blob.
+ */
+public class BlobCoreMetadata {
+
+/**
+ * Name of the shard index data that is shared by all replicas belonging 
to that shard. This 
+ * name is to decouple the core name that Solr manages from the name of 
the core on blob store. 
+ */
+private final String sharedBlobName;
+
+/**
+ * Unique identifier of this metadata, that changes on every update to the 
metadata (except generating a new corrupt metadata
+ * through {@link #getCorruptOf}).
+ */
+private final String uniqueIdentifier;
+
+/**
+ * Indicates that a Solr (search) server pulled this core and was then 
unable to open or use it. This flag is used as
+ * an indication to servers pushing blobs for that core into Blob Store to 
push a complete set of files if they have
+ * a locally working copy rather than just diffs (files missing on Blob 
Store).
+ */
+private final boolean isCorrupt;
+
+/**
+ * Indicates that this core has been deleted by the client. This flag is 
used as a marker to prevent other servers
+ * from pushing their version of this core to blob and to allow local copy 
cleanup.
+ */
+private final boolean isDeleted;
+
+/**
+ * The array of files that constitute the current commit point of the core 
(as known by the Blob store).
+ * This array is not ordered! There are no duplicate entries in it either 
(see how it's built in {@link BlobCoreMetadataBuilder}).
+ */
+private final BlobFile[] blobFiles;
+
+/**
+ * Files marked for delete but not yet removed from the Blob store. Each 
such file contains information indicating when
+ * it was marked for delete so we can actually remove the corresponding 
blob (and the entry from this array in the metadata)
+ * when it's safe to do so even if there are (unexpected) conflicting 
updates to the blob store by multiple solr servers...
+ * TODO: we might want to separate the metadata blob with the deletes as 
it's not required to always fetch the delete list when checking freshness of 
local core...
+ */
+private final BlobFileToDelete[] blobFilesToDelete;
+
+/**
+ * This is the constructor called by {@link BlobCoreMetadataBuilder}.
+ * It always builds non "isCorrupt" and non "isDeleted" metadata. 
+ * The only way to build an instance of "isCorrupt" metadata is to use 
{@link #getCorruptOf} and for "isDeleted" use {@link #getDeletedOf()}
+ */
+BlobCoreMetadata(String sharedBlobName, BlobFile[] blobFiles, 
BlobFileToDelete[] blobFilesToDelete) {
+this(sharedBlobName, blobFiles, blobFilesToDelete, 
UUID.randomUUID().toString(), false,
+false);
+}
+
+private BlobCoreMetadata(String sharedBlobName, BlobFile[] blobFiles, 
BlobFileToDelete[] blobFilesToDelete, 
+String uniqueIdentifier, boolean isCorrupt, boolean isDeleted) {
+this.sharedBlobName = sharedBlobName;
+this.blobFiles = blobFiles;
+this.blobFilesToDelete = blobFilesToDelete;
+this.uniqueIdentifier = uniqueIdentifier;
+this.isCorrupt = isCorrupt;
+this.isDeleted = isDeleted;
+}
+
+/**
+ * Given a non corrupt {@link BlobCoreMetadata} instance, creates an 
equivalent one based on it but marked as corrupt.
+ * The new instance keeps all the rest of the metadata unchanged, 
including the {@link #uniqueIdentifier}.
+ */
+public BlobCoreMetadata getCorruptOf() {
+assert !isCorrupt;
+return new BlobCoreMetadata(sharedBlobName, blobFiles, 
blobFilesToDelete, uniqueIdentifier, true, isDeleted);
+}
+
+/**
+ * Given a {@link BlobCoreMetadata} instance, creates an equivalent one 
based on it but marked as deleted.
+ * 
+ * The new instance keeps all the rest of the metadata unchanged, 
including the {@link #uniqueIdentifier}.
+ */
+public BlobCoreMetadata getDeletedOf() {
+assert !isDeleted;
+return new BlobCoreMetadata(sharedBlobName, blobFiles, 
blobFilesToDelete, uniqueIdentifier, isCorrupt, true);
+}
+
+/**
+ * Returns true if the Blob metadata was marked as deleted
+ */
+public boolean getIsDeleted() {
+return isDeleted;
+}
+
+/**
+ 
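As a usage note for the copy-on-write accessors above (`getCorruptOf`, `getDeletedOf`): they never mutate the receiving instance. A hedged sketch, assuming the builder is constructed with the shared blob name:

```java
BlobCoreMetadata bcm = new BlobCoreMetadataBuilder("mySharedBlobName").build(); // builder call shape is an assumption
BlobCoreMetadata deleted = bcm.getDeletedOf(); // new instance, same uniqueIdentifier, isDeleted=true
assert deleted.getIsDeleted();
assert !bcm.getIsDeleted(); // the original instance is unchanged
```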

[GitHub] [lucene-solr] eribeiro commented on a change in pull request #864: SOLR-13101 : Shared storage support in SolrCloud

2019-09-12 Thread GitBox
URL: https://github.com/apache/lucene-solr/pull/864#discussion_r323967328
 
 

 ##
 File path: 
solr/core/src/java/org/apache/solr/handler/admin/RequestApplyUpdatesOp.java
 ##
 @@ -68,4 +74,20 @@ public void execute(CoreAdminHandler.CallInfo it) throws 
Exception {
   if (it.req != null) it.req.close();
 }
   }
+
+
+  private void pushToSharedStore(SolrCore core) {
+// Push the index to blob storage before we set our state to ACTIVE
+CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor();
+if (cloudDesc.getReplicaType().equals(Replica.Type.SHARED)) {
 
 Review comment:
   `Replica.Type.SHARED` is an enum, so this line could be as below, right?
   
   ```suggestion
   if (cloudDesc.getReplicaType() == Replica.Type.SHARED) {
   ```
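   Beyond style, `==` on enums is also null-safe and compile-time type checked. A self-contained illustration, not from the PR:

   ```java
   enum Color { RED, GREEN }

   class EnumCompareDemo {
     static boolean isRed(Color c) {
       // == on enums is null-safe (null simply compares unequal) and is checked at
       // compile time; c.equals(Color.RED) would throw NullPointerException if c is null.
       return c == Color.RED;
     }

     public static void main(String[] args) {
       System.out.println(isRed(Color.RED)); // true
       System.out.println(isRed(null));      // false, no NPE
     }
   }
   ```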





[GitHub] [lucene-solr] eribeiro commented on a change in pull request #864: SOLR-13101 : Shared storage support in SolrCloud

2019-09-12 Thread GitBox
URL: https://github.com/apache/lucene-solr/pull/864#discussion_r324023752
 
 

 ##
 File path: 
solr/core/src/java/org/apache/solr/store/shared/SharedStoreManager.java
 ##
 @@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.store.shared;
+
+import org.apache.solr.cloud.ZkController;
+import org.apache.solr.store.blob.process.BlobDeleteManager;
+import org.apache.solr.store.blob.process.BlobProcessUtil;
+import org.apache.solr.store.blob.process.CorePullTracker;
+import org.apache.solr.store.blob.provider.BlobStorageProvider;
+import org.apache.solr.store.shared.metadata.SharedShardMetadataController;
+
+import com.google.common.annotations.VisibleForTesting;
+
+/**
+ * Provides access to Shared Store processes. Note that this class is meant to 
be 
+ * more generic in the future and provide a cleaner API but for now we'll 
expose
+ * the underlying implementations
+ */
+public class SharedStoreManager {
+  
+  private ZkController zkController;
+  private SharedShardMetadataController sharedShardMetadataController;
+  private BlobStorageProvider blobStorageProvider;
+  private BlobDeleteManager blobDeleteManager;
+  private BlobProcessUtil blobProcessUtil;
+  private CorePullTracker corePullTracker;
+  
+  public SharedStoreManager(ZkController controller) {
+zkController = controller;
+// initialize BlobProcessUtil with the SharedStoreManager for background 
processes to be ready
+blobProcessUtil = new BlobProcessUtil(zkController.getCoreContainer());
+  }
+  
+  @VisibleForTesting
+  public void initBlobStorageProvider(BlobStorageProvider blobStorageProvider) 
{
+this.blobStorageProvider = blobStorageProvider;
+  }
+  
+  /*
+   * Initializes a SharedShardMetadataController if one doesn't exist and returns it
+   */
+  public SharedShardMetadataController getSharedShardMetadataController() {
+if (sharedShardMetadataController != null) {
 
 Review comment:
   If this method (and the ones below) can be called by multiple threads, two or more threads could arrive at this line at the same time while `sharedShardMetadataController` is still null, and each would create its own instance. Does that make sense? Would it be worth synchronizing the method?
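   One possible fix along those lines, assuming the getter keeps the shape shown in the diff (the constructor argument is a guess):

   ```java
   // Synchronizing the lazy getter guarantees at most one instance is ever created.
   public synchronized SharedShardMetadataController getSharedShardMetadataController() {
     if (sharedShardMetadataController == null) {
       // Constructor argument assumed for illustration.
       sharedShardMetadataController = new SharedShardMetadataController(zkController);
     }
     return sharedShardMetadataController;
   }
   ```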





[GitHub] [lucene-solr] eribeiro commented on a change in pull request #864: SOLR-13101 : Shared storage support in SolrCloud

2019-09-12 Thread GitBox
URL: https://github.com/apache/lucene-solr/pull/864#discussion_r323984413
 
 

 ##
 File path: 
solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java
 ##
 @@ -31,6 +31,7 @@
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
+import java.util.Locale;
 
 Review comment:
   is this being used?





[GitHub] [lucene-solr] eribeiro commented on a change in pull request #864: SOLR-13101 : Shared storage support in SolrCloud

2019-09-12 Thread GitBox
URL: https://github.com/apache/lucene-solr/pull/864#discussion_r323981156
 
 

 ##
 File path: 
solr/core/src/java/org/apache/solr/store/blob/process/CorePullTask.java
 ##
 @@ -0,0 +1,452 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.store.blob.process;
+
+import java.io.File;
+import java.lang.invoke.MethodHandles;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.solr.cloud.ZkController;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.core.CoreContainer;
+import org.apache.solr.core.CoreDescriptor;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.store.blob.client.BlobCoreMetadata;
+import org.apache.solr.store.blob.client.CoreStorageClient;
+import org.apache.solr.store.blob.metadata.CorePushPull;
+import org.apache.solr.store.blob.metadata.ServerSideMetadata;
+import org.apache.solr.store.blob.metadata.SharedStoreResolutionUtil;
+import 
org.apache.solr.store.blob.metadata.SharedStoreResolutionUtil.SharedMetadataResolutionResult;
+import org.apache.solr.store.blob.process.CorePullerFeeder.PullCoreInfo;
+import org.apache.solr.store.blob.provider.BlobStorageProvider;
+import org.apache.solr.store.blob.util.BlobStoreUtils;
+import org.apache.solr.store.blob.util.DeduplicatingList;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Throwables;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+
+/**
+ * Code for pulling updates for a specific core from the Blob store. See {@link CorePushTask} for the push version of this.
+ */
+public class CorePullTask implements DeduplicatingList.Deduplicatable {
+  private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  /**
+   * Minimum delay between two pull retries for a given core. Setting this higher than the push retry delay reduces the
+   * noise we get from a flood of queries for a stale core.
+   * 
+   * TODO: make configurable
+   */
+  private static final long MIN_RETRY_DELAY_MS = 2;
+
+  /** Cores currently being pulled and timestamp of pull start (to identify 
stuck ones in logs) */
+  private static final HashMap<String, Long> pullsInFlight = Maps.newHashMap();
+
+  /** Cores unknown locally that got created as part of the pull process but 
for which no data has been pulled yet
+   * from Blob store. If we ignore this transitory state, these cores can be 
accessed locally and simply look empty.
+   * We'd rather treat threads attempting to access such cores like threads 
attempting to access an unknown core and
+   * do a pull (or more likely wait for an ongoing pull to finish).
+   *
+   * When this lock has to be taken as well as {@link #pullsInFlight}, then 
{@link #pullsInFlight} has to be taken first.
+   * Reading this set implies acquiring the monitor of the set (as if 
@GuardedBy("itself")), but writing to the set
+   * additionally implies holding the {@link #pullsInFlight} lock. This guarantees that while {@link #pullsInFlight}
+   * is held, no element in the set is changing.
+   */
+  private static final Set<String> coresCreatedNotPulledYet = Sets.newHashSet();
+
+  private final CoreContainer coreContainer;
+  private final PullCoreInfo pullCoreInfo;
+  private final long queuedTimeMs;
+  private int attempts;
+  private long lastAttemptTimestamp;
+  private final PullCoreCallback callback;
+
+  CorePullTask(CoreContainer coreContainer, PullCoreInfo pullCoreInfo, 
PullCoreCallback callback) {
+this(coreContainer, pullCoreInfo, System.currentTimeMillis(), 0, 0L, 
callback);
+  }
+
+  private CorePullTask(CoreContainer coreContainer, PullCoreInfo pullCoreInfo, 
long queuedTimeMs, int attempts,
+  long lastAttemptTimestamp, PullCoreCallback callback) {
+this.coreContainer = coreContainer;
+this.pullCoreInfo = pullCoreInfo;
+this.queuedTimeMs = queuedTimeMs;
+this.attempts = attempts;
+this.lastAttemptTimestamp = lastAttemptTimestamp;
+this.callback = callback;
+  }
+
+  

[GitHub] [lucene-solr] eribeiro commented on a change in pull request #864: SOLR-13101 : Shared storage support in SolrCloud

2019-09-12 Thread GitBox
URL: https://github.com/apache/lucene-solr/pull/864#discussion_r323974231
 
 

 ##
 File path: 
solr/core/src/java/org/apache/solr/store/blob/provider/BlobStorageProvider.java
 ##
 @@ -0,0 +1,62 @@
+package org.apache.solr.store.blob.provider;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+
+import org.apache.solr.common.SolrException;
+import org.apache.solr.store.blob.client.BlobException;
+import org.apache.solr.store.blob.client.BlobStorageClientBuilder;
+import org.apache.solr.store.blob.client.BlobstoreProviderType;
+import org.apache.solr.store.blob.client.CoreStorageClient;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.amazonaws.SdkClientException;
+
+/**
+ * Class that provides access to the shared storage client (blob client) and
+ * handles initialization of that client. This class serves as the provider for all
+ * blob store communication channels.
+ */
+public class BlobStorageProvider {
+
+  private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  private CoreStorageClient storageClient;
 
 Review comment:
   ```suggestion
 private volatile CoreStorageClient storageClient;
   ```
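   Without `volatile`, a second thread can observe a stale or partially published reference through the unsynchronized read in `getClient()`. A double-checked-locking sketch of how the field and getter could fit together; the `buildClient()` helper is hypothetical:

   ```java
   private volatile CoreStorageClient storageClient;

   public CoreStorageClient getClient() {
     CoreStorageClient client = storageClient; // one volatile read
     if (client == null) {
       synchronized (this) {
         if (storageClient == null) {
           storageClient = buildClient(); // hypothetical factory for the configured provider
         }
         client = storageClient;
       }
     }
     return client;
   }
   ```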





[GitHub] [lucene-solr] eribeiro commented on a change in pull request #864: SOLR-13101 : Shared storage support in SolrCloud

2019-09-12 Thread GitBox
URL: https://github.com/apache/lucene-solr/pull/864#discussion_r323972376
 
 

 ##
 File path: 
solr/core/src/java/org/apache/solr/store/blob/client/S3StorageClient.java
 ##
 @@ -0,0 +1,385 @@
+package org.apache.solr.store.blob.client;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.*;
+import java.util.function.Consumer;
+import java.util.stream.Collectors;
+
+import org.apache.solr.common.StringUtils;
+import org.apache.solr.util.FileUtils;
+
+import com.amazonaws.AmazonClientException;
+import com.amazonaws.AmazonServiceException;
+import com.amazonaws.regions.Regions;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3ClientBuilder;
+import com.amazonaws.services.s3.model.*;
+import com.amazonaws.services.s3.model.DeleteObjectsRequest.KeyVersion;
+import com.google.common.collect.Iterables;
+
+import org.apache.solr.store.blob.client.BlobCoreMetadata;
+import org.apache.solr.store.blob.client.BlobClientUtils;
+import org.apache.solr.store.blob.client.ToFromJson;
+
+/**
+ * This class implements an AmazonS3 client for reading and writing search 
index
+ * data to AWS S3.
+ */
+public class S3StorageClient implements CoreStorageClient {
+
+  private final AmazonS3 s3Client;
+
+  /** The S3 bucket where we write all of our blobs to */
+  private final String blobBucketName;
+
+  // S3 has a hard limit of 1000 keys per batch delete request
+  private static final int MAX_KEYS_PER_BATCH_DELETE = 1000;
+
+  /**
+   * Construct a new S3StorageClient that is an implementation of the
+   * CoreStorageClient using AWS S3 as the underlying blob store service 
provider.
+   */
+  public S3StorageClient() throws IOException {
+String credentialsFilePath = 
AmazonS3Configs.CREDENTIALS_FILE_PATH.getValue();
+
+// requires credentials file on disk to authenticate with S3
+if (!FileUtils.fileExists(credentialsFilePath)) {
+  throw new IOException("Credentials file does not exist in " + 
credentialsFilePath);
+}
+
+/*
+ * default s3 client builder loads credentials from disk and handles token 
refreshes
+ */
+AmazonS3ClientBuilder builder = AmazonS3ClientBuilder.standard();
+s3Client = builder
+.withPathStyleAccessEnabled(true)
+.withRegion(Regions.fromName(AmazonS3Configs.REGION.getValue()))
+.build();
+
+blobBucketName = AmazonS3Configs.BUCKET_NAME.getValue();
+  }
+
+  @Override
+  public void pushCoreMetadata(String sharedStoreName, String 
blobCoreMetadataName, BlobCoreMetadata bcm)
+  throws BlobException {
+try {
+  ToFromJson<BlobCoreMetadata> converter = new ToFromJson<>();
+  String json = converter.toJson(bcm);
+
+  String blobCoreMetadataPath = getBlobMetadataPath(sharedStoreName, 
blobCoreMetadataName);
+  /*
+   * Encodes contents of the string into an S3 object. If no exception is 
thrown
+   * then the object is guaranteed to have been stored
+   */
+  s3Client.putObject(blobBucketName, blobCoreMetadataPath, json);
+} catch (AmazonServiceException ase) {
+  throw handleAmazonServiceException(ase);
+} catch (AmazonClientException ace) {
+  throw new BlobClientException(ace);
+} catch (Exception ex) {
+  throw new BlobException(ex);
+}
+  }
+
+  @Override
+  public BlobCoreMetadata pullCoreMetadata(String sharedStoreName, String 
blobCoreMetadataName) throws BlobException {
+try {
+  String blobCoreMetadataPath = getBlobMetadataPath(sharedStoreName, 
blobCoreMetadataName);
+
+  if (!coreMetadataExists(sharedStoreName, blobCoreMetadataName)) {
+return null;
+  }
+
+  String decodedJson = s3Client.getObjectAsString(blobBucketName, 
blobCoreMetadataPath);
+  ToFromJson<BlobCoreMetadata> converter = new ToFromJson<>();
+  return converter.fromJson(decodedJson, BlobCoreMetadata.class);
+} catch (AmazonServiceException ase) {
+  throw handleAmazonServiceException(ase);
+} catch (AmazonClientException ace) {
+  throw new BlobClientException(ace);
+} catch (Exception ex) {
+  throw new BlobException(ex);
+}
+  }
+
+  @Override
+  public InputStream pullStream(String path) throws BlobException {
+try {
+  S3Object requestedObject = s3Client.getObject(blobBucketName, path);
+  // This InputStream instance needs to be closed by the caller
+  return requestedObject.getObjectContent();
+} catch (AmazonServiceException ase) {
+  throw handleAmazonServiceException(ase);
+} catch (AmazonClientException ace) {
+  throw new BlobClientException(ace);
+} catch (Exception ex) {
+  throw new BlobException(ex);
+}
+  }
+
+  @Override
+  public String pushStream(String blobName, InputStream is, long 
contentLength, String fileNamePrefix)
+  throws BlobException {
+try {
+  /*
+   * This object metadata is associated per 
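Relatedly, the `MAX_KEYS_PER_BATCH_DELETE` constant above reflects S3's hard limit of 1000 keys per `DeleteObjects` call; it would typically be honored by partitioning the key set, e.g. with the Guava `Iterables` already imported in this file. A sketch under those assumptions, not the PR's actual code:

```java
// Deletes keys in batches of at most MAX_KEYS_PER_BATCH_DELETE (1000).
// Method name and surrounding shape are assumptions for illustration.
private void deleteInBatches(Collection<String> keys) {
  for (List<String> batch : Iterables.partition(keys, MAX_KEYS_PER_BATCH_DELETE)) {
    DeleteObjectsRequest request = new DeleteObjectsRequest(blobBucketName)
        .withKeys(batch.toArray(new String[0]));
    s3Client.deleteObjects(request); // AWS SDK v1 batch delete
  }
}
```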

[GitHub] [lucene-solr] eribeiro commented on a change in pull request #864: SOLR-13101 : Shared storage support in SolrCloud

2019-09-12 Thread GitBox
URL: https://github.com/apache/lucene-solr/pull/864#discussion_r323974906
 
 

 ##
 File path: 
solr/core/src/java/org/apache/solr/store/blob/provider/BlobStorageProvider.java
 ##
 @@ -0,0 +1,62 @@
+package org.apache.solr.store.blob.provider;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+
+import org.apache.solr.common.SolrException;
+import org.apache.solr.store.blob.client.BlobException;
+import org.apache.solr.store.blob.client.BlobStorageClientBuilder;
+import org.apache.solr.store.blob.client.BlobstoreProviderType;
+import org.apache.solr.store.blob.client.CoreStorageClient;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.amazonaws.SdkClientException;
+
+/**
+ * Class that provides access to the shared storage client (blob client) and
+ * handles initialization of that client. This class serves as the provider for all
+ * blob store communication channels.
+ */
+public class BlobStorageProvider {
+
+  private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  private CoreStorageClient storageClient;
+
+  public CoreStorageClient getClient() {
+if (storageClient != null) {
+  return storageClient;
+}
+
+return getClient(BlobstoreProviderType.getConfiguredProvider());
+  }
+
+  private synchronized CoreStorageClient getClient(BlobstoreProviderType 
blobStorageProviderType) {
+if (storageClient != null) {
 
 Review comment:
   Lines 37-39 duplicate lines 29-31. Maybe remove the redundant lines in the 
`getClient()` method? 
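   A sketch of the deduplicated shape this comment points at (illustrative; combined here with the `volatile` suggestion from the earlier comment):

   ```java
   public CoreStorageClient getClient() {
     CoreStorageClient client = storageClient; // volatile read, fast path
     return (client != null) ? client : getClient(BlobstoreProviderType.getConfiguredProvider());
   }

   private synchronized CoreStorageClient getClient(BlobstoreProviderType type) {
     // The null check now lives in one place only.
     if (storageClient == null) {
       storageClient = buildClient(type); // hypothetical helper that constructs the client
     }
     return storageClient;
   }
   ```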





[GitHub] [lucene-solr] eribeiro commented on a change in pull request #864: SOLR-13101 : Shared storage support in SolrCloud

2019-09-12 Thread GitBox
URL: https://github.com/apache/lucene-solr/pull/864#discussion_r323973484
 
 

 ##
 File path: 
solr/core/src/java/org/apache/solr/store/blob/process/CorePullTracker.java
 ##
 @@ -0,0 +1,208 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.store.blob.process;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.Map;
+
+import javax.servlet.http.HttpServletRequest;
+
+import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.util.Utils;
+import org.apache.solr.core.CoreContainer;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.servlet.SolrRequestParsers;
+import org.apache.solr.store.blob.metadata.PushPullData;
+import org.apache.solr.store.blob.process.CorePullerFeeder.PullCoreInfo;
+import org.apache.solr.store.blob.util.BlobStoreUtils;
+import org.apache.solr.store.blob.util.DeduplicatingList;
+import org.apache.solr.store.shared.metadata.SharedShardMetadataController;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Tracks cores that are being queried and if necessary enqueues them for pull 
from blob store
+ */
+public class CorePullTracker {
+  private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  private static final int TRACKING_LIST_MAX_SIZE = 50;
+
+  private final DeduplicatingList<String, PullCoreInfo> coresToPull;
+
+  /* Config value that enables core pulls */
+  @VisibleForTesting
+  public static boolean isBackgroundPullEnabled = true; // TODO : make 
configurable
+
+  // Let's define these paths in yet another place in the code...
+  private static final String QUERY_PATH_PREFIX = "/select";
+  private static final String SPELLCHECK_PATH_PREFIX = "/spellcheck";
+  private static final String RESULTPROMOTION_PATH_PREFIX = 
"/result_promotion";
+  private static final String INDEXLOOKUP_PATH_PREFIX = "/indexLookup";
+  private static final String HIGHLIGHT_PATH_PREFIX = "/highlight";
+  private static final String BACKUP_PATH_PREFIX = "/backup";
+
+  public CorePullTracker() {
+coresToPull = new DeduplicatingList<>(TRACKING_LIST_MAX_SIZE, new 
CorePullerFeeder.PullCoreInfoMerger());
+  }
+
+  /**
+   * If the local core is stale, enqueues it to be pulled in from blob
+   * TODO: add stricter checks so that we don't pull on every request
+   */
+  public void enqueueForPullIfNecessary(String requestPath, SolrCore core, 
String collectionName,
+  CoreContainer cores) throws IOException, SolrException {
+// Initialize variables
+String coreName = core.getName();
+String shardName = 
core.getCoreDescriptor().getCloudDescriptor().getShardId();
+SharedShardMetadataController sharedShardMetadataController = 
cores.getSharedStoreManager().getSharedShardMetadataController();
+DocCollection collection = 
cores.getZkController().getClusterState().getCollection(collectionName);
+
+Slice shard = collection.getSlicesMap().get(shardName);
+if (shard != null) {
+  try {
+if (!collection.getActiveSlices().contains(shard)) {
+  // unclear if there are side effects but logging for now
+  log.warn("Enqueueing a pull for shard " + shardName + " that is 
inactive!");
+}
+log.info("Enqueue a pull for collection=" + collectionName + " shard=" 
+ shardName + " coreName=" + coreName);
+// creates the metadata node if it doesn't exist
+sharedShardMetadataController.ensureMetadataNodeExists(collectionName, 
shardName);
+
+/*
+ * Get the metadataSuffix value from ZooKeeper or from a cache if an 
entry exists for the 
+ * given collection and shardName. If the leader has already changed, 
the conditional update
+ * later will fail and invalidate the cache entry if it exists. 
+ */
+
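As context for `CorePullTracker`'s `coresToPull`, the deduplication idea is a bounded list that merges a newly enqueued entry into an existing entry for the same key instead of growing, so a flood of identical pull requests collapses into one queued pull. A conceptual sketch; the real `DeduplicatingList` API is not shown in this excerpt, and all names below are illustrative:

```java
import java.util.LinkedHashMap;

final class DedupQueueSketch<K, V> {
  interface Merger<V> { V merge(V existing, V incoming); }

  private final int maxSize;
  private final Merger<V> merger;
  private final LinkedHashMap<K, V> entries = new LinkedHashMap<>();

  DedupQueueSketch(int maxSize, Merger<V> merger) {
    this.maxSize = maxSize;
    this.merger = merger;
  }

  // An entry whose key is already queued is merged instead of appended,
  // keeping at most one pending entry per key.
  synchronized boolean add(K key, V value) {
    V existing = entries.get(key);
    if (existing != null) {
      entries.put(key, merger.merge(existing, value));
      return true;
    }
    if (entries.size() >= maxSize) {
      return false; // full; caller decides whether to block or drop
    }
    entries.put(key, value);
    return true;
  }
}
```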