[ 
https://issues.apache.org/jira/browse/SOLR-10249?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15902139#comment-15902139
 ] 

Jeff Miller commented on SOLR-10249:
------------------------------------

Diffs for my local testing

diff --git a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java 
b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
index 90e515a..2483e69 100644
--- a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
+++ b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
@@ -153,7 +153,7 @@ public class RecoveryStrategy extends Thread implements 
Closeable {
     solrParams.set(ReplicationHandler.MASTER_URL, leaderUrl);
     
     if (isClosed()) return; // we check closed on return
-    boolean success = replicationHandler.doFetch(solrParams, false);
+    boolean success = replicationHandler.doFetch(solrParams, 
false).getStatus();
     
     if (!success) {
       throw new SolrException(ErrorCode.SERVER_ERROR, "Replication for 
recovery failed.");
diff --git a/solr/core/src/java/org/apache/solr/handler/CdcrRequestHandler.java 
b/solr/core/src/java/org/apache/solr/handler/CdcrRequestHandler.java
index f706637..a65299a 100644
--- a/solr/core/src/java/org/apache/solr/handler/CdcrRequestHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/CdcrRequestHandler.java
@@ -754,7 +754,7 @@ public class CdcrRequestHandler extends RequestHandlerBase 
implements SolrCoreAw
         // we do not want the raw tlog files from the source
         solrParams.set(ReplicationHandler.TLOG_FILES, false);
 
-        success = replicationHandler.doFetch(solrParams, false);
+        success = replicationHandler.doFetch(solrParams, false).getStatus();
 
         // this is required because this callable can race with 
HttpSolrCall#destroy
         // which clears the request info.
diff --git a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java 
b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
index b9d9f51..281e660 100644
--- a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
+++ b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
@@ -106,6 +106,8 @@ import static 
org.apache.solr.common.params.CommonParams.JAVABIN;
 import static org.apache.solr.common.params.CommonParams.NAME;
 import static org.apache.solr.handler.ReplicationHandler.*;
 
+import com.google.common.base.Strings;
+
 /**
  * <p> Provides functionality of downloading changed index files as well as 
config files and a timer for scheduling fetches from the
  * master. </p>
@@ -161,6 +163,52 @@ public class IndexFetcher {
 
   private Integer soTimeout;
 
+  private static final String INTERRUPT_RESPONSE_MESSAGE = "Interrupted while 
waiting for modify lock";
+
+  public static class IndexFetchResult {
+    private final String message;
+    private final boolean status;
+    private final Throwable exception;
+
+    public static final String FAILED_BY_INTERRUPT_MESSAGE = "Fetching index 
failed by interrupt";
+    public static final String FAILED_BY_EXCEPTION_MESSAGE = "Fetching index 
failed by exception";
+
+    /** pre-defined results */
+    public static final IndexFetchResult ALREADY_IN_SYNC = new 
IndexFetchResult("Local index commit is already in sync with peer", true, null);
+    public static final IndexFetchResult INDEX_FETCH_FAILURE = new 
IndexFetchResult("Fetching lastest index is failed", false, null);
+    public static final IndexFetchResult INDEX_FETCH_SUCCESS = new 
IndexFetchResult("Fetching latest index is successful", true, null);
+    public static final IndexFetchResult LOCK_OBTAIN_FAILED = new 
IndexFetchResult("Obtaining SnapPuller lock failed", false, null);
+    public static final IndexFetchResult MASTER_VERSION_ZERO = new 
IndexFetchResult("Index in peer is empty and never committed yet", true, null);
+    public static final IndexFetchResult NO_INDEX_COMMIT_EXIST = new 
IndexFetchResult("No IndexCommit in local index", false, null);
+    public static final IndexFetchResult PEER_INDEX_COMMIT_DELETED = new 
IndexFetchResult("No files to download because IndexCommit in peer was 
deleted", false, null);
+    // SFDC: adding a new failure result when replication is aborted because 
of local activity
+    public static final IndexFetchResult LOCAL_ACTIVITY_DURING_REPLICATION = 
new IndexFetchResult("Local index modification during replication", false, 
null);
+
+    IndexFetchResult(String message, boolean status, Throwable exception) {
+      this.message = message;
+      this.status = status;
+      this.exception = exception;
+    }
+
+    /*
+     * @return exception thrown if failed by exception or interrupt, otherwise 
null
+     */
+    public Throwable getException() {
+      return this.exception;
+    }
+
+    /*
+     * @return true if index fetch was successful, false otherwise
+     */
+    public boolean getStatus() {
+      return this.status;
+    }
+
+    public String getMessage() {
+      return this.message;
+    }
+  }
+
   private static HttpClient createHttpClient(SolrCore core, String 
httpBasicAuthUser, String httpBasicAuthPassword, boolean useCompression) {
     final ModifiableSolrParams httpClientParams = new ModifiableSolrParams();
     httpClientParams.set(HttpClientUtil.PROP_BASIC_AUTH_USER, 
httpBasicAuthUser);
@@ -264,7 +312,7 @@ public class IndexFetcher {
     }
   }
 
-  boolean fetchLatestIndex(boolean forceReplication) throws IOException, 
InterruptedException {
+  IndexFetchResult fetchLatestIndex(boolean forceReplication) throws 
IOException, InterruptedException {
     return fetchLatestIndex(forceReplication, false);
   }
 
@@ -277,7 +325,7 @@ public class IndexFetcher {
    * @return true on success, false if slave is already in sync
    * @throws IOException if an exception occurs
    */
-  boolean fetchLatestIndex(boolean forceReplication, boolean forceCoreReload) 
throws IOException, InterruptedException {
+  IndexFetchResult fetchLatestIndex(boolean forceReplication, boolean 
forceCoreReload) throws IOException, InterruptedException {
 
     boolean cleanupDone = false;
     boolean successfulInstall = false;
@@ -302,9 +350,16 @@ public class IndexFetcher {
       try {
         response = getLatestVersion();
       } catch (Exception e) {
-        LOG.error("Master at: " + masterUrl + " is not available. Index fetch 
failed. Exception: " + e.getMessage());
-        return false;
-      }
+        final String errorMsg = e.getMessage();
+        if (!Strings.isNullOrEmpty(errorMsg) && 
errorMsg.contains(INTERRUPT_RESPONSE_MESSAGE)) {
+            LOG.warn("Master at: " + masterUrl + " is not available. Index 
fetch failed by interrupt. Exception: " + errorMsg);
+            return new 
IndexFetchResult(IndexFetchResult.FAILED_BY_INTERRUPT_MESSAGE, false, e);
+        } else {
+            LOG.warn("Master at: " + masterUrl + " is not available. Index 
fetch failed by exception: " + errorMsg);
+            return new 
IndexFetchResult(IndexFetchResult.FAILED_BY_EXCEPTION_MESSAGE, false, e);
+        }
+    }
+
       long latestVersion = (Long) response.get(CMD_INDEX_VERSION);
       long latestGeneration = (Long) response.get(GENERATION);
 
@@ -320,7 +375,7 @@ public class IndexFetcher {
           searcherRefCounted = solrCore.getNewestSearcher(false);
           if (searcherRefCounted == null) {
             LOG.warn("No open searcher found - fetch aborted");
-            return false;
+            return IndexFetchResult.NO_INDEX_COMMIT_EXIST;
           }
           commit = searcherRefCounted.get().getIndexReader().getIndexCommit();
         } finally {
@@ -347,7 +402,7 @@ public class IndexFetcher {
 
         //there is nothing to be replicated
         successfulInstall = true;
-        return true;
+        return IndexFetchResult.MASTER_VERSION_ZERO;
       }
 
       // TODO: Should we be comparing timestamps (across machines) here?
@@ -355,14 +410,14 @@ public class IndexFetcher {
         //master and slave are already in sync just return
         LOG.info("Slave in sync with master.");
         successfulInstall = true;
-        return true;
+        return IndexFetchResult.ALREADY_IN_SYNC;
       }
       LOG.info("Starting replication process");
       // get the list of files first
       fetchFileList(latestGeneration);
       // this can happen if the commit point is deleted before we fetch the 
file list.
       if (filesToDownload.isEmpty()) {
-        return false;
+        return IndexFetchResult.PEER_INDEX_COMMIT_DELETED;
       }
       LOG.info("Number of files in latest index in master: " + 
filesToDownload.size());
       if (tlogFilesToDownload != null) {
@@ -550,14 +605,14 @@ public class IndexFetcher {
           LOG.warn(
               "Replication attempt was not successful - trying a full index 
replication reloadCore={}",
               reloadCore);
-          successfulInstall = fetchLatestIndex(true, reloadCore);
+          successfulInstall = fetchLatestIndex(true, reloadCore).getStatus();
         }
 
         markReplicationStop();
-        return successfulInstall;
+        return successfulInstall ? IndexFetchResult.INDEX_FETCH_SUCCESS : 
IndexFetchResult.INDEX_FETCH_FAILURE;
       } catch (ReplicationHandlerException e) {
         LOG.error("User aborted Replication");
-        return false;
+        return new 
IndexFetchResult(IndexFetchResult.FAILED_BY_EXCEPTION_MESSAGE, false, e);
       } catch (SolrException e) {
         throw e;
       } catch (InterruptedException e) {
diff --git a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java 
b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
index 84e1ba2..becec83 100644
--- a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
@@ -88,6 +88,7 @@ import org.apache.solr.core.SolrEventListener;
 import org.apache.solr.core.backup.repository.BackupRepository;
 import org.apache.solr.core.backup.repository.LocalFileSystemRepository;
 import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
+import org.apache.solr.handler.IndexFetcher.IndexFetchResult;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.search.SolrIndexSearcher;
@@ -381,10 +382,10 @@ public class ReplicationHandler extends 
RequestHandlerBase implements SolrCoreAw
 
   private volatile IndexFetcher currentIndexFetcher;
 
-  public boolean doFetch(SolrParams solrParams, boolean forceReplication) {
+  public IndexFetchResult doFetch(SolrParams solrParams, boolean 
forceReplication) {
     String masterUrl = solrParams == null ? null : solrParams.get(MASTER_URL);
     if (!indexFetchLock.tryLock())
-      return false;
+      return IndexFetchResult.LOCK_OBTAIN_FAILED;
     try {
       if (masterUrl != null) {
         if (currentIndexFetcher != null && currentIndexFetcher != 
pollingIndexFetcher) {
@@ -400,17 +401,16 @@ public class ReplicationHandler extends 
RequestHandlerBase implements SolrCoreAw
       if (currentIndexFetcher != pollingIndexFetcher) {
         currentIndexFetcher.destroy();
       }
+      return new 
IndexFetchResult(IndexFetchResult.FAILED_BY_EXCEPTION_MESSAGE, false, e);
     } finally {
       if (pollingIndexFetcher != null) {
        if( currentIndexFetcher != pollingIndexFetcher) {
          currentIndexFetcher.destroy();
        }
-        
         currentIndexFetcher = pollingIndexFetcher;
       }
       indexFetchLock.unlock();
     }
-    return false;
   }
 
   boolean isReplicating() {


> Allow index fetching to return a detailed result instead of a true/false value
> ------------------------------------------------------------------------------
>
>                 Key: SOLR-10249
>                 URL: https://issues.apache.org/jira/browse/SOLR-10249
>             Project: Solr
>          Issue Type: Improvement
>      Security Level: Public(Default Security Level. Issues are Public) 
>          Components: replication (java)
>    Affects Versions: 6.4.1
>         Environment: Any
>            Reporter: Jeff Miller
>            Priority: Trivial
>              Labels: easyfix, newbie
>             Fix For: 6.4
>
>   Original Estimate: 3h
>  Remaining Estimate: 3h
>
> This gives us the ability to see into why a replication might of failed and 
> act on it if we need to.  We use this enhancement for logging conditions so 
> we can quantify what is happening with replication, get success rates, etc.
> The idea is to create a public static class IndexFetchResult as an inner 
> class to IndexFetcher that has strings that hold statuses that could occur 
> while fetching an index.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to