This is an automated email from the ASF dual-hosted git repository.

dlych pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit 06f7c1852a14e7b33ff4034dbceb69a3534eaaae
Author: Murtadha Hubail <[email protected]>
AuthorDate: Thu Mar 24 01:43:28 2022 +0300

    [NO ISSUE][REP] Increase replication ack timeout
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    
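    - Increase the default replication ack timeout from 30 to 120 seconds.
    - Log a warning with the timeout, flush LSN, and component file when the
      flush LSN is not received from master before the timeout expires.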
    
    Change-Id: I228620af371d651a84160231cdd832ca1087e7f9
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/15843
    Reviewed-by: Ali Alsuliman <[email protected]>
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Jenkins <[email protected]>
---
 .../results/api/cluster_state_1/cluster_state_1.1.regexadm         | 2 +-
 .../api/cluster_state_1_full/cluster_state_1_full.1.regexadm       | 2 +-
 .../api/cluster_state_1_less/cluster_state_1_less.1.regexadm       | 2 +-
 .../org/apache/asterix/common/config/ReplicationProperties.java    | 2 +-
 .../asterix/replication/messaging/MarkComponentValidTask.java      | 7 ++++++-
 5 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
index 1805e7a8f3..c55c0bcbee 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
@@ -43,7 +43,7 @@
     "replication\.log\.buffer\.numpages" : 8,
     "replication\.log\.buffer\.pagesize" : 131072,
     "replication\.strategy" : "none",
-    "replication\.timeout" : 30,
+    "replication\.timeout" : 120,
     "ssl\.enabled" : false,
     "storage.compression.block" : "snappy",
     "storage.global.cleanup.timeout" : 600,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
index 743347a005..661daf3ed2 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
@@ -43,7 +43,7 @@
     "replication\.log\.buffer\.numpages" : 8,
     "replication\.log\.buffer\.pagesize" : 131072,
     "replication\.strategy" : "none",
-    "replication\.timeout" : 30,
+    "replication\.timeout" : 120,
     "ssl\.enabled" : false,
     "storage.compression.block" : "snappy",
     "storage.global.cleanup.timeout" : 600,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
index 4359bd9ff2..1f0e865dcf 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
@@ -43,7 +43,7 @@
     "replication\.log\.buffer\.numpages" : 8,
     "replication\.log\.buffer\.pagesize" : 131072,
     "replication\.strategy" : "none",
-    "replication\.timeout" : 30,
+    "replication\.timeout" : 120,
     "ssl\.enabled" : false,
     "storage.compression.block" : "snappy",
     "storage.global.cleanup.timeout" : 600,
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/ReplicationProperties.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/ReplicationProperties.java
index dd42936812..ada3875c10 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/ReplicationProperties.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/ReplicationProperties.java
@@ -48,7 +48,7 @@ public class ReplicationProperties extends AbstractProperties 
{
                 "The size in bytes to replicate in each batch"),
         REPLICATION_TIMEOUT(
                 LONG,
-                TimeUnit.SECONDS.toSeconds(30),
+                TimeUnit.SECONDS.toSeconds(120),
                 "The time in seconds to timeout waiting for master or replica 
to ack"),
         REPLICATION_ENABLED(BOOLEAN, false, "Whether or not data replication 
is enabled"),
         REPLICATION_FACTOR(NONNEGATIVE_INTEGER, 2, "Number of replicas 
(backups) to maintain per master replica"),
diff --git a/asterixdb/asterix-replication/src/main/java/org/apache/asterix/replication/messaging/MarkComponentValidTask.java b/asterixdb/asterix-replication/src/main/java/org/apache/asterix/replication/messaging/MarkComponentValidTask.java
index 1ea076d8fa..172bd59ca1 100644
--- a/asterixdb/asterix-replication/src/main/java/org/apache/asterix/replication/messaging/MarkComponentValidTask.java
+++ b/asterixdb/asterix-replication/src/main/java/org/apache/asterix/replication/messaging/MarkComponentValidTask.java
@@ -37,12 +37,15 @@ import 
org.apache.asterix.replication.api.IReplicationWorker;
 import org.apache.asterix.replication.sync.IndexSynchronizer;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import 
org.apache.hyracks.storage.am.lsm.common.impls.IndexComponentFileReference;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 
 /**
  * A task to mark a replicated LSM component as valid
  */
 public class MarkComponentValidTask implements IReplicaTask {
 
+    private static final Logger LOGGER = LogManager.getLogger();
     private final long masterLsn;
     private final long lastComponentId;
     private final String file;
@@ -90,7 +93,9 @@ public class MarkComponentValidTask implements IReplicaTask {
             // wait until the lsn mapping is flushed to disk
             while (!indexCheckpointManager.isFlushed(masterLsn)) {
                 if (replicationTimeOut <= 0) {
-                    throw new ReplicationException(new 
TimeoutException("Couldn't receive flush lsn from master"));
+                    LOGGER.warn("{} seconds passed without receiving flush lsn 
{} from master for component {}",
+                            
appCtx.getReplicationProperties().getReplicationTimeOut(), masterLsn, file);
+                    throw new ReplicationException(new 
TimeoutException("couldn't receive flush lsn from master"));
                 }
                 final long startTime = System.nanoTime();
                 indexCheckpointManager.wait(replicationTimeOut);

Reply via email to