[geode] branch support/1.12 updated: GEODE-7884: server hangs due to IllegalStateException (#4822)

2020-11-11 Thread burcham
This is an automated email from the ASF dual-hosted git repository.

burcham pushed a commit to branch support/1.12
in repository https://gitbox.apache.org/repos/asf/geode.git


The following commit(s) were added to refs/heads/support/1.12 by this push:
 new 6fec62f  GEODE-7884: server hangs due to IllegalStateException (#4822)
6fec62f is described below

commit 6fec62ff7b4b6ebc4f0f8079fcd67a2b0c3919b0
Author: Bruce Schuchardt 
AuthorDate: Fri Mar 20 09:05:24 2020 -0700

GEODE-7884: server hangs due to IllegalStateException (#4822)

* GEODE-7884: server hangs due to IllegalStateException

Added cancellation check before scheduling an idle-timeout or
ack-wait-threshold timer task.  I had to add a new method to
SystemTimerTask and then noticed there were no tests for SystemTimer, so
I cleaned up that class and added tests.

* adding missing copyright header to new test

* fixing LGTM issues

* reinstating 'continue' when encountering a null timer during a sweep

* addressing Bill's comments

renamed swarm everywhere
made the collection of timers associated with a DistributedSystem into a Set
made timer task variables in Connection volatile
added checks in tasks to cancel themselves if their Connection is closed

(cherry picked from commit 2d2a3f80bd5053749963889c1898df48e9aa0be7)
---
 .../internal/InternalDistributedSystem.java|   2 +-
 .../org/apache/geode/internal/SystemTimer.java | 370 +++--
 .../geode/internal/admin/StatAlertsManager.java|   2 +-
 .../geode/internal/cache/ExpirationScheduler.java  |   2 +-
 .../geode/internal/cache/GemFireCacheImpl.java |   2 +-
 .../cache/partitioned/PRSanityCheckMessage.java|   2 +-
 .../internal/cache/tier/sockets/AcceptorImpl.java  |   2 +-
 .../org/apache/geode/internal/tcp/Connection.java  |  30 +-
 .../apache/geode/internal/tcp/ConnectionTable.java |  22 +-
 .../org/apache/geode/internal/SystemTimerTest.java | 162 +
 10 files changed, 329 insertions(+), 267 deletions(-)

diff --git 
a/geode-core/src/main/java/org/apache/geode/distributed/internal/InternalDistributedSystem.java
 
b/geode-core/src/main/java/org/apache/geode/distributed/internal/InternalDistributedSystem.java
index 823844f..e97bd02 100644
--- 
a/geode-core/src/main/java/org/apache/geode/distributed/internal/InternalDistributedSystem.java
+++ 
b/geode-core/src/main/java/org/apache/geode/distributed/internal/InternalDistributedSystem.java
@@ -1625,7 +1625,7 @@ public class InternalDistributedSystem extends 
DistributedSystem
   // bug 38501: this has to happen *after*
   // the DM is closed :-(
   if (!preparingForReconnect) {
-SystemTimer.cancelSwarm(this);
+SystemTimer.cancelTimers(this);
   }
 } // finally timer cancelled
   } // finally dm closed
diff --git 
a/geode-core/src/main/java/org/apache/geode/internal/SystemTimer.java 
b/geode-core/src/main/java/org/apache/geode/internal/SystemTimer.java
index 9ce3525..7eddf43 100644
--- a/geode-core/src/main/java/org/apache/geode/internal/SystemTimer.java
+++ b/geode-core/src/main/java/org/apache/geode/internal/SystemTimer.java
@@ -15,32 +15,30 @@
 package org.apache.geode.internal;
 
 import java.lang.ref.WeakReference;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
 import java.util.Date;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Map;
+import java.util.Set;
 import java.util.Timer;
 import java.util.TimerTask;
 
 import org.apache.logging.log4j.Logger;
 
 import org.apache.geode.CancelException;
-import org.apache.geode.SystemFailure;
 import org.apache.geode.annotations.internal.MakeNotStatic;
-import org.apache.geode.distributed.internal.InternalDistributedSystem;
+import org.apache.geode.distributed.DistributedSystem;
 import org.apache.geode.logging.internal.log4j.api.LogService;
 
 /**
- * Instances of this class are like {@link Timer}, but are associated with a 
"swarm", which can be
- * cancelled as a group with {@link #cancelSwarm(Object)}.
+ * Instances of this class are like {@link Timer}, but are associated with a 
DistributedSystem,
+ * which can be
+ * cancelled as a group with {@link #cancelTimers(DistributedSystem)}.
  *
  * @see Timer
  * @see TimerTask
  *
- *  TODO -- with Java 1.5, this will be a template type so that the 
swarm's class can be
- *  specified.
  */
 public class SystemTimer {
   private static final Logger logger = LogService.getLogger();
@@ -49,12 +47,6 @@ public class SystemTimer {
   "IBM Corporation".equals(System.getProperty("java.vm.vendor"));
 
   /**
-   * Extra debugging for this class
-   */
-  // private static final boolean DEBUG = true;
-  static final boolean DEBUG = false;
-
-  /**
* the underlying {@link Timer}
*/
   private final Timer timer;
@@ -62,119 +54,106 @@ public class SystemTimer {
   /**
   

[geode] branch support/1.12 updated: GEODE-7884: server hangs due to IllegalStateException (#4822)

2020-11-11 Thread burcham
This is an automated email from the ASF dual-hosted git repository.

burcham pushed a commit to branch support/1.12
in repository https://gitbox.apache.org/repos/asf/geode.git


The following commit(s) were added to refs/heads/support/1.12 by this push:
 new 6fec62f  GEODE-7884: server hangs due to IllegalStateException (#4822)
6fec62f is described below

commit 6fec62ff7b4b6ebc4f0f8079fcd67a2b0c3919b0
Author: Bruce Schuchardt 
AuthorDate: Fri Mar 20 09:05:24 2020 -0700

GEODE-7884: server hangs due to IllegalStateException (#4822)

* GEODE-7884: server hangs due to IllegalStateException

Added cancellation check before scheduling an idle-timeout or
ack-wait-threshold timer task.  I had to add a new method to
SystemTimerTask and then noticed there were no tests for SystemTimer, so
I cleaned up that class and added tests.

* adding missing copyright header to new test

* fixing LGTM issues

* reinstating 'continue' when encountering a null timer during a sweep

* addressing Bill's comments

renamed swarm everywhere
made the collection of timers associated with a DistributedSystem into a Set
made timer task variables in Connection volatile
added checks in tasks to cancel themselves if their Connection is closed

(cherry picked from commit 2d2a3f80bd5053749963889c1898df48e9aa0be7)
---
 .../internal/InternalDistributedSystem.java|   2 +-
 .../org/apache/geode/internal/SystemTimer.java | 370 +++--
 .../geode/internal/admin/StatAlertsManager.java|   2 +-
 .../geode/internal/cache/ExpirationScheduler.java  |   2 +-
 .../geode/internal/cache/GemFireCacheImpl.java |   2 +-
 .../cache/partitioned/PRSanityCheckMessage.java|   2 +-
 .../internal/cache/tier/sockets/AcceptorImpl.java  |   2 +-
 .../org/apache/geode/internal/tcp/Connection.java  |  30 +-
 .../apache/geode/internal/tcp/ConnectionTable.java |  22 +-
 .../org/apache/geode/internal/SystemTimerTest.java | 162 +
 10 files changed, 329 insertions(+), 267 deletions(-)

diff --git 
a/geode-core/src/main/java/org/apache/geode/distributed/internal/InternalDistributedSystem.java
 
b/geode-core/src/main/java/org/apache/geode/distributed/internal/InternalDistributedSystem.java
index 823844f..e97bd02 100644
--- 
a/geode-core/src/main/java/org/apache/geode/distributed/internal/InternalDistributedSystem.java
+++ 
b/geode-core/src/main/java/org/apache/geode/distributed/internal/InternalDistributedSystem.java
@@ -1625,7 +1625,7 @@ public class InternalDistributedSystem extends 
DistributedSystem
   // bug 38501: this has to happen *after*
   // the DM is closed :-(
   if (!preparingForReconnect) {
-SystemTimer.cancelSwarm(this);
+SystemTimer.cancelTimers(this);
   }
 } // finally timer cancelled
   } // finally dm closed
diff --git 
a/geode-core/src/main/java/org/apache/geode/internal/SystemTimer.java 
b/geode-core/src/main/java/org/apache/geode/internal/SystemTimer.java
index 9ce3525..7eddf43 100644
--- a/geode-core/src/main/java/org/apache/geode/internal/SystemTimer.java
+++ b/geode-core/src/main/java/org/apache/geode/internal/SystemTimer.java
@@ -15,32 +15,30 @@
 package org.apache.geode.internal;
 
 import java.lang.ref.WeakReference;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
 import java.util.Date;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Map;
+import java.util.Set;
 import java.util.Timer;
 import java.util.TimerTask;
 
 import org.apache.logging.log4j.Logger;
 
 import org.apache.geode.CancelException;
-import org.apache.geode.SystemFailure;
 import org.apache.geode.annotations.internal.MakeNotStatic;
-import org.apache.geode.distributed.internal.InternalDistributedSystem;
+import org.apache.geode.distributed.DistributedSystem;
 import org.apache.geode.logging.internal.log4j.api.LogService;
 
 /**
- * Instances of this class are like {@link Timer}, but are associated with a 
"swarm", which can be
- * cancelled as a group with {@link #cancelSwarm(Object)}.
+ * Instances of this class are like {@link Timer}, but are associated with a 
DistributedSystem,
+ * which can be
+ * cancelled as a group with {@link #cancelTimers(DistributedSystem)}.
  *
  * @see Timer
  * @see TimerTask
  *
- *  TODO -- with Java 1.5, this will be a template type so that the 
swarm's class can be
- *  specified.
  */
 public class SystemTimer {
   private static final Logger logger = LogService.getLogger();
@@ -49,12 +47,6 @@ public class SystemTimer {
   "IBM Corporation".equals(System.getProperty("java.vm.vendor"));
 
   /**
-   * Extra debugging for this class
-   */
-  // private static final boolean DEBUG = true;
-  static final boolean DEBUG = false;
-
-  /**
* the underlying {@link Timer}
*/
   private final Timer timer;
@@ -62,119 +54,106 @@ public class SystemTimer {
   /**