Author: challngr
Date: Thu Mar 14 12:49:44 2013
New Revision: 1456407

URL: http://svn.apache.org/r1456407
Log:
UIMA-2739
SM Service instance restart policy.  Controlled by ducc.properties
  ducc.sm.instance.failure.max
and Service registry's
  process_failures_limit

Applies to both failure during instance submission and run-failure after
OR has accepted it.

Also modifies "instance termination" to purge instance information as soon
as orchestrator indicates it is no longer running. Previously this did not 
happen until the instance was removed from the OR publications.

Modified:
    uima/sandbox/uima-ducc/trunk/src/main/resources/ducc.properties
    
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceHandler.java
    
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java
    
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java

Modified: uima/sandbox/uima-ducc/trunk/src/main/resources/ducc.properties
URL: 
http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/resources/ducc.properties?rev=1456407&r1=1456406&r2=1456407&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/resources/ducc.properties (original)
+++ uima/sandbox/uima-ducc/trunk/src/main/resources/ducc.properties Thu Mar 14 
12:49:44 2013
@@ -141,6 +141,7 @@ ducc.sm.state.update.endpoint.type=topic
 ducc.sm.meta.ping.rate = 60000
 ducc.sm.meta.ping.stability =  10
 ducc.sm.meta.ping.timeout =  500
+ducc.sm.instance.falure.max = 5
 ducc.sm.http.port=19989
 ducc.sm.http.node=${ducc.head}
 ducc.sm.default.linger=300000

Modified: 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceHandler.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceHandler.java?rev=1456407&r1=1456406&r2=1456407&view=diff
==============================================================================
--- 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceHandler.java
 (original)
+++ 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceHandler.java
 Thu Mar 14 12:49:44 2013
@@ -36,6 +36,7 @@ import org.apache.uima.ducc.transport.ev
 import org.apache.uima.ducc.transport.event.ServiceStopEvent;
 import org.apache.uima.ducc.transport.event.ServiceUnregisterEvent;
 import org.apache.uima.ducc.transport.event.common.DuccWorkJob;
+import 
org.apache.uima.ducc.transport.event.common.IDuccCompletionType.JobCompletionType;
 import org.apache.uima.ducc.transport.event.common.IDuccState.JobState;
 import org.apache.uima.ducc.transport.event.common.IDuccWork;
 import org.apache.uima.ducc.transport.event.sm.IServiceDescription;
@@ -619,17 +620,17 @@ public class ServiceHandler
             if ( sset == null ) {
                 // may have already died and this is just leftover OR 
publications.
                 if ( w.isActive() ) {             // or maybe we just screwed 
up!
-                    try {
-                        throw new IllegalStateException("Got update for 
service " + id.toString() + " but no ServiceSet! Job state: " + 
w.getJobState());
-                    } catch ( Throwable t ) {
-                        // catch and log stack but don't crash SM
-                        logger.error(methodName, id, t);
-                        continue;
-                    }
+                    logger.info(methodName, id, "Got update for active service 
instance", id.toString(), "but no ServiceSet! Job state:", w.getJobState());
+                    continue;
                 }
                 continue;
             }
-
+            
+            if ( !sset.containsImplementor(id) ) {
+                logger.info(methodName, id, "Bypassing removed service 
instance for", endpoint);
+                continue;
+            }
+        
             ServiceDependency s = serviceMap.get(id);
             if ( w.isFinished() ) {              // nothing more, just 
dereference and maybe stop stuff I'm dependent upon
                 stopDependentServices(id);
@@ -641,6 +642,30 @@ public class ServiceHandler
             // now factor in cumulative state of the implementors and manage 
the ping thread as needed
             sset.establish(id, w.getJobState());
 
+            // State is established.  Now, if the instance died, remove it - 
OR will keep publishing it for a while and we want to ignore those
+            if ( ! w.isActive() ) {
+                sset.removeImplementor(id);
+
+                JobCompletionType jct = w.getCompletionType();
+                JobState          state = w.getJobState();
+                logger.info(methodName, id, "Removing stopped instance from 
maps: state[", state, "] completion[", jct, "]");
+                switch ( jct ) {
+                    case EndOfJob:
+                    case CanceledByUser:
+                    case CanceledByAdministrator:
+                    case Undefined:
+                        break;
+                    default:
+                        logger.debug(methodName, id, "RECORDING FAILURE");
+                        // all other cases are errors that contribute to the 
error count
+                        if ( sset.runFailures() ) {    // if true, the count 
is exceeeded, but reset
+                            logger.warn(methodName, null, "Process Failure: " 
+ jct + " Maximum consecutive failures[" + sset.failure_run + "] max [" + 
sset.failure_max + "]");
+                        }
+                        break;
+                }
+                
+            }
+
             if ( (sset.getServiceState() == ServiceState.NotAvailable) && 
(sset.countReferences() == 0) ) {
                 // this service is now toast.  remove from our maps asap to 
avoid clashes if it gets
                 // resubmitted before the OR can purge it.

Modified: 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java?rev=1456407&r1=1456406&r2=1456407&view=diff
==============================================================================
--- 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java
 (original)
+++ 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java
 Thu Mar 14 12:49:44 2013
@@ -96,6 +96,8 @@ public class ServiceManagerComponent 
     static int meta_ping_timeout = 500;      // timeout on ping 
     static String default_ping_class;
 
+    static int failure_max = 5;
+
     private String state_dir = null;
     private String state_file = null;
     private String descriptor_dir = null;
@@ -262,6 +264,7 @@ public class ServiceManagerComponent 
                super.start(service, args);
                
DuccDaemonRuntimeProperties.getInstance().boot(DaemonName.ServiceManager,getProcessJmxUrl());
 
+        failure_max = 
SystemPropertyResolver.getIntProperty("ducc.sm.instance.failure.max", 
failure_max);
         meta_ping_rate = 
SystemPropertyResolver.getIntProperty("ducc.sm.meta.ping.rate", meta_ping_rate);
         meta_ping_timeout = 
SystemPropertyResolver.getIntProperty("ducc.sm.meta.ping.timeout", 
meta_ping_timeout);
         meta_ping_stability = 
SystemPropertyResolver.getIntProperty("ducc.sm.meta.ping.stability", 
meta_ping_stability);
@@ -293,6 +296,7 @@ public class ServiceManagerComponent 
         logger.info(methodName, null, "    Service ping rate       : ", 
meta_ping_rate);
         logger.info(methodName, null, "    Service ping timeout    : ", 
meta_ping_timeout);
         logger.info(methodName, null, "    Service ping stability  : ", 
meta_ping_stability);
+        logger.info(methodName, null, "    Instance Failure Max    : ", 
failure_max);
         logger.info(methodName, null, "    DUCC Version            : ", 
Version.version());
         logger.info(methodName, null, 
"------------------------------------------------------------------------------------");
 

Modified: 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java?rev=1456407&r1=1456406&r2=1456407&view=diff
==============================================================================
--- 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java
 (original)
+++ 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java
 Thu Mar 14 12:49:44 2013
@@ -38,6 +38,7 @@ import org.apache.uima.UIMAFramework;
 import org.apache.uima.aae.client.UimaAsynchronousEngine;
 import org.apache.uima.adapter.jms.client.BaseUIMAAsynchronousEngine_impl;
 import org.apache.uima.ducc.cli.IServiceApi.RegistrationOption;
+import org.apache.uima.ducc.cli.IUiOptions.UiOption;
 import org.apache.uima.ducc.common.ServiceStatistics;
 import org.apache.uima.ducc.common.TcpStreamHandler;
 import org.apache.uima.ducc.common.utils.DuccLogger;
@@ -103,6 +104,9 @@ public class ServiceSet
     // Registered services, the number of instances to maintain
     int instances = 1;
 
+    // max allowable failures before disabling autostart
+    int max_failures = 5;
+
     // UIMA-AS pinger
     IServiceMeta serviceMeta = null;
 
@@ -122,6 +126,11 @@ public class ServiceSet
     LingerTask linger = null;
     long linger_time = 5000;
 
+    // max allowed consecutive failures, current failure count
+    int failure_max = ServiceManagerComponent.failure_max;
+    int failure_start = 0;
+    int failure_run = 0;
+
     //JobState     job_state     = JobState.Undefined;
     //
     // This is the constructor for an implicit service
@@ -199,8 +208,8 @@ public class ServiceSet
         this.meta_filename = meta_filename;
         this.service_state = ServiceState.NotAvailable;
         this.linger_time = 
props.getLongProperty(RegistrationOption.ServiceLinger.decode(), 5000);
-
         this.key = meta.getProperty("endpoint");
+        this.failure_max = 
props.getIntProperty(UiOption.ProcessFailuresLimit.pname(), 
ServiceManagerComponent.failure_max);
 
         parseEndpoint(key);
 
@@ -660,6 +669,12 @@ public class ServiceSet
         return false;            
     }
 
+    boolean containsImplementor(DuccId id)
+    {
+        // must use friendly, in case the thing was just started and not into 
the implementors set yet
+        return friendly_ids.containsKey(id.getFriendly());
+    }
+
     public void removeImplementor(DuccId id)
     {
         if ( ! implementors.containsKey(id ) ) return;  // quick short circuit 
if it's already gone
@@ -807,6 +822,10 @@ public class ServiceSet
     {
         String methodName = "establish.1";
 
+        if ( job_state == JobState.Running ) {
+            failure_run = 0;
+        }
+
         if ( service_class == ServiceClass.Implicit ) {
             startPingThread(); 
             return;
@@ -960,6 +979,19 @@ public class ServiceSet
         return key;
     }
 
+    synchronized boolean runFailures()
+    {
+        String methodName = "runFailures";
+        if ( (++failure_run) > failure_max ) {
+            logger.debug(methodName, id, "RUN FAILURES EXCEEDED");
+            setAutostart(false);
+            failure_run = 0;
+            return true;
+        }
+        logger.debug(methodName, id, "RUN FAILURES NOT EXCEEDED YET", 
failure_run);
+        return false;
+    }
+
     private void startPingThread()
     {
        String methodName = "startPingThread";
@@ -1140,32 +1172,27 @@ public class ServiceSet
             logger.error(methodName, null, t);
                }
 
+        for ( String s : stderr_lines ) {
+            logger.info(methodName, id, "Start stderr:", s);
+        }
+
         // That was annoying.  Now search the lines for some hint of the id.
         boolean inhibit_cp = false;
+        boolean started = false;
         for ( String s : stdout_lines ) {
+
+            // simple logic to inhibit printing the danged classpath
             if ( inhibit_cp ) {
-                // The classpaths can be just awful filling the logs with 
junk.  It will end up in the agent log
-                // anyway so let's inhibit it here.
-                logger.debug(methodName, id, "<INHIBITED CP>");
                 inhibit_cp = false;
+                logger.info(methodName, id, "<INHIBITED CP>");
             } else {
-                logger.debug(methodName, id, "Start stdout:", s);
-                if ( s.indexOf("-cp") >= 0 ) {
-                    inhibit_cp = true;
-                }
+                logger.info(methodName, id, "Start stdout:", s);
             }
-        }
-
-        for ( String s : stderr_lines ) {
-            logger.debug(methodName, id, "Start stderr:", s);
-        }
 
-        // That was annoying.  Now search the lines for some hint of the id.
+            if ( s.indexOf("-cp") >= 0 ) {
+                inhibit_cp = true;
+            }
 
-        boolean started = false;
-        for ( String s : stdout_lines ) {
-            logger.debug(methodName, id, "Start stdout:", s);
-            
             if ( s.startsWith("Service") && s.endsWith("submitted") ) {
                 String[] toks = s.split("\\s");
                 long friendly = 0;
@@ -1183,9 +1210,16 @@ public class ServiceSet
         }
         
         if ( ! started ) {
-            logger.warn(methodName, null, "Request to start service " + 
id.toString() + " failed:", stdout_lines.toString() + " " + 
stderr_lines.toString());
+            logger.warn(methodName, null, "Request to start service " + 
id.toString() + " failed.");
+            if ( (++failure_start) >= (failure_max) ) {
+                logger.warn(methodName, null, "Start failure. Maximum 
consecutive failures[" + failure_max + "] exceeded{" + failure_start + "].  
Autostart disabled.");
+                setAutostart(false);
+                failure_start = 0;
+            }
+
         } else {
             setServiceState(ServiceState.Initializing);
+            failure_start = 0;
         }
         saveMetaProperties();
         logger.debug(methodName, null, "ENDSTART ENDSTART ENDSTART ENDSTART 
ENDSTART ENDSTART");
@@ -1212,7 +1246,13 @@ public class ServiceSet
         };
         
         for ( int i = 0; i < args.length; i++ ) { 
-            logger.debug(methodName, null, "Args[", i, "]:", args[i]);
+            if ( i > 0 && (args[i-1].equals("-cp") ) ) {
+                // The classpaths can be just awful filling the logs with 
junk.  It will end up in the agent log
+                // anyway so let's inhibit it here.
+                logger.debug(methodName, null, "Args[", i, "]: <CLASSPATH>");
+            } else {
+                logger.debug(methodName, null, "Args[", i, "]:", args[i]);
+            }
         }
         
         ProcessBuilder pb = new ProcessBuilder(args);
@@ -1231,6 +1271,7 @@ public class ServiceSet
             InputStream stderr = p.getErrorStream();
             BufferedReader stdout_reader = new BufferedReader(new 
InputStreamReader(stdout));
             BufferedReader stderr_reader = new BufferedReader(new 
InputStreamReader(stderr));
+
             String line = null;
             while ( (line = stdout_reader.readLine()) != null ) {
                 stdout_lines.add(line);
@@ -1246,13 +1287,23 @@ public class ServiceSet
             logger.error(methodName, null, t);
         }
 
-        // That was annoying.  Now search the lines for some hint of the id.
+        boolean inhibit_cp = false;
         for ( String s : stdout_lines ) {
-            logger.debug(methodName, id, "Stop stdout:", s);
+            // simple logic to inhibit printing the danged classpath
+            if ( inhibit_cp ) {
+                inhibit_cp = false;
+                logger.info(methodName, id, "<INHIBITED CP>");
+            } else {
+                logger.info(methodName, id, "Stop stdout:", s);
+            }
+            
+            if ( s.indexOf("-cp") >= 0 ) {
+                inhibit_cp = true;
+            }
         }
             
         for ( String s : stderr_lines ) {
-            logger.debug(methodName, id, "Stop stderr:", s);
+            logger.info(methodName, id, "Stop stderr:", s);
         }
 
         // is this the last implementor?  if so the service is no longer 
available.


Reply via email to