Add TM2 tmcheck validator for all monitors

Adds tmcheck.ValidateAllMonitorsOfflineStates, which efficiently
checks all monitors in Traffic Ops (i.e. it fetches each CDN's CRConfig
only once, rather than once per monitor).


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/commit/83b58d97
Tree: 
http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/tree/83b58d97
Diff: 
http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/diff/83b58d97

Branch: refs/heads/master
Commit: 83b58d97fb868309db0db48963a66f85a91caadb
Parents: 33fa72c
Author: Robert Butts <[email protected]>
Authored: Thu Mar 2 11:29:07 2017 -0700
Committer: Dave Neuman <[email protected]>
Committed: Wed Apr 12 15:43:31 2017 -0600

----------------------------------------------------------------------
 .../traffic_monitor/enum/enum.go                |   3 +
 .../traffic_monitor/tmcheck/tmcheck.go          | 142 ++++++++++++++++++-
 .../traffic_monitor/tools/validate-offline.go   |   6 +-
 3 files changed, 143 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/83b58d97/traffic_monitor_golang/traffic_monitor/enum/enum.go
----------------------------------------------------------------------
diff --git a/traffic_monitor_golang/traffic_monitor/enum/enum.go 
b/traffic_monitor_golang/traffic_monitor/enum/enum.go
index 45546bd..1c06ac8 100644
--- a/traffic_monitor_golang/traffic_monitor/enum/enum.go
+++ b/traffic_monitor_golang/traffic_monitor/enum/enum.go
@@ -32,6 +32,9 @@ import (
        "strings"
 )
 
+// CDNName is the name of a CDN in Traffic Control. It is a defined type over
+// string, so a plain string must be converted explicitly, e.g. CDNName(s).
+type CDNName string
+
 // TrafficMonitorName is the hostname of a Traffic Monitor peer.
 type TrafficMonitorName string
 

http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/83b58d97/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go
----------------------------------------------------------------------
diff --git a/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go 
b/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go
index d6a964e..1178721 100644
--- a/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go
+++ b/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go
@@ -93,8 +93,12 @@ func ValidateOfflineStates(tmURI string, toClient 
*to.Session) error {
        if err != nil {
                return fmt.Errorf("getting CDN from Traffic Monitor: %v", err)
        }
+       return ValidateOfflineStatesWithCDN(tmURI, cdn, toClient)
+}
 
-       crConfigBytes, err := toClient.CRConfigRaw(cdn)
+// ValidateOfflineStatesWithCDN validates per ValidateOfflineStates, but saves 
an additional query if the Traffic Monitor's CDN is known.
+func ValidateOfflineStatesWithCDN(tmURI string, tmCDN string, toClient 
*to.Session) error {
+       crConfigBytes, err := toClient.CRConfigRaw(tmCDN)
        if err != nil {
                return fmt.Errorf("getting CRConfig: %v", err)
        }
@@ -104,19 +108,24 @@ func ValidateOfflineStates(tmURI string, toClient 
*to.Session) error {
                return fmt.Errorf("unmarshalling CRConfig JSON: %v", err)
        }
 
+       return ValidateOfflineStatesWithCRConfig(tmURI, &crConfig, toClient)
+}
+
+// ValidateOfflineStatesWithCRConfig validates per ValidateOfflineStates, but 
saves querying the CRConfig if it's already fetched.
+func ValidateOfflineStatesWithCRConfig(tmURI string, crConfig 
*crconfig.CRConfig, toClient *to.Session) error {
        crStates, err := GetCRStates(tmURI + TrafficMonitorCRStatesPath)
        if err != nil {
                return fmt.Errorf("getting CRStates: %v", err)
        }
 
-       return ValidateCRStates(crStates, &crConfig)
+       return ValidateCRStates(crStates, crConfig)
 }
 
 // ValidateCRStates validates that no OFFLINE or ADMIN_DOWN caches in the 
given CRConfig are marked Available in the given CRStates.
 func ValidateCRStates(crstates *peer.Crstates, crconfig *crconfig.CRConfig) 
error {
        for cacheName, cacheInfo := range crconfig.ContentServers {
                status := enum.CacheStatusFromString(string(*cacheInfo.Status))
-               if status != enum.CacheStatusOffline || status != 
enum.CacheStatusOffline {
+               if status != enum.CacheStatusAdminDown || status != 
enum.CacheStatusOffline {
                        continue
                }
 
@@ -133,8 +142,8 @@ func ValidateCRStates(crstates *peer.Crstates, crconfig 
*crconfig.CRConfig) erro
        return nil
 }
 
-// Validator is designed to be run as a goroutine, and does not return. It 
continously validates every `interval`, and calls `onErr` on failure, 
`onResumeSuccess` when a failure ceases, and `onCheck` on every poll.
-func Validator(
+// CRStatesOfflineValidator is designed to be run as a goroutine, and does not 
return. It continuously validates every `interval`, and calls `onErr` on 
failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll.
+func CRStatesOfflineValidator(
        tmURI string,
        toClient *to.Session,
        interval time.Duration,
@@ -170,3 +179,126 @@ func Validator(
                time.Sleep(interval)
        }
 }
+
+// CRConfigOrError pairs a CRConfig with an error, so the outcome of fetching
+// one CDN's CRConfig can be stored as a single map value. Consumers in this
+// file check Err first and only use CRConfig when Err is nil.
+type CRConfigOrError struct {
+       CRConfig *crconfig.CRConfig
+       Err      error
+}
+
+// ValidateAllMonitorsOfflineStates runs the offline-state validation against
+// every server Traffic Ops returns for the "RASCAL" (Traffic Monitor) type.
+// The CRConfig is fetched once per CDN and shared by all monitors on that CDN.
+// When includeOffline is false, the monitors are first narrowed with
+// FilterOfflines.
+//
+// The returned map is keyed by monitor hostname and only holds entries for
+// monitors whose validation failed; an empty map means no failure was found.
+// The second return value is non-nil only when the monitor list itself could
+// not be fetched from Traffic Ops.
+func ValidateAllMonitorsOfflineStates(toClient *to.Session, includeOffline bool) (map[enum.TrafficMonitorName]error, error) {
+       const monitorServerType = "RASCAL"
+       query := map[string][]string{"type": {monitorServerType}}
+       monitors, err := toClient.ServersByType(query)
+       if err != nil {
+               return nil, fmt.Errorf("getting monitors from Traffic Ops: %v", err)
+       }
+
+       if !includeOffline {
+               monitors = FilterOfflines(monitors)
+       }
+
+       // One CRConfig fetch per distinct CDN, not one per monitor.
+       crConfigByCDN := GetCRConfigs(GetCDNs(monitors), toClient)
+
+       failures := map[enum.TrafficMonitorName]error{}
+       for _, monitor := range monitors {
+               name := enum.TrafficMonitorName(monitor.HostName)
+
+               fetched := crConfigByCDN[enum.CDNName(monitor.CDNName)]
+               if fetched.Err != nil {
+                       failures[name] = fmt.Errorf("getting CRConfig: %v", fetched.Err)
+                       continue
+               }
+
+               // NOTE(review): the bare FQDN (no scheme) is passed as the monitor
+               // URI; confirm GetCRStates accepts that form.
+               tmFQDN := fmt.Sprintf("%s.%s", monitor.HostName, monitor.DomainName)
+               if err := ValidateOfflineStatesWithCRConfig(tmFQDN, fetched.CRConfig, toClient); err != nil {
+                       failures[name] = err
+               }
+       }
+       return failures, nil
+}
+
+// AllMonitorsCRStatesOfflineValidator is designed to be run as a goroutine,
+// and does not return. Every `interval` it calls
+// ValidateAllMonitorsOfflineStates and reports through the callbacks:
+//
+//   - onErr is called for a monitor on each poll once that monitor has been
+//     failing for longer than `grace`. It is also called with an empty
+//     enum.TrafficMonitorName when the validation as a whole failed, i.e. the
+//     error is not associated with any single monitor.
+//   - onResumeSuccess is called once when a monitor that was failing is no
+//     longer reported as failing.
+//   - onCheck is called on every poll for each monitor in the validation
+//     result, which only contains monitors that failed.
+func AllMonitorsCRStatesOfflineValidator(
+       toClient *to.Session,
+       interval time.Duration,
+       includeOffline bool,
+       grace time.Duration,
+       onErr func(enum.TrafficMonitorName, error),
+       onResumeSuccess func(enum.TrafficMonitorName),
+       onCheck func(enum.TrafficMonitorName, error),
+) {
+       invalid := map[enum.TrafficMonitorName]bool{}
+       invalidStart := map[enum.TrafficMonitorName]time.Time{}
+       for {
+               tmErrs, err := ValidateAllMonitorsOfflineStates(toClient, includeOffline)
+               if err != nil {
+                       onErr("", fmt.Errorf("Error validating monitors: %v", err))
+                       time.Sleep(interval)
+                       // Nothing was validated this round. Falling through with a nil
+                       // result would report every failing monitor as recovered and
+                       // reset its grace period, so start the next poll instead.
+                       continue
+               }
+
+               for name, err := range tmErrs {
+                       if err != nil {
+                               if !invalid[name] {
+                                       // First failing poll: start the grace period.
+                                       invalid[name] = true
+                                       invalidStart[name] = time.Now()
+                               }
+                               if invalidSpan := time.Since(invalidStart[name]); invalidSpan > grace {
+                                       onErr(name, fmt.Errorf("invalid state for %v: %v\n", invalidSpan, err))
+                               }
+                       }
+
+                       onCheck(name, err)
+               }
+
+               // A monitor recovers when it was failing and this poll reports no
+               // error for it (missing from the result, or present with nil).
+               for tm, tmInvalid := range invalid {
+                       if tmInvalid && tmErrs[tm] == nil {
+                               onResumeSuccess(tm)
+                               invalid[tm] = false
+                       }
+               }
+
+               time.Sleep(interval)
+       }
+}
+
+// FilterOfflines returns a new slice holding only the servers whose status
+// parses to enum.CacheStatusOnline or enum.CacheStatusReported, in their
+// original order. The input slice is not modified.
+func FilterOfflines(servers []to.Server) []to.Server {
+       kept := []to.Server{}
+       for _, candidate := range servers {
+               switch enum.CacheStatusFromString(candidate.Status) {
+               case enum.CacheStatusOnline, enum.CacheStatusReported:
+                       kept = append(kept, candidate)
+               }
+       }
+       return kept
+}
+
+// GetCDNs returns the set of distinct CDN names that the given servers belong
+// to, as a map whose keys are the names and whose values carry no data.
+func GetCDNs(servers []to.Server) map[enum.CDNName]struct{} {
+       names := make(map[enum.CDNName]struct{})
+       for i := range servers {
+               names[enum.CDNName(servers[i].CDNName)] = struct{}{}
+       }
+       return names
+}
+
+// GetCRConfigs fetches the raw CRConfig of each given CDN from Traffic Ops and
+// unmarshals it. The result has exactly one entry per CDN: either the parsed
+// CRConfig, or the error that prevented fetching or parsing it.
+func GetCRConfigs(cdns map[enum.CDNName]struct{}, toClient *to.Session) map[enum.CDNName]CRConfigOrError {
+       crConfigs := make(map[enum.CDNName]CRConfigOrError, len(cdns))
+       for cdn := range cdns {
+               crConfigBytes, err := toClient.CRConfigRaw(string(cdn))
+               if err != nil {
+                       crConfigs[cdn] = CRConfigOrError{Err: fmt.Errorf("getting CRConfig: %v", err)}
+                       continue
+               }
+
+               crConfig := crconfig.CRConfig{}
+               if err := json.Unmarshal(crConfigBytes, &crConfig); err != nil {
+                       crConfigs[cdn] = CRConfigOrError{Err: fmt.Errorf("unmarshalling CRConfig JSON: %v", err)}
+                       // Keep the error entry; falling through would overwrite it
+                       // with a success entry for a partially decoded CRConfig.
+                       continue
+               }
+
+               crConfigs[cdn] = CRConfigOrError{CRConfig: &crConfig}
+       }
+       return crConfigs
+}

http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/83b58d97/traffic_monitor_golang/traffic_monitor/tools/validate-offline.go
----------------------------------------------------------------------
diff --git a/traffic_monitor_golang/traffic_monitor/tools/validate-offline.go 
b/traffic_monitor_golang/traffic_monitor/tools/validate-offline.go
index 92b17c7..07eee78 100644
--- a/traffic_monitor_golang/traffic_monitor/tools/validate-offline.go
+++ b/traffic_monitor_golang/traffic_monitor/tools/validate-offline.go
@@ -117,7 +117,7 @@ func main() {
                }
        }
 
-       go tmcheck.Validator(*tmURI, toClient, *interval, *grace, onErr, 
onResumeSuccess, onCheck)
+       go tmcheck.CRStatesOfflineValidator(*tmURI, toClient, *interval, 
*grace, onErr, onResumeSuccess, onCheck)
 
        if err := serve(log, *toURI, *tmURI); err != nil {
                fmt.Printf("Serve error: %v\n", err)
@@ -145,8 +145,8 @@ func serve(log Log, toURI string, tmURI string) error {
 
                fmt.Fprintf(w, `<pre>`)
                logCopy := log.Get()
-               for i := len(logCopy) - 1; i >= 0; i-- {
-                       fmt.Fprintf(w, "%s\n", logCopy[i])
+               for _, msg := range logCopy {
+                       fmt.Fprintf(w, "%s\n", msg)
                }
                fmt.Fprintf(w, `</pre>`)
 

Reply via email to