Add TM2 tmcheck validator for all monitors. Adds tmcheck.ValidateAllMonitorsOfflineStates, which efficiently checks all monitors in Traffic Ops (i.e. fetching each CDN's CRConfig only once, rather than once per monitor).
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/commit/83b58d97 Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/tree/83b58d97 Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/diff/83b58d97 Branch: refs/heads/master Commit: 83b58d97fb868309db0db48963a66f85a91caadb Parents: 33fa72c Author: Robert Butts <[email protected]> Authored: Thu Mar 2 11:29:07 2017 -0700 Committer: Dave Neuman <[email protected]> Committed: Wed Apr 12 15:43:31 2017 -0600 ---------------------------------------------------------------------- .../traffic_monitor/enum/enum.go | 3 + .../traffic_monitor/tmcheck/tmcheck.go | 142 ++++++++++++++++++- .../traffic_monitor/tools/validate-offline.go | 6 +- 3 files changed, 143 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/83b58d97/traffic_monitor_golang/traffic_monitor/enum/enum.go ---------------------------------------------------------------------- diff --git a/traffic_monitor_golang/traffic_monitor/enum/enum.go b/traffic_monitor_golang/traffic_monitor/enum/enum.go index 45546bd..1c06ac8 100644 --- a/traffic_monitor_golang/traffic_monitor/enum/enum.go +++ b/traffic_monitor_golang/traffic_monitor/enum/enum.go @@ -32,6 +32,9 @@ import ( "strings" ) +// CDNName is the name of a CDN in Traffic Control. +type CDNName string + // TrafficMonitorName is the hostname of a Traffic Monitor peer. 
type TrafficMonitorName string http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/83b58d97/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go ---------------------------------------------------------------------- diff --git a/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go b/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go index d6a964e..1178721 100644 --- a/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go +++ b/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go @@ -93,8 +93,12 @@ func ValidateOfflineStates(tmURI string, toClient *to.Session) error { if err != nil { return fmt.Errorf("getting CDN from Traffic Monitor: %v", err) } + return ValidateOfflineStatesWithCDN(tmURI, cdn, toClient) +} - crConfigBytes, err := toClient.CRConfigRaw(cdn) +// ValidateOfflineStatesWithCDN validates per ValidateOfflineStates, but saves an additional query if the Traffic Monitor's CDN is known. +func ValidateOfflineStatesWithCDN(tmURI string, tmCDN string, toClient *to.Session) error { + crConfigBytes, err := toClient.CRConfigRaw(tmCDN) if err != nil { return fmt.Errorf("getting CRConfig: %v", err) } @@ -104,19 +108,24 @@ func ValidateOfflineStates(tmURI string, toClient *to.Session) error { return fmt.Errorf("unmarshalling CRConfig JSON: %v", err) } + return ValidateOfflineStatesWithCRConfig(tmURI, &crConfig, toClient) +} + +// ValidateOfflineStatesWithCRConfig validates per ValidateOfflineStates, but saves querying the CRconfig if it's already fetched. +func ValidateOfflineStatesWithCRConfig(tmURI string, crConfig *crconfig.CRConfig, toClient *to.Session) error { crStates, err := GetCRStates(tmURI + TrafficMonitorCRStatesPath) if err != nil { return fmt.Errorf("getting CRStates: %v", err) } - return ValidateCRStates(crStates, &crConfig) + return ValidateCRStates(crStates, crConfig) } // ValidateCRStates validates that no OFFLINE or ADMIN_DOWN caches in the given CRConfig are marked Available in the given CRStates. 
func ValidateCRStates(crstates *peer.Crstates, crconfig *crconfig.CRConfig) error { for cacheName, cacheInfo := range crconfig.ContentServers { status := enum.CacheStatusFromString(string(*cacheInfo.Status)) - if status != enum.CacheStatusOffline || status != enum.CacheStatusOffline { + if status != enum.CacheStatusAdminDown || status != enum.CacheStatusOffline { continue } @@ -133,8 +142,8 @@ func ValidateCRStates(crstates *peer.Crstates, crconfig *crconfig.CRConfig) erro return nil } -// Validator is designed to be run as a goroutine, and does not return. It continously validates every `interval`, and calls `onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll. -func Validator( +// CRStatesOfflineValidator is designed to be run as a goroutine, and does not return. It continously validates every `interval`, and calls `onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll. +func CRStatesOfflineValidator( tmURI string, toClient *to.Session, interval time.Duration, @@ -170,3 +179,126 @@ func Validator( time.Sleep(interval) } } + +// CRConfigOrError contains a CRConfig or an error. Union types? Monads? What are those? +type CRConfigOrError struct { + CRConfig *crconfig.CRConfig + Err error +} + +// ValidateOfflineStates validates that no OFFLINE or ADMIN_DOWN caches in the given Traffic Ops' CRConfig are marked Available in the given Traffic Monitor's CRStates. 
+func ValidateAllMonitorsOfflineStates(toClient *to.Session, includeOffline bool) (map[enum.TrafficMonitorName]error, error) { + trafficMonitorType := "RASCAL" + monitorTypeQuery := map[string][]string{"type": []string{trafficMonitorType}} + servers, err := toClient.ServersByType(monitorTypeQuery) + if err != nil { + return nil, fmt.Errorf("getting monitors from Traffic Ops: %v", err) + } + + if !includeOffline { + servers = FilterOfflines(servers) + } + + crConfigs := GetCRConfigs(GetCDNs(servers), toClient) + + errs := map[enum.TrafficMonitorName]error{} + for _, server := range servers { + crConfig := crConfigs[enum.CDNName(server.CDNName)] + if err := crConfig.Err; err != nil { + errs[enum.TrafficMonitorName(server.HostName)] = fmt.Errorf("getting CRConfig: %v", err) + continue + } + + fqdn := fmt.Sprintf("%s.%s", server.HostName, server.DomainName) + if err := ValidateOfflineStatesWithCRConfig(fqdn, crConfig.CRConfig, toClient); err != nil { + errs[enum.TrafficMonitorName(server.HostName)] = err + } + } + return errs, nil +} + +// AllMonitorsCRStatesOfflineValidator is designed to be run as a goroutine, and does not return. It continously validates every `interval`, and calls `onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll. Note the error passed to `onErr` may be a general validation error not associated with any monitor, in which case the passed `enum.TrafficMonitorName` will be empty. 
+func AllMonitorsCRStatesOfflineValidator( + toClient *to.Session, + interval time.Duration, + includeOffline bool, + grace time.Duration, + onErr func(enum.TrafficMonitorName, error), + onResumeSuccess func(enum.TrafficMonitorName), + onCheck func(enum.TrafficMonitorName, error), +) { + invalid := map[enum.TrafficMonitorName]bool{} + invalidStart := map[enum.TrafficMonitorName]time.Time{} + for { + tmErrs, err := ValidateAllMonitorsOfflineStates(toClient, includeOffline) // []MonitorError { + if err != nil { + onErr("", fmt.Errorf("Error validating monitors: %v", err)) + time.Sleep(interval) + } + + for name, err := range tmErrs { + if err != nil && !invalid[name] { + invalid[name] = true + invalidStart[name] = time.Now() + } + + if err != nil { + invalidSpan := time.Now().Sub(invalidStart[name]) + if invalidSpan > grace { + onErr(name, fmt.Errorf("invalid state for %v: %v\n", invalidSpan, err)) + } + } + + onCheck(name, err) + } + + for tm, tmInvalid := range invalid { + if _, ok := tmErrs[tm]; tmInvalid && !ok { + onResumeSuccess(tm) + invalid[tm] = false + } + } + + time.Sleep(interval) + } +} + +// FilterOfflines returns only servers which are REPORTED or ONLINE +func FilterOfflines(servers []to.Server) []to.Server { + onlineServers := []to.Server{} + for _, server := range servers { + status := enum.CacheStatusFromString(server.Status) + if status != enum.CacheStatusOnline && status != enum.CacheStatusReported { + continue + } + onlineServers = append(onlineServers, server) + } + return onlineServers +} + +func GetCDNs(servers []to.Server) map[enum.CDNName]struct{} { + cdns := map[enum.CDNName]struct{}{} + for _, server := range servers { + cdns[enum.CDNName(server.CDNName)] = struct{}{} + } + return cdns +} + +func GetCRConfigs(cdns map[enum.CDNName]struct{}, toClient *to.Session) map[enum.CDNName]CRConfigOrError { + crConfigs := map[enum.CDNName]CRConfigOrError{} + for cdn, _ := range cdns { + crConfigBytes, err := toClient.CRConfigRaw(string(cdn)) + if err 
!= nil { + crConfigs[cdn] = CRConfigOrError{Err: fmt.Errorf("getting CRConfig: %v", err)} + continue + } + + crConfig := crconfig.CRConfig{} + if err := json.Unmarshal(crConfigBytes, &crConfig); err != nil { + crConfigs[cdn] = CRConfigOrError{Err: fmt.Errorf("unmarshalling CRConfig JSON: %v", err)} + } + + crConfigs[cdn] = CRConfigOrError{CRConfig: &crConfig} + } + return crConfigs +} http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/83b58d97/traffic_monitor_golang/traffic_monitor/tools/validate-offline.go ---------------------------------------------------------------------- diff --git a/traffic_monitor_golang/traffic_monitor/tools/validate-offline.go b/traffic_monitor_golang/traffic_monitor/tools/validate-offline.go index 92b17c7..07eee78 100644 --- a/traffic_monitor_golang/traffic_monitor/tools/validate-offline.go +++ b/traffic_monitor_golang/traffic_monitor/tools/validate-offline.go @@ -117,7 +117,7 @@ func main() { } } - go tmcheck.Validator(*tmURI, toClient, *interval, *grace, onErr, onResumeSuccess, onCheck) + go tmcheck.CRStatesOfflineValidator(*tmURI, toClient, *interval, *grace, onErr, onResumeSuccess, onCheck) if err := serve(log, *toURI, *tmURI); err != nil { fmt.Printf("Serve error: %v\n", err) @@ -145,8 +145,8 @@ func serve(log Log, toURI string, tmURI string) error { fmt.Fprintf(w, `<pre>`) logCopy := log.Get() - for i := len(logCopy) - 1; i >= 0; i-- { - fmt.Fprintf(w, "%s\n", logCopy[i]) + for _, msg := range logCopy { + fmt.Fprintf(w, "%s\n", msg) } fmt.Fprintf(w, `</pre>`)
