Add TM2 validator for peer pollers
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/commit/c9196a12 Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/tree/c9196a12 Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/diff/c9196a12 Branch: refs/heads/master Commit: c9196a12e57c381b8ba6de29e2aa4009a27a1f14 Parents: 4658c51 Author: Robert Butts <[email protected]> Authored: Fri Mar 3 15:39:10 2017 -0700 Committer: Dave Neuman <[email protected]> Committed: Wed Apr 12 15:43:31 2017 -0600 ---------------------------------------------------------------------- .../traffic_monitor/tmcheck/peerpoller.go | 112 ++++++++++ .../traffic_monitor/tmcheck/tmcheck.go | 69 +++++- .../tools/nagios-validate-peerpoller.go | 36 +++ .../tools/service-validate-offline.go | 204 ----------------- .../traffic_monitor/tools/validator-service.go | 222 +++++++++++++++++++ 5 files changed, 432 insertions(+), 211 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/c9196a12/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go ---------------------------------------------------------------------- diff --git a/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go b/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go new file mode 100644 index 0000000..cdeaf36 --- /dev/null +++ b/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package tmcheck + +import ( + "encoding/json" + "fmt" + "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/enum" + to "github.com/apache/incubator-trafficcontrol/traffic_ops/client" + "io/ioutil" + "time" +) + +const PeerPollMax = time.Duration(10) * time.Second + +const TrafficMonitorStatsPath = "/publish/Stats" + +// TrafficMonitorStatsJSON represents the JSON returned by Traffic Monitor's Stats endpoint. This currently only contains the Oldest Polled Peer Time member, as needed by this library. +type TrafficMonitorStatsJSON struct { + Stats TrafficMonitorStats `json:"stats"` +} + +// TrafficMonitorStats represents the internal JSON object returned by Traffic Monitor's Stats endpoint. This currently only contains the Oldest Polled Peer Time member, as needed by this library. +type TrafficMonitorStats struct { + OldestPolledPeerTime int `json:"Oldest Polled Peer Time (ms)"` +} + +func GetOldestPolledPeerTime(uri string) (time.Duration, error) { + resp, err := getClient().Get(uri + TrafficMonitorStatsPath) + if err != nil { + return time.Duration(0), fmt.Errorf("reading reply from %v: %v\n", uri, err) + } + respBytes, err := ioutil.ReadAll(resp.Body) + if err != nil { + return time.Duration(0), fmt.Errorf("reading reply from %v: %v\n", uri, err) + } + + stats := TrafficMonitorStatsJSON{} + if err := json.Unmarshal(respBytes, &stats); err != nil { + return time.Duration(0), fmt.Errorf("unmarshalling: %v", err) + } + + oldestPolledPeerTime := time.Duration(stats.Stats.OldestPolledPeerTime) * time.Millisecond + + return oldestPolledPeerTime, nil +} + +func ValidatePeerPoller(uri string) error { + lastPollTime, err := GetOldestPolledPeerTime(uri) + if err != nil { + return fmt.Errorf("failed to get oldest peer time: %v", err) + } + if lastPollTime > PeerPollMax { + return fmt.Errorf("Peer poller is dead, last poll was %v ago", lastPollTime) + } + return nil +} + +func ValidateAllPeerPollers(toClient *to.Session, includeOffline bool) (map[enum.TrafficMonitorName]error, error) { + servers, err := GetMonitors(toClient, includeOffline) + if err != nil { + return nil, err + } + errs := map[enum.TrafficMonitorName]error{} + for _, server := range servers { + uri := fmt.Sprintf("http://%s.%s", server.HostName, server.DomainName) + errs[enum.TrafficMonitorName(server.HostName)] = ValidatePeerPoller(uri) + } + return errs, nil +} + +func PeerPollersValidator( + tmURI string, + toClient *to.Session, + interval time.Duration, + grace time.Duration, + onErr func(error), + onResumeSuccess func(), + onCheck func(error), +) { + wrapValidatePeerPoller := func(uri string, _ *to.Session) error { return ValidatePeerPoller(uri) } + Validator(tmURI, toClient, interval, grace, onErr, onResumeSuccess, onCheck, wrapValidatePeerPoller) +} + +func PeerPollersAllValidator( + toClient *to.Session, + interval time.Duration, + includeOffline bool, + grace time.Duration, + onErr func(enum.TrafficMonitorName, error), + onResumeSuccess func(enum.TrafficMonitorName), + onCheck func(enum.TrafficMonitorName, error), +) { + AllValidator(toClient, interval, includeOffline, grace, onErr, onResumeSuccess, onCheck, ValidateAllPeerPollers) +} http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/c9196a12/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go ---------------------------------------------------------------------- diff --git a/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go b/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go index 819c1ab..a6e12ba 100644 --- a/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go +++ b/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go @@ -142,8 +142,28 @@ func ValidateCRStates(crstates *peer.Crstates, crconfig *crconfig.CRConfig) erro return nil } +type ValidatorFunc func( + tmURI string, + toClient *to.Session, + interval time.Duration, + grace time.Duration, + onErr func(error), + onResumeSuccess func(), + onCheck func(error), +) + +type AllValidatorFunc func( + toClient *to.Session, + interval time.Duration, + includeOffline bool, + grace time.Duration, + onErr func(enum.TrafficMonitorName, error), + onResumeSuccess func(enum.TrafficMonitorName), + onCheck func(enum.TrafficMonitorName, error), +) + // CRStatesOfflineValidator is designed to be run as a goroutine, and does not return. It continously validates every `interval`, and calls `onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll. -func CRStatesOfflineValidator( +func Validator( tmURI string, toClient *to.Session, interval time.Duration, @@ -151,11 +171,12 @@ func CRStatesOfflineValidator( onErr func(error), onResumeSuccess func(), onCheck func(error), + validator func(tmURI string, toClient *to.Session) error, ) { invalid := false invalidStart := time.Time{} for { - err := ValidateOfflineStates(tmURI, toClient) + err := validator(tmURI, toClient) if err != nil && !invalid { invalid = true @@ -180,14 +201,26 @@ func CRStatesOfflineValidator( } } +// CRStatesOfflineValidator is designed to be run as a goroutine, and does not return. It continously validates every `interval`, and calls `onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll. +func CRStatesOfflineValidator( + tmURI string, + toClient *to.Session, + interval time.Duration, + grace time.Duration, + onErr func(error), + onResumeSuccess func(), + onCheck func(error), +) { + Validator(tmURI, toClient, interval, grace, onErr, onResumeSuccess, onCheck, ValidateOfflineStates) +} + // CRConfigOrError contains a CRConfig or an error. Union types? Monads? What are those? type CRConfigOrError struct { CRConfig *crconfig.CRConfig Err error } -// ValidateOfflineStates validates that no OFFLINE or ADMIN_DOWN caches in the given Traffic Ops' CRConfig are marked Available in the given Traffic Monitor's CRStates. -func ValidateAllMonitorsOfflineStates(toClient *to.Session, includeOffline bool) (map[enum.TrafficMonitorName]error, error) { +func GetMonitors(toClient *to.Session, includeOffline bool) ([]to.Server, error) { trafficMonitorType := "RASCAL" monitorTypeQuery := map[string][]string{"type": []string{trafficMonitorType}} servers, err := toClient.ServersByType(monitorTypeQuery) @@ -198,6 +231,15 @@ func ValidateAllMonitorsOfflineStates(toClient *to.Session, includeOffline bool) if !includeOffline { servers = FilterOfflines(servers) } + return servers, nil +} + +// ValidateOfflineStates validates that no OFFLINE or ADMIN_DOWN caches in the given Traffic Ops' CRConfig are marked Available in the given Traffic Monitor's CRStates. +func ValidateAllMonitorsOfflineStates(toClient *to.Session, includeOffline bool) (map[enum.TrafficMonitorName]error, error) { + servers, err := GetMonitors(toClient, includeOffline) + if err != nil { + return nil, err + } crConfigs := GetCRConfigs(GetCDNs(servers), toClient) @@ -215,8 +257,7 @@ func ValidateAllMonitorsOfflineStates(toClient *to.Session, includeOffline bool) return errs, nil } -// AllMonitorsCRStatesOfflineValidator is designed to be run as a goroutine, and does not return. It continously validates every `interval`, and calls `onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll. Note the error passed to `onErr` may be a general validation error not associated with any monitor, in which case the passed `enum.TrafficMonitorName` will be empty. -func AllMonitorsCRStatesOfflineValidator( +func AllValidator( toClient *to.Session, interval time.Duration, includeOffline bool, @@ -224,12 +265,13 @@ func AllMonitorsCRStatesOfflineValidator( onErr func(enum.TrafficMonitorName, error), onResumeSuccess func(enum.TrafficMonitorName), onCheck func(enum.TrafficMonitorName, error), + validator func(toClient *to.Session, includeOffline bool) (map[enum.TrafficMonitorName]error, error), ) { invalid := map[enum.TrafficMonitorName]bool{} invalidStart := map[enum.TrafficMonitorName]time.Time{} metaFail := false for { - tmErrs, err := ValidateAllMonitorsOfflineStates(toClient, includeOffline) + tmErrs, err := validator(toClient, includeOffline) if err != nil { onErr("", fmt.Errorf("Error validating monitors: %v", err)) time.Sleep(interval) @@ -266,6 +308,19 @@ func AllMonitorsCRStatesOfflineValidator( } } +// AllMonitorsCRStatesOfflineValidator is designed to be run as a goroutine, and does not return. It continously validates every `interval`, and calls `onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll. Note the error passed to `onErr` may be a general validation error not associated with any monitor, in which case the passed `enum.TrafficMonitorName` will be empty. +func AllMonitorsCRStatesOfflineValidator( + toClient *to.Session, + interval time.Duration, + includeOffline bool, + grace time.Duration, + onErr func(enum.TrafficMonitorName, error), + onResumeSuccess func(enum.TrafficMonitorName), + onCheck func(enum.TrafficMonitorName, error), +) { + AllValidator(toClient, interval, includeOffline, grace, onErr, onResumeSuccess, onCheck, ValidateAllMonitorsOfflineStates) +} + // FilterOfflines returns only servers which are REPORTED or ONLINE func FilterOfflines(servers []to.Server) []to.Server { onlineServers := []to.Server{} http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/c9196a12/traffic_monitor_golang/traffic_monitor/tools/nagios-validate-peerpoller.go ---------------------------------------------------------------------- diff --git a/traffic_monitor_golang/traffic_monitor/tools/nagios-validate-peerpoller.go b/traffic_monitor_golang/traffic_monitor/tools/nagios-validate-peerpoller.go new file mode 100644 index 0000000..5e9c9fd --- /dev/null +++ b/traffic_monitor_golang/traffic_monitor/tools/nagios-validate-peerpoller.go @@ -0,0 +1,36 @@ +package main + +import ( + "flag" + "fmt" + "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/common/nagios" + "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/tmcheck" +) + +const UserAgent = "tm-peerpoller-validator/0.1" + +func main() { + tmURI := flag.String("tm", "", "The Traffic Monitor URI, whose Peer Poller to validate") + // toUser := flag.String("touser", "", "The Traffic Ops user") + // toPass := flag.String("topass", "", "The Traffic Ops password") + // includeOffline := flag.Bool("includeOffline", false, "Whether to include Offline Monitors") + help := flag.Bool("help", false, "Usage info") + helpBrief := flag.Bool("h", false, "Usage info") + flag.Parse() + if *help || *helpBrief { + fmt.Printf("Usage: ./nagios-validate-peerpoller -to https://traffic-ops.example.net -touser bill -topass thelizard -includeOffline true\n") + return + } + + // toClient, err := to.LoginWithAgent(*toURI, *toUser, *toPass, true, UserAgent, false, tmcheck.RequestTimeout) + // if err != nil { + // fmt.Printf("Error logging in to Traffic Ops: %v\n", err) + // return + // } + + err := tmcheck.ValidatePeerPoller(*tmURI) + if err != nil { + nagios.Exit(nagios.Critical, fmt.Sprintf("Error validating monitor peer poller: %v", err)) + } + nagios.Exit(nagios.Ok, "") +} http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/c9196a12/traffic_monitor_golang/traffic_monitor/tools/service-validate-offline.go ---------------------------------------------------------------------- diff --git a/traffic_monitor_golang/traffic_monitor/tools/service-validate-offline.go b/traffic_monitor_golang/traffic_monitor/tools/service-validate-offline.go deleted file mode 100644 index 41e0eb4..0000000 --- a/traffic_monitor_golang/traffic_monitor/tools/service-validate-offline.go +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// validate-offline is a utility HTTP service which polls the given Traffic Monitor and validates that no OFFLINE or ADMIN_DOWN caches in the Traffic Ops CRConfig are marked Available in Traffic Monitor's CRstates endpoint. - -package main - -import ( - "flag" - "fmt" - "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/enum" - "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/tmcheck" - to "github.com/apache/incubator-trafficcontrol/traffic_ops/client" - "net/http" - "sort" - "sync" - "time" -) - -const UserAgent = "tm-offline-validator/0.1" - -const LogLimit = 10 - -type Log struct { - log *[]string - limit int - errored *bool - lastCheck *time.Time - m *sync.RWMutex -} - -func (l *Log) Add(msg string) { - l.m.Lock() - defer l.m.Unlock() - *l.log = append([]string{msg}, *l.log...) - if len(*l.log) > l.limit { - *l.log = (*l.log)[:l.limit] - } -} - -func (l *Log) Get() []string { - l.m.RLock() - defer l.m.RUnlock() - return *l.log -} - -func (l *Log) GetErrored() (bool, time.Time) { - l.m.RLock() - defer l.m.RUnlock() - return *l.errored, *l.lastCheck -} - -func (l *Log) SetErrored(e bool) { - l.m.Lock() - defer l.m.Unlock() - *l.errored = e - *l.lastCheck = time.Now() -} - -func NewLog() Log { - log := make([]string, 0, LogLimit+1) - errored := false - limit := LogLimit - lastCheck := time.Time{} - return Log{log: &log, errored: &errored, m: &sync.RWMutex{}, limit: limit, lastCheck: &lastCheck} -} - -type Logs struct { - logs map[enum.TrafficMonitorName]Log - m *sync.RWMutex -} - -func NewLogs() Logs { - return Logs{logs: map[enum.TrafficMonitorName]Log{}, m: &sync.RWMutex{}} -} - -func (l Logs) Get(name enum.TrafficMonitorName) Log { - l.m.Lock() - defer l.m.Unlock() - if _, ok := l.logs[name]; !ok { - l.logs[name] = NewLog() - } - return l.logs[name] -} - -func (l Logs) GetMonitors() []string { - l.m.RLock() - defer l.m.RUnlock() - monitors := []string{} - for name, _ := range l.logs { - monitors = append(monitors, string(name)) - } - return monitors -} - -func main() { - toURI := flag.String("to", "", "The Traffic Ops URI, whose CRConfig to validate") - toUser := flag.String("touser", "", "The Traffic Ops user") - toPass := flag.String("topass", "", "The Traffic Ops password") - interval := flag.Duration("interval", time.Second*time.Duration(5), "The interval to validate") - grace := flag.Duration("grace", time.Second*time.Duration(30), "The grace period before invalid states are reported") - includeOffline := flag.Bool("includeOffline", false, "Whether to include Offline Monitors") - help := flag.Bool("help", false, "Usage info") - helpBrief := flag.Bool("h", false, "Usage info") - flag.Parse() - if *help || *helpBrief { - fmt.Printf("Usage: go run validate-offline -to https://traffic-ops.example.net -touser bill -topass thelizard -tm http://traffic-monitor.example.net -interval 5s -grace 30s -includeOffline true\n") - return - } - - toClient, err := to.LoginWithAgent(*toURI, *toUser, *toPass, true, UserAgent, false, tmcheck.RequestTimeout) - if err != nil { - fmt.Printf("Error logging in to Traffic Ops: %v\n", err) - return - } - - logs := NewLogs() - - onErr := func(name enum.TrafficMonitorName, err error) { - log := logs.Get(name) - log.Add(fmt.Sprintf("%v ERROR %v\n", time.Now(), err)) - log.SetErrored(true) - } - - onResumeSuccess := func(name enum.TrafficMonitorName) { - log := logs.Get(name) - log.Add(fmt.Sprintf("%v INFO State Valid\n", time.Now())) - log.SetErrored(false) - } - - onCheck := func(name enum.TrafficMonitorName, err error) { - log := logs.Get(name) - log.SetErrored(err != nil) - } - - go tmcheck.AllMonitorsCRStatesOfflineValidator(toClient, *interval, *includeOffline, *grace, onErr, onResumeSuccess, onCheck) - - if err := serve(logs, *toURI); err != nil { - fmt.Printf("Serve error: %v\n", err) - } -} - -func serve(logs Logs, toURI string) error { - http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Access-Control-Allow-Origin", "*") - w.Header().Set("Content-Type", "text/html") - fmt.Fprintf(w, `<!DOCTYPE html> -<meta http-equiv="refresh" content="5"> -<meta charset="utf-8"> -<meta name="viewport" content="width=device-width, initial-scale=1"> -<title>Traffic Monitor Offline Validator</title> -<style type="text/css">body{margin:40px auto;line-height:1.6;font-size:18px;color:#444;padding:0 8px 0 8px}h1,h2,h3{line-height:1.2}span{padding:0px 4px 0px 4px;}</style>`) - - fmt.Fprintf(w, `<p>%s`, toURI) - - fmt.Fprintf(w, `<table style="width:100%%">`) - - monitors := logs.GetMonitors() - sort.Strings(monitors) // sort, so they're always in the same order in the webpage - for _, monitor := range monitors { - fmt.Fprintf(w, `</tr>`) - - log := logs.Get(enum.TrafficMonitorName(monitor)) - - fmt.Fprintf(w, `<td><span>%s</span></td>`, monitor) - errored, lastCheck := log.GetErrored() - if errored { - fmt.Fprintf(w, `<td><span style="color:red">Invalid</span></td>`) - } else { - fmt.Fprintf(w, `<td><span style="color:limegreen">Valid</span></td>`) - } - fmt.Fprintf(w, `<td><span>as of %v</span></td>`, lastCheck) - - fmt.Fprintf(w, `<td><span style="font-family:monospace">`) - logCopy := log.Get() - firstMsg := "" - if len(logCopy) > 0 { - firstMsg = logCopy[0] - } - fmt.Fprintf(w, "%s\n", firstMsg) - fmt.Fprintf(w, `</span></td>`) - - fmt.Fprintf(w, `</tr>`) - } - fmt.Fprintf(w, `</table>`) - }) - return http.ListenAndServe(":80", nil) -} http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/c9196a12/traffic_monitor_golang/traffic_monitor/tools/validator-service.go ---------------------------------------------------------------------- diff --git a/traffic_monitor_golang/traffic_monitor/tools/validator-service.go b/traffic_monitor_golang/traffic_monitor/tools/validator-service.go new file mode 100644 index 0000000..0b551c8 --- /dev/null +++ b/traffic_monitor_golang/traffic_monitor/tools/validator-service.go @@ -0,0 +1,222 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +// validate-offline is a utility HTTP service which polls the given Traffic Monitor and validates that no OFFLINE or ADMIN_DOWN caches in the Traffic Ops CRConfig are marked Available in Traffic Monitor's CRstates endpoint. + +package main + +import ( + "flag" + "fmt" + "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/enum" + "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/tmcheck" + to "github.com/apache/incubator-trafficcontrol/traffic_ops/client" + "io" + "net/http" + "sort" + "sync" + "time" +) + +const UserAgent = "tm-offline-validator/0.1" + +const LogLimit = 10 + +type Log struct { + log *[]string + limit int + errored *bool + lastCheck *time.Time + m *sync.RWMutex +} + +func (l *Log) Add(msg string) { + l.m.Lock() + defer l.m.Unlock() + *l.log = append([]string{msg}, *l.log...) + if len(*l.log) > l.limit { + *l.log = (*l.log)[:l.limit] + } +} + +func (l *Log) Get() []string { + l.m.RLock() + defer l.m.RUnlock() + return *l.log +} + +func (l *Log) GetErrored() (bool, time.Time) { + l.m.RLock() + defer l.m.RUnlock() + return *l.errored, *l.lastCheck +} + +func (l *Log) SetErrored(e bool) { + l.m.Lock() + defer l.m.Unlock() + *l.errored = e + *l.lastCheck = time.Now() +} + +func NewLog() Log { + log := make([]string, 0, LogLimit+1) + errored := false + limit := LogLimit + lastCheck := time.Time{} + return Log{log: &log, errored: &errored, m: &sync.RWMutex{}, limit: limit, lastCheck: &lastCheck} +} + +type Logs struct { + logs map[enum.TrafficMonitorName]Log + m *sync.RWMutex +} + +func NewLogs() Logs { + return Logs{logs: map[enum.TrafficMonitorName]Log{}, m: &sync.RWMutex{}} +} + +func (l Logs) Get(name enum.TrafficMonitorName) Log { + l.m.Lock() + defer l.m.Unlock() + if _, ok := l.logs[name]; !ok { + l.logs[name] = NewLog() + } + return l.logs[name] +} + +func (l Logs) GetMonitors() []string { + l.m.RLock() + defer l.m.RUnlock() + monitors := []string{} + for name, _ := range l.logs { + monitors = append(monitors, string(name)) + } + return monitors +} + +func startValidator(validator tmcheck.AllValidatorFunc, toClient *to.Session, interval time.Duration, includeOffline bool, grace time.Duration) Logs { + logs := NewLogs() + + onErr := func(name enum.TrafficMonitorName, err error) { + log := logs.Get(name) + log.Add(fmt.Sprintf("%v ERROR %v\n", time.Now(), err)) + log.SetErrored(true) + } + + onResumeSuccess := func(name enum.TrafficMonitorName) { + log := logs.Get(name) + log.Add(fmt.Sprintf("%v INFO State Valid\n", time.Now())) + log.SetErrored(false) + } + + onCheck := func(name enum.TrafficMonitorName, err error) { + log := logs.Get(name) + log.SetErrored(err != nil) + } + + go validator(toClient, interval, includeOffline, grace, onErr, onResumeSuccess, onCheck) + return logs +} + +func main() { + toURI := flag.String("to", "", "The Traffic Ops URI, whose CRConfig to validate") + toUser := flag.String("touser", "", "The Traffic Ops user") + toPass := flag.String("topass", "", "The Traffic Ops password") + interval := flag.Duration("interval", time.Second*time.Duration(5), "The interval to validate") + grace := flag.Duration("grace", time.Second*time.Duration(30), "The grace period before invalid states are reported") + includeOffline := flag.Bool("includeOffline", false, "Whether to include Offline Monitors") + help := flag.Bool("help", false, "Usage info") + helpBrief := flag.Bool("h", false, "Usage info") + flag.Parse() + if *help || *helpBrief { + fmt.Printf("Usage: go run validate-offline -to https://traffic-ops.example.net -touser bill -topass thelizard -tm http://traffic-monitor.example.net -interval 5s -grace 30s -includeOffline true\n") + return + } + + toClient, err := to.LoginWithAgent(*toURI, *toUser, *toPass, true, UserAgent, false, tmcheck.RequestTimeout) + if err != nil { + fmt.Printf("Error logging in to Traffic Ops: %v\n", err) + return + } + + crStatesOfflineLogs := startValidator(tmcheck.AllMonitorsCRStatesOfflineValidator, toClient, *interval, *includeOffline, *grace) + peerPollerLogs := startValidator(tmcheck.PeerPollersAllValidator, toClient, *interval, *includeOffline, *grace) + + if err := serve(*toURI, crStatesOfflineLogs, peerPollerLogs); err != nil { + fmt.Printf("Serve error: %v\n", err) + } +} + +func printLogs(logs Logs, w io.Writer) { + fmt.Fprintf(w, `<table style="width:100%%">`) + + monitors := logs.GetMonitors() + sort.Strings(monitors) // sort, so they're always in the same order in the webpage + for _, monitor := range monitors { + fmt.Fprintf(w, `</tr>`) + + log := logs.Get(enum.TrafficMonitorName(monitor)) + + fmt.Fprintf(w, `<td><span>%s</span></td>`, monitor) + errored, lastCheck := log.GetErrored() + if errored { + fmt.Fprintf(w, `<td><span style="color:red">Invalid</span></td>`) + } else { + fmt.Fprintf(w, `<td><span style="color:limegreen">Valid</span></td>`) + } + fmt.Fprintf(w, `<td><span>as of %v</span></td>`, lastCheck) + + fmt.Fprintf(w, `<td><span style="font-family:monospace">`) + logCopy := log.Get() + firstMsg := "" + if len(logCopy) > 0 { + firstMsg = logCopy[0] + } + fmt.Fprintf(w, "%s\n", firstMsg) + fmt.Fprintf(w, `</span></td>`) + + fmt.Fprintf(w, `</tr>`) + } + fmt.Fprintf(w, `</table>`) +} + +func serve(toURI string, crStatesOfflineLogs Logs, peerPollerLogs Logs) error { + http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Access-Control-Allow-Origin", "*") + w.Header().Set("Content-Type", "text/html") + fmt.Fprintf(w, `<!DOCTYPE html> +<meta http-equiv="refresh" content="5"> +<meta charset="utf-8"> +<meta name="viewport" content="width=device-width, initial-scale=1"> +<title>Traffic Monitor Offline Validator</title> +<style type="text/css">body{margin:40px auto;line-height:1.6;font-size:18px;color:#444;padding:0 8px 0 8px}h1,h2,h3{line-height:1.2}span{padding:0px 4px 0px 4px;}</style>`) + + fmt.Fprintf(w, `<h1>Traffic Monitor Validator</h1>`) + + fmt.Fprintf(w, `<p>%s`, toURI) + + fmt.Fprintf(w, `<h2>CRStates Offline</h2>`) + printLogs(crStatesOfflineLogs, w) + + fmt.Fprintf(w, `<h2>Peer Poller</h2>`) + printLogs(peerPollerLogs, w) + + }) + return http.ListenAndServe(":80", nil) +}
