Add TM2 validator for peer pollers

Project: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/commit/c9196a12
Tree: 
http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/tree/c9196a12
Diff: 
http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/diff/c9196a12

Branch: refs/heads/master
Commit: c9196a12e57c381b8ba6de29e2aa4009a27a1f14
Parents: 4658c51
Author: Robert Butts <[email protected]>
Authored: Fri Mar 3 15:39:10 2017 -0700
Committer: Dave Neuman <[email protected]>
Committed: Wed Apr 12 15:43:31 2017 -0600

----------------------------------------------------------------------
 .../traffic_monitor/tmcheck/peerpoller.go       | 112 ++++++++++
 .../traffic_monitor/tmcheck/tmcheck.go          |  69 +++++-
 .../tools/nagios-validate-peerpoller.go         |  36 +++
 .../tools/service-validate-offline.go           | 204 -----------------
 .../traffic_monitor/tools/validator-service.go  | 222 +++++++++++++++++++
 5 files changed, 432 insertions(+), 211 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/c9196a12/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go
----------------------------------------------------------------------
diff --git a/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go 
b/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go
new file mode 100644
index 0000000..cdeaf36
--- /dev/null
+++ b/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package tmcheck
+
+import (
+       "encoding/json"
+       "fmt"
+       
"github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/enum"
+       to "github.com/apache/incubator-trafficcontrol/traffic_ops/client"
+       "io/ioutil"
+       "time"
+)
+
+const PeerPollMax = time.Duration(10) * time.Second
+
+const TrafficMonitorStatsPath = "/publish/Stats"
+
+// TrafficMonitorStatsJSON represents the JSON returned by Traffic Monitor's 
Stats endpoint. This currently only contains the Oldest Polled Peer Time 
member, as needed by this library.
+type TrafficMonitorStatsJSON struct {
+       Stats TrafficMonitorStats `json:"stats"`
+}
+
+// TrafficMonitorStats represents the internal JSON object returned by Traffic 
Monitor's Stats endpoint. This currently only contains the Oldest Polled Peer 
Time member, as needed by this library.
+type TrafficMonitorStats struct {
+       OldestPolledPeerTime int `json:"Oldest Polled Peer Time (ms)"`
+}
+
+func GetOldestPolledPeerTime(uri string) (time.Duration, error) {
+       resp, err := getClient().Get(uri + TrafficMonitorStatsPath)
+       if err != nil {
+               return time.Duration(0), fmt.Errorf("reading reply from %v: 
%v\n", uri, err)
+       }
+       respBytes, err := ioutil.ReadAll(resp.Body)
+       if err != nil {
+               return time.Duration(0), fmt.Errorf("reading reply from %v: 
%v\n", uri, err)
+       }
+
+       stats := TrafficMonitorStatsJSON{}
+       if err := json.Unmarshal(respBytes, &stats); err != nil {
+               return time.Duration(0), fmt.Errorf("unmarshalling: %v", err)
+       }
+
+       oldestPolledPeerTime := time.Duration(stats.Stats.OldestPolledPeerTime) 
* time.Millisecond
+
+       return oldestPolledPeerTime, nil
+}
+
+func ValidatePeerPoller(uri string) error {
+       lastPollTime, err := GetOldestPolledPeerTime(uri)
+       if err != nil {
+               return fmt.Errorf("failed to get oldest peer time: %v", err)
+       }
+       if lastPollTime > PeerPollMax {
+               return fmt.Errorf("Peer poller is dead, last poll was %v ago", 
lastPollTime)
+       }
+       return nil
+}
+
+func ValidateAllPeerPollers(toClient *to.Session, includeOffline bool) 
(map[enum.TrafficMonitorName]error, error) {
+       servers, err := GetMonitors(toClient, includeOffline)
+       if err != nil {
+               return nil, err
+       }
+       errs := map[enum.TrafficMonitorName]error{}
+       for _, server := range servers {
+               uri := fmt.Sprintf("http://%s.%s", server.HostName, 
server.DomainName)
+               errs[enum.TrafficMonitorName(server.HostName)] = 
ValidatePeerPoller(uri)
+       }
+       return errs, nil
+}
+
+func PeerPollersValidator(
+       tmURI string,
+       toClient *to.Session,
+       interval time.Duration,
+       grace time.Duration,
+       onErr func(error),
+       onResumeSuccess func(),
+       onCheck func(error),
+) {
+       wrapValidatePeerPoller := func(uri string, _ *to.Session) error { 
return ValidatePeerPoller(uri) }
+       Validator(tmURI, toClient, interval, grace, onErr, onResumeSuccess, 
onCheck, wrapValidatePeerPoller)
+}
+
+func PeerPollersAllValidator(
+       toClient *to.Session,
+       interval time.Duration,
+       includeOffline bool,
+       grace time.Duration,
+       onErr func(enum.TrafficMonitorName, error),
+       onResumeSuccess func(enum.TrafficMonitorName),
+       onCheck func(enum.TrafficMonitorName, error),
+) {
+       AllValidator(toClient, interval, includeOffline, grace, onErr, 
onResumeSuccess, onCheck, ValidateAllPeerPollers)
+}

http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/c9196a12/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go
----------------------------------------------------------------------
diff --git a/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go 
b/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go
index 819c1ab..a6e12ba 100644
--- a/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go
+++ b/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go
@@ -142,8 +142,28 @@ func ValidateCRStates(crstates *peer.Crstates, crconfig 
*crconfig.CRConfig) erro
        return nil
 }
 
+type ValidatorFunc func(
+       tmURI string,
+       toClient *to.Session,
+       interval time.Duration,
+       grace time.Duration,
+       onErr func(error),
+       onResumeSuccess func(),
+       onCheck func(error),
+)
+
+type AllValidatorFunc func(
+       toClient *to.Session,
+       interval time.Duration,
+       includeOffline bool,
+       grace time.Duration,
+       onErr func(enum.TrafficMonitorName, error),
+       onResumeSuccess func(enum.TrafficMonitorName),
+       onCheck func(enum.TrafficMonitorName, error),
+)
+
 // CRStatesOfflineValidator is designed to be run as a goroutine, and does not 
return. It continously validates every `interval`, and calls `onErr` on 
failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll.
-func CRStatesOfflineValidator(
+func Validator(
        tmURI string,
        toClient *to.Session,
        interval time.Duration,
@@ -151,11 +171,12 @@ func CRStatesOfflineValidator(
        onErr func(error),
        onResumeSuccess func(),
        onCheck func(error),
+       validator func(tmURI string, toClient *to.Session) error,
 ) {
        invalid := false
        invalidStart := time.Time{}
        for {
-               err := ValidateOfflineStates(tmURI, toClient)
+               err := validator(tmURI, toClient)
 
                if err != nil && !invalid {
                        invalid = true
@@ -180,14 +201,26 @@ func CRStatesOfflineValidator(
        }
 }
 
+// CRStatesOfflineValidator is designed to be run as a goroutine, and does not 
return. It continuously validates every `interval`, and calls `onErr` on 
failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll.
+func CRStatesOfflineValidator(
+       tmURI string,
+       toClient *to.Session,
+       interval time.Duration,
+       grace time.Duration,
+       onErr func(error),
+       onResumeSuccess func(),
+       onCheck func(error),
+) {
+       Validator(tmURI, toClient, interval, grace, onErr, onResumeSuccess, 
onCheck, ValidateOfflineStates)
+}
+
 // CRConfigOrError contains a CRConfig or an error. Union types? Monads? What 
are those?
 type CRConfigOrError struct {
        CRConfig *crconfig.CRConfig
        Err      error
 }
 
-// ValidateOfflineStates validates that no OFFLINE or ADMIN_DOWN caches in the 
given Traffic Ops' CRConfig are marked Available in the given Traffic Monitor's 
CRStates.
-func ValidateAllMonitorsOfflineStates(toClient *to.Session, includeOffline 
bool) (map[enum.TrafficMonitorName]error, error) {
+func GetMonitors(toClient *to.Session, includeOffline bool) ([]to.Server, 
error) {
        trafficMonitorType := "RASCAL"
        monitorTypeQuery := map[string][]string{"type": 
[]string{trafficMonitorType}}
        servers, err := toClient.ServersByType(monitorTypeQuery)
@@ -198,6 +231,15 @@ func ValidateAllMonitorsOfflineStates(toClient 
*to.Session, includeOffline bool)
        if !includeOffline {
                servers = FilterOfflines(servers)
        }
+       return servers, nil
+}
+
+// ValidateAllMonitorsOfflineStates validates that no OFFLINE or ADMIN_DOWN caches in the 
given Traffic Ops' CRConfig are marked Available in each Traffic Monitor's 
CRStates. It returns a map of per-monitor validation errors.
+func ValidateAllMonitorsOfflineStates(toClient *to.Session, includeOffline 
bool) (map[enum.TrafficMonitorName]error, error) {
+       servers, err := GetMonitors(toClient, includeOffline)
+       if err != nil {
+               return nil, err
+       }
 
        crConfigs := GetCRConfigs(GetCDNs(servers), toClient)
 
@@ -215,8 +257,7 @@ func ValidateAllMonitorsOfflineStates(toClient *to.Session, 
includeOffline bool)
        return errs, nil
 }
 
-// AllMonitorsCRStatesOfflineValidator is designed to be run as a goroutine, 
and does not return. It continously validates every `interval`, and calls 
`onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on 
every poll. Note the error passed to `onErr` may be a general validation error 
not associated with any monitor, in which case the passed 
`enum.TrafficMonitorName` will be empty.
-func AllMonitorsCRStatesOfflineValidator(
+func AllValidator(
        toClient *to.Session,
        interval time.Duration,
        includeOffline bool,
@@ -224,12 +265,13 @@ func AllMonitorsCRStatesOfflineValidator(
        onErr func(enum.TrafficMonitorName, error),
        onResumeSuccess func(enum.TrafficMonitorName),
        onCheck func(enum.TrafficMonitorName, error),
+       validator func(toClient *to.Session, includeOffline bool) 
(map[enum.TrafficMonitorName]error, error),
 ) {
        invalid := map[enum.TrafficMonitorName]bool{}
        invalidStart := map[enum.TrafficMonitorName]time.Time{}
        metaFail := false
        for {
-               tmErrs, err := ValidateAllMonitorsOfflineStates(toClient, 
includeOffline)
+               tmErrs, err := validator(toClient, includeOffline)
                if err != nil {
                        onErr("", fmt.Errorf("Error validating monitors: %v", 
err))
                        time.Sleep(interval)
@@ -266,6 +308,19 @@ func AllMonitorsCRStatesOfflineValidator(
        }
 }
 
+// AllMonitorsCRStatesOfflineValidator is designed to be run as a goroutine, 
and does not return. It continuously validates every `interval`, and calls 
`onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on 
every poll. Note the error passed to `onErr` may be a general validation error 
not associated with any monitor, in which case the passed 
`enum.TrafficMonitorName` will be empty.
+func AllMonitorsCRStatesOfflineValidator(
+       toClient *to.Session,
+       interval time.Duration,
+       includeOffline bool,
+       grace time.Duration,
+       onErr func(enum.TrafficMonitorName, error),
+       onResumeSuccess func(enum.TrafficMonitorName),
+       onCheck func(enum.TrafficMonitorName, error),
+) {
+       AllValidator(toClient, interval, includeOffline, grace, onErr, 
onResumeSuccess, onCheck, ValidateAllMonitorsOfflineStates)
+}
+
 // FilterOfflines returns only servers which are REPORTED or ONLINE
 func FilterOfflines(servers []to.Server) []to.Server {
        onlineServers := []to.Server{}

http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/c9196a12/traffic_monitor_golang/traffic_monitor/tools/nagios-validate-peerpoller.go
----------------------------------------------------------------------
diff --git 
a/traffic_monitor_golang/traffic_monitor/tools/nagios-validate-peerpoller.go 
b/traffic_monitor_golang/traffic_monitor/tools/nagios-validate-peerpoller.go
new file mode 100644
index 0000000..5e9c9fd
--- /dev/null
+++ b/traffic_monitor_golang/traffic_monitor/tools/nagios-validate-peerpoller.go
@@ -0,0 +1,36 @@
+package main
+
+import (
+       "flag"
+       "fmt"
+       
"github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/common/nagios"
+       
"github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/tmcheck"
+)
+
+const UserAgent = "tm-peerpoller-validator/0.1"
+
+func main() {
+       tmURI := flag.String("tm", "", "The Traffic Monitor URI, whose Peer 
Poller to validate")
+       // toUser := flag.String("touser", "", "The Traffic Ops user")
+       // toPass := flag.String("topass", "", "The Traffic Ops password")
+       // includeOffline := flag.Bool("includeOffline", false, "Whether to 
include Offline Monitors")
+       help := flag.Bool("help", false, "Usage info")
+       helpBrief := flag.Bool("h", false, "Usage info")
+       flag.Parse()
+       if *help || *helpBrief {
+               fmt.Printf("Usage: ./nagios-validate-peerpoller -to 
https://traffic-ops.example.net -touser bill -topass thelizard -includeOffline 
true\n")
+               return
+       }
+
+       // toClient, err := to.LoginWithAgent(*toURI, *toUser, *toPass, true, 
UserAgent, false, tmcheck.RequestTimeout)
+       // if err != nil {
+       //      fmt.Printf("Error logging in to Traffic Ops: %v\n", err)
+       //      return
+       // }
+
+       err := tmcheck.ValidatePeerPoller(*tmURI)
+       if err != nil {
+               nagios.Exit(nagios.Critical, fmt.Sprintf("Error validating 
monitor peer poller: %v", err))
+       }
+       nagios.Exit(nagios.Ok, "")
+}

http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/c9196a12/traffic_monitor_golang/traffic_monitor/tools/service-validate-offline.go
----------------------------------------------------------------------
diff --git 
a/traffic_monitor_golang/traffic_monitor/tools/service-validate-offline.go 
b/traffic_monitor_golang/traffic_monitor/tools/service-validate-offline.go
deleted file mode 100644
index 41e0eb4..0000000
--- a/traffic_monitor_golang/traffic_monitor/tools/service-validate-offline.go
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-// validate-offline is a utility HTTP service which polls the given Traffic 
Monitor and validates that no OFFLINE or ADMIN_DOWN caches in the Traffic Ops 
CRConfig are marked Available in Traffic Monitor's CRstates endpoint.
-
-package main
-
-import (
-       "flag"
-       "fmt"
-       
"github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/enum"
-       
"github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/tmcheck"
-       to "github.com/apache/incubator-trafficcontrol/traffic_ops/client"
-       "net/http"
-       "sort"
-       "sync"
-       "time"
-)
-
-const UserAgent = "tm-offline-validator/0.1"
-
-const LogLimit = 10
-
-type Log struct {
-       log       *[]string
-       limit     int
-       errored   *bool
-       lastCheck *time.Time
-       m         *sync.RWMutex
-}
-
-func (l *Log) Add(msg string) {
-       l.m.Lock()
-       defer l.m.Unlock()
-       *l.log = append([]string{msg}, *l.log...)
-       if len(*l.log) > l.limit {
-               *l.log = (*l.log)[:l.limit]
-       }
-}
-
-func (l *Log) Get() []string {
-       l.m.RLock()
-       defer l.m.RUnlock()
-       return *l.log
-}
-
-func (l *Log) GetErrored() (bool, time.Time) {
-       l.m.RLock()
-       defer l.m.RUnlock()
-       return *l.errored, *l.lastCheck
-}
-
-func (l *Log) SetErrored(e bool) {
-       l.m.Lock()
-       defer l.m.Unlock()
-       *l.errored = e
-       *l.lastCheck = time.Now()
-}
-
-func NewLog() Log {
-       log := make([]string, 0, LogLimit+1)
-       errored := false
-       limit := LogLimit
-       lastCheck := time.Time{}
-       return Log{log: &log, errored: &errored, m: &sync.RWMutex{}, limit: 
limit, lastCheck: &lastCheck}
-}
-
-type Logs struct {
-       logs map[enum.TrafficMonitorName]Log
-       m    *sync.RWMutex
-}
-
-func NewLogs() Logs {
-       return Logs{logs: map[enum.TrafficMonitorName]Log{}, m: &sync.RWMutex{}}
-}
-
-func (l Logs) Get(name enum.TrafficMonitorName) Log {
-       l.m.Lock()
-       defer l.m.Unlock()
-       if _, ok := l.logs[name]; !ok {
-               l.logs[name] = NewLog()
-       }
-       return l.logs[name]
-}
-
-func (l Logs) GetMonitors() []string {
-       l.m.RLock()
-       defer l.m.RUnlock()
-       monitors := []string{}
-       for name, _ := range l.logs {
-               monitors = append(monitors, string(name))
-       }
-       return monitors
-}
-
-func main() {
-       toURI := flag.String("to", "", "The Traffic Ops URI, whose CRConfig to 
validate")
-       toUser := flag.String("touser", "", "The Traffic Ops user")
-       toPass := flag.String("topass", "", "The Traffic Ops password")
-       interval := flag.Duration("interval", time.Second*time.Duration(5), 
"The interval to validate")
-       grace := flag.Duration("grace", time.Second*time.Duration(30), "The 
grace period before invalid states are reported")
-       includeOffline := flag.Bool("includeOffline", false, "Whether to 
include Offline Monitors")
-       help := flag.Bool("help", false, "Usage info")
-       helpBrief := flag.Bool("h", false, "Usage info")
-       flag.Parse()
-       if *help || *helpBrief {
-               fmt.Printf("Usage: go run validate-offline -to 
https://traffic-ops.example.net -touser bill -topass thelizard -tm 
http://traffic-monitor.example.net -interval 5s -grace 30s -includeOffline 
true\n")
-               return
-       }
-
-       toClient, err := to.LoginWithAgent(*toURI, *toUser, *toPass, true, 
UserAgent, false, tmcheck.RequestTimeout)
-       if err != nil {
-               fmt.Printf("Error logging in to Traffic Ops: %v\n", err)
-               return
-       }
-
-       logs := NewLogs()
-
-       onErr := func(name enum.TrafficMonitorName, err error) {
-               log := logs.Get(name)
-               log.Add(fmt.Sprintf("%v ERROR %v\n", time.Now(), err))
-               log.SetErrored(true)
-       }
-
-       onResumeSuccess := func(name enum.TrafficMonitorName) {
-               log := logs.Get(name)
-               log.Add(fmt.Sprintf("%v INFO State Valid\n", time.Now()))
-               log.SetErrored(false)
-       }
-
-       onCheck := func(name enum.TrafficMonitorName, err error) {
-               log := logs.Get(name)
-               log.SetErrored(err != nil)
-       }
-
-       go tmcheck.AllMonitorsCRStatesOfflineValidator(toClient, *interval, 
*includeOffline, *grace, onErr, onResumeSuccess, onCheck)
-
-       if err := serve(logs, *toURI); err != nil {
-               fmt.Printf("Serve error: %v\n", err)
-       }
-}
-
-func serve(logs Logs, toURI string) error {
-       http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
-               w.Header().Set("Access-Control-Allow-Origin", "*")
-               w.Header().Set("Content-Type", "text/html")
-               fmt.Fprintf(w, `<!DOCTYPE html>
-<meta http-equiv="refresh" content="5">
-<meta charset="utf-8">
-<meta name="viewport" content="width=device-width, initial-scale=1">
-<title>Traffic Monitor Offline Validator</title>
-<style type="text/css">body{margin:40px 
auto;line-height:1.6;font-size:18px;color:#444;padding:0 8px 0 
8px}h1,h2,h3{line-height:1.2}span{padding:0px 4px 0px 4px;}</style>`)
-
-               fmt.Fprintf(w, `<p>%s`, toURI)
-
-               fmt.Fprintf(w, `<table style="width:100%%">`)
-
-               monitors := logs.GetMonitors()
-               sort.Strings(monitors) // sort, so they're always in the same 
order in the webpage
-               for _, monitor := range monitors {
-                       fmt.Fprintf(w, `</tr>`)
-
-                       log := logs.Get(enum.TrafficMonitorName(monitor))
-
-                       fmt.Fprintf(w, `<td><span>%s</span></td>`, monitor)
-                       errored, lastCheck := log.GetErrored()
-                       if errored {
-                               fmt.Fprintf(w, `<td><span 
style="color:red">Invalid</span></td>`)
-                       } else {
-                               fmt.Fprintf(w, `<td><span 
style="color:limegreen">Valid</span></td>`)
-                       }
-                       fmt.Fprintf(w, `<td><span>as of %v</span></td>`, 
lastCheck)
-
-                       fmt.Fprintf(w, `<td><span 
style="font-family:monospace">`)
-                       logCopy := log.Get()
-                       firstMsg := ""
-                       if len(logCopy) > 0 {
-                               firstMsg = logCopy[0]
-                       }
-                       fmt.Fprintf(w, "%s\n", firstMsg)
-                       fmt.Fprintf(w, `</span></td>`)
-
-                       fmt.Fprintf(w, `</tr>`)
-               }
-               fmt.Fprintf(w, `</table>`)
-       })
-       return http.ListenAndServe(":80", nil)
-}

http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/c9196a12/traffic_monitor_golang/traffic_monitor/tools/validator-service.go
----------------------------------------------------------------------
diff --git a/traffic_monitor_golang/traffic_monitor/tools/validator-service.go 
b/traffic_monitor_golang/traffic_monitor/tools/validator-service.go
new file mode 100644
index 0000000..0b551c8
--- /dev/null
+++ b/traffic_monitor_golang/traffic_monitor/tools/validator-service.go
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// validator-service is a utility HTTP service which runs the Traffic Monitor 
validators against the monitors listed in Traffic Ops: the CRStates offline 
validator and the peer poller validator. It serves their latest results as a web page.
+
+package main
+
+import (
+       "flag"
+       "fmt"
+       
"github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/enum"
+       
"github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/tmcheck"
+       to "github.com/apache/incubator-trafficcontrol/traffic_ops/client"
+       "io"
+       "net/http"
+       "sort"
+       "sync"
+       "time"
+)
+
+const UserAgent = "tm-offline-validator/0.1"
+
+const LogLimit = 10
+
+type Log struct {
+       log       *[]string
+       limit     int
+       errored   *bool
+       lastCheck *time.Time
+       m         *sync.RWMutex
+}
+
+func (l *Log) Add(msg string) {
+       l.m.Lock()
+       defer l.m.Unlock()
+       *l.log = append([]string{msg}, *l.log...)
+       if len(*l.log) > l.limit {
+               *l.log = (*l.log)[:l.limit]
+       }
+}
+
+func (l *Log) Get() []string {
+       l.m.RLock()
+       defer l.m.RUnlock()
+       return *l.log
+}
+
+func (l *Log) GetErrored() (bool, time.Time) {
+       l.m.RLock()
+       defer l.m.RUnlock()
+       return *l.errored, *l.lastCheck
+}
+
+func (l *Log) SetErrored(e bool) {
+       l.m.Lock()
+       defer l.m.Unlock()
+       *l.errored = e
+       *l.lastCheck = time.Now()
+}
+
+func NewLog() Log {
+       log := make([]string, 0, LogLimit+1)
+       errored := false
+       limit := LogLimit
+       lastCheck := time.Time{}
+       return Log{log: &log, errored: &errored, m: &sync.RWMutex{}, limit: 
limit, lastCheck: &lastCheck}
+}
+
+type Logs struct {
+       logs map[enum.TrafficMonitorName]Log
+       m    *sync.RWMutex
+}
+
+func NewLogs() Logs {
+       return Logs{logs: map[enum.TrafficMonitorName]Log{}, m: &sync.RWMutex{}}
+}
+
+func (l Logs) Get(name enum.TrafficMonitorName) Log {
+       l.m.Lock()
+       defer l.m.Unlock()
+       if _, ok := l.logs[name]; !ok {
+               l.logs[name] = NewLog()
+       }
+       return l.logs[name]
+}
+
+func (l Logs) GetMonitors() []string {
+       l.m.RLock()
+       defer l.m.RUnlock()
+       monitors := []string{}
+       for name, _ := range l.logs {
+               monitors = append(monitors, string(name))
+       }
+       return monitors
+}
+
+func startValidator(validator tmcheck.AllValidatorFunc, toClient *to.Session, 
interval time.Duration, includeOffline bool, grace time.Duration) Logs {
+       logs := NewLogs()
+
+       onErr := func(name enum.TrafficMonitorName, err error) {
+               log := logs.Get(name)
+               log.Add(fmt.Sprintf("%v ERROR %v\n", time.Now(), err))
+               log.SetErrored(true)
+       }
+
+       onResumeSuccess := func(name enum.TrafficMonitorName) {
+               log := logs.Get(name)
+               log.Add(fmt.Sprintf("%v INFO State Valid\n", time.Now()))
+               log.SetErrored(false)
+       }
+
+       onCheck := func(name enum.TrafficMonitorName, err error) {
+               log := logs.Get(name)
+               log.SetErrored(err != nil)
+       }
+
+       go validator(toClient, interval, includeOffline, grace, onErr, 
onResumeSuccess, onCheck)
+       return logs
+}
+
+func main() {
+       toURI := flag.String("to", "", "The Traffic Ops URI, whose CRConfig to 
validate")
+       toUser := flag.String("touser", "", "The Traffic Ops user")
+       toPass := flag.String("topass", "", "The Traffic Ops password")
+       interval := flag.Duration("interval", time.Second*time.Duration(5), 
"The interval to validate")
+       grace := flag.Duration("grace", time.Second*time.Duration(30), "The 
grace period before invalid states are reported")
+       includeOffline := flag.Bool("includeOffline", false, "Whether to 
include Offline Monitors")
+       help := flag.Bool("help", false, "Usage info")
+       helpBrief := flag.Bool("h", false, "Usage info")
+       flag.Parse()
+       if *help || *helpBrief {
+               fmt.Printf("Usage: go run validate-offline -to 
https://traffic-ops.example.net -touser bill -topass thelizard -tm 
http://traffic-monitor.example.net -interval 5s -grace 30s -includeOffline 
true\n")
+               return
+       }
+
+       toClient, err := to.LoginWithAgent(*toURI, *toUser, *toPass, true, 
UserAgent, false, tmcheck.RequestTimeout)
+       if err != nil {
+               fmt.Printf("Error logging in to Traffic Ops: %v\n", err)
+               return
+       }
+
+       crStatesOfflineLogs := 
startValidator(tmcheck.AllMonitorsCRStatesOfflineValidator, toClient, 
*interval, *includeOffline, *grace)
+       peerPollerLogs := startValidator(tmcheck.PeerPollersAllValidator, 
toClient, *interval, *includeOffline, *grace)
+
+       if err := serve(*toURI, crStatesOfflineLogs, peerPollerLogs); err != 
nil {
+               fmt.Printf("Serve error: %v\n", err)
+       }
+}
+
+func printLogs(logs Logs, w io.Writer) {
+       fmt.Fprintf(w, `<table style="width:100%%">`)
+
+       monitors := logs.GetMonitors()
+       sort.Strings(monitors) // sort, so they're always in the same order in 
the webpage
+       for _, monitor := range monitors {
+               fmt.Fprintf(w, `</tr>`)
+
+               log := logs.Get(enum.TrafficMonitorName(monitor))
+
+               fmt.Fprintf(w, `<td><span>%s</span></td>`, monitor)
+               errored, lastCheck := log.GetErrored()
+               if errored {
+                       fmt.Fprintf(w, `<td><span 
style="color:red">Invalid</span></td>`)
+               } else {
+                       fmt.Fprintf(w, `<td><span 
style="color:limegreen">Valid</span></td>`)
+               }
+               fmt.Fprintf(w, `<td><span>as of %v</span></td>`, lastCheck)
+
+               fmt.Fprintf(w, `<td><span style="font-family:monospace">`)
+               logCopy := log.Get()
+               firstMsg := ""
+               if len(logCopy) > 0 {
+                       firstMsg = logCopy[0]
+               }
+               fmt.Fprintf(w, "%s\n", firstMsg)
+               fmt.Fprintf(w, `</span></td>`)
+
+               fmt.Fprintf(w, `</tr>`)
+       }
+       fmt.Fprintf(w, `</table>`)
+}
+
+func serve(toURI string, crStatesOfflineLogs Logs, peerPollerLogs Logs) error {
+       http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+               w.Header().Set("Access-Control-Allow-Origin", "*")
+               w.Header().Set("Content-Type", "text/html")
+               fmt.Fprintf(w, `<!DOCTYPE html>
+<meta http-equiv="refresh" content="5">
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>Traffic Monitor Offline Validator</title>
+<style type="text/css">body{margin:40px 
auto;line-height:1.6;font-size:18px;color:#444;padding:0 8px 0 
8px}h1,h2,h3{line-height:1.2}span{padding:0px 4px 0px 4px;}</style>`)
+
+               fmt.Fprintf(w, `<h1>Traffic Monitor Validator</h1>`)
+
+               fmt.Fprintf(w, `<p>%s`, toURI)
+
+               fmt.Fprintf(w, `<h2>CRStates Offline</h2>`)
+               printLogs(crStatesOfflineLogs, w)
+
+               fmt.Fprintf(w, `<h2>Peer Poller</h2>`)
+               printLogs(peerPollerLogs, w)
+
+       })
+       return http.ListenAndServe(":80", nil)
+}

Reply via email to