Repository: incubator-trafficcontrol Updated Branches: refs/heads/master 33fa72c4a -> 43bb5e0b6
Add TM2 validator for query interval Project: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/commit/0f564553 Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/tree/0f564553 Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/diff/0f564553 Branch: refs/heads/master Commit: 0f564553f62acd3a3092ccb4f5809707de7f2f1e Parents: 095def7 Author: Robert Butts <[email protected]> Authored: Thu Mar 30 17:01:53 2017 -0600 Committer: Dave Neuman <[email protected]> Committed: Wed Apr 12 15:43:31 2017 -0600 ---------------------------------------------------------------------- .../traffic_monitor/tmcheck/peerpoller.go | 29 +------ .../traffic_monitor/tmcheck/queryinterval.go | 85 ++++++++++++++++++++ .../traffic_monitor/tmcheck/tmcheck.go | 22 ++++- .../tools/nagios-validate-queryinterval.go | 69 ++++++++++++++++ .../traffic_monitor/tools/validator-service.go | 9 ++- 5 files changed, 185 insertions(+), 29 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/0f564553/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go ---------------------------------------------------------------------- diff --git a/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go b/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go index cdeaf36..999142b 100644 --- a/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go +++ b/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go @@ -20,44 +20,21 @@ package tmcheck import ( - "encoding/json" "fmt" "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/enum" to "github.com/apache/incubator-trafficcontrol/traffic_ops/client" - "io/ioutil" "time" ) const PeerPollMax = time.Duration(10) * time.Second -const TrafficMonitorStatsPath = "/publish/Stats" - -// 
TrafficMonitorStatsJSON represents the JSON returned by Traffic Monitor's Stats endpoint. This currently only contains the Oldest Polled Peer Time member, as needed by this library. -type TrafficMonitorStatsJSON struct { - Stats TrafficMonitorStats `json:"stats"` -} - -// TrafficMonitorStats represents the internal JSON object returned by Traffic Monitor's Stats endpoint. This currently only contains the Oldest Polled Peer Time member, as needed by this library. -type TrafficMonitorStats struct { - OldestPolledPeerTime int `json:"Oldest Polled Peer Time (ms)"` -} - func GetOldestPolledPeerTime(uri string) (time.Duration, error) { - resp, err := getClient().Get(uri + TrafficMonitorStatsPath) + stats, err := GetStats(uri + TrafficMonitorStatsPath) if err != nil { - return time.Duration(0), fmt.Errorf("reading reply from %v: %v\n", uri, err) - } - respBytes, err := ioutil.ReadAll(resp.Body) - if err != nil { - return time.Duration(0), fmt.Errorf("reading reply from %v: %v\n", uri, err) - } - - stats := TrafficMonitorStatsJSON{} - if err := json.Unmarshal(respBytes, &stats); err != nil { - return time.Duration(0), fmt.Errorf("unmarshalling: %v", err) + return time.Duration(0), fmt.Errorf("getting stats: %v", err) } - oldestPolledPeerTime := time.Duration(stats.Stats.OldestPolledPeerTime) * time.Millisecond + oldestPolledPeerTime := time.Duration(stats.OldestPolledPeerMs) * time.Millisecond return oldestPolledPeerTime, nil } http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/0f564553/traffic_monitor_golang/traffic_monitor/tmcheck/queryinterval.go ---------------------------------------------------------------------- diff --git a/traffic_monitor_golang/traffic_monitor/tmcheck/queryinterval.go b/traffic_monitor_golang/traffic_monitor/tmcheck/queryinterval.go new file mode 100644 index 0000000..e302214 --- /dev/null +++ b/traffic_monitor_golang/traffic_monitor/tmcheck/queryinterval.go @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package tmcheck + +import ( + "fmt" + "time" + + "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/enum" + to "github.com/apache/incubator-trafficcontrol/traffic_ops/client" +) + +const QueryIntervalMax = time.Duration(10) * time.Second + +// ValidateQueryInterval validates the given monitor has an acceptable Query Interval 95th percentile. +func ValidateQueryInterval(tmURI string, toClient *to.Session) error { + stats, err := GetStats(tmURI + TrafficMonitorStatsPath) + if err != nil { + return fmt.Errorf("getting Stats: %v", err) + } + queryInterval := time.Duration(stats.QueryInterval95thPercentile) * time.Millisecond + + if queryInterval > QueryIntervalMax { + return fmt.Errorf("Query Interval 95th Percentile %v greater than max %v", queryInterval, QueryIntervalMax) + } + return nil +} + +// QueryIntervalValidator is designed to be run as a goroutine, and does not return. It continuously validates every `interval`, and calls `onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll. 
+func QueryIntervalValidator( + tmURI string, + toClient *to.Session, + interval time.Duration, + grace time.Duration, + onErr func(error), + onResumeSuccess func(), + onCheck func(error), +) { + Validator(tmURI, toClient, interval, grace, onErr, onResumeSuccess, onCheck, ValidateQueryInterval) +} + +// AllMonitorsQueryIntervalValidator is designed to be run as a goroutine, and does not return. It continuously validates every `interval`, and calls `onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll. Note the error passed to `onErr` may be a general validation error not associated with any monitor, in which case the passed `enum.TrafficMonitorName` will be empty. +func AllMonitorsQueryIntervalValidator( + toClient *to.Session, + interval time.Duration, + includeOffline bool, + grace time.Duration, + onErr func(enum.TrafficMonitorName, error), + onResumeSuccess func(enum.TrafficMonitorName), + onCheck func(enum.TrafficMonitorName, error), +) { + AllValidator(toClient, interval, includeOffline, grace, onErr, onResumeSuccess, onCheck, ValidateAllMonitorsQueryInterval) +} + +// ValidateAllMonitorsQueryInterval validates, for all monitors in the given Traffic Ops, an acceptable query interval 95th percentile. 
+func ValidateAllMonitorsQueryInterval(toClient *to.Session, includeOffline bool) (map[enum.TrafficMonitorName]error, error) { + servers, err := GetMonitors(toClient, includeOffline) + if err != nil { + return nil, err + } + + errs := map[enum.TrafficMonitorName]error{} + for _, server := range servers { + uri := fmt.Sprintf("http://%s.%s", server.HostName, server.DomainName) + errs[enum.TrafficMonitorName(server.HostName)] = ValidateQueryInterval(uri, toClient) + } + return errs, nil +} http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/0f564553/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go ---------------------------------------------------------------------- diff --git a/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go b/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go index 1ef2532..db1c182 100644 --- a/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go +++ b/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go @@ -29,6 +29,7 @@ import ( "time" "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/crconfig" + "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/datareq" dsdata "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/deliveryservicedata" "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/enum" "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/peer" @@ -40,6 +41,7 @@ const RequestTimeout = time.Second * time.Duration(30) const TrafficMonitorCRStatesPath = "/publish/CrStates" const TrafficMonitorDSStatsPath = "/publish/DsStats" const TrafficMonitorConfigDocPath = "/publish/ConfigDoc" +const TrafficMonitorStatsPath = "/publish/Stats" func getClient() *http.Client { return &http.Client{ @@ -89,7 +91,7 @@ func GetCRStates(uri string) (*peer.Crstates, error) { return &states, nil } -// GetCRStates gets the CRStates from the given Traffic 
Monitor. +// GetDSStats gets the DSStats from the given Traffic Monitor. func GetDSStats(uri string) (*dsdata.StatsOld, error) { resp, err := getClient().Get(uri) if err != nil { @@ -107,6 +109,24 @@ func GetDSStats(uri string) (*dsdata.StatsOld, error) { return &dsStats, nil } +// GetStats gets the stats from the given Traffic Monitor. +func GetStats(uri string) (*datareq.Stats, error) { + resp, err := getClient().Get(uri) + if err != nil { + return nil, fmt.Errorf("reading reply from %v: %v\n", uri, err) + } + respBytes, err := ioutil.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("reading reply from %v: %v\n", uri, err) + } + + stats := datareq.JSONStats{} + if err := json.Unmarshal(respBytes, &stats); err != nil { + return nil, fmt.Errorf("unmarshalling: %v", err) + } + return &stats.Stats, nil +} + type ValidatorFunc func( tmURI string, toClient *to.Session, http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/0f564553/traffic_monitor_golang/traffic_monitor/tools/nagios-validate-queryinterval.go ---------------------------------------------------------------------- diff --git a/traffic_monitor_golang/traffic_monitor/tools/nagios-validate-queryinterval.go b/traffic_monitor_golang/traffic_monitor/tools/nagios-validate-queryinterval.go new file mode 100644 index 0000000..279ea18 --- /dev/null +++ b/traffic_monitor_golang/traffic_monitor/tools/nagios-validate-queryinterval.go @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package main + +import ( + "flag" + "fmt" + "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/common/nagios" + "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/tmcheck" + to "github.com/apache/incubator-trafficcontrol/traffic_ops/client" +) + +const UserAgent = "tm-queryinterval-validator/0.1" + +func main() { + toURI := flag.String("to", "", "The Traffic Ops URI, whose CRConfig to validate") + toUser := flag.String("touser", "", "The Traffic Ops user") + toPass := flag.String("topass", "", "The Traffic Ops password") + includeOffline := flag.Bool("includeOffline", false, "Whether to include Offline Monitors") + help := flag.Bool("help", false, "Usage info") + helpBrief := flag.Bool("h", false, "Usage info") + flag.Parse() + if *help || *helpBrief || *toURI == "" { + fmt.Printf("Usage: ./nagios-validate-offline -to https://traffic-ops.example.net -touser bill -topass thelizard -includeOffline true\n") + return + } + + toClient, err := to.LoginWithAgent(*toURI, *toUser, *toPass, true, UserAgent, false, tmcheck.RequestTimeout) + if err != nil { + fmt.Printf("Error logging in to Traffic Ops: %v\n", err) + return + } + + monitorErrs, err := tmcheck.ValidateAllMonitorsQueryInterval(toClient, *includeOffline) + + if err != nil { + nagios.Exit(nagios.Critical, fmt.Sprintf("Error validating monitor offline statuses: %v", err)) + } + + errStr := "" + for monitor, err := range monitorErrs { + if err != nil { + errStr += fmt.Sprintf("error validating offline status for monitor %v : %v\n", monitor, 
err.Error()) + } + } + + if errStr != "" { + nagios.Exit(nagios.Critical, errStr) + } + + nagios.Exit(nagios.Ok, "") +} http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/0f564553/traffic_monitor_golang/traffic_monitor/tools/validator-service.go ---------------------------------------------------------------------- diff --git a/traffic_monitor_golang/traffic_monitor/tools/validator-service.go b/traffic_monitor_golang/traffic_monitor/tools/validator-service.go index 5dc1e04..20dff82 100644 --- a/traffic_monitor_golang/traffic_monitor/tools/validator-service.go +++ b/traffic_monitor_golang/traffic_monitor/tools/validator-service.go @@ -158,8 +158,9 @@ func main() { crStatesOfflineLogs := startValidator(tmcheck.AllMonitorsCRStatesOfflineValidator, toClient, *interval, *includeOffline, *grace) peerPollerLogs := startValidator(tmcheck.PeerPollersAllValidator, toClient, *interval, *includeOffline, *grace) dsStatsLogs := startValidator(tmcheck.AllMonitorsDSStatsValidator, toClient, *interval, *includeOffline, *grace) + queryIntervalLogs := startValidator(tmcheck.AllMonitorsQueryIntervalValidator, toClient, *interval, *includeOffline, *grace) - if err := serve(*toURI, crStatesOfflineLogs, peerPollerLogs, dsStatsLogs); err != nil { + if err := serve(*toURI, crStatesOfflineLogs, peerPollerLogs, dsStatsLogs, queryIntervalLogs); err != nil { fmt.Printf("Serve error: %v\n", err) } } @@ -199,7 +200,7 @@ func printLogs(logs Logs, w io.Writer) { fmt.Fprintf(w, `</table>`) } -func serve(toURI string, crStatesOfflineLogs Logs, peerPollerLogs Logs, dsStatsLogs Logs) error { +func serve(toURI string, crStatesOfflineLogs Logs, peerPollerLogs Logs, dsStatsLogs Logs, queryIntervalLogs Logs) error { http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Access-Control-Allow-Origin", "*") w.Header().Set("Content-Type", "text/html") @@ -226,6 +227,10 @@ func serve(toURI string, crStatesOfflineLogs Logs, peerPollerLogs Logs, dsStatsL 
fmt.Fprintf(w, `<h2>Delivery Services</h2>`) fmt.Fprintf(w, `<h3>validates all Delivery Services in the CRConfig exist in DsStats</h3>`) printLogs(dsStatsLogs, w) + + fmt.Fprintf(w, `<h2>Query Interval</h2>`) + fmt.Fprintf(w, `<h3>validates all Monitors' Query Interval (95th percentile) is less than %v</h3>`, tmcheck.QueryIntervalMax) + printLogs(queryIntervalLogs, w) }) return http.ListenAndServe(":80", nil) }
