This is an automated email from the ASF dual-hosted git repository. ocket8888 pushed a commit to branch 5.1.x in repository https://gitbox.apache.org/repos/asf/trafficcontrol.git
commit 47b68cd4c2ad6ac7699661a1b5e7ba8053a357a6 Author: Taylor Clayton Frey <[email protected]> AuthorDate: Thu Apr 15 17:10:20 2021 -0600 Fixes an issue where multiple interfaces reported by a cache are included in the vitals (bandwidth) (#5730) * Normalizing README.md for MacOS case insensitive discrepancies * Removed whitespace at end of line * Only use monitored interfaces for vitals Per bug report #5695, any and all interfaces returned in polling were used to calculate vitals, such as bytes in or out, bandwidth and more. However, only designated interfaces should be used for calculating such metrics and vitals. As such, this commit will check for monitored interfaces and only use those for the calculations. * Normalized README.md file * Normalized README file * Add bugfix info to CHANGELOG * Check monitor config for nil Ensure callers for GetVitals must include a valid config and the config should contain interfaces * Fix merge issue with CHANGELOG * Address PR feedback. Remove extra blank line from CHANGELOG Condense test structs for sake of clarity Change log levels to reflect actual logging issues Add logging clarity to missing or empty caches and interfaces Co-authored-by: Taylor Frey <[email protected]> (cherry picked from commit 89669fd67428cabe3a6450480a6ff22853e53b19) --- CHANGELOG.md | 2 + traffic_monitor/health/cache.go | 37 +++- traffic_monitor/health/cache_test.go | 329 +++++++++++++++++++++++++++++++++-- 3 files changed, 351 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60b4fa0..267275a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed ORT being unable to update URLSIG keys for Delivery Services - Fixed an issue where Traffic Ops becoming unavailable caused Traffic Monitor to segfault and crash - [#5754](https://github.com/apache/trafficcontrol/issues/5754) - Ensure Health Threshold Parameters use legacy format for legacy Monitoring Config handler +- [#5695](https://github.com/apache/trafficcontrol/issues/5695) - Ensure vitals are calculated only against monitored interfaces + - [#5744](https://github.com/apache/trafficcontrol/issues/5744) - Sort TM Delivery Service States page by DS name ## [5.1.1] - 2021-03-19 diff --git a/traffic_monitor/health/cache.go b/traffic_monitor/health/cache.go index 899187f..efcab8f 100644 --- a/traffic_monitor/health/cache.go +++ b/traffic_monitor/health/cache.go @@ -53,6 +53,11 @@ func GetVitals(newResult *cache.Result, prevResult *cache.Result, mc *tc.Traffic return } + if mc == nil { + log.Errorf("TrafficMonitorConfigMap must not be nil") + return + } + if newResult.InterfaceVitals == nil { newResult.InterfaceVitals = map[string]cache.Vitals{} } @@ -60,7 +65,37 @@ func GetVitals(newResult *cache.Result, prevResult *cache.Result, mc *tc.Traffic // proc.loadavg -- we're using the 1 minute average (!?) newResult.Vitals.LoadAvg = newResult.Statistics.Loadavg.One - for ifaceName, iface := range newResult.Interfaces() { + ts, exists := mc.TrafficServer[newResult.ID] + if !exists { + log.Errorf("cache server not found in config map for cache: %s", newResult.ID) + return + } + if ts.Interfaces == nil { + log.Warnf("no interfaces reported in config map for cache: %s", newResult.ID) + return + } + + var monitoredInterfaces []tc.ServerInterfaceInfo + for _, srvrIfaceInfo := range mc.TrafficServer[newResult.ID].Interfaces { + if srvrIfaceInfo.Monitor { + monitoredInterfaces = append(monitoredInterfaces, srvrIfaceInfo) + } + } + + if len(monitoredInterfaces) == 0 { + log.Warnf("no interfaces selected to be monitored for %v", newResult.ID) + return + } + + for _, monitoredInterface := range monitoredInterfaces { + ifaceName := monitoredInterface.Name + iface, exists := newResult.Interfaces()[ifaceName] + if !exists { + // monitored interface doesn't exist in Result interfaces, skip + log.Warnf("monitored interface %v does not exist in cache %v", ifaceName, newResult.ID) + continue + } + ifaceVitals := cache.Vitals{ BytesIn: iface.BytesIn, BytesOut: iface.BytesOut, diff --git a/traffic_monitor/health/cache_test.go b/traffic_monitor/health/cache_test.go index 74486e8..f87f744 100644 --- a/traffic_monitor/health/cache_test.go +++ b/traffic_monitor/health/cache_test.go @@ -33,9 +33,315 @@ import ( "github.com/apache/trafficcontrol/lib/go-tc" ) +// TestNoMonitoredInterfacesGetVitals assures that GetVitals +// does not fail even if no interfaces are marked to be monitored +func TestNoMonitoredInterfacesGetVitals(t *testing.T) { + serverID := "no-monitored" + fakeRequestTime := time.Now() + zeroValueVitals := cache.Vitals{} + + // Interfaces to monitor are marked true (none) + tmcm := tc.TrafficMonitorConfigMap{ + TrafficServer: map[string]tc.TrafficServer{ + serverID: { + Interfaces: []tc.ServerInterfaceInfo{ + { + Name: "bond0", + Monitor: false, + }, + { + Name: "bond1", + Monitor: false, + }, + { + Name: "lo", + Monitor: false, + }, + }, + }, + }, + } + + // multiple interfaces, plus extra + firstResult := cache.Result{ + ID: serverID, + Error: nil, + Miscellaneous: map[string]interface{}{}, + Statistics: cache.Statistics{ + Interfaces: map[string]cache.Interface{ + "bond0": { + Speed: 100000, + BytesIn: 570791700709, + BytesOut: 4212211168526, + }, + "bond1": { + Speed: 100000, + BytesIn: 1989352297218, + BytesOut: 10630690813, + }, + "lo": { + Speed: 0, + BytesIn: 181882394, + BytesOut: 181882394, + }, + "em5": { + Speed: 0, + BytesIn: 0, + BytesOut: 0, + }, + }, + }, + Time: fakeRequestTime, + RequestTime: time.Second, + Vitals: cache.Vitals{}, + InterfaceVitals: nil, + PollID: 42, + PollFinished: make(chan uint64, 1), + PrecomputedData: cache.PrecomputedData{}, + Available: true, + UsingIPv4: false, + } + GetVitals(&firstResult, nil, &tmcm) + + // No interfaces were selected to be monitored so none + // should have been added later + if len(firstResult.InterfaceVitals) > 0 { + t.Errorf("InterfaceVitals map should be empty. expected: %v actual: %v:", 0, len(firstResult.InterfaceVitals)) + } + + // No interfaces were selected to be monitored so no vitals + // should have been calculated + if firstResult.Vitals != zeroValueVitals { + t.Errorf("Vitals should have zero values. expected: %v actual: %v:", zeroValueVitals, firstResult.Vitals) + } + + secondResult := firstResult + secondResult.Time = fakeRequestTime.Add(5 * time.Second) + + GetVitals(&secondResult, &firstResult, &tmcm) + + // No interfaces were selected to be monitored so none + // should have been added later + if len(secondResult.InterfaceVitals) > 0 { + t.Errorf("InterfaceVitals map should be empty. expected: %v actual: %v:", 0, len(secondResult.InterfaceVitals)) + } + + // No interfaces were selected to be monitored so no vitals + // should have been calculated + if secondResult.Vitals != zeroValueVitals { + t.Errorf("Vitals should have zero values. expected: %v actual: %v:", zeroValueVitals, secondResult.Vitals) + } + + // The previous results should not have been impacted + if firstResult.Vitals != zeroValueVitals { + t.Errorf("Vitals should have zero values. expected: %v actual: %v:", zeroValueVitals, firstResult.Vitals) + } +} + +// TestDualHomingMonitoredInterfacesGetVitals ensures cache servers +// with multiple interfaces correctly calculate bandwidth based on +// whether the interfaces are marked as "Monitor this interface" +func TestDualHomingMonitoredInterfacesGetVitals(t *testing.T) { + + serverID := "dual-homed" + fakeRequestTime := time.Now() + + // Interfaces to monitor are marked true + tmcm := tc.TrafficMonitorConfigMap{ + TrafficServer: map[string]tc.TrafficServer{ + serverID: { + Interfaces: []tc.ServerInterfaceInfo{ + { + Name: "bond0", + Monitor: true, + }, + { + Name: "bond1", + Monitor: true, + }, + { + Name: "lo", + Monitor: false, + }, + }, + }, + }, + } + + // multiple interfaces, plus extras + firstResult := cache.Result{ + ID: serverID, + Error: nil, + Miscellaneous: map[string]interface{}{}, + Statistics: cache.Statistics{ + Interfaces: map[string]cache.Interface{ + "bond0": { + Speed: 100000, + BytesIn: 570791700709, + BytesOut: 4212211168526, + }, + "bond1": { + Speed: 100000, + BytesIn: 1989352297218, + BytesOut: 10630690813, + }, + "p1p1": { + Speed: 100000, + BytesIn: 570793589545, + BytesOut: 4212220919951, + }, + "p3p1": { + Speed: 100000, + BytesIn: 1989354450479, + BytesOut: 10630690813, + }, + "lo": { + Speed: 0, + BytesIn: 181882394, + BytesOut: 181882394, + }, + "em5": { + Speed: 0, + BytesIn: 0, + BytesOut: 0, + }, + "em6": { + Speed: 0, + BytesIn: 0, + BytesOut: 0, + }, + }, + }, + Time: fakeRequestTime, + RequestTime: time.Second, + Vitals: cache.Vitals{}, + InterfaceVitals: nil, + PollID: 42, + PollFinished: make(chan uint64, 1), + PrecomputedData: cache.PrecomputedData{}, + Available: true, + UsingIPv4: false, + } + GetVitals(&firstResult, nil, &tmcm) + + // Two interfaces were selected to be monitored so they + // should have been added later + if len(firstResult.InterfaceVitals) != 2 { + t.Errorf("InterfaceVitals map should not be empty. expected: %v actual: %v:", 2, len(firstResult.InterfaceVitals)) + } + + expectedFirstVitals := cache.Vitals{ + LoadAvg: 0, + BytesIn: 2560143997927, + BytesOut: 4222841859339, + KbpsOut: 0, + MaxKbpsOut: 200000000, + } + // Only two interfaces were selected to be monitored so vitals + // should have been calculated based on those two (bond0 and bond1) + if firstResult.Vitals != expectedFirstVitals { + t.Errorf("Vitals do not match expected output. expected: %v actual: %v:", expectedFirstVitals, firstResult.Vitals) + } + + secondResult := firstResult + secondResult.Statistics.Interfaces = map[string]cache.Interface{ + "bond0": { + Speed: 100000, + BytesIn: 572608907987, + BytesOut: 4227149141326, + }, + "bond1": { + Speed: 100000, + BytesIn: 1996376171468, + BytesOut: 10630696953, + }, + "p1p1": { + Speed: 100000, + BytesIn: 572609282353, + BytesOut: 4227157881921, + }, + "p3p1": { + Speed: 100000, + BytesIn: 1996378204692, + BytesOut: 10630696953, + }, + "lo": { + Speed: 0, + BytesIn: 181882394, + BytesOut: 181882394, + }, + "em5": { + Speed: 0, + BytesIn: 0, + BytesOut: 0, + }, + "em6": { + Speed: 0, + BytesIn: 0, + BytesOut: 0, + }, + } + secondResult.Time = fakeRequestTime.Add(5 * time.Second) + secondResult.Vitals = cache.Vitals{} + + GetVitals(&secondResult, &firstResult, &tmcm) + + // Two interfaces were selected to be monitored so they + // should have been added later + if len(secondResult.InterfaceVitals) != 2 { + t.Errorf("InterfaceVitals map should not be empty. expected: %v actual: %v:", 2, len(secondResult.InterfaceVitals)) + } + + expectedSecondVitals := cache.Vitals{ + LoadAvg: 0, + BytesIn: 2568985079455, + BytesOut: 4237779838279, + KbpsOut: 23900766, + MaxKbpsOut: 200000000, + } + + // Only two interfaces were selected to be monitored so vitals + // should have been calculated based on those two (bond0 and bond1) + if secondResult.Vitals != expectedSecondVitals { + t.Errorf("Vitals do not match expected output. expected: %v actual: %v:", expectedSecondVitals, secondResult.Vitals) + } + + // Previous result values should have been altered + if firstResult.Vitals != expectedFirstVitals { + t.Errorf("Vitals do not match expected output. expected: %v actual: %v:", expectedFirstVitals, firstResult.Vitals) + } +} + func TestCalcAvailabilityThresholds(t *testing.T) { + + resultID := "myCacheName" + + mc := tc.TrafficMonitorConfigMap{ + TrafficServer: map[string]tc.TrafficServer{ + string(resultID): { + ServerStatus: string(tc.CacheStatusReported), + Profile: "myProfileName", + Interfaces: []tc.ServerInterfaceInfo{ + { + Name: "bond0", + Monitor: true, + }, + { + Name: "eth0", + Monitor: true, + }, + { + Name: "lo", + Monitor: false, + }, + }, + }, + }, + Profile: map[string]tc.TMProfile{}, + } + result := cache.Result{ - ID: "myCacheName", + ID: resultID, Error: nil, Miscellaneous: map[string]interface{}{}, Statistics: cache.Statistics{ @@ -48,12 +354,12 @@ func TestCalcAvailabilityThresholds(t *testing.T) { LatestPID: 32109, }, Interfaces: map[string]cache.Interface{ - "bond0": cache.Interface{ + "bond0": { Speed: 20000, BytesIn: 1234567891011121, BytesOut: 12345678910111213, }, - "eth0": cache.Interface{ + "eth0": { Speed: 30000, BytesIn: 1234567891011121, BytesOut: 12345678910111213, @@ -71,7 +377,7 @@ func TestCalcAvailabilityThresholds(t *testing.T) { Available: true, UsingIPv4: false, } - GetVitals(&result, nil, nil) + GetVitals(&result, nil, &mc) totalBytesOut := result.Statistics.Interfaces["bond0"].BytesOut + result.Statistics.Interfaces["eth0"].BytesOut if totalBytesOut != result.Vitals.BytesOut { @@ -86,23 +392,13 @@ func TestCalcAvailabilityThresholds(t *testing.T) { Vitals: cache.Vitals{BytesOut: result.Vitals.BytesOut - 1250000000}, // 10 gigabits InterfaceVitals: prevIV, } - GetVitals(&result, &prevResult, nil) + GetVitals(&result, &prevResult, &mc) - statResultHistory := (*threadsafe.ResultStatHistory)(nil) - mc := tc.TrafficMonitorConfigMap{ - TrafficServer: map[string]tc.TrafficServer{ - string(result.ID): { - ServerStatus: string(tc.CacheStatusReported), - Profile: "myProfileName", - }, - }, - Profile: map[string]tc.TMProfile{}, - } mc.Profile[mc.TrafficServer[string(result.ID)].Profile] = tc.TMProfile{ Name: mc.TrafficServer[string(result.ID)].Profile, Parameters: tc.TMParameters{ Thresholds: map[string]tc.HealthThreshold{ - "availableBandwidthInKbps": tc.HealthThreshold{ + "availableBandwidthInKbps": { Val: 15000000, Comparator: ">", }, @@ -130,6 +426,7 @@ func TestCalcAvailabilityThresholds(t *testing.T) { // Ensure that if the interfaces haven't been reported yet that CalcAvailability doesn't panic original := results[0].Statistics.Interfaces + statResultHistory := (*threadsafe.ResultStatHistory)(nil) results[0].Statistics.Interfaces = make(map[string]cache.Interface) CalcAvailability(results, pollerName, statResultHistory, mc, toData, localCacheStatusThreadsafe, localStates, events, config.Both) results[0].Statistics.Interfaces = original
