This is an automated email from the ASF dual-hosted git repository.
mattjackson pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficcontrol.git
The following commit(s) were added to refs/heads/master by this push:
new 1c956e8 Fix TM stats_over_http parsing to sum DS data (#6423)
1c956e8 is described below
commit 1c956e80f46ccbf4b7056639568240d99430a8cc
Author: Rawlin Peters <[email protected]>
AuthorDate: Mon Dec 20 15:57:50 2021 -0700
Fix TM stats_over_http parsing to sum DS data (#6423)
The stats_over_http parsing was replacing stat values instead of summing
them for delivery services with multiple regexes. Since stats_over_http
response output can change order from request to request, this was
causing TM to report "new stat is lower than last stat" warnings when
the response order changed (which happens quite frequently). By properly
summing together the stats that match the same underlying delivery
service, these warnings disappear, and the reported data from TM is more
accurate.
Additionally, some unused parameters and return values were cleaned up
while tracking down the underlying cause of these warnings.
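For illustration, the shape of the summing fix is roughly the following minimal,
standalone Go sketch; the names (dsStat, sumByDS, the example hostnames) are
hypothetical, not the actual Traffic Monitor types:

    package main

    import "fmt"

    // dsStat is a stand-in for the per-delivery-service counters that TM
    // precomputes from stats_over_http output (hypothetical simplified type).
    type dsStat struct {
        Status2xx uint64
        OutBytes  uint64
    }

    // sumByDS accumulates per-remap-rule stats into one entry per delivery
    // service. Using "+=" instead of "=" means a DS with multiple regexes
    // (multiple remap rules) gets the sum of all of them, so the order the
    // rules appear in the response no longer matters.
    func sumByDS(remapToDS map[string]string, remapStats map[string]dsStat) map[string]*dsStat {
        out := map[string]*dsStat{}
        for remap, stat := range remapStats {
            ds, ok := remapToDS[remap]
            if !ok {
                continue // remap rule not tied to a known delivery service
            }
            if out[ds] == nil {
                out[ds] = &dsStat{}
            }
            out[ds].Status2xx += stat.Status2xx // summing, not replacing
            out[ds].OutBytes += stat.OutBytes
        }
        return out
    }

    func main() {
        remapToDS := map[string]string{"edge.example.com": "demo-ds", "alt.example.com": "demo-ds"}
        remapStats := map[string]dsStat{
            "edge.example.com": {Status2xx: 10, OutBytes: 1000},
            "alt.example.com":  {Status2xx: 5, OutBytes: 500},
        }
        fmt.Println(*sumByDS(remapToDS, remapStats)["demo-ds"]) // prints {15 1500}
    }

The actual change is the switch from "=" to "+=" in
traffic_monitor/cache/stats_over_http.go, visible in the diff below.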
---
 CHANGELOG.md                             |  1 +
 traffic_monitor/cache/astats.go          | 12 ++++-----
 traffic_monitor/cache/cache.go           |  2 +-
 traffic_monitor/cache/stats_over_http.go | 16 ++++++------
 traffic_monitor/ds/stat.go               | 43 ++++++++++++++------------------
 traffic_monitor/ds/stat_test.go          | 13 ++--------
 traffic_monitor/manager/manager.go       |  1 -
 traffic_monitor/manager/stat.go          | 15 +++--------
 8 files changed, 41 insertions(+), 62 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d147dce..664b19f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -35,6 +35,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- [#6259](https://github.com/apache/trafficcontrol/issues/6259) - Traffic Portal No Longer Allows Spaces in Server Object "Router Port Name"
- [#6392](https://github.com/apache/trafficcontrol/issues/6392) - Traffic Ops prevents assigning ORG servers to topology-based delivery services (as well as a number of other valid operations being prohibited by "last server assigned to DS" validations which don't apply to topology-based delivery services)
- [#6175](https://github.com/apache/trafficcontrol/issues/6175) - POST request to /api/4.0/phys_locations accepts mismatch values for regionName.
+- Fixed Traffic Monitor parsing stats_over_http output so that multiple stats for the same underlying delivery service (when the delivery service has more than 1 regex) are properly summed together. This makes the resulting data more accurate in addition to fixing the "new stat is lower than last stat" warnings.
- [#6285](https://github.com/apache/trafficcontrol/issues/6285) - The Traffic Ops Postinstall script will work in CentOS 7, even if Python 3 is installed
- [#5373](https://github.com/apache/trafficcontrol/issues/5373) - Traffic Monitor logs not consistent
- Traffic Ops: Sanitize username before executing LDAP query
diff --git a/traffic_monitor/cache/astats.go b/traffic_monitor/cache/astats.go
index fe33dcf..00981b3 100644
--- a/traffic_monitor/cache/astats.go
+++ b/traffic_monitor/cache/astats.go
@@ -139,7 +139,7 @@ func astatsPrecompute(cacheName string, data todata.TOData, stats Statistics, mi
var err error
for stat, value := range miscStats {
- dsStats, err = astatsProcessStat(cacheName, dsStats, data, stat, value)
+ dsStats, err = astatsProcessStat(dsStats, data, stat, value)
if err != nil && err != dsdata.ErrNotProcessedStat {
log.Infof("precomputing cache %s stat %s value %v error %v", cacheName, stat, value, err)
precomputed.Errors = append(precomputed.Errors, err)
@@ -153,7 +153,7 @@ func astatsPrecompute(cacheName string, data todata.TOData, stats Statistics, mi
// astatsProcessStat and its subsidiary functions act as a State Machine,
// flowing the stat through states for each "." component of the stat name.
-func astatsProcessStat(server string, stats map[string]*DSStat, toData todata.TOData, stat string, value interface{}) (map[string]*DSStat, error) {
+func astatsProcessStat(stats map[string]*DSStat, toData todata.TOData, stat string, value interface{}) (map[string]*DSStat, error) {
parts := strings.Split(stat, ".")
if len(parts) < 1 {
return stats, fmt.Errorf("stat has no initial part")
@@ -161,7 +161,7 @@ func astatsProcessStat(server string, stats map[string]*DSStat, toData todata.TO
switch parts[0] {
case "plugin":
- return astatsProcessStatPlugin(server, stats, toData, stat, parts[1:], value)
+ return astatsProcessStatPlugin(stats, toData, parts[1:], value)
case "proxy":
fallthrough
case "server":
@@ -173,19 +173,19 @@ func astatsProcessStat(server string, stats map[string]*DSStat, toData todata.TO
}
}
-func astatsProcessStatPlugin(server string, stats map[string]*DSStat, toData todata.TOData, stat string, statParts []string, value interface{}) (map[string]*DSStat, error) {
+func astatsProcessStatPlugin(stats map[string]*DSStat, toData todata.TOData, statParts []string, value interface{}) (map[string]*DSStat, error) {
if len(statParts) < 1 {
return stats, fmt.Errorf("stat has no plugin part")
}
switch statParts[0] {
case "remap_stats":
- return astatsProcessStatPluginRemapStats(server, stats, toData, stat, statParts[1:], value)
+ return astatsProcessStatPluginRemapStats(stats, toData, statParts[1:], value)
default:
return stats, fmt.Errorf("stat has unknown plugin part '%s'", statParts[0])
}
}
-func astatsProcessStatPluginRemapStats(server string, stats map[string]*DSStat, toData todata.TOData, stat string, statParts []string, value interface{}) (map[string]*DSStat, error) {
+func astatsProcessStatPluginRemapStats(stats map[string]*DSStat, toData todata.TOData, statParts []string, value interface{}) (map[string]*DSStat, error) {
if len(statParts) < 3 {
return stats, fmt.Errorf("stat has no remap_stats deliveryservice and name parts")
}
diff --git a/traffic_monitor/cache/cache.go b/traffic_monitor/cache/cache.go
index 341b18b..211b390 100644
--- a/traffic_monitor/cache/cache.go
+++ b/traffic_monitor/cache/cache.go
@@ -280,7 +280,7 @@ func (handler Handler) Handle(id string, rdr io.Reader, format string, reqTime t
}
if reqErr != nil {
- log.Warnf("%v handler given error '%v'\n", id, reqErr) // error here, in case the thing that called Handle didn't error
+ log.Warnf("%s handler given error: %s", id, reqErr.Error()) // error here, in case the thing that called Handle didn't error
result.Error = reqErr
handler.resultChan <- result
return
diff --git a/traffic_monitor/cache/stats_over_http.go b/traffic_monitor/cache/stats_over_http.go
index 29e9a76..7926d42 100644
--- a/traffic_monitor/cache/stats_over_http.go
+++ b/traffic_monitor/cache/stats_over_http.go
@@ -343,7 +343,7 @@ func parseNumericStat(value interface{}) (uint64, error) {
if value.(int64) < 0 {
return 0, errors.New("value was negative")
}
- return uint64(value.(uint64)), nil
+ return value.(uint64), nil
case float64:
if value.(float64) > math.MaxUint64 || value.(float64) < 0 {
return 0, errors.New("value out of range for uint64")
@@ -356,7 +356,7 @@ func parseNumericStat(value interface{}) (uint64, error) {
return uint64(value.(float64)), nil
case string:
if statVal, err := strconv.ParseUint(value.(string), 10, 64); err != nil {
- return 0, fmt.Errorf("Could not parse '%v' to uint64: %v", value, err)
+ return 0, fmt.Errorf("could not parse '%v' to uint64: %v", value, err)
} else {
return statVal, nil
}
@@ -421,17 +421,17 @@ func statsOverHTTPPrecompute(cacheName string, data todata.TOData, stats Statist
switch statParts[len(statParts)-1] {
case "status_2xx":
- dsStat.Status2xx = parsedStat
+ dsStat.Status2xx += parsedStat
case "status_3xx":
- dsStat.Status3xx = parsedStat
+ dsStat.Status3xx += parsedStat
case "status_4xx":
- dsStat.Status4xx = parsedStat
+ dsStat.Status4xx += parsedStat
case "status_5xx":
- dsStat.Status5xx = parsedStat
+ dsStat.Status5xx += parsedStat
case "out_bytes":
- dsStat.OutBytes = parsedStat
+ dsStat.OutBytes += parsedStat
case "in_bytes":
- dsStat.InBytes = parsedStat
+ dsStat.InBytes += parsedStat
default:
err = fmt.Errorf("Unknown stat '%s'",
statParts[len(statParts)-1])
log.Infof("precomputing cache %s stat %s value
%v error %v", cacheName, stat, value, err)
diff --git a/traffic_monitor/ds/stat.go b/traffic_monitor/ds/stat.go
index 95f3b68..17bc613 100644
--- a/traffic_monitor/ds/stat.go
+++ b/traffic_monitor/ds/stat.go
@@ -42,7 +42,7 @@ func setStaticData(dsStats *dsdata.Stats, dsServers map[tc.DeliveryServiceName][
}
}
-func addAvailableData(dsStats *dsdata.Stats, crStates tc.CRStates, serverCachegroups map[tc.CacheName]tc.CacheGroupName, serverDs map[tc.CacheName][]tc.DeliveryServiceName, serverTypes map[tc.CacheName]tc.CacheType, precomputed map[tc.CacheName]cache.PrecomputedData, lastStats *dsdata.LastStats, events health.ThreadsafeEvents) {
+func addAvailableData(dsStats *dsdata.Stats, crStates tc.CRStates, serverCachegroups map[tc.CacheName]tc.CacheGroupName, serverDs map[tc.CacheName][]tc.DeliveryServiceName, serverTypes map[tc.CacheName]tc.CacheType, precomputed map[tc.CacheName]cache.PrecomputedData) {
for cache, available := range crStates.Caches {
cacheGroup, ok := serverCachegroups[cache]
if !ok {
@@ -116,14 +116,6 @@ func addAvailableData(dsStats *dsdata.Stats, crStates tc.CRStates, serverCachegr
}
}
-func newLastDSStat() *dsdata.LastDSStat {
- return &dsdata.LastDSStat{
- CacheGroups: map[tc.CacheGroupName]*dsdata.LastStatsData{},
- Type: map[tc.CacheType]*dsdata.LastStatsData{},
- Caches: map[tc.CacheName]*dsdata.LastStatsData{},
- }
-}
-
// BytesPerKilobit is the number of bytes in a kilobit.
const BytesPerKilobit = 125
@@ -141,14 +133,14 @@ func addLastStat(lastData *dsdata.LastStatData, newStat int64, newStatTime time.
if newStat < lastData.Stat {
// if a new stat comes in lower than current, assume rollover, set the 'last stat' to the new one, but leave PerSec what it was (not negative).
- err := fmt.Errorf("new stat '%d'@'%v' value less than last stat '%d'@'%v'", newStat, newStatTime, lastData.Stat, lastData.Time)
+ err := fmt.Errorf("new stat '%d'@'%s' value less than last stat '%d'@'%s'", newStat, newStatTime.Format(time.RFC3339Nano), lastData.Stat, lastData.Time.Format(time.RFC3339Nano))
lastData.Stat = newStat
lastData.Time = newStatTime
return err
}
if newStatTime.Before(lastData.Time) {
- return fmt.Errorf("new stat '%d'@'%v' time less than last stat '%d'@'%v'", newStat, newStatTime, lastData.Stat, lastData.Time)
+ return fmt.Errorf("new stat '%d'@'%s' time less than last stat '%d'@'%s'", newStat, newStatTime.Format(time.RFC3339Nano), lastData.Stat, lastData.Time.Format(time.RFC3339Nano))
}
if lastData.Stat != 0 {
@@ -233,10 +225,14 @@ func addLastDSStatTotals(lastStat *dsdata.LastDSStat, cachesReporting map[tc.Cac
// addDSPerSecStats calculates and adds the per-second delivery service stats to
// both the Stats and LastStats structures. Note this mutates both dsStats and
// lastStats, adding the per-second stats to them.
-func addDSPerSecStats(lastStats *dsdata.LastStats, dsStats *dsdata.Stats, dsName tc.DeliveryServiceName, stat *dsdata.Stat, serverCachegroups map[tc.CacheName]tc.CacheGroupName, serverTypes map[tc.CacheName]tc.CacheType, mc tc.TrafficMonitorConfigMap, events health.ThreadsafeEvents, precomputed map[tc.CacheName]cache.PrecomputedData, states peer.CRStatesThreadsafe) {
+func addDSPerSecStats(lastStats *dsdata.LastStats, dsName tc.DeliveryServiceName, stat *dsdata.Stat, serverCachegroups map[tc.CacheName]tc.CacheGroupName, serverTypes map[tc.CacheName]tc.CacheType, mc tc.TrafficMonitorConfigMap, events health.ThreadsafeEvents, precomputed map[tc.CacheName]cache.PrecomputedData, states peer.CRStatesThreadsafe) {
lastStat, lastStatExists := lastStats.DeliveryServices[dsName]
if !lastStatExists {
- lastStat = newLastDSStat() // TODO sync.Pool?
+ lastStat = &dsdata.LastDSStat{
+ CacheGroups: map[tc.CacheGroupName]*dsdata.LastStatsData{},
+ Type: map[tc.CacheType]*dsdata.LastStatsData{},
+ Caches: map[tc.CacheName]*dsdata.LastStatsData{},
+ }
lastStats.DeliveryServices[dsName] = lastStat
}
for cacheName, cacheStats := range stat.Caches {
@@ -245,14 +241,12 @@ func addDSPerSecStats(lastStats *dsdata.LastStats, dsStats *dsdata.Stats, dsName
stat.Caches[cacheName] = &dsdata.StatCacheStats{}
}
- lastStatCache := lastStat.Caches[cacheName]
- if lastStatCache == nil {
- lastStatCache = &dsdata.LastStatsData{}
- lastStat.Caches[cacheName] = lastStatCache
+ if lastStat.Caches[cacheName] == nil {
+ lastStat.Caches[cacheName] = &dsdata.LastStatsData{}
}
if _, ok := precomputed[cacheName]; ok {
- if err := addLastStats(lastStatCache, cacheStats, precomputed[cacheName].Time); err != nil {
- log.Warnf("%v adding per-second stats for cache %v: %v", dsName, cacheName, err)
+ if err := addLastStats(lastStat.Caches[cacheName], cacheStats, precomputed[cacheName].Time); err != nil {
+ log.Warnf("%s adding per-second stats for cache %s: %s", dsName.String(), cacheName.String(), err.Error())
continue
}
}
@@ -360,7 +354,7 @@ func addCachePerSecStats(lastStats *dsdata.LastStats, cacheName tc.CacheName, pr
// them.
func addPerSecStats(precomputed map[tc.CacheName]cache.PrecomputedData, dsStats *dsdata.Stats, lastStats *dsdata.LastStats, serverCachegroups map[tc.CacheName]tc.CacheGroupName, serverTypes map[tc.CacheName]tc.CacheType, mc tc.TrafficMonitorConfigMap, events health.ThreadsafeEvents, states peer.CRStatesThreadsafe) {
for dsName, stat := range dsStats.DeliveryService {
- addDSPerSecStats(lastStats, dsStats, dsName, stat, serverCachegroups, serverTypes, mc, events, precomputed, states)
+ addDSPerSecStats(lastStats, dsName, stat, serverCachegroups, serverTypes, mc, events, precomputed, states)
}
for cacheName, precomputedData := range precomputed {
addCachePerSecStats(lastStats, cacheName, precomputedData)
@@ -369,7 +363,7 @@ func addPerSecStats(precomputed map[tc.CacheName]cache.PrecomputedData, dsStats
// CreateStats aggregates and creates statistics from given precomputed stat history. It returns the created stats, information about these stats necessary for the next calculation, and any error.
// Note lastStats is mutated, being set with the new last stats.
-func CreateStats(precomputed map[tc.CacheName]cache.PrecomputedData, toData todata.TOData, crStates tc.CRStates, lastStats *dsdata.LastStats, now time.Time, mc tc.TrafficMonitorConfigMap, events health.ThreadsafeEvents, states peer.CRStatesThreadsafe) (*dsdata.Stats, error) {
+func CreateStats(precomputed map[tc.CacheName]cache.PrecomputedData, toData todata.TOData, crStates tc.CRStates, lastStats *dsdata.LastStats, mc tc.TrafficMonitorConfigMap, events health.ThreadsafeEvents, states peer.CRStatesThreadsafe) *dsdata.Stats {
start := time.Now()
dsStats := dsdata.NewStats(len(toData.DeliveryServiceServers)) // TODO sync.Pool?
for deliveryService := range toData.DeliveryServiceServers {
@@ -380,12 +374,13 @@ func CreateStats(precomputed map[tc.CacheName]cache.PrecomputedData, toData toda
dsStats.DeliveryService[deliveryService] = dsdata.NewStat() // TODO sync.Pool?
}
setStaticData(dsStats, toData.DeliveryServiceServers)
- addAvailableData(dsStats, crStates, toData.ServerCachegroups, toData.ServerDeliveryServices, toData.ServerTypes, precomputed, lastStats, events) // TODO move after stat summarisation
+ addAvailableData(dsStats, crStates, toData.ServerCachegroups, toData.ServerDeliveryServices, toData.ServerTypes, precomputed) // TODO move after stat summarisation
for server, precomputedData := range precomputed {
cachegroup, ok := toData.ServerCachegroups[server]
if !ok {
- log.Warnf("server %s has no cachegroup, skipping\n", server)
+ // this can happen if we have precomputed data for a cache but the cache has since been deleted from Traffic Ops
+ log.Infof("server %s has no cachegroup, skipping", server)
continue
}
serverType, ok := toData.ServerTypes[server]
@@ -436,7 +431,7 @@ func CreateStats(precomputed map[tc.CacheName]cache.PrecomputedData, toData toda
addPerSecStats(precomputed, dsStats, lastStats, toData.ServerCachegroups, toData.ServerTypes, mc, events, states)
log.Infof("CreateStats took %v\n", time.Since(start))
dsStats.Time = time.Now()
- return dsStats, nil
+ return dsStats
}
func getDSErr(dsName tc.DeliveryServiceName, dsStats dsdata.StatCacheStats, monitorConfig tc.TrafficMonitorConfigMap) error {
diff --git a/traffic_monitor/ds/stat_test.go b/traffic_monitor/ds/stat_test.go
index 62d442a..00e2814 100644
--- a/traffic_monitor/ds/stat_test.go
+++ b/traffic_monitor/ds/stat_test.go
@@ -28,7 +28,6 @@ import (
"math/rand"
"regexp"
"testing"
- "time"
tc_log "github.com/apache/trafficcontrol/lib/go-log"
"github.com/apache/trafficcontrol/lib/go-tc"
@@ -60,7 +59,6 @@ func TestCreateStats(t *testing.T) {
toData := getMockTOData()
combinedCRStates := peer.NewCRStatesThreadsafe()
lastStatsThs := threadsafe.NewLastStats()
- now := time.Now()
maxEvents := uint64(4)
events := health.NewThreadsafeEvents(maxEvents)
localCRStates := peer.NewCRStatesThreadsafe()
@@ -87,20 +85,13 @@ func TestCreateStats(t *testing.T) {
lastStatsVal := lastStatsThs.Get()
lastStatsCopy := lastStatsVal.Copy()
- dsStats, err := CreateStats(precomputeds, toData, combinedCRStates.Get(), lastStatsCopy, now, monitorConfig, events, localCRStates)
-
- if err != nil {
- t.Fatalf("CreateStats err expected: nil, actual: " + err.Error())
- }
+ dsStats := CreateStats(precomputeds, toData, combinedCRStates.Get(), lastStatsCopy, monitorConfig, events, localCRStates)
serverDeliveryServices := toData.ServerDeliveryServices
toData.ServerDeliveryServices = map[tc.CacheName][]tc.DeliveryServiceName{} // temporarily unassign servers to generate warnings about caches not assigned to delivery services
buffer := bytes.NewBuffer(make([]byte, 0, 10000))
tc_log.Info = log.New(buffer, "TestAddAvailabilityDataNotFoundInDeliveryService", log.Lshortfile)
- _, err = CreateStats(precomputeds, toData, combinedCRStates.Get(), lastStatsCopy, now, monitorConfig, events, localCRStates)
- if err != nil {
- t.Fatalf("CreateStats err expected: nil, actual: " + err.Error())
- }
+ _ = CreateStats(precomputeds, toData, combinedCRStates.Get(), lastStatsCopy, monitorConfig, events, localCRStates)
checkLogOutput(t, buffer, toData, caches)
toData.ServerDeliveryServices = serverDeliveryServices
diff --git a/traffic_monitor/manager/manager.go b/traffic_monitor/manager/manager.go
index c7fe838..3d38c76 100644
--- a/traffic_monitor/manager/manager.go
+++ b/traffic_monitor/manager/manager.go
@@ -112,7 +112,6 @@ func Start(opsConfigFile string, cfg config.Config, appData config.StaticAppData
combinedStates,
toData,
cachesChangedForStatMgr,
- errorCount,
cfg,
monitorConfig,
events,
diff --git a/traffic_monitor/manager/stat.go b/traffic_monitor/manager/stat.go
index 18fa698..d3878b5 100644
--- a/traffic_monitor/manager/stat.go
+++ b/traffic_monitor/manager/stat.go
@@ -64,7 +64,6 @@ func StartStatHistoryManager(
combinedStates peer.CRStatesThreadsafe,
toData todata.TODataThreadsafe,
cachesChanged <-chan struct{},
- errorCount threadsafe.Uint,
cfg config.Config,
monitorConfig threadsafe.TrafficMonitorConfigMap,
events health.ThreadsafeEvents,
@@ -97,7 +96,7 @@ func StartStatHistoryManager(
if haveCachesChanged() {
statUnpolledCaches.SetNewCaches(getNewCaches(localStates, monitorConfig))
}
- processStatResults(results, statInfoHistory, statResultHistory, statMaxKbpses, combinedStates, lastStats, toData.Get(), errorCount, dsStats, lastStatEndTimes, lastStatDurations, statUnpolledCaches, monitorConfig.Get(), precomputedData, lastResults, localStates, events, localCacheStatus, combineState, cfg.CachePollingProtocol)
+ processStatResults(results, statInfoHistory, statResultHistory, statMaxKbpses, combinedStates, lastStats, toData.Get(), dsStats, lastStatEndTimes, lastStatDurations, statUnpolledCaches, monitorConfig.Get(), precomputedData, lastResults, localStates, events, localCacheStatus, combineState, cfg.CachePollingProtocol)
}
go func() {
@@ -241,7 +240,6 @@ func processStatResults(
combinedStatesThreadsafe peer.CRStatesThreadsafe,
lastStats threadsafe.LastStats,
toData todata.TOData,
- errorCount threadsafe.Uint,
dsStats threadsafe.DSStats,
lastStatEndTimes map[tc.CacheName]time.Time,
lastStatDurationsThreadsafe threadsafe.DurationMap,
@@ -307,15 +305,10 @@ func processStatResults(
lastStatsVal := lastStats.Get()
lastStatsCopy := lastStatsVal.Copy()
- newDsStats, err := ds.CreateStats(precomputedData, toData, combinedStates, lastStatsCopy, time.Now(), mc, events, localStates)
+ newDsStats := ds.CreateStats(precomputedData, toData, combinedStates, lastStatsCopy, mc, events, localStates)
- if err != nil {
- errorCount.Inc()
- log.Errorf("getting deliveryservice: %v\n", err)
- } else {
- dsStats.Set(*newDsStats)
- lastStats.Set(*lastStatsCopy)
- }
+ dsStats.Set(*newDsStats)
+ lastStats.Set(*lastStatsCopy)
pollerName := "stat"
health.CalcAvailability(results, pollerName, &statResultHistoryThreadsafe, mc, toData, localCacheStatusThreadsafe, localStates, events, pollingProtocol)