This is an automated email from the ASF dual-hosted git repository.
zrhoffman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficcontrol.git
The following commit(s) were added to refs/heads/master by this push:
new 16dda64 Make TM recover from TO being unavailable at startup (#6146)
16dda64 is described below
commit 16dda648671ad438fcb8b6b08eaf6fbeb2fa9589
Author: Rawlin Peters <[email protected]>
AuthorDate: Mon Aug 30 09:32:11 2021 -0600
Make TM recover from TO being unavailable at startup (#6146)
* Make TM recover from TO being unavailable at startup
Always set non-nil TO sessions that can eventually authenticate
themselves when TO recovers. Additionally, instead of exclusively using
one major client version after starting up and logging in, always
attempt the latest major version and fall back to the legacy version in
case of error. This will allow TM to seamlessly transition between using
either major version no matter which order TM and TO are upgraded in.
Closes: #6129
* Organize imports, add UsingDummyTO back
---
CHANGELOG.md | 1 +
traffic_monitor/handler/handler.go | 2 +-
traffic_monitor/manager/opsconfig.go | 4 +-
traffic_monitor/towrap/towrap.go | 144 ++++++++++++++-------
.../{handler/handler.go => towrap/towrap_test.go} | 33 ++---
5 files changed, 111 insertions(+), 73 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d327bdf..c17d888 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -67,6 +67,7 @@ The format is based on [Keep a
Changelog](http://keepachangelog.com/en/1.0.0/).
### Fixed
- [#5690](https://github.com/apache/trafficcontrol/issues/5690) - Fixed github
action for added/modified db migration file.
- [#2471](https://github.com/apache/trafficcontrol/issues/2471) - A PR check
to ensure added db migration file is the latest.
+- [#6129](https://github.com/apache/trafficcontrol/issues/6129) - Traffic
Monitor start doesn't recover when Traffic Ops is unavailable
- [#5609](https://github.com/apache/trafficcontrol/issues/5609) - Fixed GET
/servercheck filter for an extra query param.
- [#5954](https://github.com/apache/trafficcontrol/issues/5954) - Traffic Ops
HTTP response write errors are ignored
- [#6048](https://github.com/apache/trafficcontrol/issues/6048) - TM sometimes
sets cachegroups to unavailable even though all caches are online
diff --git a/traffic_monitor/handler/handler.go
b/traffic_monitor/handler/handler.go
index 88832e8..31b7d6d 100644
--- a/traffic_monitor/handler/handler.go
+++ b/traffic_monitor/handler/handler.go
@@ -40,7 +40,7 @@ type OpsConfig struct {
HttpsListener string `json:"httpsListener"`
CertFile string `json:"certFile"`
KeyFile string `json:"keyFile"`
- UsingDummyTO bool `json:"usingDummyTO"`
+ UsingDummyTO bool `json:"usingDummyTO"` // only used in the TM UI to
indicate if TM started up with on-disk backup snapshots
}
type Handler interface {
diff --git a/traffic_monitor/manager/opsconfig.go
b/traffic_monitor/manager/opsconfig.go
index 29fe954..0361825 100644
--- a/traffic_monitor/manager/opsconfig.go
+++ b/traffic_monitor/manager/opsconfig.go
@@ -39,7 +39,7 @@ import (
"github.com/apache/trafficcontrol/traffic_monitor/todata"
"github.com/apache/trafficcontrol/traffic_monitor/towrap"
- "github.com/json-iterator/go"
+ jsoniter "github.com/json-iterator/go"
)
// StartOpsConfigManager starts the ops config manager goroutine, returning
the (threadsafe) variables which it sets.
@@ -172,8 +172,8 @@ func StartOpsConfigManager(
time.Sleep(duration)
if toSession.BackupFileExists() &&
(toLoginCount >= cfg.TrafficOpsDiskRetryMax) {
- log.Errorf("error instantiating Session
with traffic_ops, backup disk files exist, creating empty traffic_ops session
to read")
newOpsConfig.UsingDummyTO = true
+ log.Errorf("error instantiating
authenticated session with Traffic Ops, backup disk files exist, continuing
with unauthenticated session")
break
}
diff --git a/traffic_monitor/towrap/towrap.go b/traffic_monitor/towrap/towrap.go
index e0857e8..fb330a3 100644
--- a/traffic_monitor/towrap/towrap.go
+++ b/traffic_monitor/towrap/towrap.go
@@ -22,9 +22,12 @@ package towrap
*/
import (
+ "crypto/tls"
"errors"
"fmt"
"io/ioutil"
+ "net/http"
+ "net/http/cookiejar"
"net/url"
"os"
"strconv"
@@ -38,6 +41,7 @@ import (
client "github.com/apache/trafficcontrol/traffic_ops/v3-client"
jsoniter "github.com/json-iterator/go"
+ "golang.org/x/net/publicsuffix"
)
const localHostIP = "127.0.0.1"
@@ -193,7 +197,6 @@ type TrafficOpsSessionThreadsafe struct {
m *sync.Mutex
lastCRConfig ByteMapCache
crConfigHist CRConfigHistoryThreadsafe
- useLegacy bool
CRConfigBackupFile string
TMConfigBackupFile string
}
@@ -209,17 +212,13 @@ func NewTrafficOpsSessionThreadsafe(s *client.Session, ls
*legacyClient.Session,
session: &s,
legacySession: &ls,
TMConfigBackupFile: cfg.TMConfigBackupFile,
- useLegacy: false,
}
}
// Initialized tells whether or not the TrafficOpsSessionThreadsafe has been
-// properly initialized (by calling 'Update').
+// properly initialized with non-nil sessions.
func (s TrafficOpsSessionThreadsafe) Initialized() bool {
- if s.useLegacy {
- return s.legacySession != nil && *s.legacySession != nil
- }
- return s.session != nil && *s.session != nil
+ return s.session != nil && *s.session != nil && s.legacySession != nil
&& *s.legacySession != nil
}
// Update updates the TrafficOpsSessionThreadsafe's connection information with
@@ -240,24 +239,70 @@ func (s *TrafficOpsSessionThreadsafe) Update(
s.m.Lock()
defer s.m.Unlock()
+ // always set unauthenticated sessions first which can eventually
authenticate themselves when attempting requests
+ if err := s.setSession(url, username, password, insecure, userAgent,
useCache, timeout); err != nil {
+ return err
+ }
+ if err := s.setLegacySession(url, username, password, insecure,
userAgent, useCache, timeout); err != nil {
+ return err
+ }
+
session, _, err := client.LoginWithAgent(url, username, password,
insecure, userAgent, useCache, timeout)
if err != nil {
- log.Errorf("Error logging in using up-to-date client: %v", err)
+ log.Errorf("logging in using up-to-date client: %v", err)
legacySession, _, err := legacyClient.LoginWithAgent(url,
username, password, insecure, userAgent, useCache, timeout)
if err != nil || legacySession == nil {
err = fmt.Errorf("logging in using legacy client: %v",
err)
return err
}
*s.legacySession = legacySession
- s.useLegacy = true
} else {
*s.session = session
- s.useLegacy = false
}
return nil
}
+// setSession sets the session for the up-to-date client without logging in.
+func (s *TrafficOpsSessionThreadsafe) setSession(url, username, password
string, insecure bool, userAgent string, useCache bool, timeout time.Duration)
error {
+ options := cookiejar.Options{
+ PublicSuffixList: publicsuffix.List,
+ }
+ jar, err := cookiejar.New(&options)
+ if err != nil {
+ return err
+ }
+ to := client.NewSession(username, password, url, userAgent,
&http.Client{
+ Timeout: timeout,
+ Transport: &http.Transport{
+ TLSClientConfig: &tls.Config{InsecureSkipVerify:
insecure},
+ },
+ Jar: jar,
+ }, useCache)
+ *s.session = to
+ return nil
+}
+
+// setSession sets the session for the legacy client without logging in.
+func (s *TrafficOpsSessionThreadsafe) setLegacySession(url, username, password
string, insecure bool, userAgent string, useCache bool, timeout time.Duration)
error {
+ options := cookiejar.Options{
+ PublicSuffixList: publicsuffix.List,
+ }
+ jar, err := cookiejar.New(&options)
+ if err != nil {
+ return err
+ }
+ to := legacyClient.NewSession(username, password, url, userAgent,
&http.Client{
+ Timeout: timeout,
+ Transport: &http.Transport{
+ TLSClientConfig: &tls.Config{InsecureSkipVerify:
insecure},
+ },
+ Jar: jar,
+ }, useCache)
+ *s.legacySession = to
+ return nil
+}
+
// getThreadsafeSession is used internally to get a copy of the session
pointer,
// or nil if it doesn't exist. This should not be used outside
// TrafficOpsSessionThreadsafe, and never stored, because part of the purpose
of
@@ -334,44 +379,47 @@ func (s TrafficOpsSessionThreadsafe) CRConfigRaw(cdn
string) ([]byte, error) {
var remoteAddr string
var err error
var data []byte
+ var legacyReqInf legacyClient.ReqInf
- if s.useLegacy {
- ss := s.getLegacy()
- if ss == nil {
+ ss := s.get()
+ if ss == nil {
+ return nil, ErrNilSession
+ }
+ data, reqInf, err := ss.GetCRConfig(cdn)
+ if reqInf.RemoteAddr != nil {
+ remoteAddr = reqInf.RemoteAddr.String()
+ }
+ if err != nil {
+ log.Warnln("getting CRConfig from Traffic Ops using up-to-date
client: " + err.Error() + ". Retrying with legacy client")
+ ls := s.getLegacy()
+ if ls == nil {
return nil, ErrNilSession
}
- b, reqInf, e := ss.GetCRConfig(cdn)
- err = e
- data = b
- if reqInf.RemoteAddr != nil {
- remoteAddr = reqInf.RemoteAddr.String()
+ data, legacyReqInf, err = ls.GetCRConfig(cdn)
+ if legacyReqInf.RemoteAddr != nil {
+ remoteAddr = legacyReqInf.RemoteAddr.String()
}
- } else {
- ss := s.get()
- if ss == nil {
- return nil, ErrNilSession
- }
- b, reqInf, e := ss.GetCRConfig(cdn)
- err = e
- data = b
- if reqInf.RemoteAddr != nil {
- remoteAddr = reqInf.RemoteAddr.String()
+ if err != nil {
+ log.Errorln("getting CRConfig from Traffic Ops using
legacy client: " + err.Error() + ". Checking for backup")
}
}
if err == nil {
- ioutil.WriteFile(s.CRConfigBackupFile, data, 0644)
+ log.Infoln("successfully got CRConfig from Traffic Ops. Writing
to backup file")
+ if wErr := ioutil.WriteFile(s.CRConfigBackupFile, data, 0644);
wErr != nil {
+ log.Errorf("failed to write CRConfig backup file: %v",
wErr)
+ }
} else {
if s.BackupFileExists() {
log.Errorln("using backup file for CRConfig snapshot
due to error fetching CRConfig snapshot from Traffic Ops: " + err.Error())
data, err = ioutil.ReadFile(s.CRConfigBackupFile)
if err != nil {
- return nil, fmt.Errorf("file Read Error: %v",
err)
+ return nil, fmt.Errorf("reading CRConfig backup
file: %v", err)
}
remoteAddr = localHostIP
err = nil
} else {
- return nil, fmt.Errorf("Failed to get CRConfig from
Traffic Ops (%v), and there is no backup file", err)
+ return nil, fmt.Errorf("failed to get CRConfig from
Traffic Ops (%v), and there is no backup file", err)
}
}
@@ -383,11 +431,6 @@ func (s TrafficOpsSessionThreadsafe) CRConfigRaw(cdn
string) ([]byte, error) {
}
defer s.crConfigHist.Add(hist)
- // TODO: per the above logic, I don't think this is possible
- if err != nil {
- return data, err
- }
-
crc := &tc.CRConfig{}
json := jsoniter.ConfigFastest
if err = json.Unmarshal(data, crc); err != nil {
@@ -438,6 +481,9 @@ func (s TrafficOpsSessionThreadsafe)
fetchLegacyTMConfig(cdn string) (*tc.Traffi
}
m, _, e := ss.GetTrafficMonitorConfig(cdn)
+ if m == nil {
+ return nil, e
+ }
return m.Upgrade(), e
}
@@ -451,20 +497,20 @@ func (s TrafficOpsSessionThreadsafe)
trafficMonitorConfigMapRaw(cdn string) (*tc
var configMap *tc.TrafficMonitorConfigMap
var err error
- if s.useLegacy {
+ config, err = s.fetchTMConfig(cdn)
+ if err != nil {
+ log.Warnln("getting Traffic Monitor config from Traffic Ops
using up-to-date client: " + err.Error() + ". Retrying with legacy client")
config, err = s.fetchLegacyTMConfig(cdn)
- } else {
- config, err = s.fetchTMConfig(cdn)
- }
-
- if config == nil {
if err != nil {
- return nil, fmt.Errorf("getting Traffic Monitor
configuration map: %v", err)
+ log.Errorln("getting Traffic Monitor config from
Traffic Ops using legacy client: " + err.Error())
}
- return nil, errors.New("nil configMap after fetching")
}
if err == nil {
+ log.Infoln("successfully got Traffic Monitor config from
Traffic Ops")
+ if config == nil {
+ return nil, fmt.Errorf("nil Traffic Monitor config
after successful fetch")
+ }
configMap, err = tc.TrafficMonitorTransformToMap(config)
}
@@ -491,7 +537,9 @@ func (s TrafficOpsSessionThreadsafe)
trafficMonitorConfigMapRaw(cdn string) (*tc
json := jsoniter.ConfigFastest
data, err := json.Marshal(*config)
if err == nil {
- ioutil.WriteFile(s.TMConfigBackupFile, data, 0644)
+ if wErr := ioutil.WriteFile(s.TMConfigBackupFile, data, 0644);
wErr != nil {
+ log.Errorf("failed to write TM config backup file: %v",
wErr)
+ }
}
return configMap, err
@@ -613,10 +661,10 @@ func (s TrafficOpsSessionThreadsafe) MonitorCDN(hostName
string) (string, error)
var server tc.ServerV30
var err error
- if s.useLegacy {
+ server, err = s.fetchServerByHostname(hostName)
+ if err != nil {
+ log.Warnln("getting server by hostname '" + hostName + "' using
up-to-date client: " + err.Error() + ". Retrying with legacy client")
server, err = s.fetchLegacyServerByHostname(hostName)
- } else {
- server, err = s.fetchServerByHostname(hostName)
}
if err != nil {
diff --git a/traffic_monitor/handler/handler.go
b/traffic_monitor/towrap/towrap_test.go
similarity index 56%
copy from traffic_monitor/handler/handler.go
copy to traffic_monitor/towrap/towrap_test.go
index 88832e8..e8482f3 100644
--- a/traffic_monitor/handler/handler.go
+++ b/traffic_monitor/towrap/towrap_test.go
@@ -1,4 +1,4 @@
-package handler
+package towrap
/*
* Licensed to the Apache Software Foundation (ASF) under one
@@ -20,29 +20,18 @@ package handler
*/
import (
- "io"
+ "testing"
"time"
-)
-const (
- NOTIFY_NEVER = iota
- NOTIFY_CHANGE
- NOTIFY_ALWAYS
+ "github.com/apache/trafficcontrol/traffic_monitor/config"
)
-type OpsConfig struct {
- Username string `json:"username"`
- Password string `json:"password"`
- Url string `json:"url"`
- Insecure bool `json:"insecure"`
- CdnName string `json:"cdnName"`
- HttpListener string `json:"httpListener"`
- HttpsListener string `json:"httpsListener"`
- CertFile string `json:"certFile"`
- KeyFile string `json:"keyFile"`
- UsingDummyTO bool `json:"usingDummyTO"`
-}
-
-type Handler interface {
- Handle(string, io.Reader, string, time.Duration, time.Time, error,
uint64, bool, interface{}, chan<- uint64)
+func TestTrafficOpsSessionThreadsafeUpdateSetsNonNilSessions(t *testing.T) {
+ s := NewTrafficOpsSessionThreadsafe(nil, nil, 5, config.Config{})
+ err := s.Update("", "", "", true, "", false, 10*time.Second)
+ if err == nil {
+ t.Error("expected an error, got nil")
+ } else if s.session == nil || *s.session == nil || s.legacySession ==
nil || *s.legacySession == nil {
+ t.Errorf("expected non-nil sessions after getting error from
Update()")
+ }
}