This is an automated email from the ASF dual-hosted git repository.

zrhoffman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficcontrol.git


The following commit(s) were added to refs/heads/master by this push:
     new 16dda64  Make TM recover from TO being unavailable at startup (#6146)
16dda64 is described below

commit 16dda648671ad438fcb8b6b08eaf6fbeb2fa9589
Author: Rawlin Peters <[email protected]>
AuthorDate: Mon Aug 30 09:32:11 2021 -0600

    Make TM recover from TO being unavailable at startup (#6146)
    
    * Make TM recover from TO being unavailable at startup
    
    Always set non-nil TO sessions that can eventually authenticate
    themselves when TO recovers. Additionally, instead of exclusively using
    one major client version after starting up and logging in, always
    attempt the latest major version and fall back to the legacy version in
    case of error. This will allow TM to seamlessly transition between using
    either major version no matter which order TM and TO are upgraded in.
    
    Closes: #6129
    
    * Organize imports, add UsingDummyTO back
---
 CHANGELOG.md                                       |   1 +
 traffic_monitor/handler/handler.go                 |   2 +-
 traffic_monitor/manager/opsconfig.go               |   4 +-
 traffic_monitor/towrap/towrap.go                   | 144 ++++++++++++++-------
 .../{handler/handler.go => towrap/towrap_test.go}  |  33 ++---
 5 files changed, 111 insertions(+), 73 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d327bdf..c17d888 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -67,6 +67,7 @@ The format is based on [Keep a 
Changelog](http://keepachangelog.com/en/1.0.0/).
 ### Fixed
 - [#5690](https://github.com/apache/trafficcontrol/issues/5690) - Fixed github 
action for added/modified db migration file.
 - [#2471](https://github.com/apache/trafficcontrol/issues/2471) - A PR check 
to ensure added db migration file is the latest.
+- [#6129](https://github.com/apache/trafficcontrol/issues/6129) - Traffic 
Monitor start doesn't recover when Traffic Ops is unavailable
 - [#5609](https://github.com/apache/trafficcontrol/issues/5609) - Fixed GET 
/servercheck filter for an extra query param.
 - [#5954](https://github.com/apache/trafficcontrol/issues/5954) - Traffic Ops 
HTTP response write errors are ignored
 - [#6048](https://github.com/apache/trafficcontrol/issues/6048) - TM sometimes 
sets cachegroups to unavailable even though all caches are online
diff --git a/traffic_monitor/handler/handler.go 
b/traffic_monitor/handler/handler.go
index 88832e8..31b7d6d 100644
--- a/traffic_monitor/handler/handler.go
+++ b/traffic_monitor/handler/handler.go
@@ -40,7 +40,7 @@ type OpsConfig struct {
        HttpsListener string `json:"httpsListener"`
        CertFile      string `json:"certFile"`
        KeyFile       string `json:"keyFile"`
-       UsingDummyTO  bool   `json:"usingDummyTO"`
+       UsingDummyTO  bool   `json:"usingDummyTO"` // only used in the TM UI to 
indicate if TM started up with on-disk backup snapshots
 }
 
 type Handler interface {
diff --git a/traffic_monitor/manager/opsconfig.go 
b/traffic_monitor/manager/opsconfig.go
index 29fe954..0361825 100644
--- a/traffic_monitor/manager/opsconfig.go
+++ b/traffic_monitor/manager/opsconfig.go
@@ -39,7 +39,7 @@ import (
        "github.com/apache/trafficcontrol/traffic_monitor/todata"
        "github.com/apache/trafficcontrol/traffic_monitor/towrap"
 
-       "github.com/json-iterator/go"
+       jsoniter "github.com/json-iterator/go"
 )
 
 // StartOpsConfigManager starts the ops config manager goroutine, returning 
the (threadsafe) variables which it sets.
@@ -172,8 +172,8 @@ func StartOpsConfigManager(
                                time.Sleep(duration)
 
                                if toSession.BackupFileExists() && 
(toLoginCount >= cfg.TrafficOpsDiskRetryMax) {
-                                       log.Errorf("error instantiating Session 
with traffic_ops, backup disk files exist, creating empty traffic_ops session 
to read")
                                        newOpsConfig.UsingDummyTO = true
+                                       log.Errorf("error instantiating 
authenticated session with Traffic Ops, backup disk files exist, continuing 
with unauthenticated session")
                                        break
                                }
 
diff --git a/traffic_monitor/towrap/towrap.go b/traffic_monitor/towrap/towrap.go
index e0857e8..fb330a3 100644
--- a/traffic_monitor/towrap/towrap.go
+++ b/traffic_monitor/towrap/towrap.go
@@ -22,9 +22,12 @@ package towrap
  */
 
 import (
+       "crypto/tls"
        "errors"
        "fmt"
        "io/ioutil"
+       "net/http"
+       "net/http/cookiejar"
        "net/url"
        "os"
        "strconv"
@@ -38,6 +41,7 @@ import (
        client "github.com/apache/trafficcontrol/traffic_ops/v3-client"
 
        jsoniter "github.com/json-iterator/go"
+       "golang.org/x/net/publicsuffix"
 )
 
 const localHostIP = "127.0.0.1"
@@ -193,7 +197,6 @@ type TrafficOpsSessionThreadsafe struct {
        m                  *sync.Mutex
        lastCRConfig       ByteMapCache
        crConfigHist       CRConfigHistoryThreadsafe
-       useLegacy          bool
        CRConfigBackupFile string
        TMConfigBackupFile string
 }
@@ -209,17 +212,13 @@ func NewTrafficOpsSessionThreadsafe(s *client.Session, ls 
*legacyClient.Session,
                session:            &s,
                legacySession:      &ls,
                TMConfigBackupFile: cfg.TMConfigBackupFile,
-               useLegacy:          false,
        }
 }
 
 // Initialized tells whether or not the TrafficOpsSessionThreadsafe has been
-// properly initialized (by calling 'Update').
+// properly initialized with non-nil sessions.
 func (s TrafficOpsSessionThreadsafe) Initialized() bool {
-       if s.useLegacy {
-               return s.legacySession != nil && *s.legacySession != nil
-       }
-       return s.session != nil && *s.session != nil
+       return s.session != nil && *s.session != nil && s.legacySession != nil 
&& *s.legacySession != nil
 }
 
 // Update updates the TrafficOpsSessionThreadsafe's connection information with
@@ -240,24 +239,70 @@ func (s *TrafficOpsSessionThreadsafe) Update(
        s.m.Lock()
        defer s.m.Unlock()
 
+       // always set unauthenticated sessions first which can eventually 
authenticate themselves when attempting requests
+       if err := s.setSession(url, username, password, insecure, userAgent, 
useCache, timeout); err != nil {
+               return err
+       }
+       if err := s.setLegacySession(url, username, password, insecure, 
userAgent, useCache, timeout); err != nil {
+               return err
+       }
+
        session, _, err := client.LoginWithAgent(url, username, password, 
insecure, userAgent, useCache, timeout)
        if err != nil {
-               log.Errorf("Error logging in using up-to-date client: %v", err)
+               log.Errorf("logging in using up-to-date client: %v", err)
                legacySession, _, err := legacyClient.LoginWithAgent(url, 
username, password, insecure, userAgent, useCache, timeout)
                if err != nil || legacySession == nil {
                        err = fmt.Errorf("logging in using legacy client: %v", 
err)
                        return err
                }
                *s.legacySession = legacySession
-               s.useLegacy = true
        } else {
                *s.session = session
-               s.useLegacy = false
        }
 
        return nil
 }
 
+// setSession sets the session for the up-to-date client without logging in.
+func (s *TrafficOpsSessionThreadsafe) setSession(url, username, password 
string, insecure bool, userAgent string, useCache bool, timeout time.Duration) 
error {
+       options := cookiejar.Options{
+               PublicSuffixList: publicsuffix.List,
+       }
+       jar, err := cookiejar.New(&options)
+       if err != nil {
+               return err
+       }
+       to := client.NewSession(username, password, url, userAgent, 
&http.Client{
+               Timeout: timeout,
+               Transport: &http.Transport{
+                       TLSClientConfig: &tls.Config{InsecureSkipVerify: 
insecure},
+               },
+               Jar: jar,
+       }, useCache)
+       *s.session = to
+       return nil
+}
+
+// setSession sets the session for the legacy client without logging in.
+func (s *TrafficOpsSessionThreadsafe) setLegacySession(url, username, password 
string, insecure bool, userAgent string, useCache bool, timeout time.Duration) 
error {
+       options := cookiejar.Options{
+               PublicSuffixList: publicsuffix.List,
+       }
+       jar, err := cookiejar.New(&options)
+       if err != nil {
+               return err
+       }
+       to := legacyClient.NewSession(username, password, url, userAgent, 
&http.Client{
+               Timeout: timeout,
+               Transport: &http.Transport{
+                       TLSClientConfig: &tls.Config{InsecureSkipVerify: 
insecure},
+               },
+               Jar: jar,
+       }, useCache)
+       *s.legacySession = to
+       return nil
+}
+
 // getThreadsafeSession is used internally to get a copy of the session 
pointer,
 // or nil if it doesn't exist. This should not be used outside
 // TrafficOpsSessionThreadsafe, and never stored, because part of the purpose 
of
@@ -334,44 +379,47 @@ func (s TrafficOpsSessionThreadsafe) CRConfigRaw(cdn 
string) ([]byte, error) {
        var remoteAddr string
        var err error
        var data []byte
+       var legacyReqInf legacyClient.ReqInf
 
-       if s.useLegacy {
-               ss := s.getLegacy()
-               if ss == nil {
+       ss := s.get()
+       if ss == nil {
+               return nil, ErrNilSession
+       }
+       data, reqInf, err := ss.GetCRConfig(cdn)
+       if reqInf.RemoteAddr != nil {
+               remoteAddr = reqInf.RemoteAddr.String()
+       }
+       if err != nil {
+               log.Warnln("getting CRConfig from Traffic Ops using up-to-date 
client: " + err.Error() + ". Retrying with legacy client")
+               ls := s.getLegacy()
+               if ls == nil {
                        return nil, ErrNilSession
                }
-               b, reqInf, e := ss.GetCRConfig(cdn)
-               err = e
-               data = b
-               if reqInf.RemoteAddr != nil {
-                       remoteAddr = reqInf.RemoteAddr.String()
+               data, legacyReqInf, err = ls.GetCRConfig(cdn)
+               if legacyReqInf.RemoteAddr != nil {
+                       remoteAddr = legacyReqInf.RemoteAddr.String()
                }
-       } else {
-               ss := s.get()
-               if ss == nil {
-                       return nil, ErrNilSession
-               }
-               b, reqInf, e := ss.GetCRConfig(cdn)
-               err = e
-               data = b
-               if reqInf.RemoteAddr != nil {
-                       remoteAddr = reqInf.RemoteAddr.String()
+               if err != nil {
+                       log.Errorln("getting CRConfig from Traffic Ops using 
legacy client: " + err.Error() + ". Checking for backup")
                }
        }
 
        if err == nil {
-               ioutil.WriteFile(s.CRConfigBackupFile, data, 0644)
+               log.Infoln("successfully got CRConfig from Traffic Ops. Writing 
to backup file")
+               if wErr := ioutil.WriteFile(s.CRConfigBackupFile, data, 0644); 
wErr != nil {
+                       log.Errorf("failed to write CRConfig backup file: %v", 
wErr)
+               }
        } else {
                if s.BackupFileExists() {
                        log.Errorln("using backup file for CRConfig snapshot 
due to error fetching CRConfig snapshot from Traffic Ops: " + err.Error())
                        data, err = ioutil.ReadFile(s.CRConfigBackupFile)
                        if err != nil {
-                               return nil, fmt.Errorf("file Read Error: %v", 
err)
+                               return nil, fmt.Errorf("reading CRConfig backup 
file: %v", err)
                        }
                        remoteAddr = localHostIP
                        err = nil
                } else {
-                       return nil, fmt.Errorf("Failed to get CRConfig from 
Traffic Ops (%v), and there is no backup file", err)
+                       return nil, fmt.Errorf("failed to get CRConfig from 
Traffic Ops (%v), and there is no backup file", err)
                }
        }
 
@@ -383,11 +431,6 @@ func (s TrafficOpsSessionThreadsafe) CRConfigRaw(cdn 
string) ([]byte, error) {
        }
        defer s.crConfigHist.Add(hist)
 
-       // TODO: per the above logic, I don't think this is possible
-       if err != nil {
-               return data, err
-       }
-
        crc := &tc.CRConfig{}
        json := jsoniter.ConfigFastest
        if err = json.Unmarshal(data, crc); err != nil {
@@ -438,6 +481,9 @@ func (s TrafficOpsSessionThreadsafe) 
fetchLegacyTMConfig(cdn string) (*tc.Traffi
        }
 
        m, _, e := ss.GetTrafficMonitorConfig(cdn)
+       if m == nil {
+               return nil, e
+       }
        return m.Upgrade(), e
 }
 
@@ -451,20 +497,20 @@ func (s TrafficOpsSessionThreadsafe) 
trafficMonitorConfigMapRaw(cdn string) (*tc
        var configMap *tc.TrafficMonitorConfigMap
        var err error
 
-       if s.useLegacy {
+       config, err = s.fetchTMConfig(cdn)
+       if err != nil {
+               log.Warnln("getting Traffic Monitor config from Traffic Ops 
using up-to-date client: " + err.Error() + ". Retrying with legacy client")
                config, err = s.fetchLegacyTMConfig(cdn)
-       } else {
-               config, err = s.fetchTMConfig(cdn)
-       }
-
-       if config == nil {
                if err != nil {
-                       return nil, fmt.Errorf("getting Traffic Monitor 
configuration map: %v", err)
+                       log.Errorln("getting Traffic Monitor config from 
Traffic Ops using legacy client: " + err.Error())
                }
-               return nil, errors.New("nil configMap after fetching")
        }
 
        if err == nil {
+               log.Infoln("successfully got Traffic Monitor config from 
Traffic Ops")
+               if config == nil {
+                       return nil, fmt.Errorf("nil Traffic Monitor config 
after successful fetch")
+               }
                configMap, err = tc.TrafficMonitorTransformToMap(config)
        }
 
@@ -491,7 +537,9 @@ func (s TrafficOpsSessionThreadsafe) 
trafficMonitorConfigMapRaw(cdn string) (*tc
        json := jsoniter.ConfigFastest
        data, err := json.Marshal(*config)
        if err == nil {
-               ioutil.WriteFile(s.TMConfigBackupFile, data, 0644)
+               if wErr := ioutil.WriteFile(s.TMConfigBackupFile, data, 0644); 
wErr != nil {
+                       log.Errorf("failed to write TM config backup file: %v", 
wErr)
+               }
        }
 
        return configMap, err
@@ -613,10 +661,10 @@ func (s TrafficOpsSessionThreadsafe) MonitorCDN(hostName 
string) (string, error)
        var server tc.ServerV30
        var err error
 
-       if s.useLegacy {
+       server, err = s.fetchServerByHostname(hostName)
+       if err != nil {
+               log.Warnln("getting server by hostname '" + hostName + "' using 
up-to-date client: " + err.Error() + ". Retrying with legacy client")
                server, err = s.fetchLegacyServerByHostname(hostName)
-       } else {
-               server, err = s.fetchServerByHostname(hostName)
        }
 
        if err != nil {
diff --git a/traffic_monitor/handler/handler.go 
b/traffic_monitor/towrap/towrap_test.go
similarity index 56%
copy from traffic_monitor/handler/handler.go
copy to traffic_monitor/towrap/towrap_test.go
index 88832e8..e8482f3 100644
--- a/traffic_monitor/handler/handler.go
+++ b/traffic_monitor/towrap/towrap_test.go
@@ -1,4 +1,4 @@
-package handler
+package towrap
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one
@@ -20,29 +20,18 @@ package handler
  */
 
 import (
-       "io"
+       "testing"
        "time"
-)
 
-const (
-       NOTIFY_NEVER = iota
-       NOTIFY_CHANGE
-       NOTIFY_ALWAYS
+       "github.com/apache/trafficcontrol/traffic_monitor/config"
 )
 
-type OpsConfig struct {
-       Username      string `json:"username"`
-       Password      string `json:"password"`
-       Url           string `json:"url"`
-       Insecure      bool   `json:"insecure"`
-       CdnName       string `json:"cdnName"`
-       HttpListener  string `json:"httpListener"`
-       HttpsListener string `json:"httpsListener"`
-       CertFile      string `json:"certFile"`
-       KeyFile       string `json:"keyFile"`
-       UsingDummyTO  bool   `json:"usingDummyTO"`
-}
-
-type Handler interface {
-       Handle(string, io.Reader, string, time.Duration, time.Time, error, 
uint64, bool, interface{}, chan<- uint64)
+func TestTrafficOpsSessionThreadsafeUpdateSetsNonNilSessions(t *testing.T) {
+       s := NewTrafficOpsSessionThreadsafe(nil, nil, 5, config.Config{})
+       err := s.Update("", "", "", true, "", false, 10*time.Second)
+       if err == nil {
+               t.Error("expected an error, got nil")
+       } else if s.session == nil || *s.session == nil || s.legacySession == 
nil || *s.legacySession == nil {
+               t.Errorf("expected non-nil sessions after getting error from 
Update()")
+       }
 }

Reply via email to