(trafficserver) branch master updated: escalate plugin: handle origin not reachable (#11847)

bneradt Thu, 12 Dec 2024 14:56:30 -0800

This is an automated email from the ASF dual-hosted git repository.

bneradt pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git



The following commit(s) were added to refs/heads/master by this push:
     new ec6fceec3b escalate plugin: handle origin not reachable (#11847)
ec6fceec3b is described below

commit ec6fceec3bec8844b462edff61ab55a2dd8b131d
Author: Brian Neradt <[email protected]>
AuthorDate: Thu Dec 12 16:56:19 2024 -0600

    escalate plugin: handle origin not reachable (#11847)
    
    Update the escalate plugin to handle dispatching to the failover server
    if the original server is down.
    
    This also adds a new autest for the escalate plugin.
    
    Fixes: #11836
---
 plugins/escalate/escalate.cc                       | 102 ++++++++------
 .../pluginTest/escalate/escalate.test.py           | 130 ++++++++++++++++++
 .../escalate/escalate_failover.replay.yaml         | 126 ++++++++++++++++++
 .../escalate/escalate_original.replay.yaml         | 147 +++++++++++++++++++++
 4 files changed, 468 insertions(+), 37 deletions(-)

diff --git a/plugins/escalate/escalate.cc b/plugins/escalate/escalate.cc
index ba0f394d29..0f1104ffff 100644
--- a/plugins/escalate/escalate.cc
+++ b/plugins/escalate/escalate.cc
@@ -30,6 +30,9 @@
 #include <iterator>
 #include <map>
 
+#include "swoc/IPEndpoint.h"
+#include "swoc/TextView.h"
+
 // Constants and some declarations
 
 const char PLUGIN_NAME[] = "escalate";
@@ -72,12 +75,19 @@ struct EscalationState {
 char *
 MakeEscalateUrl(TSMBuffer mbuf, TSMLoc url, const char *host, size_t host_len, 
int &url_len)
 {
-  char *url_str = nullptr;
-
+  swoc::TextView   input_host_view{host, host_len};
+  std::string_view host_view;
+  std::string_view port_view;
+  swoc::IPEndpoint::tokenize(input_host_view, &host_view, &port_view);
   // Update the request URL with the new Host to try.
-  TSUrlHostSet(mbuf, url, host, host_len);
-  url_str = TSUrlStringGet(mbuf, url, &url_len);
-  Dbg(dbg_ctl, "Setting new URL to %.*s", url_len, url_str);
+  TSUrlHostSet(mbuf, url, host_view.data(), host_view.size());
+  if (port_view.size()) {
+    int const port_int = swoc::svtou(port_view);
+    TSUrlPortSet(mbuf, url, port_int);
+    Dbg(dbg_ctl, "Setting port to %d", port_int);
+  }
+  char *url_str = TSUrlStringGet(mbuf, url, &url_len);
+  Dbg(dbg_ctl, "Setting new URL from configured %.*s to %.*s", (int)host_len, 
host, url_len, url_str);
 
   return url_str;
 }
@@ -88,58 +98,76 @@ MakeEscalateUrl(TSMBuffer mbuf, TSMLoc url, const char 
*host, size_t host_len, i
 static int
 EscalateResponse(TSCont cont, TSEvent event, void *edata)
 {
-  TSHttpTxn                                      txn = 
static_cast<TSHttpTxn>(edata);
-  EscalationState                               *es  = 
static_cast<EscalationState *>(TSContDataGet(cont));
-  EscalationState::StatusMapType::const_iterator entry;
-  TSMBuffer                                      mbuf;
-  TSMLoc                                         hdrp, url;
-  TSHttpStatus                                   status;
-  char                                          *url_str = nullptr;
-  int                                            url_len, tries;
-
-  TSAssert(event == TS_EVENT_HTTP_READ_RESPONSE_HDR);
-
-  // First, we need the server response ...
-  if (TS_SUCCESS != TSHttpTxnServerRespGet(txn, &mbuf, &hdrp)) {
-    goto no_action;
+  TSHttpTxn        txn = static_cast<TSHttpTxn>(edata);
+  EscalationState *es  = static_cast<EscalationState *>(TSContDataGet(cont));
+  TSMBuffer        mbuf;
+  TSMLoc           hdrp, url;
+
+  TSAssert(event == TS_EVENT_HTTP_READ_RESPONSE_HDR || event == 
TS_EVENT_HTTP_SEND_RESPONSE_HDR);
+  bool const processing_connection_error = (event == 
TS_EVENT_HTTP_SEND_RESPONSE_HDR);
+
+  if (processing_connection_error) {
+    TSServerState const state = TSHttpTxnServerStateGet(txn);
+    if (state == TS_SRVSTATE_CONNECTION_ALIVE) {
+      // There is no connection error, so nothing to do.
+      TSHttpTxnReenable(txn, TS_EVENT_HTTP_CONTINUE);
+      return TS_EVENT_NONE;
+    }
   }
 
-  tries = TSHttpTxnRedirectRetries(txn);
+  int const tries = TSHttpTxnRedirectRetries(txn);
   if (0 != tries) { // ToDo: Future support for more than one retry-URL
-    goto no_action;
+    Dbg(dbg_ctl, "Not pursuing failover due previous redirect already, num 
tries: %d", tries);
+    TSHttpTxnReenable(txn, TS_EVENT_HTTP_CONTINUE);
+    return TS_EVENT_NONE;
+  }
+
+  int ret = 0;
+  if (processing_connection_error) {
+    ret = TSHttpTxnClientRespGet(txn, &mbuf, &hdrp);
+  } else {
+    ret = TSHttpTxnServerRespGet(txn, &mbuf, &hdrp);
+  }
+  if (TS_SUCCESS != ret) {
+    TSHttpTxnReenable(txn, TS_EVENT_HTTP_CONTINUE);
+    return TS_EVENT_NONE;
   }
-  Dbg(dbg_ctl, "This is try %d, proceeding", tries);
 
   // Next, the response status ...
-  status = TSHttpHdrStatusGet(mbuf, hdrp);
-  TSHandleMLocRelease(mbuf, TS_NULL_MLOC, hdrp); // Don't need this any more
+  TSHttpStatus const status = TSHttpHdrStatusGet(mbuf, hdrp);
+  TSHandleMLocRelease(mbuf, TS_NULL_MLOC, hdrp);
 
-  // See if we have an escalation retry config for this response code
-  entry = es->status_map.find(static_cast<unsigned>(status));
+  // See if we have an escalation retry config for this response code.
+  auto const entry = es->status_map.find(status);
   if (entry == es->status_map.end()) {
-    goto no_action;
+    TSHttpTxnReenable(txn, TS_EVENT_HTTP_CONTINUE);
+    return TS_EVENT_NONE;
   }
-
-  Dbg(dbg_ctl, "Found an entry for HTTP status %u", 
static_cast<unsigned>(status));
-  if (EscalationState::RETRY_URL == entry->second.type) {
-    url_str = TSstrdup(entry->second.target.c_str());
-    url_len = entry->second.target.size();
+  EscalationState::RetryInfo const &retry_info = entry->second;
+
+  Dbg(dbg_ctl, "Handling failover redirect for HTTP status %d", status);
+  char const *url_str = nullptr;
+  int         url_len = 0;
+  if (EscalationState::RETRY_URL == retry_info.type) {
+    url_str = TSstrdup(retry_info.target.c_str());
+    url_len = retry_info.target.size();
     Dbg(dbg_ctl, "Setting new URL to %.*s", url_len, url_str);
-  } else if (EscalationState::RETRY_HOST == entry->second.type) {
+  } else if (EscalationState::RETRY_HOST == retry_info.type) {
     if (es->use_pristine) {
       if (TS_SUCCESS == TSHttpTxnPristineUrlGet(txn, &mbuf, &url)) {
-        url_str = MakeEscalateUrl(mbuf, url, entry->second.target.c_str(), 
entry->second.target.size(), url_len);
+        url_str = MakeEscalateUrl(mbuf, url, retry_info.target.c_str(), 
retry_info.target.size(), url_len);
         TSHandleMLocRelease(mbuf, TS_NULL_MLOC, url);
       }
     } else {
       if (TS_SUCCESS == TSHttpTxnClientReqGet(txn, &mbuf, &hdrp)) {
         if (TS_SUCCESS == TSHttpHdrUrlGet(mbuf, hdrp, &url)) {
-          url_str = MakeEscalateUrl(mbuf, url, entry->second.target.c_str(), 
entry->second.target.size(), url_len);
+          url_str = MakeEscalateUrl(mbuf, url, retry_info.target.c_str(), 
retry_info.target.size(), url_len);
         }
         // Release the request MLoc
         TSHandleMLocRelease(mbuf, TS_NULL_MLOC, hdrp);
       }
     }
+    Dbg(dbg_ctl, "Setting host URL to %.*s", url_len, url_str);
   }
 
   // Now update the Redirect URL, if set
@@ -147,8 +175,7 @@ EscalateResponse(TSCont cont, TSEvent event, void *edata)
     TSHttpTxnRedirectUrlSet(txn, url_str, url_len); // Transfers ownership
   }
 
-// Set the transaction free ...
-no_action:
+  // Set the transaction free ...
   TSHttpTxnReenable(txn, TS_EVENT_HTTP_CONTINUE);
   return TS_EVENT_NONE;
 }
@@ -229,5 +256,6 @@ TSRemapDoRemap(void *instance, TSHttpTxn txn, 
TSRemapRequestInfo * /* rri */)
   EscalationState *es = static_cast<EscalationState *>(instance);
 
   TSHttpTxnHookAdd(txn, TS_HTTP_READ_RESPONSE_HDR_HOOK, es->cont);
+  TSHttpTxnHookAdd(txn, TS_HTTP_SEND_RESPONSE_HDR_HOOK, es->cont);
   return TSREMAP_NO_REMAP;
 }
diff --git a/tests/gold_tests/pluginTest/escalate/escalate.test.py 
b/tests/gold_tests/pluginTest/escalate/escalate.test.py
new file mode 100644
index 0000000000..68c4e7a915
--- /dev/null
+++ b/tests/gold_tests/pluginTest/escalate/escalate.test.py
@@ -0,0 +1,130 @@
+'''
+'''
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+import os
+from ports import get_port
+
+Test.Summary = '''
+Test the escalate plugin.
+'''
+
+Test.SkipUnless(Condition.PluginExists('escalate.so'))
+
+
+class EscalateTest:
+    """
+    Test the escalate plugin.
+    """
+
+    _replay_original_file: str = 'escalate_original.replay.yaml'
+    _replay_failover_file: str = 'escalate_failover.replay.yaml'
+
+    def __init__(self):
+        '''Configure the test run.'''
+        tr = Test.AddTestRun('Test escalate plugin.')
+        self._setup_dns(tr)
+        self._setup_servers(tr)
+        self._setup_ts(tr)
+        self._setup_client(tr)
+
+    def _setup_dns(self, tr: 'Process') -> None:
+        '''Set up the DNS server.
+
+        :param tr: The test run to add the DNS server to.
+        '''
+        self._dns = tr.MakeDNServer(f"dns", default='127.0.0.1')
+
+    def _setup_servers(self, tr: 'Process') -> None:
+        '''Set up the origin and failover servers.
+
+        :param tr: The test run to add the servers to.
+        '''
+        tr.Setup.Copy(self._replay_original_file)
+        tr.Setup.Copy(self._replay_failover_file)
+        self._server_origin = tr.AddVerifierServerProcess(f"server_origin", 
self._replay_original_file)
+        self._server_failover = 
tr.AddVerifierServerProcess(f"server_failover", self._replay_failover_file)
+
+        self._server_origin.Streams.All += Testers.ContainsExpression(
+            'uuid: GET', "Verify the origin server received the GET request.")
+        self._server_origin.Streams.All += Testers.ContainsExpression(
+            'uuid: GET_chunked', "Verify the origin server GET request for 
chunked content.")
+        self._server_origin.Streams.All += Testers.ContainsExpression(
+            'uuid: GET_failed', "Verify the origin server received the GET 
request that it returns a 502 with.")
+        self._server_origin.Streams.All += Testers.ExcludesExpression(
+            'uuid: GET_down_origin', "Verify the origin server did not receive 
the down origin request.")
+
+        self._server_failover.Streams.All += Testers.ContainsExpression(
+            'uuid: GET_failed', "Verify the failover server received the 
failed GET request.")
+        self._server_failover.Streams.All += Testers.ContainsExpression(
+            'uuid: GET_down_origin', "Verify the failover server received the 
GET request for the down origin.")
+
+        self._server_failover.Streams.All += Testers.ExcludesExpression(
+            'x-request: first', "Verify the failover server did not receive 
the GET request.")
+        self._server_failover.Streams.All += Testers.ExcludesExpression(
+            'uuid: GET_chunked', "Verify the failover server did not receive 
the GET request for chunked content.")
+
+    def _setup_ts(self, tr: 'Process') -> None:
+        '''Set up Traffic Server.
+
+        :param tr: The test run to add Traffic Server to.
+        '''
+        self._ts = tr.MakeATSProcess(f"ts", enable_cache=False)
+        # Select a port that is guaranteed to not be used at the moment.
+        dead_port = get_port(self._ts, "dead_port")
+        self._ts.Disk.records_config.update(
+            {
+                'proxy.config.diags.debug.enabled': 1,
+                'proxy.config.diags.debug.tags': 'http|escalate',
+                'proxy.config.dns.nameservers': 
f'127.0.0.1:{self._dns.Variables.Port}',
+                'proxy.config.dns.resolv_conf': 'NULL',
+                'proxy.config.http.redirect.actions': 'self:follow',
+                'proxy.config.http.number_of_redirections': 4,
+            })
+        self._ts.Disk.remap_config.AddLines(
+            [
+                f'map http://origin.server.com 
http://backend.origin.server.com:{self._server_origin.Variables.http_port} '
+                f'@plugin=escalate.so 
@pparam=500,502:failover.server.com:{self._server_failover.Variables.http_port}',
+
+                # Now create a remap entry whose origin is unreachable: it targets
+                # dead_port, so the request must escalate to the failover server.
+                f'map http://down_origin.server.com 
http://backend.down_origin.server.com:{dead_port} '
+                f'@plugin=escalate.so 
@pparam=500,502:failover.server.com:{self._server_failover.Variables.http_port} 
',
+            ])
+
+    def _setup_client(self, tr: 'Process') -> None:
+        '''Set up the client.
+
+        :param tr: The test run to add the client to.
+        '''
+        client = tr.AddVerifierClientProcess(f"client", 
self._replay_original_file, http_ports=[self._ts.Variables.port])
+        client.StartBefore(self._dns)
+        client.StartBefore(self._server_origin)
+        client.StartBefore(self._server_failover)
+        client.StartBefore(self._ts)
+
+        client.Streams.All += Testers.ExcludesExpression(r'\[ERROR\]', 'Verify 
there were no errors in the replay.')
+        client.Streams.All += Testers.ExcludesExpression('400 Bad', 'Verify 
none of the 400 responses make it to the client.')
+        client.Streams.All += Testers.ExcludesExpression('502 Bad', 'Verify 
none of the 502 responses make it to the client.')
+        client.Streams.All += Testers.ExcludesExpression('500 Internal', 
'Verify none of the 500 responses make it to the client.')
+        client.Streams.All += Testers.ContainsExpression('x-response: first', 
'Verify that the first response was received.')
+        client.Streams.All += Testers.ContainsExpression('x-response: second', 
'Verify that the second response was received.')
+        client.Streams.All += Testers.ContainsExpression('x-response: third', 
'Verify that the third response was received.')
+        client.Streams.All += Testers.ContainsExpression('x-response: fourth', 
'Verify that the fourth response was received.')
+
+
+EscalateTest()
diff --git a/tests/gold_tests/pluginTest/escalate/escalate_failover.replay.yaml 
b/tests/gold_tests/pluginTest/escalate/escalate_failover.replay.yaml
new file mode 100644
index 0000000000..3adffddeda
--- /dev/null
+++ b/tests/gold_tests/pluginTest/escalate/escalate_failover.replay.yaml
@@ -0,0 +1,126 @@
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+meta:
+  version: "1.0"
+
+sessions:
+- transactions:
+  - client-request:
+      method: "GET"
+      version: "1.1"
+      url: /path/get
+      headers:
+        fields:
+        - [ Host, origin.server.com ]
+        - [ Content-Length, 0 ]
+        - [ X-Request, first ]
+        - [ uuid, GET ]
+
+    proxy-request:
+      method: "GET"
+      headers:
+        fields:
+        - [ X-Request, { value: first, as: equal } ]
+
+    server-response:
+      # The failover server should not receive this request since the original
+      # server should handle it.
+      status: 400
+      reason: Bad Request
+      headers:
+        fields:
+        - [ Content-Length, 0 ]
+
+  - client-request:
+      method: "GET"
+      version: "1.1"
+      url: /path/get_chunked
+      headers:
+        fields:
+        - [ Host, origin.server.com ]
+        - [ Content-Length, 0 ]
+        - [ X-Request, second ]
+        - [ uuid, GET_chunked ]
+
+    proxy-request:
+      method: "GET"
+      headers:
+        fields:
+        - [ X-Request, { value: second, as: equal } ]
+
+    server-response:
+      # The failover server should not receive this request since the original
+      # server should handle it.
+      status: 400
+      reason: Bad Request
+      headers:
+        fields:
+        - [ Content-Length, 0 ]
+
+  - client-request:
+      method: "GET"
+      version: "1.1"
+      url: /path/get_failed
+      headers:
+        fields:
+        - [ Host, origin.server.com ]
+        - [ Content-Length, 0 ]
+        - [ X-Request, third ]
+        - [ uuid, GET_failed ]
+
+    proxy-request:
+      method: "GET"
+      headers:
+        fields:
+        - [ X-Request, { value: third, as: equal } ]
+
+    server-response:
+      status: 200
+      reason: OK
+      headers:
+        fields:
+        - [ Content-Length, 320000 ]
+        - [ X-Response, third ]
+
+  # This will not make it to the origin server since the Host is set to a
+  # non-responsive server. But the failover server should reply with a 200 OK.
+  - client-request:
+      method: "GET"
+      version: "1.1"
+      url: /path/get_down
+      headers:
+        fields:
+        - [ Host, down_origin.server.com ]
+        - [ Content-Length, 0 ]
+        - [ X-Request, fourth ]
+        - [ uuid, GET_down_origin ]
+
+    proxy-request:
+      method: "GET"
+      headers:
+        fields:
+        - [ X-Request, { value: fourth, as: equal } ]
+
+    server-response:
+      status: 200
+      reason: OK
+      headers:
+        fields:
+        - [ Transfer-Encoding, chunked ]
+        - [ X-Response, fourth ]
+      content:
+        size: 320000
diff --git a/tests/gold_tests/pluginTest/escalate/escalate_original.replay.yaml 
b/tests/gold_tests/pluginTest/escalate/escalate_original.replay.yaml
new file mode 100644
index 0000000000..898464bee2
--- /dev/null
+++ b/tests/gold_tests/pluginTest/escalate/escalate_original.replay.yaml
@@ -0,0 +1,147 @@
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+meta:
+  version: "1.0"
+
+sessions:
+- transactions:
+  - client-request:
+      method: "GET"
+      version: "1.1"
+      url: /path/get
+      headers:
+        fields:
+        - [ Host, origin.server.com ]
+        - [ Content-Length, 0 ]
+        - [ X-Request, first ]
+        - [ uuid, GET ]
+
+    proxy-request:
+      method: "GET"
+      headers:
+        fields:
+        - [ X-Request, { value: first, as: equal } ]
+
+    server-response:
+      status: 200
+      reason: OK
+      headers:
+        fields:
+        - [ Content-Length, 320000 ]
+        - [ X-Response, first ]
+
+    proxy-response:
+      status: 200
+      headers:
+        fields:
+        - [ X-Response, { value: first, as: equal } ]
+
+  - client-request:
+      method: "GET"
+      version: "1.1"
+      url: /path/get_chunked
+      headers:
+        fields:
+        - [ Host, origin.server.com ]
+        - [ Content-Length, 0 ]
+        - [ X-Request, second ]
+        - [ uuid, GET_chunked ]
+
+    proxy-request:
+      method: "GET"
+      headers:
+        fields:
+        - [ X-Request, { value: second, as: equal } ]
+
+    server-response:
+      status: 200
+      reason: OK
+      headers:
+        fields:
+        - [ Transfer-Encoding, chunked ]
+        - [ X-Response, second ]
+      content:
+        size: 320000
+
+    proxy-response:
+      status: 200
+      headers:
+        fields:
+        - [ X-Response, { value: second, as: equal } ]
+
+  - client-request:
+      method: "GET"
+      version: "1.1"
+      url: /path/get_failed
+      headers:
+        fields:
+        - [ Host, origin.server.com ]
+        - [ Content-Length, 0 ]
+        - [ X-Request, third ]
+        - [ uuid, GET_failed ]
+
+    proxy-request:
+      method: "GET"
+      headers:
+        fields:
+        - [ X-Request, { value: third, as: equal } ]
+
+    server-response:
+      status: 502
+      reason: Bad Gateway
+      headers:
+        fields:
+        - [ Content-Length, 0 ]
+
+    proxy-response:
+      # The failover server should reply with a 200 OK.
+      status: 200
+      headers:
+        fields:
+        - [ X-Response, { value: third, as: equal } ]
+
+  # This will not make it to the origin server since the Host is set to a
+  # non-responsive server. But the failover server should reply with a 200 OK.
+  - client-request:
+      method: "GET"
+      version: "1.1"
+      url: /path/get_down
+      headers:
+        fields:
+        - [ Host, down_origin.server.com ]
+        - [ Content-Length, 0 ]
+        - [ X-Request, fourth ]
+        - [ uuid, GET_down_origin ]
+
+    proxy-request:
+      method: "GET"
+      headers:
+        fields:
+        - [ X-Request, { value: fourth, as: equal } ]
+
+    server-response:
+      status: 200
+      reason: OK
+      headers:
+        fields:
+        - [ Content-Length, 320000 ]
+
+    proxy-response:
+      status: 200
+      headers:
+        fields:
+        - [ X-Response, { value: fourth, as: equal } ]

(trafficserver) branch master updated: escalate plugin: handle origin not reachable (#11847)

Reply via email to