[
https://issues.apache.org/jira/browse/TS-3455?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Luca Bruno updated TS-3455:
---------------------------
Description:
I've written a simple test case plugin for demonstrating this problem, not sure
if it's a problem on my side, but that would also mean that the regex
invalidate plugin would also abort().
What the plugin does: in LOOKUP_COMPLETE, if the cache status is FRESH then set
it to STALE.
To reproduce:
1) Send a first cacheable request to ATS, which gets cached.
2) Request again the same url, the plugin triggers and set the cache to STALE.
Then ATS does abort().
Plugin code:
{noformat}
#include <ts/ts.h>
#include <ts/remap.h>
#include <ts/experimental.h>
#include <stdlib.h>
#include <stdio.h>
#include <getopt.h>
#include <string.h>
#include <string>
#include <iterator>
#include <map>
const char PLUGIN_NAME[] = "maybebug";
static int Handler(TSCont cont, TSEvent event, void *edata);
struct PluginState {
PluginState()
{
cont = TSContCreate(Handler, NULL);
TSContDataSet(cont, this);
}
~PluginState()
{
TSContDestroy(cont);
}
TSCont cont;
};
static int Handler(TSCont cont, TSEvent event, void* edata) {
TSHttpTxn txn = (TSHttpTxn)edata;
if (event == TS_EVENT_HTTP_CACHE_LOOKUP_COMPLETE) {
int lookup_status;
if (TS_SUCCESS == TSHttpTxnCacheLookupStatusGet(txn, &lookup_status)) {
TSDebug(PLUGIN_NAME, "lookup complete: %d", lookup_status);
if (lookup_status == TS_CACHE_LOOKUP_HIT_FRESH) {
TSDebug(PLUGIN_NAME, "set stale");
TSHttpTxnCacheLookupStatusSet(txn, TS_CACHE_LOOKUP_HIT_STALE);
}
}
}
TSHttpTxnReenable(txn, TS_EVENT_HTTP_CONTINUE);
return TS_EVENT_NONE;
}
void TSPluginInit (int argc, const char *argv[]) {
TSPluginRegistrationInfo info;
info.plugin_name = strdup("cappello");
info.vendor_name = strdup("foo");
info.support_email = strdup("[email protected]");
if (TSPluginRegister(TS_SDK_VERSION_3_0 , &info) != TS_SUCCESS) {
TSError("Plugin registration failed");
}
PluginState* state = new PluginState();
TSHttpHookAdd(TS_HTTP_CACHE_LOOKUP_COMPLETE_HOOK, state->cont);
}
{noformat}
Output:
{noformat}
[Mar 19 18:40:36.254] Server {0x7f6df0b4f740} DIAG: (maybebug) lookup complete: 0
[Mar 19 18:40:40.854] Server {0x7f6decfee700} DIAG: (maybebug) lookup complete:
2
[Mar 19 18:40:40.854] Server {0x7f6decfee700} DIAG: (maybebug) set stale
FATAL: HttpTransact.cc:433: failed assert `s->pending_work == NULL`
traffic_server - STACK TRACE:
/usr/local/lib/libtsutil.so.5(ink_fatal+0xa3)[0x7f6df072186d]
/usr/local/lib/libtsutil.so.5(_Z12ink_get_randv+0x0)[0x7f6df071f3a0]
traffic_server[0x60d0aa]
traffic_server(_ZN12HttpTransact22HandleCacheOpenReadHitEPNS_5StateE+0xf82)[0x619206]
...
{noformat}
What happens in gdb is that HandleCacheOpenReadHit is called twice in the same
request. The first time s->pending_work is NULL, the second time it's not NULL.
The patch below fixes the problem:
{noformat}
diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc
index 0078ef1..852f285 100644
--- a/proxy/http/HttpTransact.cc
+++ b/proxy/http/HttpTransact.cc
@@ -2641,11 +2641,6 @@ HttpTransact::HandleCacheOpenReadHit(State* s)
// ink_release_assert(s->current.request_to == PARENT_PROXY ||
// s->http_config_param->no_dns_forward_to_parent != 0);
- // Set ourselves up to handle pending revalidate issues
- // after the PP DNS lookup
- ink_assert(s->pending_work == NULL);
- s->pending_work = issue_revalidate;
-
// We must be going a PARENT PROXY since so did
// origin server DNS lookup right after state Start
//
@@ -2654,6 +2649,11 @@ HttpTransact::HandleCacheOpenReadHit(State* s)
// missing ip but we won't take down the system
//
if (s->current.request_to == PARENT_PROXY) {
+ // Set ourselves up to handle pending revalidate issues
+ // after the PP DNS lookup
+ ink_assert(s->pending_work == NULL);
+ s->pending_work = issue_revalidate;
+
TRANSACT_RETURN(SM_ACTION_DNS_LOOKUP, PPDNSLookup);
} else if (s->current.request_to == ORIGIN_SERVER) {
TRANSACT_RETURN(SM_ACTION_DNS_LOOKUP, OSDNSLookup);
{noformat}
was:
I've written a simple test case plugin for demonstrating this problem, not sure
if it's a problem on my side, but that would also mean that the regex
invalidate plugin would also abort().
What the plugin does: in LOOKUP_COMPLETE, if the cache status is FRESH then set
it to STALE.
To reproduce:
1) First cacheable request to ATS gets cached
2) Request again the same url, the plugin triggers and set the cache to STALE.
Then ATS does abort().
Plugin code:
{noformat}
#include <ts/ts.h>
#include <ts/remap.h>
#include <ts/experimental.h>
#include <stdlib.h>
#include <stdio.h>
#include <getopt.h>
#include <string.h>
#include <string>
#include <iterator>
#include <map>
const char PLUGIN_NAME[] = "maybebug";
static int Handler(TSCont cont, TSEvent event, void *edata);
struct PluginState {
PluginState()
{
cont = TSContCreate(Handler, NULL);
TSContDataSet(cont, this);
}
~PluginState()
{
TSContDestroy(cont);
}
TSCont cont;
};
static int Handler(TSCont cont, TSEvent event, void* edata) {
TSHttpTxn txn = (TSHttpTxn)edata;
if (event == TS_EVENT_HTTP_CACHE_LOOKUP_COMPLETE) {
int lookup_status;
if (TS_SUCCESS == TSHttpTxnCacheLookupStatusGet(txn, &lookup_status)) {
TSDebug(PLUGIN_NAME, "lookup complete: %d", lookup_status);
if (lookup_status == TS_CACHE_LOOKUP_HIT_FRESH) {
TSDebug(PLUGIN_NAME, "set stale");
TSHttpTxnCacheLookupStatusSet(txn, TS_CACHE_LOOKUP_HIT_STALE);
}
}
}
TSHttpTxnReenable(txn, TS_EVENT_HTTP_CONTINUE);
return TS_EVENT_NONE;
}
void TSPluginInit (int argc, const char *argv[]) {
TSPluginRegistrationInfo info;
info.plugin_name = strdup("cappello");
info.vendor_name = strdup("foo");
info.support_email = strdup("[email protected]");
if (TSPluginRegister(TS_SDK_VERSION_3_0 , &info) != TS_SUCCESS) {
TSError("Plugin registration failed");
}
PluginState* state = new PluginState();
TSHttpHookAdd(TS_HTTP_CACHE_LOOKUP_COMPLETE_HOOK, state->cont);
}
{noformat}
Output:
{noformat}
[Mar 19 18:40:36.254] Server {0x7f6df0b4f740} DIAG: (maybebug) lookup complete: 0
[Mar 19 18:40:40.854] Server {0x7f6decfee700} DIAG: (maybebug) lookup complete:
2
[Mar 19 18:40:40.854] Server {0x7f6decfee700} DIAG: (maybebug) set stale
FATAL: HttpTransact.cc:433: failed assert `s->pending_work == NULL`
traffic_server - STACK TRACE:
/usr/local/lib/libtsutil.so.5(ink_fatal+0xa3)[0x7f6df072186d]
/usr/local/lib/libtsutil.so.5(_Z12ink_get_randv+0x0)[0x7f6df071f3a0]
traffic_server[0x60d0aa]
traffic_server(_ZN12HttpTransact22HandleCacheOpenReadHitEPNS_5StateE+0xf82)[0x619206]
...
{noformat}
What happens in gdb is that HandleCacheOpenReadHit is called twice in the same
request. The first time s->pending_work is NULL, the second time it's not NULL.
The patch below fixes the problem:
{noformat}
diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc
index 0078ef1..852f285 100644
--- a/proxy/http/HttpTransact.cc
+++ b/proxy/http/HttpTransact.cc
@@ -2641,11 +2641,6 @@ HttpTransact::HandleCacheOpenReadHit(State* s)
// ink_release_assert(s->current.request_to == PARENT_PROXY ||
// s->http_config_param->no_dns_forward_to_parent != 0);
- // Set ourselves up to handle pending revalidate issues
- // after the PP DNS lookup
- ink_assert(s->pending_work == NULL);
- s->pending_work = issue_revalidate;
-
// We must be going a PARENT PROXY since so did
// origin server DNS lookup right after state Start
//
@@ -2654,6 +2649,11 @@ HttpTransact::HandleCacheOpenReadHit(State* s)
// missing ip but we won't take down the system
//
if (s->current.request_to == PARENT_PROXY) {
+ // Set ourselves up to handle pending revalidate issues
+ // after the PP DNS lookup
+ ink_assert(s->pending_work == NULL);
+ s->pending_work = issue_revalidate;
+
TRANSACT_RETURN(SM_ACTION_DNS_LOOKUP, PPDNSLookup);
} else if (s->current.request_to == ORIGIN_SERVER) {
TRANSACT_RETURN(SM_ACTION_DNS_LOOKUP, OSDNSLookup);
{noformat}
> Marking the cache STALE in lookup-complete causes abort()
> ---------------------------------------------------------
>
> Key: TS-3455
> URL: https://issues.apache.org/jira/browse/TS-3455
> Project: Traffic Server
> Issue Type: Bug
> Reporter: Luca Bruno
>
> I've written a simple test case plugin for demonstrating this problem, not
> sure if it's a problem on my side, but that would also mean that the regex
> invalidate plugin would also abort().
> What the plugin does: in LOOKUP_COMPLETE, if the cache status is FRESH then
> set it to STALE.
> To reproduce:
> 1) Send a first cacheable request to ATS, which gets cached.
> 2) Request again the same url, the plugin triggers and set the cache to
> STALE. Then ATS does abort().
> Plugin code:
> {noformat}
> #include <ts/ts.h>
> #include <ts/remap.h>
> #include <ts/experimental.h>
> #include <stdlib.h>
> #include <stdio.h>
> #include <getopt.h>
> #include <string.h>
> #include <string>
> #include <iterator>
> #include <map>
> const char PLUGIN_NAME[] = "maybebug";
> static int Handler(TSCont cont, TSEvent event, void *edata);
> struct PluginState {
> PluginState()
> {
> cont = TSContCreate(Handler, NULL);
> TSContDataSet(cont, this);
> }
> ~PluginState()
> {
> TSContDestroy(cont);
> }
> TSCont cont;
> };
> static int Handler(TSCont cont, TSEvent event, void* edata) {
> TSHttpTxn txn = (TSHttpTxn)edata;
> if (event == TS_EVENT_HTTP_CACHE_LOOKUP_COMPLETE) {
> int lookup_status;
> if (TS_SUCCESS == TSHttpTxnCacheLookupStatusGet(txn, &lookup_status)) {
> TSDebug(PLUGIN_NAME, "lookup complete: %d", lookup_status);
> if (lookup_status == TS_CACHE_LOOKUP_HIT_FRESH) {
> TSDebug(PLUGIN_NAME, "set stale");
> TSHttpTxnCacheLookupStatusSet(txn, TS_CACHE_LOOKUP_HIT_STALE);
> }
> }
> }
> TSHttpTxnReenable(txn, TS_EVENT_HTTP_CONTINUE);
> return TS_EVENT_NONE;
> }
> void TSPluginInit (int argc, const char *argv[]) {
> TSPluginRegistrationInfo info;
> info.plugin_name = strdup("cappello");
> info.vendor_name = strdup("foo");
> info.support_email = strdup("[email protected]");
> if (TSPluginRegister(TS_SDK_VERSION_3_0 , &info) != TS_SUCCESS) {
> TSError("Plugin registration failed");
> }
> PluginState* state = new PluginState();
> TSHttpHookAdd(TS_HTTP_CACHE_LOOKUP_COMPLETE_HOOK, state->cont);
> }
> {noformat}
> Output:
> {noformat}
> [Mar 19 18:40:36.254] Server {0x7f6df0b4f740} DIAG: (maybebug) lookup
> complete: 0
> [Mar 19 18:40:40.854] Server {0x7f6decfee700} DIAG: (maybebug) lookup
> complete: 2
> [Mar 19 18:40:40.854] Server {0x7f6decfee700} DIAG: (maybebug) set stale
> FATAL: HttpTransact.cc:433: failed assert `s->pending_work == NULL`
> traffic_server - STACK TRACE:
> /usr/local/lib/libtsutil.so.5(ink_fatal+0xa3)[0x7f6df072186d]
> /usr/local/lib/libtsutil.so.5(_Z12ink_get_randv+0x0)[0x7f6df071f3a0]
> traffic_server[0x60d0aa]
> traffic_server(_ZN12HttpTransact22HandleCacheOpenReadHitEPNS_5StateE+0xf82)[0x619206]
> ...
> {noformat}
> What happens in gdb is that HandleCacheOpenReadHit is called twice in the
> same request. The first time s->pending_work is NULL, the second time it's
> not NULL.
> The patch below fixes the problem:
> {noformat}
> diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc
> index 0078ef1..852f285 100644
> --- a/proxy/http/HttpTransact.cc
> +++ b/proxy/http/HttpTransact.cc
> @@ -2641,11 +2641,6 @@ HttpTransact::HandleCacheOpenReadHit(State* s)
> // ink_release_assert(s->current.request_to == PARENT_PROXY ||
> // s->http_config_param->no_dns_forward_to_parent != 0);
>
> - // Set ourselves up to handle pending revalidate issues
> - // after the PP DNS lookup
> - ink_assert(s->pending_work == NULL);
> - s->pending_work = issue_revalidate;
> -
> // We must be going a PARENT PROXY since so did
> // origin server DNS lookup right after state Start
> //
> @@ -2654,6 +2649,11 @@ HttpTransact::HandleCacheOpenReadHit(State* s)
> // missing ip but we won't take down the system
> //
> if (s->current.request_to == PARENT_PROXY) {
> + // Set ourselves up to handle pending revalidate issues
> + // after the PP DNS lookup
> + ink_assert(s->pending_work == NULL);
> + s->pending_work = issue_revalidate;
> +
> TRANSACT_RETURN(SM_ACTION_DNS_LOOKUP, PPDNSLookup);
> } else if (s->current.request_to == ORIGIN_SERVER) {
> TRANSACT_RETURN(SM_ACTION_DNS_LOOKUP, OSDNSLookup);
> {noformat}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)