Hello,
attached patch is preliminary implementation of selective zone flush.
Implementation is not so straight-forward as I want to see. Before discussing
the patch itself - can we consider per-zone caches? In that case, we can
simply deallocate whole per-zone RBT and we are done.
Pros:
* Potentially better concurrency, simpler code, much less corner cases.
Cons:
* We have to look into Zone register before searching the cache.
* It can limit concurrency ... with many extra small zones? I'm not sure.
------------
Now we can dive into all the gory details of single-tree-cache implementation:
Function discard_zone_from_cache() contains a long comment about potential
problems, please send me your opinions.
Functions dbg_* can be simply deleted after end of testing.
I encountered some questions about locking:
How I should lock these two locks properly?
RWLOCK(&zr->rwlock, isc_rwlocktype_read);
LOCK(&cache->mutex);
AFAIK without some more intelligent algorithm or locking protocol it can
simply deadlock if two threads attempt to get both locks in different order.
For now I chosen isc_task_beginexclusive() way. Hopefully, zone flush should
be rare operation so it can be enough.
It raises another question:
Is it possible for a thread to hold some lock during isc_task_beginexclusive()?
I mean this situation:
thread 1: lock(&cache->mutex)
thread 1: store a pointer to the middle of the cache
thread 2: isc_task_beginexclusive()
thread 2: do something with cache
thread 2: isc_task_endexclusive()
thread 1: dereference stored pointer -> CRASH - thread 2 changed the data and
pointer is invalid ... but thread 1 held the lock!
I'm not really sure about this part of BIND. My guess:
During "exclusive mode" all threads except single one (= thread which called
isc_task_beginexclusive()) are blocked somewhere near dispatch() ... so they
do nothing and thus they should not hold any lock.
Is my guess correct? I looked into task.c and related code but I can't say "I
understood!" :-(
Now the funny part - RBT tree before and after per-zone flush.
Expected behaviour when removing zone 'test.'
=============================================
// cache tree before "test." zone removal
. (empty node)
4.34.10.in-addr.arpa
89.4.34.10.in-addr.arpa
brq.redhat.com
pspacek.brq.redhat.com
test.brq.redhat.com
test
e.test
_kerberos.e.test
_tcp.e.test (empty node)
_kerberos._tcp.e.test
_kerberos-master._tcp.e.test
_kpasswd._tcp.e.test
_ldap._tcp.e.test
_udp.e.test (empty node)
_kerberos._udp.e.test
_kerberos-master._udp.e.test
_kpasswd._udp.e.test
_ntp._udp.e.test
c182.e.test
pspacek.e.test
test.e.test
rec.test
sec.test
sub.test
ns.sub.test
rec.sub.test
// cache tree after 'test.' zone removal
// zones 'e.test.', 'sub.test.' and 'sec.test.' are still present
// record 'rec.test.' disappeared
. (empty node)
4.34.10.in-addr.arpa
89.4.34.10.in-addr.arpa
brq.redhat.com
pspacek.brq.redhat.com
test.brq.redhat.com
test (empty node)
e.test
_kerberos.e.test
_tcp.e.test (empty node)
_kerberos._tcp.e.test
_kerberos-master._tcp.e.test
_kpasswd._tcp.e.test
_ldap._tcp.e.test
_udp.e.test (empty node)
_kerberos._udp.e.test
_kerberos-master._udp.e.test
_kpasswd._udp.e.test
_ntp._udp.e.test
c182.e.test
pspacek.e.test
test.e.test
sec.test
sub.test
ns.sub.test
rec.sub.test
--
Petr^2 Spacek
From 91df0bb70398f985d6b13c282672c9a87cf98f41 Mon Sep 17 00:00:00 2001
From: Petr Spacek <[email protected]>
Date: Thu, 15 Nov 2012 18:32:00 +0100
Subject: [PATCH] [WIP] Flush whole zone from cache during zone
renaming/removal.
Signed-off-by: Petr Spacek <[email protected]>
---
src/cache.c | 241 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
src/cache.h | 5 ++
src/ldap_helper.c | 4 +-
3 files changed, 249 insertions(+), 1 deletion(-)
diff --git a/src/cache.c b/src/cache.c
index 898d48b291a83da7f77dbcf79e2bd3e7ff8281aa..5f969f33d911bd4f111059beeb3e0dd920fba226 100644
--- a/src/cache.c
+++ b/src/cache.c
@@ -23,19 +23,23 @@
#include <isc/result.h>
#include <isc/time.h>
#include <isc/util.h>
+#include <isc/rwlock.h>
+#include <isc/task.h>
#include <dns/rbt.h>
#include <dns/result.h>
#include <dns/log.h>
+#include <dns/fixedname.h>
#include <string.h>
#include "cache.h"
#include "ldap_helper.h"
#include "log.h"
#include "rdlist.h"
#include "settings.h"
#include "util.h"
+#include "zone_register.h"
struct ldap_cache {
isc_mutex_t mutex; /* TODO: RWLOCK? */
@@ -51,6 +55,70 @@ typedef struct {
isc_time_t valid_until;
} cache_node_t;
+/************** Use following functions only for debug purposes **************/
+void
+dbg_print_name_indent(dns_name_t *name) {
+ int label_count;
+
+ label_count = dns_name_countlabels(name);
+ label_count -= 1; /* root is not indented */
+ label_count *= 4; /* indentation for single domain level */
+
+ printf("%2$*1$s", label_count, "");
+}
+
+void
+dbg_print_name(dns_name_t *name) {
+ char printbuff[DNS_NAME_FORMATSIZE];
+
+ dns_name_format(name, printbuff, DNS_NAME_FORMATSIZE);
+ printf("%s", printbuff);
+}
+
+void
+dbg_print_rbt_names(isc_mem_t *mctx, dns_rbt_t *rbt) {
+ isc_result_t result = ISC_R_SUCCESS;
+ dns_rbtnodechain_t chain;
+
+ dns_rbtnodechain_init(&chain, mctx);
+ result = dns_rbtnodechain_first(&chain, rbt, NULL, NULL);
+ while (result != ISC_R_NOMORE && result != ISC_R_NOTFOUND) {
+ dns_fixedname_t name;
+ dns_fixedname_t origin;
+ dns_fixedname_t concat;
+ dns_fixedname_init(&name);
+ dns_fixedname_init(&origin);
+ dns_fixedname_init(&concat);
+ dns_rbtnode_t *node = NULL;
+ char *node_desc = "";
+
+ result = dns_rbtnodechain_current(&chain, dns_fixedname_name(&name),
+ dns_fixedname_name(&origin), &node);
+ if (!(result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN))
+ break;
+
+ CHECK(dns_name_concatenate(dns_fixedname_name(&name),
+ dns_fixedname_name(&origin),
+ dns_fixedname_name(&concat),
+ NULL));
+ dbg_print_name_indent(dns_fixedname_name(&concat));
+ dbg_print_name(dns_fixedname_name(&concat));
+ if (node->data == NULL) {
+ node_desc = " (empty node)";
+ }
+ printf("%s\n", node_desc);
+
+ result = dns_rbtnodechain_next(&chain, NULL, NULL);
+ if (!(result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN))
+ break;
+ }
+ dns_rbtnodechain_invalidate(&chain);
+
+cleanup:
+ return;
+}
+/************************ End of debug only functions ************************/
+
static void
cache_node_deleter(void *data, void *deleter_arg)
{
@@ -179,6 +247,7 @@ ldap_cache_getrdatalist(isc_mem_t *mctx, ldap_cache_t *cache,
return ISC_R_NOTFOUND;
LOCK(&cache->mutex);
+ //dbg_print_rbt_names(cache->mctx, cache->rbt);
result = dns_rbt_findname(cache->rbt, name, 0, NULL, (void *)&node);
switch (result) {
case ISC_R_SUCCESS:
@@ -304,6 +373,178 @@ discard_from_cache(ldap_cache_t *cache, dns_name_t *name)
return result;
}
+/**
+ * Delete all records associated with single DNS zone from the cache.
+ *
+ * Each individual record in the cache is tested and deleted only if:
+ * 1) Cache record belongs to deleted zone (specified by del_zone parameter).
+ * and at same time
+ * 2) Cache record doesn't belong to some *sub*-zone of deleted zone
+ * (according to zone register).
+ *
+ * Task will be switched to and from exclusive mode automatically.
+ *
+ * @param[in] del_zone Absolute DNS name of deleted zone
+ * @param[in] task Task which will be locked prior the cache RBT iteration
+ *
+ * @retval ISC_R_SUCCESS All records beloging to del_zone were deleted.
+ * @retval other Any error including ISC_R_NOMEMORY.
+ *
+ * @attention
+ * There is a case when some records wouldn't be deleted.
+ * Let's have two master zones 'test' and 'sub.test' with following records:
+ *
+ * @attention
+ * zone 'test.':
+ * - @ SOA
+ * - sub NS ns.sub
+ * - ns.sub A 1.2.3.4
+ * - eg.sub TXT "blah blah"
+ *
+ * @attention
+ * zone 'sub.test.':
+ * - @ SOA
+ * - @ NS ns
+ * - ns A 1.2.3.4
+ *
+ * @attention
+ * In that case deletion of zone 'test.' woudn't delete records 'sub.test.',
+ * 'ns.sub.test.' and 'eg.sub.test.' from cache because zone 'sub.test.'
+ * is present in ZR.
+ *
+ * @attention
+ * Records in zone 'test.' with names ending with 'sub.test.' are not
+ * authoritative and have to be exactly same as in zone 'sub.test'
+ * (see http://tools.ietf.org/html/rfc1034#section-4.2.1)
+ *
+ * @warning
+ * For reasons stated above record 'eg.sub.test.' in zone 'test.'
+ * should not exist at all and should never appear in cache.
+ */
+isc_result_t
+discard_zone_from_cache(ldap_cache_t *cache, zone_register_t *zr,
+ dns_name_t *del_zone, isc_task_t *task)
+{
+ isc_result_t result;
+ dns_rbtnodechain_t chain;
+ dns_fixedname_t name;
+ dns_fixedname_t origin;
+ dns_fixedname_t concat;
+ void *nodedata = NULL;
+ dns_rbt_t *zr_rbt = NULL;
+ dns_fixedname_t zr_foundname;
+ dns_namelist_t del_names_list;
+ dns_name_t *del_name = NULL;
+ isc_result_t lock_status = ISC_R_IGNORE;
+
+ REQUIRE(cache != NULL);
+ REQUIRE(dns_name_isabsolute(del_zone));
+
+ dns_fixedname_init(&name);
+ dns_fixedname_init(&origin);
+ dns_fixedname_init(&concat);
+ dns_rbtnodechain_init(&chain, cache->mctx);
+ ISC_LIST_INIT(del_names_list);
+
+ if (cache->rbt == NULL)
+ CLEANUP_WITH(ISC_R_NOMORE);
+
+ lock_status = isc_task_beginexclusive(task);
+ RUNTIME_CHECK(lock_status == ISC_R_SUCCESS ||
+ lock_status == ISC_R_LOCKBUSY);
+
+ /* ???? can somebody hold some locks during isc_task_beginexclusive()?
+ RWLOCK(&zr->rwlock, isc_rwlocktype_read);
+ LOCK(&cache->mutex);
+ */
+
+ /* Iterate over cache RBT and remember names for deletion. */
+ zr_rbt = zr_get_rbt(zr);
+ result = dns_rbtnodechain_first(&chain,
+ cache->rbt,
+ dns_fixedname_name(&name),
+ dns_fixedname_name(&origin));
+ while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
+ nodedata = NULL;
+
+ CHECK(dns_name_concatenate(dns_fixedname_name(&name),
+ dns_fixedname_name(&origin),
+ dns_fixedname_name(&concat),
+ NULL));
+
+ if (dns_name_issubdomain(dns_fixedname_name(&concat), del_zone)) {
+ dns_fixedname_init(&zr_foundname);
+ result = dns_rbt_findname(zr_rbt,
+ dns_fixedname_name(&concat),
+ DNS_RBTFIND_NOOPTIONS,
+ dns_fixedname_name(&zr_foundname),
+ &nodedata);
+
+ /* Delete record if no sub-domain of del_zone was found
+ * in zone register. */
+ if (result == ISC_R_NOTFOUND ||
+ ((result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) &&
+ (dns_name_equal(dns_fixedname_name(&zr_foundname), del_zone) ||
+ !dns_name_issubdomain(dns_fixedname_name(&zr_foundname), del_zone))) ) {
+ del_name = isc_mem_get(cache->mctx, sizeof(dns_name_t));
+ if (del_name == NULL)
+ CLEANUP_WITH(ISC_R_NOMEMORY);
+ dns_name_init(del_name, NULL);
+ ISC_LIST_APPEND(del_names_list, del_name, link);
+ CHECK(dns_name_dup(dns_fixedname_name(&concat),
+ cache->mctx, del_name));
+ } else if (result != ISC_R_SUCCESS &&
+ result != DNS_R_PARTIALMATCH)
+ goto cleanup;
+ }
+
+ result = dns_rbtnodechain_next(&chain, dns_fixedname_name(&name),
+ dns_fixedname_name(&origin));
+ }
+
+cleanup:
+ if (result != ISC_R_NOMORE && result != ISC_R_NOTFOUND) {
+ log_error_r("cache flush failed during 'sieve' phase");
+ goto cleanup;
+ } else {
+ result = ISC_R_SUCCESS;
+ }
+
+ dbg_print_rbt_names(cache->mctx, cache->rbt);
+
+ /* free all memory - even in case of error */
+ while (!ISC_LIST_EMPTY(del_names_list)) {
+ del_name = ISC_LIST_HEAD(del_names_list);
+ ISC_LIST_UNLINK(del_names_list, del_name, link);
+ if (result == ISC_R_SUCCESS) {
+ result = dns_rbt_deletename(cache->rbt, del_name,
+ ISC_FALSE);
+ if (result == ISC_R_NOTFOUND)
+ result = ISC_R_SUCCESS;
+ else if (result != ISC_R_SUCCESS)
+ log_error_r("unable to delete name from cache");
+ }
+ if (dns_name_dynamic(del_name))
+ dns_name_free(del_name, cache->mctx);
+ isc_mem_put(cache->mctx, del_name, sizeof(dns_name_t));
+ }
+
+ dbg_print_rbt_names(cache->mctx, cache->rbt);
+
+ if (lock_status == ISC_R_SUCCESS)
+ isc_task_endexclusive(task);
+
+
+ /* ???? can somebody hold some locks during isc_task_beginexclusive()?
+ if (cache->rbt != NULL) {
+ UNLOCK(&cache->mutex);
+ RWUNLOCK(&zr->rwlock, isc_rwlocktype_read);
+ }
+ */
+
+ return result;
+}
+
isc_result_t
flush_ldap_cache(ldap_cache_t *cache)
{
diff --git a/src/cache.h b/src/cache.h
index a7aa5b7e889d9e195484a11dcf4f9a10d811f623..63f0d5ea727121af898d7f8651067d2433385ffc 100644
--- a/src/cache.h
+++ b/src/cache.h
@@ -23,6 +23,7 @@
#define _LD_CACHE_H_
#include "types.h"
+#include "zone_register.h"
typedef struct ldap_cache ldap_cache_t;
@@ -77,6 +78,10 @@ ldap_cache_enabled(ldap_cache_t *cache);
isc_result_t
discard_from_cache(ldap_cache_t *cache, dns_name_t *name);
+isc_result_t
+discard_zone_from_cache(ldap_cache_t *cache, zone_register_t *zr,
+ dns_name_t *del_zone, isc_task_t *task);
+
/**
* Discard all names from the cache and re-initialize internal RB-tree.
* @return ISC_R_SUCCESS even if cache is disabled.
diff --git a/src/ldap_helper.c b/src/ldap_helper.c
index 8a6f603d1393d322561a8cbb8fe4abf188c71dd0..abe19f1534c0794b69400589175fbd41937e0a59 100644
--- a/src/ldap_helper.c
+++ b/src/ldap_helper.c
@@ -870,7 +870,9 @@ ldap_delete_zone2(ldap_instance_t *inst, dns_name_t *name, isc_boolean_t lock,
}
/* TODO: flush cache records belonging to deleted zone */
- CHECK(discard_from_cache(inst->cache, name));
+ //CHECK(discard_from_cache(inst->cache, name));
+ CHECK(discard_zone_from_cache(inst->cache, inst->zone_register, name,
+ inst->task));
result = zr_get_zone_ptr(inst->zone_register, name, &zone);
if (result == ISC_R_NOTFOUND || result == DNS_R_PARTIALMATCH) {
--
1.7.11.7
_______________________________________________
Freeipa-devel mailing list
[email protected]
https://www.redhat.com/mailman/listinfo/freeipa-devel