Hello,

attached patch is preliminary implementation of selective zone flush.


Implementation is not so straight-forward as I want to see. Before discussing the patch itself - can we consider per-zone caches? In that case, we can simply deallocate whole per-zone RBT and we are done.

Pros:
* Potentially better concurrency, simpler code, much less corner cases.

Cons:
* We have to look into Zone register before searching the cache.
* It can limit concurrency ... with many extra small zones? I'm not sure.

------------

Now we can dive into all the gory details of single-tree-cache implementation:

Function discard_zone_from_cache() contains a long comment about potential problems, please send me your opinions.

Functions dbg_* can be simply deleted after end of testing.


I encountered some questions about locking:
How I should lock these two locks properly?
        RWLOCK(&zr->rwlock, isc_rwlocktype_read);
        LOCK(&cache->mutex);

AFAIK without some more intelligent algorithm or locking protocol it can simply deadlock if two threads attempt to get both locks in different order.

For now I chosen isc_task_beginexclusive() way. Hopefully, zone flush should be rare operation so it can be enough.

It raises another question:
Is it possible for a thread to hold some lock during isc_task_beginexclusive()?

I mean this situation:
thread 1: lock(&cache->mutex)
thread 1: store a pointer to the middle of the cache
thread 2: isc_task_beginexclusive()
thread 2: do something with cache
thread 2: isc_task_endexclusive()
thread 1: dereference stored pointer -> CRASH - thread 2 changed the data and pointer is invalid ... but thread 1 held the lock!

I'm not really sure about this part of BIND. My guess:
During "exclusive mode" all threads except single one (= thread which called isc_task_beginexclusive()) are blocked somewhere near dispatch() ... so they do nothing and thus they should not hold any lock.

Is my guess correct? I looked into task.c and related code but I can't say "I understood!" :-(


Now the funny part - RBT tree before and after per-zone flush.

Expected behaviour when removing zone 'test.'
=============================================
// cache tree before "test." zone removal
. (empty node)
                    4.34.10.in-addr.arpa
                        89.4.34.10.in-addr.arpa
            brq.redhat.com
                pspacek.brq.redhat.com
                test.brq.redhat.com
    test
        e.test
            _kerberos.e.test
            _tcp.e.test (empty node)
                _kerberos._tcp.e.test
                _kerberos-master._tcp.e.test
                _kpasswd._tcp.e.test
                _ldap._tcp.e.test
            _udp.e.test (empty node)
                _kerberos._udp.e.test
                _kerberos-master._udp.e.test
                _kpasswd._udp.e.test
                _ntp._udp.e.test
            c182.e.test
            pspacek.e.test
            test.e.test
        rec.test
        sec.test
        sub.test
            ns.sub.test
            rec.sub.test


// cache tree after 'test.' zone removal
// zones 'e.test.', 'sub.test.' and 'sec.test.' are still present
// record 'rec.test.' disappeared
. (empty node)
                    4.34.10.in-addr.arpa
                        89.4.34.10.in-addr.arpa
            brq.redhat.com
                pspacek.brq.redhat.com
                test.brq.redhat.com
    test (empty node)
        e.test
            _kerberos.e.test
            _tcp.e.test (empty node)
                _kerberos._tcp.e.test
                _kerberos-master._tcp.e.test
                _kpasswd._tcp.e.test
                _ldap._tcp.e.test
            _udp.e.test (empty node)
                _kerberos._udp.e.test
                _kerberos-master._udp.e.test
                _kpasswd._udp.e.test
                _ntp._udp.e.test
            c182.e.test
            pspacek.e.test
            test.e.test
        sec.test
        sub.test
            ns.sub.test
            rec.sub.test

--
Petr^2 Spacek
From 91df0bb70398f985d6b13c282672c9a87cf98f41 Mon Sep 17 00:00:00 2001
From: Petr Spacek <pspa...@redhat.com>
Date: Thu, 15 Nov 2012 18:32:00 +0100
Subject: [PATCH] [WIP] Flush whole zone from cache during zone
 renaming/removal.

Signed-off-by: Petr Spacek <pspa...@redhat.com>
---
 src/cache.c       | 241 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/cache.h       |   5 ++
 src/ldap_helper.c |   4 +-
 3 files changed, 249 insertions(+), 1 deletion(-)

diff --git a/src/cache.c b/src/cache.c
index 898d48b291a83da7f77dbcf79e2bd3e7ff8281aa..5f969f33d911bd4f111059beeb3e0dd920fba226 100644
--- a/src/cache.c
+++ b/src/cache.c
@@ -23,19 +23,23 @@
 #include <isc/result.h>
 #include <isc/time.h>
 #include <isc/util.h>
+#include <isc/rwlock.h>
+#include <isc/task.h>
 
 #include <dns/rbt.h>
 #include <dns/result.h>
 #include <dns/log.h>
+#include <dns/fixedname.h>
 
 #include <string.h>
 
 #include "cache.h"
 #include "ldap_helper.h"
 #include "log.h"
 #include "rdlist.h"
 #include "settings.h"
 #include "util.h"
+#include "zone_register.h"
 
 struct ldap_cache {
 	isc_mutex_t	mutex; /* TODO: RWLOCK? */
@@ -51,6 +55,70 @@ typedef struct {
 	isc_time_t		valid_until;
 } cache_node_t;
 
+/************** Use following functions only for debug purposes **************/
+void
+dbg_print_name_indent(dns_name_t *name) {
+	int label_count;
+
+	label_count = dns_name_countlabels(name);
+	label_count -= 1; /* root is not indented */
+	label_count *= 4; /* indentation for single domain level */
+
+	printf("%2$*1$s", label_count, "");
+}
+
+void
+dbg_print_name(dns_name_t *name) {
+	char printbuff[DNS_NAME_FORMATSIZE];
+
+	dns_name_format(name, printbuff, DNS_NAME_FORMATSIZE);
+	printf("%s", printbuff);
+}
+
+void
+dbg_print_rbt_names(isc_mem_t *mctx, dns_rbt_t *rbt) {
+	isc_result_t result = ISC_R_SUCCESS;
+	dns_rbtnodechain_t chain;
+
+	dns_rbtnodechain_init(&chain, mctx);
+	result = dns_rbtnodechain_first(&chain, rbt, NULL, NULL);
+	while (result != ISC_R_NOMORE && result != ISC_R_NOTFOUND) {
+		dns_fixedname_t name;
+		dns_fixedname_t origin;
+		dns_fixedname_t concat;
+		dns_fixedname_init(&name);
+		dns_fixedname_init(&origin);
+		dns_fixedname_init(&concat);
+		dns_rbtnode_t *node = NULL;
+		char *node_desc = "";
+
+		result = dns_rbtnodechain_current(&chain, dns_fixedname_name(&name),
+						  dns_fixedname_name(&origin), &node);
+		if (!(result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN))
+			break;
+
+		CHECK(dns_name_concatenate(dns_fixedname_name(&name),
+					   dns_fixedname_name(&origin),
+					   dns_fixedname_name(&concat),
+					   NULL));
+		dbg_print_name_indent(dns_fixedname_name(&concat));
+		dbg_print_name(dns_fixedname_name(&concat));
+		if (node->data == NULL) {
+			node_desc = " (empty node)";
+		}
+		printf("%s\n", node_desc);
+
+		result = dns_rbtnodechain_next(&chain, NULL, NULL);
+		if (!(result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN))
+			break;
+	}
+	dns_rbtnodechain_invalidate(&chain);
+
+cleanup:
+	return;
+}
+/************************ End of debug only functions ************************/
+
 static void
 cache_node_deleter(void *data, void *deleter_arg)
 {
@@ -179,6 +247,7 @@ ldap_cache_getrdatalist(isc_mem_t *mctx, ldap_cache_t *cache,
 		return ISC_R_NOTFOUND;
 
 	LOCK(&cache->mutex);
+	//dbg_print_rbt_names(cache->mctx, cache->rbt);
 	result = dns_rbt_findname(cache->rbt, name, 0, NULL, (void *)&node);
 	switch (result) {
 	case ISC_R_SUCCESS:
@@ -304,6 +373,178 @@ discard_from_cache(ldap_cache_t *cache, dns_name_t *name)
 	return result;
 }
 
+/**
+ * Delete all records associated with single DNS zone from the cache.
+ *
+ * Each individual record in the cache is tested and deleted only if:
+ * 1) Cache record belongs to deleted zone (specified by del_zone parameter).
+ *    and at same time
+ * 2) Cache record doesn't belong to some *sub*-zone of deleted zone
+ *    (according to zone register).
+ *
+ * Task will be switched to and from exclusive mode automatically.
+ *
+ * @param[in] del_zone Absolute DNS name of deleted zone
+ * @param[in] task     Task which will be locked prior the cache RBT iteration
+ *
+ * @retval ISC_R_SUCCESS All records beloging to del_zone were deleted.
+ * @retval other         Any error including ISC_R_NOMEMORY.
+ *
+ * @attention
+ * There is a case when some records wouldn't be deleted.
+ * Let's have two master zones 'test' and 'sub.test' with following records:
+ *
+ * @attention
+ * zone 'test.':
+ * - @      SOA
+ * - sub    NS  ns.sub
+ * - ns.sub A   1.2.3.4
+ * - eg.sub TXT "blah blah"
+ *
+ * @attention
+ * zone 'sub.test.':
+ * - @      SOA
+ * - @      NS  ns
+ * - ns     A   1.2.3.4
+ *
+ * @attention
+ * In that case deletion of zone 'test.' woudn't delete records 'sub.test.',
+ * 'ns.sub.test.' and 'eg.sub.test.' from cache because zone 'sub.test.'
+ * is present in ZR.
+ *
+ * @attention
+ * Records in zone 'test.' with names ending with 'sub.test.' are not
+ * authoritative and have to be exactly same as in zone 'sub.test'
+ * (see http://tools.ietf.org/html/rfc1034#section-4.2.1)
+ *
+ * @warning
+ * For reasons stated above record 'eg.sub.test.' in zone 'test.'
+ * should not exist at all and should never appear in cache.
+ */
+isc_result_t
+discard_zone_from_cache(ldap_cache_t *cache, zone_register_t *zr,
+			dns_name_t *del_zone, isc_task_t *task)
+{
+	isc_result_t result;
+	dns_rbtnodechain_t chain;
+	dns_fixedname_t name;
+	dns_fixedname_t origin;
+	dns_fixedname_t concat;
+	void *nodedata = NULL;
+	dns_rbt_t *zr_rbt = NULL;
+	dns_fixedname_t zr_foundname;
+	dns_namelist_t del_names_list;
+	dns_name_t *del_name = NULL;
+	isc_result_t lock_status = ISC_R_IGNORE;
+
+	REQUIRE(cache != NULL);
+	REQUIRE(dns_name_isabsolute(del_zone));
+
+	dns_fixedname_init(&name);
+	dns_fixedname_init(&origin);
+	dns_fixedname_init(&concat);
+	dns_rbtnodechain_init(&chain, cache->mctx);
+	ISC_LIST_INIT(del_names_list);
+
+	if (cache->rbt == NULL)
+		CLEANUP_WITH(ISC_R_NOMORE);
+
+	lock_status = isc_task_beginexclusive(task);
+	RUNTIME_CHECK(lock_status == ISC_R_SUCCESS ||
+		      lock_status == ISC_R_LOCKBUSY);
+
+	/* ???? can somebody hold some locks during isc_task_beginexclusive()?
+	RWLOCK(&zr->rwlock, isc_rwlocktype_read);
+	LOCK(&cache->mutex);
+	*/
+
+	/* Iterate over cache RBT and remember names for deletion. */
+	zr_rbt = zr_get_rbt(zr);
+	result = dns_rbtnodechain_first(&chain,
+					cache->rbt,
+					dns_fixedname_name(&name),
+					dns_fixedname_name(&origin));
+	while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
+		nodedata = NULL;
+
+		CHECK(dns_name_concatenate(dns_fixedname_name(&name),
+					   dns_fixedname_name(&origin),
+					   dns_fixedname_name(&concat),
+					   NULL));
+
+		if (dns_name_issubdomain(dns_fixedname_name(&concat), del_zone)) {
+			dns_fixedname_init(&zr_foundname);
+			result = dns_rbt_findname(zr_rbt,
+						  dns_fixedname_name(&concat),
+						  DNS_RBTFIND_NOOPTIONS,
+						  dns_fixedname_name(&zr_foundname),
+						  &nodedata);
+
+			/* Delete record if no sub-domain of del_zone was found
+			 * in zone register. */
+			if (result == ISC_R_NOTFOUND ||
+			   ((result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) &&
+			    (dns_name_equal(dns_fixedname_name(&zr_foundname), del_zone) ||
+			     !dns_name_issubdomain(dns_fixedname_name(&zr_foundname), del_zone)))			    ) {
+				del_name = isc_mem_get(cache->mctx, sizeof(dns_name_t));
+				if (del_name == NULL)
+					CLEANUP_WITH(ISC_R_NOMEMORY);
+				dns_name_init(del_name, NULL);
+				ISC_LIST_APPEND(del_names_list, del_name, link);
+				CHECK(dns_name_dup(dns_fixedname_name(&concat),
+						   cache->mctx, del_name));
+			} else if (result != ISC_R_SUCCESS &&
+				   result != DNS_R_PARTIALMATCH)
+					goto cleanup;
+		}
+
+		result = dns_rbtnodechain_next(&chain, dns_fixedname_name(&name),
+				  dns_fixedname_name(&origin));
+	}
+
+cleanup:
+	if (result != ISC_R_NOMORE && result != ISC_R_NOTFOUND) {
+		log_error_r("cache flush failed during 'sieve' phase");
+		goto cleanup;
+	} else {
+		result = ISC_R_SUCCESS;
+	}
+
+	dbg_print_rbt_names(cache->mctx, cache->rbt);
+
+	/* free all memory - even in case of error */
+	while (!ISC_LIST_EMPTY(del_names_list)) {
+		del_name = ISC_LIST_HEAD(del_names_list);
+		ISC_LIST_UNLINK(del_names_list, del_name, link);
+		if (result == ISC_R_SUCCESS) {
+			result = dns_rbt_deletename(cache->rbt, del_name,
+						    ISC_FALSE);
+			if (result == ISC_R_NOTFOUND)
+				result = ISC_R_SUCCESS;
+			else if (result != ISC_R_SUCCESS)
+				log_error_r("unable to delete name from cache");
+		}
+		if (dns_name_dynamic(del_name))
+			dns_name_free(del_name, cache->mctx);
+		isc_mem_put(cache->mctx, del_name, sizeof(dns_name_t));
+	}
+
+	dbg_print_rbt_names(cache->mctx, cache->rbt);
+
+	if (lock_status == ISC_R_SUCCESS)
+		isc_task_endexclusive(task);
+
+
+	/* ???? can somebody hold some locks during isc_task_beginexclusive()?
+	if (cache->rbt != NULL) {
+		UNLOCK(&cache->mutex);
+		RWUNLOCK(&zr->rwlock, isc_rwlocktype_read);
+	}
+	*/
+
+	return result;
+}
+
 isc_result_t
 flush_ldap_cache(ldap_cache_t *cache)
 {
diff --git a/src/cache.h b/src/cache.h
index a7aa5b7e889d9e195484a11dcf4f9a10d811f623..63f0d5ea727121af898d7f8651067d2433385ffc 100644
--- a/src/cache.h
+++ b/src/cache.h
@@ -23,6 +23,7 @@
 #define _LD_CACHE_H_
 
 #include "types.h"
+#include "zone_register.h"
 
 typedef struct ldap_cache ldap_cache_t;
 
@@ -77,6 +78,10 @@ ldap_cache_enabled(ldap_cache_t *cache);
 isc_result_t
 discard_from_cache(ldap_cache_t *cache, dns_name_t *name);
 
+isc_result_t
+discard_zone_from_cache(ldap_cache_t *cache, zone_register_t *zr,
+			dns_name_t *del_zone, isc_task_t *task);
+
 /**
  * Discard all names from the cache and re-initialize internal RB-tree.
  * @return ISC_R_SUCCESS even if cache is disabled.
diff --git a/src/ldap_helper.c b/src/ldap_helper.c
index 8a6f603d1393d322561a8cbb8fe4abf188c71dd0..abe19f1534c0794b69400589175fbd41937e0a59 100644
--- a/src/ldap_helper.c
+++ b/src/ldap_helper.c
@@ -870,7 +870,9 @@ ldap_delete_zone2(ldap_instance_t *inst, dns_name_t *name, isc_boolean_t lock,
 	}
 
 	/* TODO: flush cache records belonging to deleted zone */
-	CHECK(discard_from_cache(inst->cache, name));
+	//CHECK(discard_from_cache(inst->cache, name));
+	CHECK(discard_zone_from_cache(inst->cache, inst->zone_register, name,
+				      inst->task));
 
 	result = zr_get_zone_ptr(inst->zone_register, name, &zone);
 	if (result == ISC_R_NOTFOUND || result == DNS_R_PARTIALMATCH) {
-- 
1.7.11.7

_______________________________________________
Freeipa-devel mailing list
Freeipa-devel@redhat.com
https://www.redhat.com/mailman/listinfo/freeipa-devel

Reply via email to