Hello,

https://fedorahosted.org/bind-dyndb-ldap/ticket/128

Previously records deleted when connection to LDAP server was down were not
synchronized properly. It should work now.

I use this command to simulate broken connections and connection 
re-establishment:
$ socat tcp-listen:3899,reuseaddr,fork tcp-connect:localhost:389

It should be enough to add "ldap://$(hostname):3899" as LDAP URI to
/etc/named.conf and then simulate changes by killing and restarting socat.

Let me know if you need any assistance!

-- 
Petr^2 Spacek
From 57e87e325bbfe60709a53c8d5422339bb5f2b664 Mon Sep 17 00:00:00 2001
From: Petr Spacek <pspa...@redhat.com>
Date: Tue, 26 May 2015 09:32:59 +0200
Subject: [PATCH] Add functions for MetaLDAP generation number manipulation.

https://fedorahosted.org/bind-dyndb-ldap/ticket/128
---
 src/mldap.c | 38 ++++++++++++++++++++++++++++++++++++--
 src/mldap.h |  6 ++++++
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/src/mldap.c b/src/mldap.c
index 28fd2296b7ba6b97d1688ccf0be8ae63bbf6c2e8..d207df52ed92975c5d678add6dadfbcedfa052b8 100644
--- a/src/mldap.c
+++ b/src/mldap.c
@@ -18,6 +18,7 @@
 #include <dns/enumclass.h>
 #include <dns/name.h>
 #include <dns/types.h>
+#include <dns/update.h>
 
 #include "ldap_entry.h"
 #include "metadb.h"
@@ -60,7 +61,7 @@ mldap_new(isc_mem_t *mctx, mldapdb_t **mldapp) {
 	isc_mem_attach(mctx, &mldap->mctx);
 
 	CHECK(metadb_new(mctx, &mldap->mdb));
-	mldap->generation = 0;
+	mldap->generation = 1;
 
 	*mldapp = mldap;
 	return result;
@@ -99,6 +100,34 @@ mldap_closeversion(mldapdb_t *mldap, isc_boolean_t commit) {
 }
 
 /**
+ * Atomically increment MetaLDAP generation number.
+ */
+void mldap_cur_generation_bump(mldapdb_t *mldap) {
+	isc_uint32_t oldgen;
+	isc_uint32_t curgen;
+	isc_uint32_t newgen;
+
+	REQUIRE(mldap != NULL);
+
+	curgen = isc_atomic_cmpxchg((isc_int32_t *)&mldap->generation, 0, 0);
+	do {
+		oldgen = curgen;
+		newgen = dns_update_soaserial(oldgen, dns_updatemethod_increment);
+		curgen = isc_atomic_cmpxchg((isc_int32_t *)&mldap->generation, oldgen, newgen);
+	} while (curgen != oldgen);
+}
+
+/**
+ * Get current MetaLDAP generation number.
+ *
+ * Generation numbers have to be compared using isc_serial_* functions.
+ */
+isc_uint32_t
+mldap_cur_generation_get(mldapdb_t *mldap) {
+	return isc_atomic_cmpxchg((isc_int32_t *)&mldap->generation, 0, 0);
+}
+
+/**
  * Convert UUID to "01234567-89ab-cdef-0123-456789abcdef.uuid.ldap." DNS name.
  *
  * @param[in]  beruuid
@@ -191,12 +220,17 @@ mldap_generation_store(mldapdb_t *mldap, metadb_node_t *node) {
 	unsigned char buff[sizeof(mldap->generation)];
 	isc_region_t region = { .base = buff, .length = sizeof(buff) };
 	dns_rdata_t rdata;
+	isc_uint32_t generation;
+
+	STATIC_ASSERT((sizeof(((mldapdb_t *)0)->generation) == sizeof(generation)), \
+		       "mldapdb_t->generation and local generation size does not match");
 
 	dns_rdata_init(&rdata);
 
 	/* Bytes should be in network-order but we do not care because:
 	 * 1) It is used only internally and always compared on this machine. */
-	memcpy(buff, &mldap->generation, sizeof(mldap->generation));
+	generation = mldap_cur_generation_get(mldap);
+	memcpy(buff, &generation, sizeof(generation));
 	dns_rdata_fromregion(&rdata, dns_rdataclass_in, dns_rdatatype_a, &region);
 	CHECK(metadb_rdata_store(&rdata, node));
 
diff --git a/src/mldap.h b/src/mldap.h
index 752ab28b2da956817ad27c9c928863bc9e601c0c..c7eb725d2c1743da6037a2dc95d6bc000ccb0928 100644
--- a/src/mldap.h
+++ b/src/mldap.h
@@ -42,4 +42,10 @@ mldap_dnsname_get(metadb_node_t *node, dns_name_t *fqdn, dns_name_t *zone);
 isc_result_t ATTR_CHECKRESULT ATTR_NONNULLS
 mldap_dnsname_store(dns_name_t *fqdn, dns_name_t *zone, metadb_node_t *node);
 
+void ATTR_NONNULLS
+mldap_cur_generation_bump(mldapdb_t *mldap);
+
+isc_uint32_t ATTR_CHECKRESULT ATTR_NONNULLS
+mldap_cur_generation_get(mldapdb_t *mldap);
+
 #endif /* SRC_MLDAP_H_ */
-- 
2.1.0

From b476041bd6a88b88cd1739e61960a666868e1b23 Mon Sep 17 00:00:00 2001
From: Petr Spacek <pspa...@redhat.com>
Date: Tue, 26 May 2015 09:34:58 +0200
Subject: [PATCH] Increment MetaLDAP generation number on reconnect.

https://fedorahosted.org/bind-dyndb-ldap/ticket/128
---
 src/ldap_helper.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/ldap_helper.c b/src/ldap_helper.c
index abda9f54e84f097214ac0ba69a5cd657a39b5862..e9d2229d5369fc9ac9d692f426a2bdab99958184 100644
--- a/src/ldap_helper.c
+++ b/src/ldap_helper.c
@@ -4588,6 +4588,7 @@ ldap_syncrepl_watcher(isc_threadarg_t arg)
 			sane_sleep(inst, 1);
 			continue;
 		}
+		mldap_cur_generation_bump(inst->mldapdb);
 
 		log_info("LDAP instance '%s' is being synchronized, "
 			 "please ignore message 'all zones loaded'",
-- 
2.1.0

From 77ecee87f551567b94bd26290c734c7feb5ed93f Mon Sep 17 00:00:00 2001
From: Petr Spacek <pspa...@redhat.com>
Date: Wed, 27 May 2015 08:02:57 +0200
Subject: [PATCH] Add iterators for dead nodes in metaLDAP.

https://fedorahosted.org/bind-dyndb-ldap/ticket/128
---
 src/metadb.c |  55 ++++++++++++++++++++
 src/metadb.h |  20 ++++++++
 src/mldap.c  | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/mldap.h  |   8 +++
 4 files changed, 247 insertions(+), 1 deletion(-)

diff --git a/src/metadb.c b/src/metadb.c
index 610df5a50d5b4ce02d584e8b78f7a9bf74ab159a..f4a1dea1a3039a6120be5a4b19d319376fce9fa0 100644
--- a/src/metadb.c
+++ b/src/metadb.c
@@ -8,6 +8,7 @@
 #include <isc/util.h>
 
 #include <dns/db.h>
+#include <dns/dbiterator.h>
 #include <dns/rdatalist.h>
 #include <dns/rdatasetiter.h>
 
@@ -121,6 +122,60 @@ metadb_closeversion(metadb_t *mdb, isc_boolean_t commit) {
 	dns_db_closeversion(mdb->rbtdb, &mdb->newversion, commit);
 }
 
+void
+metadb_iterator_destroy(metadb_iter_t **miterp) {
+	metadb_iter_t *miter = NULL;
+
+	REQUIRE(miterp != NULL && *miterp != NULL);
+	miter = *miterp;
+	/* user has to deallocate state before calling destroy() */
+	INSIST(miter->state == NULL);
+
+	if (miter == NULL)
+		return;
+
+	if (miter->iter != NULL)
+		dns_dbiterator_destroy(&miter->iter);
+
+	if (miter->rbtdb != NULL) {
+		if (miter->version != NULL)
+			dns_db_closeversion(miter->rbtdb,
+					    &miter->version,
+					    ISC_FALSE);
+		dns_db_detach(&miter->rbtdb);
+	}
+
+	MEM_PUT_AND_DETACH(miter);
+	*miterp = NULL;
+}
+
+/**
+ * Create an iterator for current read-only version of metaDB.
+ */
+isc_result_t
+metadb_iterator_create(metadb_t *mdb, metadb_iter_t **miterp) {
+	isc_result_t result;
+	metadb_iter_t *miter = NULL;
+
+	REQUIRE(mdb != NULL);
+	REQUIRE(miterp != NULL && *miterp == NULL);
+
+	CHECKED_MEM_GET_PTR(mdb->mctx, miter);
+	ZERO_PTR(miter);
+
+	isc_mem_attach(mdb->mctx, &miter->mctx);
+	dns_db_attach(mdb->rbtdb, &miter->rbtdb);
+	dns_db_currentversion(miter->rbtdb, &miter->version);
+	CHECK(dns_db_createiterator(mdb->rbtdb, 0, &miter->iter));
+
+	*miterp = miter;
+	return ISC_R_SUCCESS;
+
+cleanup:
+	metadb_iterator_destroy(&miter);
+	return result;
+}
+
 /**
  * Close metaDB node and detach associated DB version. All changes will be lost
  * if this was the last reference to particular metaDB version.
diff --git a/src/metadb.h b/src/metadb.h
index d6e3490f65a4bdd616e7567f86a875415b9d4c10..a1cd1363364bcc9b40768ab294a294483984d9ec 100644
--- a/src/metadb.h
+++ b/src/metadb.h
@@ -22,7 +22,21 @@ struct metadb_node {
 	dns_dbnode_t			*dbnode;
 };
 
+/**
+ * All-in-one structure for metaDB iteration. This structure can be directly
+ * used by metaDB users. Member state can be used to store arbitrary data
+ * for the user. User is responsible for 'state' allocation and deallocation.
+ */
+struct metadb_iter {
+	isc_mem_t			*mctx;
+	dns_db_t			*rbtdb;
+	dns_dbversion_t			*version;
+	dns_dbiterator_t		*iter;
+	void				*state;
+};
+
 typedef struct metadb_node metadb_node_t;
+typedef struct metadb_iter metadb_iter_t;
 typedef struct metadb metadb_t;
 
 isc_result_t
@@ -38,6 +52,12 @@ void ATTR_NONNULLS
 metadb_closeversion(metadb_t *mdb, isc_boolean_t commit);
 
 isc_result_t ATTR_CHECKRESULT ATTR_NONNULLS
+metadb_iterator_create(metadb_t *mdb, metadb_iter_t **miterp);
+
+void ATTR_NONNULLS
+metadb_iterator_destroy(metadb_iter_t **miterp);
+
+isc_result_t ATTR_CHECKRESULT ATTR_NONNULLS
 metadb_readnode_open(metadb_t *mdb, dns_name_t *mname, metadb_node_t **nodep);
 
 isc_result_t ATTR_CHECKRESULT ATTR_NONNULLS
diff --git a/src/mldap.c b/src/mldap.c
index d207df52ed92975c5d678add6dadfbcedfa052b8..0c8327ccd7be802c9ee97838d19efb57715328fc 100644
--- a/src/mldap.c
+++ b/src/mldap.c
@@ -13,8 +13,10 @@
 #include <isc/net.h>
 #include <isc/result.h>
 #include <isc/util.h>
+#include <isc/serial.h>
 
 #include <dns/db.h>
+#include <dns/dbiterator.h>
 #include <dns/enumclass.h>
 #include <dns/name.h>
 #include <dns/types.h>
@@ -88,7 +90,6 @@ mldap_destroy(mldapdb_t **mldapp) {
 	*mldapp = NULL;
 }
 
-
 isc_result_t
 mldap_newversion(mldapdb_t *mldap) {
 	return metadb_newversion(mldap->mdb);
@@ -238,6 +239,29 @@ cleanup:
 	return result;
 }
 
+static isc_result_t
+mldap_generation_get(metadb_node_t *node, isc_uint32_t *generationp) {
+	isc_result_t result;
+	dns_rdataset_t rdataset;
+	dns_rdata_t rdata;
+	isc_region_t region;
+
+	REQUIRE(generationp != NULL);
+
+	dns_rdata_init(&rdata);
+	dns_rdataset_init(&rdataset);
+
+	CHECK(metadb_rdataset_get(node, dns_rdatatype_a, &rdataset));
+	dns_rdataset_current(&rdataset, &rdata);
+	dns_rdata_toregion(&rdata, &region);
+	memcpy(generationp, region.base, sizeof(*generationp));
+
+cleanup:
+	if (dns_rdataset_isassociated(&rdataset))
+		dns_rdataset_disassociate(&rdataset);
+	return result;
+}
+
 /**
  * FQDN and zone name are stored inside RP record type
  */
@@ -363,3 +387,142 @@ mldap_entry_delete(mldapdb_t *mldap, struct berval *uuid) {
 cleanup:
 	return result;
 }
+
+/**
+ * Start iteration over UUID's of dead nodes stored in uuid.ldap. sub-tree
+ * of metaLDAP.
+ *
+ * Dead node is a node with generation number lower than global generation
+ * number in in metaLDAP.
+ *
+ * @param[in]  mldap
+ * @param[out] iterp
+ * @param[out] uuid  Pre-allocated struct berval of size == 16 bytes.
+ *                   LDAP entry UUID of the first dead node will be filled in.
+ *
+ * @retval ISC_R_SUCCESS LDAP entry UUID of the first dead node in database
+ *                       is in uuid variable. Resulting iterp can be used for
+ *                       subsequent mldap_iter_deadnodes_next() calls.
+ * @retval ISC_R_NOMORE  There is no dead node in metaLDAP.
+ *                       Iterp and uuid are invalid.
+ * @retval other         Various errors.
+ *
+ * @warning MetaLDAP generation number cannot change during iteration.
+ *          This is safety check to prevent hard-to-debug inconsistencies.
+ */
+isc_result_t
+mldap_iter_deadnodes_start(mldapdb_t *mldap, metadb_iter_t **iterp,
+			   struct berval *uuid) {
+	isc_result_t result;
+	metadb_iter_t *iter = NULL;
+
+	REQUIRE(iterp != NULL && *iterp == NULL);
+
+	CHECK(metadb_iterator_create(mldap->mdb, &iter));
+	CHECKED_MEM_GET(mldap->mctx, iter->state, sizeof(isc_uint32_t));
+	result = dns_dbiterator_seek(iter->iter, &uuid_rootname);
+	if (result == ISC_R_NOTFOUND) /* metaLDAP is empty */
+		CLEANUP_WITH(ISC_R_NOMORE);
+	else if (result != ISC_R_SUCCESS)
+		goto cleanup;
+	/* store current generation value for sanity checking */
+	*(isc_uint32_t *)iter->state = mldap_cur_generation_get(mldap);
+
+	CHECK(mldap_iter_deadnodes_next(mldap, &iter, uuid));
+
+	*iterp = iter;
+	return result;
+
+cleanup:
+	if (iter != NULL) {
+		SAFE_MEM_PUT(mldap->mctx, iter->state, sizeof(isc_uint32_t));
+		iter->state = NULL;
+		metadb_iterator_destroy(&iter);
+	}
+
+	return result;
+}
+
+/**
+ * Continue iteration over UUID's of dead nodes stored in uuid.ldap. sub-tree
+ * of metaLDAP.
+ *
+ * @param[in]     mldap
+ * @param[in,out] iterp
+ * @param[out]    uuid  Pre-allocated struct berval of size == 16 bytes.
+ *                      LDAP entry UUID of the next dead node will be filled in.
+ *
+ * @retval ISC_R_SUCCESS LDAP entry UUID of the next dead node in database
+ *                       is in uuid variable. Resulting iterp can be used for
+ *                       subsequent mldap_iter_deadnodes_next() calls.
+ * @retval ISC_R_NOMORE  End of iteration. Iterp and uuid are no longer valid.
+ * @retval other         Various errors.
+ *
+ * @warning MetaLDAP generation number cannot change during iteration.
+ *          This is safety check to prevent hard-to-debug inconsistencies.
+ */
+isc_result_t
+mldap_iter_deadnodes_next(mldapdb_t *mldap, metadb_iter_t **iterp,
+		   struct berval *uuid) {
+	isc_result_t result;
+	dns_dbnode_t *rbt_node = NULL;
+	metadb_iter_t *iter = NULL;
+	isc_uint32_t node_generation;
+	isc_uint32_t cur_generation;
+	metadb_node_t metadb_node;
+	DECLARE_BUFFERED_NAME(name);
+	isc_region_t name_region;
+
+	REQUIRE(uuid != NULL);
+	REQUIRE(uuid->bv_len == 16 && uuid->bv_val != NULL);
+
+	INIT_BUFFERED_NAME(name);
+	iter = *iterp;
+
+	/* create fake metaDB node for use with metaDB interface */
+	metadb_node.mctx = iter->mctx;
+	metadb_node.version = iter->version;
+	metadb_node.rbtdb = iter->rbtdb;
+
+	/* skip nodes which do not belong to UUID sub-tree or are 'fresh' */
+	while (ISC_TRUE) {
+		if (rbt_node != NULL)
+			dns_db_detachnode(iter->rbtdb, &rbt_node);
+		dns_name_reset(&name);
+
+		CHECK(dns_dbiterator_next(iter->iter));
+		CHECK(dns_dbiterator_current(iter->iter, &rbt_node, &name));
+		if (dns_name_issubdomain(&name, &uuid_rootname) == ISC_FALSE)
+			continue;
+		metadb_node.dbnode = rbt_node;
+
+		INSIST(mldap_generation_get(&metadb_node, &node_generation)
+		       == ISC_R_SUCCESS);
+		cur_generation = mldap_cur_generation_get(mldap);
+		/* sanity check: generation number cannot change during iteration */
+		INSIST(*(isc_uint32_t *)(*iterp)->state == cur_generation);
+
+		if (isc_serial_lt(node_generation, cur_generation))
+			break; /* this node is from previous mLDAP generation */
+	}
+	DNS_NAME_TOREGION(&name, &name_region);
+	/* parse UUID from DNS name
+	 * "$e4113e03-03b4-11e5-b478-c78116fa7f8b\004uuid\004ldap"
+	 * - first byte of any label is length
+	 * - names derived from UUID has to have constant length */
+	INSIST(name_region.length == 37 + sizeof(uuid_rootname_ndata));
+	INSIST(name_region.base[0] == 36);
+	name_region.base[37] = '\0';
+	INSIST(uuid_parse((const char *)name_region.base + 1,
+			  *(uuid_t *)(uuid->bv_val)) == 0);
+
+cleanup:
+	if (rbt_node != NULL)
+		dns_db_detachnode(iter->rbtdb, &rbt_node);
+	if (result != ISC_R_SUCCESS) {
+		SAFE_MEM_PUT(iter->mctx, iter->state, sizeof(isc_uint32_t));
+		iter->state = NULL;
+		metadb_iterator_destroy(iterp);
+	}
+	return result;
+}
diff --git a/src/mldap.h b/src/mldap.h
index c7eb725d2c1743da6037a2dc95d6bc000ccb0928..c682d48ffbf7442ae55d3286f1b7a4b84a55e9a3 100644
--- a/src/mldap.h
+++ b/src/mldap.h
@@ -48,4 +48,12 @@ mldap_cur_generation_bump(mldapdb_t *mldap);
 isc_uint32_t ATTR_CHECKRESULT ATTR_NONNULLS
 mldap_cur_generation_get(mldapdb_t *mldap);
 
+isc_result_t ATTR_CHECKRESULT ATTR_NONNULLS
+mldap_iter_deadnodes_start(mldapdb_t *mldap, metadb_iter_t **iterp,
+			   struct berval *uuid);
+
+isc_result_t ATTR_CHECKRESULT ATTR_NONNULLS
+mldap_iter_deadnodes_next(mldapdb_t *mldap, metadb_iter_t **iterp,
+		   struct berval *uuid);
+
 #endif /* SRC_MLDAP_H_ */
-- 
2.1.0

From c727f40cae75b9f2e05f2789bade937c90202f11 Mon Sep 17 00:00:00 2001
From: Petr Spacek <pspa...@redhat.com>
Date: Wed, 27 May 2015 08:05:25 +0200
Subject: [PATCH] On reconnect, detect and delete RBT nodes which were removed
 from LDAP.

https://fedorahosted.org/bind-dyndb-ldap/ticket/128
---
 src/ldap_helper.c | 38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/src/ldap_helper.c b/src/ldap_helper.c
index e9d2229d5369fc9ac9d692f426a2bdab99958184..94d0d8ca88c477870c40a84ddd72e88327aa4bb8 100644
--- a/src/ldap_helper.c
+++ b/src/ldap_helper.c
@@ -4420,21 +4420,41 @@ int ldap_sync_intermediate (
 
 	isc_result_t	result;
 	ldap_instance_t *inst = ls->ls_private;
+	metadb_iter_t *mldap_iter = NULL;
+	char entryUUID_buf[16];
+	struct berval entryUUID = { .bv_len = sizeof(entryUUID_buf),
+				    .bv_val = entryUUID_buf };
 
 	UNUSED(msg);
 	UNUSED(syncUUIDs);
-	UNUSED(phase);
 
 	if (inst->exiting)
-		return LDAP_SUCCESS;
-
-	if (phase == LDAP_SYNC_CAPI_DONE) {
-		log_debug(1, "ldap_sync_intermediate RECEIVED");
-		result = sync_barrier_wait(inst->sctx, inst->db_name);
-		if (result != ISC_R_SUCCESS)
-			log_error_r("sync_barrier_wait() failed for instance '%s'",
-				    inst->db_name);
+		goto cleanup;
+
+	log_debug(1, "ldap_sync_intermediate 0x%x", phase);
+	if (phase != LDAP_SYNC_CAPI_DONE)
+		goto cleanup;
+
+	result = sync_barrier_wait(inst->sctx, inst->db_name);
+	if (result != ISC_R_SUCCESS) {
+		log_error_r("sync_barrier_wait() failed for instance '%s'",
+			    inst->db_name);
+		goto cleanup;
+	}
+
+	for (result = mldap_iter_deadnodes_start(inst->mldapdb, &mldap_iter,
+						 &entryUUID);
+	     result == ISC_R_SUCCESS;
+	     result = mldap_iter_deadnodes_next(inst->mldapdb, &mldap_iter,
+					        &entryUUID)) {
+		ldap_sync_search_entry(ls, NULL, &entryUUID,
+				       LDAP_SYNC_CAPI_DELETE);
+
 	}
+	if (result != ISC_R_SUCCESS && result != ISC_R_NOMORE)
+		log_error_r("mldap_iter_deadnodes_* failed, run rndc reload");
+
+cleanup:
 	return LDAP_SUCCESS;
 }
 
-- 
2.1.0

From 783b04c87575205388a1277da8b46a781508f4a7 Mon Sep 17 00:00:00 2001
From: Petr Spacek <pspa...@redhat.com>
Date: Wed, 27 May 2015 10:18:41 +0200
Subject: [PATCH] Consolidate synchronization state machine to
 sync_state_change().

https://fedorahosted.org/bind-dyndb-ldap/ticket/128
---
 src/ldap_helper.c |  2 +-
 src/syncrepl.c    | 57 ++++++++++++++++++++++++++++++++++++++++++-------------
 src/syncrepl.h    |  2 +-
 3 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/src/ldap_helper.c b/src/ldap_helper.c
index 94d0d8ca88c477870c40a84ddd72e88327aa4bb8..d6123e7b6705ea477c49a23decbcd5e3c6ec7396 100644
--- a/src/ldap_helper.c
+++ b/src/ldap_helper.c
@@ -4504,7 +4504,7 @@ ldap_sync_prepare(ldap_instance_t *inst, settings_set_t *settings,
 	REQUIRE(inst != NULL);
 	REQUIRE(ldap_syncp != NULL && *ldap_syncp == NULL);
 
-	sync_state_reset(inst->sctx);
+	sync_state_change(inst->sctx, sync_init, ISC_TRUE);
 
 	/* Remove stale zone & journal files. */
 	CHECK(cleanup_files(inst));
diff --git a/src/syncrepl.c b/src/syncrepl.c
index 49399da4adaeeed2a7c317bf37759e4d47226b7e..cc5d66f145bd5f5f0f53b5717256a566d53420bc 100644
--- a/src/syncrepl.c
+++ b/src/syncrepl.c
@@ -119,8 +119,7 @@ finish(isc_task_t *task, isc_event_t *event) {
 	CHECK(manager_get_ldap_instance(bev->dbname, &inst));
 	log_debug(1, "sync_barrier_wait(): finish reached");
 	LOCK(&bev->sctx->mutex);
-	REQUIRE(bev->sctx->state == sync_barrier);
-	bev->sctx->state = sync_finished;
+	sync_state_change(bev->sctx, sync_finished, ISC_FALSE);
 	isc_condition_broadcast(&bev->sctx->cond);
 	UNLOCK(&bev->sctx->mutex);
 	activate_zones(task, inst);
@@ -326,13 +325,49 @@ sync_state_get(sync_ctx_t *sctx, sync_state_t *statep) {
 	UNLOCK(&sctx->mutex);
 }
 
+/**
+ * Change state of synchronization finite state machine.
+ *
+ * @param[in] lock Request to lock sctx. This is a workaround for missing
+ *                 support recursive mutexes in ISC mutex API.
+ *
+ * @warning Caller has to ensure that sctx is properly locked either externally
+ *          or by lock = ISC_TRUE parameter. Attempt to lock sctx recursively
+ *          will lead to deadlock.
+ */
 void
-sync_state_reset(sync_ctx_t *sctx) {
+sync_state_change(sync_ctx_t *sctx, sync_state_t new_state, isc_boolean_t lock) {
 	REQUIRE(sctx != NULL);
 
-	LOCK(&sctx->mutex);
-	sctx->state = sync_init;
-	UNLOCK(&sctx->mutex);
+	if (lock == ISC_TRUE)
+		LOCK(&sctx->mutex);
+
+	switch (sctx->state) {
+	case sync_init:
+		/* reconnect before ldap_sync_intermediate() call */
+		INSIST(new_state == sync_init
+		/* refresh phase is finished
+		 * and ldap_sync_intermediate() was called */
+		       || new_state == sync_barrier);
+		break;
+
+	case sync_barrier:
+		/* sync_barrier_wait() finished */
+		INSIST(new_state == sync_finished);
+		break;
+
+	case sync_finished:
+		/* reconnect */
+		INSIST(new_state == sync_init);
+		break;
+
+	default:
+		fatal_error("undefined state 0x%x", sctx->state);
+	}
+
+	sctx->state = new_state;
+	if (lock == ISC_TRUE)
+		UNLOCK(&sctx->mutex);
 }
 
 /**
@@ -348,16 +383,16 @@ sync_task_add(sync_ctx_t *sctx, isc_task_t *task) {
 	task_element_t *newel = NULL;
 
 	REQUIRE(sctx != NULL);
-	REQUIRE(sctx->state == sync_init);
 	REQUIRE(ISCAPI_TASK_VALID(task));
 
 	CHECKED_MEM_GET_PTR(sctx->mctx, newel);
 	ZERO_PTR(newel);
 	ISC_LINK_INIT(newel, link);
 	newel->task = NULL;
 	isc_task_attach(task, &newel->task);
 
 	LOCK(&sctx->mutex);
+	REQUIRE(sctx->state == sync_init);
 	ISC_LIST_APPEND(sctx->tasks, newel, link);
 	isc_refcount_increment0(&sctx->task_cnt, &cnt);
 	UNLOCK(&sctx->mutex);
@@ -390,13 +425,9 @@ sync_barrier_wait(sync_ctx_t *sctx, const char *inst_name) {
 
 	LOCK(&sctx->mutex);
 	REQUIRE(sctx->state == sync_init);
-	if (EMPTY(sctx->tasks)) {
-		log_bug("sync_barrier_wait(): called with empty task list");
-		sctx->state = sync_finished;
-		CLEANUP_WITH(ISC_R_SUCCESS);
-	}
+	REQUIRE(!EMPTY(sctx->tasks));
 
-	sctx->state = sync_barrier;
+	sync_state_change(sctx, sync_barrier, ISC_FALSE);
 	for (taskel = next_taskel = HEAD(sctx->tasks);
 	     taskel != NULL;
 	     taskel = next_taskel) {
diff --git a/src/syncrepl.h b/src/syncrepl.h
index 205e1f849eac033ff6861c408a2f1e073747c835..10fbbc1fa80d65bdb8bba65a316a4815996985e9 100644
--- a/src/syncrepl.h
+++ b/src/syncrepl.h
@@ -36,7 +36,7 @@ void
 sync_state_get(sync_ctx_t *sctx, sync_state_t *statep) ATTR_NONNULLS;
 
 void
-sync_state_reset(sync_ctx_t *sctx) ATTR_NONNULLS;
+sync_state_change(sync_ctx_t *sctx, sync_state_t new_state, isc_boolean_t lock) ATTR_NONNULLS;
 
 isc_result_t
 sync_task_add(sync_ctx_t *sctx, isc_task_t *task) ATTR_NONNULLS ATTR_CHECKRESULT;
-- 
2.1.0

From 9b4a6373c868f8858253d5e9bf850e1cbbed2a7f Mon Sep 17 00:00:00 2001
From: Petr Spacek <pspa...@redhat.com>
Date: Wed, 27 May 2015 11:34:06 +0200
Subject: [PATCH] Avoid synchronization state resets.

Going back to sync_init state is a bad idea because it prevents
update_*() functions from writting to journal. As a result, zone
journals were missing changes done during resynchronization, i.e. the
journal was corrupted and unusable for IXFR and DNSSEC in-line signing.

https://fedorahosted.org/bind-dyndb-ldap/ticket/128
---
 src/ldap_helper.c | 16 +++++++++-------
 src/syncrepl.c    | 12 ++++--------
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/src/ldap_helper.c b/src/ldap_helper.c
index d6123e7b6705ea477c49a23decbcd5e3c6ec7396..d6461a3e83b63555a46ff3f60761e3703d9a6b4e 100644
--- a/src/ldap_helper.c
+++ b/src/ldap_helper.c
@@ -4424,6 +4424,7 @@ int ldap_sync_intermediate (
 	char entryUUID_buf[16];
 	struct berval entryUUID = { .bv_len = sizeof(entryUUID_buf),
 				    .bv_val = entryUUID_buf };
+	sync_state_t state;
 
 	UNUSED(msg);
 	UNUSED(syncUUIDs);
@@ -4435,11 +4436,14 @@ int ldap_sync_intermediate (
 	if (phase != LDAP_SYNC_CAPI_DONE)
 		goto cleanup;
 
-	result = sync_barrier_wait(inst->sctx, inst->db_name);
-	if (result != ISC_R_SUCCESS) {
-		log_error_r("sync_barrier_wait() failed for instance '%s'",
-			    inst->db_name);
-		goto cleanup;
+	sync_state_get(inst->sctx, &state);
+	if (state == sync_init) {
+		result = sync_barrier_wait(inst->sctx, inst->db_name);
+		if (result != ISC_R_SUCCESS) {
+			log_error_r("sync_barrier_wait() failed for instance "
+				    "'%s'", inst->db_name);
+			goto cleanup;
+		}
 	}
 
 	for (result = mldap_iter_deadnodes_start(inst->mldapdb, &mldap_iter,
@@ -4504,8 +4508,6 @@ ldap_sync_prepare(ldap_instance_t *inst, settings_set_t *settings,
 	REQUIRE(inst != NULL);
 	REQUIRE(ldap_syncp != NULL && *ldap_syncp == NULL);
 
-	sync_state_change(inst->sctx, sync_init, ISC_TRUE);
-
 	/* Remove stale zone & journal files. */
 	CHECK(cleanup_files(inst));
 
diff --git a/src/syncrepl.c b/src/syncrepl.c
index cc5d66f145bd5f5f0f53b5717256a566d53420bc..94e161997a5eaf19cc71816cfe8fc0fee17d31f4 100644
--- a/src/syncrepl.c
+++ b/src/syncrepl.c
@@ -344,25 +344,21 @@ sync_state_change(sync_ctx_t *sctx, sync_state_t new_state, isc_boolean_t lock)
 
 	switch (sctx->state) {
 	case sync_init:
-		/* reconnect before ldap_sync_intermediate() call */
-		INSIST(new_state == sync_init
 		/* refresh phase is finished
 		 * and ldap_sync_intermediate() was called */
-		       || new_state == sync_barrier);
+		INSIST(new_state == sync_barrier);
 		break;
 
 	case sync_barrier:
 		/* sync_barrier_wait() finished */
 		INSIST(new_state == sync_finished);
 		break;
 
 	case sync_finished:
-		/* reconnect */
-		INSIST(new_state == sync_init);
-		break;
-
+		/* state finished cannot be taken back, ever */
 	default:
-		fatal_error("undefined state 0x%x", sctx->state);
+		fatal_error("invalid synchronization state change %u -> %u",
+			    sctx->state, new_state);
 	}
 
 	sctx->state = new_state;
-- 
2.1.0

-- 
Manage your subscription for the Freeipa-devel mailing list:
https://www.redhat.com/mailman/listinfo/freeipa-devel
Contribute to FreeIPA: http://www.freeipa.org/page/Contribute/Code

Reply via email to