URL: https://github.com/freeipa/freeipa/pull/2059 Author: tiran Title: #2059: [Backport][ipa-4-6] Fix replication races in Dogtag admin code Action: opened
PR body: """ Manual backport of PR #2051 PR fixes two related issues: # Fix replication races in Dogtag admin code DogtagInstance.setup_admin and related methods have multiple LDAP replication race conditions. The bugs can cause parallel ipa-replica-install to fail. The code from __add_admin_to_group() has been changed to use MOD_ADD ather than search + MOD_REPLACE. The MOD_REPLACE approach can lead to data loss, when more than one writer changes a group. setup_admin() now waits until both admin user and group membership have been replicated to the master peer. Fixes: https://pagure.io/freeipa/issue/7593 # Improve and fix timeout bug in wait_for_entry() replication.wait_for_entry() now can wait for an attribute value to appear on a replica. Fixed timeout handling caused by bad rounding and comparison. For small timeouts, the actual time was rounded down. For example for 60 seconds timeout and fast replica, the query accumulated to about 0.45 seconds plus 60 seconds sleep. 60.45 is large enough to terminate the loop "while int(time.time()) < timeout", but not large enough to trigger the exception in "if int(time.time()) > timeout", because int(60.65) == 60. Fixes: https://pagure.io/freeipa/issue/7595 """ To pull the PR as Git branch: git remote add ghfreeipa https://github.com/freeipa/freeipa git fetch ghfreeipa pull/2059/head:pr2059 git checkout pr2059
From 5e981b4384b7814b2de74fcdab88f36bf2011c29 Mon Sep 17 00:00:00 2001 From: Christian Heimes <chei...@redhat.com> Date: Fri, 22 Jun 2018 09:39:26 +0200 Subject: [PATCH 1/3] Improve and fix timeout bug in wait_for_entry() replication.wait_for_entry() now can wait for an attribute value to appear on a replica. Fixed timeout handling caused by bad rounding and comparison. For small timeouts, the actual time was rounded down. For example for 60 seconds timeout and fast replica, the query accumulated to about 0.45 seconds plus 60 seconds sleep. 60.45 is large enough to terminate the loop "while int(time.time()) < timeout", but not large enough to trigger the exception in "if int(time.time()) > timeout", because int(60.65) == 60. See: https://pagure.io/freeipa/issue/7593 Fixes: https://pagure.io/freeipa/issue/7595 Signed-off-by: Christian Heimes <chei...@redhat.com> --- ipaserver/install/replication.py | 54 +++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/ipaserver/install/replication.py b/ipaserver/install/replication.py index 2e7ce53fef..6d9878e16f 100644 --- a/ipaserver/install/replication.py +++ b/ipaserver/install/replication.py @@ -20,6 +20,7 @@ from __future__ import print_function, absolute_import import logging +import itertools import six import time @@ -160,40 +161,43 @@ def wait_for_task(conn, dn): return exit_code -def wait_for_entry(connection, dn, timeout=7200, attr='', quiet=True): - """Wait for entry and/or attr to show up""" - - filter = "(objectclass=*)" +def wait_for_entry(connection, dn, timeout=7200, attr=None, attrvalue='*', + quiet=True): + """Wait for entry and/or attr to show up + """ + log = logger.debug if quiet else logger.info attrlist = [] - if attr: - filter = "(%s=*)" % attr + if attr is not None: + filterstr = ipaldap.LDAPClient.make_filter_from_attr(attr, attrvalue) attrlist.append(attr) - timeout += int(time.time()) - - if not quiet: - sys.stdout.write("Waiting for %s %s:%s " % (connection, dn, attr)) - sys.stdout.flush() - entry = None - while not entry and int(time.time()) < timeout: + else: + filterstr = "(objectclass=*)" + log("Waiting for replication (%s) %s %s", connection, dn, filterstr) + entry = [] + deadline = time.time() + timeout + for i in itertools.count(start=1): try: - [entry] = connection.get_entries( - dn, ldap.SCOPE_BASE, filter, attrlist) + entry = connection.get_entries( + dn, ldap.SCOPE_BASE, filterstr, attrlist) except errors.NotFound: pass # no entry yet except Exception as e: # badness logger.error("Error reading entry %s: %s", dn, e) raise - if not entry: - if not quiet: - sys.stdout.write(".") - sys.stdout.flush() - time.sleep(1) - if not entry and int(time.time()) > timeout: - raise errors.NotFound( - reason="wait_for_entry timeout for %s for %s" % (connection, dn)) - elif entry and not quiet: - logger.error("The waited for entry is: %s", entry) + if entry: + log("Entry found %r", entry) + return + elif time.time() > deadline: + raise errors.NotFound( + reason="wait_for_entry timeout on {} for {}".format( + connection, dn + ) + ) + else: + if i % 10 == 0: + logger.debug("Still waiting for replication of %s", dn) + time.sleep(1) class ReplicationManager(object): From 9dd9982977f7f5f80b93903256537f1be357485d Mon Sep 17 00:00:00 2001 From: Christian Heimes <chei...@redhat.com> Date: Fri, 22 Jun 2018 10:00:24 +0200 Subject: [PATCH 2/3] Use common replication wait timeout of 5min Instead of multiple timeout values all over the code base, all replication waits now use a common timeout value from api.env of 5 minutes. Waiting for HTTP/replica principal takes 90 to 120 seconds, so 5 minutes seem like a sufficient value for slow setups. Fixes: https://pagure.io/freeipa/issue/7595 Signed-off-by: Christian Heimes <chei...@redhat.com> --- ipalib/constants.py | 2 ++ ipaserver/install/custodiainstance.py | 4 +++- ipaserver/install/httpinstance.py | 6 +++++- ipaserver/install/krbinstance.py | 13 ++++++++----- ipaserver/install/replication.py | 6 ++++-- 5 files changed, 22 insertions(+), 9 deletions(-) diff --git a/ipalib/constants.py b/ipalib/constants.py index 9ae6e0aaae..38ed57b3cc 100644 --- a/ipalib/constants.py +++ b/ipalib/constants.py @@ -149,6 +149,8 @@ ('startup_timeout', 300), # How long http connection should wait for reply [seconds]. ('http_timeout', 30), + # How long to wait for an entry to appear on a replica + ('replication_wait_timeout', 300), # Web Application mount points ('mount_ipa', '/ipa/'), diff --git a/ipaserver/install/custodiainstance.py b/ipaserver/install/custodiainstance.py index c87306d2f4..fcfe090842 100644 --- a/ipaserver/install/custodiainstance.py +++ b/ipaserver/install/custodiainstance.py @@ -5,6 +5,7 @@ import enum import logging +from ipalib import api from ipaserver.secrets.kem import IPAKEMKeys, KEMLdap from ipaserver.secrets.client import CustodiaClient from ipaplatform.paths import paths @@ -214,7 +215,8 @@ def import_dm_password(self): cli = self._get_custodia_client() cli.fetch_key('dm/DMHash') - def _wait_keys(self, timeout=300): + def _wait_keys(self): + timeout = api.env.replication_wait_timeout deadline = int(time.time()) + timeout logger.info("Waiting up to %s seconds to see our keys " "appear on host %s", timeout, self.ldap_uri) diff --git a/ipaserver/install/httpinstance.py b/ipaserver/install/httpinstance.py index 3764870ee7..bdd79b1daf 100644 --- a/ipaserver/install/httpinstance.py +++ b/ipaserver/install/httpinstance.py @@ -644,4 +644,8 @@ def request_service_keytab(self): else: remote_ldap.simple_bind(ipaldap.DIRMAN_DN, self.dm_password) - replication.wait_for_entry(remote_ldap, service_dn, timeout=60) + replication.wait_for_entry( + remote_ldap, + service_dn, + timeout=api.env.replication_wait_timeout + ) diff --git a/ipaserver/install/krbinstance.py b/ipaserver/install/krbinstance.py index 4a5a52dcc8..a356d5e0c1 100644 --- a/ipaserver/install/krbinstance.py +++ b/ipaserver/install/krbinstance.py @@ -399,13 +399,16 @@ def __create_host_keytab(self): def _wait_for_replica_kdc_entry(self): master_dn = self.api.Object.server.get_dn(self.fqdn) kdc_dn = DN(('cn', 'KDC'), master_dn) - - ldap_uri = 'ldap://{}'.format(self.master_fqdn) - + ldap_uri = ipaldap.get_ldap_uri(self.master_fqdn) with ipaldap.LDAPClient( - ldap_uri, cacert=paths.IPA_CA_CRT) as remote_ldap: + ldap_uri, cacert=paths.IPA_CA_CRT, start_tls=True + ) as remote_ldap: remote_ldap.gssapi_bind() - replication.wait_for_entry(remote_ldap, kdc_dn, timeout=60) + replication.wait_for_entry( + remote_ldap, + kdc_dn, + timeout=api.env.replication_wait_timeout + ) def _call_certmonger(self, certmonger_ca='IPA'): subject = str(DN(('cn', self.fqdn), self.subject_base)) diff --git a/ipaserver/install/replication.py b/ipaserver/install/replication.py index 6d9878e16f..5ce8fa689c 100644 --- a/ipaserver/install/replication.py +++ b/ipaserver/install/replication.py @@ -161,7 +161,7 @@ def wait_for_task(conn, dn): return exit_code -def wait_for_entry(connection, dn, timeout=7200, attr=None, attrvalue='*', +def wait_for_entry(connection, dn, timeout, attr=None, attrvalue='*', quiet=True): """Wait for entry and/or attr to show up """ @@ -751,7 +751,9 @@ def setup_agreement(self, a_conn, b_hostname, port=389, # that we will have to set the memberof fixup task self.need_memberof_fixup = True - wait_for_entry(a_conn, entry.dn) + wait_for_entry( + a_conn, entry.dn, timeout=api.env.replication_wait_timeout + ) def needs_memberof_fixup(self): return self.need_memberof_fixup From 040cc162a2369fb4e458acfcb16e52c7204ff922 Mon Sep 17 00:00:00 2001 From: Christian Heimes <chei...@redhat.com> Date: Fri, 22 Jun 2018 10:04:38 +0200 Subject: [PATCH 3/3] Fix replication races in Dogtag admin code DogtagInstance.setup_admin and related methods have multiple LDAP replication race conditions. The bugs can cause parallel ipa-replica-install to fail. The code from __add_admin_to_group() has been changed to use MOD_ADD ather than search + MOD_REPLACE. The MOD_REPLACE approach can lead to data loss, when more than one writer changes a group. setup_admin() now waits until both admin user and group membership have been replicated to the master peer. The method also adds a new ACI to allow querying group member in the replication check. Fixes: https://pagure.io/freeipa/issue/7593 Signed-off-by: Christian Heimes <chei...@redhat.com> --- ipaserver/install/cainstance.py | 1 + ipaserver/install/dogtaginstance.py | 96 +++++++++++++++++++++++++++++-------- ipaserver/install/krainstance.py | 1 + 3 files changed, 77 insertions(+), 21 deletions(-) diff --git a/ipaserver/install/cainstance.py b/ipaserver/install/cainstance.py index 5bdac82a57..b58fbb4c88 100644 --- a/ipaserver/install/cainstance.py +++ b/ipaserver/install/cainstance.py @@ -392,6 +392,7 @@ def configure_instance(self, host_name, dm_password, admin_password, # Setup Database self.step("creating certificate server db", self.__create_ds_db) self.step("setting up initial replication", self.__setup_replication) + self.step("creating ACIs for admin", self.add_ipaca_aci) self.step("creating installation admin user", self.setup_admin) self.step("configuring certificate server instance", self.__spawn_instance) diff --git a/ipaserver/install/dogtaginstance.py b/ipaserver/install/dogtaginstance.py index 07e63024c6..5b2c30f8a1 100644 --- a/ipaserver/install/dogtaginstance.py +++ b/ipaserver/install/dogtaginstance.py @@ -21,6 +21,7 @@ import base64 import logging +import time import ldap import os @@ -90,6 +91,16 @@ class DogtagInstance(service.Service): tracking_reqs = None server_cert_name = None + ipaca_groups = DN(('ou', 'groups'), ('o', 'ipaca')) + ipaca_people = DN(('ou', 'people'), ('o', 'ipaca')) + groups_aci = ( + b'(targetfilter="(objectClass=groupOfUniqueNames)")' + b'(targetattr="cn || description || objectclass || uniquemember")' + b'(version 3.0; acl "Allow users from o=ipaca to read groups"; ' + b'allow (read, search, compare) ' + b'userdn="ldap:///uid=*,ou=people,o=ipaca";)' + ) + def __init__(self, realm, subsystem, service_desc, host_name=None, nss_db=paths.PKI_TOMCAT_ALIAS_DIR, service_prefix=None, config=None): @@ -108,10 +119,11 @@ def __init__(self, realm, subsystem, service_desc, host_name=None, self.pkcs12_info = None self.clone = False - self.basedn = DN(('o', 'ipa%s' % subsystem.lower())) + self.basedn = DN(('o', 'ipaca')) self.admin_user = "admin" - self.admin_dn = DN(('uid', self.admin_user), - ('ou', 'people'), ('o', 'ipaca')) + self.admin_dn = DN( + ('uid', self.admin_user), self.ipaca_people + ) self.admin_groups = None self.tmp_agent_db = None self.subsystem = subsystem @@ -393,27 +405,31 @@ def handle_setup_error(self, e): raise RuntimeError("%s configuration failed." % self.subsystem) - def __add_admin_to_group(self, group): - dn = DN(('cn', group), ('ou', 'groups'), ('o', 'ipaca')) - entry = api.Backend.ldap2.get_entry(dn) - members = entry.get('uniqueMember', []) - members.append(self.admin_dn) - mod = [(ldap.MOD_REPLACE, 'uniqueMember', members)] + def add_ipaca_aci(self): + """Add ACI to allow ipaca users to read their own group information + + Dogtag users aren't allowed to enumerate their own groups. The + setup_admin() method needs the permission to wait, until all group + information has been replicated. + """ + dn = self.ipaca_groups + mod = [(ldap.MOD_ADD, 'aci', [self.groups_aci])] try: api.Backend.ldap2.modify_s(dn, mod) except ldap.TYPE_OR_VALUE_EXISTS: - # already there - pass + logger.debug("%s already has ACI to read group information", dn) + else: + logger.debug("Added ACI to read groups to %s", dn) def setup_admin(self): self.admin_user = "admin-%s" % self.fqdn self.admin_password = ipautil.ipa_generate_password() - self.admin_dn = DN(('uid', self.admin_user), - ('ou', 'people'), ('o', 'ipaca')) - + self.admin_dn = DN( + ('uid', self.admin_user), self.ipaca_people + ) # remove user if left-over exists try: - entry = api.Backend.ldap2.delete_entry(self.admin_dn) + api.Backend.ldap2.delete_entry(self.admin_dn) except errors.NotFound: pass @@ -432,18 +448,56 @@ def setup_admin(self): ) api.Backend.ldap2.add_entry(entry) + wait_groups = [] for group in self.admin_groups: - self.__add_admin_to_group(group) + group_dn = DN(('cn', group), self.ipaca_groups) + mod = [(ldap.MOD_ADD, 'uniqueMember', [self.admin_dn])] + try: + api.Backend.ldap2.modify_s(group_dn, mod) + except ldap.TYPE_OR_VALUE_EXISTS: + # already there + return None + else: + wait_groups.append(group_dn) # Now wait until the other server gets replicated this data ldap_uri = ipaldap.get_ldap_uri(self.master_host) - master_conn = ipaldap.LDAPClient(ldap_uri) - master_conn.gssapi_bind() - replication.wait_for_entry(master_conn, entry.dn) - del master_conn + master_conn = ipaldap.LDAPClient(ldap_uri, start_tls=True) + logger.debug( + "Waiting for %s to appear on %s", self.admin_dn, master_conn + ) + deadline = time.time() + api.env.replication_wait_timeout + while time.time() < deadline: + time.sleep(1) + try: + master_conn.simple_bind(self.admin_dn, self.admin_password) + except ldap.INVALID_CREDENTIALS: + pass + else: + logger.debug("Successfully logged in as %s", self.admin_dn) + break + else: + logger.error( + "Unable to log in as %s on %s", self.admin_dn, master_conn + ) + raise errors.NotFound( + reason="{} did not replicate to {}".format( + self.admin_dn, master_conn + ) + ) + + # wait for group membership + for group_dn in wait_groups: + replication.wait_for_entry( + master_conn, + group_dn, + timeout=api.env.replication_wait_timeout, + attr='uniqueMember', + attrvalue=self.admin_dn + ) def __remove_admin_from_group(self, group): - dn = DN(('cn', group), ('ou', 'groups'), ('o', 'ipaca')) + dn = DN(('cn', group), self.ipaca_groups) mod = [(ldap.MOD_DELETE, 'uniqueMember', self.admin_dn)] try: api.Backend.ldap2.modify_s(dn, mod) diff --git a/ipaserver/install/krainstance.py b/ipaserver/install/krainstance.py index b959398706..9483f0ec4e 100644 --- a/ipaserver/install/krainstance.py +++ b/ipaserver/install/krainstance.py @@ -115,6 +115,7 @@ def configure_instance(self, realm_name, host_name, dm_password, "A Dogtag CA must be installed first") if promote: + self.step("creating ACIs for admin", self.add_ipaca_aci) self.step("creating installation admin user", self.setup_admin) self.step("configuring KRA instance", self.__spawn_instance) if not self.clone:
_______________________________________________ FreeIPA-devel mailing list -- freeipa-devel@lists.fedorahosted.org To unsubscribe send an email to freeipa-devel-le...@lists.fedorahosted.org Fedora Code of Conduct: https://getfedora.org/code-of-conduct.html List Guidelines: https://fedoraproject.org/wiki/Mailing_list_guidelines List Archives: https://lists.fedoraproject.org/archives/list/freeipa-devel@lists.fedorahosted.org/message/R5O7YNEOMY2NCDVUBXH26DP5CS6ZPGIB/