URL: https://github.com/freeipa/freeipa/pull/2059
Author: tiran
 Title: #2059: [Backport][ipa-4-6] Fix replication races in Dogtag admin code
Action: opened

PR body:
"""
Manual backport of PR #2051

PR fixes two related issues:

# Fix replication races in Dogtag admin code

DogtagInstance.setup_admin and related methods have multiple LDAP
replication race conditions. The bugs can cause parallel
ipa-replica-install to fail.

The code from __add_admin_to_group() has been changed to use MOD_ADD
ather than search + MOD_REPLACE. The MOD_REPLACE approach can lead to
data loss, when more than one writer changes a group.

setup_admin() now waits until both admin user and group membership have
been replicated to the master peer.

Fixes: https://pagure.io/freeipa/issue/7593


# Improve and fix timeout bug in wait_for_entry()

replication.wait_for_entry() now can wait for an attribute value to
appear on a replica.

Fixed timeout handling caused by bad rounding and comparison. For small
timeouts, the actual time was rounded down. For example for 60 seconds
timeout and fast replica, the query accumulated to about 0.45 seconds
plus 60 seconds sleep. 60.45 is large enough to terminate the loop
"while int(time.time()) < timeout", but not large enough to trigger the
exception in "if int(time.time()) > timeout", because int(60.65) == 60.

Fixes: https://pagure.io/freeipa/issue/7595
"""

To pull the PR as Git branch:
git remote add ghfreeipa https://github.com/freeipa/freeipa
git fetch ghfreeipa pull/2059/head:pr2059
git checkout pr2059
From 5e981b4384b7814b2de74fcdab88f36bf2011c29 Mon Sep 17 00:00:00 2001
From: Christian Heimes <chei...@redhat.com>
Date: Fri, 22 Jun 2018 09:39:26 +0200
Subject: [PATCH 1/3] Improve and fix timeout bug in wait_for_entry()

replication.wait_for_entry() now can wait for an attribute value to
appear on a replica.

Fixed timeout handling caused by bad rounding and comparison. For small
timeouts, the actual time was rounded down. For example for 60 seconds
timeout and fast replica, the query accumulated to about 0.45 seconds
plus 60 seconds sleep. 60.45 is large enough to terminate the loop
"while int(time.time()) < timeout", but not large enough to trigger the
exception in "if int(time.time()) > timeout", because int(60.65) == 60.

See: https://pagure.io/freeipa/issue/7593
Fixes: https://pagure.io/freeipa/issue/7595
Signed-off-by: Christian Heimes <chei...@redhat.com>
---
 ipaserver/install/replication.py | 54 +++++++++++++++++++++-------------------
 1 file changed, 29 insertions(+), 25 deletions(-)

diff --git a/ipaserver/install/replication.py b/ipaserver/install/replication.py
index 2e7ce53fef..6d9878e16f 100644
--- a/ipaserver/install/replication.py
+++ b/ipaserver/install/replication.py
@@ -20,6 +20,7 @@
 from __future__ import print_function, absolute_import
 
 import logging
+import itertools
 
 import six
 import time
@@ -160,40 +161,43 @@ def wait_for_task(conn, dn):
     return exit_code
 
 
-def wait_for_entry(connection, dn, timeout=7200, attr='', quiet=True):
-    """Wait for entry and/or attr to show up"""
-
-    filter = "(objectclass=*)"
+def wait_for_entry(connection, dn, timeout=7200, attr=None, attrvalue='*',
+                   quiet=True):
+    """Wait for entry and/or attr to show up
+    """
+    log = logger.debug if quiet else logger.info
     attrlist = []
-    if attr:
-        filter = "(%s=*)" % attr
+    if attr is not None:
+        filterstr = ipaldap.LDAPClient.make_filter_from_attr(attr, attrvalue)
         attrlist.append(attr)
-    timeout += int(time.time())
-
-    if not quiet:
-        sys.stdout.write("Waiting for %s %s:%s " % (connection, dn, attr))
-        sys.stdout.flush()
-    entry = None
-    while not entry and int(time.time()) < timeout:
+    else:
+        filterstr = "(objectclass=*)"
+    log("Waiting for replication (%s) %s %s", connection, dn, filterstr)
+    entry = []
+    deadline = time.time() + timeout
+    for i in itertools.count(start=1):
         try:
-            [entry] = connection.get_entries(
-                dn, ldap.SCOPE_BASE, filter, attrlist)
+            entry = connection.get_entries(
+                dn, ldap.SCOPE_BASE, filterstr, attrlist)
         except errors.NotFound:
             pass  # no entry yet
         except Exception as e:  # badness
             logger.error("Error reading entry %s: %s", dn, e)
             raise
-        if not entry:
-            if not quiet:
-                sys.stdout.write(".")
-                sys.stdout.flush()
-            time.sleep(1)
 
-    if not entry and int(time.time()) > timeout:
-        raise errors.NotFound(
-            reason="wait_for_entry timeout for %s for %s" % (connection, dn))
-    elif entry and not quiet:
-        logger.error("The waited for entry is: %s", entry)
+        if entry:
+            log("Entry found %r", entry)
+            return
+        elif time.time() > deadline:
+            raise errors.NotFound(
+                reason="wait_for_entry timeout on {} for {}".format(
+                    connection, dn
+                )
+            )
+        else:
+            if i % 10 == 0:
+                logger.debug("Still waiting for replication of %s", dn)
+            time.sleep(1)
 
 
 class ReplicationManager(object):

From 9dd9982977f7f5f80b93903256537f1be357485d Mon Sep 17 00:00:00 2001
From: Christian Heimes <chei...@redhat.com>
Date: Fri, 22 Jun 2018 10:00:24 +0200
Subject: [PATCH 2/3] Use common replication wait timeout of 5min

Instead of multiple timeout values all over the code base, all
replication waits now use a common timeout value from api.env of 5
minutes. Waiting for HTTP/replica principal takes 90 to 120 seconds, so
5 minutes seem like a sufficient value for slow setups.

Fixes: https://pagure.io/freeipa/issue/7595
Signed-off-by: Christian Heimes <chei...@redhat.com>
---
 ipalib/constants.py                   |  2 ++
 ipaserver/install/custodiainstance.py |  4 +++-
 ipaserver/install/httpinstance.py     |  6 +++++-
 ipaserver/install/krbinstance.py      | 13 ++++++++-----
 ipaserver/install/replication.py      |  6 ++++--
 5 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/ipalib/constants.py b/ipalib/constants.py
index 9ae6e0aaae..38ed57b3cc 100644
--- a/ipalib/constants.py
+++ b/ipalib/constants.py
@@ -149,6 +149,8 @@
     ('startup_timeout', 300),
     # How long http connection should wait for reply [seconds].
     ('http_timeout', 30),
+    # How long to wait for an entry to appear on a replica
+    ('replication_wait_timeout', 300),
 
     # Web Application mount points
     ('mount_ipa', '/ipa/'),
diff --git a/ipaserver/install/custodiainstance.py b/ipaserver/install/custodiainstance.py
index c87306d2f4..fcfe090842 100644
--- a/ipaserver/install/custodiainstance.py
+++ b/ipaserver/install/custodiainstance.py
@@ -5,6 +5,7 @@
 import enum
 import logging
 
+from ipalib import api
 from ipaserver.secrets.kem import IPAKEMKeys, KEMLdap
 from ipaserver.secrets.client import CustodiaClient
 from ipaplatform.paths import paths
@@ -214,7 +215,8 @@ def import_dm_password(self):
         cli = self._get_custodia_client()
         cli.fetch_key('dm/DMHash')
 
-    def _wait_keys(self, timeout=300):
+    def _wait_keys(self):
+        timeout = api.env.replication_wait_timeout
         deadline = int(time.time()) + timeout
         logger.info("Waiting up to %s seconds to see our keys "
                     "appear on host %s", timeout, self.ldap_uri)
diff --git a/ipaserver/install/httpinstance.py b/ipaserver/install/httpinstance.py
index 3764870ee7..bdd79b1daf 100644
--- a/ipaserver/install/httpinstance.py
+++ b/ipaserver/install/httpinstance.py
@@ -644,4 +644,8 @@ def request_service_keytab(self):
                 else:
                     remote_ldap.simple_bind(ipaldap.DIRMAN_DN,
                                             self.dm_password)
-                replication.wait_for_entry(remote_ldap, service_dn, timeout=60)
+                replication.wait_for_entry(
+                    remote_ldap,
+                    service_dn,
+                    timeout=api.env.replication_wait_timeout
+                )
diff --git a/ipaserver/install/krbinstance.py b/ipaserver/install/krbinstance.py
index 4a5a52dcc8..a356d5e0c1 100644
--- a/ipaserver/install/krbinstance.py
+++ b/ipaserver/install/krbinstance.py
@@ -399,13 +399,16 @@ def __create_host_keytab(self):
     def _wait_for_replica_kdc_entry(self):
         master_dn = self.api.Object.server.get_dn(self.fqdn)
         kdc_dn = DN(('cn', 'KDC'), master_dn)
-
-        ldap_uri = 'ldap://{}'.format(self.master_fqdn)
-
+        ldap_uri = ipaldap.get_ldap_uri(self.master_fqdn)
         with ipaldap.LDAPClient(
-                ldap_uri, cacert=paths.IPA_CA_CRT) as remote_ldap:
+                ldap_uri, cacert=paths.IPA_CA_CRT, start_tls=True
+        ) as remote_ldap:
             remote_ldap.gssapi_bind()
-            replication.wait_for_entry(remote_ldap, kdc_dn, timeout=60)
+            replication.wait_for_entry(
+                remote_ldap,
+                kdc_dn,
+                timeout=api.env.replication_wait_timeout
+            )
 
     def _call_certmonger(self, certmonger_ca='IPA'):
         subject = str(DN(('cn', self.fqdn), self.subject_base))
diff --git a/ipaserver/install/replication.py b/ipaserver/install/replication.py
index 6d9878e16f..5ce8fa689c 100644
--- a/ipaserver/install/replication.py
+++ b/ipaserver/install/replication.py
@@ -161,7 +161,7 @@ def wait_for_task(conn, dn):
     return exit_code
 
 
-def wait_for_entry(connection, dn, timeout=7200, attr=None, attrvalue='*',
+def wait_for_entry(connection, dn, timeout, attr=None, attrvalue='*',
                    quiet=True):
     """Wait for entry and/or attr to show up
     """
@@ -751,7 +751,9 @@ def setup_agreement(self, a_conn, b_hostname, port=389,
             # that we will have to set the memberof fixup task
             self.need_memberof_fixup = True
 
-        wait_for_entry(a_conn, entry.dn)
+        wait_for_entry(
+            a_conn, entry.dn, timeout=api.env.replication_wait_timeout
+        )
 
     def needs_memberof_fixup(self):
         return self.need_memberof_fixup

From 040cc162a2369fb4e458acfcb16e52c7204ff922 Mon Sep 17 00:00:00 2001
From: Christian Heimes <chei...@redhat.com>
Date: Fri, 22 Jun 2018 10:04:38 +0200
Subject: [PATCH 3/3] Fix replication races in Dogtag admin code

DogtagInstance.setup_admin and related methods have multiple LDAP
replication race conditions. The bugs can cause parallel
ipa-replica-install to fail.

The code from __add_admin_to_group() has been changed to use MOD_ADD
ather than search + MOD_REPLACE. The MOD_REPLACE approach can lead to
data loss, when more than one writer changes a group.

setup_admin() now waits until both admin user and group membership have
been replicated to the master peer. The method also adds a new ACI to
allow querying group member in the replication check.

Fixes: https://pagure.io/freeipa/issue/7593
Signed-off-by: Christian Heimes <chei...@redhat.com>
---
 ipaserver/install/cainstance.py     |  1 +
 ipaserver/install/dogtaginstance.py | 96 +++++++++++++++++++++++++++++--------
 ipaserver/install/krainstance.py    |  1 +
 3 files changed, 77 insertions(+), 21 deletions(-)

diff --git a/ipaserver/install/cainstance.py b/ipaserver/install/cainstance.py
index 5bdac82a57..b58fbb4c88 100644
--- a/ipaserver/install/cainstance.py
+++ b/ipaserver/install/cainstance.py
@@ -392,6 +392,7 @@ def configure_instance(self, host_name, dm_password, admin_password,
                 # Setup Database
                 self.step("creating certificate server db", self.__create_ds_db)
                 self.step("setting up initial replication", self.__setup_replication)
+                self.step("creating ACIs for admin", self.add_ipaca_aci)
                 self.step("creating installation admin user", self.setup_admin)
             self.step("configuring certificate server instance",
                       self.__spawn_instance)
diff --git a/ipaserver/install/dogtaginstance.py b/ipaserver/install/dogtaginstance.py
index 07e63024c6..5b2c30f8a1 100644
--- a/ipaserver/install/dogtaginstance.py
+++ b/ipaserver/install/dogtaginstance.py
@@ -21,6 +21,7 @@
 
 import base64
 import logging
+import time
 
 import ldap
 import os
@@ -90,6 +91,16 @@ class DogtagInstance(service.Service):
     tracking_reqs = None
     server_cert_name = None
 
+    ipaca_groups = DN(('ou', 'groups'), ('o', 'ipaca'))
+    ipaca_people = DN(('ou', 'people'), ('o', 'ipaca'))
+    groups_aci = (
+        b'(targetfilter="(objectClass=groupOfUniqueNames)")'
+        b'(targetattr="cn || description || objectclass || uniquemember")'
+        b'(version 3.0; acl "Allow users from o=ipaca to read groups"; '
+        b'allow (read, search, compare) '
+        b'userdn="ldap:///uid=*,ou=people,o=ipaca";;)'
+    )
+
     def __init__(self, realm, subsystem, service_desc, host_name=None,
                  nss_db=paths.PKI_TOMCAT_ALIAS_DIR, service_prefix=None,
                  config=None):
@@ -108,10 +119,11 @@ def __init__(self, realm, subsystem, service_desc, host_name=None,
         self.pkcs12_info = None
         self.clone = False
 
-        self.basedn = DN(('o', 'ipa%s' % subsystem.lower()))
+        self.basedn = DN(('o', 'ipaca'))
         self.admin_user = "admin"
-        self.admin_dn = DN(('uid', self.admin_user),
-                           ('ou', 'people'), ('o', 'ipaca'))
+        self.admin_dn = DN(
+            ('uid', self.admin_user), self.ipaca_people
+        )
         self.admin_groups = None
         self.tmp_agent_db = None
         self.subsystem = subsystem
@@ -393,27 +405,31 @@ def handle_setup_error(self, e):
 
         raise RuntimeError("%s configuration failed." % self.subsystem)
 
-    def __add_admin_to_group(self, group):
-        dn = DN(('cn', group), ('ou', 'groups'), ('o', 'ipaca'))
-        entry = api.Backend.ldap2.get_entry(dn)
-        members = entry.get('uniqueMember', [])
-        members.append(self.admin_dn)
-        mod = [(ldap.MOD_REPLACE, 'uniqueMember', members)]
+    def add_ipaca_aci(self):
+        """Add ACI to allow ipaca users to read their own group information
+
+        Dogtag users aren't allowed to enumerate their own groups. The
+        setup_admin() method needs the permission to wait, until all group
+        information has been replicated.
+        """
+        dn = self.ipaca_groups
+        mod = [(ldap.MOD_ADD, 'aci', [self.groups_aci])]
         try:
             api.Backend.ldap2.modify_s(dn, mod)
         except ldap.TYPE_OR_VALUE_EXISTS:
-            # already there
-            pass
+            logger.debug("%s already has ACI to read group information", dn)
+        else:
+            logger.debug("Added ACI to read groups to %s", dn)
 
     def setup_admin(self):
         self.admin_user = "admin-%s" % self.fqdn
         self.admin_password = ipautil.ipa_generate_password()
-        self.admin_dn = DN(('uid', self.admin_user),
-                           ('ou', 'people'), ('o', 'ipaca'))
-
+        self.admin_dn = DN(
+            ('uid', self.admin_user), self.ipaca_people
+        )
         # remove user if left-over exists
         try:
-            entry = api.Backend.ldap2.delete_entry(self.admin_dn)
+            api.Backend.ldap2.delete_entry(self.admin_dn)
         except errors.NotFound:
             pass
 
@@ -432,18 +448,56 @@ def setup_admin(self):
         )
         api.Backend.ldap2.add_entry(entry)
 
+        wait_groups = []
         for group in self.admin_groups:
-            self.__add_admin_to_group(group)
+            group_dn = DN(('cn', group), self.ipaca_groups)
+            mod = [(ldap.MOD_ADD, 'uniqueMember', [self.admin_dn])]
+            try:
+                api.Backend.ldap2.modify_s(group_dn, mod)
+            except ldap.TYPE_OR_VALUE_EXISTS:
+                # already there
+                return None
+            else:
+                wait_groups.append(group_dn)
 
         # Now wait until the other server gets replicated this data
         ldap_uri = ipaldap.get_ldap_uri(self.master_host)
-        master_conn = ipaldap.LDAPClient(ldap_uri)
-        master_conn.gssapi_bind()
-        replication.wait_for_entry(master_conn, entry.dn)
-        del master_conn
+        master_conn = ipaldap.LDAPClient(ldap_uri, start_tls=True)
+        logger.debug(
+            "Waiting for %s to appear on %s", self.admin_dn, master_conn
+        )
+        deadline = time.time() + api.env.replication_wait_timeout
+        while time.time() < deadline:
+            time.sleep(1)
+            try:
+                master_conn.simple_bind(self.admin_dn, self.admin_password)
+            except ldap.INVALID_CREDENTIALS:
+                pass
+            else:
+                logger.debug("Successfully logged in as %s", self.admin_dn)
+                break
+        else:
+            logger.error(
+                "Unable to log in as %s on %s", self.admin_dn, master_conn
+            )
+            raise errors.NotFound(
+                reason="{} did not replicate to {}".format(
+                    self.admin_dn, master_conn
+                )
+            )
+
+        # wait for group membership
+        for group_dn in wait_groups:
+            replication.wait_for_entry(
+                master_conn,
+                group_dn,
+                timeout=api.env.replication_wait_timeout,
+                attr='uniqueMember',
+                attrvalue=self.admin_dn
+            )
 
     def __remove_admin_from_group(self, group):
-        dn = DN(('cn', group), ('ou', 'groups'), ('o', 'ipaca'))
+        dn = DN(('cn', group), self.ipaca_groups)
         mod = [(ldap.MOD_DELETE, 'uniqueMember', self.admin_dn)]
         try:
             api.Backend.ldap2.modify_s(dn, mod)
diff --git a/ipaserver/install/krainstance.py b/ipaserver/install/krainstance.py
index b959398706..9483f0ec4e 100644
--- a/ipaserver/install/krainstance.py
+++ b/ipaserver/install/krainstance.py
@@ -115,6 +115,7 @@ def configure_instance(self, realm_name, host_name, dm_password,
                 "A Dogtag CA must be installed first")
 
         if promote:
+            self.step("creating ACIs for admin", self.add_ipaca_aci)
             self.step("creating installation admin user", self.setup_admin)
         self.step("configuring KRA instance", self.__spawn_instance)
         if not self.clone:
_______________________________________________
FreeIPA-devel mailing list -- freeipa-devel@lists.fedorahosted.org
To unsubscribe send an email to freeipa-devel-le...@lists.fedorahosted.org
Fedora Code of Conduct: https://getfedora.org/code-of-conduct.html
List Guidelines: https://fedoraproject.org/wiki/Mailing_list_guidelines
List Archives: 
https://lists.fedoraproject.org/archives/list/freeipa-devel@lists.fedorahosted.org/message/R5O7YNEOMY2NCDVUBXH26DP5CS6ZPGIB/

Reply via email to