Until now, functions that needed to call external programs like openssl
or ipsec commands were using subprocess commands directly.  Most of
these calls had no failure checks or any logging making it hard to
understand what is happening inside the daemon when something doesn't
work as intended.

Some commands also had a chance to not read the command output in full.
That might sound like not a big problem, but in practice it causes
ovs-monitor-ipsec to deadlock pluto and itself with certain versions of
Libreswan (mainly Libreswan 5+).  The order of events is following:

 1. ovs-monitor-ipsec calls ipsec status redirecting the output
    to a pipe.
 2. ipsec status calls ipsec whack.
 3. ipsec whack connects to pluto and asks for status.
 4. ovs-monitor-ipsec doesn't read the pipe in full.
 5. ipsec whack blocks on write to the other side of the pipe
    when it runs out of buffer space.
 6. pluto blocks on sendmsg to ipsec whack for the same reason.
 7. ovs-monitor-ipsec calls another ipsec command and blocks
    on connection to pluto.

In this scenario the running process is at the mercy of garbage
collector and it doesn't run because we're blocked on calling another
ipsec command.  All the processes are completely blocked and will not
do any work until ipsec whack is killed.

With this change we're introducing a new function that will be used
for all the external process execution commands and will read the full
output before returning, avoiding the deadlock.  It will also log all
the failures as warnings and the commands themselves at the debug level.

We'll be adding more logic into this function in later commits as well,
so it will not stay that simple.

Signed-off-by: Ilya Maximets <[email protected]>
---
 ipsec/ovs-monitor-ipsec.in | 296 +++++++++++++++++--------------------
 1 file changed, 134 insertions(+), 162 deletions(-)

diff --git a/ipsec/ovs-monitor-ipsec.in b/ipsec/ovs-monitor-ipsec.in
index 37c509ac6..19a401609 100755
--- a/ipsec/ovs-monitor-ipsec.in
+++ b/ipsec/ovs-monitor-ipsec.in
@@ -84,6 +84,28 @@ monitor = None
 xfrm = None
 
 
+def run_command(args, description=None):
+    """ This function runs the process args[0] with args[1:] arguments
+    and returns a tuple: return-code, stdout, stderr. """
+
+    if not description:
+        description = args[-1]
+
+    vlog.dbg("Running %s" % args)
+    proc = subprocess.Popen(args, stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE)
+    pout, perr = proc.communicate()
+
+    if proc.returncode or len(perr):
+        vlog.warn("Failed to %s; exit code: %d"
+                  % (description, proc.returncode))
+        vlog.warn("cmdline: %s" % proc.args)
+        vlog.warn("stderr: %s" % perr)
+        vlog.warn("stdout: %s" % pout)
+
+    return proc.returncode, pout or b'', perr or b''
+
+
 class XFRM(object):
     """This class is a simple wrapper around ip-xfrm (8) command line
     utility.  We are using this class only for informational purposes
@@ -99,13 +121,14 @@ class XFRM(object):
         where <key> is destination IPv4 address and <value> is SELECTOR of
         the IPsec policy."""
         policies = {}
-        proc = subprocess.Popen([self.IP, 'xfrm', 'policy'],
-                                stdout=subprocess.PIPE)
-        while True:
-            line = proc.stdout.readline().strip().decode()
-            if line == '':
-                break
-            a = line.split(" ")
+
+        ret, pout, perr = run_command([self.IP, 'xfrm', 'policy'],
+                                      "get XFRM policies")
+        if ret:
+            return policies
+
+        for line in pout.decode().splitlines():
+            a = line.strip().split(" ")
             if len(a) >= 4 and a[0] == "src" and a[2] == "dst":
                 dst = (a[3].split("/"))[0]
                 if dst not in policies:
@@ -122,13 +145,14 @@ class XFRM(object):
         in a dictionary where <key> is destination IPv4 address and <value>
         is SELECTOR."""
         securities = {}
-        proc = subprocess.Popen([self.IP, 'xfrm', 'state'],
-                                stdout=subprocess.PIPE)
-        while True:
-            line = proc.stdout.readline().strip().decode()
-            if line == '':
-                break
-            a = line.split(" ")
+
+        ret, pout, perr = run_command([self.IP, 'xfrm', 'state'],
+                                      "get XFRM state")
+        if ret:
+            return securities
+
+        for line in pout.decode().splitlines():
+            a = line.strip().split(" ")
             if len(a) >= 4 and a[0] == "sel" \
                     and a[1] == "src" and a[3] == "dst":
                 remote_ip = a[4].rstrip().split("/")[0]
@@ -242,7 +266,7 @@ conn prevent_unencrypted_vxlan
         f.close()
 
         vlog.info("Restarting StrongSwan")
-        subprocess.call([self.IPSEC, "restart"])
+        run_command([self.IPSEC, "restart"], "restart StrongSwan")
 
     def get_active_conns(self):
         """This function parses output from 'ipsec status' command.
@@ -252,13 +276,13 @@ conn prevent_unencrypted_vxlan
         sample line from the parsed outpus as <value>. """
 
         conns = {}
-        proc = subprocess.Popen([self.IPSEC, 'status'], stdout=subprocess.PIPE)
+        ret, pout, perr = run_command([self.IPSEC, 'status'],
+                                      "get active connections")
+        if ret:
+            return conns
 
-        while True:
-            line = proc.stdout.readline().strip().decode()
-            if line == '':
-                break
-            tunnel_name = line.split(":")
+        for line in pout.decode().splitlines():
+            tunnel_name = line.strip().split(":")
             if len(tunnel_name) < 2:
                 continue
             m = re.match(r"(.*)(-in-\d+|-out-\d+|-\d+).*", tunnel_name[0])
@@ -341,15 +365,11 @@ conn prevent_unencrypted_vxlan
         Once strongSwan vici bindings will be distributed with major
         Linux distributions this function could be simplified."""
         vlog.info("Refreshing StrongSwan configuration")
-        proc = subprocess.Popen([self.IPSEC, "update"],
-                        stdout=subprocess.PIPE,
-                        stderr=subprocess.PIPE)
-        outs, errs = proc.communicate()
-        if proc.returncode != 0:
-            vlog.err("StrongSwan failed to update configuration:\n"
-                           "%s \n %s" % (str(outs), str(errs)))
-
-        subprocess.call([self.IPSEC, "rereadsecrets"])
+
+        run_command([self.IPSEC, "update"],
+                    "update StrongSwan's configuration")
+        run_command([self.IPSEC, "rereadsecrets"], "re-read secrets")
+
         # "ipsec update" command does not remove those tunnels that were
         # updated or that disappeared from the ipsec.conf file.  So, we have
         # to manually remove them by calling "ipsec stroke down-nb <tunnel>"
@@ -382,7 +402,8 @@ conn prevent_unencrypted_vxlan
 
                 if not tunnel or tunnel.version != ver:
                     vlog.info("%s is outdated %u" % (conn, ver))
-                    subprocess.call([self.IPSEC, "stroke", "down-nb", conn])
+                    run_command([self.IPSEC, "stroke", "down-nb", conn],
+                                "stroke the outdated %s" % conn)
 
 
 class LibreSwanHelper(object):
@@ -460,13 +481,11 @@ conn prevent_unencrypted_vxlan
         # Collect version infromation
         self.IPSEC = libreswan_root_prefix + "/usr/sbin/ipsec"
         self.IPSEC_AUTO = [self.IPSEC]
-        proc = subprocess.Popen([self.IPSEC, "--version"],
-                                stdout=subprocess.PIPE,
-                                encoding="latin1")
-        pout, perr = proc.communicate()
 
-        v = re.match("^Libreswan v?(.*)$", pout)
+        ret, pout, perr = run_command([self.IPSEC, "--version"],
+                                      "get Libreswan's version")
         try:
+            v = re.match("^Libreswan v?(.*)$", pout.decode().strip())
             version = int(v.group(1).split(".")[0])
         except:
             version = 0
@@ -513,7 +532,7 @@ conn prevent_unencrypted_vxlan
         f.close()
 
         vlog.info("Restarting LibreSwan")
-        subprocess.call([self.IPSEC, "restart"])
+        run_command([self.IPSEC, "restart"], "restart Libreswan")
 
     def config_init(self):
         self.conf_file = open(self.IPSEC_CONF, "w")
@@ -599,8 +618,10 @@ conn prevent_unencrypted_vxlan
 
     def refresh(self, monitor):
         vlog.info("Refreshing LibreSwan configuration")
-        subprocess.call(self.IPSEC_AUTO + ["--ctlsocket", self.IPSEC_CTL,
-                        "--config", self.IPSEC_CONF, "--rereadsecrets"])
+        run_command(self.IPSEC_AUTO + ["--ctlsocket", self.IPSEC_CTL,
+                                       "--config", self.IPSEC_CONF,
+                                       "--rereadsecrets"],
+                    "re-read secrets")
         tunnels = set(monitor.tunnels.keys())
 
         # Delete old connections
@@ -627,9 +648,10 @@ conn prevent_unencrypted_vxlan
 
                 if not tunnel or tunnel.version != ver:
                     vlog.info("%s is outdated %u" % (conn, ver))
-                    subprocess.call(self.IPSEC_AUTO + ["--ctlsocket",
-                                    self.IPSEC_CTL, "--config",
-                                    self.IPSEC_CONF, "--delete", conn])
+                    run_command(self.IPSEC_AUTO +
+                                ["--ctlsocket", self.IPSEC_CTL,
+                                 "--config", self.IPSEC_CONF,
+                                 "--delete", conn], "delete %s" % conn)
                 elif ifname in tunnels:
                     tunnels.remove(ifname)
 
@@ -649,43 +671,43 @@ conn prevent_unencrypted_vxlan
         # Update shunt policy if changed
         if monitor.conf_in_use["skb_mark"] != monitor.conf["skb_mark"]:
             if monitor.conf["skb_mark"]:
-                subprocess.call(self.IPSEC_AUTO +
+                run_command(self.IPSEC_AUTO +
                             ["--config", self.IPSEC_CONF,
                             "--ctlsocket", self.IPSEC_CTL,
                             "--add",
                             "--asynchronous", "prevent_unencrypted_gre"])
-                subprocess.call(self.IPSEC_AUTO +
+                run_command(self.IPSEC_AUTO +
                             ["--config", self.IPSEC_CONF,
                             "--ctlsocket", self.IPSEC_CTL,
                             "--add",
                             "--asynchronous", "prevent_unencrypted_geneve"])
-                subprocess.call(self.IPSEC_AUTO +
+                run_command(self.IPSEC_AUTO +
                             ["--config", self.IPSEC_CONF,
                             "--ctlsocket", self.IPSEC_CTL,
                             "--add",
                             "--asynchronous", "prevent_unencrypted_stt"])
-                subprocess.call(self.IPSEC_AUTO +
+                run_command(self.IPSEC_AUTO +
                             ["--config", self.IPSEC_CONF,
                             "--ctlsocket", self.IPSEC_CTL,
                             "--add",
                             "--asynchronous", "prevent_unencrypted_vxlan"])
             else:
-                subprocess.call(self.IPSEC_AUTO +
+                run_command(self.IPSEC_AUTO +
                             ["--config", self.IPSEC_CONF,
                             "--ctlsocket", self.IPSEC_CTL,
                             "--delete",
                             "--asynchronous", "prevent_unencrypted_gre"])
-                subprocess.call(self.IPSEC_AUTO +
+                run_command(self.IPSEC_AUTO +
                             ["--config", self.IPSEC_CONF,
                             "--ctlsocket", self.IPSEC_CTL,
                             "--delete",
                             "--asynchronous", "prevent_unencrypted_geneve"])
-                subprocess.call(self.IPSEC_AUTO +
+                run_command(self.IPSEC_AUTO +
                             ["--config", self.IPSEC_CONF,
                             "--ctlsocket", self.IPSEC_CTL,
                             "--delete",
                             "--asynchronous", "prevent_unencrypted_stt"])
-                subprocess.call(self.IPSEC_AUTO +
+                run_command(self.IPSEC_AUTO +
                             ["--config", self.IPSEC_CONF,
                             "--ctlsocket", self.IPSEC_CTL,
                             "--delete",
@@ -700,14 +722,13 @@ conn prevent_unencrypted_vxlan
         sample line from the parsed outpus as <value>. """
 
         conns = {}
-        proc = subprocess.Popen([self.IPSEC, 'status', '--ctlsocket',
-                                self.IPSEC_CTL], stdout=subprocess.PIPE)
-
-        while True:
-            line = proc.stdout.readline().strip().decode()
-            if line == '':
-                break
+        ret, pout, perr = run_command([self.IPSEC, 'status',
+                                      '--ctlsocket', self.IPSEC_CTL],
+                                      "get active connections")
+        if ret:
+            return conns
 
+        for line in pout.decode().splitlines():
             m = re.search(r"#\d+: \"(.*)\".*", line)
             if not m:
                 continue
@@ -732,117 +753,72 @@ conn prevent_unencrypted_vxlan
         # the "ipsec auto --start" command is lost. Just retry to make sure
         # the command is received by LibreSwan.
         while True:
-            proc = subprocess.Popen(self.IPSEC_AUTO +
-                                    ["--config", self.IPSEC_CONF,
-                                    "--ctlsocket", self.IPSEC_CTL,
-                                    "--start",
-                                    "--asynchronous", conn],
-                                    stdout=subprocess.PIPE,
-                                    stderr=subprocess.PIPE)
-            perr = str(proc.stderr.read())
-            pout = str(proc.stdout.read())
-            if not re.match(r".*Connection refused.*", perr) and \
-                    not re.match(r".*need --listen.*", pout):
+            ret, pout, perr = run_command(self.IPSEC_AUTO +
+                                          ["--config", self.IPSEC_CONF,
+                                          "--ctlsocket", self.IPSEC_CTL,
+                                          "--start",
+                                          "--asynchronous", conn],
+                                          "start %s" % conn)
+            if not re.match(r".*Connection refused.*", perr.decode()) and \
+                    not re.match(r".*need --listen.*", pout.decode()):
                 break
 
-        if re.match(r".*[F|f]ailed to initiate connection.*", pout):
+        if re.match(r".*[F|f]ailed to initiate connection.*", pout.decode()):
             vlog.err('Failed to initiate connection through'
                     ' Interface %s.\n' % (conn.split('-')[0]))
-            vlog.err(pout)
+            vlog.err("stdout: %s" % pout)
 
     def _nss_clear_database(self):
         """Remove all OVS IPsec related state from the NSS database"""
-        try:
-            proc = subprocess.Popen(['certutil', '-L', '-d',
-                                    self.IPSEC_D],
-                                    stdout=subprocess.PIPE,
-                                    stderr=subprocess.PIPE,
-                                    universal_newlines=True)
-            lines = proc.stdout.readlines()
-
-            for line in lines:
-                s = line.strip().split()
-                if len(s) < 1:
-                    continue
-                name = s[0]
-                if name.startswith(self.CERT_PREFIX):
-                    self._nss_delete_cert(name)
-                elif name.startswith(self.CERTKEY_PREFIX):
-                    self._nss_delete_cert_and_key(name)
+        ret, pout, perr = run_command(['certutil', '-L', '-d', self.IPSEC_D],
+                                      "clear NSS database")
+        if ret:
+            return
 
-        except Exception as e:
-            vlog.err("Failed to clear NSS database.\n" + str(e))
+        for line in pout.decode().splitlines():
+            s = line.strip().split()
+            if len(s) < 1:
+                continue
+            name = s[0]
+            if name.startswith(self.CERT_PREFIX):
+                self._nss_delete_cert(name)
+            elif name.startswith(self.CERTKEY_PREFIX):
+                self._nss_delete_cert_and_key(name)
 
     def _nss_import_cert(self, cert, name, cert_type):
         """Cert_type is 'CT,,' for the CA certificate and 'P,P,P' for the
         normal certificate."""
-        try:
-            proc = subprocess.Popen(['certutil', '-A', '-a', '-i', cert,
-                                    '-d', self.IPSEC_D, '-n',
-                                    name, '-t', cert_type],
-                                    stdout=subprocess.PIPE,
-                                    stderr=subprocess.PIPE)
-            proc.wait()
-            if proc.returncode:
-                raise Exception(proc.stderr.read())
-        except Exception as e:
-            vlog.err("Failed to import certificate into NSS.\n" + str(e))
+        run_command(['certutil', '-A', '-a', '-i', cert, '-d', self.IPSEC_D,
+                     '-n', name, '-t', cert_type],
+                    "import certificate %s into NSS" % name)
 
     def _nss_delete_cert(self, name):
-        try:
-            proc = subprocess.Popen(['certutil', '-D', '-d',
-                                    self.IPSEC_D, '-n', name],
-                                    stdout=subprocess.PIPE,
-                                    stderr=subprocess.PIPE)
-            proc.wait()
-            if proc.returncode:
-                raise Exception(proc.stderr.read())
-        except Exception as e:
-            vlog.err("Failed to delete certificate from NSS.\n" + str(e))
+        run_command(['certutil', '-D', '-d', self.IPSEC_D, '-n', name],
+                    "delete certificate %s from NSS" % name)
 
     def _nss_import_cert_and_key(self, cert, key, name):
-        try:
-            # Avoid deleting other files
-            path = os.path.abspath('/tmp/%s.p12' % name)
-            if not path.startswith('/tmp/'):
-                raise Exception("Illegal certificate name!")
-
-            # Create p12 file from pem files
-            proc = subprocess.Popen(['openssl', 'pkcs12', '-export',
-                                    '-in', cert, '-inkey', key, '-out',
-                                    path, '-name', name, '-passout', 'pass:'],
-                                    stdout=subprocess.PIPE,
-                                    stderr=subprocess.PIPE)
-            proc.wait()
-            if proc.returncode:
-                raise Exception(proc.stderr.read())
-
-            # Load p12 file to the database
-            proc = subprocess.Popen(['pk12util', '-i', path, '-d',
-                                    self.IPSEC_D, '-W', ''],
-                                    stdout=subprocess.PIPE,
-                                    stderr=subprocess.PIPE)
-            proc.wait()
-            if proc.returncode:
-                raise Exception(proc.stderr.read())
-
-        except Exception as e:
-            vlog.err("Import cert and key failed.\n" + str(e))
+        # Avoid deleting other files
+        path = os.path.abspath('/tmp/%s.p12' % name)
+        if not path.startswith('/tmp/'):
+            vlog.err("Illegal certificate name '%s'!" % name)
+            return
+
+        if run_command(['openssl', 'pkcs12', '-export',
+                       '-in', cert, '-inkey', key,
+                       '-out', path, '-name', name,
+                       '-passout', 'pass:'],
+                       "create p12 file from pem files")[0]:
+            return
+
+        # Load p12 file to the database
+        run_command(['pk12util', '-i', path, '-d', self.IPSEC_D, '-W', ''],
+                    "load p12 file to the NSS database")
         os.remove(path)
 
     def _nss_delete_cert_and_key(self, name):
-        try:
-            # Delete certificate and private key
-            proc = subprocess.Popen(['certutil', '-F', '-d',
-                                    self.IPSEC_D, '-n', name],
-                                    stdout=subprocess.PIPE,
-                                    stderr=subprocess.PIPE)
-            proc.wait()
-            if proc.returncode:
-                raise Exception(proc.stderr.read())
-
-        except Exception as e:
-            vlog.err("Delete cert and key failed.\n" + str(e))
+        # Delete certificate and private key
+        run_command(['certutil', '-F', '-d', self.IPSEC_D, '-n', name],
+                    "delete certificate and private key for %s" % name)
 
 
 class IPsecTunnel(object):
@@ -1220,19 +1196,15 @@ class IPsecMonitor(object):
             self.ike_helper.refresh(self)
 
     def _get_cn_from_cert(self, cert):
-        try:
-            proc = subprocess.Popen(['openssl', 'x509', '-noout', '-subject',
-                                    '-nameopt', 'RFC2253', '-in', cert],
-                                    stdout=subprocess.PIPE,
-                                    stderr=subprocess.PIPE)
-            proc.wait()
-            if proc.returncode:
-                raise Exception(proc.stderr.read())
-            m = re.search(r"CN=(.+?),", proc.stdout.readline().decode())
-            if not m:
-                raise Exception("No CN in the certificate subject.")
-        except Exception as e:
-            vlog.warn(str(e))
+        ret, pout, perr = run_command(['openssl', 'x509', '-noout', '-subject',
+                                       '-nameopt', 'RFC2253', '-in', cert],
+                                       "get certificate %s options" % cert)
+        if ret:
+            return None
+
+        m = re.search(r"CN=(.+?),", pout.decode().strip())
+        if not m:
+            vlog.warn("No CN in the certificate subject (%s)." % cert)
             return None
 
         return m.group(1)
-- 
2.46.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to