On 29/10/2024 12:14, Ilya Maximets wrote:
> Until now, functions that needed to call external programs like openssl
> or ipsec commands were using subprocess commands directly. Most of
> these calls had no failure checks or any logging making it hard to
> understand what is happening inside the daemon when something doesn't
> work as intended.
>
> Some commands also had a chance to not read the command output in full.
> That might sound like not a big problem, but in practice it causes
> ovs-monitor-ipsec to deadlock pluto and itself with certain versions of
> Libreswan (mainly Libreswan 5+). The order of events is following:
>
> 1. ovs-monitor-ipsec calls ipsec status redirecting the output
> to a pipe.
> 2. ipsec status calls ipsec whack.
> 3. ipsec whack connects to pluto and asks for status.
> 4. ovs-monitor-ipsec doesn't read the pipe in full.
> 5. ipsec whack blocks on write to the other side of the pipe
> when it runs out of buffer space.
> 6. pluto blocks on sendmsg to ipsec whack for the same reason.
> 7. ovs-monitor-ipsec calls another ipsec command and blocks
> on connection to pluto.
>
> In this scenario the running process is at the mercy of garbage
> collector and it doesn't run because we're blocked on calling another
> ipsec command. All the processes are completely blocked and will not
> do any work until ipsec whack is killed.
>
> With this change we're introducing a new function that will be used
> for all the external process execution commands and will read the full
> output before returning, avoiding the deadlock. It will also log all
> the failures as warnings and the commands themselves at the debug level.
>
> We'll be adding more logic into this function in later commits as well,
> so it will not stay that simple.
>
> Signed-off-by: Ilya Maximets <[email protected]>
> ---
> ipsec/ovs-monitor-ipsec.in | 296 +++++++++++++++++--------------------
> 1 file changed, 134 insertions(+), 162 deletions(-)
>
> diff --git a/ipsec/ovs-monitor-ipsec.in b/ipsec/ovs-monitor-ipsec.in
> index 37c509ac6..19a401609 100755
> --- a/ipsec/ovs-monitor-ipsec.in
> +++ b/ipsec/ovs-monitor-ipsec.in
> @@ -84,6 +84,28 @@ monitor = None
> xfrm = None
>
>
> +def run_command(args, description=None):
> + """ This function runs the process args[0] with args[1:] arguments
> + and returns a tuple: return-code, stdout, stderr. """
> +
> + if not description:
the last arg as a description might not give much or confuse?
maybe it's better to use the first arg i.e. the command ?
> + description = args[-1]
> +
> + vlog.dbg("Running %s" % args)
> + proc = subprocess.Popen(args, stdout=subprocess.PIPE,
> + stderr=subprocess.PIPE)
> + pout, perr = proc.communicate()
> +
you can skip call to len() in the if statement.
pout and perr should always be a string empty or not.
the if statement will work well on perr itself.
> + if proc.returncode or len(perr):
> + vlog.warn("Failed to %s; exit code: %d"
> + % (description, proc.returncode))
> + vlog.warn("cmdline: %s" % proc.args)
> + vlog.warn("stderr: %s" % perr)
> + vlog.warn("stdout: %s" % pout)
> +
Since pout and perr are always strings you don't need
the or statement.
this should work the same
return proc.returncode, pout, perr
also every caller that uses pout and/or perr
will do decode().strip().
so why not return it like so by default?
return proc.returncode, pout.decode().strip(), perr.decode()
> + return proc.returncode, pout or b'', perr or b''
> +
> +
> class XFRM(object):
> """This class is a simple wrapper around ip-xfrm (8) command line
> utility. We are using this class only for informational purposes
> @@ -99,13 +121,14 @@ class XFRM(object):
> where <key> is destination IPv4 address and <value> is SELECTOR of
> the IPsec policy."""
> policies = {}
> - proc = subprocess.Popen([self.IP, 'xfrm', 'policy'],
> - stdout=subprocess.PIPE)
> - while True:
> - line = proc.stdout.readline().strip().decode()
> - if line == '':
> - break
> - a = line.split(" ")
> +
> + ret, pout, perr = run_command([self.IP, 'xfrm', 'policy'],
> + "get XFRM policies")
> + if ret:
> + return policies
> +
> + for line in pout.decode().splitlines():
> + a = line.strip().split(" ")
> if len(a) >= 4 and a[0] == "src" and a[2] == "dst":
> dst = (a[3].split("/"))[0]
> if dst not in policies:
> @@ -122,13 +145,14 @@ class XFRM(object):
> in a dictionary where <key> is destination IPv4 address and <value>
> is SELECTOR."""
> securities = {}
> - proc = subprocess.Popen([self.IP, 'xfrm', 'state'],
> - stdout=subprocess.PIPE)
> - while True:
> - line = proc.stdout.readline().strip().decode()
> - if line == '':
> - break
> - a = line.split(" ")
> +
> + ret, pout, perr = run_command([self.IP, 'xfrm', 'state'],
> + "get XFRM state")
> + if ret:
> + return securities
> +
> + for line in pout.decode().splitlines():
> + a = line.strip().split(" ")
> if len(a) >= 4 and a[0] == "sel" \
> and a[1] == "src" and a[3] == "dst":
> remote_ip = a[4].rstrip().split("/")[0]
> @@ -242,7 +266,7 @@ conn prevent_unencrypted_vxlan
> f.close()
>
> vlog.info("Restarting StrongSwan")
> - subprocess.call([self.IPSEC, "restart"])
> + run_command([self.IPSEC, "restart"], "restart StrongSwan")
>
> def get_active_conns(self):
> """This function parses output from 'ipsec status' command.
> @@ -252,13 +276,13 @@ conn prevent_unencrypted_vxlan
> sample line from the parsed outpus as <value>. """
>
> conns = {}
> - proc = subprocess.Popen([self.IPSEC, 'status'],
> stdout=subprocess.PIPE)
> + ret, pout, perr = run_command([self.IPSEC, 'status'],
> + "get active connections")
> + if ret:
> + return conns
>
> - while True:
> - line = proc.stdout.readline().strip().decode()
> - if line == '':
> - break
> - tunnel_name = line.split(":")
> + for line in pout.decode().splitlines():
> + tunnel_name = line.strip().split(":")
> if len(tunnel_name) < 2:
> continue
> m = re.match(r"(.*)(-in-\d+|-out-\d+|-\d+).*", tunnel_name[0])
> @@ -341,15 +365,11 @@ conn prevent_unencrypted_vxlan
> Once strongSwan vici bindings will be distributed with major
> Linux distributions this function could be simplified."""
> vlog.info("Refreshing StrongSwan configuration")
> - proc = subprocess.Popen([self.IPSEC, "update"],
> - stdout=subprocess.PIPE,
> - stderr=subprocess.PIPE)
> - outs, errs = proc.communicate()
> - if proc.returncode != 0:
> - vlog.err("StrongSwan failed to update configuration:\n"
> - "%s \n %s" % (str(outs), str(errs)))
> -
> - subprocess.call([self.IPSEC, "rereadsecrets"])
> +
> + run_command([self.IPSEC, "update"],
> + "update StrongSwan's configuration")
> + run_command([self.IPSEC, "rereadsecrets"], "re-read secrets")
> +
> # "ipsec update" command does not remove those tunnels that were
> # updated or that disappeared from the ipsec.conf file. So, we have
> # to manually remove them by calling "ipsec stroke down-nb <tunnel>"
> @@ -382,7 +402,8 @@ conn prevent_unencrypted_vxlan
>
> if not tunnel or tunnel.version != ver:
> vlog.info("%s is outdated %u" % (conn, ver))
> - subprocess.call([self.IPSEC, "stroke", "down-nb", conn])
> + run_command([self.IPSEC, "stroke", "down-nb", conn],
> + "stroke the outdated %s" % conn)
>
>
> class LibreSwanHelper(object):
> @@ -460,13 +481,11 @@ conn prevent_unencrypted_vxlan
> # Collect version infromation
> self.IPSEC = libreswan_root_prefix + "/usr/sbin/ipsec"
> self.IPSEC_AUTO = [self.IPSEC]
> - proc = subprocess.Popen([self.IPSEC, "--version"],
> - stdout=subprocess.PIPE,
> - encoding="latin1")
> - pout, perr = proc.communicate()
>
> - v = re.match("^Libreswan v?(.*)$", pout)
> + ret, pout, perr = run_command([self.IPSEC, "--version"],
> + "get Libreswan's version")
> try:
> + v = re.match("^Libreswan v?(.*)$", pout.decode().strip())
> version = int(v.group(1).split(".")[0])
> except:
> version = 0
> @@ -513,7 +532,7 @@ conn prevent_unencrypted_vxlan
> f.close()
>
> vlog.info("Restarting LibreSwan")
> - subprocess.call([self.IPSEC, "restart"])
> + run_command([self.IPSEC, "restart"], "restart Libreswan")
>
> def config_init(self):
> self.conf_file = open(self.IPSEC_CONF, "w")
> @@ -599,8 +618,10 @@ conn prevent_unencrypted_vxlan
>
> def refresh(self, monitor):
> vlog.info("Refreshing LibreSwan configuration")
> - subprocess.call(self.IPSEC_AUTO + ["--ctlsocket", self.IPSEC_CTL,
> - "--config", self.IPSEC_CONF, "--rereadsecrets"])
> + run_command(self.IPSEC_AUTO + ["--ctlsocket", self.IPSEC_CTL,
> + "--config", self.IPSEC_CONF,
> + "--rereadsecrets"],
> + "re-read secrets")
> tunnels = set(monitor.tunnels.keys())
>
> # Delete old connections
> @@ -627,9 +648,10 @@ conn prevent_unencrypted_vxlan
>
> if not tunnel or tunnel.version != ver:
> vlog.info("%s is outdated %u" % (conn, ver))
> - subprocess.call(self.IPSEC_AUTO + ["--ctlsocket",
> - self.IPSEC_CTL, "--config",
> - self.IPSEC_CONF, "--delete", conn])
> + run_command(self.IPSEC_AUTO +
> + ["--ctlsocket", self.IPSEC_CTL,
> + "--config", self.IPSEC_CONF,
> + "--delete", conn], "delete %s" % conn)
> elif ifname in tunnels:
> tunnels.remove(ifname)
>
> @@ -649,43 +671,43 @@ conn prevent_unencrypted_vxlan
> # Update shunt policy if changed
> if monitor.conf_in_use["skb_mark"] != monitor.conf["skb_mark"]:
> if monitor.conf["skb_mark"]:
> - subprocess.call(self.IPSEC_AUTO +
> + run_command(self.IPSEC_AUTO +
> ["--config", self.IPSEC_CONF,
> "--ctlsocket", self.IPSEC_CTL,
> "--add",
> "--asynchronous", "prevent_unencrypted_gre"])
> - subprocess.call(self.IPSEC_AUTO +
> + run_command(self.IPSEC_AUTO +
> ["--config", self.IPSEC_CONF,
> "--ctlsocket", self.IPSEC_CTL,
> "--add",
> "--asynchronous", "prevent_unencrypted_geneve"])
> - subprocess.call(self.IPSEC_AUTO +
> + run_command(self.IPSEC_AUTO +
> ["--config", self.IPSEC_CONF,
> "--ctlsocket", self.IPSEC_CTL,
> "--add",
> "--asynchronous", "prevent_unencrypted_stt"])
> - subprocess.call(self.IPSEC_AUTO +
> + run_command(self.IPSEC_AUTO +
> ["--config", self.IPSEC_CONF,
> "--ctlsocket", self.IPSEC_CTL,
> "--add",
> "--asynchronous", "prevent_unencrypted_vxlan"])
> else:
> - subprocess.call(self.IPSEC_AUTO +
> + run_command(self.IPSEC_AUTO +
> ["--config", self.IPSEC_CONF,
> "--ctlsocket", self.IPSEC_CTL,
> "--delete",
> "--asynchronous", "prevent_unencrypted_gre"])
> - subprocess.call(self.IPSEC_AUTO +
> + run_command(self.IPSEC_AUTO +
> ["--config", self.IPSEC_CONF,
> "--ctlsocket", self.IPSEC_CTL,
> "--delete",
> "--asynchronous", "prevent_unencrypted_geneve"])
> - subprocess.call(self.IPSEC_AUTO +
> + run_command(self.IPSEC_AUTO +
> ["--config", self.IPSEC_CONF,
> "--ctlsocket", self.IPSEC_CTL,
> "--delete",
> "--asynchronous", "prevent_unencrypted_stt"])
> - subprocess.call(self.IPSEC_AUTO +
> + run_command(self.IPSEC_AUTO +
> ["--config", self.IPSEC_CONF,
> "--ctlsocket", self.IPSEC_CTL,
> "--delete",
> @@ -700,14 +722,13 @@ conn prevent_unencrypted_vxlan
> sample line from the parsed outpus as <value>. """
>
> conns = {}
> - proc = subprocess.Popen([self.IPSEC, 'status', '--ctlsocket',
> - self.IPSEC_CTL], stdout=subprocess.PIPE)
> -
> - while True:
> - line = proc.stdout.readline().strip().decode()
> - if line == '':
> - break
> + ret, pout, perr = run_command([self.IPSEC, 'status',
> + '--ctlsocket', self.IPSEC_CTL],
> + "get active connections")
> + if ret:
> + return conns
>
> + for line in pout.decode().splitlines():
> m = re.search(r"#\d+: \"(.*)\".*", line)
> if not m:
> continue
> @@ -732,117 +753,72 @@ conn prevent_unencrypted_vxlan
> # the "ipsec auto --start" command is lost. Just retry to make sure
> # the command is received by LibreSwan.
> while True:
> - proc = subprocess.Popen(self.IPSEC_AUTO +
> - ["--config", self.IPSEC_CONF,
> - "--ctlsocket", self.IPSEC_CTL,
> - "--start",
> - "--asynchronous", conn],
> - stdout=subprocess.PIPE,
> - stderr=subprocess.PIPE)
> - perr = str(proc.stderr.read())
> - pout = str(proc.stdout.read())
> - if not re.match(r".*Connection refused.*", perr) and \
> - not re.match(r".*need --listen.*", pout):
> + ret, pout, perr = run_command(self.IPSEC_AUTO +
> + ["--config", self.IPSEC_CONF,
> + "--ctlsocket", self.IPSEC_CTL,
> + "--start",
> + "--asynchronous", conn],
> + "start %s" % conn)
> + if not re.match(r".*Connection refused.*", perr.decode()) and \
> + not re.match(r".*need --listen.*", pout.decode()):
> break
>
> - if re.match(r".*[F|f]ailed to initiate connection.*", pout):
> + if re.match(r".*[F|f]ailed to initiate connection.*", pout.decode()):
> vlog.err('Failed to initiate connection through'
> ' Interface %s.\n' % (conn.split('-')[0]))
> - vlog.err(pout)
> + vlog.err("stdout: %s" % pout)
>
> def _nss_clear_database(self):
> """Remove all OVS IPsec related state from the NSS database"""
> - try:
> - proc = subprocess.Popen(['certutil', '-L', '-d',
> - self.IPSEC_D],
> - stdout=subprocess.PIPE,
> - stderr=subprocess.PIPE,
> - universal_newlines=True)
> - lines = proc.stdout.readlines()
> -
> - for line in lines:
> - s = line.strip().split()
> - if len(s) < 1:
> - continue
> - name = s[0]
> - if name.startswith(self.CERT_PREFIX):
> - self._nss_delete_cert(name)
> - elif name.startswith(self.CERTKEY_PREFIX):
> - self._nss_delete_cert_and_key(name)
> + ret, pout, perr = run_command(['certutil', '-L', '-d', self.IPSEC_D],
> + "clear NSS database")
> + if ret:
> + return
>
> - except Exception as e:
> - vlog.err("Failed to clear NSS database.\n" + str(e))
> + for line in pout.decode().splitlines():
> + s = line.strip().split()
> + if len(s) < 1:
> + continue
> + name = s[0]
> + if name.startswith(self.CERT_PREFIX):
> + self._nss_delete_cert(name)
> + elif name.startswith(self.CERTKEY_PREFIX):
> + self._nss_delete_cert_and_key(name)
>
> def _nss_import_cert(self, cert, name, cert_type):
> """Cert_type is 'CT,,' for the CA certificate and 'P,P,P' for the
> normal certificate."""
> - try:
> - proc = subprocess.Popen(['certutil', '-A', '-a', '-i', cert,
> - '-d', self.IPSEC_D, '-n',
> - name, '-t', cert_type],
> - stdout=subprocess.PIPE,
> - stderr=subprocess.PIPE)
> - proc.wait()
> - if proc.returncode:
> - raise Exception(proc.stderr.read())
> - except Exception as e:
> - vlog.err("Failed to import certificate into NSS.\n" + str(e))
> + run_command(['certutil', '-A', '-a', '-i', cert, '-d', self.IPSEC_D,
> + '-n', name, '-t', cert_type],
> + "import certificate %s into NSS" % name)
>
> def _nss_delete_cert(self, name):
> - try:
> - proc = subprocess.Popen(['certutil', '-D', '-d',
> - self.IPSEC_D, '-n', name],
> - stdout=subprocess.PIPE,
> - stderr=subprocess.PIPE)
> - proc.wait()
> - if proc.returncode:
> - raise Exception(proc.stderr.read())
> - except Exception as e:
> - vlog.err("Failed to delete certificate from NSS.\n" + str(e))
> + run_command(['certutil', '-D', '-d', self.IPSEC_D, '-n', name],
> + "delete certificate %s from NSS" % name)
>
> def _nss_import_cert_and_key(self, cert, key, name):
> - try:
> - # Avoid deleting other files
> - path = os.path.abspath('/tmp/%s.p12' % name)
> - if not path.startswith('/tmp/'):
> - raise Exception("Illegal certificate name!")
> -
> - # Create p12 file from pem files
> - proc = subprocess.Popen(['openssl', 'pkcs12', '-export',
> - '-in', cert, '-inkey', key, '-out',
> - path, '-name', name, '-passout',
> 'pass:'],
> - stdout=subprocess.PIPE,
> - stderr=subprocess.PIPE)
> - proc.wait()
> - if proc.returncode:
> - raise Exception(proc.stderr.read())
> -
> - # Load p12 file to the database
> - proc = subprocess.Popen(['pk12util', '-i', path, '-d',
> - self.IPSEC_D, '-W', ''],
> - stdout=subprocess.PIPE,
> - stderr=subprocess.PIPE)
> - proc.wait()
> - if proc.returncode:
> - raise Exception(proc.stderr.read())
> -
> - except Exception as e:
> - vlog.err("Import cert and key failed.\n" + str(e))
> + # Avoid deleting other files
> + path = os.path.abspath('/tmp/%s.p12' % name)
> + if not path.startswith('/tmp/'):
> + vlog.err("Illegal certificate name '%s'!" % name)
> + return
> +
> + if run_command(['openssl', 'pkcs12', '-export',
> + '-in', cert, '-inkey', key,
> + '-out', path, '-name', name,
> + '-passout', 'pass:'],
> + "create p12 file from pem files")[0]:
> + return
> +
> + # Load p12 file to the database
> + run_command(['pk12util', '-i', path, '-d', self.IPSEC_D, '-W', ''],
> + "load p12 file to the NSS database")
> os.remove(path)
>
> def _nss_delete_cert_and_key(self, name):
> - try:
> - # Delete certificate and private key
> - proc = subprocess.Popen(['certutil', '-F', '-d',
> - self.IPSEC_D, '-n', name],
> - stdout=subprocess.PIPE,
> - stderr=subprocess.PIPE)
> - proc.wait()
> - if proc.returncode:
> - raise Exception(proc.stderr.read())
> -
> - except Exception as e:
> - vlog.err("Delete cert and key failed.\n" + str(e))
> + # Delete certificate and private key
> + run_command(['certutil', '-F', '-d', self.IPSEC_D, '-n', name],
> + "delete certificate and private key for %s" % name)
>
>
> class IPsecTunnel(object):
> @@ -1220,19 +1196,15 @@ class IPsecMonitor(object):
> self.ike_helper.refresh(self)
>
> def _get_cn_from_cert(self, cert):
> - try:
> - proc = subprocess.Popen(['openssl', 'x509', '-noout', '-subject',
> - '-nameopt', 'RFC2253', '-in', cert],
> - stdout=subprocess.PIPE,
> - stderr=subprocess.PIPE)
> - proc.wait()
> - if proc.returncode:
> - raise Exception(proc.stderr.read())
> - m = re.search(r"CN=(.+?),", proc.stdout.readline().decode())
> - if not m:
> - raise Exception("No CN in the certificate subject.")
> - except Exception as e:
> - vlog.warn(str(e))
> + ret, pout, perr = run_command(['openssl', 'x509', '-noout',
> '-subject',
> + '-nameopt', 'RFC2253', '-in', cert],
> + "get certificate %s options" % cert)
> + if ret:
> + return None
> +
> + m = re.search(r"CN=(.+?),", pout.decode().strip())
> + if not m:
> + vlog.warn("No CN in the certificate subject (%s)." % cert)
> return None
>
> return m.group(1)
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev