Repository: mesos Updated Branches: refs/heads/master cb0efcf67 -> a9c1c775b
Added the logic for installing and removing DNAT rules. Review: https://reviews.apache.org/r/51617/ Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/a9c1c775 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/a9c1c775 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/a9c1c775 Branch: refs/heads/master Commit: a9c1c775b744635675c2427297927c0a341be6cb Parents: cb0efcf Author: Avinash sridharan <avin...@mesosphere.io> Authored: Mon Oct 17 16:32:20 2016 -0700 Committer: Jie Yu <yujie....@gmail.com> Committed: Mon Oct 17 17:27:30 2016 -0700 ---------------------------------------------------------------------- .../cni/plugins/port_mapper/port_mapper.cpp | 271 ++++++++++++++++++- .../cni/plugins/port_mapper/port_mapper.hpp | 25 +- 2 files changed, 284 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/a9c1c775/src/slave/containerizer/mesos/isolators/network/cni/plugins/port_mapper/port_mapper.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/isolators/network/cni/plugins/port_mapper/port_mapper.cpp b/src/slave/containerizer/mesos/isolators/network/cni/plugins/port_mapper/port_mapper.cpp index 2ff8b0e..b470f0c 100644 --- a/src/slave/containerizer/mesos/isolators/network/cni/plugins/port_mapper/port_mapper.cpp +++ b/src/slave/containerizer/mesos/isolators/network/cni/plugins/port_mapper/port_mapper.cpp @@ -22,10 +22,10 @@ #include <process/io.hpp> #include <process/subprocess.hpp> -#include "slave/containerizer/mesos/isolators/network/cni/spec.hpp" #include "slave/containerizer/mesos/isolators/network/cni/plugins/port_mapper/port_mapper.hpp" namespace io = process::io; +namespace spec = mesos::internal::slave::cni::spec; using std::cerr; using std::endl; @@ -57,8 +57,17 @@ Try<Owned<PortMapper>, PluginError> PortMapper::create(const string& _cniConfig) ERROR_BAD_ARGS); } - // 'CNI_CONTAINERID' is optional. + // NOTE: While CNI spec allows 'CNI_CONTAINERID' to be optional, the + // plugin uses the 'CNI_CONTAINERID' to tag iptable rules so that + // we can delete the rules without IP address information (which + // will not be available during DEL). Hence, making this + // variable required. Option<string> cniContainerId = os::getenv("CNI_CONTAINERID"); + if (cniContainerId.isNone()) { + return PluginError( + "Unable to find environment variable 'CNI_CONTAINERID'", + ERROR_BAD_ARGS); + } Option<string> cniNetNs = os::getenv("CNI_NETNS"); if (cniNetNs.isNone()) { @@ -217,7 +226,7 @@ Try<Owned<PortMapper>, PluginError> PortMapper::create(const string& _cniConfig) return Owned<PortMapper>( new PortMapper( cniCommand.get(), - cniContainerId, + cniContainerId.get(), cniNetNs.get(), cniIfName.get(), cniArgs, @@ -230,9 +239,254 @@ Try<Owned<PortMapper>, PluginError> PortMapper::create(const string& _cniConfig) } +string PortMapper::getIptablesRuleTag() +{ + return "container_id: " + cniContainerId; +} + + +string PortMapper::getIptablesRule( + const net::IP& ip, + const mesos::NetworkInfo::PortMapping& portMapping) +{ + string devices; + + // Get list of devices to exclude. + if (!excludeDevices.empty()) { + foreach (const string& device, excludeDevices) { + devices = "! -i " + device + " "; + } + } + + const string protocol = portMapping.has_protocol() + ? strings::lower(portMapping.protocol()) + : "tcp"; + + // Iptables DNAT rule representing a specific port-mapping. + return strings::format( + " %s %s -p %s -m %s" + " --dport %d -j DNAT --to-destination %s:%d" + " -m comment --comment \"%s\"", + chain, + devices, + protocol, + protocol, + portMapping.host_port(), + stringify(ip), + portMapping.container_port(), + getIptablesRuleTag()).get(); +} + + +Try<Nothing> PortMapper::addPortMapping( + const net::IP& ip, + const mesos::NetworkInfo::PortMapping& portMapping) +{ + Try<string> iptablesRule = getIptablesRule(ip, portMapping); + if (iptablesRule.isError()) { + return Error(iptablesRule.error()); + } + + // Run the following iptables script to install the DNAT rule under + // the specified chain. + string script = strings::format( + R"~( + #!/bin/sh + exec 1>&2 + set -x + + # NOTE: We need iptables 1.4.20 and higher for the commands to + # work. We use the '-w' flag with the iptables command to ensure + # that iptables command are executed atomically. This flag is + # available starting iptables 1.4.20. + # + # Check if the `chain` exists in the iptable. If it does not + # exist go ahead and install the chain in the iptables NAT + # table. + iptables -w -t nat --list %s + if [ $? -ne 0 ]; then + # NOTE: When we create the chain, there is a possibility of a + # race due to which a container launch can fail. This can + # happen specifically when two containers are launched with + # port-mapping with the same iptables chain and the chain does + # not exist. In this scenario, there might be a race for the + # chain creation with only one of the containers succeeding. + # iptables, unfortunately, does not allow locks to be acquired + # outside the iptables process and hence there is no way to + # avoid this race. This event itself should be quite rare + # since it can happen only when the chain is created the first + # time and two commands for creation of the chain are executed + # simultaneously. + (iptables -w -t nat -N %s || exit 1) + + # Once the chain has been installed add a rule in the PREROUTING + # chain to jump to this chain for any packets that are + # destined to a local address. + (iptables -w -t nat -A PREROUTING \ + -m addrtype --dst-type LOCAL -j %s || exit 1) + + # For locally generated packets we need a rule in the OUTPUT + # chain as well, since locally generated packets directly hit + # the output CHAIN, bypassing PREROUTING. + (iptables -w -t nat -A OUTPUT \ + ! -d 127.0.0.0/8 -m addrtype \ + --dst-type LOCAL -j %s || exit 1) + fi + + # Within the `chain` go ahead and install the DNAT rule, if it + # does not exist. + (iptables -w -t nat -C %s || iptables -t nat -A %s))~", + chain, + chain, + chain, + chain, + iptablesRule.get(), + iptablesRule.get()).get(); + + if (os::system(script) != 0) { + return ErrnoError("Failed to add DNAT rule with tag"); + } + + return Nothing(); +} + + +Try<Nothing> PortMapper::delPortMapping() +{ + string script = strings::format( + R"~( + #!/bin/sh + exec 1>&2 + set -x + + # The iptables command searches for the DNAT rules with tag + # "container_id: <CNI_CONTAINERID>", and if it exists goes ahead + # and deletes it. + iptables -w -t nat -S %s | sed "/%s/ s/-A/iptables -w -t nat -D/e")~", + chain, + getIptablesRuleTag()).get(); + + // NOTE: Ideally we should be cleaning up the iptables chain in case + // this is the last rule in the chain. However, this is a bit tricky + // since when we try deleting the chain another incarnation of the + // `mesos-cni-port-mapper` plugin might be injecting rules into the + // same chain causing a race. Further, there is no way to ascertain + // the failure due to this race, and returning a failure in this + // scenario would (erroneously) fail the deletion of the container. + // We therefore leave it to the operator to delete the iptables + // chain once he deems it safe to remove the chain. + if (os::system(script) != 0) { + return ErrnoError("Unable to delete DNAT rules"); + } + + return Nothing(); +} + + +Try<string, PluginError> PortMapper::handleAddCommand() +{ + Result<spec::NetworkInfo> delegateResult = delegate(cniCommand); + if (delegateResult.isError()) { + return PluginError( + "Could not execute the delegate plugin '" + + delegatePlugin + "' for ADD command: " + delegateResult.error(), + ERROR_DELEGATE_FAILURE); + } + + cerr << "Delegate CNI plugin '" << delegatePlugin + << "' executed successfully for ADD command: " + << JSON::protobuf(delegateResult.get()) << endl; + + // We support only IPv4. + if (!delegateResult->has_ip4()) { + return PluginError( + "Delegate CNI plugin '" + delegatePlugin + + "' did not return an IPv4 address", + ERROR_DELEGATE_FAILURE); + } + + // The IP from `delegateResult->ip4().ip()` is in CIDR notation. We + // need to strip out the netmask. + Try<net::IPNetwork> ip = net::IPNetwork::parse( + delegateResult->ip4().ip(), + AF_INET); + + if (ip.isError()) { + return PluginError( + "Could not parse IPv4 address return by delegate CNI plugin '" + + delegatePlugin + "': " + ip.error(), + ERROR_DELEGATE_FAILURE); + } + + // Walk through each of the port-mappings and install a + // D-NAT rule for the port-map. + foreach (const NetworkInfo::PortMapping& portMapping, + networkInfo.port_mappings()) { + Try<Nothing> result = addPortMapping(ip->address(), portMapping); + if (result.isError()) { + return PluginError(result.error(), ERROR_PORTMAP_FAILURE); + } + } + + return stringify(JSON::protobuf(delegateResult.get())); +} + + +Try<Nothing, PluginError> PortMapper::handleDelCommand() +{ + // In case of `spec::CNI_CMD_DEL` we want to first delete the + // iptables DNAT rules and then invoke the delegate plugin with the + // DEL command. Reason being that if the delegate plugin is invoked + // before the iptables DNAT rules are removed, the IP address + // associated with the current container might be re-allocated to a + // new container, which might start receiving traffic hitting these + // DNAT rules. + Try<Nothing> result = delPortMapping(); + if (result.isError()) { + return PluginError( + "Unable to remove iptables DNAT rules: " + result.error(), + ERROR_PORTMAP_FAILURE); + } + + cerr << "Launching delegate CNI plugin '" << delegatePlugin + << "' with DEL command" << endl; + + Result<spec::NetworkInfo> delegateResult = delegate(spec::CNI_CMD_DEL); + if (delegateResult.isError()) { + return PluginError( + "Could not execute the delegate plugin '" + + delegatePlugin + "' for DEL command: " + delegateResult.error(), + ERROR_DELEGATE_FAILURE); + } + + cerr << "Successfully removed iptables DNAT rule and detached container " + << "using CNI delegate plugin '" << delegatePlugin << "'" << endl; + + return Nothing(); +} + + Try<Option<string>, PluginError> PortMapper::execute() { - return None(); + if (cniCommand == spec::CNI_CMD_ADD) { + Try<string, PluginError> result = handleAddCommand(); + if (result.isError()) { + return result.error(); + } + + return result.get(); + } else if (cniCommand == spec::CNI_CMD_DEL) { + Try<Nothing, PluginError> result = handleDelCommand(); + if (result.isError()) { + return result.error(); + } + + return None(); + } + + return PluginError( + "Unsupported command: " + cniCommand, + ERROR_UNSUPPORTED_COMMAND); } @@ -244,10 +498,7 @@ Result<spec::NetworkInfo> PortMapper::delegate(const string& command) environment["CNI_IFNAME"] = cniIfName; environment["CNI_NETNS"] = cniNetNs; environment["CNI_PATH"] = cniPath; - - if (cniContainerId.isSome()) { - environment["CNI_CONTAINERID"] = cniContainerId.get(); - } + environment["CNI_CONTAINERID"] = cniContainerId; if (cniArgs.isSome()) { environment["CNI_ARGS"] = cniArgs.get(); @@ -289,7 +540,7 @@ Result<spec::NetworkInfo> PortMapper::delegate(const string& command) if (s.isError()) { return Error( - "Failed to exec the '" + delegatePlugin + + "Failed to exec the delegate CNI plugin '" + delegatePlugin + "' subprocess: " + s.error()); } @@ -351,7 +602,7 @@ Result<spec::NetworkInfo> PortMapper::delegate(const string& command) return Error( "The delegate CNI plugin '" + delegatePlugin + "' return status " + stringify(status->get()) + - ". Could not attach container: " + output.get()); + ". Could not attach/detach container: " + output.get()); } if (command == spec::CNI_CMD_ADD) { http://git-wip-us.apache.org/repos/asf/mesos/blob/a9c1c775/src/slave/containerizer/mesos/isolators/network/cni/plugins/port_mapper/port_mapper.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/isolators/network/cni/plugins/port_mapper/port_mapper.hpp b/src/slave/containerizer/mesos/isolators/network/cni/plugins/port_mapper/port_mapper.hpp index 7fad707..25f49f4 100644 --- a/src/slave/containerizer/mesos/isolators/network/cni/plugins/port_mapper/port_mapper.hpp +++ b/src/slave/containerizer/mesos/isolators/network/cni/plugins/port_mapper/port_mapper.hpp @@ -43,6 +43,9 @@ public: // NOTE: Plugin specific erros should use Values of 100+. static constexpr int ERROR_READ_FAILURE = 100; // Fail to read from stdin. static constexpr int ERROR_BAD_ARGS = 101; // Miss or invalid arguments. + static constexpr int ERROR_DELEGATE_FAILURE = 102; + static constexpr int ERROR_PORTMAP_FAILURE = 103; + static constexpr int ERROR_UNSUPPORTED_COMMAND = 104; // Takes in a JSON formatted string, validates that the following // fields are present: @@ -97,7 +100,7 @@ protected: private: PortMapper( const std::string& _cniCommand, // ADD, DEL or VERSION. - const Option<std::string>& _cniContainerId, // Container ID. + const std::string& _cniContainerId, // Container ID. const std::string& _cniNetNs, // Path to network namespace file. const std::string& _cniIfName, // Interface name to set up. const Option<std::string>& _cniArgs, // Extra arguments. @@ -119,8 +122,26 @@ private: chain(_chain), excludeDevices(_excludeDevices){}; + // Returns a tag that will be appended to every DNAT rule in the + // iptables associated with this container. Currently the tag is of + // the form: 'container_id: <CNI_CONTAINERID>'. + std::string getIptablesRuleTag(); + + std::string getIptablesRule( + const net::IP& ip, + const mesos::NetworkInfo::PortMapping& portMapping); + + Try<Nothing> addPortMapping( + const net::IP& ip, + const mesos::NetworkInfo::PortMapping& portMapping); + + Try<Nothing> delPortMapping(); + + Try<std::string, spec::PluginError> handleAddCommand(); + Try<Nothing, spec::PluginError> handleDelCommand(); + const std::string cniCommand; - const Option<std::string> cniContainerId; + const std::string cniContainerId; const std::string cniNetNs; const std::string cniIfName; const Option<std::string> cniArgs;