Repository: mesos
Updated Branches:
  refs/heads/master 8327209a4 -> b872ccd08


Generate u32 filter handle in user level to work around MESOS-1617.

Review: https://reviews.apache.org/r/23701


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/b872ccd0
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/b872ccd0
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/b872ccd0

Branch: refs/heads/master
Commit: b872ccd0863a05807992791304bb61117ed2f733
Parents: 8327209
Author: Jie Yu <yujie....@gmail.com>
Authored: Fri Jul 18 16:52:36 2014 -0700
Committer: Jie Yu <yujie....@gmail.com>
Committed: Mon Jul 21 13:52:46 2014 -0700

----------------------------------------------------------------------
 src/linux/routing/filter/arp.cpp      |   3 +
 src/linux/routing/filter/filter.hpp   |  17 +++-
 src/linux/routing/filter/handle.hpp   |  71 ++++++++++++++++
 src/linux/routing/filter/icmp.cpp     |   3 +
 src/linux/routing/filter/internal.hpp | 125 ++++++++++++++++++++++++++++-
 src/linux/routing/filter/ip.cpp       |  21 +++++
 src/linux/routing/filter/ip.hpp       |  12 +++
 src/tests/routing_tests.cpp           |  74 +++++++++++++++++
 8 files changed, 321 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/b872ccd0/src/linux/routing/filter/arp.cpp
----------------------------------------------------------------------
diff --git a/src/linux/routing/filter/arp.cpp b/src/linux/routing/filter/arp.cpp
index c18aebb..b794a73 100644
--- a/src/linux/routing/filter/arp.cpp
+++ b/src/linux/routing/filter/arp.cpp
@@ -124,6 +124,7 @@ Try<bool> create(
           parent,
           Classifier(),
           priority,
+          None(),
           redirect));
 }
 
@@ -140,6 +141,7 @@ Try<bool> create(
           parent,
           Classifier(),
           priority,
+          None(),
           mirror));
 }
 
@@ -161,6 +163,7 @@ Try<bool> update(
           parent,
           Classifier(),
           None(),
+          None(),
           mirror));
 }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/b872ccd0/src/linux/routing/filter/filter.hpp
----------------------------------------------------------------------
diff --git a/src/linux/routing/filter/filter.hpp 
b/src/linux/routing/filter/filter.hpp
index 40e21db..a603d73 100644
--- a/src/linux/routing/filter/filter.hpp
+++ b/src/linux/routing/filter/filter.hpp
@@ -26,6 +26,7 @@
 #include <stout/option.hpp>
 
 #include "linux/routing/filter/action.hpp"
+#include "linux/routing/filter/handle.hpp"
 #include "linux/routing/filter/priority.hpp"
 
 #include "linux/routing/queueing/handle.hpp"
@@ -51,20 +52,24 @@ public:
   // Creates a filter with no action.
   Filter(const queueing::Handle& _parent,
          const Classifier& _classifier,
-         const Option<Priority>& _priority)
+         const Option<Priority>& _priority,
+         const Option<Handle>& _handle)
     : parent_(_parent),
       classifier_(_classifier),
-      priority_(_priority) {}
+      priority_(_priority),
+      handle_(_handle) {}
 
   // TODO(jieyu): Support arbitrary number of actions.
   template <typename Action>
   Filter(const queueing::Handle& _parent,
          const Classifier& _classifier,
          const Option<Priority>& _priority,
+         const Option<Handle>& _handle,
          const Action& action)
     : parent_(_parent),
       classifier_(_classifier),
-      priority_(_priority)
+      priority_(_priority),
+      handle_(_handle)
   {
     attach(action);
   }
@@ -76,9 +81,10 @@ public:
     actions_.push_back(process::Shared<action::Action>(new A(action)));
   }
 
-  const queueing::Handle parent() const { return parent_; }
+  const queueing::Handle& parent() const { return parent_; }
   const Classifier& classifier() const { return classifier_; }
   const Option<Priority>& priority() const { return priority_; }
+  const Option<Handle>& handle() const { return handle_; }
 
   // Returns all the actions attached to this filter.
   const std::vector<process::Shared<action::Action> >& actions() const
@@ -97,6 +103,9 @@ private:
   // The priority of this filter.
   Option<Priority> priority_;
 
+  // The handle of this filter.
+  Option<Handle> handle_;
+
   // The set of actions attached to this filer. Note that we use
   // Shared here to make Filter copyable.
   std::vector<process::Shared<action::Action> > actions_;

http://git-wip-us.apache.org/repos/asf/mesos/blob/b872ccd0/src/linux/routing/filter/handle.hpp
----------------------------------------------------------------------
diff --git a/src/linux/routing/filter/handle.hpp 
b/src/linux/routing/filter/handle.hpp
new file mode 100644
index 0000000..0381933
--- /dev/null
+++ b/src/linux/routing/filter/handle.hpp
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LINUX_ROUTING_FILTER_HANDLE_HPP__
+#define __LINUX_ROUTING_FILTER_HANDLE_HPP__
+
+#include <stdint.h>
+
+namespace routing {
+namespace filter {
+
+// Represents the handle for a traffic control (tc) filter. Different
+// types of filters have different types of handles. This is the base
+// class of all types of filters.
+class Handle
+{
+public:
+  explicit Handle(uint32_t _handle) : handle(_handle) {}
+  virtual ~Handle() {}
+
+  uint32_t get() const { return handle; }
+
+protected:
+  uint32_t handle;
+};
+
+
+// Represents a u32 filter handle. A u32 filter handle has three
+// parts. The first number identifies a hash table, the second number
+// identifies a bucket within the hash table, and the third number
+// identifies the filter item within the bucket.
+// http://ace-host.stuart.id.au/russell/files/tc/doc/cls_u32.txt
+class U32Handle : public Handle
+{
+public:
+  explicit U32Handle(uint32_t _handle) : Handle(_handle) {}
+
+  // The format of a u32 filter handle.
+  // +------------+--------+------------+
+  // |   htid     |  hash  |    node    |
+  // +------------+--------+------------+
+  //     12 bits    8 bits     12 bits
+  U32Handle(uint32_t htid, uint32_t hash, uint32_t node)
+    : Handle(((htid & 0xfff) << 20) + ((hash & 0xff) << 12) + (node & 0xfff)) 
{}
+
+  virtual ~U32Handle() {}
+
+  uint32_t htid() const { return handle >> 20; }
+  uint32_t hash() const { return (handle & 0x000ff000) >> 12; }
+  uint32_t node() const { return handle & 0x00000fff; }
+};
+
+} // namespace filter {
+} // namespace routing {
+
+#endif // __LINUX_ROUTING_FILTER_HANDLE_HPP__

http://git-wip-us.apache.org/repos/asf/mesos/blob/b872ccd0/src/linux/routing/filter/icmp.cpp
----------------------------------------------------------------------
diff --git a/src/linux/routing/filter/icmp.cpp 
b/src/linux/routing/filter/icmp.cpp
index 8e6f013..12b9eb7 100644
--- a/src/linux/routing/filter/icmp.cpp
+++ b/src/linux/routing/filter/icmp.cpp
@@ -220,6 +220,7 @@ Try<bool> create(
           parent,
           classifier,
           priority,
+          None(),
           redirect));
 }
 
@@ -237,6 +238,7 @@ Try<bool> create(
           parent,
           classifier,
           priority,
+          None(),
           mirror));
 }
 
@@ -262,6 +264,7 @@ Try<bool> update(
           parent,
           classifier,
           None(),
+          None(),
           mirror));
 }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/b872ccd0/src/linux/routing/filter/internal.hpp
----------------------------------------------------------------------
diff --git a/src/linux/routing/filter/internal.hpp 
b/src/linux/routing/filter/internal.hpp
index 6836d1f..a970613 100644
--- a/src/linux/routing/filter/internal.hpp
+++ b/src/linux/routing/filter/internal.hpp
@@ -42,6 +42,8 @@
 
 #include <stout/error.hpp>
 #include <stout/foreach.hpp>
+#include <stout/hashmap.hpp>
+#include <stout/hashset.hpp>
 #include <stout/none.hpp>
 #include <stout/nothing.hpp>
 #include <stout/option.hpp>
@@ -271,6 +273,85 @@ inline Try<Nothing> attach(
 }
 
 
+// Generates the handle for the given filter on the link. Returns none
+// if we decide to let the kernel choose the handle.
+template <typename Classifier>
+Result<U32Handle> generateU32Handle(
+    const Netlink<struct rtnl_link>& link,
+    const Filter<Classifier>& filter)
+{
+  // If the user does not specify a priority, we have no choice but
+  // let the kernel choose the handle because we do not know the
+  // 'htid' that is associated with that priority.
+  if (filter.priority().isNone()) {
+    return None();
+  }
+
+  // Scan all the filters attached to the given parent on the link.
+  Try<Netlink<struct nl_sock> > sock = routing::socket();
+  if (sock.isError()) {
+    return Error(sock.error());
+  }
+
+  // Dump all the libnl filters (i.e., rtnl_cls) attached to the given
+  // parent on the link.
+  struct nl_cache* c = NULL;
+  int err = rtnl_cls_alloc_cache(
+      sock.get().get(),
+      rtnl_link_get_ifindex(link.get()),
+      filter.parent().get(),
+      &c);
+
+  if (err != 0) {
+    return Error(
+        "Failed to get filter info from kernel: " +
+        std::string(nl_geterror(err)));
+  }
+
+  Netlink<struct nl_cache> cache(c);
+
+  // A map from priority to the corresponding 'htid'.
+  hashmap<uint16_t, uint32_t> htids;
+
+  // A map from 'htid' to a set of already used nodes.
+  hashmap<uint32_t, hashset<uint32_t> > nodes;
+
+  for (struct nl_object* o = nl_cache_get_first(cache.get());
+       o != NULL; o = nl_cache_get_next(o)) {
+    struct rtnl_cls* cls = (struct rtnl_cls*) o;
+
+    // Only look at u32 filters. For other type of filters, their
+    // handles are generated by the kernel correctly.
+    if (rtnl_tc_get_kind(TC_CAST(cls)) == std::string("u32")) {
+      U32Handle handle(rtnl_tc_get_handle(TC_CAST(cls)));
+
+      htids[rtnl_cls_get_prio(cls)] = handle.htid();
+      nodes[handle.htid()].insert(handle.node());
+    }
+  }
+
+  // If this filter has a new priority, we need to let the kernel
+  // decide the handle because we don't know which 'htid' this
+  // priority will be associated with.
+  if (!htids.contains(filter.priority().get().get())) {
+    return None();
+  }
+
+  // NOTE: By default, kernel will choose to use divisor 1, which
+  // means all filters will be in hash bucket 0. Also, kernel assigns
+  // node id starting from 0x800 by default. Here, we keep the same
+  // semantics as kernel.
+  uint32_t htid = htids[filter.priority().get().get()];
+  for (uint32_t node = 0x800; node <= 0xfff; node++) {
+    if (!nodes[htid].contains(node)) {
+      return U32Handle(htid, 0x0, node);
+    }
+  }
+
+  return Error("No available handle exists");
+}
+
+
 // Encodes a filter (in our representation) to a libnl filter
 // (rtnl_cls). We use template here so that it works for any type of
 // classifier.
@@ -308,6 +389,31 @@ Try<Netlink<struct rtnl_cls> > encodeFilter(
     }
   }
 
+  // Encode the handle.
+  if (filter.handle().isSome()) {
+    rtnl_tc_set_handle(TC_CAST(cls.get()), filter.handle().get().get());
+  } else {
+    // NOTE: This is a workaround for MESOS-1617. Normally, if the
+    // user does not specify the handle for a filter, the kernel will
+    // generate one automatically. However, for u32 filters, the
+    // existing kernel is buggy in the sense that it will generate a
+    // handle that is already used by some other u32 filter (see the
+    // ticket for details). To address that, we explicitly set the
+    // handle of the filter by picking an unused handle.
+    // TODO(jieyu): Revisit this once the kernel bug is fixed.
+    if (rtnl_tc_get_kind(TC_CAST(cls.get())) == std::string("u32")) {
+      Result<U32Handle> handle = generateU32Handle(link, filter);
+      if (handle.isError()) {
+        return Error("Failed to find an unused u32 handle: " + handle.error());
+      }
+
+      // If 'handle' is none, let the kernel choose the handle.
+      if (handle.isSome()) {
+        rtnl_tc_set_handle(TC_CAST(cls.get()), handle.get().get());
+      }
+    }
+  }
+
   return cls;
 }
 
@@ -333,6 +439,11 @@ Result<Filter<Classifier> > decodeFilter(const 
Netlink<struct rtnl_cls>& cls)
   // always have a valid priority here.
   Priority priority(rtnl_cls_get_prio(cls.get()));
 
+  // Decode the handle. If the handle is not specified by the user,
+  // kernel will assign a handle to the filter. So we should always
+  // have a valid handle here.
+  Handle handle(rtnl_tc_get_handle(TC_CAST(cls.get())));
+
   // Decode the classifier using a classifier specific function.
   Result<Classifier> classifier = decode<Classifier>(cls);
   if (classifier.isError()) {
@@ -344,7 +455,7 @@ Result<Filter<Classifier> > decodeFilter(const 
Netlink<struct rtnl_cls>& cls)
   // TODO(jieyu): Decode all the actions attached to the filter.
   // Currently, libnl does not support that (but will support that in
   // the future).
-  return Filter<Classifier>(parent, classifier.get(), priority);
+  return Filter<Classifier>(parent, classifier.get(), priority, handle);
 }
 
 /////////////////////////////////////////////////
@@ -581,6 +692,18 @@ Try<bool> update(const std::string& _link, const 
Filter<Classifier>& filter)
         stringify(filter.priority().get().get()));
   }
 
+  // The kernel does not allow us to update the handle. So if the user
+  // specifies a handle, we will check to make sure they match.
+  if (filter.handle().isSome() &&
+      filter.handle().get().get() !=
+        rtnl_tc_get_handle(TC_CAST(oldCls.get().get()))) {
+    return Error(
+        "The handles do not match. The old handle is " +
+        stringify(rtnl_tc_get_handle(TC_CAST(oldCls.get().get()))) +
+        " and the new handle is " +
+        stringify(filter.handle().get().get()));
+  }
+
   Try<Netlink<struct rtnl_cls> > newCls = encodeFilter(link.get(), filter);
   if (newCls.isError()) {
     return Error("Failed to encode the new filter: " + newCls.error());

http://git-wip-us.apache.org/repos/asf/mesos/blob/b872ccd0/src/linux/routing/filter/ip.cpp
----------------------------------------------------------------------
diff --git a/src/linux/routing/filter/ip.cpp b/src/linux/routing/filter/ip.cpp
index 43f1f7f..74525a9 100644
--- a/src/linux/routing/filter/ip.cpp
+++ b/src/linux/routing/filter/ip.cpp
@@ -472,6 +472,26 @@ Try<bool> create(
           parent,
           classifier,
           priority,
+          None(),
+          redirect));
+}
+
+
+Try<bool> create(
+    const string& link,
+    const queueing::Handle& parent,
+    const Classifier& classifier,
+    const Option<Priority>& priority,
+    const Option<Handle>& handle,
+    const action::Redirect& redirect)
+{
+  return internal::create(
+      link,
+      Filter<Classifier>(
+          parent,
+          classifier,
+          priority,
+          handle,
           redirect));
 }
 
@@ -489,6 +509,7 @@ Try<bool> create(
           parent,
           classifier,
           priority,
+          None(),
           terminal));
 }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/b872ccd0/src/linux/routing/filter/ip.hpp
----------------------------------------------------------------------
diff --git a/src/linux/routing/filter/ip.hpp b/src/linux/routing/filter/ip.hpp
index 11e417d..566fd1e 100644
--- a/src/linux/routing/filter/ip.hpp
+++ b/src/linux/routing/filter/ip.hpp
@@ -158,6 +158,18 @@ Try<bool> create(
     const action::Redirect& redirect);
 
 
+// Same as above, but allow the user to specify the handle. This
+// interface is exposed only for testing MESOS-1617.
+// TODO(jieyu): Revisit this once the kernel bug is fixed.
+Try<bool> create(
+    const std::string& link,
+    const queueing::Handle& parent,
+    const Classifier& classifier,
+    const Option<Priority>& priority,
+    const Option<Handle>& handle,
+    const action::Redirect& redirect);
+
+
 // Creates an IP packet filter attached to the given parent on the
 // link which will stop the IP packets from being sent to the next
 // filter. Returns false if an IP packet filter attached to the given

http://git-wip-us.apache.org/repos/asf/mesos/blob/b872ccd0/src/tests/routing_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/routing_tests.cpp b/src/tests/routing_tests.cpp
index dcc4982..020676c 100644
--- a/src/tests/routing_tests.cpp
+++ b/src/tests/routing_tests.cpp
@@ -993,3 +993,77 @@ TEST_F(RoutingVethTest, ROOT_IPFilterRemove)
   ASSERT_SOME(classifiers);
   EXPECT_EQ(0u, classifiers.get().size());
 }
+
+
+// Test the workaround introduced for MESOS-1617.
+TEST_F(RoutingVethTest, ROOT_HandleGeneration)
+{
+  ASSERT_SOME(link::create(TEST_VETH_LINK, TEST_PEER_LINK, None()));
+
+  EXPECT_SOME_TRUE(link::exists(TEST_VETH_LINK));
+  EXPECT_SOME_TRUE(link::exists(TEST_PEER_LINK));
+
+  ASSERT_SOME_TRUE(ingress::create(TEST_VETH_LINK));
+
+  Result<net::MAC> mac = net::mac(TEST_VETH_LINK);
+  ASSERT_SOME(mac);
+
+  net::IP ip = net::IP(0x01020304); // 1.2.3.4
+
+  Try<ip::PortRange> sourcePorts1 =
+    ip::PortRange::fromBeginEnd(1024, 1027);
+
+  ASSERT_SOME(sourcePorts1);
+
+  Try<ip::PortRange> destinationPorts1 =
+    ip::PortRange::fromBeginEnd(2000, 2000);
+
+  ASSERT_SOME(destinationPorts1);
+
+  Try<ip::PortRange> sourcePorts2 =
+    ip::PortRange::fromBeginEnd(3024, 3025);
+
+  ASSERT_SOME(sourcePorts2);
+
+  Try<ip::PortRange> destinationPorts2 =
+    ip::PortRange::fromBeginEnd(4000, 4003);
+
+  ASSERT_SOME(destinationPorts2);
+
+  ip::Classifier classifier1 =
+    ip::Classifier(
+        mac.get(),
+        ip,
+        sourcePorts1.get(),
+        destinationPorts1.get());
+
+  ip::Classifier classifier2 =
+    ip::Classifier(
+        mac.get(),
+        ip,
+        sourcePorts2.get(),
+        destinationPorts2.get());
+
+  // Use handle 800:00:fff for the first filter.
+  EXPECT_SOME_TRUE(ip::create(
+      TEST_VETH_LINK,
+      ingress::HANDLE,
+      classifier1,
+      Priority(2, 1),
+      U32Handle(0x800, 0x0, 0xfff),
+      action::Redirect(TEST_PEER_LINK)));
+
+  // With the workaround, this filter should be assigned a handle
+  // different than 800:00:fff.
+  EXPECT_SOME_TRUE(ip::create(
+      TEST_VETH_LINK,
+      ingress::HANDLE,
+      classifier2,
+      Priority(2, 1),
+      action::Redirect(TEST_PEER_LINK)));
+
+  // Try to remove the second filter. If we don't have the workaround,
+  // removing the second filter will return false since the kernel
+  // will find the handle matches the first filter.
+  EXPECT_SOME_TRUE(ip::remove(TEST_VETH_LINK, ingress::HANDLE, classifier2));
+}

Reply via email to