[ 
https://issues.apache.org/jira/browse/CLOUDSTACK-9339?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15245479#comment-15245479
 ] 

Wei Zhou commented on CLOUDSTACK-9339:
--------------------------------------

Hi Dean,

I've applied the following patch to our internal version (based on 4.7.1):

{code}
diff --git a/systemvm/patches/debian/config/opt/cloud/bin/cs/CsAddress.py 
b/systemvm/patches/debian/config/opt/cloud/bin/cs/CsAddress.py
index b4ed263..b0e2429 100755
--- a/systemvm/patches/debian/config/opt/cloud/bin/cs/CsAddress.py
+++ b/systemvm/patches/debian/config/opt/cloud/bin/cs/CsAddress.py
@@ -27,7 +27,6 @@ from CsRoute import CsRoute
 from CsRule import CsRule

 VRRP_TYPES = ['guest']
-VPC_PUBLIC_INTERFACE = ['eth1']

 class CsAddress(CsDataBag):

@@ -323,7 +322,7 @@ class CsIP:
                 # If redundant only bring up public interfaces that are not 
eth1.
                 # Reason: private gateways are public interfaces.
                 # master.py and keepalived will deal with eth1 public 
interface.
-                if self.cl.is_redundant() and (not self.is_public() or 
(self.config.is_vpc() and self.getDevice() not in VPC_PUBLIC_INTERFACE)):
+                if self.cl.is_redundant() and not self.is_public():
                     CsHelper.execute(cmd2)
                 # if not redundant bring everything up
                 if not self.cl.is_redundant():
diff --git a/systemvm/patches/debian/config/opt/cloud/bin/cs/CsRedundant.py 
b/systemvm/patches/debian/config/opt/cloud/bin/cs/CsRedundant.py
index 385204c..b6e3c7d 100755
--- a/systemvm/patches/debian/config/opt/cloud/bin/cs/CsRedundant.py
+++ b/systemvm/patches/debian/config/opt/cloud/bin/cs/CsRedundant.py
@@ -41,6 +41,8 @@ from CsRoute import CsRoute
 import socket
 from time import sleep

+VPC_PUBLIC_INTERFACE = ['eth1']
+NETWORK_PUBLIC_INTERFACE = ['eth2']

 class CsRedundant(object):

@@ -193,6 +195,8 @@ class CsRedundant(object):
         if not proc.find() or keepalived_conf.is_changed() or 
force_keepalived_restart:
             keepalived_conf.commit()
             CsHelper.service("keepalived", "restart")
+        elif self.cl.is_master(): # Bring public interfaces up
+            self.bring_public_interfaces_up()

     def release_lock(self):
         try:
@@ -290,6 +294,27 @@ class CsRedundant(object):
         self.set_lock()
         logging.debug("Setting router to master")

+        self.bring_public_interfaces_up()
+
+        # ip route add default via $gw table Table_$dev proto static
+        cmd = "%s -C %s" % (self.CONNTRACKD_BIN, self.CONNTRACKD_CONF)
+        CsHelper.execute("%s -c" % cmd)
+        CsHelper.execute("%s -f" % cmd)
+        CsHelper.execute("%s -R" % cmd)
+        CsHelper.execute("%s -B" % cmd)
+        CsHelper.service("ipsec", "restart")
+        CsHelper.service("xl2tpd", "restart")
+        ads = [o for o in self.address.get_ips() if o.needs_vrrp()]
+        for o in ads:
+            CsPasswdSvc(o.get_gateway()).restart()
+
+        CsHelper.service("dnsmasq", "restart")
+        self.cl.set_master_state(True)
+        self.cl.save()
+        self.release_lock()
+        logging.info("Router switched to master mode")
+
+    def bring_public_interfaces_up(self):
         dev = ''
         ips = [ip for ip in self.address.get_ips() if ip.is_public()]
         route = CsRoute()
@@ -298,38 +323,27 @@ class CsRedundant(object):
                 continue
             dev = ip.get_device()
             logging.info("Will proceed configuring device ==> %s" % dev)
+            cmd1 = "ip link show %s | grep 'state UP'" % dev
             cmd2 = "ip link set %s up" % dev
             if CsDevice(dev, self.config).waitfordevice():
+                devUp = CsHelper.execute(cmd1)
+                if devUp:
+                    continue
                 CsHelper.execute(cmd2)
                 logging.info("Bringing public interface %s up" % dev)

                 try:
                     gateway = ip.get_gateway()
                     logging.info("Adding gateway ==> %s to device ==> %s" % 
(gateway, dev))
-                    route.add_defaultroute(gateway)
+                    if self.config.is_vpc() and dev in VPC_PUBLIC_INTERFACE:
+                        route.add_defaultroute(gateway)
+                    elif not self.config.is_vpc() and dev in 
NETWORK_PUBLIC_INTERFACE:
+                        route.add_defaultroute(gateway)
                 except:
                     logging.error("ERROR getting gateway from device %s" % dev)
             else:
                 logging.error("Device %s was not ready could not bring it up" 
% dev)

-        # ip route add default via $gw table Table_$dev proto static
-        cmd = "%s -C %s" % (self.CONNTRACKD_BIN, self.CONNTRACKD_CONF)
-        CsHelper.execute("%s -c" % cmd)
-        CsHelper.execute("%s -f" % cmd)
-        CsHelper.execute("%s -R" % cmd)
-        CsHelper.execute("%s -B" % cmd)
-        CsHelper.service("ipsec", "restart")
-        CsHelper.service("xl2tpd", "restart")
-        ads = [o for o in self.address.get_ips() if o.needs_vrrp()]
-        for o in ads:
-            CsPasswdSvc(o.get_gateway()).restart()
-
-        CsHelper.service("dnsmasq", "restart")
-        self.cl.set_master_state(True)
-        self.cl.save()
-        self.release_lock()
-        logging.info("Router switched to master mode")
-
     def _collect_ignore_ips(self):
         """
         This returns a list of ip objects that should be ignored
{code}

> Virtual Routers don't handle Multiple Public Interfaces
> -------------------------------------------------------
>
>                 Key: CLOUDSTACK-9339
>                 URL: https://issues.apache.org/jira/browse/CLOUDSTACK-9339
>             Project: CloudStack
>          Issue Type: Bug
>      Security Level: Public(Anyone can view this level - this is the 
> default.) 
>          Components: Virtual Router
>    Affects Versions: 4.8.0
>            Reporter: dsclose
>              Labels: firewall, nat, router
>
> There are a series of issues with the way Virtual Routers manage multiple 
> public interfaces. These are more pronounced on redundant virtual router 
> setups. I have not attempted to examine these issues in a VPC context. 
> Outside of a VPC context, however, the following is expected behaviour:
> * eth0 connects the router to the guest network.
> * In RvR setups, keepalived manages the guests' gateway IP as a virtual IP on 
> eth0.
> * eth1 provides a local link to the hypervisor, allowing Cloudstack to issue 
> commands to the router.
> * eth2 is the router's public interface. By default, a single public IP will 
> be set up on eth2 along with the necessary iptables and ip rules to source-NAT 
> guest traffic to that public IP.
> * When a public IP address is assigned to the router that is on a separate 
> subnet to the source-NAT IP, a new interface is configured, such as eth3, and 
> the IP is assigned to that interface.
> * This can result in eth3, eth4, eth5, etc. being created depending upon how 
> many public subnets the router has to work with.
> The above all works. The following, however, is currently not working:
> * Public interfaces should be set to DOWN on backup redundant routers. The 
> master.py script is responsible for setting public interfaces to UP during a 
> keepalived transition. Currently the check_is_up method of the CsIP class 
> brings all interfaces UP on both routers of an RvR pair. A proposed fix for 
> this has been discussed on the mailing list. That fix will leave public 
> interfaces DOWN on RvR, allowing the keepalived transition to control the 
> state of public interfaces. Issue #1413 includes a commit that contradicts 
> the proposed fix, so it is unclear what the current state of the code should be.
> * Newly created interfaces should be set to UP on master redundant routers. 
> Assuming public interfaces should by default be DOWN on an RvR, we need to 
> accommodate the fact that, as interfaces are created, no keepalived 
> transition occurs. This means that assigning an IP from a new public subnet 
> will have no effect (as the interface will be down) until the network is 
> restarted with a "clean up."
> * Public interfaces other than eth2 do not forward traffic. There are two 
> iptables rules in the FORWARD chain of the filter table created for eth2 that 
> allow forwarding between eth2 and eth0. Equivalent rules are not created for 
> other public interfaces so forwarded traffic is dropped.
> * Outbound traffic from guest VMs does not honour static-NAT rules. Instead, 
> outbound traffic is source-NAT'd to the network's default source-NAT IP. New 
> connections from guests that are destined for public networks are processed 
> like so:
> 1. Traffic is matched against the following rule in the mangle table that 
> marks the connection with 0x0:
> *mangle
> -A PREROUTING -i eth0 -m state --state NEW -j CONNMARK --set-xmark 
> 0x0/0xffffffff
> 2. There are no "ip rule" statements that match a connection marked 0x0, so 
> the kernel routes the connection via the default gateway. That gateway is on 
> the source-NAT subnet, so the connection is routed out of eth2.
> 3. The following iptables rules are then matched in the filter table:
> *filter
> -A FORWARD -i eth0 -o eth2 -j FW_OUTBOUND
> -A FW_OUTBOUND -j FW_EGRESS_RULES
> -A FW_EGRESS_RULES -j ACCEPT
> 4. Finally, the following rule is matched from the nat table, where the IP 
> address is the source-NAT IP:
> *nat
> -A POSTROUTING -o eth2 -j SNAT --to-source 123.4.5.67
>  



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to