[
https://issues.apache.org/jira/browse/CLOUDSTACK-9339?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15245479#comment-15245479
]
Wei Zhou commented on CLOUDSTACK-9339:
--------------------------------------
Hi Dean,
I've applied the following patch to our internal version (based on 4.7.1):
{code}
diff --git a/systemvm/patches/debian/config/opt/cloud/bin/cs/CsAddress.py
b/systemvm/patches/debian/config/opt/cloud/bin/cs/CsAddress.py
index b4ed263..b0e2429 100755
--- a/systemvm/patches/debian/config/opt/cloud/bin/cs/CsAddress.py
+++ b/systemvm/patches/debian/config/opt/cloud/bin/cs/CsAddress.py
@@ -27,7 +27,6 @@ from CsRoute import CsRoute
from CsRule import CsRule
VRRP_TYPES = ['guest']
-VPC_PUBLIC_INTERFACE = ['eth1']
class CsAddress(CsDataBag):
@@ -323,7 +322,7 @@ class CsIP:
# If redundant only bring up public interfaces that are not
eth1.
# Reason: private gateways are public interfaces.
# master.py and keepalived will deal with eth1 public
interface.
- if self.cl.is_redundant() and (not self.is_public() or
(self.config.is_vpc() and self.getDevice() not in VPC_PUBLIC_INTERFACE)):
+ if self.cl.is_redundant() and not self.is_public():
CsHelper.execute(cmd2)
# if not redundant bring everything up
if not self.cl.is_redundant():
diff --git a/systemvm/patches/debian/config/opt/cloud/bin/cs/CsRedundant.py
b/systemvm/patches/debian/config/opt/cloud/bin/cs/CsRedundant.py
index 385204c..b6e3c7d 100755
--- a/systemvm/patches/debian/config/opt/cloud/bin/cs/CsRedundant.py
+++ b/systemvm/patches/debian/config/opt/cloud/bin/cs/CsRedundant.py
@@ -41,6 +41,8 @@ from CsRoute import CsRoute
import socket
from time import sleep
+VPC_PUBLIC_INTERFACE = ['eth1']
+NETWORK_PUBLIC_INTERFACE = ['eth2']
class CsRedundant(object):
@@ -193,6 +195,8 @@ class CsRedundant(object):
if not proc.find() or keepalived_conf.is_changed() or
force_keepalived_restart:
keepalived_conf.commit()
CsHelper.service("keepalived", "restart")
+ elif self.cl.is_master(): # Bring public interfaces up
+ self.bring_public_interfaces_up()
def release_lock(self):
try:
@@ -290,6 +294,27 @@ class CsRedundant(object):
self.set_lock()
logging.debug("Setting router to master")
+ self.bring_public_interfaces_up()
+
+ # ip route add default via $gw table Table_$dev proto static
+ cmd = "%s -C %s" % (self.CONNTRACKD_BIN, self.CONNTRACKD_CONF)
+ CsHelper.execute("%s -c" % cmd)
+ CsHelper.execute("%s -f" % cmd)
+ CsHelper.execute("%s -R" % cmd)
+ CsHelper.execute("%s -B" % cmd)
+ CsHelper.service("ipsec", "restart")
+ CsHelper.service("xl2tpd", "restart")
+ ads = [o for o in self.address.get_ips() if o.needs_vrrp()]
+ for o in ads:
+ CsPasswdSvc(o.get_gateway()).restart()
+
+ CsHelper.service("dnsmasq", "restart")
+ self.cl.set_master_state(True)
+ self.cl.save()
+ self.release_lock()
+ logging.info("Router switched to master mode")
+
+ def bring_public_interfaces_up(self):
dev = ''
ips = [ip for ip in self.address.get_ips() if ip.is_public()]
route = CsRoute()
@@ -298,38 +323,27 @@ class CsRedundant(object):
continue
dev = ip.get_device()
logging.info("Will proceed configuring device ==> %s" % dev)
+ cmd1 = "ip link show %s | grep 'state UP'" % dev
cmd2 = "ip link set %s up" % dev
if CsDevice(dev, self.config).waitfordevice():
+ devUp = CsHelper.execute(cmd1)
+ if devUp:
+ continue
CsHelper.execute(cmd2)
logging.info("Bringing public interface %s up" % dev)
try:
gateway = ip.get_gateway()
logging.info("Adding gateway ==> %s to device ==> %s" %
(gateway, dev))
- route.add_defaultroute(gateway)
+ if self.config.is_vpc() and dev in VPC_PUBLIC_INTERFACE:
+ route.add_defaultroute(gateway)
+ elif not self.config.is_vpc() and dev in
NETWORK_PUBLIC_INTERFACE:
+ route.add_defaultroute(gateway)
except:
logging.error("ERROR getting gateway from device %s" % dev)
else:
logging.error("Device %s was not ready could not bring it up"
% dev)
- # ip route add default via $gw table Table_$dev proto static
- cmd = "%s -C %s" % (self.CONNTRACKD_BIN, self.CONNTRACKD_CONF)
- CsHelper.execute("%s -c" % cmd)
- CsHelper.execute("%s -f" % cmd)
- CsHelper.execute("%s -R" % cmd)
- CsHelper.execute("%s -B" % cmd)
- CsHelper.service("ipsec", "restart")
- CsHelper.service("xl2tpd", "restart")
- ads = [o for o in self.address.get_ips() if o.needs_vrrp()]
- for o in ads:
- CsPasswdSvc(o.get_gateway()).restart()
-
- CsHelper.service("dnsmasq", "restart")
- self.cl.set_master_state(True)
- self.cl.save()
- self.release_lock()
- logging.info("Router switched to master mode")
-
def _collect_ignore_ips(self):
"""
This returns a list of ip objects that should be ignored
{code}
> Virtual Routers don't handle Multiple Public Interfaces
> -------------------------------------------------------
>
> Key: CLOUDSTACK-9339
> URL: https://issues.apache.org/jira/browse/CLOUDSTACK-9339
> Project: CloudStack
> Issue Type: Bug
> Security Level: Public(Anyone can view this level - this is the
> default.)
> Components: Virtual Router
> Affects Versions: 4.8.0
> Reporter: dsclose
> Labels: firewall, nat, router
>
> There are a series of issues with the way Virtual Routers manage multiple
> public interfaces. These are more pronounced on redundant virtual router
> setups. I have not attempted to examine these issues in a VPC context.
> Outside of a VPC context, however, the following is expected behaviour:
> * eth0 connects the router to the guest network.
> * In RvR setups, keepalived manages the guests' gateway IP as a virtual IP on
> eth0.
> * eth1 provides a local link to the hypervisor, allowing Cloudstack to issue
> commands to the router.
> * eth2 is the router's public interface. By default, a single public IP will
> be set up on eth2 along with the necessary iptables and ip rules to source-NAT
> guest traffic to that public IP.
> * When a public IP address is assigned to the router that is on a separate
> subnet to the source-NAT IP, a new interface is configured, such as eth3, and
> the IP is assigned to that interface.
> * This can result in eth3, eth4, eth5, etc. being created depending upon how
> many public subnets the router has to work with.
> The above all works. The following, however, is currently not working:
> * Public interfaces should be set to DOWN on backup redundant routers. The
> master.py script is responsible for setting public interfaces to UP during a
> keepalived transition. Currently the check_is_up method of the CsIP class
> brings all interfaces UP on both RvR. A proposed fix for this has been
> discussed on the mailing list. That fix will leave public interfaces DOWN on
> RvR allowing the keepalived transition to control the state of public
> interfaces. Issue #1413 includes a commit that contradicts the proposed fix
> so it is unclear what the current state of the code should be.
> * Newly created interfaces should be set to UP on master redundant routers.
> Assuming public interfaces should by default be DOWN on an RvR, we need to
> accommodate the fact that, as interfaces are created, no keepalived
> transition occurs. This means that assigning an IP from a new public subnet
> will have no effect (as the interface will be down) until the network is
> restarted with a "clean up."
> * Public interfaces other than eth2 do not forward traffic. There are two
> iptables rules in the FORWARD chain of the filter table created for eth2 that
> allow forwarding between eth2 and eth0. Equivalent rules are not created for
> other public interfaces so forwarded traffic is dropped.
> * Outbound traffic from guest VMs does not honour static-NAT rules. Instead,
> outbound traffic is source-NAT'd to the network's default source-NAT IP. New
> connections from guests that are destined for public networks are processed
> like so:
> 1. Traffic is matched against the following rule in the mangle table that
> marks the connection with a 0x0:
> *mangle
> -A PREROUTING -i eth0 -m state --state NEW -j CONNMARK --set-xmark
> 0x0/0xffffffff
> 2. There are no "ip rule" statements that match a connection marked 0x0, so
> the kernel routes the connection via the default gateway. That gateway is on
> the source-NAT subnet, so the connection is routed out of eth2.
> 3. The following iptables rules are then matched in the filter table:
> *filter
> -A FORWARD -i eth0 -o eth2 -j FW_OUTBOUND
> -A FW_OUTBOUND -j FW_EGRESS_RULES
> -A FW_EGRESS_RULES -j ACCEPT
> 4. Finally, the following rule is matched from the nat table, where the IP
> address is the source-NAT IP:
> *nat
> -A POSTROUTING -o eth2 -j SNAT --to-source 123.4.5.67
>
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)