On Dec 19, 2007 6:00 PM, Damien Thébault <[EMAIL PROTECTED]> wrote:
> Hello,
>
> I sent the quoted mail to linux-net with my problem yesterday, but I
> did a git bisect today and I got the following output :
>
> > 2bf540b73ed5b304e84bb4d4c390d49d1cfa0ef8 is first bad commit
> > commit 2bf540b73ed5b304e84bb4d4c390d49d1cfa0ef8
> > Author: Patrick McHardy <[EMAIL PROTECTED]>
> > Date: Wed Dec 13 16:54:25 2006 -0800
> >
> > [NETFILTER]: bridge-netfilter: remove deferred hooks
> >
> > Remove the deferred hooks and all related code as scheduled in
> > feature-removal-schedule.
> >
> > Signed-off-by: Patrick McHardy <[EMAIL PROTECTED]>
> > Signed-off-by: David S. Miller <[EMAIL PROTECTED]>
> >
> > :040000 040000 c49ea947455937566b6129991dde5e86f2453aae
> > 6611736ce5c0fcde7627494b66b9ea94e37ea42e M Documentation
> > :040000 040000 d0dd0700fe68f98b52687be3a0c31d73f7b15b81
> > f8ddf15a0389c5f5b7f2c11d7d0db039a660e1d5 M include
> > :040000 040000 dafccf7ff8657be9adca6b28dbd365cdd6c01ca5
> > 3eeb1cb4b16cc5cb698ab559b47ea6b0991d4d3a M net
>
This morning I reverted the patch and ported it to work with the
net-2.6.25 and 2.6.23 kernels. With it, the behaviour seems to be good
again (I didn't test a lot so I don't know if anything else is broken
by this).
I don't know yet if it solves my RTSP conntrack problem too.
(Yes I know this is not really the good way to handle this, but since
the removal of the deferred hooks before 2.6.20, there was a lot of
changes in this area, so I just tried to see if it was working with
the current kernel)
I'm attaching the two patchs if anyone needs it.
--
Damien Thebault
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 0ae682b..919f331 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -181,6 +181,22 @@ Who: Nick Piggin <[EMAIL PROTECTED]>
---------------------------
+What: Bridge netfilter deferred IPv4/IPv6 output hook calling
+When: January 2007
+Why: The deferred output hooks are a layering violation causing unusual
+ and broken behaviour on bridge devices. Examples of things they
+ break include QoS classifation using the MARK or CLASSIFY targets,
+ the IPsec policy match and connection tracking with VLANs on a
+ bridge. Their only use is to enable bridge output port filtering
+ within iptables with the physdev match, which can also be done by
+ combining iptables and ebtables using netfilter marks. Until it
+ will get removed the hook deferral is disabled by default and is
+ only enabled when needed.
+
+Who: Patrick McHardy <[EMAIL PROTECTED]>
+
+---------------------------
+
What: PHYSDEVPATH, PHYSDEVBUS, PHYSDEVDRIVER in the uevent environment
When: October 2008
Why: The stacking of class devices makes these values misleading and
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 499aa93..d9487b9 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -83,6 +83,7 @@ struct bridge_skb_cb {
} daddr;
};
+extern int brnf_deferred_hooks;
#else
#define nf_bridge_maybe_copy_header(skb) (0)
#define nf_bridge_pad(skb) (0)
diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index 9a10092..b96952a 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -36,6 +36,7 @@
#define NFC_IP_DST_PT 0x0400
/* Something else about the proto */
#define NFC_IP_PROTO_UNKNOWN 0x2000
+#endif /* ! __KERNEL__ */
/* IP Hooks */
/* After promisc drops, checksum checks. */
@@ -49,7 +50,6 @@
/* Packets about to hit the wire. */
#define NF_IP_POST_ROUTING 4
#define NF_IP_NUMHOOKS 5
-#endif /* ! __KERNEL__ */
enum nf_ip_hook_priorities {
NF_IP_PRI_FIRST = INT_MIN,
@@ -57,8 +57,10 @@ enum nf_ip_hook_priorities {
NF_IP_PRI_RAW = -300,
NF_IP_PRI_SELINUX_FIRST = -225,
NF_IP_PRI_CONNTRACK = -200,
+ NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD = -175,
NF_IP_PRI_MANGLE = -150,
NF_IP_PRI_NAT_DST = -100,
+ NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT = -50,
NF_IP_PRI_FILTER = 0,
NF_IP_PRI_NAT_SRC = 100,
NF_IP_PRI_SELINUX_LAST = 225,
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 3475a65..07133c9 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -40,6 +40,7 @@
#define NFC_IP6_DST_PT 0x0400
/* Something else about the proto */
#define NFC_IP6_PROTO_UNKNOWN 0x2000
+#endif /* ! __KERNEL__ */
/* IP6 Hooks */
/* After promisc drops, checksum checks. */
@@ -53,7 +54,6 @@
/* Packets about to hit the wire. */
#define NF_IP6_POST_ROUTING 4
#define NF_IP6_NUMHOOKS 5
-#endif /* ! __KERNEL__ */
enum nf_ip6_hook_priorities {
@@ -61,8 +61,10 @@ enum nf_ip6_hook_priorities {
NF_IP6_PRI_CONNTRACK_DEFRAG = -400,
NF_IP6_PRI_SELINUX_FIRST = -225,
NF_IP6_PRI_CONNTRACK = -200,
+ NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD = -175,
NF_IP6_PRI_MANGLE = -150,
NF_IP6_PRI_NAT_DST = -100,
+ NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT = -50,
NF_IP6_PRI_FILTER = 0,
NF_IP6_PRI_NAT_SRC = 100,
NF_IP6_PRI_SELINUX_LAST = 225,
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 32ac035..86131b4 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -65,6 +65,9 @@ static int brnf_filter_pppoe_tagged __read_mostly = 1;
#define brnf_filter_pppoe_tagged 1
#endif
+int brnf_deferred_hooks;
+EXPORT_SYMBOL_GPL(brnf_deferred_hooks);
+
static inline __be16 vlan_proto(const struct sk_buff *skb)
{
return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
@@ -689,46 +692,109 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
return NF_STOLEN;
}
-/* PF_BRIDGE/LOCAL_OUT ***********************************************
- *
- * This function sees both locally originated IP packets and forwarded
+/* PF_BRIDGE/LOCAL_OUT ***********************************************/
+static int br_nf_local_out_finish(struct sk_buff *skb)
+{
+ if (skb->protocol == htons(ETH_P_8021Q)) {
+ skb_push(skb, VLAN_HLEN);
+ skb->network_header -= VLAN_HLEN;
+ }
+
+ NF_HOOK_THRESH(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
+ br_forward_finish, NF_BR_PRI_FIRST + 1);
+
+ return 0;
+}
+
+/* This function sees both locally originated IP packets and forwarded
* IP packets (in both cases the destination device is a bridge
* device). It also sees bridged-and-DNAT'ed packets.
+ * To be able to filter on the physical bridge devices (with the physdev
+ * module), we steal packets destined to a bridge device away from the
+ * PF_INET/FORWARD and PF_INET/OUTPUT hook functions, and give them back later,
+ * when we have determined the real output device. This is done in here.
*
* If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged
* and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward()
* will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority
* NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor
* will be executed.
- */
+ * Otherwise, if nf_bridge->physindev is NULL, the bridge-nf code never touched
+ * this packet before, and so the packet was locally originated. We fake
+ * the PF_INET/LOCAL_OUT hook.
+ * Finally, if nf_bridge->physindev isn't NULL, then the packet was IP routed,
+ * so we fake the PF_INET/FORWARD hook. ip_sabotage_out() makes sure
+ * even routed packets that didn't arrive on a bridge interface have their
+ * nf_bridge->physindev set. */
static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct net_device *realindev;
+ struct net_device *realindev, *realoutdev;
struct nf_bridge_info *nf_bridge;
+ int pf;
if (!skb->nf_bridge)
return NF_ACCEPT;
+ if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb))
+ pf = PF_INET;
+ else
+ pf = PF_INET6;
+
nf_bridge = skb->nf_bridge;
- if (!(nf_bridge->mask & BRNF_BRIDGED_DNAT))
- return NF_ACCEPT;
+ nf_bridge->physoutdev = skb->dev;
+ realindev = nf_bridge->physindev;
/* Bridged, take PF_BRIDGE/FORWARD.
* (see big note in front of br_nf_pre_routing_finish) */
- nf_bridge->physoutdev = skb->dev;
- realindev = nf_bridge->physindev;
+ if (nf_bridge->mask & BRNF_BRIDGED_DNAT) {
+ if (nf_bridge->mask & BRNF_PKT_TYPE) {
+ skb->pkt_type = PACKET_OTHERHOST;
+ nf_bridge->mask ^= BRNF_PKT_TYPE;
+ }
+ if (skb->protocol == htons(ETH_P_8021Q)) {
+ skb_push(skb, VLAN_HLEN);
+ skb->network_header -= VLAN_HLEN;
+ }
- if (nf_bridge->mask & BRNF_PKT_TYPE) {
- skb->pkt_type = PACKET_OTHERHOST;
- nf_bridge->mask ^= BRNF_PKT_TYPE;
+ NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev,
+ skb->dev, br_forward_finish);
+ goto out;
}
- nf_bridge_push_encap_header(skb);
+ realoutdev = bridge_parent(skb->dev);
+ if (!realoutdev)
+ return NF_DROP;
+
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+ /* iptables should match -o br0.x */
+ if (nf_bridge->netoutdev)
+ realoutdev = nf_bridge->netoutdev;
+#endif
+ if (skb->protocol == htons(ETH_P_8021Q)) {
+ skb_pull(skb, VLAN_HLEN);
+ skb->network_header += VLAN_HLEN;
+ }
+ /* IP forwarded traffic has a physindev, locally
+ * generated traffic hasn't. */
+ if (realindev != NULL) {
+ if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT)) {
+ struct net_device *parent = bridge_parent(realindev);
+ if (parent)
+ realindev = parent;
+ }
- NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev,
- br_forward_finish);
+ NF_HOOK_THRESH(pf, NF_IP_FORWARD, skb, realindev,
+ realoutdev, br_nf_local_out_finish,
+ NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1);
+ } else {
+ NF_HOOK_THRESH(pf, NF_IP_LOCAL_OUT, skb, realindev,
+ realoutdev, br_nf_local_out_finish,
+ NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1);
+ }
+
+out:
return NF_STOLEN;
}
@@ -834,6 +900,67 @@ static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb,
return NF_ACCEPT;
}
+/* Postpone execution of PF_INET(6)/FORWARD, PF_INET(6)/LOCAL_OUT
+ * and PF_INET(6)/POST_ROUTING until we have done the forwarding
+ * decision in the bridge code and have determined nf_bridge->physoutdev. */
+static unsigned int ip_sabotage_out(unsigned int hook, struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ if ((out->hard_start_xmit == br_dev_xmit &&
+ okfn != br_nf_forward_finish &&
+ okfn != br_nf_local_out_finish && okfn != br_nf_dev_queue_xmit)
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+ || ((out->priv_flags & IFF_802_1Q_VLAN) &&
+ VLAN_DEV_INFO(out)->real_dev->hard_start_xmit == br_dev_xmit)
+#endif
+ ) {
+ struct nf_bridge_info *nf_bridge;
+
+ if (!skb->nf_bridge) {
+#ifdef CONFIG_SYSCTL
+ /* This code is executed while in the IP(v6) stack,
+ the version should be 4 or 6. We can't use
+ skb->protocol because that isn't set on
+ PF_INET(6)/LOCAL_OUT. */
+ struct iphdr *ip = ip_hdr(skb);
+
+ if (ip->version == 4 && !brnf_call_iptables)
+ return NF_ACCEPT;
+ else if (ip->version == 6 && !brnf_call_ip6tables)
+ return NF_ACCEPT;
+ else if (!brnf_deferred_hooks)
+ return NF_ACCEPT;
+#endif
+ if (hook == NF_IP_POST_ROUTING)
+ return NF_ACCEPT;
+ if (!nf_bridge_alloc(skb))
+ return NF_DROP;
+ }
+
+ nf_bridge = skb->nf_bridge;
+
+ /* This frame will arrive on PF_BRIDGE/LOCAL_OUT and we
+ * will need the indev then. For a brouter, the real indev
+ * can be a bridge port, so we make sure br_nf_local_out()
+ * doesn't use the bridge parent of the indev by using
+ * the BRNF_DONT_TAKE_PARENT mask. */
+ if (hook == NF_IP_FORWARD && nf_bridge->physindev == NULL) {
+ nf_bridge->mask |= BRNF_DONT_TAKE_PARENT;
+ nf_bridge->physindev = (struct net_device *)in;
+ }
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+ /* the iptables outdev is br0.x, not br0 */
+ if (out->priv_flags & IFF_802_1Q_VLAN)
+ nf_bridge->netoutdev = (struct net_device *)out;
+#endif
+ return NF_STOP;
+ }
+
+ return NF_ACCEPT;
+}
+
/* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent
* PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input.
* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
@@ -879,6 +1006,36 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = {
.pf = PF_INET6,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_FIRST, },
+ { .hook = ip_sabotage_out,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_FORWARD,
+ .priority = NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD, },
+ { .hook = ip_sabotage_out,
+ .owner = THIS_MODULE,
+ .pf = PF_INET6,
+ .hooknum = NF_IP6_FORWARD,
+ .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD, },
+ { .hook = ip_sabotage_out,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_LOCAL_OUT,
+ .priority = NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, },
+ { .hook = ip_sabotage_out,
+ .owner = THIS_MODULE,
+ .pf = PF_INET6,
+ .hooknum = NF_IP6_LOCAL_OUT,
+ .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, },
+ { .hook = ip_sabotage_out,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_POST_ROUTING,
+ .priority = NF_IP_PRI_FIRST, },
+ { .hook = ip_sabotage_out,
+ .owner = THIS_MODULE,
+ .pf = PF_INET6,
+ .hooknum = NF_IP6_POST_ROUTING,
+ .priority = NF_IP6_PRI_FIRST, },
};
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 678b683..6e7eb9b 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -104,16 +104,20 @@ physdev_mt_check(const char *tablename, const void *ip,
if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
info->bitmask & ~XT_PHYSDEV_OP_MASK)
return false;
- if (info->bitmask & XT_PHYSDEV_OP_OUT &&
+ if (brnf_deferred_hooks == 0 &&
+ info->bitmask & XT_PHYSDEV_OP_OUT &&
(!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) ||
info->invert & XT_PHYSDEV_OP_BRIDGED) &&
hook_mask & ((1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
(1 << NF_INET_POST_ROUTING))) {
printk(KERN_WARNING "physdev match: using --physdev-out in the "
"OUTPUT, FORWARD and POSTROUTING chains for non-bridged "
- "traffic is not supported anymore.\n");
- if (hook_mask & (1 << NF_INET_LOCAL_OUT))
- return false;
+ "traffic is deprecated and breaks other things, it will "
+ "be removed in January 2007. See Documentation/"
+ "feature-removal-schedule.txt for details. This doesn't "
+ "affect you in case you're using it for purely bridged "
+ "traffic.\n");
+ brnf_deferred_hooks = 1;
}
return true;
}
Index: Documentation/feature-removal-schedule.txt
===================================================================
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -154,6 +154,22 @@
---------------------------
+What: Bridge netfilter deferred IPv4/IPv6 output hook calling
+When: January 2007
+Why: The deferred output hooks are a layering violation causing unusual
+ and broken behaviour on bridge devices. Examples of things they
+ break include QoS classifation using the MARK or CLASSIFY targets,
+ the IPsec policy match and connection tracking with VLANs on a
+ bridge. Their only use is to enable bridge output port filtering
+ within iptables with the physdev match, which can also be done by
+ combining iptables and ebtables using netfilter marks. Until it
+ will get removed the hook deferral is disabled by default and is
+ only enabled when needed.
+
+Who: Patrick McHardy <[EMAIL PROTECTED]>
+
+---------------------------
+
What: PHYSDEVPATH, PHYSDEVBUS, PHYSDEVDRIVER in the uevent environment
When: October 2008
Why: The stacking of class devices makes these values misleading and
Index: include/linux/netfilter_bridge.h
===================================================================
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -82,6 +82,7 @@
} daddr;
};
+extern int brnf_deferred_hooks;
#else
#define nf_bridge_maybe_copy_header(skb) (0)
#define nf_bridge_pad(skb) (0)
Index: include/linux/netfilter_ipv4.h
===================================================================
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -57,8 +57,10 @@
NF_IP_PRI_RAW = -300,
NF_IP_PRI_SELINUX_FIRST = -225,
NF_IP_PRI_CONNTRACK = -200,
+ NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD = -175,
NF_IP_PRI_MANGLE = -150,
NF_IP_PRI_NAT_DST = -100,
+ NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT = -50,
NF_IP_PRI_FILTER = 0,
NF_IP_PRI_NAT_SRC = 100,
NF_IP_PRI_SELINUX_LAST = 225,
Index: include/linux/netfilter_ipv6.h
===================================================================
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -62,8 +62,10 @@
NF_IP6_PRI_CONNTRACK_DEFRAG = -400,
NF_IP6_PRI_SELINUX_FIRST = -225,
NF_IP6_PRI_CONNTRACK = -200,
+ NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD = -175,
NF_IP6_PRI_MANGLE = -150,
NF_IP6_PRI_NAT_DST = -100,
+ NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT = -50,
NF_IP6_PRI_FILTER = 0,
NF_IP6_PRI_NAT_SRC = 100,
NF_IP6_PRI_SELINUX_LAST = 225,
Index: net/bridge/br_netfilter.c
===================================================================
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -65,6 +65,9 @@
#define brnf_filter_pppoe_tagged 1
#endif
+int brnf_deferred_hooks;
+EXPORT_SYMBOL_GPL(brnf_deferred_hooks);
+
static inline __be16 vlan_proto(const struct sk_buff *skb)
{
return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
@@ -697,47 +700,110 @@
return NF_STOLEN;
}
-/* PF_BRIDGE/LOCAL_OUT ***********************************************
- *
- * This function sees both locally originated IP packets and forwarded
+/* PF_BRIDGE/LOCAL_OUT ***********************************************/
+static int br_nf_local_out_finish(struct sk_buff *skb)
+{
+ if (skb->protocol == htons(ETH_P_8021Q)) {
+ skb_push(skb, VLAN_HLEN);
+ skb->network_header -= VLAN_HLEN;
+ }
+
+ NF_HOOK_THRESH(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
+ br_forward_finish, NF_BR_PRI_FIRST + 1);
+
+ return 0;
+}
+
+/* This function sees both locally originated IP packets and forwarded
* IP packets (in both cases the destination device is a bridge
* device). It also sees bridged-and-DNAT'ed packets.
+ * To be able to filter on the physical bridge devices (with the physdev
+ * module), we steal packets destined to a bridge device away from the
+ * PF_INET/FORWARD and PF_INET/OUTPUT hook functions, and give them back later,
+ * when we have determined the real output device. This is done in here.
*
* If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged
* and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward()
* will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority
* NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor
* will be executed.
- */
+ * Otherwise, if nf_bridge->physindev is NULL, the bridge-nf code never touched
+ * this packet before, and so the packet was locally originated. We fake
+ * the PF_INET/LOCAL_OUT hook.
+ * Finally, if nf_bridge->physindev isn't NULL, then the packet was IP routed,
+ * so we fake the PF_INET/FORWARD hook. ip_sabotage_out() makes sure
+ * even routed packets that didn't arrive on a bridge interface have their
+ * nf_bridge->physindev set. */
static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct net_device *realindev;
+ struct net_device *realindev, *realoutdev;
struct sk_buff *skb = *pskb;
struct nf_bridge_info *nf_bridge;
+ int pf;
if (!skb->nf_bridge)
return NF_ACCEPT;
+ if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb))
+ pf = PF_INET;
+ else
+ pf = PF_INET6;
+
nf_bridge = skb->nf_bridge;
- if (!(nf_bridge->mask & BRNF_BRIDGED_DNAT))
- return NF_ACCEPT;
+ nf_bridge->physoutdev = skb->dev;
+ realindev = nf_bridge->physindev;
/* Bridged, take PF_BRIDGE/FORWARD.
* (see big note in front of br_nf_pre_routing_finish) */
- nf_bridge->physoutdev = skb->dev;
- realindev = nf_bridge->physindev;
+ if (nf_bridge->mask & BRNF_BRIDGED_DNAT) {
+ if (nf_bridge->mask & BRNF_PKT_TYPE) {
+ skb->pkt_type = PACKET_OTHERHOST;
+ nf_bridge->mask ^= BRNF_PKT_TYPE;
+ }
+ if (skb->protocol == htons(ETH_P_8021Q)) {
+ skb_push(skb, VLAN_HLEN);
+ skb->network_header -= VLAN_HLEN;
+ }
- if (nf_bridge->mask & BRNF_PKT_TYPE) {
- skb->pkt_type = PACKET_OTHERHOST;
- nf_bridge->mask ^= BRNF_PKT_TYPE;
+ NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev,
+ skb->dev, br_forward_finish);
+ goto out;
}
- nf_bridge_push_encap_header(skb);
+ realoutdev = bridge_parent(skb->dev);
+ if (!realoutdev)
+ return NF_DROP;
+
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+ /* iptables should match -o br0.x */
+ if (nf_bridge->netoutdev)
+ realoutdev = nf_bridge->netoutdev;
+#endif
+ if (skb->protocol == htons(ETH_P_8021Q)) {
+ skb_pull(skb, VLAN_HLEN);
+ skb->network_header += VLAN_HLEN;
+ }
+ /* IP forwarded traffic has a physindev, locally
+ * generated traffic hasn't. */
+ if (realindev != NULL) {
+ if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT)) {
+ struct net_device *parent = bridge_parent(realindev);
+ if (parent)
+ realindev = parent;
+ }
- NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev,
- br_forward_finish);
+ NF_HOOK_THRESH(pf, NF_IP_FORWARD, skb, realindev,
+ realoutdev, br_nf_local_out_finish,
+ NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1);
+ } else {
+ NF_HOOK_THRESH(pf, NF_IP_LOCAL_OUT, skb, realindev,
+ realoutdev, br_nf_local_out_finish,
+ NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1);
+ }
+
+out:
return NF_STOLEN;
}
@@ -841,6 +907,69 @@
return NF_ACCEPT;
}
+/* Postpone execution of PF_INET(6)/FORWARD, PF_INET(6)/LOCAL_OUT
+ * and PF_INET(6)/POST_ROUTING until we have done the forwarding
+ * decision in the bridge code and have determined nf_bridge->physoutdev. */
+static unsigned int ip_sabotage_out(unsigned int hook, struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct sk_buff *skb = *pskb;
+
+ if ((out->hard_start_xmit == br_dev_xmit &&
+ okfn != br_nf_forward_finish &&
+ okfn != br_nf_local_out_finish && okfn != br_nf_dev_queue_xmit)
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+ || ((out->priv_flags & IFF_802_1Q_VLAN) &&
+ VLAN_DEV_INFO(out)->real_dev->hard_start_xmit == br_dev_xmit)
+#endif
+ ) {
+ struct nf_bridge_info *nf_bridge;
+
+ if (!skb->nf_bridge) {
+#ifdef CONFIG_SYSCTL
+ /* This code is executed while in the IP(v6) stack,
+ the version should be 4 or 6. We can't use
+ skb->protocol because that isn't set on
+ PF_INET(6)/LOCAL_OUT. */
+ struct iphdr *ip = ip_hdr(skb);
+
+ if (ip->version == 4 && !brnf_call_iptables)
+ return NF_ACCEPT;
+ else if (ip->version == 6 && !brnf_call_ip6tables)
+ return NF_ACCEPT;
+ else if (!brnf_deferred_hooks)
+ return NF_ACCEPT;
+#endif
+ if (hook == NF_IP_POST_ROUTING)
+ return NF_ACCEPT;
+ if (!nf_bridge_alloc(skb))
+ return NF_DROP;
+ }
+
+ nf_bridge = skb->nf_bridge;
+
+ /* This frame will arrive on PF_BRIDGE/LOCAL_OUT and we
+ * will need the indev then. For a brouter, the real indev
+ * can be a bridge port, so we make sure br_nf_local_out()
+ * doesn't use the bridge parent of the indev by using
+ * the BRNF_DONT_TAKE_PARENT mask. */
+ if (hook == NF_IP_FORWARD && nf_bridge->physindev == NULL) {
+ nf_bridge->mask |= BRNF_DONT_TAKE_PARENT;
+ nf_bridge->physindev = (struct net_device *)in;
+ }
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+ /* the iptables outdev is br0.x, not br0 */
+ if (out->priv_flags & IFF_802_1Q_VLAN)
+ nf_bridge->netoutdev = (struct net_device *)out;
+#endif
+ return NF_STOP;
+ }
+
+ return NF_ACCEPT;
+}
+
/* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent
* PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input.
* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
@@ -886,6 +1013,36 @@
.pf = PF_INET6,
.hooknum = NF_IP6_PRE_ROUTING,
.priority = NF_IP6_PRI_FIRST, },
+ { .hook = ip_sabotage_out,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_FORWARD,
+ .priority = NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD, },
+ { .hook = ip_sabotage_out,
+ .owner = THIS_MODULE,
+ .pf = PF_INET6,
+ .hooknum = NF_IP6_FORWARD,
+ .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD, },
+ { .hook = ip_sabotage_out,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_LOCAL_OUT,
+ .priority = NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, },
+ { .hook = ip_sabotage_out,
+ .owner = THIS_MODULE,
+ .pf = PF_INET6,
+ .hooknum = NF_IP6_LOCAL_OUT,
+ .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, },
+ { .hook = ip_sabotage_out,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_POST_ROUTING,
+ .priority = NF_IP_PRI_FIRST, },
+ { .hook = ip_sabotage_out,
+ .owner = THIS_MODULE,
+ .pf = PF_INET6,
+ .hooknum = NF_IP6_POST_ROUTING,
+ .priority = NF_IP6_PRI_FIRST, },
};
#ifdef CONFIG_SYSCTL
Index: net/netfilter/xt_physdev.c
===================================================================
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -110,16 +110,20 @@
if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
info->bitmask & ~XT_PHYSDEV_OP_MASK)
return false;
- if (info->bitmask & XT_PHYSDEV_OP_OUT &&
+ if (brnf_deferred_hooks == 0 &&
+ info->bitmask & XT_PHYSDEV_OP_OUT &&
(!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) ||
info->invert & XT_PHYSDEV_OP_BRIDGED) &&
hook_mask & ((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
(1 << NF_IP_POST_ROUTING))) {
printk(KERN_WARNING "physdev match: using --physdev-out in the "
"OUTPUT, FORWARD and POSTROUTING chains for non-bridged "
- "traffic is not supported anymore.\n");
- if (hook_mask & (1 << NF_IP_LOCAL_OUT))
- return false;
+ "traffic is deprecated and breaks other things, it will "
+ "be removed in January 2007. See Documentation/"
+ "feature-removal-schedule.txt for details. This doesn't "
+ "affect you in case you're using it for purely bridged "
+ "traffic.\n");
+ brnf_deferred_hooks = 1;
}
return true;
}