From: Numan Siddique <[email protected]>

Presently, if the inport or outport is a peer port (of router port),
then we skip sending the packet to conntrack by not setting the
reg0[0]/reg0[1] bits.  But the packet still goes through the
stages - ls_in_acl_hint, ls_in_acl and ls_in_stateful in the ingress
pipeline and similarly ls_out_acl_hint, ls_out_acl and ls_out_stateful
in the egress pipeline of the logical switch.

In these mentioned stages, the logical flows match on the ct states
(ct.new, ct.est etc) and this can be problematic if the inport or
outport is peer port.  It is more problematic if the packet was
sent to conntrack and committed in the ingress pipeline.  When
that packet enters the egress pipeline with outport set to the peer
port,  we skip sending the packet to conntrack (which is expected)
but ct state values carry over from the ingress state.  If the ct.new
flag was set in the ingress pipeline, then the below flows are hit and
the packet gets committed in the conntrack zone of the inport logical port.

table=4 (ls_out_acl         ), priority=1    ,
         match=(ip && (!ct.est || (ct.est && ct_label.blocked == 1))),
         action=(reg0[1] = 1; next;)
table=7 (ls_out_stateful    ), priority=100  ,
         match=(reg0[1] == 1 && reg0[13] == 0),
         action=(ct_commit { ct_label.blocked = 0; }; next;)

With this extra commit to the same conntrack zone, sometimes the packet gets
dropped or doesn't reach the destination.  It is not very clear how
the packet gets misplaced, but avoiding this resolves the issue.
And OVN ideally should not do this extra commit.

To address this issue, this patch adds the logical flows to skip all
the conntrack related stages if the inport or outport is peer logical
port.

table=5 (ls_in_pre_acl      ), priority=110  ,
         match=(ip && inport == "sw0-lr0"),
         action=(next(pipeline=ingress,table=18);)

table=0 (ls_out_pre_lb      ), priority=110  ,
         match=(ip && outport == "sw0-lr0"),
         action=(next(pipeline=egress,table=8);)

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2060688
Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2062431
Reported-by: Michael Washer <[email protected]>
Signed-off-by: Numan Siddique <[email protected]>
---
 northd/northd.c         | 27 +++++++++++++++++----------
 northd/ovn-northd.8.xml |  6 ++++--
 tests/ovn-northd.at     | 26 ++++++++++++++++++++++----
 3 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index b264fb850..15e8b147b 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -5661,20 +5661,27 @@ skip_port_from_conntrack(struct ovn_datapath *od, 
struct ovn_port *op,
      * know about the connection, as the icmp request went through the logical
      * router on hostA, not hostB. This would only work with distributed
      * conntrack state across all chassis. */
-    struct ds match_in = DS_EMPTY_INITIALIZER;
-    struct ds match_out = DS_EMPTY_INITIALIZER;
+    char *ingress_action =
+        xasprintf("next(pipeline=ingress,table=%d);",
+                  ovn_stage_get_table(S_SWITCH_IN_ARP_ND_RSP));
+    char *egress_action =
+        xasprintf("next(pipeline=egress,table=%d);",
+                  ovn_stage_get_table(S_SWITCH_OUT_PORT_SEC_IP));
+
+    char *ingress_match = xasprintf("ip && inport == %s", op->json_key);
+    char *egress_match = xasprintf("ip && outport == %s", op->json_key);
 
-    ds_put_format(&match_in, "ip && inport == %s", op->json_key);
-    ds_put_format(&match_out, "ip && outport == %s", op->json_key);
     ovn_lflow_add_with_lport_and_hint(lflows, od, in_stage, priority,
-                                      ds_cstr(&match_in), "next;", op->key,
-                                      &op->nbsp->header_);
+                                      ingress_match, ingress_action,
+                                      op->key, &op->nbsp->header_);
     ovn_lflow_add_with_lport_and_hint(lflows, od, out_stage, priority,
-                                      ds_cstr(&match_out), "next;", op->key,
-                                      &op->nbsp->header_);
+                                      egress_match, egress_action,
+                                      op->key, &op->nbsp->header_);
 
-    ds_destroy(&match_in);
-    ds_destroy(&match_out);
+    free(ingress_action);
+    free(egress_action);
+    free(ingress_match);
+    free(egress_match);
 }
 
 static void
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index 4784bff04..217f10ead 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -505,7 +505,8 @@
       <code>Pre-stateful</code> to send IP packets to the connection tracker
       before eventually advancing to ingress table <code>ACLs</code>. If
       special ports such as route ports or localnet ports can't use ct(), a
-      priority-110 flow is added to skip over stateful ACLs. Multicast, IPv6
+      priority-110 flow is added which skips all the stateful stages and
+      advances to the next stage after the stateful.  Multicast, IPv6
       Neighbor Discovery and MLD traffic also skips stateful ACLs. For
       "allow-stateless" ACLs, a flow is added to bypass setting the hint for
       connection tracker processing.
@@ -589,7 +590,8 @@
     <p>
       This table also has a priority-110 flow with the match
       <code>inport == <var>I</var></code> for all logical switch
-      datapaths to move traffic to the next table. Where <var>I</var>
+      datapaths which skips all the stateful stages and advances
+      to the next stage after the stateful. Where <var>I</var>
       is the peer of a logical router port. This flow is added to
       skip the connection tracking of packets which enter from
       logical router datapath to logical switch datapath.
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 60d91a771..f228e07cb 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -3934,7 +3934,7 @@ check_stateful_flows() {
   table=6 (ls_in_pre_lb       ), priority=100  , match=(ip), action=(reg0[[2]] 
= 1; next;)
   table=6 (ls_in_pre_lb       ), priority=110  , match=(eth.dst == 
$svc_monitor_mac), action=(next;)
   table=6 (ls_in_pre_lb       ), priority=110  , match=(eth.mcast), 
action=(next;)
-  table=6 (ls_in_pre_lb       ), priority=110  , match=(ip && inport == 
"sw0-lr0"), action=(next;)
+  table=6 (ls_in_pre_lb       ), priority=110  , match=(ip && inport == 
"sw0-lr0"), action=(next(pipeline=ingress,table=18);)
   table=6 (ls_in_pre_lb       ), priority=110  , match=(nd || nd_rs || nd_ra 
|| mldv1 || mldv2), action=(next;)
 ])
 
@@ -3967,7 +3967,7 @@ check_stateful_flows() {
   table=0 (ls_out_pre_lb      ), priority=100  , match=(ip), action=(reg0[[2]] 
= 1; next;)
   table=0 (ls_out_pre_lb      ), priority=110  , match=(eth.mcast), 
action=(next;)
   table=0 (ls_out_pre_lb      ), priority=110  , match=(eth.src == 
$svc_monitor_mac), action=(next;)
-  table=0 (ls_out_pre_lb      ), priority=110  , match=(ip && outport == 
"sw0-lr0"), action=(next;)
+  table=0 (ls_out_pre_lb      ), priority=110  , match=(ip && outport == 
"sw0-lr0"), action=(next(pipeline=egress,table=8);)
   table=0 (ls_out_pre_lb      ), priority=110  , match=(nd || nd_rs || nd_ra 
|| mldv1 || mldv2), action=(next;)
 ])
 
@@ -4006,10 +4006,19 @@ AT_CHECK([grep "ls_in_pre_lb" sw0flows | sort], [0], 
[dnl
   table=6 (ls_in_pre_lb       ), priority=0    , match=(1), action=(next;)
   table=6 (ls_in_pre_lb       ), priority=110  , match=(eth.dst == 
$svc_monitor_mac), action=(next;)
   table=6 (ls_in_pre_lb       ), priority=110  , match=(eth.mcast), 
action=(next;)
-  table=6 (ls_in_pre_lb       ), priority=110  , match=(ip && inport == 
"sw0-lr0"), action=(next;)
+  table=6 (ls_in_pre_lb       ), priority=110  , match=(ip && inport == 
"sw0-lr0"), action=(next(pipeline=ingress,table=18);)
   table=6 (ls_in_pre_lb       ), priority=110  , match=(nd || nd_rs || nd_ra 
|| mldv1 || mldv2), action=(next;)
 ])
 
+AT_CHECK([grep "ls_in_pre_acl" sw0flows | sed 's/table=./table=?/' | sort], 
[0], [dnl
+  table=? (ls_in_pre_acl      ), priority=0    , match=(1), action=(next;)
+  table=? (ls_in_pre_acl      ), priority=100  , match=(ip), action=(reg0[[0]] 
= 1; next;)
+  table=? (ls_in_pre_acl      ), priority=110  , match=(eth.dst == 
$svc_monitor_mac), action=(next;)
+  table=? (ls_in_pre_acl      ), priority=110  , match=(eth.mcast), 
action=(next;)
+  table=? (ls_in_pre_acl      ), priority=110  , match=(ip && inport == 
"sw0-lr0"), action=(next(pipeline=ingress,table=18);)
+  table=? (ls_in_pre_acl      ), priority=110  , match=(nd || nd_rs || nd_ra 
|| mldv1 || mldv2 || (udp && udp.src == 546 && udp.dst == 547)), action=(next;)
+])
+
 AT_CHECK([grep "ls_in_pre_stateful" sw0flows | sort], [0], [dnl
   table=7 (ls_in_pre_stateful ), priority=0    , match=(1), action=(next;)
   table=7 (ls_in_pre_stateful ), priority=100  , match=(reg0[[0]] == 1), 
action=(ct_next;)
@@ -4036,10 +4045,19 @@ AT_CHECK([grep "ls_out_pre_lb" sw0flows | sort], [0], 
[dnl
   table=0 (ls_out_pre_lb      ), priority=0    , match=(1), action=(next;)
   table=0 (ls_out_pre_lb      ), priority=110  , match=(eth.mcast), 
action=(next;)
   table=0 (ls_out_pre_lb      ), priority=110  , match=(eth.src == 
$svc_monitor_mac), action=(next;)
-  table=0 (ls_out_pre_lb      ), priority=110  , match=(ip && outport == 
"sw0-lr0"), action=(next;)
+  table=0 (ls_out_pre_lb      ), priority=110  , match=(ip && outport == 
"sw0-lr0"), action=(next(pipeline=egress,table=8);)
   table=0 (ls_out_pre_lb      ), priority=110  , match=(nd || nd_rs || nd_ra 
|| mldv1 || mldv2), action=(next;)
 ])
 
+AT_CHECK([grep "ls_out_pre_acl" sw0flows |  sed 's/table=./table=?/' | sort], 
[0], [dnl
+  table=? (ls_out_pre_acl     ), priority=0    , match=(1), action=(next;)
+  table=? (ls_out_pre_acl     ), priority=100  , match=(ip), action=(reg0[[0]] 
= 1; next;)
+  table=? (ls_out_pre_acl     ), priority=110  , match=(eth.mcast), 
action=(next;)
+  table=? (ls_out_pre_acl     ), priority=110  , match=(eth.src == 
$svc_monitor_mac), action=(next;)
+  table=? (ls_out_pre_acl     ), priority=110  , match=(ip && outport == 
"sw0-lr0"), action=(next(pipeline=egress,table=8);)
+  table=? (ls_out_pre_acl     ), priority=110  , match=(nd || nd_rs || nd_ra 
|| mldv1 || mldv2 || (udp && udp.src == 546 && udp.dst == 547)), action=(next;)
+])
+
 AT_CHECK([grep "ls_out_pre_stateful" sw0flows | sort], [0], [dnl
   table=2 (ls_out_pre_stateful), priority=0    , match=(1), action=(next;)
   table=2 (ls_out_pre_stateful), priority=100  , match=(reg0[[0]] == 1), 
action=(ct_next;)
-- 
2.34.1

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to