> From: Numan Siddique <[email protected]>
> 
> OVN actions today can rewrite fields of the outermost IPv4 header
> (e.g. 'ip4.dst = ...'), but they don't reach into the payload of an
> ICMP error.  An ICMPv4 error carries an embedded copy of the IP+L4
> header that triggered it (RFC 792 §3.2.2, RFC 1812 §4.3.2.3 expands
> this to up to 576 bytes), and that inner header is what NAT-aware
> receivers use to correlate the error with the original flow.
> 
> For OVN logical routers that use stateless NAT, this gap is
> user-visible: when an inbound ICMPv4 'Fragmentation Needed' error
> (RFC 1191 PMTUD) arrives, the LR pipeline rewrites only the outer
> ip4.dst back to the logical IP.  The inner header still carries the
> external (post-NAT) source, so conntrack in the downstream logical
> switch zone fails to match the tracked outgoing flow, the LS ACL
> stage drops the packet as ct.inv, and the VM's kernel never learns
> to lower its PMTU.
> 
> Introduce a new OVN-managed field 'icmp4.inner_ip4.src' that can be
> assigned from an action, and which - at runtime - rewrites the
> source IPv4 address inside an ICMPv4 error's embedded
> original-packet header (un-NATing the post-NAT external address back
> to the logical IP).
> 
> Unlike the existing 'icmp4.frag_mtu' / 'icmp6.frag_mtu' fields, which
> are encoded as ovn-field notes (the value sits in packet metadata and
> is consumed at egress time without modifying packet bytes),
> 'icmp4.inner_ip4.src' has to mutate bytes inside the ICMP payload and
> recompute checksums.  It is therefore encoded as a controller action
> carrying a new opcode, ACTION_OPCODE_PUT_ICMP4_INNER_IP4_SRC: the
> packet is sent to ovn-controller, which performs the inner rewrite
> and resumes the OpenFlow pipeline with the modified packet.
> 
> To support both encoding styles, struct ovn_field gains a new boolean
> 'is_note'.  The existing frag_mtu fields set is_note=true and keep
> their previous code path; the new field sets is_note=false and routes
> through encode_start_controller_op() / encode_finish_controller_op().
> 
> Assisted-by: Claude Opus 4.7, Claude Code
> Signed-off-by: Numan Siddique <[email protected]>

Acked-by: Lorenzo Bianconi <[email protected]>

> ---
>  include/ovn/actions.h        |  8 +++++++-
>  include/ovn/logical-fields.h |  9 +++++++++
>  lib/actions.c                | 38 +++++++++++++++++++++++++++++++++---
>  lib/logical-fields.c         | 11 ++++++++++-
>  ovn-sb.xml                   | 27 ++++++++++++++++++++++++-
>  tests/ovn.at                 |  3 +++
>  utilities/ovn-trace.c        |  6 ++++++
>  7 files changed, 96 insertions(+), 6 deletions(-)
> 
> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> index 2ca8dac8f7..712b723cdd 100644
> --- a/include/ovn/actions.h
> +++ b/include/ovn/actions.h
> @@ -829,7 +829,13 @@ OVNACTS
>       *   - The 32-bit DHCP relay IP.
>       *   - The 32-bit DHCP server IP.
>       */                                                                      
>  \
> -    ACTION_OPCODE(DHCP_RELAY_RESP_CHK)
> +    ACTION_OPCODE(DHCP_RELAY_RESP_CHK)                                       
>  \
> +                                                                             
>  \
> +    /* icmp4.inner_ip4.src = <value>
> +     * Arguments follow the action_header, in this format:
> +     *   - The 32-bit IPv4 address.
> +     */                                                                      
>  \
> +    ACTION_OPCODE(PUT_ICMP4_INNER_IP4_SRC)
>  
>  
>  enum action_opcode {
> diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
> index 3c0fb22e76..1f8fe0a9f9 100644
> --- a/include/ovn/logical-fields.h
> +++ b/include/ovn/logical-fields.h
> @@ -256,6 +256,13 @@ enum ovn_field_id {
>       */
>      OVN_ICMP6_FRAG_MTU,
>  
> +    /*
> +     * Name: "icmp4.inner_ip4.src" -
> +     * Type: be32
> +     * Description: Sets the inner payload IPv4 source address if present.
> +     */
> +    OVN_ICMP4_INNER_IP4_SRC,
> +
>      OVN_FIELD_N_IDS
>  };
>  
> @@ -264,6 +271,8 @@ struct ovn_field {
>      const char *name;
>      unsigned int n_bytes;       /* Width of the field in bytes. */
>      unsigned int n_bits;        /* Number of significant bits in field. */
> +    bool is_note; /* If true, the field is encoded as 'note' action, else as
> +                   * controller action. */
>  };
>  
>  static inline const struct ovn_field *
> diff --git a/lib/actions.c b/lib/actions.c
> index 3fbaed7af6..be9947003b 100644
> --- a/lib/actions.c
> +++ b/lib/actions.c
> @@ -2043,6 +2043,7 @@ is_paused_nested_action(enum action_opcode opcode)
>      case ACTION_OPCODE_SPLIT_BUF_ACTION:
>      case ACTION_OPCODE_DHCP_RELAY_REQ_CHK:
>      case ACTION_OPCODE_DHCP_RELAY_RESP_CHK:
> +    case ACTION_OPCODE_PUT_ICMP4_INNER_IP4_SRC:
>      default:
>          return false;
>      }
> @@ -4017,6 +4018,10 @@ format_OVNFIELD_LOAD(const struct ovnact_load *load , 
> struct ds *s)
>          ds_put_format(s, "%s = %u;", f->name,
>                        ntohs(load->imm.value.be16_int));
>          break;
> +    case OVN_ICMP4_INNER_IP4_SRC:
> +        ds_put_format(s, "%s = "IP_FMT";", f->name,
> +                      IP_ARGS(load->imm.value.be32_int));
> +        break;
>  
>      case OVN_FIELD_N_IDS:
>      default:
> @@ -4026,11 +4031,30 @@ format_OVNFIELD_LOAD(const struct ovnact_load *load , 
> struct ds *s)
>  
>  static void
>  encode_OVNFIELD_LOAD(const struct ovnact_load *load,
> -                     const struct ovnact_encode_params *ep OVS_UNUSED,
> +                     const struct ovnact_encode_params *ep,
>                       struct ofpbuf *ofpacts)
>  {
>      const struct ovn_field *f = ovn_field_from_name(load->dst.symbol->name);
> -    size_t offset = encode_start_ovn_field_note(f->id, ofpacts);
> +    size_t offset = 0;
> +    if (f->is_note) {
> +        offset = encode_start_ovn_field_note(f->id, ofpacts);
> +    } else {
> +        enum action_opcode opcode;
> +        switch (f->id) {
> +        case OVN_ICMP4_INNER_IP4_SRC:
> +            opcode = ACTION_OPCODE_PUT_ICMP4_INNER_IP4_SRC;
> +            break;
> +
> +        case OVN_ICMP4_FRAG_MTU:
> +        case OVN_ICMP6_FRAG_MTU:
> +        case OVN_FIELD_N_IDS:
> +        default:
> +            OVS_NOT_REACHED();
> +        }
> +
> +        offset = encode_start_controller_op(opcode, true, ep->ctrl_meter_id,
> +                                            ofpacts);
> +    }
>  
>      switch (f->id) {
>      case OVN_ICMP4_FRAG_MTU: {
> @@ -4041,12 +4065,20 @@ encode_OVNFIELD_LOAD(const struct ovnact_load *load,
>          ofpbuf_put(ofpacts, &load->imm.value.be32_int, sizeof(ovs_be32));
>          break;
>      }
> +    case OVN_ICMP4_INNER_IP4_SRC: {
> +        ofpbuf_put(ofpacts, &load->imm.value.be32_int, sizeof(ovs_be32));
> +        break;
> +    }
>      case OVN_FIELD_N_IDS:
>      default:
>          OVS_NOT_REACHED();
>      }
>  
> -    encode_finish_ovn_field_note(offset, ofpacts);
> +    if (f->is_note) {
> +        encode_finish_ovn_field_note(offset, ofpacts);
> +    } else {
> +        encode_finish_controller_op(offset, ofpacts);
> +    }
>  }
>  
>  static void
> diff --git a/lib/logical-fields.c b/lib/logical-fields.c
> index 807bb4db48..35d9a2e229 100644
> --- a/lib/logical-fields.c
> +++ b/lib/logical-fields.c
> @@ -30,11 +30,18 @@ const struct ovn_field ovn_fields[OVN_FIELD_N_IDS] = {
>          OVN_ICMP4_FRAG_MTU,
>          "icmp4.frag_mtu",
>          2, 16,
> +        true,
>      }, {
>          OVN_ICMP6_FRAG_MTU,
>          "icmp6.frag_mtu",
>          4, 32,
> -    },
> +        true,
> +    }, {
> +        OVN_ICMP4_INNER_IP4_SRC,
> +        "icmp4.inner_ip4.src",
> +        4, 32,
> +        false,
> +    }
>  };
>  
>  static struct shash ovnfield_by_name;
> @@ -411,6 +418,8 @@ ovn_init_symtab(struct shash *symtab)
>  
>      expr_symtab_add_ovn_field(symtab, "icmp4.frag_mtu", OVN_ICMP4_FRAG_MTU);
>      expr_symtab_add_ovn_field(symtab, "icmp6.frag_mtu", OVN_ICMP6_FRAG_MTU);
> +    expr_symtab_add_ovn_field(symtab, "icmp4.inner_ip4.src",
> +                              OVN_ICMP4_INNER_IP4_SRC);
>  }
>  
>  const char *
> diff --git a/ovn-sb.xml b/ovn-sb.xml
> index e45b63d73f..470ca00f53 100644
> --- a/ovn-sb.xml
> +++ b/ovn-sb.xml
> @@ -1306,9 +1306,34 @@
>                  defined in the RFC 1191 with the value specified in
>                  <var>constant</var>.
>                </p>
> +            </li>
> +
> +            <li>
> +              <code>icmp4.inner_ip4.src</code>
> +              <p>
> +                This field rewrites the source IPv4 address carried in the
> +                inner (original-packet) IPv4 header that is embedded in the
> +                payload of an ICMPv4 error message, setting it to
> +                <var>constant</var>.  The inner IPv4 header checksum and the
> +                outer ICMP checksum are recomputed accordingly.  The action
> +                has no effect on packets that are not ICMPv4 errors.
> +              </p>
> +
> +              <p>
> +                It can be used by stateless NAT to fix up inbound ICMPv4
> +                errors, for example <code>Fragmentation Needed</code> 
> messages
> +                used for Path MTU discovery (RFC 1191).  A stateless
> +                <code>dnat_and_snat</code> rule rewrites only the outer
> +                destination of such an error, while the embedded inner header
> +                still carries the external (post-NAT) IP as its source.  As a
> +                result conntrack in the downstream logical switch cannot
> +                correlate the error with the tracked flow.  Un-NATing the 
> inner
> +                source back to the logical IP lets conntrack relate the error
> +                and lets the originating workload honor the PMTU signal.
> +              </p>
>  
>                <p>
> -                Eg. icmp4.frag_mtu = 1500;
> +                Eg. <code>icmp4.inner_ip4.src = 10.0.0.10;</code>
>                </p>
>              </li>
>            </ul>
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 1995a989d3..59b41bde82 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -1849,6 +1849,9 @@ icmp4_error { eth.dst = ff:ff:ff:ff:ff:ff; 
> icmp4.frag_mtu = 1500; output; }; out
>  icmp4.frag_mtu = 1500;
>      encodes as note:6f.76.6e.00.00.00.00.00.05.dc
>  
> +icmp4.inner_ip4.src = 10.0.0.40;
> +    encodes as controller(userdata=00.00.00.1e.00.00.00.00.0a.00.00.28,pause)
> +
>  # icmp6
>  icmp6 { eth.dst = ff:ff:ff:ff:ff:ff; output; }; output;
>      encodes as 
> controller(userdata=00.00.00.0a.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.OFTABLE_SAVE_INPORT_HEX.00.00.00),resubmit(,OFTABLE_SAVE_INPORT)
> diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
> index c09a9041f5..263157dc3d 100644
> --- a/utilities/ovn-trace.c
> +++ b/utilities/ovn-trace.c
> @@ -2763,6 +2763,12 @@ execute_ovnfield_load(const struct ovnact_load *load,
>                               ntohs(load->imm.value.be16_int));
>          break;
>      }
> +    case OVN_ICMP4_INNER_IP4_SRC: {
> +        ovntrace_node_append(super, OVNTRACE_NODE_MODIFY,
> +                             "icmp4.inner_ip4.src = "IP_FMT,
> +                             IP_ARGS(load->imm.value.be32_int));
> +        break;
> +    }
>      case OVN_FIELD_N_IDS:
>      default:
>          OVS_NOT_REACHED();
> -- 
> 2.54.0
> 
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> 
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to