On 6/15/26 2:36 PM, Ilya Maximets wrote:
> On 6/13/26 1:25 AM, [email protected] wrote:
>> From: Numan Siddique <[email protected]>
>>
>> Implement the controller-side handler for the new OVN action
>> 'icmp4.inner_ip4.src = <value>' introduced in the previous patch.
>>
>> When ovn-controller receives a packet with action opcode
>> ACTION_OPCODE_PUT_ICMP4_INNER_IP4_SRC, it now:
> 
> Not a full review, but a couple comments below.
> 
> Best regards, Ilya Maximets.
> 
>>
>>   - Locates the inner IPv4 header inside the ICMP error payload
>>     (right after the 8-byte outer ICMP header).
>>   - Validates that the ICMP payload is long enough to contain
>>     the inner IPv4 header (respecting its IHL, so IP options
>>     are handled).
>>   - Rewrites the inner IPv4 source address to the value
>>     supplied in the userdata.
>>   - Recomputes the inner IPv4 header checksum.
> 
> Why a full recompute instead of incremental recalculation?  Normally,
> we shouldn't fix incorrect checksums, but the full recalculation will.
> 
> Also, the inner packet likely includes L4 headers as well and the IP
> header change will break the L4 checksum.  Not sure how important that
> is, but should at least be noted in the commit message and in the code
> that changes the data.
> 
>>   - Incrementally updates the outer ICMP checksum to reflect
>>     both the 4-byte inner-src change and the 2-byte inner
>>     IPv4-csum change (the outer ICMP csum covers the entire
>>     ICMP payload, including the inner IP header).
> 
> Do we even need this?  ICMP checksum covers both the inner IP checksum and
> the inner IP source address.  Summing the entire inner header should give
> you zero or a constant value in case the checksum was initially wrong.
> So, changes in the inner IP header should not affect the ICMP checksum, or
> do they?
> 
>>   - Resumes the OpenFlow pipeline with the rewritten packet.
>>
>> This is the building block needed to make ICMP fragmentation-
>> needed errors (RFC 1191 PMTU discovery) work correctly when an
>> OVN logical router uses stateless NAT.  With stateless NAT, the
>> inner payload of an incoming ICMP error still carries the
>> external (post-NAT) IPv4 source, so conntrack in the VM's
>> logical switch zone fails to match the tracked outgoing flow
>> and marks the packet as +inv.  By rewriting the inner source
>> back to the logical IP in the LR pipeline before the LS
>> pipeline runs, conntrack can correlate the error with the
>> original flow and the VM can honor the PMTUD signal.
>>
>> A follow-up patch will add the corresponding ovn-northd logical
>> flow that emits this action for each stateless NAT configured
>> on a logical router.
>>
>> Assisted-by: Claude Opus 4.7, Claude Code
>> Signed-off-by: Numan Siddique <[email protected]>
>> ---
>>  controller/pinctrl.c | 92 ++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 92 insertions(+)
>>
>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
>> index bb8d20e7f4..93e8d7417b 100644
>> --- a/controller/pinctrl.c
>> +++ b/controller/pinctrl.c
>> @@ -256,6 +256,10 @@ static void pinctrl_handle_nd_ns(struct rconn *swconn,
>>                                   const struct ofputil_packet_in *pin,
>>                                   struct ofpbuf *userdata,
>>                                   const struct ofpbuf *continuation);
>> +static void pinctrl_handle_put_icmp4_inner_ip4_src(
>> +    struct rconn *swconn, const struct flow *in_flow,
>> +    struct dp_packet *pkt_in, struct ofputil_packet_in *pin,
>> +    struct ofpbuf *userdata, struct ofpbuf *continuation);
>>  static void
>>  pinctrl_handle_event(struct ofpbuf *userdata)
>>      OVS_REQUIRES(pinctrl_mutex);
>> @@ -3928,6 +3932,10 @@ process_packet_in(struct rconn *swconn, const struct 
>> ofp_header *msg)
>>          break;
>>      }
>>  
>> +    case ACTION_OPCODE_PUT_ICMP4_INNER_IP4_SRC:
>> +        pinctrl_handle_put_icmp4_inner_ip4_src(swconn, &headers, &packet, 
>> &pin,
>> +                                               &userdata, &continuation);
>> +        break;
>>      default:
>>          VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
>>                       ntohl(ah->opcode));
>> @@ -6654,6 +6662,90 @@ exit:
>>      dp_packet_uninit(pkt_out_ptr);
>>  }
>>  
>> +/* Called with in the pinctrl_handler thread context. */
>> +static void
>> +pinctrl_handle_put_icmp4_inner_ip4_src(struct rconn *swconn,
>> +                                       const struct flow *in_flow,
>> +                                       struct dp_packet *pkt_in,
>> +                                       struct ofputil_packet_in *pin,
>> +                                       struct ofpbuf *userdata,
>> +                                       struct ofpbuf *continuation)
>> +{
>> +    enum ofp_version version = rconn_get_version(swconn);
>> +    enum ofputil_protocol proto = 
>> ofputil_protocol_from_ofp_version(version);
>> +    struct dp_packet *pkt_out = NULL;
>> +
>> +    /* This action only works for ICMPv4 packets. */
>> +    if (!is_icmpv4(in_flow, NULL)) {
>> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>> +        VLOG_WARN_RL(&rl,
>> +                     "put_icmp4_inner_ip4_src action on non-ICMPv4 packet");
>> +        goto exit;
>> +    }
>> +
>> +    ovs_be32 *ip4_src = ofpbuf_try_pull(userdata, sizeof *ip4_src);
>> +    if (!ip4_src) {
>> +        goto exit;
>> +    }
>> +
>> +    pkt_out = dp_packet_clone(pkt_in);
>> +    pkt_out->l2_5_ofs = pkt_in->l2_5_ofs;
>> +    pkt_out->l2_pad_size = pkt_in->l2_pad_size;
>> +    pkt_out->l3_ofs = pkt_in->l3_ofs;
>> +    pkt_out->l4_ofs = pkt_in->l4_ofs;
> 
> Clone copies all of these.
> 
>> +
>> +    struct icmp_header *ih = dp_packet_l4(pkt_out);
>> +
>> +    /* The inner IPv4 header lives in the ICMP payload, right after the
>> +     * 8-byte ICMP header. */
>> +    struct ip_header *inner_ip = ALIGNED_CAST(struct ip_header *,
>> +                                              (char *) ih + sizeof *ih);
> 
> AFAIR, shifting a pointer outside of the memory block triggers an undefined
> behavior.  I think, all the checks below need to be re-structured in a way
> that we don't have such pointers.
> 
> E.g. compare tail - known-good-pointer > struct size, instead of
>              known-good-pointer + struct size > tail.
> 
> In the current code in this patch the inner_ip is not even a known good 
> pointer.
> 

May also want to check that it's not a later fragment, just in case.

>> +    const char *pkt_end = dp_packet_tail(pkt_out);
>> +    if ((const char *)(inner_ip + 1) > pkt_end) {
>> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>> +        VLOG_WARN_RL(&rl, "put_icmp4_inner_ip4_src: ICMP payload too short "
>> +                     "to contain an inner IPv4 header");
>> +        goto exit;
>> +    }
>> +
>> +    size_t inner_ihl_bytes = IP_IHL(inner_ip->ip_ihl_ver) * 4;
>> +    if (inner_ihl_bytes < sizeof *inner_ip ||
>> +        (const char *) inner_ip + inner_ihl_bytes > pkt_end) {
>> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>> +        VLOG_WARN_RL(&rl, "put_icmp4_inner_ip4_src: inner IPv4 header "
>> +                     "length (%"PRIuSIZE") invalid", inner_ihl_bytes);
>> +        goto exit;
>> +    }
>> +
>> +    /* Save old values so we can incrementally update the outer ICMP
>> +     * checksum, which covers the entire ICMP payload (including the inner
>> +     * IP header). */
>> +    ovs_be32 old_inner_ip_src = get_16aligned_be32(&inner_ip->ip_src);
>> +    ovs_be16 old_inner_ip_csum = inner_ip->ip_csum;
>> +
>> +    /* Rewrite the inner source and recompute the inner IP checksum
>> +     * over its full IHL (including any options). */
>> +    put_16aligned_be32(&inner_ip->ip_src, *ip4_src);
>> +    inner_ip->ip_csum = 0;
>> +    inner_ip->ip_csum = csum(inner_ip, inner_ihl_bytes);
>> +
>> +    /* Fold both deltas (inner src, inner csum) into the outer ICMP csum. */
>> +    ih->icmp_csum = recalc_csum32(ih->icmp_csum, old_inner_ip_src, 
>> *ip4_src);
>> +    ih->icmp_csum = recalc_csum16(ih->icmp_csum, old_inner_ip_csum,
>> +                                  inner_ip->ip_csum);
>> +
>> +    /* The outer IPv4 header is unchanged, so its checksum stays valid. */
>> +
>> +    pin->packet = dp_packet_data(pkt_out);
>> +    pin->packet_len = dp_packet_size(pkt_out);
>> +
>> +exit:
>> +    queue_msg(swconn, ofputil_encode_resume(pin, continuation, proto));
>> +    if (pkt_out) {
>> +        dp_packet_delete(pkt_out);
>> +    }
>> +}
>> +
>>  static void
>>  wait_controller_event(void)
>>  {
> 

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to