> From: Scott <[email protected]>
> 
> GCC has a bug where it incorrectly elides struct initialization in
> inline functions when strict aliasing is enabled (-O2/-O3/-Os), causing
> reads from uninitialized memory. This affects both designated
> initializers and manual field assignment.

Does this still happen after you replaced the RTE_PTR_ADD() with native pointer 
arithmetic in the checksum function?
In other words: Is this workaround still necessary?

This is a showstopper:
If the workaround is necessary, applications with similar use cases also need 
to apply the workaround.
If we cannot somehow enforce that, the series is likely to break some 
applications, which is unacceptable.

> 
> Add RTE_FORCE_INIT_BARRIER macro that uses an asm volatile memory
> barrier
> to prevent the compiler from incorrectly optimizing away struct
> initialization. Apply the workaround to pseudo-header checksum
> functions
> in rte_ip4.h, rte_ip6.h, hinic driver, and mlx5 driver.
> 
> Signed-off-by: Scott Mitchell <[email protected]>
> ---
>  drivers/net/hinic/hinic_pmd_tx.c |  2 ++
>  drivers/net/mlx5/mlx5_flow_dv.c  |  2 ++
>  lib/eal/include/rte_common.h     | 14 ++++++++++++++
>  lib/net/rte_ip4.h                |  1 +
>  lib/net/rte_ip6.h                |  1 +
>  5 files changed, 20 insertions(+)
> 
> diff --git a/drivers/net/hinic/hinic_pmd_tx.c
> b/drivers/net/hinic/hinic_pmd_tx.c
> index 22fb0bffaf..570715531d 100644
> --- a/drivers/net/hinic/hinic_pmd_tx.c
> +++ b/drivers/net/hinic/hinic_pmd_tx.c
> @@ -725,6 +725,7 @@ hinic_ipv4_phdr_cksum(const struct rte_ipv4_hdr
> *ipv4_hdr, uint64_t ol_flags)
>               rte_cpu_to_be_16(rte_be_to_cpu_16(ipv4_hdr->total_length) -
>                                rte_ipv4_hdr_len(ipv4_hdr));
>       }
> +     RTE_FORCE_INIT_BARRIER(psd_hdr);
>       return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
>  }
> 
> @@ -743,6 +744,7 @@ hinic_ipv6_phdr_cksum(const struct rte_ipv6_hdr
> *ipv6_hdr, uint64_t ol_flags)
>       else
>               psd_hdr.len = ipv6_hdr->payload_len;
> 
> +     RTE_FORCE_INIT_BARRIER(psd_hdr);
>       sum = __rte_raw_cksum(&ipv6_hdr->src_addr,
>               sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr),
> 0);
>       sum = __rte_raw_cksum(&psd_hdr, sizeof(psd_hdr), sum);
> diff --git a/drivers/net/mlx5/mlx5_flow_dv.c
> b/drivers/net/mlx5/mlx5_flow_dv.c
> index 47f6d28410..1eeeb6747f 100644
> --- a/drivers/net/mlx5/mlx5_flow_dv.c
> +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> @@ -4445,6 +4445,8 @@ __flow_encap_decap_resource_register(struct
> rte_eth_dev *dev,
>                       .reserve = 0,
>               }
>       };
> +     RTE_FORCE_INIT_BARRIER(encap_decap_key);
> +
>       struct mlx5_flow_cb_ctx ctx = {
>               .error = error,
>               .data = resource,
> diff --git a/lib/eal/include/rte_common.h
> b/lib/eal/include/rte_common.h
> index 00d428e295..d58e856d96 100644
> --- a/lib/eal/include/rte_common.h
> +++ b/lib/eal/include/rte_common.h
> @@ -555,6 +555,20 @@ static void
> __attribute__((destructor(RTE_PRIO(prio)), used)) func(void)
>  #define __rte_no_ubsan_alignment
>  #endif
> 
> +/**
> + * Force struct initialization to prevent GCC optimization bug.
> + * GCC has a bug where it incorrectly elides struct initialization in
> + * inline functions when strict aliasing is enabled, causing reads
> from
> + * uninitialized memory. This memory barrier prevents the
> misoptimization.
> + */
> +#ifdef RTE_CC_GCC
> +#define RTE_FORCE_INIT_BARRIER(var) do {     \
> +     asm volatile("" : "+m" (var));          \
> +} while (0)
> +#else
> +#define RTE_FORCE_INIT_BARRIER(var)
> +#endif
> +
>  /*********** Macros for pointer arithmetic ********/
> 
>  /**
> diff --git a/lib/net/rte_ip4.h b/lib/net/rte_ip4.h
> index 822a660cfb..a0839c584e 100644
> --- a/lib/net/rte_ip4.h
> +++ b/lib/net/rte_ip4.h
> @@ -238,6 +238,7 @@ rte_ipv4_phdr_cksum(const struct rte_ipv4_hdr
> *ipv4_hdr, uint64_t ol_flags)
>               psd_hdr.len = rte_cpu_to_be_16((uint16_t)(l3_len -
>                       rte_ipv4_hdr_len(ipv4_hdr)));
>       }
> +     RTE_FORCE_INIT_BARRIER(psd_hdr);
>       return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
>  }
> 
> diff --git a/lib/net/rte_ip6.h b/lib/net/rte_ip6.h
> index d1abf1f5d5..902f100a44 100644
> --- a/lib/net/rte_ip6.h
> +++ b/lib/net/rte_ip6.h
> @@ -572,6 +572,7 @@ rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr
> *ipv6_hdr, uint64_t ol_flags)
>       else
>               psd_hdr.len = ipv6_hdr->payload_len;
> 
> +     RTE_FORCE_INIT_BARRIER(psd_hdr);
>       sum = __rte_raw_cksum(&ipv6_hdr->src_addr,
>               sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr),
>               0);
> --
> 2.39.5 (Apple Git-154)

Reply via email to