This is an AI-generated review of your patch. The human sending this email has considered the AI review valid, or at least plausible.
For local reproduction steps see https://netdev-ai.bots.linux.dev/ai-local.html --- net: mana: Force full-page RX buffers for 4K page size on specific systems. This commit adds processor-specific optimization for ARM64 systems with 4K pages where page_pool fragments cause throughput regression. It detects affected processors via SMBIOS and forces one RX buffer per page. > diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c > b/drivers/net/ethernet/microsoft/mana/gdma_main.c > index aef8612b73cb..05fecc00a90c 100644 > --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c > +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c [ ... ] > @@ -1959,6 +1960,128 @@ static bool mana_is_pf(unsigned short dev_id) > return dev_id == MANA_PF_DEVICE_ID; > } > > +/* > + * Table for Processor Version strings found from SMBIOS Type 4 information, > + * for processors that needs to force single RX buffer per page quirk for > + * meeting line rate performance with ARM64 + 4K pages. > + * Note: These strings are exactly matched with version fetched from SMBIOS. > + */ > +static const char * const mana_single_rxbuf_per_page_quirk_tbl[] = { > + "Cobalt 200", > +}; > + > +/* On some systems with 4K PAGE_SIZE, page_pool RX fragments can > + * trigger a throughput regression. Hence identify those processors > + * from the extracted SMBIOS table and apply the quirk to forces one > + * RX buffer per page to avoid the fragment allocation/refcounting > + * overhead in the RX refill path for those processors only. > + */ > +static bool mana_needs_single_rxbuf_per_page(struct gdma_context *gc) > +{ > + int i = 0; > + const char *ver = gc->processor_version; > + > + if (!ver) > + return false; > + > + if (PAGE_SIZE != SZ_4K) > + return false; > + > + while (i < ARRAY_SIZE(mana_single_rxbuf_per_page_quirk_tbl)) { > + if (!strcmp(ver, mana_single_rxbuf_per_page_quirk_tbl[i])) > + return true; > + i++; > + } > + > + return false; > +} > + > +static void mana_get_proc_ver_strno(const struct dmi_header *hdr, void *data) > +{ > + struct gdma_context *gc = data; > + const u8 *d = (const u8 *)hdr; > + > + /* We are only looking for Type 4: Processor Information */ > + if (hdr->type != SMBIOS_TYPE_4_PROCESSOR_INFO) > + return; > + > + /* Ensure the record is long enough to contain the Processor Version > + * field > + */ > + if (hdr->length <= SMBIOS_TYPE4_PROC_VERSION_OFFSET) > + return; > + > + /* The 'Processor Version' string is located at index pointed by > + * SMBIOS_TYPE4_PROC_VERSION_OFFSET. Make a copy of the index. > + * There could be multiple Type 4 tables so read and store the > + * processor version index found the first time. > + */ > + if (gc->proc_ver_strno) > + return; > + > + gc->proc_ver_strno = d[SMBIOS_TYPE4_PROC_VERSION_OFFSET]; > +} > + > +static const char *mana_dmi_string_nosave(const struct dmi_header *hdr, u8 s) > +{ > + const char *bp = (const char *)hdr + hdr->length; > + > + if (!s) > + return NULL; > + > + /* String numbers start at 1 */ > + while (--s > 0 && *bp) > + bp += strlen(bp) + 1; > + > + if (!*bp) > + return NULL; > + > + return bp; > +} > + > +static void mana_fetch_proc_ver_string(const struct dmi_header *hdr, > + void *data) > +{ > + struct gdma_context *gc = data; > + const char *ver; > + > + /* We are only looking for Type 4: Processor Information */ > + if (hdr->type != SMBIOS_TYPE_4_PROCESSOR_INFO) > + return; > + > + /* Extract proc version found the first time only */ > + if (!gc->proc_ver_strno || gc->processor_version) > + return; > + > + ver = mana_dmi_string_nosave(hdr, gc->proc_ver_strno); > + if (ver) > + gc->processor_version = kstrdup(ver, GFP_KERNEL); > +} > + > +/* Check and initialize all processor optimizations/quirks here */ > +static bool mana_init_processor_optimization(struct gdma_context *gc) > +{ > + bool opt_initialized = false; > + > + gc->proc_ver_strno = 0; > + gc->processor_version = NULL; > + > + dmi_walk(mana_get_proc_ver_strno, gc); > + if (!gc->proc_ver_strno) > + return false; > + > + dmi_walk(mana_fetch_proc_ver_string, gc); > + if (!gc->processor_version) > + return false; > + > + if (mana_needs_single_rxbuf_per_page(gc)) { > + gc->force_full_page_rx_buffer = true; > + opt_initialized = true; > + } > + > + return opt_initialized; > +} > + > static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id > *ent) > { > struct gdma_context *gc; [ ... ] > @@ -2013,6 +2136,11 @@ static int mana_gd_probe(struct pci_dev *pdev, const > struct pci_device_id *ent) > gc->mana_pci_debugfs = > debugfs_create_dir(pci_slot_name(pdev->slot), > mana_debugfs_root); > > + if (mana_init_processor_optimization(gc)) > + dev_info(&pdev->dev, > + "Processor specific optimization initialized on: %s\n", > + gc->processor_version); > + > err = mana_gd_setup(pdev); > if (err) > goto unmap_bar; [ ... ] > @@ -2055,6 +2183,8 @@ static int mana_gd_probe(struct pci_dev *pdev, const > struct pci_device_id *ent) > pci_iounmap(pdev, bar0_va); > free_gc: > pci_set_drvdata(pdev, NULL); > + kfree(gc->processor_version); > + gc->processor_version = NULL; > vfree(gc); > release_region: > pci_release_regions(pdev); [ ... ] > @@ -2110,6 +2240,9 @@ static void mana_gd_remove(struct pci_dev *pdev) > > pci_iounmap(pdev, gc->bar0_va); > > + kfree(gc->processor_version); > + gc->processor_version = NULL; > + > vfree(gc); > > pci_release_regions(pdev); [ ... ] > diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c > b/drivers/net/ethernet/microsoft/mana/mana_en.c > index ea71de39f996..f17cc2cc7dd9 100644 > --- a/drivers/net/ethernet/microsoft/mana/mana_en.c > +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c [ ... ] > @@ -744,6 +744,26 @@ static void *mana_get_rxbuf_pre(struct mana_rxq *rxq, > dma_addr_t *da) > return va; > } > > +static bool > +mana_use_single_rxbuf_per_page(struct mana_port_context *apc, u32 mtu) > +{ > + struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; > + > + /* On some systems with 4K PAGE_SIZE, page_pool RX fragments can > + * trigger a throughput regression. Hence forces one RX buffer per page > + * to avoid the fragment allocation/refcounting overhead in the RX > + * refill path for those processors only. > + */ > + if (gc->force_full_page_rx_buffer) > + return true; > + > + /* For xdp and jumbo frames make sure only one packet fits per page. */ > + if (mtu + MANA_RXBUF_PAD > PAGE_SIZE / 2 || mana_xdp_get(apc)) > + return true; > + > + return false; > +} > + > /* Get RX buffer's data size, alloc size, XDP headroom based on MTU */ > static void mana_get_rxbuf_cfg(struct mana_port_context *apc, > int mtu, u32 *datasize, u32 *alloc_size, [ ... ] > @@ -754,8 +774,7 @@ static void mana_get_rxbuf_cfg(struct mana_port_context > *apc, > /* Calculate datasize first (consistent across all cases) */ > *datasize = mtu + ETH_HLEN; > > - /* For xdp and jumbo frames make sure only one packet fits per page */ > - if (mtu + MANA_RXBUF_PAD > PAGE_SIZE / 2 || mana_xdp_get(apc)) { > + if (mana_use_single_rxbuf_per_page(apc, mtu)) { > if (mana_xdp_get(apc)) { > *headroom = XDP_PACKET_HEADROOM; > *alloc_size = PAGE_SIZE; [ ... ] > diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h > index ec17004b10c0..be56b347f3f6 100644 > --- a/include/net/mana/gdma.h > +++ b/include/net/mana/gdma.h [ ... ] > @@ -9,6 +9,12 @@ > > #include "shm_channel.h" > > +/* SMBIOS Type 4: Processor Information table */ > +#define SMBIOS_TYPE_4_PROCESSOR_INFO 4 > + > +/* Byte offset containing the Processor Version string number.*/ > +#define SMBIOS_TYPE4_PROC_VERSION_OFFSET 0x10 > + > #define GDMA_STATUS_MORE_ENTRIES 0x00000105 > #define GDMA_STATUS_CMD_UNSUPPORTED 0xffffffff > [ ... ] > @@ -444,6 +450,9 @@ struct gdma_context { > struct workqueue_struct *service_wq; > > unsigned long flags; > + u8 *processor_version; > + u8 proc_ver_strno; > + bool force_full_page_rx_buffer; > }; > > static inline bool mana_gd_is_mana(struct gdma_dev *gd) Should processor_version be declared as char * instead of u8 *? The field is used as a string throughout the code: it's assigned from kstrdup() in mana_fetch_proc_ver_string(), read as const char *ver in mana_needs_single_rxbuf_per_page(), passed to strcmp(), and printed with %s format in dev_info(). Using u8 * for string data can trigger compiler warnings with -Wpointer-sign, since string functions expect char * types.

