The branch main has been updated by marius:

URL: https://cgit.FreeBSD.org/src/commit/?id=b941d1c64e58708c93621cc07ed1c8e5e709cd48
commit b941d1c64e58708c93621cc07ed1c8e5e709cd48
Author:     Marius Strobl <[email protected]>
AuthorDate: 2026-02-02 21:30:42 +0000
Commit:     Marius Strobl <[email protected]>
CommitDate: 2026-02-02 21:57:56 +0000

    sym(4): Map HCB memory as uncacheable also on x86

    As part of making the chip-specific mix and match of different accesses
    (DMA/bus space) work as desired, the intent is to map the HCB memory as
    uncacheable. Prior to VM_MEMATTR_*, the !x86 way of indicating this to
    bus_dmamem_alloc(9) was BUS_DMA_COHERENT. Then later on in 2db99100a4,
    BUS_DMA_NOCACHE was hooked up to VM_MEMATTR_UNCACHEABLE for x86. As it
    turns out, still as of today bus_dmamem_alloc(9) differs in this regard
    across architectures: on arm, it still supports BUS_DMA_COHERENT only
    for requesting uncacheable DMA, while x86 still uses BUS_DMA_NOCACHE
    only. On arm64 and riscv, BUS_DMA_COHERENT seems to effectively be an
    alias for BUS_DMA_NOCACHE. Thus, allocate the HCB memory with
    BUS_DMA_COHERENT | BUS_DMA_NOCACHE so we get uncacheable memory on all
    architectures including x86 and so loads and stores from/to the HCB
    won't get reordered. However, even on x86 we still need to use at least
    compiler barriers to achieve the desired program order.

    This change should also fix panics due to out-of-sync data seen with
    FreeBSD VMs on top of OpenStack and HBAs of type lsiLogic as a result
    of loads and stores getting reordered. [1]

    While at it:
    - Nuke the unused SYM_DRIVER_NAME macro.
    - Remove unused/redundant HCB members and correct a comment typo.

    PR:		270816 [1]
    MFC after:	3 days
---
 sys/dev/sym/sym_hipd.c | 46 +++++++++++++++++++---------------------------
 1 file changed, 19 insertions(+), 27 deletions(-)

diff --git a/sys/dev/sym/sym_hipd.c b/sys/dev/sym/sym_hipd.c
index 0e51607fb07a..1399eadb21fb 100644
--- a/sys/dev/sym/sym_hipd.c
+++ b/sys/dev/sym/sym_hipd.c
@@ -58,7 +58,6 @@
  */
 
 #include <sys/cdefs.h>
-#define SYM_DRIVER_NAME	"sym-1.6.5-20000902"
 
 /* #define SYM_DEBUG_GENERIC_SUPPORT */
 
@@ -114,17 +113,15 @@ typedef u_int32_t u32;
 #include <dev/sym/sym_fw.h>
 
 /*
- *  IA32 architecture does not reorder STORES and prevents
- *  LOADS from passing STORES. It is called `program order'
- *  by Intel and allows device drivers to deal with memory
- *  ordering by only ensuring that the code is not reordered
- *  by the compiler when ordering is required.
- *  Other architectures implement a weaker ordering that
- *  requires memory barriers (and also IO barriers when they
- *  make sense) to be used.
+ * With uncacheable memory, x86 does not reorder STORES and prevents LOADS
+ * from passing STORES. For ensuring this program order, we still need to
+ * employ compiler barriers, though, when the ordering of LOADS and STORES
+ * matters.
+ * Other architectures may implement weaker ordering guarantees and, thus,
+ * require memory barriers (and also IO barriers) to be used.
  */
 #if	defined __i386__ || defined __amd64__
-#define MEMORY_BARRIER()	do { ; } while(0)
+#define MEMORY_BARRIER()	__compiler_membar()
 #elif	defined __powerpc__
 #define MEMORY_BARRIER()	__asm__ volatile("eieio; sync" : : : "memory")
 #elif	defined __arm__
@@ -594,10 +591,10 @@ static m_addr_t ___dma_getp(m_pool_s *mp)
 		goto out_err;
 
 	if (bus_dmamem_alloc(mp->dmat, &vaddr,
-	    BUS_DMA_COHERENT | BUS_DMA_WAITOK, &vbp->dmamap))
+	    BUS_DMA_COHERENT | BUS_DMA_NOCACHE | BUS_DMA_WAITOK, &vbp->dmamap))
 		goto out_err;
-	bus_dmamap_load(mp->dmat, vbp->dmamap, vaddr,
-	    MEMO_CLUSTER_SIZE, getbaddrcb, &baddr, BUS_DMA_NOWAIT);
+	bus_dmamap_load(mp->dmat, vbp->dmamap, vaddr, MEMO_CLUSTER_SIZE,
+	    getbaddrcb, &baddr, BUS_DMA_NOWAIT);
 	if (baddr) {
 		int hc = VTOB_HASH_CODE(vaddr);
 		vbp->vaddr = (m_addr_t) vaddr;
@@ -1484,7 +1481,7 @@ struct sym_hcb {
 	u32	scr_ram_seg;
 
 	/*
-	 *  Chip and controller indentification.
+	 *  Chip and controller identification.
 	 */
 	device_t device;
 
@@ -1534,7 +1531,6 @@ struct sym_hcb {
 	struct resource	*io_res;
 	struct resource	*mmio_res;
 	struct resource	*ram_res;
-	int ram_id;
 	void *intr;
 
 	/*
@@ -1558,8 +1554,6 @@ struct sym_hcb {
 	 *  BUS addresses of the chip
 	 */
 	vm_offset_t	mmio_ba;	/* MMIO BUS address		*/
-	int		mmio_ws;	/* MMIO Window size		*/
-
 	vm_offset_t	ram_ba;		/* RAM BUS address		*/
 	int		ram_ws;		/* RAM window size		*/
 
@@ -8528,16 +8522,15 @@ sym_pci_attach(device_t dev)
 	 *  Alloc/get/map/retrieve the corresponding resources.
 	 */
 	if (np->features & (FE_RAM|FE_RAM8K)) {
-		int regs_id = SYM_PCI_RAM;
+		i = SYM_PCI_RAM;
 		if (np->features & FE_64BIT)
-			regs_id = SYM_PCI_RAM64;
-		np->ram_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
-		    &regs_id, RF_ACTIVE);
+			i = SYM_PCI_RAM64;
+		np->ram_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &i,
+		    RF_ACTIVE);
 		if (!np->ram_res) {
 			device_printf(dev,"failed to allocate RAM resources\n");
 			goto attach_failed;
 		}
-		np->ram_id = regs_id;
 		np->ram_ba = rman_get_start(np->ram_res);
 	}
 
@@ -8772,16 +8765,15 @@ sym_pci_detach(device_t dev)
 	 */
 	if (np->ram_res)
 		bus_release_resource(np->device, SYS_RES_MEMORY,
-		    np->ram_id, np->ram_res);
+		    rman_get_rid(np->ram_res), np->ram_res);
 	if (np->mmio_res)
 		bus_release_resource(np->device, SYS_RES_MEMORY,
-		    SYM_PCI_MMIO, np->mmio_res);
+		    rman_get_rid(np->mmio_res), np->mmio_res);
 	if (np->io_res)
 		bus_release_resource(np->device, SYS_RES_IOPORT,
-		    SYM_PCI_IO, np->io_res);
+		    rman_get_rid(np->io_res), np->io_res);
 	if (np->irq_res)
-		bus_release_resource(np->device, SYS_RES_IRQ,
-		    0, np->irq_res);
+		bus_release_resource(np->device, SYS_RES_IRQ, 0, np->irq_res);
 
 	if (np->scriptb0)
 		sym_mfree_dma(np->scriptb0, np->scriptb_sz, "SCRIPTB0");
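
For illustration only, here is a minimal, self-contained sketch of the
allocation pattern the change applies in ___dma_getp() and of the barrier
distinction drawn in the rewritten comment: requesting uncacheable DMA
memory portably by passing both BUS_DMA_COHERENT and BUS_DMA_NOCACHE to
bus_dmamem_alloc(9), then keeping loads and stores to that memory in program
order. This is not sym(4) code; the hypothetical_cb structure, the helpers
alloc_uncached_cb()/issue_cmd()/getaddr_cb(), the ORDER_BARRIER() macro and
the DMA tag parameters are hypothetical stand-ins chosen for the sketch.

/*
 * Sketch: portable uncacheable DMA allocation plus ordering, assuming a
 * driver-supplied parent DMA tag. Names and tag parameters are illustrative.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <machine/atomic.h>
#include <machine/bus.h>

#if defined(__i386__) || defined(__amd64__)
#define	ORDER_BARRIER()	__compiler_membar()	/* program order: compiler barrier suffices */
#else
#define	ORDER_BARRIER()	atomic_thread_fence_seq_cst()	/* weaker models need a real fence */
#endif

struct hypothetical_cb {		/* stand-in for the driver's HCB */
	volatile uint32_t	cmd;
	volatile uint32_t	status;
};

static void
getaddr_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	bus_addr_t *baddrp = arg;

	*baddrp = (error == 0 && nseg == 1) ? segs[0].ds_addr : 0;
}

static int
alloc_uncached_cb(bus_dma_tag_t parent, bus_dma_tag_t *tagp, bus_dmamap_t *mapp,
    struct hypothetical_cb **cbp, bus_addr_t *baddrp)
{
	int error;

	error = bus_dma_tag_create(parent, PAGE_SIZE, 0,
	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
	    sizeof(struct hypothetical_cb), 1, sizeof(struct hypothetical_cb),
	    0, NULL, NULL, tagp);
	if (error != 0)
		return (error);

	/* Both flags together yield uncacheable memory on every architecture. */
	error = bus_dmamem_alloc(*tagp, (void **)cbp,
	    BUS_DMA_COHERENT | BUS_DMA_NOCACHE | BUS_DMA_WAITOK | BUS_DMA_ZERO,
	    mapp);
	if (error != 0) {
		bus_dma_tag_destroy(*tagp);
		return (error);
	}

	*baddrp = 0;
	error = bus_dmamap_load(*tagp, *mapp, *cbp,
	    sizeof(struct hypothetical_cb), getaddr_cb, baddrp, BUS_DMA_NOWAIT);
	if (error != 0 || *baddrp == 0) {
		bus_dmamem_free(*tagp, *cbp, *mapp);
		bus_dma_tag_destroy(*tagp);
		return (error != 0 ? error : ENOMEM);
	}
	return (0);
}

static void
issue_cmd(struct hypothetical_cb *cb, uint32_t cmd)
{
	cb->cmd = cmd;
	/* Keep the status LOAD from being reordered before the command STORE. */
	ORDER_BARRIER();
	if (cb->status != 0)
		printf("stale status 0x%x seen right after command 0x%x\n",
		    cb->status, cmd);
}

The x86 branch of ORDER_BARRIER() mirrors the new MEMORY_BARRIER() definition
in the diff above; on weaker-ordered architectures a real fence is assumed to
be needed, as sym(4)'s other MEMORY_BARRIER() variants already provide.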
