Also, it turns out that we have a very similar issue (though not exactly the same, and unfortunately it doesn't get fixed by this patch).

If an instruction is going to do multiple accesses to memory (something that sets an area of memory to zero, or a DMA, or similar), typically we probe it and, if the probe fails, go ahead and do the access using MMIO. If (by chance) we then get a memory region added, lots of bad stuff happens. One could expect many things: there used to be a cache which hid this issue; you could also expect memory regions to be added only 'outside' of an instruction execution block; or you could expect that adding a memory region shouldn't break the MMIO access; etc. On this one we'd like some guidance - a rough sketch of the pattern I mean is below.

In general, I think we should build some QEMU tests that specifically test the case where memory is/is not MMIO (and, potentially, stress the case where those assumptions change) - see the second sketch below.
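To make the failure mode concrete, here is a minimal sketch of the pattern (the helper name, block size and surrounding details are made up for illustration, and the exact signatures vary between tree versions - this is not a real helper):

#include "qemu/osdep.h"
#include "exec/exec-all.h"   /* probe_access() */
#include "exec/cpu_ldst.h"   /* cpu_stq_mmuidx_ra() */

/* Hypothetical helper doing a multi-word store: probe once up front,
 * then fall back to per-access MMIO if the page is not plain RAM. */
void helper_zero_block(CPUArchState *env, vaddr addr, uintptr_t ra)
{
    const int blk = 64;  /* made-up block size */
    int mmu_idx = cpu_mmu_index(env_cpu(env), false);
    void *host = probe_access(env, addr, blk, MMU_DATA_STORE, mmu_idx, ra);

    if (host) {
        /* Fast path: the whole block is backed by host RAM. */
        memset(host, 0, blk);
        return;
    }

    /* Slow path: MMIO, one access at a time.  This is the problem
     * case: if a memory region is committed between the probe above
     * and these stores, the translation state we relied on at probe
     * time is stale for the remaining accesses. */
    for (int i = 0; i < blk; i += 8) {
        cpu_stq_mmuidx_ra(env, addr + i, 0, mmu_idx, ra);
    }
}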
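And on the testing side, something shaped roughly like this (libqtest; the machine, device name and address are placeholders, and this only covers the coarse "region appears between accesses" case - the mid-instruction race above would need something at the TCG level):

#include "qemu/osdep.h"
#include "libqtest.h"

/* Access an address before and after a memory region appears there. */
static void test_mmio_region_added(void)
{
    QTestState *qts = qtest_init("-machine virt");

    /* Nothing mapped here yet: this goes down the unassigned path. */
    qtest_readl(qts, 0x10010000);

    /* Hot-add a (placeholder) device that maps an MMIO region over
     * the same address, so the flatview is rebuilt at runtime. */
    qtest_qmp_device_add(qts, "some-mmio-dev", "dev0", "{}");

    /* The same access must now reach the new region. */
    qtest_readl(qts, 0x10010000);

    qtest_quit(qts);
}

int main(int argc, char **argv)
{
    g_test_init(&argc, &argv, NULL);
    qtest_add_func("/tcg/mmio-region-added", test_mmio_region_added);
    return g_test_run();
}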
Cheers
Mark.

> On 9 Dec 2025, at 13:14, Alex Bennée <[email protected]> wrote:
> 
> Mark Burton <[email protected]> writes:
> 
>> Just posting this here for the discussion this afternoon.
>> 
>> Cheers
>> Mark.
>> 
>> 
>> [2. 0001-Record-AddressSpace-in-full-tlb-so-access-to-MMIO-vi.patch ---
>> text/x-diff;
>> 0001-Record-AddressSpace-in-full-tlb-so-access-to-MMIO-vi.patch]...
> 
> We were discussing last week how we should break the dependency on
> CPUState for AddressSpaces while Gustavo was looking at adding the
> FEAT_MEC code.
> 
> They are really a function of the machine, where most CPUs will share the
> same set of views. However, we still need a way to resolve the AS from
> the CPU's perspective.
> 
> Copying inline for easier commenting:
> 
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index b09229dae8..4b1aa9df71 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -1073,7 +1073,8 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
>      prot = full->prot;
>      asidx = cpu_asidx_from_attrs(cpu, full->attrs);
>      section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
> -                                                &xlat, &sz, full->attrs, &prot);
> +                                                &xlat, &sz, &full->attrs,
> +                                                &full->as, &prot);
>      assert(sz >= TARGET_PAGE_SIZE);
> 
>      tlb_debug("vaddr=%016" VADDR_PRIx " paddr=0x" HWADDR_FMT_plx
> @@ -1294,13 +1295,13 @@ static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
>  }
> 
>  static MemoryRegionSection *
> -io_prepare(hwaddr *out_offset, CPUState *cpu, hwaddr xlat,
> +io_prepare(hwaddr *out_offset, CPUState *cpu, AddressSpace *as, hwaddr xlat,
>             MemTxAttrs attrs, vaddr addr, uintptr_t retaddr)
>  {
>      MemoryRegionSection *section;
>      hwaddr mr_offset;
> 
> -    section = iotlb_to_section(cpu, xlat, attrs);
> +    section = iotlb_to_section(as, xlat, attrs);
>      mr_offset = (xlat & TARGET_PAGE_MASK) + addr;
>      cpu->mem_io_pc = retaddr;
>      if (!cpu->neg.can_do_io) {
> @@ -1618,7 +1619,7 @@ bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
>      /* We must have an iotlb entry for MMIO */
>      if (tlb_addr & TLB_MMIO) {
>          MemoryRegionSection *section =
> -            iotlb_to_section(cpu, full->xlat_section & ~TARGET_PAGE_MASK,
> +            iotlb_to_section(full->as, full->xlat_section & ~TARGET_PAGE_MASK,
>                               full->attrs);
>          data->is_io = true;
>          data->mr = section->mr;
> @@ -2028,7 +2029,8 @@ static uint64_t do_ld_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
>      tcg_debug_assert(size > 0 && size <= 8);
> 
>      attrs = full->attrs;
> -    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
> +    section = io_prepare(&mr_offset, cpu, full->as,
> +                         full->xlat_section, attrs, addr, ra);
>      mr = section->mr;
> 
>      BQL_LOCK_GUARD();
> @@ -2049,7 +2051,8 @@ static Int128 do_ld16_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
>      tcg_debug_assert(size > 8 && size <= 16);
> 
>      attrs = full->attrs;
> -    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
> +    section = io_prepare(&mr_offset, cpu, full->as,
> +                         full->xlat_section, attrs, addr, ra);
>      mr = section->mr;
> 
>      BQL_LOCK_GUARD();
> @@ -2593,7 +2596,8 @@ static uint64_t do_st_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
>      tcg_debug_assert(size > 0 && size <= 8);
> 
>      attrs = full->attrs;
> -    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
> +    section = io_prepare(&mr_offset, cpu, full->as,
> +                         full->xlat_section, attrs, addr, ra);
>      mr = section->mr;
> 
>      BQL_LOCK_GUARD();
> @@ -2613,7 +2617,8 @@ static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
>      tcg_debug_assert(size > 8 && size <= 16);
> 
>      attrs = full->attrs;
> -    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
> +    section = io_prepare(&mr_offset, cpu, full->as,
> +                         full->xlat_section, attrs, addr, ra);
>      mr = section->mr;
> 
>      BQL_LOCK_GUARD();
> diff --git a/include/accel/tcg/iommu.h b/include/accel/tcg/iommu.h
> index 90cfd6c0ed..ac50e50601 100644
> --- a/include/accel/tcg/iommu.h
> +++ b/include/accel/tcg/iommu.h
> @@ -16,22 +16,23 @@
> 
>  /**
>   * iotlb_to_section:
> - * @cpu: CPU performing the access
> + * @as: Address space to access
>   * @index: TCG CPU IOTLB entry
>   *
>   * Given a TCG CPU IOTLB entry, return the MemoryRegionSection that
>   * it refers to. @index will have been initially created and returned
>   * by memory_region_section_get_iotlb().
>   */
> -MemoryRegionSection *iotlb_to_section(CPUState *cpu,
> -                                      hwaddr index, MemTxAttrs attrs);
> +struct MemoryRegionSection *iotlb_to_section(AddressSpace *as,
> +                                             hwaddr index, MemTxAttrs attrs);
> 
>  MemoryRegionSection *address_space_translate_for_iotlb(CPUState *cpu,
>                                                         int asidx,
>                                                         hwaddr addr,
>                                                         hwaddr *xlat,
>                                                         hwaddr *plen,
> -                                                       MemTxAttrs attrs,
> +                                                       MemTxAttrs *attrs,
> +                                                       AddressSpace **as,
>                                                         int *prot);
> 
>  hwaddr memory_region_section_get_iotlb(CPUState *cpu,
> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> index c0ca4b6905..a27d8feefc 100644
> --- a/include/hw/core/cpu.h
> +++ b/include/hw/core/cpu.h
> @@ -269,6 +269,8 @@ struct CPUTLBEntryFull {
>              bool guarded;
>          } arm;
>      } extra;
> +
> +    AddressSpace *as;
>  };
> 
>  /*
> diff --git a/system/physmem.c b/system/physmem.c
> index cf7146b224..52156325d9 100644
> --- a/system/physmem.c
> +++ b/system/physmem.c
> @@ -688,7 +688,8 @@ void tcg_iommu_init_notifier_list(CPUState *cpu)
>  MemoryRegionSection *
>  address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr,
>                                    hwaddr *xlat, hwaddr *plen,
> -                                  MemTxAttrs attrs, int *prot)
> +                                  MemTxAttrs *attrs, AddressSpace **as,
> +                                  int *prot)
>  {
>      MemoryRegionSection *section;
>      IOMMUMemoryRegion *iommu_mr;
> @@ -696,7 +697,8 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr,
>      IOMMUTLBEntry iotlb;
>      int iommu_idx;
>      hwaddr addr = orig_addr;
> -    AddressSpaceDispatch *d = address_space_to_dispatch(cpu->cpu_ases[asidx].as);
> +    *as = cpu->cpu_ases[asidx].as;
> +    AddressSpaceDispatch *d = address_space_to_dispatch(*as);
> 
>      for (;;) {
>          section = address_space_translate_internal(d, addr, &addr, plen, false);
> @@ -708,13 +710,13 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr,
> 
>          imrc = memory_region_get_iommu_class_nocheck(iommu_mr);
> 
> -        iommu_idx = imrc->attrs_to_index(iommu_mr, attrs);
> +        iommu_idx = imrc->attrs_to_index(iommu_mr, *attrs);
>          tcg_register_iommu_notifier(cpu, iommu_mr, iommu_idx);
>          /* We need all the permissions, so pass IOMMU_NONE so the IOMMU
>           * doesn't short-cut its translation table walk.
>           */
>          if (imrc->translate_attr) {
> -            iotlb = imrc->translate_attr(iommu_mr, addr, IOMMU_NONE, &attrs);
> +            iotlb = imrc->translate_attr(iommu_mr, addr, IOMMU_NONE, attrs);
>          } else {
>              iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, iommu_idx);
>          }
> @@ -735,7 +737,8 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr,
>              goto translate_fail;
>          }
> 
> -        d = flatview_to_dispatch(address_space_to_flatview(iotlb.target_as));
> +        *as = iotlb.target_as;
> +        d = flatview_to_dispatch(address_space_to_flatview(*as));
>      }
> 
>      assert(!memory_region_is_iommu(section->mr));
> @@ -756,12 +759,12 @@ translate_fail:
>      return &d->map.sections[PHYS_SECTION_UNASSIGNED];
>  }
> 
> -MemoryRegionSection *iotlb_to_section(CPUState *cpu,
> +
> +MemoryRegionSection *iotlb_to_section(AddressSpace *as,
>                                        hwaddr index, MemTxAttrs attrs)
>  {
> -    int asidx = cpu_asidx_from_attrs(cpu, attrs);
> -    CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
> -    AddressSpaceDispatch *d = address_space_to_dispatch(cpuas->as);
> +    assert(as);
> +    AddressSpaceDispatch *d = address_space_to_dispatch(as);
>      int section_index = index & ~TARGET_PAGE_MASK;
>      MemoryRegionSection *ret;
> 
> @@ -3102,6 +3105,9 @@ static void tcg_commit(MemoryListener *listener)
>       * That said, the listener is also called during realize, before
>       * all of the tcg machinery for run-on is initialized: thus halt_cond.
>       */
> +// Why are these removed ?
> +//    cpu_reloading_memory_map();
> +//    tlb_flush(cpuas->cpu);
>      if (cpu->halt_cond) {
>          async_run_on_cpu(cpu, tcg_commit_cpu, RUN_ON_CPU_HOST_PTR(cpuas));
>      } else {
> -- 
> 2.51.1
> 
> -- 
> Alex Bennée
> Virtualisation Tech Lead @ Linaro
