It also turns out that we have a very similar issue (though not exactly the 
same one, and unfortunately it doesn’t get fixed by this patch).

If an instruction is going to make multiple accesses to memory (something that 
sets an area of memory to zero, a DMA, or….), we typically probe it first; if 
that fails, we go ahead and do the access using MMIO. If (by chance) a memory 
region is then added in between, lots of bad stuff happens.
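
For reference, the pattern in question looks roughly like this (a minimal 
sketch, not code from the tree; do_zero_block is a hypothetical helper, while 
probe_access and cpu_stb_mmuidx_ra are the real QEMU APIs):

/* Sketch of the probe-then-MMIO pattern described above. */
static void do_zero_block(CPUArchState *env, vaddr addr, size_t len,
                          int mmu_idx, uintptr_t ra)
{
    /* Probe once for the whole block; returns a host pointer for RAM,
     * NULL if the access has to go through the MMIO slow path. */
    void *host = probe_access(env, addr, len, MMU_DATA_STORE, mmu_idx, ra);

    if (host) {
        memset(host, 0, len);   /* fast path: plain RAM */
        return;
    }

    /* Slow path: store byte by byte via MMIO. If a memory region is
     * added between the probe and these stores, the translation no
     * longer matches -- this is where the bad stuff happens. */
    for (size_t i = 0; i < len; i++) {
        cpu_stb_mmuidx_ra(env, addr + i, 0, mmu_idx, ra);
    }
}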

One could expect many things: there used to be a cache which hid this issue; 
you could also expect memory regions to be added only ‘outside’ of an 
instruction execution block; or you could expect that adding a memory region 
shouldn’t break the MMIO access; or …..

On this one we’d like some guidance.

In general, I think we should build some QEMU tests that specifically test the 
cases where memory is/is not MMIO (and, potentially, stress the case where 
those assumptions change mid-execution).
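
Something along these lines, perhaps (a rough sketch only; stress_remap and 
the surrounding test scaffolding are hypothetical, while memory_region_init_ram 
and memory_region_add_subregion_overlap are the real memory API):

/* Overlay a RAM region on top of an existing MMIO region at runtime, so
 * that a translation cached at probe time goes stale mid-instruction. */
static void stress_remap(MemoryRegion *sysmem, hwaddr base, uint64_t size)
{
    MemoryRegion *ram = g_new0(MemoryRegion, 1);

    memory_region_init_ram(ram, NULL, "stress-ram", size, &error_fatal);
    /* Higher priority than the MMIO region already mapped at base. */
    memory_region_add_subregion_overlap(sysmem, base, ram, 1);
    /* The test would then check that an in-flight multi-access
     * instruction either completes correctly or fails cleanly. */
}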

Cheers
Mark.


> On 9 Dec 2025, at 13:14, Alex Bennée <[email protected]> wrote:
> 
> 
> Mark Burton <[email protected]> writes:
> 
>> Just posting this here for the discussion this afternoon.
>> 
>> Cheers
>> Mark.
>> 
>> 
>> [2. 0001-Record-AddressSpace-in-full-tlb-so-access-to-MMIO-vi.patch --- 
>> text/x-diff; 
>> 0001-Record-AddressSpace-in-full-tlb-so-access-to-MMIO-vi.patch]...
> 
> We were discussing last week how we should break the dependency on
> CPUState for AddressSpaces while Gustavo was looking at adding the
> FEAT_MEC code.
> 
> They are really a function of the machine, where most CPUs will share the
> same set of views. However, we still need a way to resolve the AS from
> the CPU's perspective.
> 
> Copying inline for easier commenting:
> 
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index b09229dae8..4b1aa9df71 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -1073,7 +1073,8 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
>     prot = full->prot;
>     asidx = cpu_asidx_from_attrs(cpu, full->attrs);
>     section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
> -                                                &xlat, &sz, full->attrs, &prot);
> +                                                &xlat, &sz, &full->attrs,
> +                                                &full->as, &prot);
>     assert(sz >= TARGET_PAGE_SIZE);
> 
>     tlb_debug("vaddr=%016" VADDR_PRIx " paddr=0x" HWADDR_FMT_plx
> @@ -1294,13 +1295,13 @@ static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
> }
> 
> static MemoryRegionSection *
> -io_prepare(hwaddr *out_offset, CPUState *cpu, hwaddr xlat,
> +io_prepare(hwaddr *out_offset, CPUState *cpu, AddressSpace *as, hwaddr xlat,
>            MemTxAttrs attrs, vaddr addr, uintptr_t retaddr)
> {
>     MemoryRegionSection *section;
>     hwaddr mr_offset;
> 
> -    section = iotlb_to_section(cpu, xlat, attrs);
> +    section = iotlb_to_section(as, xlat, attrs);
>     mr_offset = (xlat & TARGET_PAGE_MASK) + addr;
>     cpu->mem_io_pc = retaddr;
>     if (!cpu->neg.can_do_io) {
> @@ -1618,7 +1619,7 @@ bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
>     /* We must have an iotlb entry for MMIO */
>     if (tlb_addr & TLB_MMIO) {
>         MemoryRegionSection *section =
> -            iotlb_to_section(cpu, full->xlat_section & ~TARGET_PAGE_MASK,
> +            iotlb_to_section(full->as, full->xlat_section & ~TARGET_PAGE_MASK,
>                              full->attrs);
>         data->is_io = true;
>         data->mr = section->mr;
> @@ -2028,7 +2029,8 @@ static uint64_t do_ld_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
>     tcg_debug_assert(size > 0 && size <= 8);
> 
>     attrs = full->attrs;
> -    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
> +    section = io_prepare(&mr_offset, cpu, full->as,
> +                         full->xlat_section, attrs, addr, ra);
>     mr = section->mr;
> 
>     BQL_LOCK_GUARD();
> @@ -2049,7 +2051,8 @@ static Int128 do_ld16_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
>     tcg_debug_assert(size > 8 && size <= 16);
> 
>     attrs = full->attrs;
> -    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
> +    section = io_prepare(&mr_offset, cpu, full->as,
> +                         full->xlat_section, attrs, addr, ra);
>     mr = section->mr;
> 
>     BQL_LOCK_GUARD();
> @@ -2593,7 +2596,8 @@ static uint64_t do_st_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
>     tcg_debug_assert(size > 0 && size <= 8);
> 
>     attrs = full->attrs;
> -    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
> +    section = io_prepare(&mr_offset, cpu, full->as,
> +                         full->xlat_section, attrs, addr, ra);
>     mr = section->mr;
> 
>     BQL_LOCK_GUARD();
> @@ -2613,7 +2617,8 @@ static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
>     tcg_debug_assert(size > 8 && size <= 16);
> 
>     attrs = full->attrs;
> -    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
> +    section = io_prepare(&mr_offset, cpu, full->as,
> +                         full->xlat_section, attrs, addr, ra);
>     mr = section->mr;
> 
>     BQL_LOCK_GUARD();
> diff --git a/include/accel/tcg/iommu.h b/include/accel/tcg/iommu.h
> index 90cfd6c0ed..ac50e50601 100644
> --- a/include/accel/tcg/iommu.h
> +++ b/include/accel/tcg/iommu.h
> @@ -16,22 +16,23 @@
> 
> /**
>  * iotlb_to_section:
> - * @cpu: CPU performing the access
> + * @as: Address space to access
>  * @index: TCG CPU IOTLB entry
>  *
>  * Given a TCG CPU IOTLB entry, return the MemoryRegionSection that
>  * it refers to. @index will have been initially created and returned
>  * by memory_region_section_get_iotlb().
>  */
> -MemoryRegionSection *iotlb_to_section(CPUState *cpu,
> -                                      hwaddr index, MemTxAttrs attrs);
> +struct MemoryRegionSection *iotlb_to_section(AddressSpace *as,
> +                                             hwaddr index, MemTxAttrs attrs);
> 
> MemoryRegionSection *address_space_translate_for_iotlb(CPUState *cpu,
>                                                        int asidx,
>                                                        hwaddr addr,
>                                                        hwaddr *xlat,
>                                                        hwaddr *plen,
> -                                                       MemTxAttrs attrs,
> +                                                       MemTxAttrs *attrs,
> +                                                       AddressSpace **as,
>                                                        int *prot);
> 
> hwaddr memory_region_section_get_iotlb(CPUState *cpu,
> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> index c0ca4b6905..a27d8feefc 100644
> --- a/include/hw/core/cpu.h
> +++ b/include/hw/core/cpu.h
> @@ -269,6 +269,8 @@ struct CPUTLBEntryFull {
>             bool guarded;
>         } arm;
>     } extra;
> +
> +    AddressSpace *as;
> };
> 
> /*
> diff --git a/system/physmem.c b/system/physmem.c
> index cf7146b224..52156325d9 100644
> --- a/system/physmem.c
> +++ b/system/physmem.c
> @@ -688,7 +688,8 @@ void tcg_iommu_init_notifier_list(CPUState *cpu)
> MemoryRegionSection *
> address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr,
>                                   hwaddr *xlat, hwaddr *plen,
> -                                  MemTxAttrs attrs, int *prot)
> +                                  MemTxAttrs *attrs, AddressSpace **as,
> +                                  int *prot)
> {
>     MemoryRegionSection *section;
>     IOMMUMemoryRegion *iommu_mr;
> @@ -696,7 +697,8 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr,
>     IOMMUTLBEntry iotlb;
>     int iommu_idx;
>     hwaddr addr = orig_addr;
> -    AddressSpaceDispatch *d = address_space_to_dispatch(cpu->cpu_ases[asidx].as);
> +    *as = cpu->cpu_ases[asidx].as;
> +    AddressSpaceDispatch *d = address_space_to_dispatch(*as);
> 
>     for (;;) {
>         section = address_space_translate_internal(d, addr, &addr, plen, 
> false);
> @@ -708,13 +710,13 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr,
> 
>         imrc = memory_region_get_iommu_class_nocheck(iommu_mr);
> 
> -        iommu_idx = imrc->attrs_to_index(iommu_mr, attrs);
> +        iommu_idx = imrc->attrs_to_index(iommu_mr, *attrs);
>         tcg_register_iommu_notifier(cpu, iommu_mr, iommu_idx);
>         /* We need all the permissions, so pass IOMMU_NONE so the IOMMU
>          * doesn't short-cut its translation table walk.
>          */
>         if (imrc->translate_attr) {
> -            iotlb = imrc->translate_attr(iommu_mr, addr, IOMMU_NONE, &attrs);
> +            iotlb = imrc->translate_attr(iommu_mr, addr, IOMMU_NONE, attrs);
>         } else {
>             iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, iommu_idx);
>         }
> @@ -735,7 +737,8 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr,
>             goto translate_fail;
>         }
> 
> -        d = flatview_to_dispatch(address_space_to_flatview(iotlb.target_as));
> +        *as = iotlb.target_as;
> +        d = flatview_to_dispatch(address_space_to_flatview(*as));
>     }
> 
>     assert(!memory_region_is_iommu(section->mr));
> @@ -756,12 +759,12 @@ translate_fail:
>     return &d->map.sections[PHYS_SECTION_UNASSIGNED];
> }
> 
> -MemoryRegionSection *iotlb_to_section(CPUState *cpu,
> +
> +MemoryRegionSection *iotlb_to_section(AddressSpace *as,
>                                       hwaddr index, MemTxAttrs attrs)
> {
> -    int asidx = cpu_asidx_from_attrs(cpu, attrs);
> -    CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
> -    AddressSpaceDispatch *d = address_space_to_dispatch(cpuas->as);
> +    assert(as);
> +    AddressSpaceDispatch *d = address_space_to_dispatch(as);
>     int section_index = index & ~TARGET_PAGE_MASK;
>     MemoryRegionSection *ret;
> 
> @@ -3102,6 +3105,9 @@ static void tcg_commit(MemoryListener *listener)
>      * That said, the listener is also called during realize, before
>      * all of the tcg machinery for run-on is initialized: thus halt_cond.
>      */
> +// Why are these removed?
> +//     cpu_reloading_memory_map();
> +//     tlb_flush(cpuas->cpu);
>     if (cpu->halt_cond) {
>         async_run_on_cpu(cpu, tcg_commit_cpu, RUN_ON_CPU_HOST_PTR(cpuas));
>     } else {
> --
> 2.51.1
> 
> --
> Alex Bennée
> Virtualisation Tech Lead @ Linaro
