Re: [Bug Report] smmuv3 event 0x10 report when running virtio-blk-pci

2024-09-10 Thread Mostafa Saleh
On Tue, Sep 10, 2024 at 2:51 AM Zhou Wang  wrote:
>
> On 2024/9/9 22:47, Mostafa Saleh wrote:
> > Hi Zhou,
> >
> > On Mon, Sep 9, 2024 at 3:22 PM Zhou Wang via  wrote:
> >>
> >> Hi All,
> >>
> >> When I tested mainline qemu (commit 7b87a25f49), it reports smmuv3
> >> event 0x10 during kernel boot.
> >>
> >> The qemu command I use is as below:
> >>
> >> qemu-system-aarch64 -machine 
> >> virt,kernel_irqchip=on,gic-version=3,iommu=smmuv3 \
> >> -kernel Image -initrd minifs.cpio.gz \
> >> -enable-kvm -net none -nographic -m 3G -smp 6 -cpu host \
> >> -append 'rdinit=init console=ttyAMA0 ealycon=pl0ll,0x9000 maxcpus=3' \
> >> -device 
> >> pcie-root-port,port=0x8,chassis=0,id=pci.0,bus=pcie.0,multifunction=on,addr=0x2
> >>  \
> >> -device pcie-root-port,port=0x9,chassis=1,id=pci.1,bus=pcie.0,addr=0x2.0x1 
> >> \
> >> -device 
> >> virtio-blk-pci,drive=drive0,id=virtblk0,num-queues=8,packed=on,bus=pci.1 \
> >> -drive file=/home/boot.img,if=none,id=drive0,format=raw
> >>
> >> smmuv3 event 0x10 log:
> >> [...]
> >> [1.962656] virtio-pci :02:00.0: Adding to iommu group 0
> >> [1.963150] virtio-pci :02:00.0: enabling device ( -> 0002)
> >> [1.964707] virtio_blk virtio0: 6/0/0 default/read/poll queues
> >> [1.965759] virtio_blk virtio0: [vda] 2097152 512-byte logical blocks 
> >> (1.07 GB/1.00 GiB)
> >> [1.966934] arm-smmu-v3 905.smmuv3: event 0x10 received:
> >> [1.967442] input: gpio-keys as /devices/platform/gpio-keys/input/input0
> >> [1.967478] arm-smmu-v3 905.smmuv3:  0x0210
> >> [1.968381] clk: Disabling unused clocks
> >> [1.968677] arm-smmu-v3 905.smmuv3:  0x0200
> >> [1.968990] PM: genpd: Disabling unused power domains
> >> [1.969424] arm-smmu-v3 905.smmuv3:  0x
> >> [1.969814] ALSA device list:
> >> [1.970240] arm-smmu-v3 905.smmuv3:  0x
> >> [1.970471]   No soundcards found.
> >> [1.970902] arm-smmu-v3 905.smmuv3: event 0x10 received:
> >> [1.971600] arm-smmu-v3 905.smmuv3:  0x0210
> >> [1.971601] arm-smmu-v3 905.smmuv3:  0x0200
> >> [1.971601] arm-smmu-v3 905.smmuv3:  0x
> >> [1.971602] arm-smmu-v3 905.smmuv3:  0x
> >> [1.971606] arm-smmu-v3 905.smmuv3: event 0x10 received:
> >> [1.971607] arm-smmu-v3 905.smmuv3:  0x0210
> >> [1.974202] arm-smmu-v3 905.smmuv3:  0x0200
> >> [1.974634] arm-smmu-v3 905.smmuv3:  0x
> >> [1.975005] Freeing unused kernel memory: 10112K
> >> [1.975062] arm-smmu-v3 905.smmuv3:  0x
> >> [1.975442] Run init as init process
> >>
> >> One more piece of information: if "maxcpus=3" is removed from the
> >> kernel command line, the problem goes away.
> >>
> >
> > That's interesting, not sure how that would be related.
> >
> >> I am not sure if there is a bug in the vSMMU. It would be much
> >> appreciated if anyone knows about this issue or can take a look at it.
> >>
> >
> > Can you please provide logs, adding "-d trace:smmu*" to the qemu invocation?
>
> Sure. Please see the attached log (using the above qemu commit and command).
>

Thanks a lot. It seems the SMMUv3 indeed receives a translation
request with addr 0x0, which causes this event.
I don't see any kind of modification (alignment) of the address in this
path, so my hunch is that it's not related to the SMMUv3 and that the
initiator is issuing bogus addresses.
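
For reference, event 0x10 is F_TRANSLATION, and the kernel lines above are
the four 64-bit doublewords of the raw event record (truncated by this
archive, though the low byte 0x10 of the first word, the event ID, is
still readable). A minimal sketch of decoding the first doubleword,
assuming the standard record layout from ARM IHI 0070 (7.3 Event records):

#include <stdint.h>
#include <stdio.h>

/* Decode doubleword 0 of an SMMUv3 event record. */
static void decode_evt_dword0(uint64_t dw0)
{
    unsigned int id  = dw0 & 0xff;                /* bits [7:0]: event ID, 0x10 = F_TRANSLATION */
    unsigned int sid = (unsigned int)(dw0 >> 32); /* bits [63:32]: StreamID of the faulting master */

    printf("event 0x%02x, StreamID 0x%x\n", id, sid);
}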

> >
> > Also, if possible, can you please provide which Linux kernel version
> > you are using; I will see if I can repro.
>
> I just use the latest mainline kernel (commit b831f83e40a2) with defconfig.
>

I see. I can't repro in my setup, which has no "--enable-kvm" and uses
"-cpu max" instead of "-cpu host".
I will try other options and see if I can repro.

Thanks,
Mostafa
> Thanks,
> Zhou
>
> >
> > Thanks,
> > Mostafa
> >
> >> Thanks,
> >> Zhou
> >>
> >>
> >>
> >



Re: [Bug Report] smmuv3 event 0x10 report when running virtio-blk-pci

2024-09-09 Thread Mostafa Saleh
Hi Zhou,

On Mon, Sep 9, 2024 at 3:22 PM Zhou Wang via  wrote:
>
> Hi All,
>
> When I tested mainline qemu (commit 7b87a25f49), it reports smmuv3 event 0x10
> during kernel boot.
>
> The qemu command I use is as below:
>
> qemu-system-aarch64 -machine 
> virt,kernel_irqchip=on,gic-version=3,iommu=smmuv3 \
> -kernel Image -initrd minifs.cpio.gz \
> -enable-kvm -net none -nographic -m 3G -smp 6 -cpu host \
> -append 'rdinit=init console=ttyAMA0 ealycon=pl0ll,0x9000 maxcpus=3' \
> -device 
> pcie-root-port,port=0x8,chassis=0,id=pci.0,bus=pcie.0,multifunction=on,addr=0x2
>  \
> -device pcie-root-port,port=0x9,chassis=1,id=pci.1,bus=pcie.0,addr=0x2.0x1 \
> -device 
> virtio-blk-pci,drive=drive0,id=virtblk0,num-queues=8,packed=on,bus=pci.1 \
> -drive file=/home/boot.img,if=none,id=drive0,format=raw
>
> smmuv3 event 0x10 log:
> [...]
> [1.962656] virtio-pci :02:00.0: Adding to iommu group 0
> [1.963150] virtio-pci :02:00.0: enabling device ( -> 0002)
> [1.964707] virtio_blk virtio0: 6/0/0 default/read/poll queues
> [1.965759] virtio_blk virtio0: [vda] 2097152 512-byte logical blocks 
> (1.07 GB/1.00 GiB)
> [1.966934] arm-smmu-v3 905.smmuv3: event 0x10 received:
> [1.967442] input: gpio-keys as /devices/platform/gpio-keys/input/input0
> [1.967478] arm-smmu-v3 905.smmuv3:  0x0210
> [1.968381] clk: Disabling unused clocks
> [1.968677] arm-smmu-v3 905.smmuv3:  0x0200
> [1.968990] PM: genpd: Disabling unused power domains
> [1.969424] arm-smmu-v3 905.smmuv3:  0x
> [1.969814] ALSA device list:
> [1.970240] arm-smmu-v3 905.smmuv3:  0x
> [1.970471]   No soundcards found.
> [1.970902] arm-smmu-v3 905.smmuv3: event 0x10 received:
> [1.971600] arm-smmu-v3 905.smmuv3:  0x0210
> [1.971601] arm-smmu-v3 905.smmuv3:  0x0200
> [1.971601] arm-smmu-v3 905.smmuv3:  0x
> [1.971602] arm-smmu-v3 905.smmuv3:  0x
> [1.971606] arm-smmu-v3 905.smmuv3: event 0x10 received:
> [1.971607] arm-smmu-v3 905.smmuv3:  0x0210
> [1.974202] arm-smmu-v3 905.smmuv3:  0x0200
> [1.974634] arm-smmu-v3 905.smmuv3:  0x
> [1.975005] Freeing unused kernel memory: 10112K
> [1.975062] arm-smmu-v3 905.smmuv3:  0x
> [1.975442] Run init as init process
>
> One more piece of information: if "maxcpus=3" is removed from the kernel
> command line, the problem goes away.
>

That's interesting, not sure how that would be related.

> I am not sure if there is a bug in the vSMMU. It would be much appreciated
> if anyone knows about this issue or can take a look at it.
>

Can you please provide logs, adding "-d trace:smmu*" to the qemu invocation.
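
For example, something like this, appended to the command above ("-D" is
optional and just redirects the log output to a file):

qemu-system-aarch64 ... -d "trace:smmu*" -D smmu-trace.log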

Also, if possible, can you please provide which Linux kernel version
you are using; I will see if I can repro.

Thanks,
Mostafa

> Thanks,
> Zhou
>
>
>



Re: nested-smmuv3 topic, Sep 2024

2024-09-06 Thread Mostafa Saleh
Hi Nicolin,

On Thu, Sep 05, 2024 at 01:26:20AM -0700, Nicolin Chen wrote:
> Hi all,
> 
> Hope I didn't miss anybody related to the topic. Please,
> feel free to add!
> 
> <--- Background --->
> As some of you know, there is an ongoing effort for nested-smmuv3
> support in QEMU on ARM, working with the kernel IOMMUFD uAPIs:
> [Nesting for vSTE]
> https://lore.kernel.org/linux-iommu/0-v2-621370057090+91fec-smmuv3_nesting_...@nvidia.com/
> [Nesting for invalidations]
> https://lore.kernel.org/linux-iommu/cover.1724776335.git.nicol...@nvidia.com/
> 
> The kernel patches are still under review. Jason and I are hoping
> they get merged next cycle, for v6.13, which means the QEMU
> patches might start a review process as early as Nov/Dec?
> 
> That being said, I think we are still well short of the point where
> the patches can get reviewed: most of the QEMU patches on my branches
> weren't touched very often, but merely updated to the latest kernel
> uAPIs for verification. So, I feel this might be a good point to gather
> folks together to discuss the possible timeline and ask for
> help. I think this would potentially help folks who are going to
> attend the KVM forum (or LPC) to carry out a discussion. (Sorry,
> I won't make it due to a conflict.)
> 
> <-- Task Breakdown --->
> I previously sent an RFCv1 series collecting comments/suggestions
> for the multi-vSMMU instance design in the ARM Virt code:
> https://lore.kernel.org/qemu-devel/cover.1719361174.git.nicol...@nvidia.com/
> (And thanks again for all the inputs!)
> 
> The main takeaways from the discussion are:
> 1) Turn the vSMMU module into a pluggable one, like intel-iommu
> 2) Move the per-SMMU pxb bus and device auto-assign into libvirt
> 
> Apart from the multi-vSMMU thing, there's the basic nesting series:
> 0) Keep updating to the latest kernel uAPIs to support nesting
> 
> I was trying to do all three, but that was apparently too ambitious.
> The kernel side of the work is still taking a lot of my bandwidth. So
> far I have had almost-zero progress on task (1) and completely-zero on
> task (2).
> 
> <-- Help Needed --->
> So, I'm wondering if anyone might have some extra bandwidth in
> the following months to help with these two tasks, either of which
> could be a standalone project, I think.

I don’t have plans to work on qemu in the coming months; most of my
upstream focus will be on pKVM SMMUv3 support [1] in Linux, which might
overlap with some of the vSMMU work, but on the kernel side.

Otherwise, I’d be happy to review patches.

[1] 
https://lore.kernel.org/kvmarm/20230201125328.2186498-1-jean-phili...@linaro.org/

Thanks,
Mostafa

> 
> For task (0), I think I can keep updating the uAPI part, although
> it'd need some help with reviews, which I was hoping would happen after
> Intel sends the QEMU nesting backend patches. Once we know how big
> the rework is going to be, we may need to borrow some help at that
> point once again.
> 
> Thank you
> Nicolin



[PATCH] hw/arm/smmuv3: Assert input to oas2bits() is valid

2024-07-22 Thread Mostafa Saleh
Coverity has spotted a possible problem with the OAS handling
(CID 1558464): the error return of oas2bits(), -1, is not
checked, which can cause an overflow in the OAS value.

oas2bits() is only called with valid inputs; harden the function
to assert that.

Reported-By: Peter Maydell 
Link: 
https://lore.kernel.org/qemu-devel/CAFEAcA-H=n-3mhc+el6yjfl1m+x+b+fk3mkgzbn74wnxiff...@mail.gmail.com/
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3-internal.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
index 0ebf2eebcf..b6b7399347 100644
--- a/hw/arm/smmuv3-internal.h
+++ b/hw/arm/smmuv3-internal.h
@@ -599,7 +599,8 @@ static inline int oas2bits(int oas_field)
 case 5:
 return 48;
 }
-return -1;
+
+g_assert_not_reached();
 }
 
 /* CD fields */
-- 
2.45.2.1089.g2a221341d9-goog




Re: [PATCH v5 00/18] SMMUv3 nested translation support

2024-07-20 Thread Mostafa Saleh
Hi Peter,

On Fri, Jul 19, 2024 at 04:57:18PM +0100, Peter Maydell wrote:
> On Fri, 19 Jul 2024 at 16:36, Julien Grall  wrote:
> >
> > Hi,
> >
> > On 18/07/2024 10:43, Julien Grall wrote:
> > > Hi Eric,
> > >
> > > On 17/07/2024 18:43, Eric Auger wrote:
> > >> Hi Peter, Richard,
> > >>
> > >> On 7/17/24 17:09, Jean-Philippe Brucker wrote:
> > >>> On Mon, Jul 15, 2024 at 08:45:00AM +, Mostafa Saleh wrote:
> > >>>> Currently, QEMU supports emulating either stage-1 or stage-2 SMMUs
> > >>>> but not nested instances.
> > >>>> This patch series adds support for nested translation in SMMUv3,
> > >> this is controlled by the property “arm-smmuv3.stage=nested”, and
> > >> advertised to guests as (IDR0.S1P == 1 && IDR0.S2P == 1)
> > >>> For the whole series (3-9, 11, 12, 15, 16, 18):
> > >>>
> > >>> Reviewed-by: Jean-Philippe Brucker 
> > >>>
> > >>> (and I think patch 16 is missing Eric's R-b)
> > >>
> > >> Jean-Philippe and I have followed the progress of this series;
> > >> Mostafa took into account all our comments and all the patches were
> > >> reviewed. It seems to be in a pretty decent state so if you don't have
> > >> any objection, please consider pulling it for 9.1.
> > >>
> > >> On my end I did some testing in non nesting mode with virtio-net/vhost
> > >> and I have not noticed any regression.
> > >> Would be nice if someone could send his T-b for the nested part though
> > >> (Julien?).
> > >
> > > I haven't yet tried the latest version. I will do that in the next
> > > couple of days.
> > I see this is already merged. If this still matters:
> >
> > Tested-by: Julien Grall 
> 
> We can't retrospectively add the tag, but the testing itself
> is still important -- thanks for doing it.
> 
> Q: is there any reason not to:
>  (a) change the default to "nested" rather than "1"
>  (b) make the virt board (for new virt machine versions) use
>  "nested"?
> 
> AIUI "nested" should be a superset of "stage-1 only", the guest
> can just ignore stage-2 if it doesn't care about it. Or is
> there a performance hit from having stage-2 around even if the
> guest doesn't enable it?

I didn’t do benchmarks, but from the code, I don’t think there
would be a difference between using stage-1 only and nested stages
with a stage-1 config.
I didn’t make “nested” the default stage or use it for the virt
board, as I was worried about compatibility issues (I think that
would break backward migration), but otherwise I don’t see any issues.

But if I understand correctly, setting that for virt board 9.1
(virt_machine_9_1_options) would be fine?

Thanks,
Mostafa

> 
> thanks
> -- PMM



Re: [PULL 21/26] hw/arm/smmu: Refactor SMMU OAS

2024-07-20 Thread Mostafa Saleh
Hi Peter,

On Sat, Jul 20, 2024 at 04:05:40PM +0100, Peter Maydell wrote:
> On Thu, 18 Jul 2024 at 14:20, Peter Maydell  wrote:
> >
> > From: Mostafa Saleh 
> >
> > SMMUv3 OAS is currently hardcoded in the code to 44 bits, for nested
> > configurations that can be a problem, as stage-2 might be shared with
> > the CPU which might have different PARANGE, and according to SMMU manual
> > ARM IHI 0070F.b:
> > 6.3.6 SMMU_IDR5, OAS must match the system physical address size.
> >
> > This patch doesn't change the SMMU OAS, but refactors the code to
> > make it easier to do that:
> > - Rely everywhere on IDR5 for reading OAS instead of using the
> >   SMMU_IDR5_OAS macro, so, it is easier just to change IDR5 and
> >   it propagates correctly.
> > - Add additional checks when OAS is greater than 48bits.
> > - Remove unused functions/macros: pa_range/MAX_PA.
> 
> Hi; Coverity has spotted a possible problem with the OAS handling
> in this code (CID 1558464). I'm not sure if that's directly because of
> this patch or if it's just that the code change has caused Coverity to
> flag up a preexisting problem.
> 
> It's quite possible this is a false-positive because Coverity hasn't
> noticed that the situation can't happen; but if so I think it's also
> sufficiently unclear to a human reader to be worth addressing anyway.
> 
> > -static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
> > +static int decode_ste_s2_cfg(SMMUv3State *s, SMMUTransCfg *cfg,
> > + STE *ste)
> >  {
> > +uint8_t oas = FIELD_EX32(s->idr[5], IDR5, OAS);
> > +
> >  if (STE_S2AA64(ste) == 0x0) {
> >  qemu_log_mask(LOG_UNIMP,
> >"SMMUv3 AArch32 tables not supported\n");
> > @@ -460,7 +463,15 @@ static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE 
> > *ste)
> >  }
> >
> >  /* For AA64, The effective S2PS size is capped to the OAS. */
> > -cfg->s2cfg.eff_ps = oas2bits(MIN(STE_S2PS(ste), SMMU_IDR5_OAS));
> > +cfg->s2cfg.eff_ps = oas2bits(MIN(STE_S2PS(ste), oas));
> 
> oas2bits() is implemented as a function that returns -1 if the input
> isn't a valid OAS. But we don't check for that failure here, so we put
> the result into a uint8_t, which ends up as 255. Then later in
> the function we will do
>   MAKE_64BIT_MASK(0, cfg->s2cfg.eff_ps)
> which will do an undefined-behaviour shift by a negative number
> if eff_ps is 255.
> 
> If the invalid-OAS case is impossible we should assert rather
> than returning -1; if it's not impossible we should handle it.
> 
> Mostafa, could you have a look at this, please?

Yes, it should be impossible to have an invalid OAS.

This patch doesn't change the old behaviour; it just consolidates the
OAS setting in one place instead of hardcoding it everywhere. So here,
instead of using the macro (SMMU_IDR5_OAS) directly, we now read the
OAS from IDR5, which is set to SMMU_IDR5_OAS in smmuv3_init_regs().

The other field, S2PS, is cast to 6 bits, and as we use MIN and all
the previous values are valid, it should be fine:
- 0b000: 32 bits
- 0b001: 36 bits
- 0b010: 40 bits
- 0b011: 42 bits
- 0b100: 44 bits

Adding an assertion makes sense to me. Please let me know if you
want me to send a patch for that.
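
To spell out the hazard Peter describes: MAKE_64BIT_MASK is defined in
include/qemu/bitops.h as below, so a length of 255 turns into a negative
shift count:

/* As defined in include/qemu/bitops.h: */
#define MAKE_64BIT_MASK(shift, length) \
    (((~0ULL) >> (64 - (length))) << (shift))

/*
 * With eff_ps == 255 (the uint8_t wrap-around of oas2bits()'s -1),
 * MAKE_64BIT_MASK(0, 255) right-shifts by 64 - 255 = -191, a negative
 * shift count, which is undefined behaviour in C.
 */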

Thanks,
Mostafa

> 
> thanks
> -- PMM
> 



[PATCH v5 03/18] hw/arm/smmuv3: Fix encoding of CLASS in events

2024-07-15 Thread Mostafa Saleh
The SMMUv3 spec (ARM IHI 0070 F.b - 7.3 Event records) defines the
CLASS of event faults as:

CLASS: The class of the operation that caused the fault:
- 0b00: CD, CD fetch.
- 0b01: TTD, Stage 1 translation table fetch.
- 0b10: IN, Input address

However, this value was not set and was left as 0, which means CD and
not IN (0b10).

Another problem was that the stage-2 class for EABT should be IN, not
TT; according to the spec:
Translation of an IPA after successful stage 1 translation (or,
in a stage 2-only configuration, an input IPA)
- S2 == 1 (stage 2), CLASS == IN (Input to stage)

This will change soon, when nested translations are supported.

While at it, add an enum for the class, as it will be used for nesting.
However, at the moment stage-1 and stage-2 use the same class values,
except for EABT.

Fixes: 9bde7f0674 ("hw/arm/smmuv3: Implement translate callback")
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3-internal.h | 6 ++
 hw/arm/smmuv3.c  | 8 +++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
index e4dd11e1e6..0f3ecec804 100644
--- a/hw/arm/smmuv3-internal.h
+++ b/hw/arm/smmuv3-internal.h
@@ -32,6 +32,12 @@ typedef enum SMMUTranslationStatus {
 SMMU_TRANS_SUCCESS,
 } SMMUTranslationStatus;
 
+typedef enum SMMUTranslationClass {
+SMMU_CLASS_CD,
+SMMU_CLASS_TT,
+SMMU_CLASS_IN,
+} SMMUTranslationClass;
+
 /* MMIO Registers */
 
 REG32(IDR0,0x0)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 9dd3ea48e4..3d214c9f57 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -942,7 +942,9 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 event.type = SMMU_EVT_F_WALK_EABT;
 event.u.f_walk_eabt.addr = addr;
 event.u.f_walk_eabt.rnw = flag & 0x1;
-event.u.f_walk_eabt.class = 0x1;
+/* Stage-2 (only) is class IN while stage-1 is class TT */
+event.u.f_walk_eabt.class = (ptw_info.stage == 2) ?
+ SMMU_CLASS_IN : SMMU_CLASS_TT;
 event.u.f_walk_eabt.addr2 = ptw_info.addr;
 break;
 case SMMU_PTW_ERR_TRANSLATION:
@@ -950,6 +952,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 event.type = SMMU_EVT_F_TRANSLATION;
 event.u.f_translation.addr = addr;
 event.u.f_translation.addr2 = ptw_info.addr;
+event.u.f_translation.class = SMMU_CLASS_IN;
 event.u.f_translation.rnw = flag & 0x1;
 }
 break;
@@ -958,6 +961,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 event.type = SMMU_EVT_F_ADDR_SIZE;
 event.u.f_addr_size.addr = addr;
 event.u.f_addr_size.addr2 = ptw_info.addr;
+event.u.f_translation.class = SMMU_CLASS_IN;
 event.u.f_addr_size.rnw = flag & 0x1;
 }
 break;
@@ -966,6 +970,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 event.type = SMMU_EVT_F_ACCESS;
 event.u.f_access.addr = addr;
 event.u.f_access.addr2 = ptw_info.addr;
+event.u.f_translation.class = SMMU_CLASS_IN;
 event.u.f_access.rnw = flag & 0x1;
 }
 break;
@@ -974,6 +979,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 event.type = SMMU_EVT_F_PERMISSION;
 event.u.f_permission.addr = addr;
 event.u.f_permission.addr2 = ptw_info.addr;
+event.u.f_translation.class = SMMU_CLASS_IN;
 event.u.f_permission.rnw = flag & 0x1;
 }
 break;
-- 
2.45.2.993.g49e7a77208-goog




[PATCH v5 15/18] hw/arm/smmuv3: Support nested SMMUs in smmuv3_notify_iova()

2024-07-15 Thread Mostafa Saleh
IOMMUTLBEvent only understands IOVA. For stage-1 or stage-2
SMMU instances we consider the input address as the IOVA, but when
nesting is used we can't mix stage-1 and stage-2 addresses, so for
nesting only stage-1 addresses are considered the IOVA and are notified.

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3.c | 39 +--
 hw/arm/trace-events |  2 +-
 2 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 9a88b83511..84cd314b33 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1096,27 +1096,38 @@ epilogue:
  * @iova: iova
  * @tg: translation granule (if communicated through range invalidation)
  * @num_pages: number of @granule sized pages (if tg != 0), otherwise 1
+ * @stage: Which stage(1 or 2) is used
  */
 static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
IOMMUNotifier *n,
int asid, int vmid,
dma_addr_t iova, uint8_t tg,
-   uint64_t num_pages)
+   uint64_t num_pages, int stage)
 {
 SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu);
+SMMUEventInfo eventinfo = {.inval_ste_allowed = true};
+SMMUTransCfg *cfg = smmuv3_get_config(sdev, &eventinfo);
 IOMMUTLBEvent event;
 uint8_t granule;
-SMMUv3State *s = sdev->smmu;
+
+if (!cfg) {
+return;
+}
+
+/*
+ * stage is passed from TLB invalidation commands which can be either
+ * stage-1 or stage-2.
+ * However, IOMMUTLBEvent only understands IOVA, for stage-1 or stage-2
+ * SMMU instances we consider the input address as the IOVA, but when
+ * nesting is used, we can't mix stage-1 and stage-2 addresses, so for
+ * nesting only stage-1 is considered the IOVA and would be notified.
+ */
+if ((stage == SMMU_STAGE_2) && (cfg->stage == SMMU_NESTED))
+return;
 
 if (!tg) {
-SMMUEventInfo eventinfo = {.inval_ste_allowed = true};
-SMMUTransCfg *cfg = smmuv3_get_config(sdev, &eventinfo);
 SMMUTransTableInfo *tt;
 
-if (!cfg) {
-return;
-}
-
 if (asid >= 0 && cfg->asid != asid) {
 return;
 }
@@ -1125,7 +1136,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
 return;
 }
 
-if (STAGE1_SUPPORTED(s)) {
+if (stage == SMMU_STAGE_1) {
 tt = select_tt(cfg, iova);
 if (!tt) {
 return;
@@ -1151,7 +1162,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
 /* invalidate an asid/vmid/iova range tuple in all mr's */
 static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, int vmid,
   dma_addr_t iova, uint8_t tg,
-  uint64_t num_pages)
+  uint64_t num_pages, int stage)
 {
 SMMUDevice *sdev;
 
@@ -1160,10 +1171,10 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int 
asid, int vmid,
 IOMMUNotifier *n;
 
 trace_smmuv3_inv_notifiers_iova(mr->parent_obj.name, asid, vmid,
-iova, tg, num_pages);
+iova, tg, num_pages, stage);
 
 IOMMU_NOTIFIER_FOREACH(n, mr) {
-smmuv3_notify_iova(mr, n, asid, vmid, iova, tg, num_pages);
+smmuv3_notify_iova(mr, n, asid, vmid, iova, tg, num_pages, stage);
 }
 }
 }
@@ -1194,7 +1205,7 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd, 
SMMUStage stage)
 
 if (!tg) {
 trace_smmuv3_range_inval(vmid, asid, addr, tg, 1, ttl, leaf, stage);
-smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, 1);
+smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, 1, stage);
 if (stage == SMMU_STAGE_1) {
 smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, 1, ttl);
 } else {
@@ -1217,7 +1228,7 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd, 
SMMUStage stage)
 num_pages = (mask + 1) >> granule;
 trace_smmuv3_range_inval(vmid, asid, addr, tg, num_pages,
  ttl, leaf, stage);
-smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, num_pages);
+smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, num_pages, stage);
 if (stage == SMMU_STAGE_1) {
 smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, num_pages, ttl);
 } else {
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index 593cc571da..be6c8f720b 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -55,7 +55,7 @@ smmuv3_cmdq_tlbi_s12_vmid(int vmid) "vmid=%d"
 smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid=0x%x"
 smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu 
mr=%

[PATCH v5 13/18] hw/arm/smmu: Introduce smmu_iotlb_inv_asid_vmid

2024-07-15 Thread Mostafa Saleh
Soon, instead of doing TLB invalidation by ASID only, the VMID will
also be required.
Add smmu_iotlb_inv_asid_vmid() which invalidates by both ASID and VMID.

However, at the moment this function is only used in SMMU_CMD_TLBI_NH_ASID
which is a stage-1 command, so passing VMID = -1 keeps the original
behaviour.

Reviewed-by: Jean-Philippe Brucker 
Reviewed-by: Eric Auger 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 20 +---
 hw/arm/smmuv3.c  |  2 +-
 hw/arm/trace-events  |  2 +-
 include/hw/arm/smmu-common.h |  2 +-
 4 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index a100700497..bf35806b02 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -159,13 +159,14 @@ void smmu_iotlb_inv_all(SMMUState *s)
 g_hash_table_remove_all(s->iotlb);
 }
 
-static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value,
- gpointer user_data)
+static gboolean smmu_hash_remove_by_asid_vmid(gpointer key, gpointer value,
+  gpointer user_data)
 {
-int asid = *(int *)user_data;
+SMMUIOTLBPageInvInfo *info = (SMMUIOTLBPageInvInfo *)user_data;
 SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
 
-return SMMU_IOTLB_ASID(*iotlb_key) == asid;
+return (SMMU_IOTLB_ASID(*iotlb_key) == info->asid) &&
+   (SMMU_IOTLB_VMID(*iotlb_key) == info->vmid);
 }
 
 static gboolean smmu_hash_remove_by_vmid(gpointer key, gpointer value,
@@ -270,10 +271,15 @@ void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, 
dma_addr_t ipa, uint8_t tg,
 &info);
 }
 
-void smmu_iotlb_inv_asid(SMMUState *s, int asid)
+void smmu_iotlb_inv_asid_vmid(SMMUState *s, int asid, int vmid)
 {
-trace_smmu_iotlb_inv_asid(asid);
-g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid, &asid);
+SMMUIOTLBPageInvInfo info = {
+.asid = asid,
+.vmid = vmid,
+};
+
+trace_smmu_iotlb_inv_asid_vmid(asid, vmid);
+g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid_vmid, 
&info);
 }
 
 void smmu_iotlb_inv_vmid(SMMUState *s, int vmid)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index a0979f15a0..cfee42add4 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1361,7 +1361,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
 
 trace_smmuv3_cmdq_tlbi_nh_asid(asid);
 smmu_inv_notifiers_all(&s->smmu_state);
-smmu_iotlb_inv_asid(bs, asid);
+smmu_iotlb_inv_asid_vmid(bs, asid, -1);
 break;
 }
 case SMMU_CMD_TLBI_NH_ALL:
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index 7d9c1703da..4aa71b1b19 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -11,7 +11,7 @@ smmu_ptw_page_pte(int stage, int level,  uint64_t iova, 
uint64_t baseaddr, uint6
 smmu_ptw_block_pte(int stage, int level, uint64_t baseaddr, uint64_t pteaddr, 
uint64_t pte, uint64_t iova, uint64_t gpa, int bsize_mb) "stage=%d level=%d 
base@=0x%"PRIx64" pte@=0x%"PRIx64" pte=0x%"PRIx64" iova=0x%"PRIx64" block 
address = 0x%"PRIx64" block size = %d MiB"
 smmu_get_pte(uint64_t baseaddr, int index, uint64_t pteaddr, uint64_t pte) 
"baseaddr=0x%"PRIx64" index=0x%x, pteaddr=0x%"PRIx64", pte=0x%"PRIx64
 smmu_iotlb_inv_all(void) "IOTLB invalidate all"
-smmu_iotlb_inv_asid(int asid) "IOTLB invalidate asid=%d"
+smmu_iotlb_inv_asid_vmid(int asid, int vmid) "IOTLB invalidate asid=%d vmid=%d"
 smmu_iotlb_inv_vmid(int vmid) "IOTLB invalidate vmid=%d"
 smmu_iotlb_inv_iova(int asid, uint64_t addr) "IOTLB invalidate asid=%d 
addr=0x%"PRIx64
 smmu_inv_notifiers_mr(const char *name) "iommu mr=%s"
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index f9c8b00c9d..b3a937190b 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -213,7 +213,7 @@ void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, 
SMMUTLBEntry *entry);
 SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova,
 uint8_t tg, uint8_t level);
 void smmu_iotlb_inv_all(SMMUState *s);
-void smmu_iotlb_inv_asid(SMMUState *s, int asid);
+void smmu_iotlb_inv_asid_vmid(SMMUState *s, int asid, int vmid);
 void smmu_iotlb_inv_vmid(SMMUState *s, int vmid);
 void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova,
  uint8_t tg, uint64_t num_pages, uint8_t ttl);
-- 
2.45.2.993.g49e7a77208-goog




[PATCH v5 17/18] hw/arm/smmuv3: Support and advertise nesting

2024-07-15 Thread Mostafa Saleh
Everything is in place; consolidate parsing of the STE config and
setting of the translation stage.

Advertise nesting if the requested stage is "nested".

Reviewed-by: Jean-Philippe Brucker 
Reviewed-by: Eric Auger 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3.c | 35 ++-
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index d052a2ba24..32b1f4cb75 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -261,6 +261,9 @@ static void smmuv3_init_regs(SMMUv3State *s)
 /* Based on sys property, the stages supported in smmu will be 
advertised.*/
 if (s->stage && !strcmp("2", s->stage)) {
 s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S2P, 1);
+} else if (s->stage && !strcmp("nested", s->stage)) {
+s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S1P, 1);
+s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S2P, 1);
 } else {
 s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S1P, 1);
 }
@@ -425,8 +428,6 @@ static bool s2_pgtable_config_valid(uint8_t sl0, uint8_t 
t0sz, uint8_t gran)
 
 static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
 {
-cfg->stage = SMMU_STAGE_2;
-
 if (STE_S2AA64(ste) == 0x0) {
 qemu_log_mask(LOG_UNIMP,
   "SMMUv3 AArch32 tables not supported\n");
@@ -509,6 +510,27 @@ bad_ste:
 return -EINVAL;
 }
 
+static void decode_ste_config(SMMUTransCfg *cfg, uint32_t config)
+{
+
+if (STE_CFG_ABORT(config)) {
+cfg->aborted = true;
+return;
+}
+if (STE_CFG_BYPASS(config)) {
+cfg->bypassed = true;
+return;
+}
+
+if (STE_CFG_S1_ENABLED(config)) {
+cfg->stage = SMMU_STAGE_1;
+}
+
+if (STE_CFG_S2_ENABLED(config)) {
+cfg->stage |= SMMU_STAGE_2;
+}
+}
+
 /* Returns < 0 in case of invalid STE, 0 otherwise */
 static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
   STE *ste, SMMUEventInfo *event)
@@ -525,13 +547,9 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
 
 config = STE_CONFIG(ste);
 
-if (STE_CFG_ABORT(config)) {
-cfg->aborted = true;
-return 0;
-}
+decode_ste_config(cfg, config);
 
-if (STE_CFG_BYPASS(config)) {
-cfg->bypassed = true;
+if (cfg->aborted || cfg->bypassed) {
 return 0;
 }
 
@@ -704,7 +722,6 @@ static int decode_cd(SMMUv3State *s, SMMUTransCfg *cfg,
 
 /* we support only those at the moment */
 cfg->aa64 = true;
-cfg->stage = SMMU_STAGE_1;
 
 cfg->oas = oas2bits(CD_IPS(cd));
 cfg->oas = MIN(oas2bits(SMMU_IDR5_OAS), cfg->oas);
-- 
2.45.2.993.g49e7a77208-goog




[PATCH v5 18/18] hw/arm/smmu: Refactor SMMU OAS

2024-07-15 Thread Mostafa Saleh
The SMMUv3 OAS is currently hardcoded in the code to 44 bits; for nested
configurations that can be a problem, as stage-2 might be shared with
the CPU, which might have a different PARANGE, and according to the SMMU
manual ARM IHI 0070F.b:
6.3.6 SMMU_IDR5, OAS must match the system physical address size.

This patch doesn't change the SMMU OAS, but refactors the code to
make it easier to do that:
- Rely everywhere on IDR5 for reading the OAS instead of using the
  SMMU_IDR5_OAS macro, so it is easier just to change IDR5 and
  have it propagate correctly.
- Add additional checks when the OAS is greater than 48 bits.
- Remove unused functions/macros: pa_range/MAX_PA.

Reviewed-by: Eric Auger 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c |  7 ---
 hw/arm/smmuv3-internal.h | 13 -
 hw/arm/smmuv3.c  | 35 ---
 3 files changed, 32 insertions(+), 23 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 67cb134d23..7d8a353956 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -452,7 +452,8 @@ static int smmu_ptw_64_s1(SMMUState *bs, SMMUTransCfg *cfg,
 inputsize = 64 - tt->tsz;
 level = 4 - (inputsize - 4) / stride;
 indexmask = VMSA_IDXMSK(inputsize, stride, level);
-baseaddr = extract64(tt->ttb, 0, 48);
+
+baseaddr = extract64(tt->ttb, 0, cfg->oas);
 baseaddr &= ~indexmask;
 
 while (level < VMSA_LEVELS) {
@@ -576,8 +577,8 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
  * Get the ttb from concatenated structure.
  * The offset is the idx * size of each ttb(number of ptes * (sizeof(pte))
  */
-uint64_t baseaddr = extract64(cfg->s2cfg.vttb, 0, 48) + (1 << stride) *
-  idx * sizeof(uint64_t);
+uint64_t baseaddr = extract64(cfg->s2cfg.vttb, 0, cfg->s2cfg.eff_ps) +
+  (1 << stride) * idx * sizeof(uint64_t);
 dma_addr_t indexmask = VMSA_IDXMSK(inputsize, stride, level);
 
 baseaddr &= ~indexmask;
diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
index 0f3ecec804..0ebf2eebcf 100644
--- a/hw/arm/smmuv3-internal.h
+++ b/hw/arm/smmuv3-internal.h
@@ -602,19 +602,6 @@ static inline int oas2bits(int oas_field)
 return -1;
 }
 
-static inline int pa_range(STE *ste)
-{
-int oas_field = MIN(STE_S2PS(ste), SMMU_IDR5_OAS);
-
-if (!STE_S2AA64(ste)) {
-return 40;
-}
-
-return oas2bits(oas_field);
-}
-
-#define MAX_PA(ste) ((1 << pa_range(ste)) - 1)
-
 /* CD fields */
 
 #define CD_VALID(x)   extract32((x)->word[0], 31, 1)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 32b1f4cb75..d119a8026f 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -402,10 +402,10 @@ static bool s2t0sz_valid(SMMUTransCfg *cfg)
 }
 
 if (cfg->s2cfg.granule_sz == 16) {
-return (cfg->s2cfg.tsz >= 64 - oas2bits(SMMU_IDR5_OAS));
+return (cfg->s2cfg.tsz >= 64 - cfg->s2cfg.eff_ps);
 }
 
-return (cfg->s2cfg.tsz >= MAX(64 - oas2bits(SMMU_IDR5_OAS), 16));
+return (cfg->s2cfg.tsz >= MAX(64 - cfg->s2cfg.eff_ps, 16));
 }
 
 /*
@@ -426,8 +426,11 @@ static bool s2_pgtable_config_valid(uint8_t sl0, uint8_t 
t0sz, uint8_t gran)
 return nr_concat <= VMSA_MAX_S2_CONCAT;
 }
 
-static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
+static int decode_ste_s2_cfg(SMMUv3State *s, SMMUTransCfg *cfg,
+ STE *ste)
 {
+uint8_t oas = FIELD_EX32(s->idr[5], IDR5, OAS);
+
 if (STE_S2AA64(ste) == 0x0) {
 qemu_log_mask(LOG_UNIMP,
   "SMMUv3 AArch32 tables not supported\n");
@@ -460,7 +463,15 @@ static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
 }
 
 /* For AA64, The effective S2PS size is capped to the OAS. */
-cfg->s2cfg.eff_ps = oas2bits(MIN(STE_S2PS(ste), SMMU_IDR5_OAS));
+cfg->s2cfg.eff_ps = oas2bits(MIN(STE_S2PS(ste), oas));
+/*
+ * For SMMUv3.1 and later, when OAS == IAS == 52, the stage 2 input
+ * range is further limited to 48 bits unless STE.S2TG indicates a
+ * 64KB granule.
+ */
+if (cfg->s2cfg.granule_sz != 16) {
+cfg->s2cfg.eff_ps = MIN(cfg->s2cfg.eff_ps, 48);
+}
 /*
  * It is ILLEGAL for the address in S2TTB to be outside the range
  * described by the effective S2PS value.
@@ -536,6 +547,7 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
   STE *ste, SMMUEventInfo *event)
 {
 uint32_t config;
+uint8_t oas = FIELD_EX32(s->idr[5], IDR5, OAS);
 int ret;
 
 if (!STE_VALID(ste)) {
@@ -579,8 +591,8 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
  * Stage-1 OAS defaults to OAS even if not enabled as it would be used
  * in input address check for stage-2.
  */
-cfg->oas = oas2bits(SMMU_IDR5_OAS);
-ret = decode_ste_s2_cfg(cf

[PATCH v5 12/18] hw/arm/smmu: Support nesting in smmuv3_range_inval()

2024-07-15 Thread Mostafa Saleh
With nesting, we need to invalidate IPAs without over-invalidating
stage-1 IOVAs. This can be done by distinguishing IPA entries in the
TLBs by giving them ASID = -1.
To achieve that, rework the invalidation for IPAs into a separate
function, while for IOVA invalidation ASID = -1 means invalidate for
all ASIDs.

Reviewed-by: Eric Auger 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 47 
 hw/arm/smmuv3.c  | 23 --
 hw/arm/trace-events  |  2 +-
 include/hw/arm/smmu-common.h |  3 ++-
 4 files changed, 66 insertions(+), 9 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 8ed53f5b1d..a100700497 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -195,6 +195,25 @@ static gboolean 
smmu_hash_remove_by_asid_vmid_iova(gpointer key, gpointer value,
((entry->iova & ~info->mask) == info->iova);
 }
 
+static gboolean smmu_hash_remove_by_vmid_ipa(gpointer key, gpointer value,
+ gpointer user_data)
+{
+SMMUTLBEntry *iter = (SMMUTLBEntry *)value;
+IOMMUTLBEntry *entry = &iter->entry;
+SMMUIOTLBPageInvInfo *info = (SMMUIOTLBPageInvInfo *)user_data;
+SMMUIOTLBKey iotlb_key = *(SMMUIOTLBKey *)key;
+
+if (SMMU_IOTLB_ASID(iotlb_key) >= 0) {
+/* This is a stage-1 address. */
+return false;
+}
+if (info->vmid != SMMU_IOTLB_VMID(iotlb_key)) {
+return false;
+}
+return ((info->iova & ~entry->addr_mask) == entry->iova) ||
+   ((entry->iova & ~info->mask) == info->iova);
+}
+
 void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova,
  uint8_t tg, uint64_t num_pages, uint8_t ttl)
 {
@@ -223,6 +242,34 @@ void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, 
dma_addr_t iova,
 &info);
 }
 
+/*
+ * Similar to smmu_iotlb_inv_iova(), but for Stage-2, ASID is always -1,
+ * in Stage-1 invalidation ASID = -1, means don't care.
+ */
+void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, dma_addr_t ipa, uint8_t tg,
+uint64_t num_pages, uint8_t ttl)
+{
+uint8_t granule = tg ? tg * 2 + 10 : 12;
+int asid = -1;
+
+   if (ttl && (num_pages == 1)) {
+SMMUIOTLBKey key = smmu_get_iotlb_key(asid, vmid, ipa, tg, ttl);
+
+if (g_hash_table_remove(s->iotlb, &key)) {
+return;
+}
+}
+
+SMMUIOTLBPageInvInfo info = {
+.iova = ipa,
+.vmid = vmid,
+.mask = (num_pages << granule) - 1};
+
+g_hash_table_foreach_remove(s->iotlb,
+smmu_hash_remove_by_vmid_ipa,
+&info);
+}
+
 void smmu_iotlb_inv_asid(SMMUState *s, int asid)
 {
 trace_smmu_iotlb_inv_asid(asid);
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 06a96c65eb..a0979f15a0 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1168,7 +1168,7 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int 
asid, int vmid,
 }
 }
 
-static void smmuv3_range_inval(SMMUState *s, Cmd *cmd)
+static void smmuv3_range_inval(SMMUState *s, Cmd *cmd, SMMUStage stage)
 {
 dma_addr_t end, addr = CMD_ADDR(cmd);
 uint8_t type = CMD_TYPE(cmd);
@@ -1193,9 +1193,13 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd)
 }
 
 if (!tg) {
-trace_smmuv3_range_inval(vmid, asid, addr, tg, 1, ttl, leaf);
+trace_smmuv3_range_inval(vmid, asid, addr, tg, 1, ttl, leaf, stage);
 smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, 1);
-smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, 1, ttl);
+if (stage == SMMU_STAGE_1) {
+smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, 1, ttl);
+} else {
+smmu_iotlb_inv_ipa(s, vmid, addr, tg, 1, ttl);
+}
 return;
 }
 
@@ -1211,9 +1215,14 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd)
 uint64_t mask = dma_aligned_pow2_mask(addr, end, 64);
 
 num_pages = (mask + 1) >> granule;
-trace_smmuv3_range_inval(vmid, asid, addr, tg, num_pages, ttl, leaf);
+trace_smmuv3_range_inval(vmid, asid, addr, tg, num_pages,
+ ttl, leaf, stage);
 smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, num_pages);
-smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, num_pages, ttl);
+if (stage == SMMU_STAGE_1) {
+smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, num_pages, ttl);
+} else {
+smmu_iotlb_inv_ipa(s, vmid, addr, tg, num_pages, ttl);
+}
 addr += mask + 1;
 }
 }
@@ -1372,7 +1381,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
 cmd_error = SMMU_CERROR_ILL;
 break;
 }
-smmuv3_range_inval(bs, &cmd);
+smmuv3_range_inval(bs, &cmd, 

[PATCH v5 14/18] hw/arm/smmu: Support nesting in the rest of commands

2024-07-15 Thread Mostafa Saleh
Some commands need rework for nesting, as they used to assume S1
and S2 are mutually exclusive:

- CMD_TLBI_NH_ASID: Consider VMID if stage-2 is supported
- CMD_TLBI_NH_ALL: Consider VMID if stage-2 is supported, otherwise
  invalidate everything; this requires a new VMID invalidation
  function for stage-1 only (ASID >= 0)

Also, rework trace events to reflect the new implementation.

Reviewed-by: Jean-Philippe Brucker 
Reviewed-by: Eric Auger 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 16 
 hw/arm/smmuv3.c  | 28 ++--
 hw/arm/trace-events  |  4 +++-
 include/hw/arm/smmu-common.h |  1 +
 4 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index bf35806b02..67cb134d23 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -178,6 +178,16 @@ static gboolean smmu_hash_remove_by_vmid(gpointer key, 
gpointer value,
 return SMMU_IOTLB_VMID(*iotlb_key) == vmid;
 }
 
+static gboolean smmu_hash_remove_by_vmid_s1(gpointer key, gpointer value,
+gpointer user_data)
+{
+int vmid = *(int *)user_data;
+SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
+
+return (SMMU_IOTLB_VMID(*iotlb_key) == vmid) &&
+   (SMMU_IOTLB_ASID(*iotlb_key) >= 0);
+}
+
 static gboolean smmu_hash_remove_by_asid_vmid_iova(gpointer key, gpointer 
value,
   gpointer user_data)
 {
@@ -288,6 +298,12 @@ void smmu_iotlb_inv_vmid(SMMUState *s, int vmid)
 g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid, &vmid);
 }
 
+inline void smmu_iotlb_inv_vmid_s1(SMMUState *s, int vmid)
+{
+trace_smmu_iotlb_inv_vmid_s1(vmid);
+g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid_s1, &vmid);
+}
+
 /* VMSAv8-64 Translation */
 
 /**
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index cfee42add4..9a88b83511 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1353,25 +1353,49 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
 case SMMU_CMD_TLBI_NH_ASID:
 {
 int asid = CMD_ASID(&cmd);
+int vmid = -1;
 
 if (!STAGE1_SUPPORTED(s)) {
 cmd_error = SMMU_CERROR_ILL;
 break;
 }
 
+/*
+ * VMID is only matched when stage 2 is supported, otherwise set it
+ * to -1 as the value used for stage-1 only VMIDs.
+ */
+if (STAGE2_SUPPORTED(s)) {
+vmid = CMD_VMID(&cmd);
+}
+
 trace_smmuv3_cmdq_tlbi_nh_asid(asid);
 smmu_inv_notifiers_all(&s->smmu_state);
-smmu_iotlb_inv_asid_vmid(bs, asid, -1);
+smmu_iotlb_inv_asid_vmid(bs, asid, vmid);
 break;
 }
 case SMMU_CMD_TLBI_NH_ALL:
+{
+int vmid = -1;
+
 if (!STAGE1_SUPPORTED(s)) {
 cmd_error = SMMU_CERROR_ILL;
 break;
 }
+
+/*
+ * If stage-2 is supported, invalidate for this VMID only, 
otherwise
+ * invalidate the whole thing.
+ */
+if (STAGE2_SUPPORTED(s)) {
+vmid = CMD_VMID(&cmd);
+trace_smmuv3_cmdq_tlbi_nh(vmid);
+smmu_iotlb_inv_vmid_s1(bs, vmid);
+break;
+}
 QEMU_FALLTHROUGH;
+}
 case SMMU_CMD_TLBI_NSNH_ALL:
-trace_smmuv3_cmdq_tlbi_nh();
+trace_smmuv3_cmdq_tlbi_nsnh();
 smmu_inv_notifiers_all(&s->smmu_state);
 smmu_iotlb_inv_all(bs);
 break;
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index 4aa71b1b19..593cc571da 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -13,6 +13,7 @@ smmu_get_pte(uint64_t baseaddr, int index, uint64_t pteaddr, 
uint64_t pte) "base
 smmu_iotlb_inv_all(void) "IOTLB invalidate all"
 smmu_iotlb_inv_asid_vmid(int asid, int vmid) "IOTLB invalidate asid=%d vmid=%d"
 smmu_iotlb_inv_vmid(int vmid) "IOTLB invalidate vmid=%d"
+smmu_iotlb_inv_vmid_s1(int vmid) "IOTLB invalidate vmid=%d"
 smmu_iotlb_inv_iova(int asid, uint64_t addr) "IOTLB invalidate asid=%d 
addr=0x%"PRIx64
 smmu_inv_notifiers_mr(const char *name) "iommu mr=%s"
 smmu_iotlb_lookup_hit(int asid, int vmid, uint64_t addr, uint32_t hit, 
uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d vmid=%d addr=0x%"PRIx64" 
hit=%d miss=%d hit rate=%d"
@@ -47,7 +48,8 @@ smmuv3_cmdq_cfgi_cd(uint32_t sid) "sid=0x%x"
 smmuv3_config_cache_hit(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t 
perc) "Config cache HIT for sid=0x%x (hits=%d, misses=%d, hit rate=%d)"
 smmuv3_config_cache_miss(uint32_t sid, uint32_t hits, uint32_t misses, 
uint32_t perc)

[PATCH v5 08/18] hw/arm/smmuv3: Translate CD and TT using stage-2 table

2024-07-15 Thread Mostafa Saleh
According to ARM SMMU architecture specification (ARM IHI 0070 F.b),
In "5.2 Stream Table Entry":
 [51:6] S1ContextPtr
 If Config[1] == 1 (stage 2 enabled), this pointer is an IPA translated by
 stage 2 and the programmed value must be within the range of the IAS.

In "5.4.1 CD notes":
 The translation table walks performed from TTB0 or TTB1 are always performed
 in IPA space if stage 2 translations are enabled.

This patch implements translation of the S1 context descriptor pointer and
TTBx base addresses through the S2 stage (IPA -> PA).

smmuv3_do_translate() is updated to take one extra argument, the
translation class; this is useful to:
 - Decide whether a translation is stage-2 only or uses the STE config.
 - Populate the class in case of faults; WALK_EABT is left unchanged
   for stage-1 as it is always IN, while stage-2 matches the class
   in use (TT, IN, CD). This will change slightly when the PTW
   supports nested translation, as it can also issue a TT event with
   class IN.

In the case of stage-2-only translation, used in the context of nested
translation, the stage and ASID are saved and restored before and
after calling smmu_translate().

Translating CD or TTBx can fail for the following reasons:
1) Large address size: This is described in
   (3.4.3 Address sizes of SMMU-originated accesses)
   - For CD ptr larger than IAS, for SMMUv3.1, it can trigger either
 C_BAD_STE or Translation fault, we implement the latter as it
 requires no extra code.
   - For TTBx, if larger than the effective stage 1 output address size, it
 triggers C_BAD_CD.

2) Faults from PTWs (7.3 Event records)
   - F_ADDR_SIZE: large address size after first level causes stage 2 Address
 Size fault (Also in 3.4.3 Address sizes of SMMU-originated accesses)
   - F_PERMISSION: Same as an address translation. However, when
 CLASS == CD, the access is implicitly Data and a read.
   - F_ACCESS: Same as an address translation.
   - F_TRANSLATION: Same as an address translation.
   - F_WALK_EABT: Same as an address translation.
  These are already implemented in the PTW logic, so no extra handling
  required.

As the IOVA is not known in the CD and TTBx translation context, setting
the InputAddr was removed from "smmuv3_do_translate" and is done
afterwards in "smmuv3_translate" via the new function "smmuv3_fixup_event".

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3.c | 120 +---
 1 file changed, 103 insertions(+), 17 deletions(-)

diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 3f2dfada44..73d5a25705 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -337,14 +337,35 @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, 
STE *buf,
 
 }
 
+static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
+ SMMUTransCfg *cfg,
+ SMMUEventInfo *event,
+ IOMMUAccessFlags flag,
+ SMMUTLBEntry **out_entry,
+ SMMUTranslationClass class);
 /* @ssid > 0 not supported yet */
-static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
-   CD *buf, SMMUEventInfo *event)
+static int smmu_get_cd(SMMUv3State *s, STE *ste, SMMUTransCfg *cfg,
+   uint32_t ssid, CD *buf, SMMUEventInfo *event)
 {
 dma_addr_t addr = STE_CTXPTR(ste);
 int ret, i;
+SMMUTranslationStatus status;
+SMMUTLBEntry *entry;
 
 trace_smmuv3_get_cd(addr);
+
+if (cfg->stage == SMMU_NESTED) {
+status = smmuv3_do_translate(s, addr, cfg, event,
+ IOMMU_RO, &entry, SMMU_CLASS_CD);
+
+/* Same PTW faults are reported but with CLASS = CD. */
+if (status != SMMU_TRANS_SUCCESS) {
+return -EINVAL;
+}
+
+addr = CACHED_ENTRY_TO_ADDR(entry, addr);
+}
+
 /* TODO: guarantee 64-bit single-copy atomicity */
 ret = dma_memory_read(&address_space_memory, addr, buf, sizeof(*buf),
   MEMTXATTRS_UNSPECIFIED);
@@ -659,10 +680,13 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, 
STE *ste,
 return 0;
 }
 
-static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event)
+static int decode_cd(SMMUv3State *s, SMMUTransCfg *cfg,
+ CD *cd, SMMUEventInfo *event)
 {
 int ret = -EINVAL;
 int i;
+SMMUTranslationStatus status;
+SMMUTLBEntry *entry;
 
 if (!CD_VALID(cd) || !CD_AARCH64(cd)) {
 goto bad_cd;
@@ -713,9 +737,26 @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, 
SMMUEventInfo *event)
 
 tt->tsz = tsz;
 tt->ttb = CD_TTB(cd, i);
+
 if (tt->ttb & ~(MAKE_64BIT_MASK(0, cfg->oas))) {
 goto bad_cd;
 }
+
+/* T

[PATCH v5 11/18] hw/arm/smmu-common: Support nested translation

2024-07-15 Thread Mostafa Saleh
When nested translation is requested, do the following:
- Translate stage-1 table address IPA into PA through stage-2.
- Translate stage-1 table walk output (IPA) through stage-2.
- Create a single TLB entry from stage-1 and stage-2 translations
  using logic introduced before.

smmu_ptw() has a new argument, SMMUState, which includes the TLB, as
the stage-1 table address can be cached in there.

Also, in smmu_ptw(), a separate path is used for nesting to simplify
the code, although some logic could be combined.

With nested translation, the class of a translation fault can be
different from the class of the translation, as faults from translating
stage-1 tables are considered CLASS_TT and not CLASS_IN. A new member,
"is_ipa_descriptor", is added to "SMMUPTWEventInfo" to distinguish
faults from walking the stage-1 translation table from faults from
translating an IPA for a transaction.

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 74 +++-
 hw/arm/smmuv3.c  | 14 +++
 include/hw/arm/smmu-common.h |  7 ++--
 3 files changed, 82 insertions(+), 13 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index c894c4c621..8ed53f5b1d 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -318,8 +318,41 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, 
dma_addr_t iova)
 return NULL;
 }
 
+/* Translate stage-1 table address using stage-2 page table. */
+static inline int translate_table_addr_ipa(SMMUState *bs,
+   dma_addr_t *table_addr,
+   SMMUTransCfg *cfg,
+   SMMUPTWEventInfo *info)
+{
+dma_addr_t addr = *table_addr;
+SMMUTLBEntry *cached_entry;
+int asid;
+
+/*
+ * The translation table walks performed from TTB0 or TTB1 are always
+ * performed in IPA space if stage 2 translations are enabled.
+ */
+asid = cfg->asid;
+cfg->stage = SMMU_STAGE_2;
+cfg->asid = -1;
+cached_entry = smmu_translate(bs, cfg, addr, IOMMU_RO, info);
+cfg->asid = asid;
+cfg->stage = SMMU_NESTED;
+
+if (cached_entry) {
+*table_addr = CACHED_ENTRY_TO_ADDR(cached_entry, addr);
+return 0;
+}
+
+info->stage = SMMU_STAGE_2;
+info->addr = addr;
+info->is_ipa_descriptor = true;
+return -EINVAL;
+}
+
 /**
  * smmu_ptw_64_s1 - VMSAv8-64 Walk of the page tables for a given IOVA
+ * @bs: smmu state which includes TLB instance
  * @cfg: translation config
  * @iova: iova to translate
  * @perm: access type
@@ -331,7 +364,7 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t 
iova)
  * Upon success, @tlbe is filled with translated_addr and entry
  * permission rights.
  */
-static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
+static int smmu_ptw_64_s1(SMMUState *bs, SMMUTransCfg *cfg,
   dma_addr_t iova, IOMMUAccessFlags perm,
   SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
 {
@@ -381,6 +414,11 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
 goto error;
 }
 baseaddr = get_table_pte_address(pte, granule_sz);
+if (cfg->stage == SMMU_NESTED) {
+if (translate_table_addr_ipa(bs, &baseaddr, cfg, info)) {
+goto error;
+}
+}
 level++;
 continue;
 } else if (is_page_pte(pte, level)) {
@@ -568,10 +606,8 @@ error:
  * combine S1 and S2 TLB entries into a single entry.
  * As a result the S1 entry is overriden with combined data.
  */
-static void __attribute__((unused)) combine_tlb(SMMUTLBEntry *tlbe,
-SMMUTLBEntry *tlbe_s2,
-dma_addr_t iova,
-SMMUTransCfg *cfg)
+static void combine_tlb(SMMUTLBEntry *tlbe, SMMUTLBEntry *tlbe_s2,
+dma_addr_t iova, SMMUTransCfg *cfg)
 {
 if (tlbe_s2->entry.addr_mask < tlbe->entry.addr_mask) {
 tlbe->entry.addr_mask = tlbe_s2->entry.addr_mask;
@@ -591,6 +627,7 @@ static void __attribute__((unused)) 
combine_tlb(SMMUTLBEntry *tlbe,
 /**
  * smmu_ptw - Walk the page tables for an IOVA, according to @cfg
  *
+ * @bs: smmu state which includes TLB instance
  * @cfg: translation configuration
  * @iova: iova to translate
  * @perm: tentative access type
@@ -599,11 +636,15 @@ static void __attribute__((unused)) 
combine_tlb(SMMUTLBEntry *tlbe,
  *
  * return 0 on success
  */
-int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
- SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
+int smmu_ptw(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t iova,
+ IOMMUAccessFlags perm, SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
 {
+int ret;
+SMMUTLBEntry tlbe_s2;
+dma_addr_t ipa;
+
 if

[PATCH v5 00/18] SMMUv3 nested translation support

2024-07-15 Thread Mostafa Saleh
xes.

Changes in v5:
v4: 
https://lore.kernel.org/qemu-devel/20240701110241.2005222-1-smost...@google.com/
- Collect Eric and Jean Rbs
- Fix a bug with nested lookup granule and iova mask
- Fix InputAddr for events for cd and ttbx translation faults
- Fix class in translation fault events
- Fix smmuv3_notify_iova
- Fix CACHED_ENTRY_TO_ADDR macro
- Drop FWB patch
- Fix bisectability by moving smmu_iotlb_inv_asid_vmid

Changes in v4:
v3: 
https://lore.kernel.org/qemu-devel/20240429032403.74910-1-smost...@google.com/
- Collected Eric and Alex Rbs
- Rebased on master
- Dropped RFC tag
- Dropped last 2 patches about oas changes to avoid blocking this series
  and I will post them after as RFC
- Split patch 7, and introduce CACHED_ENTRY_TO_ADDR in a separate patch
- Reorder patch 8 and 9 (combine tlb and tlb lookup)
- Split patch 12, and introduce smmu_iotlb_inv_asid_vmid in a separate patch
- Split patch 14, to have fault changes in a separate patch
- Update commit messages and include Fixes sha
- Minor updates, renames and a lot of comments based on review

Changes in v3
v2: 
https://lore.kernel.org/qemu-devel/20240408140818.3799590-1-smost...@google.com/
- Collected Eric Rbs.
- Rebased on master.
- Fix an existing bug in class encoding.
- Fix an existing bug in S2 events missing IPA.
- Fix nesting event population (missing class and wrong events)
- Remove CALL_FUNC_CFG_S2.
- Rework TLB combination logic to cache the largest possible entries.
- Refactor nested translation code to be more clear.
- Split patch 05 to 4 patches.
- Convert asid/vmid in trace events to int also.
- Remove some extra traces as it was not needed.
- Improve commit messages.

Changes in v2:
v1: 
https://lore.kernel.org/qemu-devel/20240325101442.1306300-1-smost...@google.com/
- Collected Eric Rbs
- Rework TLB to rely on VMID/ASID instead of an extra key.
- Fixed TLB issue with large stage-1 reported by Julian.
- Cap the OAS to 48 bits as PTW doesn’t support 52 bits.
- Fix ASID/VMID representation in some contexts as 16 bits while
  they can be -1
- Increase visibility in trace points

Mostafa Saleh (18):
  hw/arm/smmu-common: Add missing size check for stage-1
  hw/arm/smmu: Fix IPA for stage-2 events
  hw/arm/smmuv3: Fix encoding of CLASS in events
  hw/arm/smmu: Use enum for SMMU stage
  hw/arm/smmu: Split smmuv3_translate()
  hw/arm/smmu: Consolidate ASID and VMID types
  hw/arm/smmu: Introduce CACHED_ENTRY_TO_ADDR
  hw/arm/smmuv3: Translate CD and TT using stage-2 table
  hw/arm/smmu-common: Rework TLB lookup for nesting
  hw/arm/smmu-common: Add support for nested TLB
  hw/arm/smmu-common: Support nested translation
  hw/arm/smmu: Support nesting in smmuv3_range_inval()
  hw/arm/smmu: Introduce smmu_iotlb_inv_asid_vmid
  hw/arm/smmu: Support nesting in the rest of commands
  hw/arm/smmuv3: Support nested SMMUs in smmuv3_notify_iova()
  hw/arm/smmuv3: Handle translation faults according to SMMUPTWEventInfo
  hw/arm/smmuv3: Support and advertise nesting
  hw/arm/smmu: Refactor SMMU OAS

 hw/arm/smmu-common.c | 312 ---
 hw/arm/smmuv3-internal.h |  19 +-
 hw/arm/smmuv3.c  | 467 +++
 hw/arm/trace-events  |  26 +-
 include/hw/arm/smmu-common.h |  46 +++-
 5 files changed, 640 insertions(+), 230 deletions(-)

-- 
2.45.2.993.g49e7a77208-goog




[PATCH v5 04/18] hw/arm/smmu: Use enum for SMMU stage

2024-07-15 Thread Mostafa Saleh
Currently, the translation stage is represented as an int, where 1 is
stage-1 and 2 is stage-2. When nesting is added, using 3 to represent
nesting would be confusing, so use an enum instead.

While keeping the same values, this is useful for:
 - Doing tricks with bit masks, where BIT(0) is stage-1 and BIT(1) is
   stage-2, and both together is nested (see the sketch below).
 - Tracing, as the stage is printed as an int.
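
The enum this implies (reconstructed here for reference, as the header
hunk is not shown in this excerpt) keeps the old numeric values:

typedef enum SMMUStage {
    SMMU_STAGE_1 = 1,           /* BIT(0) */
    SMMU_STAGE_2,               /* BIT(1) */
    SMMU_NESTED,                /* BIT(0) | BIT(1) */
} SMMUStage;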

Reviewed-by: Eric Auger 
Reviewed-by: Alex Bennée 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 14 +++---
 hw/arm/smmuv3.c  | 17 +
 include/hw/arm/smmu-common.h | 11 +--
 3 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 8a8c718e6b..8a5858f69f 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -304,7 +304,7 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
   SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
 {
 dma_addr_t baseaddr, indexmask;
-int stage = cfg->stage;
+SMMUStage stage = cfg->stage;
 SMMUTransTableInfo *tt = select_tt(cfg, iova);
 uint8_t level, granule_sz, inputsize, stride;
 
@@ -402,7 +402,7 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
 info->type = SMMU_PTW_ERR_TRANSLATION;
 
 error:
-info->stage = 1;
+info->stage = SMMU_STAGE_1;
 tlbe->entry.perm = IOMMU_NONE;
 return -EINVAL;
 }
@@ -425,7 +425,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
   dma_addr_t ipa, IOMMUAccessFlags perm,
   SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
 {
-const int stage = 2;
+const SMMUStage stage = SMMU_STAGE_2;
 int granule_sz = cfg->s2cfg.granule_sz;
 /* ARM DDI0487I.a: Table D8-7. */
 int inputsize = 64 - cfg->s2cfg.tsz;
@@ -525,7 +525,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
 error_ipa:
 info->addr = ipa;
 error:
-info->stage = 2;
+info->stage = SMMU_STAGE_2;
 tlbe->entry.perm = IOMMU_NONE;
 return -EINVAL;
 }
@@ -544,9 +544,9 @@ error:
 int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
  SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
 {
-if (cfg->stage == 1) {
+if (cfg->stage == SMMU_STAGE_1) {
 return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info);
-} else if (cfg->stage == 2) {
+} else if (cfg->stage == SMMU_STAGE_2) {
 /*
  * If bypassing stage 1(or unimplemented), the input address is passed
  * directly to stage 2 as IPA. If the input address of a transaction
@@ -555,7 +555,7 @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, 
IOMMUAccessFlags perm,
  */
 if (iova >= (1ULL << cfg->oas)) {
 info->type = SMMU_PTW_ERR_ADDR_SIZE;
-info->stage = 1;
+info->stage = SMMU_STAGE_1;
 tlbe->entry.perm = IOMMU_NONE;
 return -EINVAL;
 }
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 3d214c9f57..7e9874b4a6 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -34,7 +34,8 @@
 #include "smmuv3-internal.h"
 #include "smmu-internal.h"
 
-#define PTW_RECORD_FAULT(cfg)   (((cfg)->stage == 1) ? (cfg)->record_faults : \
+#define PTW_RECORD_FAULT(cfg)   (((cfg)->stage == SMMU_STAGE_1) ? \
+ (cfg)->record_faults : \
  (cfg)->s2cfg.record_faults)
 
 /**
@@ -402,7 +403,7 @@ static bool s2_pgtable_config_valid(uint8_t sl0, uint8_t 
t0sz, uint8_t gran)
 
 static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
 {
-cfg->stage = 2;
+cfg->stage = SMMU_STAGE_2;
 
 if (STE_S2AA64(ste) == 0x0) {
 qemu_log_mask(LOG_UNIMP,
@@ -678,7 +679,7 @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, 
SMMUEventInfo *event)
 
 /* we support only those at the moment */
 cfg->aa64 = true;
-cfg->stage = 1;
+cfg->stage = SMMU_STAGE_1;
 
 cfg->oas = oas2bits(CD_IPS(cd));
 cfg->oas = MIN(oas2bits(SMMU_IDR5_OAS), cfg->oas);
@@ -762,7 +763,7 @@ static int smmuv3_decode_config(IOMMUMemoryRegion *mr, 
SMMUTransCfg *cfg,
 return ret;
 }
 
-if (cfg->aborted || cfg->bypassed || (cfg->stage == 2)) {
+if (cfg->aborted || cfg->bypassed || (cfg->stage == SMMU_STAGE_2)) {
 return 0;
 }
 
@@ -882,7 +883,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 goto epilogue;
 }
 
-if (cfg->stage == 1) {
+if (cfg->stage == SMMU_STAGE_1) {
 /* Select stage1 translation table. */
 tt = select_tt(cfg, addr);
 if (!tt) {
@@ -919,7 +920,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
  * nesting is not supported. So it is sufficient to check the
  * translation stage to know the TLB stage for now.
  */
-  

[PATCH v5 02/18] hw/arm/smmu: Fix IPA for stage-2 events

2024-07-15 Thread Mostafa Saleh
For the following events (ARM IHI 0070 F.b - 7.3 Event records):
- F_TRANSLATION
- F_ACCESS
- F_PERMISSION
- F_ADDR_SIZE

If fault occurs at stage 2, S2 == 1 and:
  - If translating an IPA for a transaction (whether by input to
stage 2-only configuration, or after successful stage 1 translation),
CLASS == IN, and IPA is provided.

At the moment only CLASS == IN is used which indicates input
translation.

However, this was not implemented correctly: for stage 2, the code
only sets the S2 bit but not the IPA.

This field has the same bits as FetchAddr in F_WALK_EABT which is
populated correctly, so we don’t change that.
The setting of this field should be done from the walker as the IPA address
wouldn't be known in case of nesting.

For stage 1, the spec says:
  If fault occurs at stage 1, S2 == 0 and:
  CLASS == IN, IPA is UNKNOWN.

So, there is no need to set it for stage 1, as ptw_info is initialised to zero
in smmuv3_translate().
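
Concretely, after this change a recorded stage-2 fault carries both
addresses (a sketch mirroring the smmuv3.c hunks below):

    event.u.f_translation.addr  = addr;          /* input address */
    event.u.f_translation.addr2 = ptw_info.addr; /* faulting IPA, set by
                                                    the walker */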

Fixes: e703f7076a ("hw/arm/smmuv3: Add page table walk for stage-2")
Reviewed-by: Jean-Philippe Brucker 
Reviewed-by: Eric Auger 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 10 ++
 hw/arm/smmuv3.c  |  4 
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index eb2356bc35..8a8c718e6b 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -448,7 +448,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
  */
 if (ipa >= (1ULL << inputsize)) {
 info->type = SMMU_PTW_ERR_TRANSLATION;
-goto error;
+goto error_ipa;
 }
 
 while (level < VMSA_LEVELS) {
@@ -494,13 +494,13 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
  */
 if (!PTE_AF(pte) && !cfg->s2cfg.affd) {
 info->type = SMMU_PTW_ERR_ACCESS;
-goto error;
+goto error_ipa;
 }
 
 s2ap = PTE_AP(pte);
 if (is_permission_fault_s2(s2ap, perm)) {
 info->type = SMMU_PTW_ERR_PERMISSION;
-goto error;
+goto error_ipa;
 }
 
 /*
@@ -509,7 +509,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
  */
 if (gpa >= (1ULL << cfg->s2cfg.eff_ps)) {
 info->type = SMMU_PTW_ERR_ADDR_SIZE;
-goto error;
+goto error_ipa;
 }
 
 tlbe->entry.translated_addr = gpa;
@@ -522,6 +522,8 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
 }
 info->type = SMMU_PTW_ERR_TRANSLATION;
 
+error_ipa:
+info->addr = ipa;
 error:
 info->stage = 2;
 tlbe->entry.perm = IOMMU_NONE;
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 2d1e0d55ec..9dd3ea48e4 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -949,6 +949,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 if (PTW_RECORD_FAULT(cfg)) {
 event.type = SMMU_EVT_F_TRANSLATION;
 event.u.f_translation.addr = addr;
+event.u.f_translation.addr2 = ptw_info.addr;
 event.u.f_translation.rnw = flag & 0x1;
 }
 break;
@@ -956,6 +957,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 if (PTW_RECORD_FAULT(cfg)) {
 event.type = SMMU_EVT_F_ADDR_SIZE;
 event.u.f_addr_size.addr = addr;
+event.u.f_addr_size.addr2 = ptw_info.addr;
 event.u.f_addr_size.rnw = flag & 0x1;
 }
 break;
@@ -963,6 +965,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 if (PTW_RECORD_FAULT(cfg)) {
 event.type = SMMU_EVT_F_ACCESS;
 event.u.f_access.addr = addr;
+event.u.f_access.addr2 = ptw_info.addr;
 event.u.f_access.rnw = flag & 0x1;
 }
 break;
@@ -970,6 +973,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 if (PTW_RECORD_FAULT(cfg)) {
 event.type = SMMU_EVT_F_PERMISSION;
 event.u.f_permission.addr = addr;
+event.u.f_permission.addr2 = ptw_info.addr;
 event.u.f_permission.rnw = flag & 0x1;
 }
 break;
-- 
2.45.2.993.g49e7a77208-goog




[PATCH v5 10/18] hw/arm/smmu-common: Add support for nested TLB

2024-07-15 Thread Mostafa Saleh
This patch adds support for nested (combined) TLB entries.
The main function, combine_tlb(), is not used in this patch but in the
following ones; it is introduced first to keep the patches simple.

Main changes:
1) New field added in the SMMUTLBEntry struct: parent_perm. For
   nested TLB, it holds the stage-2 permission; this can be used to know
   the origin of a permission fault from a cached entry, as caching
   the “and” of the permissions loses this information (see the example
   after the list).

   SMMUPTWEventInfo is used to hold information about PTW faults so
   the event can be populated. The value of stage used to be set
   based on the current stage for TLB permission faults; with
   parent_perm, it is now set based on which perm has the missing
   permission.

   When nesting is not enabled, it has the same value as perm, which
   doesn't change the logic.

2) As a combined TLB implementation is used, the combination logic
   chooses:
   - tg and level from the entry which has the smallest addr_mask.
   - Based on that, the iova that would be cached is recalculated.
   - translated_addr is chosen from stage-2.
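
A small worked example of the permission split (illustrative values): if
stage-1 grants RW and stage-2 grants RO, the entry caches perm = RW
(stage-1) and parent_perm = RO (stage-2). A later cached write sees
(perm & parent_perm) without IOMMU_WO and faults; and since perm itself
still has IOMMU_WO, the fault is attributed to stage-2.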

Reviewed-by: Eric Auger 
Reviewed-by: Jean-Philippe Brucker 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 37 
 include/hw/arm/smmu-common.h |  1 +
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index f224e9c1e0..c894c4c621 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -426,7 +426,8 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
 tlbe->entry.translated_addr = gpa;
 tlbe->entry.iova = iova & ~mask;
 tlbe->entry.addr_mask = mask;
-tlbe->entry.perm = PTE_AP_TO_PERM(ap);
+tlbe->parent_perm = PTE_AP_TO_PERM(ap);
+tlbe->entry.perm = tlbe->parent_perm;
 tlbe->level = level;
 tlbe->granule = granule_sz;
 return 0;
@@ -547,7 +548,8 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
 tlbe->entry.translated_addr = gpa;
 tlbe->entry.iova = ipa & ~mask;
 tlbe->entry.addr_mask = mask;
-tlbe->entry.perm = s2ap;
+tlbe->parent_perm = s2ap;
+tlbe->entry.perm = tlbe->parent_perm;
 tlbe->level = level;
 tlbe->granule = granule_sz;
 return 0;
@@ -562,6 +564,30 @@ error:
 return -EINVAL;
 }
 
+/*
+ * combine S1 and S2 TLB entries into a single entry.
+ * As a result the S1 entry is overridden with combined data.
+ */
+static void __attribute__((unused)) combine_tlb(SMMUTLBEntry *tlbe,
+SMMUTLBEntry *tlbe_s2,
+dma_addr_t iova,
+SMMUTransCfg *cfg)
+{
+if (tlbe_s2->entry.addr_mask < tlbe->entry.addr_mask) {
+tlbe->entry.addr_mask = tlbe_s2->entry.addr_mask;
+tlbe->granule = tlbe_s2->granule;
+tlbe->level = tlbe_s2->level;
+}
+
+tlbe->entry.translated_addr = CACHED_ENTRY_TO_ADDR(tlbe_s2,
+tlbe->entry.translated_addr);
+
+tlbe->entry.iova = iova & ~tlbe->entry.addr_mask;
+/* parent_perm has s2 perm while perm keeps s1 perm. */
+tlbe->parent_perm = tlbe_s2->entry.perm;
+return;
+}
+
 /**
  * smmu_ptw - Walk the page tables for an IOVA, according to @cfg
  *
@@ -629,9 +655,12 @@ SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg 
*cfg, dma_addr_t addr,
 
 cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, addr);
 if (cached_entry) {
-if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) {
+if ((flag & IOMMU_WO) && !(cached_entry->entry.perm &
+cached_entry->parent_perm & IOMMU_WO)) {
 info->type = SMMU_PTW_ERR_PERMISSION;
-info->stage = cfg->stage;
+info->stage = !(cached_entry->entry.perm & IOMMU_WO) ?
+  SMMU_STAGE_1 :
+  SMMU_STAGE_2;
 return NULL;
 }
 return cached_entry;
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index eecbebaaac..d84de64122 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -77,6 +77,7 @@ typedef struct SMMUTLBEntry {
 IOMMUTLBEntry entry;
 uint8_t level;
 uint8_t granule;
+IOMMUAccessFlags parent_perm;
 } SMMUTLBEntry;
 
 /* Stage-2 configuration. */
-- 
2.45.2.993.g49e7a77208-goog




[PATCH v5 01/18] hw/arm/smmu-common: Add missing size check for stage-1

2024-07-15 Thread Mostafa Saleh
According to the SMMU architecture specification (ARM IHI 0070 F.b),
in “3.4 Address sizes”
The address output from the translation causes a stage 1 Address Size
fault if it exceeds the range of the effective IPA size for the given CD.

However, this check was missing.

There is already a similar check for stage-2 against effective PA.

Reviewed-by: Jean-Philippe Brucker 
Reviewed-by: Eric Auger 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 1ce706bf94..eb2356bc35 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -381,6 +381,16 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
 goto error;
 }
 
+/*
+ * The address output from the translation causes a stage 1 Address
+ * Size fault if it exceeds the range of the effective IPA size for
+ * the given CD.
+ */
+if (gpa >= (1ULL << cfg->oas)) {
+info->type = SMMU_PTW_ERR_ADDR_SIZE;
+goto error;
+}
+
 tlbe->entry.translated_addr = gpa;
 tlbe->entry.iova = iova & ~mask;
 tlbe->entry.addr_mask = mask;
-- 
2.45.2.993.g49e7a77208-goog




[PATCH v5 16/18] hw/arm/smmuv3: Handle translation faults according to SMMUPTWEventInfo

2024-07-15 Thread Mostafa Saleh
Previously, to check whether faults are enabled, it was sufficient to
look at the current stage of translation and check the corresponding
record_faults flag.

However, with nesting, it is possible for stage-1 (nested) translation
to trigger a stage-2 fault, so we check SMMUPTWEventInfo as it would
have the correct stage set from the page table walk.
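
To illustrate (a sketch of the behaviour, assuming a nested configuration
where cfg->stage == SMMU_NESTED):

    /* old: keyed on the config stage, so under nesting it always
     * consulted s2cfg.record_faults, even for a stage-1 fault */
    PTW_RECORD_FAULT(cfg)

    /* new: keyed on the faulting stage from the walk; selects
     * cfg->record_faults for stage-1 faults and
     * cfg->s2cfg.record_faults for stage-2 faults */
    PTW_RECORD_FAULT(ptw_info, cfg)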

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3.c | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 84cd314b33..d052a2ba24 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -34,9 +34,10 @@
 #include "smmuv3-internal.h"
 #include "smmu-internal.h"
 
-#define PTW_RECORD_FAULT(cfg)   (((cfg)->stage == SMMU_STAGE_1) ? \
- (cfg)->record_faults : \
- (cfg)->s2cfg.record_faults)
+#define PTW_RECORD_FAULT(ptw_info, cfg) (((ptw_info).stage == SMMU_STAGE_1 && \
+(cfg)->record_faults) || \
+((ptw_info).stage == SMMU_STAGE_2 && \
+(cfg)->s2cfg.record_faults))
 
 /**
  * smmuv3_trigger_irq - pulse @irq if enabled and update
@@ -933,7 +934,7 @@ static SMMUTranslationStatus 
smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
 event->u.f_walk_eabt.addr2 = ptw_info.addr;
 break;
 case SMMU_PTW_ERR_TRANSLATION:
-if (PTW_RECORD_FAULT(cfg)) {
+if (PTW_RECORD_FAULT(ptw_info, cfg)) {
 event->type = SMMU_EVT_F_TRANSLATION;
 event->u.f_translation.addr2 = ptw_info.addr;
 event->u.f_translation.class = class;
@@ -941,7 +942,7 @@ static SMMUTranslationStatus 
smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
 }
 break;
 case SMMU_PTW_ERR_ADDR_SIZE:
-if (PTW_RECORD_FAULT(cfg)) {
+if (PTW_RECORD_FAULT(ptw_info, cfg)) {
 event->type = SMMU_EVT_F_ADDR_SIZE;
 event->u.f_addr_size.addr2 = ptw_info.addr;
 event->u.f_addr_size.class = class;
@@ -949,7 +950,7 @@ static SMMUTranslationStatus 
smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
 }
 break;
 case SMMU_PTW_ERR_ACCESS:
-if (PTW_RECORD_FAULT(cfg)) {
+if (PTW_RECORD_FAULT(ptw_info, cfg)) {
 event->type = SMMU_EVT_F_ACCESS;
 event->u.f_access.addr2 = ptw_info.addr;
 event->u.f_access.class = class;
@@ -957,7 +958,7 @@ static SMMUTranslationStatus 
smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
 }
 break;
 case SMMU_PTW_ERR_PERMISSION:
-if (PTW_RECORD_FAULT(cfg)) {
+if (PTW_RECORD_FAULT(ptw_info, cfg)) {
 event->type = SMMU_EVT_F_PERMISSION;
 event->u.f_permission.addr2 = ptw_info.addr;
 event->u.f_permission.class = class;
-- 
2.45.2.993.g49e7a77208-goog




[PATCH v5 05/18] hw/arm/smmu: Split smmuv3_translate()

2024-07-15 Thread Mostafa Saleh
smmuv3_translate() does everything from STE/CD parsing to TLB lookup
and PTW.

Soon, when nesting is supported, stage-1 data (tt, CD) needs to be
translated using stage-2.

Split smmuv3_translate() into 3 functions:

- smmu_translate(): in smmu-common.c, does the TLB lookup, PTW and
  TLB insertion; all the functions are already there, this just puts
  them together.
  This also simplifies the code, as it consolidates event generation
  in case of TLB lookup permission failure or in TT selection.

- smmuv3_do_translate(): in smmuv3.c, calls smmu_translate() and does
  the event population in case of errors.

- smmuv3_translate(): now calls smmuv3_do_translate() for the
  translation while the rest is the same (see the call-chain sketch
  below).

Also, add the stage to trace_smmuv3_translate_success().
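
For reference, the resulting call chain looks roughly like this (a sketch,
not the exact code):

    smmuv3_translate()           /* STE/CD decode, epilogue, event recording */
      -> smmuv3_do_translate()   /* calls smmu_translate(), fills the
                                    event on errors */
        -> smmu_translate()      /* TT selection, TLB lookup, PTW and TLB
                                    insertion (smmu-common.c) */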

Reviewed-by: Eric Auger 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c |  59 +++
 hw/arm/smmuv3.c  | 194 +--
 hw/arm/trace-events  |   2 +-
 include/hw/arm/smmu-common.h |   8 ++
 4 files changed, 142 insertions(+), 121 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 8a5858f69f..d94db6b34f 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -566,6 +566,65 @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, 
IOMMUAccessFlags perm,
 g_assert_not_reached();
 }
 
+SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
+ IOMMUAccessFlags flag, SMMUPTWEventInfo *info)
+{
+uint64_t page_mask, aligned_addr;
+SMMUTLBEntry *cached_entry = NULL;
+SMMUTransTableInfo *tt;
+int status;
+
+/*
+ * Combined attributes used for TLB lookup, as only one stage is supported,
+ * it will hold attributes based on the enabled stage.
+ */
+SMMUTransTableInfo tt_combined;
+
+if (cfg->stage == SMMU_STAGE_1) {
+/* Select stage1 translation table. */
+tt = select_tt(cfg, addr);
+if (!tt) {
+info->type = SMMU_PTW_ERR_TRANSLATION;
+info->stage = SMMU_STAGE_1;
+return NULL;
+}
+tt_combined.granule_sz = tt->granule_sz;
+tt_combined.tsz = tt->tsz;
+
+} else {
+/* Stage2. */
+tt_combined.granule_sz = cfg->s2cfg.granule_sz;
+tt_combined.tsz = cfg->s2cfg.tsz;
+}
+
+/*
+ * TLB lookup looks for granule and input size for a translation stage,
+ * as only one stage is supported right now, choose the right values
+ * from the configuration.
+ */
+page_mask = (1ULL << tt_combined.granule_sz) - 1;
+aligned_addr = addr & ~page_mask;
+
+cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, aligned_addr);
+if (cached_entry) {
+if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) {
+info->type = SMMU_PTW_ERR_PERMISSION;
+info->stage = cfg->stage;
+return NULL;
+}
+return cached_entry;
+}
+
+cached_entry = g_new0(SMMUTLBEntry, 1);
+status = smmu_ptw(cfg, aligned_addr, flag, cached_entry, info);
+if (status) {
+g_free(cached_entry);
+return NULL;
+}
+smmu_iotlb_insert(bs, cfg, cached_entry);
+return cached_entry;
+}
+
 /**
  * The bus number is used for lookup when SID based invalidation occurs.
  * In that case we lazily populate the SMMUPciBus array from the bus hash
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 7e9874b4a6..85a3efd357 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -827,6 +827,76 @@ static void smmuv3_flush_config(SMMUDevice *sdev)
 g_hash_table_remove(bc->configs, sdev);
 }
 
+/* Do translation with TLB lookup. */
+static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
+ SMMUTransCfg *cfg,
+ SMMUEventInfo *event,
+ IOMMUAccessFlags flag,
+ SMMUTLBEntry **out_entry)
+{
+SMMUPTWEventInfo ptw_info = {};
+SMMUState *bs = ARM_SMMU(s);
+SMMUTLBEntry *cached_entry = NULL;
+
+cached_entry = smmu_translate(bs, cfg, addr, flag, &ptw_info);
+if (!cached_entry) {
+/* All faults from PTW has S2 field. */
+event->u.f_walk_eabt.s2 = (ptw_info.stage == SMMU_STAGE_2);
+switch (ptw_info.type) {
+case SMMU_PTW_ERR_WALK_EABT:
+event->type = SMMU_EVT_F_WALK_EABT;
+event->u.f_walk_eabt.addr = addr;
+event->u.f_walk_eabt.rnw = flag & 0x1;
+event->u.f_walk_eabt.class = (ptw_info.stage == SMMU_STAGE_2) ?
+  SMMU_CLASS_IN : SMMU_CLASS_TT;
+event->u.f_walk_eabt.addr2 = ptw_info.addr;
+break;
+case SMMU_PTW_ERR_TRANSLATION:
+  

[PATCH v5 09/18] hw/arm/smmu-common: Rework TLB lookup for nesting

2024-07-15 Thread Mostafa Saleh
In the next patch, combine_tlb() will be added which combines 2 TLB
entries into one for nested translations, which chooses the granule
and level from the smallest entry.

This means that with nested translation, an entry can be cached with
the granule of stage-2 and not stage-1.

However, currently the lookup for an IOVA is done with the input stage
granule, which is stage-1 for a nested configuration, and that will not
work with the above logic.
This patch reworks the lookup for that case, so it falls back to the
stage-2 granule if no entry is found using the stage-1 granule.

Also, drop aligning the iova, to avoid over-aligning in case the iova
is cached with a smaller granule: the TLB lookup aligns the iova for
each granule and level anyway, and the page table walker doesn't
consider the page offset bits.
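
As an illustration of the fallback (granule values are just an example):
with a stage-1 granule of 64K and a stage-2 granule of 4K, the combined
entry for a stage-2 4K page is cached with tg=4K, since it has the
smallest addr_mask. A later lookup starts with the stage-1 granule (64K)
and misses, then retries with the stage-2 granule (4K) and hits.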

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 64 +---
 1 file changed, 43 insertions(+), 21 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 21982621c0..f224e9c1e0 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -66,8 +66,10 @@ SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t 
iova,
 return key;
 }
 
-SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
-SMMUTransTableInfo *tt, hwaddr iova)
+static SMMUTLBEntry *smmu_iotlb_lookup_all_levels(SMMUState *bs,
+  SMMUTransCfg *cfg,
+  SMMUTransTableInfo *tt,
+  hwaddr iova)
 {
 uint8_t tg = (tt->granule_sz - 10) / 2;
 uint8_t inputsize = 64 - tt->tsz;
@@ -88,6 +90,36 @@ SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg 
*cfg,
 }
 level++;
 }
+return entry;
+}
+
+/**
+ * smmu_iotlb_lookup - Look up a TLB entry.
+ * @bs: SMMU state which includes the TLB instance
+ * @cfg: Configuration of the translation
+ * @tt: Translation table info (granule and tsz)
+ * @iova: IOVA address to lookup
+ *
+ * returns a valid entry on success, otherwise NULL.
+ * In case of nested translation, tt can be updated to include
+ * the granule of the found entry as it might differ from
+ * the IOVA granule.
+ */
+SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
+SMMUTransTableInfo *tt, hwaddr iova)
+{
+SMMUTLBEntry *entry = NULL;
+
+entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
+/*
+ * For nested translation also try the s2 granule, as the TLB will insert
+ * it if the size of s2 tlb entry was smaller.
+ */
+if (!entry && (cfg->stage == SMMU_NESTED) &&
+(cfg->s2cfg.granule_sz != tt->granule_sz)) {
+tt->granule_sz = cfg->s2cfg.granule_sz;
+entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
+}
 
 if (entry) {
 cfg->iotlb_hits++;
@@ -569,18 +601,21 @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, 
IOMMUAccessFlags perm,
 SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
  IOMMUAccessFlags flag, SMMUPTWEventInfo *info)
 {
-uint64_t page_mask, aligned_addr;
 SMMUTLBEntry *cached_entry = NULL;
 SMMUTransTableInfo *tt;
 int status;
 
 /*
- * Combined attributes used for TLB lookup, as only one stage is supported,
- * it will hold attributes based on the enabled stage.
+ * Combined attributes used for TLB lookup, holds the attributes for
+ * the input stage.
  */
 SMMUTransTableInfo tt_combined;
 
-if (cfg->stage == SMMU_STAGE_1) {
+if (cfg->stage == SMMU_STAGE_2) {
+/* Stage2. */
+tt_combined.granule_sz = cfg->s2cfg.granule_sz;
+tt_combined.tsz = cfg->s2cfg.tsz;
+} else {
 /* Select stage1 translation table. */
 tt = select_tt(cfg, addr);
 if (!tt) {
@@ -590,22 +625,9 @@ SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg 
*cfg, dma_addr_t addr,
 }
 tt_combined.granule_sz = tt->granule_sz;
 tt_combined.tsz = tt->tsz;
-
-} else {
-/* Stage2. */
-tt_combined.granule_sz = cfg->s2cfg.granule_sz;
-tt_combined.tsz = cfg->s2cfg.tsz;
 }
 
-/*
- * TLB lookup looks for granule and input size for a translation stage,
- * as only one stage is supported right now, choose the right values
- * from the configuration.
- */
-page_mask = (1ULL << tt_combined.granule_sz) - 1;
-aligned_addr = addr & ~page_mask;
-
-cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, aligned_addr);
+cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, addr);
 if (cached_entry) {
 if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) {
 info->type = SMMU_PTW_E

[PATCH v5 07/18] hw/arm/smmu: Introduce CACHED_ENTRY_TO_ADDR

2024-07-15 Thread Mostafa Saleh
Soon, smmuv3_do_translate() will be used to translate the CD and the
TTBx. Instead of re-writing the same logic to convert the returned
cached entry to an address, add a new macro, CACHED_ENTRY_TO_ADDR.
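
A minimal worked example of the macro (addresses are made up for the
illustration): for a cached 2MiB block entry with
translated_addr = 0x80200000 and addr_mask = 0x1fffff, an input address
of 0x40234567 yields:

    CACHED_ENTRY_TO_ADDR(entry, 0x40234567)
        == 0x80200000 + (0x40234567 & 0x1fffff)
        == 0x80234567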

Reviewed-by: Eric Auger 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3.c  | 3 +--
 include/hw/arm/smmu-common.h | 3 +++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 11cd12e32f..3f2dfada44 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -951,8 +951,7 @@ epilogue:
 switch (status) {
 case SMMU_TRANS_SUCCESS:
 entry.perm = cached_entry->entry.perm;
-entry.translated_addr = cached_entry->entry.translated_addr +
-(addr & cached_entry->entry.addr_mask);
+entry.translated_addr = CACHED_ENTRY_TO_ADDR(cached_entry, addr);
 entry.addr_mask = cached_entry->entry.addr_mask;
 trace_smmuv3_translate_success(mr->parent_obj.name, sid, addr,
entry.translated_addr, entry.perm,
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index 96eb017e50..eecbebaaac 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -37,6 +37,9 @@
 #define VMSA_IDXMSK(isz, strd, lvl) ((1ULL << \
  VMSA_BIT_LVL(isz, strd, lvl)) - 1)
 
+#define CACHED_ENTRY_TO_ADDR(ent, addr)  ((ent)->entry.translated_addr + \
+ ((addr) & (ent)->entry.addr_mask))
+
 /*
  * Page table walk error types
  */
-- 
2.45.2.993.g49e7a77208-goog




[PATCH v5 06/18] hw/arm/smmu: Consolidate ASID and VMID types

2024-07-15 Thread Mostafa Saleh
ASID and VMID used to be uint16_t in the translation config; however,
in other contexts they can be int, as -1 is used in TLB invalidation
to represent all (don’t care).
When stage-2 was added, asid was set to -1 in stage-2 configs and vmid
to -1 in stage-1 configs. However, that meant they were actually stored
as 65535; this was not an issue as nesting was not supported and no
commands/lookups use both.

With nesting, it’s critical to get this right, as translations must be
tagged correctly with ASID/VMID, and ASID=-1 means stage-2.
Represent ASID/VMID everywhere as int (see the sketch below).
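
The pitfall is ordinary C truncation (a minimal sketch):

    int asid = -1;           /* "don't care" / stage-2 marker */
    uint16_t stored = asid;  /* truncates to 65535: the -1 meaning is lost */
    /* (stored == -1) is false, so matching against -1 markers breaks */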

Reviewed-by: Eric Auger 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 10 +-
 hw/arm/smmuv3.c  |  4 ++--
 hw/arm/trace-events  | 18 +-
 include/hw/arm/smmu-common.h | 14 +++---
 4 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index d94db6b34f..21982621c0 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -57,7 +57,7 @@ static gboolean smmu_iotlb_key_equal(gconstpointer v1, 
gconstpointer v2)
(k1->vmid == k2->vmid);
 }
 
-SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint16_t vmid, uint64_t iova,
+SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova,
 uint8_t tg, uint8_t level)
 {
 SMMUIOTLBKey key = {.asid = asid, .vmid = vmid, .iova = iova,
@@ -130,7 +130,7 @@ void smmu_iotlb_inv_all(SMMUState *s)
 static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value,
  gpointer user_data)
 {
-uint16_t asid = *(uint16_t *)user_data;
+int asid = *(int *)user_data;
 SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
 
 return SMMU_IOTLB_ASID(*iotlb_key) == asid;
@@ -139,7 +139,7 @@ static gboolean smmu_hash_remove_by_asid(gpointer key, 
gpointer value,
 static gboolean smmu_hash_remove_by_vmid(gpointer key, gpointer value,
  gpointer user_data)
 {
-uint16_t vmid = *(uint16_t *)user_data;
+int vmid = *(int *)user_data;
 SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
 
 return SMMU_IOTLB_VMID(*iotlb_key) == vmid;
@@ -191,13 +191,13 @@ void smmu_iotlb_inv_iova(SMMUState *s, int asid, int 
vmid, dma_addr_t iova,
 &info);
 }
 
-void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid)
+void smmu_iotlb_inv_asid(SMMUState *s, int asid)
 {
 trace_smmu_iotlb_inv_asid(asid);
 g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid, &asid);
 }
 
-void smmu_iotlb_inv_vmid(SMMUState *s, uint16_t vmid)
+void smmu_iotlb_inv_vmid(SMMUState *s, int vmid)
 {
 trace_smmu_iotlb_inv_vmid(vmid);
 g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid, &vmid);
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 85a3efd357..11cd12e32f 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1244,7 +1244,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
 }
 case SMMU_CMD_TLBI_NH_ASID:
 {
-uint16_t asid = CMD_ASID(&cmd);
+int asid = CMD_ASID(&cmd);
 
 if (!STAGE1_SUPPORTED(s)) {
 cmd_error = SMMU_CERROR_ILL;
@@ -1277,7 +1277,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
 break;
 case SMMU_CMD_TLBI_S12_VMALL:
 {
-uint16_t vmid = CMD_VMID(&cmd);
+int vmid = CMD_VMID(&cmd);
 
 if (!STAGE2_SUPPORTED(s)) {
 cmd_error = SMMU_CERROR_ILL;
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index cc12924a84..09ccd39548 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -11,13 +11,13 @@ smmu_ptw_page_pte(int stage, int level,  uint64_t iova, 
uint64_t baseaddr, uint6
 smmu_ptw_block_pte(int stage, int level, uint64_t baseaddr, uint64_t pteaddr, 
uint64_t pte, uint64_t iova, uint64_t gpa, int bsize_mb) "stage=%d level=%d 
base@=0x%"PRIx64" pte@=0x%"PRIx64" pte=0x%"PRIx64" iova=0x%"PRIx64" block 
address = 0x%"PRIx64" block size = %d MiB"
 smmu_get_pte(uint64_t baseaddr, int index, uint64_t pteaddr, uint64_t pte) 
"baseaddr=0x%"PRIx64" index=0x%x, pteaddr=0x%"PRIx64", pte=0x%"PRIx64
 smmu_iotlb_inv_all(void) "IOTLB invalidate all"
-smmu_iotlb_inv_asid(uint16_t asid) "IOTLB invalidate asid=%d"
-smmu_iotlb_inv_vmid(uint16_t vmid) "IOTLB invalidate vmid=%d"
-smmu_iotlb_inv_iova(uint16_t asid, uint64_t addr) "IOTLB invalidate asid=%d 
addr=0x%"PRIx64
+smmu_iotlb_inv_asid(int asid) "IOTLB invalidate asid=%d"
+smmu_iotlb_inv_vmid(int vmid) "IOTLB invalidate vmid=%d"
+smmu_iotlb_inv_iova(int asid, uint64_t addr) "IOTLB invalidate asid=%d 
addr=0x%"PRIx64
 smmu_inv_notifiers_mr(const char *name) "iommu mr=%s"
-smmu_iotlb_lookup_hit(uint16_t

Re: [PATCH v4 08/19] hw/arm/smmuv3: Translate CD and TT using stage-2 table

2024-07-11 Thread Mostafa Saleh
Hi Jean,

On Tue, Jul 09, 2024 at 02:00:04PM +0100, Jean-Philippe Brucker wrote:
> Hi Mostafa,
> 
> On Tue, Jul 09, 2024 at 07:12:59AM +, Mostafa Saleh wrote:
> > > In this case I think we're reporting InputAddr as the CD address, but it
> > > should be the IOVA
> > 
> > As Eric mentioned this would require some rework to propagate the iova,
> > but what I am more worried about is the readability in that case, may be we
> > can just fixup the event after smmuv3_get_config() in case of errors,
> > something like:
> > 
> > /*
> >  * smmuv3_get_config() Only return translation faults in case of
> >  * nested translation, otherwise it can only return C_BAD_CD,
> >  * C_BAD_STE, C_BAD_STREAMID or F_STE_FETCH.
> >  * But in case of translation fault, we need to fixup the
> >  * InputAddr to be the IOVA of the translation as the decode
> >  * functions don't know about it.
> >  */
> > static void smmuv3_config_fixup_event(SMMUEventInfo *event, hwaddr iova)
> > {
> >switch (event->type) {
> >case SMMU_EVT_F_WALK_EABT:
> >case SMMU_EVT_F_TRANSLATION:
> >case SMMU_EVT_F_ADDR_SIZE:
> >case SMMU_EVT_F_ACCESS:
> >case SMMU_EVT_F_PERMISSION:
> >event->u.f_walk_eabt.addr = iova;
> >break;
> >default:
> >break;
> >}
> > }
> > 
> > What do you think?
> 
> Yes, I think that's also what I came up with. Maybe it would be simpler to
> unconditionally do the fixup at the end of smmuv3_translate() and remove
> .addr write from smmuv3_do_translate()?

I wanted to make it clear what case causes the IOVA to be missing, but I
guess if we unify the setup for the InputAddr it would be easier to
read and just add a comment instead.

> 
> A separate union field "f_common" rather than f_walk_eabt may be clearer.
> 

Makes sense, but I will not do it in this patch to avoid making it larger
and harder to review, and this can be a separate cleanup as I see in other
places we use eabt already (smmuv3_record_event and for s2 population).

Thanks,
Mostafa

> Thanks,
> Jean



Re: [PATCH v4 09/19] hw/arm/smmu-common: Rework TLB lookup for nesting

2024-07-10 Thread Mostafa Saleh
Hi Jean,

On Tue, Jul 09, 2024 at 06:13:45PM +0100, Jean-Philippe Brucker wrote:
> On Tue, Jul 09, 2024 at 07:14:19AM +0000, Mostafa Saleh wrote:
> > Hi Jean,
> > 
> > On Thu, Jul 04, 2024 at 07:12:35PM +0100, Jean-Philippe Brucker wrote:
> > > On Mon, Jul 01, 2024 at 11:02:31AM +, Mostafa Saleh wrote:
> > > > In the next patch, combine_tlb() will be added which combines 2 TLB
> > > > entries into one for nested translations, which chooses the granule
> > > > and level from the smallest entry.
> > > > 
> > > > This means that with nested translation, an entry can be cached with
> > > > the granule of stage-2 and not stage-1.
> > > > 
> > > > However, currently, the lookup for an IOVA is done with input stage
> > > > granule, which is stage-1 for nested configuration, which will not
> > > > work with the above logic.
> > > > This patch reworks lookup in that case, so it falls back to stage-2
> > > > granule if no entry is found using stage-1 granule.
> > > 
> > > Why not initialize tt_combined to the minimum granule of stages 1 and 2?
> > > It looks like you introduced it for this. I'm wondering if we lookup the
> > > wrong IOVA if changing the granule size after the address is masked in
> > > smmu_translate()
> > 
> > I am not sure I fully understand, but I don’t think that would work as it is
> > not guaranteed that the minimum granule is the one that would be cached,
> > as we might hit block mappings.
> > 
> > The IOVA at first is masked with the first stage mask for the expected page
> > address, and the lookup logic would mask the address for each level look up,
> > so It should match the alignment of the cached page of that granule and 
> > level,
> > and as the combine logic is done with the aligned_addr it is guaranteed by
> > construction that it has to be aligned with stage-1.
> 
> I missed something, this is what I had in mind initially:
> 
> * s1 granule is 64k, s2 granule is 4k
> * the tlb already contains a translation for IOVA 0x30000, tg=4k
> * now we lookup IOVA 0x31000. Masked with the s1 granule, aligned_addr is
>   0x30000. Not found at first because lookup is with tg=64k, but then we
>   call smmu_iotlb_lookup_all_levels() again with the s2 granule and the
>   same IOVA, which returns the wrong translation

If the granules are s1=64k, s2=4k, the only way we get a cached entry as
(IOVA 0x30000, tg=4k) would be for s2 and level-3, as for level-2 it has
to be aligned to 0x200000.

So when we look up for 0x31000, there is no entry for it anyway.

But I can see some problems here:
In case also s1 granule is 64k, s2 granule is 4k
- Translation A: 0x31000
- TLB is empty => PTW, entry s1 = 64k 0x30000, s2 = 4k 0x30000, and
  the cached entry would be 0x30000, tg=4k as the combine logic also
  uses the aligned address
- Translation B: 0x31000 => also misses as the only cached entry
  is 0x30000, 4k

I think this is actually a bug and not just a TLB inefficiency, I need
to think more about it, but my initial thought is not to align the
iova until it’s used by a stage so it can use its granule.

> 
> But it's not actually possible, because if cfg->stage == SMMU_NESTED, then
> in smmu_translate() we end up with
> 
> } else {
> /* Stage2. */
> tt_combined.granule_sz = cfg->s2cfg.granule_sz;
> 
> So I think the condition
> 
>   (cfg->stage == SMMU_NESTED) && (cfg->s2cfg.granule_sz != tt->granule_sz)
> 
> in this patch is never true?
> 

Ah, that’s a bug, I will fix it, NESTED should use stage-1 granule.

> 
> Then the following scenario:
> 
> * s1 granule is 4k, s2 granule is 64k
> * we lookup IOVA A, miss. The translation gets cached with granule 4k
> * we lookup IOVA A again, but with tt->granule_sz = 64k so we'll
>   never find the entry?
> 
> 
> I guess we want to start the lookup with the smallest granule, and then if
> the s1 and s2 granules differ, retry with the other one. Or with
> SMMU_NESTED, start with the s1 granule and keep this patch to fallback to
> s2 granule, but without masking the IOVA in smmu_translate() (it will be
> masked correctly by smmu_iotlb_lookup_all_levels()).

Thanks for pointing that out, I will think more about it but I sense
that we would need to modify where we align the iova, for translation
and lookup.


Thanks,
Mostafa

> 
> Thanks,
> Jean
> 
> > 
> > Thanks,
> > Mostafa
> > 
> > > 
> > > Thanks,
> > > Jean
> > > 
> > > > 
> > > > Signed-off-by: Mostafa Saleh 
> > >

Re: [PATCH v4 18/19] hw/arm/smmuv3: Advertise S2FWB

2024-07-09 Thread Mostafa Saleh
Hi Eric,

On Mon, Jul 08, 2024 at 07:09:02PM +0200, Eric Auger wrote:
> Hi Mostafa,
> 
> On 7/4/24 20:36, Jean-Philippe Brucker wrote:
> > On Mon, Jul 01, 2024 at 11:02:40AM +, Mostafa Saleh wrote:
> >> QEMU doesn's support memory attributes, so FWB is NOP, this
> >> might change in the future if memory attributre would be supported.
> attributes here and below as reported along with v3
> >>
> >> Signed-off-by: Mostafa Saleh 
> >> ---
> >>  hw/arm/smmuv3.c | 8 
> >>  1 file changed, 8 insertions(+)
> >>
> >> diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> >> index 807f26f2da..88378e83dd 100644
> >> --- a/hw/arm/smmuv3.c
> >> +++ b/hw/arm/smmuv3.c
> >> @@ -287,6 +287,14 @@ static void smmuv3_init_regs(SMMUv3State *s)
> >>  if (FIELD_EX32(s->idr[0], IDR0, S2P)) {
> >>  /* XNX is a stage-2-specific feature */
> >>  s->idr[3] = FIELD_DP32(s->idr[3], IDR3, XNX, 1);
> >> +if (FIELD_EX32(s->idr[0], IDR0, S1P)) {
> > Why is this check needed?
> >
> >> +/*
> >> + * QEMU doesn's support memory attributes, so FWB is NOP, this
> > doesn't
> I have just seen your reply on my v3 comments. I still do not understand
> why we expose this bit at this stage.

As I replied to Jean, I will drop this patch for now, we can always add it
later, as it doesn't add much value.

Thanks,
Mostafa

> 
> Thanks
> 
> Eric
> >
> > Thanks,
> > Jean
> >
> >> + * might change in the future if memory attributre would be
> >> + * supported.
> >> + */
> >> +   s->idr[3] = FIELD_DP32(s->idr[3], IDR3, FWB, 1);
> >> +}
> >>  }
> >>  s->idr[3] = FIELD_DP32(s->idr[3], IDR3, RIL, 1);
> >>  s->idr[3] = FIELD_DP32(s->idr[3], IDR3, BBML, 2);
> >> -- 
> >> 2.45.2.803.g4e1b14247a-goog
> >>
> 



Re: [PATCH v4 18/19] hw/arm/smmuv3: Advertise S2FWB

2024-07-09 Thread Mostafa Saleh
Hi Jean,

On Thu, Jul 04, 2024 at 07:36:58PM +0100, Jean-Philippe Brucker wrote:
> On Mon, Jul 01, 2024 at 11:02:40AM +0000, Mostafa Saleh wrote:
> > QEMU doesn's support memory attributes, so FWB is NOP, this
> > might change in the future if memory attributre would be supported.
> > 
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmuv3.c | 8 
> >  1 file changed, 8 insertions(+)
> > 
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> > index 807f26f2da..88378e83dd 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -287,6 +287,14 @@ static void smmuv3_init_regs(SMMUv3State *s)
> >  if (FIELD_EX32(s->idr[0], IDR0, S2P)) {
> >  /* XNX is a stage-2-specific feature */
> >  s->idr[3] = FIELD_DP32(s->idr[3], IDR3, XNX, 1);
> > +if (FIELD_EX32(s->idr[0], IDR0, S1P)) {
> 
> Why is this check needed?
>

I thought that only made sense for nested SMMUs, but I guess in
practice it’s not important for qemu and Linux doesn’t use it, so I can
just drop this patch.

Thanks,
Mostafa

> 
> > +/*
> > + * QEMU doesn's support memory attributes, so FWB is NOP, this
> 
> doesn't
> 
> Thanks,
> Jean
> 
> > + * might change in the future if memory attributre would be
> > + * supported.
> > + */
> > +   s->idr[3] = FIELD_DP32(s->idr[3], IDR3, FWB, 1);
> > +}
> >  }
> >  s->idr[3] = FIELD_DP32(s->idr[3], IDR3, RIL, 1);
> >  s->idr[3] = FIELD_DP32(s->idr[3], IDR3, BBML, 2);
> > -- 
> > 2.45.2.803.g4e1b14247a-goog
> > 



Re: [PATCH v4 15/19] hw/arm/smmuv3: Support nested SMMUs in smmuv3_notify_iova()

2024-07-09 Thread Mostafa Saleh
Hi Jean,

On Thu, Jul 04, 2024 at 07:35:03PM +0100, Jean-Philippe Brucker wrote:
> On Mon, Jul 01, 2024 at 11:02:37AM +0000, Mostafa Saleh wrote:
> > IOMMUTLBEvent only understands IOVA, for stage-1 or stage-2
> > SMMU instances we consider the input address as the IOVA, but when
> > nesting is used, we can't mix stage-1 and stage-2 addresses, so for
> > nesting only stage-1 is considered the IOVA and would be notified.
> > 
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmuv3.c | 28 +++-
> >  hw/arm/trace-events |  2 +-
> >  2 files changed, 20 insertions(+), 10 deletions(-)
> > 
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> > index e9007af3cd..36eb6f514a 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -1064,17 +1064,17 @@ epilogue:
> >   * @iova: iova
> >   * @tg: translation granule (if communicated through range invalidation)
> >   * @num_pages: number of @granule sized pages (if tg != 0), otherwise 1
> > + * @stage: Which stage(1 or 2) is used
> >   */
> >  static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
> > IOMMUNotifier *n,
> > int asid, int vmid,
> > dma_addr_t iova, uint8_t tg,
> > -   uint64_t num_pages)
> > +   uint64_t num_pages, int stage)
> >  {
> >  SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu);
> >  IOMMUTLBEvent event;
> >  uint8_t granule;
> > -SMMUv3State *s = sdev->smmu;
> >  
> >  if (!tg) {
> 
> Should the stage check be outside of the TG test?  I think CMD_TLBI_S2_IPA
> with TG != 0 and nesting enabled would still go through.

Ah yes, I will fix it.

Thanks,
Mostafa

> 
> Thanks,
> Jean
> 
> >  SMMUEventInfo eventinfo = {.inval_ste_allowed = true};
> > @@ -1093,14 +1093,24 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion 
> > *mr,
> >  return;
> >  }
> >  
> > -if (STAGE1_SUPPORTED(s)) {
> > +/*
> > + * stage is passed from TLB invalidation commands which can be 
> > either
> > + * stage-1 or stage-2.
> > + * However, IOMMUTLBEvent only understands IOVA, for stage-1 or 
> > stage-2
> > + * SMMU instances we consider the input address as the IOVA, but 
> > when
> > + * nesting is used, we can't mix stage-1 and stage-2 addresses, so 
> > for
> > + * nesting only stage-1 is considered the IOVA and would be 
> > notified.
> > + */
> > +if (stage == SMMU_STAGE_1) {
> >  tt = select_tt(cfg, iova);
> >  if (!tt) {
> >  return;
> >  }
> >  granule = tt->granule_sz;
> > -} else {
> > +} else if ((stage == SMMU_STAGE_2) && (cfg->stage != SMMU_NESTED)) 
> > {
> >  granule = cfg->s2cfg.granule_sz;
> > +} else {
> > +return;
> >  }
> >  
> >  } else {
> > @@ -1119,7 +1129,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
> >  /* invalidate an asid/vmid/iova range tuple in all mr's */
> >  static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, int vmid,
> >dma_addr_t iova, uint8_t tg,
> > -  uint64_t num_pages)
> > +  uint64_t num_pages, int stage)
> >  {
> >  SMMUDevice *sdev;
> >  
> > @@ -1128,10 +1138,10 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, 
> > int asid, int vmid,
> >  IOMMUNotifier *n;
> >  
> >  trace_smmuv3_inv_notifiers_iova(mr->parent_obj.name, asid, vmid,
> > -iova, tg, num_pages);
> > +iova, tg, num_pages, stage);
> >  
> >  IOMMU_NOTIFIER_FOREACH(n, mr) {
> > -smmuv3_notify_iova(mr, n, asid, vmid, iova, tg, num_pages);
> > +smmuv3_notify_iova(mr, n, asid, vmid, iova, tg, num_pages, 
> > stage);
> >  }
> >  }
> >  }
> > @@ -1162,7 +1172,7 @@ static void smmuv3_range_inval(SMMUState *s, Cmd 
> > *cmd, SMMUStage stage)
> >  
> >  if (!tg) {
> >  trace_smmuv3_range_inval(vmid, asid, addr, tg, 1, ttl, leaf, 
> > stage);
> > -smmuv3_inv_notifiers_iova(s, as

Re: [PATCH v4 12/19] hw/arm/smmu: Support nesting in smmuv3_range_inval()

2024-07-09 Thread Mostafa Saleh
Hi Jean,

On Thu, Jul 04, 2024 at 07:32:36PM +0100, Jean-Philippe Brucker wrote:
> On Mon, Jul 01, 2024 at 11:02:34AM +0000, Mostafa Saleh wrote:
> > With nesting, we would need to invalidate IPAs without
> > over-invalidating stage-1 IOVAs. This can be done by
> > distinguishing IPAs in the TLBs by having ASID=-1.
> > To achieve that, rework the invalidation for IPAs to have a
> > separate function, while for IOVA invalidation ASID=-1 means
> > invalidate for all ASIDs.
> > 
> > Reviewed-by: Eric Auger 
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmu-common.c | 47 
> >  hw/arm/smmuv3.c  | 23 --
> >  hw/arm/trace-events  |  2 +-
> >  include/hw/arm/smmu-common.h |  3 ++-
> >  4 files changed, 66 insertions(+), 9 deletions(-)
> > 
> > diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
> > index 71afd486ba..5bf9eadeff 100644
> > --- a/hw/arm/smmu-common.c
> > +++ b/hw/arm/smmu-common.c
> > @@ -195,6 +195,25 @@ static gboolean 
> > smmu_hash_remove_by_asid_vmid_iova(gpointer key, gpointer value,
> > ((entry->iova & ~info->mask) == info->iova);
> >  }
> >  
> > +static gboolean smmu_hash_remove_by_vmid_ipa(gpointer key, gpointer value,
> > + gpointer user_data)
> > +{
> > +SMMUTLBEntry *iter = (SMMUTLBEntry *)value;
> > +IOMMUTLBEntry *entry = &iter->entry;
> > +SMMUIOTLBPageInvInfo *info = (SMMUIOTLBPageInvInfo *)user_data;
> > +SMMUIOTLBKey iotlb_key = *(SMMUIOTLBKey *)key;
> > +
> > +if (info->asid >= 0) {
> 
> Should this test SMMU_IOTLB_ASID(iotlb_key) instead?
> 
Yes, nice catch!

> > +/* This is a stage-1 address. */
> > +return false;
> > +}
> > +if (info->vmid != SMMU_IOTLB_VMID(iotlb_key)) {
> > +return false;
> > +}
> > +return ((info->iova & ~entry->addr_mask) == entry->iova) ||
> > +   ((entry->iova & ~info->mask) == info->iova);
> > +}
> > +
> >  void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova,
> >   uint8_t tg, uint64_t num_pages, uint8_t ttl)
> >  {
> > @@ -223,6 +242,34 @@ void smmu_iotlb_inv_iova(SMMUState *s, int asid, int 
> > vmid, dma_addr_t iova,
> >  &info);
> >  }
> >  
> > +/*
> > + * Similar to smmu_iotlb_inv_iova(), but for Stage-2, ASID is always -1,
> > + * in Stage-1 invalidation ASID = -1, means don't care.
> > + */
> > +void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, dma_addr_t ipa, uint8_t tg,
> > +uint64_t num_pages, uint8_t ttl)
> > +{
> > +uint8_t granule = tg ? tg * 2 + 10 : 12;
> > +int asid = -1;
> > +
> > +   if (ttl && (num_pages == 1)) {
> > +SMMUIOTLBKey key = smmu_get_iotlb_key(asid, vmid, ipa, tg, ttl);
> > +
> > +if (g_hash_table_remove(s->iotlb, &key)) {
> > +return;
> > +}
> > +}
> > +
> > +SMMUIOTLBPageInvInfo info = {
> > +.iova = ipa,
> > +.vmid = vmid,
> > +.mask = (num_pages * 1 << granule) - 1};
> 
> Since multiplication takes precedence over shift this looks strange.
> We could just remove "* 1" here and in smmu_iotlb_inv_iova() to avoid the
> confusion?

Agh, I just copied this and didn’t notice; that makes sense. I will fix it
here for smmu_iotlb_inv_ipa, and maybe iova can be fixed separately?

Thanks,
Mostafa

> 
> Thanks,
> Jean
> 
> > +
> > +g_hash_table_foreach_remove(s->iotlb,
> > +smmu_hash_remove_by_vmid_ipa,
> > +&info);
> > +}
> > +
> >  void smmu_iotlb_inv_asid(SMMUState *s, int asid)
> >  {
> >  trace_smmu_iotlb_inv_asid(asid);
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> > index 86f95c1e40..e5ecd93258 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -1136,7 +1136,7 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, 
> > int asid, int vmid,
> >  }
> >  }
> >  
> > -static void smmuv3_range_inval(SMMUState *s, Cmd *cmd)
> > +static void smmuv3_range_inval(SMMUState *s, Cmd *cmd, SMMUStage stage)
> >  {
> >  dma_addr_t end, addr = CMD_ADDR(cmd);
> >  uint8_t type = CMD_TYPE(cmd);
> > @@ -1161,9 +1161,13 @@ stati

Re: [PATCH v4 11/19] hw/arm/smmu-common: Support nested translation

2024-07-09 Thread Mostafa Saleh
Hi Eric,

On Mon, Jul 08, 2024 at 05:19:59PM +0200, Eric Auger wrote:
> Hi Mostafa,
> 
> On 7/1/24 13:02, Mostafa Saleh wrote:
> > When nested translation is requested, do the following:
> >
> > - Translate stage-1 table address IPA into PA through stage-2.
> > - Translate stage-1 table walk output (IPA) through stage-2.
> > - Create a single TLB entry from stage-1 and stage-2 translations
> >   using logic introduced before.
> >
> > For stage-1 table translation, the spec (ARM IHI 0070 F.b) says in:
> > 7.3.12 F_WALK_EABT:
> > Translation of an IPA for Stage 1 descriptor fetch:
> > S2 == 1 (stage 2), CLASS == T
> > So, F_WALK_EABT is used which propagates to CLASS == TT.
> >
> > smmu_ptw() has a new argument SMMUState which include the TLB as
> > stage-1 table address can be cached in there.
> >
> > Also in smmu_ptw() a separate path used for nesting to simplify the
> > code, although some logic can be combined.
> >
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmu-common.c | 72 +++-
> >  include/hw/arm/smmu-common.h |  2 +-
> >  2 files changed, 64 insertions(+), 10 deletions(-)
> >
> > diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
> > index 24b7d09e2b..71afd486ba 100644
> > --- a/hw/arm/smmu-common.c
> > +++ b/hw/arm/smmu-common.c
> > @@ -318,6 +318,38 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, 
> > dma_addr_t iova)
> >  return NULL;
> >  }
> >  
> > +/* Translate stage-1 table address using stage-2 page table. */
> > +static inline int translate_table_addr_ipa(dma_addr_t *table_addr,
> > +   SMMUTransCfg *cfg,
> > +   SMMUPTWEventInfo *info,
> > +   SMMUState *bs)
> Nit: in general the SMMUState if the 1st arg, as the most global state.
> > +{
> > +dma_addr_t addr = *table_addr;
> > +SMMUTLBEntry *cached_entry;
> > +int asid;
> > +
> > +/*
> > + * The translation table walks performed from TTB0 or TTB1 are always
> > + * performed in IPA space if stage 2 translations are enabled.
> > + */
> > +asid = cfg->asid;
> > +cfg->stage = SMMU_STAGE_2;
> > +cfg->asid = -1;
> > +cached_entry = smmu_translate(bs, cfg, addr, IOMMU_RO, info);
> > +cfg->asid = asid;
> > +cfg->stage = SMMU_NESTED;
> > +
> > +if (cached_entry) {
> > +*table_addr = CACHED_ENTRY_TO_ADDR(cached_entry, addr);
> > +return 0;
> > +}
> > +
> > +info->stage = SMMU_STAGE_2;
> > +info->type = SMMU_PTW_ERR_WALK_EABT;
> > +info->addr = addr;
> so I guess also here the recorded address should be the IOVA (Jean's
> previous comment)?

This address maps to FetchAddr and not InputAddr, which is set from the
calling function, so that should be correct (besides the event type
which, as Jean mentioned, needs to be fixed).

Thanks,
Mostafa

> 
> Eric
> > +return -EINVAL;
> > +}
> > +
> >  /**
> >   * smmu_ptw_64_s1 - VMSAv8-64 Walk of the page tables for a given IOVA
> >   * @cfg: translation config
> > @@ -333,7 +365,8 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, 
> > dma_addr_t iova)
> >   */
> >  static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
> >dma_addr_t iova, IOMMUAccessFlags perm,
> > -  SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
> > +  SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info,
> > +  SMMUState *bs)
> >  {
> >  dma_addr_t baseaddr, indexmask;
> >  SMMUStage stage = cfg->stage;
> > @@ -381,6 +414,11 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
> >  goto error;
> >  }
> >  baseaddr = get_table_pte_address(pte, granule_sz);
> > +if (cfg->stage == SMMU_NESTED) {
> > +if (translate_table_addr_ipa(&baseaddr, cfg, info, bs)) {
> > +goto error;
> > +}
> > +}
> >  level++;
> >  continue;
> >  } else if (is_page_pte(pte, level)) {
> > @@ -568,10 +606,8 @@ error:
> >   * combine S1 and S2 TLB entries into a single entry.
> >   * As a result the S1 entry is overriden with combined data.
> >   */
> > -static void __attribute__((unused)) combine_tlb(SMMUTLBEntry *tlbe,
> > 

Re: [PATCH v4 11/19] hw/arm/smmu-common: Support nested translation

2024-07-09 Thread Mostafa Saleh
Hi Jean,

On Thu, Jul 04, 2024 at 07:31:10PM +0100, Jean-Philippe Brucker wrote:
> On Mon, Jul 01, 2024 at 11:02:33AM +0000, Mostafa Saleh wrote:
> > When nested translation is requested, do the following:
> > 
> > - Translate stage-1 table address IPA into PA through stage-2.
> > - Translate stage-1 table walk output (IPA) through stage-2.
> > - Create a single TLB entry from stage-1 and stage-2 translations
> >   using logic introduced before.
> > 
> > For stage-1 table translation, the spec (ARM IHI 0070 F.b) says in:
> > 7.3.12 F_WALK_EABT:
> > Translation of an IPA for Stage 1 descriptor fetch:
> > S2 == 1 (stage 2), CLASS == T
> > So, F_WALK_EABT is used which propagates to CLASS == TT.
> 
> I don't think the text applies to this case, the context is:
> 
>   A stage 2 table walk can encounter EABT accessing the physical
>   address of a stage 2 descriptor, because of a:
>   [...]
>   * Translation of an IPA for Stage 1 descriptor fetch
> 
> So EABT is when failing to load the stage 2 descriptor. I can't find
> exact text for this case but looking at the flowchart 15.5, I think
> this should be F_TRANSLATION/F_ADDR_SIZE/F_PERMISSION/F_ACCESS with
> CLASS=TT and S2.

I see, thanks for clarifying. I guess that is another argument for
propagating more info in SMMUPTWEventInfo so we can set the fault CLASS.

Thanks,
Mostafa

> 
> Thanks,
> Jean
> 
> > 
> > smmu_ptw() has a new argument SMMUState which include the TLB as
> > stage-1 table address can be cached in there.
> > 
> > Also in smmu_ptw() a separate path used for nesting to simplify the
> > code, although some logic can be combined.
> > 
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmu-common.c | 72 +++-
> >  include/hw/arm/smmu-common.h |  2 +-
> >  2 files changed, 64 insertions(+), 10 deletions(-)
> > 
> > diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
> > index 24b7d09e2b..71afd486ba 100644
> > --- a/hw/arm/smmu-common.c
> > +++ b/hw/arm/smmu-common.c
> > @@ -318,6 +318,38 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, 
> > dma_addr_t iova)
> >  return NULL;
> >  }
> >  
> > +/* Translate stage-1 table address using stage-2 page table. */
> > +static inline int translate_table_addr_ipa(dma_addr_t *table_addr,
> > +   SMMUTransCfg *cfg,
> > +   SMMUPTWEventInfo *info,
> > +   SMMUState *bs)
> > +{
> > +dma_addr_t addr = *table_addr;
> > +SMMUTLBEntry *cached_entry;
> > +int asid;
> > +
> > +/*
> > + * The translation table walks performed from TTB0 or TTB1 are always
> > + * performed in IPA space if stage 2 translations are enabled.
> > + */
> > +asid = cfg->asid;
> > +cfg->stage = SMMU_STAGE_2;
> > +cfg->asid = -1;
> > +cached_entry = smmu_translate(bs, cfg, addr, IOMMU_RO, info);
> > +cfg->asid = asid;
> > +cfg->stage = SMMU_NESTED;
> > +
> > +if (cached_entry) {
> > +*table_addr = CACHED_ENTRY_TO_ADDR(cached_entry, addr);
> > +return 0;
> > +}
> > +
> > +info->stage = SMMU_STAGE_2;
> > +info->type = SMMU_PTW_ERR_WALK_EABT;
> > +info->addr = addr;
> > +return -EINVAL;
> > +}
> > +
> >  /**
> >   * smmu_ptw_64_s1 - VMSAv8-64 Walk of the page tables for a given IOVA
> >   * @cfg: translation config
> > @@ -333,7 +365,8 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, 
> > dma_addr_t iova)
> >   */
> >  static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
> >dma_addr_t iova, IOMMUAccessFlags perm,
> > -  SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
> > +  SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info,
> > +  SMMUState *bs)
> >  {
> >  dma_addr_t baseaddr, indexmask;
> >  SMMUStage stage = cfg->stage;
> > @@ -381,6 +414,11 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
> >  goto error;
> >  }
> >  baseaddr = get_table_pte_address(pte, granule_sz);
> > +if (cfg->stage == SMMU_NESTED) {
> > +if (translate_table_addr_ipa(&baseaddr, cfg, info, bs)) {
> > +goto error;
> > +}
> > +}

Re: [PATCH v4 09/19] hw/arm/smmu-common: Rework TLB lookup for nesting

2024-07-09 Thread Mostafa Saleh
Hi Jean,

On Thu, Jul 04, 2024 at 07:12:35PM +0100, Jean-Philippe Brucker wrote:
> On Mon, Jul 01, 2024 at 11:02:31AM +0000, Mostafa Saleh wrote:
> > In the next patch, combine_tlb() will be added which combines 2 TLB
> > entries into one for nested translations, which chooses the granule
> > and level from the smallest entry.
> > 
> > This means that with nested translation, an entry can be cached with
> > the granule of stage-2 and not stage-1.
> > 
> > However, currently, the lookup for an IOVA is done with input stage
> > granule, which is stage-1 for nested configuration, which will not
> > work with the above logic.
> > This patch reworks lookup in that case, so it falls back to stage-2
> > granule if no entry is found using stage-1 granule.
> 
> Why not initialize tt_combined to the minimum granule of stages 1 and 2?
> It looks like you introduced it for this. I'm wondering if we lookup the
> wrong IOVA if changing the granule size after the address is masked in
> smmu_translate()

I am not sure I fully understand, but I don’t think that would work as it is
not guaranteed that the minimum granule is the one that would be cached,
as we might hit block mappings.

The IOVA at first is masked with the first stage mask for the expected page
address, and the lookup logic would mask the address for each level look up,
so It should match the alignment of the cached page of that granule and level,
and as the combine logic is done with the aligned_addr it is guaranteed by
construction that it has to be aligned with stage-1.

Thanks,
Mostafa

> 
> Thanks,
> Jean
> 
> > 
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmu-common.c | 36 ++--
> >  1 file changed, 34 insertions(+), 2 deletions(-)
> > 
> > diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
> > index 21982621c0..0840b5cffd 100644
> > --- a/hw/arm/smmu-common.c
> > +++ b/hw/arm/smmu-common.c
> > @@ -66,8 +66,10 @@ SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, 
> > uint64_t iova,
> >  return key;
> >  }
> >  
> > -SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
> > -SMMUTransTableInfo *tt, hwaddr iova)
> > +static SMMUTLBEntry *smmu_iotlb_lookup_all_levels(SMMUState *bs,
> > +  SMMUTransCfg *cfg,
> > +  SMMUTransTableInfo *tt,
> > +  hwaddr iova)
> >  {
> >  uint8_t tg = (tt->granule_sz - 10) / 2;
> >  uint8_t inputsize = 64 - tt->tsz;
> > @@ -88,6 +90,36 @@ SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, 
> > SMMUTransCfg *cfg,
> >  }
> >  level++;
> >  }
> > +return entry;
> > +}
> > +
> > +/**
> > + * smmu_iotlb_lookup - Look up a TLB entry.
> > + * @bs: SMMU state which includes the TLB instance
> > + * @cfg: Configuration of the translation
> > + * @tt: Translation table info (granule and tsz)
> > + * @iova: IOVA address to lookup
> > + *
> > + * returns a valid entry on success, otherwise NULL.
> > + * In case of nested translation, tt can be updated to include
> > + * the granule of the found entry as it might differ from
> > + * the IOVA granule.
> > + */
> > +SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
> > +SMMUTransTableInfo *tt, hwaddr iova)
> > +{
> > +SMMUTLBEntry *entry = NULL;
> > +
> > +entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
> > +/*
> > + * For nested translation also try the s2 granule, as the TLB will 
> > insert
> > + * it if the size of s2 tlb entry was smaller.
> > + */
> > +if (!entry && (cfg->stage == SMMU_NESTED) &&
> > +(cfg->s2cfg.granule_sz != tt->granule_sz)) {
> > +tt->granule_sz = cfg->s2cfg.granule_sz;
> > +entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
> > +}
> >  
> >  if (entry) {
> >  cfg->iotlb_hits++;
> > -- 
> > 2.45.2.803.g4e1b14247a-goog
> > 



Re: [PATCH v4 08/19] hw/arm/smmuv3: Translate CD and TT using stage-2 table

2024-07-09 Thread Mostafa Saleh
Hi Jean,

On Thu, Jul 04, 2024 at 07:08:43PM +0100, Jean-Philippe Brucker wrote:
> On Mon, Jul 01, 2024 at 11:02:30AM +0000, Mostafa Saleh wrote:
> > According to the ARM SMMU architecture specification (ARM IHI 0070 F.b),
> > in "5.2 Stream Table Entry":
> >  [51:6] S1ContextPtr
> >  If Config[1] == 1 (stage 2 enabled), this pointer is an IPA translated by
> >  stage 2 and the programmed value must be within the range of the IAS.
> > 
> > In "5.4.1 CD notes":
> >  The translation table walks performed from TTB0 or TTB1 are always 
> > performed
> >  in IPA space if stage 2 translations are enabled.
> > 
> > This patch implements translation of the S1 context descriptor pointer and
> > TTBx base addresses through the S2 stage (IPA -> PA).
> > 
> > smmuv3_do_translate() is updated to take one extra argument, the
> > translation class; this is useful to:
> >  - Decide whether a translation is stage-2 only or uses the STE config.
> >  - Populate the class in case of faults; WALK_EABT is left unchanged,
> >as it is always triggered from TT access, so no need to use the
> >input class.
> > 
> > In the case of stage-2-only translation, used in the context of nested
> > translation, the stage and ASID are saved and restored before and
> > after calling smmu_translate().
> > 
> > Translating CD or TTBx can fail for the following reasons:
> > 1) Large address size: This is described in
> >(3.4.3 Address sizes of SMMU-originated accesses)
> >- For CD ptr larger than IAS, for SMMUv3.1, it can trigger either
> >  C_BAD_STE or Translation fault, we implement the latter as it
> >  requires no extra code.
> >- For TTBx, if larger than the effective stage 1 output address size, it
> >  triggers C_BAD_CD.
> > 
> > 2) Faults from PTWs (7.3 Event records)
> >- F_ADDR_SIZE: large address size after first level causes stage 2 
> > Address
> >  Size fault (Also in 3.4.3 Address sizes of SMMU-originated accesses)
> >- F_PERMISSION: Same as an address translation. However, when
> >  CLASS == CD, the access is implicitly Data and a read.
> >- F_ACCESS: Same as an address translation.
> >- F_TRANSLATION: Same as an address translation.
> >- F_WALK_EABT: Same as an address translation.
> >   These are already implemented in the PTW logic, so no extra handling
> >   required.
> > 
> > Reviewed-by: Eric Auger 
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmuv3.c | 91 +++--
> >  1 file changed, 80 insertions(+), 11 deletions(-)
> > 
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> > index 229b3c388c..86f95c1e40 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -337,14 +337,35 @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t 
> > addr, STE *buf,
> >  
> >  }
> >  
> > +static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr 
> > addr,
> > + SMMUTransCfg *cfg,
> > + SMMUEventInfo *event,
> > + IOMMUAccessFlags flag,
> > + SMMUTLBEntry **out_entry,
> > + SMMUTranslationClass 
> > class);
> >  /* @ssid > 0 not supported yet */
> > -static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
> > -   CD *buf, SMMUEventInfo *event)
> > +static int smmu_get_cd(SMMUv3State *s, STE *ste, SMMUTransCfg *cfg,
> > +   uint32_t ssid, CD *buf, SMMUEventInfo *event)
> >  {
> >  dma_addr_t addr = STE_CTXPTR(ste);
> >  int ret, i;
> > +SMMUTranslationStatus status;
> > +SMMUTLBEntry *entry;
> >  
> >  trace_smmuv3_get_cd(addr);
> > +
> > +if (cfg->stage == SMMU_NESTED) {
> > +status = smmuv3_do_translate(s, addr, cfg, event,
> > + IOMMU_RO, &entry, SMMU_CLASS_CD);
> > +
> > +/* Same PTW faults are reported but with CLASS = CD. */
> > +if (status != SMMU_TRANS_SUCCESS) {
> 
> In this case I think we're reporting InputAddr as the CD address, but it
> should be the IOVA

As Eric mentioned, this would require some rework to propagate the IOVA,
but what I am more worried about is readability in that case; maybe we
can just fix up the event after smmuv3_get_config() in case of errors.
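
Something along these lines (purely a sketch; it reuses the existing
smmuv3_get_config() shape, and where exactly the fixup lives is TBD):

    cfg = smmuv3_get_config(sdev, &event);
    if (!cfg) {
        /* Fix up: report the transaction IOVA, not the CD/TTB address. */
        event.u.f_walk_eabt.addr = addr;
        status = SMMU_TRANS_ERROR;
        goto epilogue;
    }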

Re: [PATCH v4 03/19] hw/arm/smmuv3: Fix encoding of CLASS in events

2024-07-09 Thread Mostafa Saleh
Hi Jean,

On Thu, Jul 04, 2024 at 07:02:00PM +0100, Jean-Philippe Brucker wrote:
> On Mon, Jul 01, 2024 at 11:02:25AM +0000, Mostafa Saleh wrote:
> > The SMMUv3 spec (ARM IHI 0070 F.b - 7.3 Event records) defines the
> > class of events faults as:
> > 
> > CLASS: The class of the operation that caused the fault:
> > - 0b00: CD, CD fetch.
> > - 0b01: TTD, Stage 1 translation table fetch.
> > - 0b10: IN, Input address
> > 
> > However, this value was not set and left as 0 which means CD and not
> > IN (0b10).
> > While at it, add an enum for class as it would be used for nesting.
> > However, at the moment stage-1 and stage-2 use the same class values.
> > 
> > Fixes: 9bde7f0674 “hw/arm/smmuv3: Implement translate callback”
> > Reviewed-by: Eric Auger 
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmuv3-internal.h | 6 ++
> >  hw/arm/smmuv3.c  | 6 +-
> >  2 files changed, 11 insertions(+), 1 deletion(-)
> > 
> > diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
> > index e4dd11e1e6..0f3ecec804 100644
> > --- a/hw/arm/smmuv3-internal.h
> > +++ b/hw/arm/smmuv3-internal.h
> > @@ -32,6 +32,12 @@ typedef enum SMMUTranslationStatus {
> >  SMMU_TRANS_SUCCESS,
> >  } SMMUTranslationStatus;
> >  
> > +typedef enum SMMUTranslationClass {
> > +SMMU_CLASS_CD,
> > +SMMU_CLASS_TT,
> > +SMMU_CLASS_IN,
> > +} SMMUTranslationClass;
> > +
> >  /* MMIO Registers */
> >  
> >  REG32(IDR0,0x0)
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> > index 9dd3ea48e4..1eb5b160d2 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -942,7 +942,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
> > *mr, hwaddr addr,
> >  event.type = SMMU_EVT_F_WALK_EABT;
> >  event.u.f_walk_eabt.addr = addr;
> >  event.u.f_walk_eabt.rnw = flag & 0x1;
> > -event.u.f_walk_eabt.class = 0x1;
> > +event.u.f_walk_eabt.class = SMMU_CLASS_TT;
> 
> For EABT I think we have to differentiate S1/S2:
> 
> * s1-only walk that encounters EABT on S1 descriptor access is reported as
>   class=TT, 
> * s2 walk that encounters EABT on S2 descriptor while translating
>   non-descriptor IPA is reported as class=IN, even when doing s2-only.
> 
> Maybe it can be done in the later patch where you propagate the different
> classes, because it's a minor detail.

Thanks for pointing it out. It is a bit tricky: for s2-only we need
class = IN, which is easy.
But for nested, if we get a fault with s2 == 1 and EABT, we need to know
if it’s from:
- Translation of a s1 table IPA (translate_table_addr_ipa) as class => TT
- Access for s2 table as class = IN

We might need a new member in SMMUPTWEventInfo to distinguish this as
bool is_ipa_descriptor
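
Roughly like this (a sketch only; the struct mirrors the current
SMMUPTWEventInfo with one new member):

    typedef struct SMMUPTWEventInfo {
        SMMUStage stage;
        SMMUPTWEventType type;
        dma_addr_t addr;        /* fetched address that induced an abort */
        bool is_ipa_descriptor; /* s2 fault on an s1 descriptor fetch */
    } SMMUPTWEventInfo;

    /* Then, when populating the event: */
    event->u.f_walk_eabt.class = ptw_info.is_ipa_descriptor ?
                                 SMMU_CLASS_TT : SMMU_CLASS_IN;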

Thanks,
Mostafa

> 
> Thanks,
> Jean
> 
> >  event.u.f_walk_eabt.addr2 = ptw_info.addr;
> >  break;
> >  case SMMU_PTW_ERR_TRANSLATION:
> > @@ -950,6 +950,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
> > *mr, hwaddr addr,
> >  event.type = SMMU_EVT_F_TRANSLATION;
> >  event.u.f_translation.addr = addr;
> >  event.u.f_translation.addr2 = ptw_info.addr;
> > +event.u.f_translation.class = SMMU_CLASS_IN;
> >  event.u.f_translation.rnw = flag & 0x1;
> >  }
> >  break;
> > @@ -958,6 +959,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
> > *mr, hwaddr addr,
> >  event.type = SMMU_EVT_F_ADDR_SIZE;
> >  event.u.f_addr_size.addr = addr;
> >  event.u.f_addr_size.addr2 = ptw_info.addr;
> > +event.u.f_translation.class = SMMU_CLASS_IN;
> >  event.u.f_addr_size.rnw = flag & 0x1;
> >  }
> >  break;
> > @@ -966,6 +968,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
> > *mr, hwaddr addr,
> >  event.type = SMMU_EVT_F_ACCESS;
> >  event.u.f_access.addr = addr;
> >  event.u.f_access.addr2 = ptw_info.addr;
> > +event.u.f_translation.class = SMMU_CLASS_IN;
> >  event.u.f_access.rnw = flag & 0x1;
> >  }
> >  break;
> > @@ -974,6 +977,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
> > *mr, hwaddr addr,
> >  event.type = SMMU_EVT_F_PERMISSION;
> >  event.u.f_permission.addr = addr;
> >  event.u.f_permission.addr2 = ptw_info.addr;
> > +event.u.f_translation.class = SMMU_CLASS_IN;
> >  event.u.f_permission.rnw = flag & 0x1;
> >  }
> >  break;
> > -- 
> > 2.45.2.803.g4e1b14247a-goog
> > 



Re: [PATCH v4 00/19] SMMUv3 nested translation support

2024-07-08 Thread Mostafa Saleh
Hi Eric,

On Mon, Jul 08, 2024 at 07:30:34PM +0200, Eric Auger wrote:
> Hi Mostafa,
> 
> On 7/1/24 13:02, Mostafa Saleh wrote:
> > Currently, QEMU supports emulating either stage-1 or stage-2 SMMUs
> > but not nested instances.
> > This patch series adds support for nested translation in SMMUv3,
> > this is controlled by property “arm-smmuv3.stage=nested”, and
> > advertised to guests as (IDR0.S1P == 1 && IDR0.S2P == 1)
> >
> > Main changes (architecture):
> > 
> > 1) CDs are considered IPA and translated with stage-2.
> > 2) TTBx and tables for stage-1 are considered IPA and translated
> >with stage-2.
> > 3) Translate the IPA address with stage-2.
> 
> If you respin quickly you may have a chance to get this in 9.1 (as a
> reminder the soft freeze is on 2024-07-23). Is it your target?

Thanks for the heads up, I will prioritize this and I will try to post
v5 by this week.

Thanks,
Mostafa

> 
> Thanks
> 
> Eric
> >
> > TLBs:
> > ==
> > TLBs are the most tricky part.
> >
> > 1) General design
> >Unified (combined) design is used, where entries with ASID=-1 are
> >IPAs (cached from stage-2 config)
> >
> >TLBs are also modified to cache 2 permissions; a new permission is
> >added: "parent_perm."
> >
> >For non-nested configuration, perm == parent_perm and nothing
> >changes. This is used to know which stage to use in case there is
> >a permission fault from a TLB entry.
> >
> > 2) Caching in TLB
> >Stage-1 and stage-2 are inserted in the TLB as is.
> >For nested translation, both entries are combined into one TLB
> >entry. The size (level and granule) are chosen from the smallest entries.
> >That means that a stage-1 translation can be cached with the stage-2
> >granule in its key; this is taken into account for lookup.
> >
> > 3) TLB Lookup
> >TLB lookup already uses ASID in key, so it can distinguish between
> >stage-1 and stage-2.
> >And as mentioned above, the granule for stage-1 can be different;
> >if stage-1 lookup fails, we try again with the stage-2 granule.
> >
> > 4) TLB invalidation
> >- Address invalidation is split, for IOVA(CMD_TLBI_NH_VA
> >  /CMD_TLBI_NH_VAA) and IPA(CMD_TLBI_S2_IPA) based on ASID value
> >- CMD_TLBI_NH_ASID/CMD_TLBI_NH_ALL: Consider VMID if stage-2 is
> >  supported, and invalidate stage-1 only by VMIDs
> >
> > As far as I understand, this is compliant with the ARM architecture:
> > - ARM ARM DDI 0487J.a: RLGSCG, RTVTYQ, RGNJPZ
> > - ARM IHI 0070F.b: 16.2 Caching
> >
> > An alternative approach would be to instantiate 2 TLBs, one per each
> > stage. I haven’t investigated that.
> >
> > Others
> > ===
> > - Advertise SMMUv3.2-S2FWB; it is a NOP for QEMU as it doesn’t support
> >   attributes.
> >
> > - OAS: A typical setup with nesting is to share CPU stage-2 with the
> >   SMMU, and according to the user manual, SMMU OAS must match the
> >   system physical address.
> >
> >   This was discussed before in
> >   https://lore.kernel.org/all/20230226220650.1480786-11-smost...@google.com/
> >   This series doesn’t implement that, but reworks OAS to make it easier
> >   to configure in the future.
> >
> > - For nested configuration, IOVA notifier only notifies for stage-1
> >   invalidations (as far as I understand this is the intended
> >   behaviour as it notifies for IOVA).
> >
> > - Stop ignoring VMID for stage-1 if stage-2 is also supported.
> >
> >
> > Future improvements:
> > =
> > 1) One small improvement, which I don’t think is worth the extra
> >complexity: in case of a stage-1 TLB miss for nested translation,
> >we can do the stage-1 walk and look up the stage-2 TLBs, instead of
> >doing the full walk.
> >
> > Testing
> > 
> > 1) IOMMUFD + VFIO
> >Kernel: 
> > https://lore.kernel.org/all/cover.1683688960.git.nicol...@nvidia.com/
> >VMM: 
> > https://qemu-devel.nongnu.narkive.com/o815DqpI/rfc-v5-0-8-arm-smmuv3-emulation-support
> >
> >By assigning 
> > “virtio-net-pci,netdev=net0,disable-legacy=on,iommu_platform=on,ats=on”,
> >to a guest VM (on top of QEMU guest) with VFIO and IOMMUFD.
> >
> > 2) Work-in-progress prototype I am hacking on for nesting on KVM
> >(this is nowhere near complete, and misses many things but it
> >doesn't require VMs/VFIO) also with virtio-net-pci and git

[PATCH v4 14/19] hw/arm/smmu: Support nesting in the rest of commands

2024-07-01 Thread Mostafa Saleh
Some commands need rework for nesting, as they used to assume S1
and S2 are mutually exclusive:

- CMD_TLBI_NH_ASID: Consider VMID if stage-2 is supported
- CMD_TLBI_NH_ALL: Consider VMID if stage-2 is supported, otherwise
  invalidate everything; this required a new VMID invalidation
  function for stage-1 only (ASID >= 0)

Also, rework trace events to reflect the new implementation.

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 16 
 hw/arm/smmuv3.c  | 28 ++--
 hw/arm/trace-events  |  6 --
 include/hw/arm/smmu-common.h |  1 +
 4 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index d0309a95b2..faba4adc49 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -178,6 +178,16 @@ static gboolean smmu_hash_remove_by_vmid(gpointer key, 
gpointer value,
 return SMMU_IOTLB_VMID(*iotlb_key) == vmid;
 }
 
+static gboolean smmu_hash_remove_by_vmid_s1(gpointer key, gpointer value,
+gpointer user_data)
+{
+int vmid = *(int *)user_data;
+SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
+
+return (SMMU_IOTLB_VMID(*iotlb_key) == vmid) &&
+   (SMMU_IOTLB_ASID(*iotlb_key) >= 0);
+}
+
 static gboolean smmu_hash_remove_by_asid_vmid_iova(gpointer key, gpointer 
value,
   gpointer user_data)
 {
@@ -288,6 +298,12 @@ void smmu_iotlb_inv_vmid(SMMUState *s, int vmid)
 g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid, &vmid);
 }
 
+inline void smmu_iotlb_inv_vmid_s1(SMMUState *s, int vmid)
+{
+trace_smmu_iotlb_inv_vmid_s1(vmid);
+g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid_s1, &vmid);
+}
+
 /* VMSAv8-64 Translation */
 
 /**
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 928f125523..e9007af3cd 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1321,25 +1321,49 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
 case SMMU_CMD_TLBI_NH_ASID:
 {
 int asid = CMD_ASID(&cmd);
+int vmid = -1;
 
 if (!STAGE1_SUPPORTED(s)) {
 cmd_error = SMMU_CERROR_ILL;
 break;
 }
 
+/*
+ * VMID is only matched when stage 2 is supported, otherwise set it
+ * to -1 as the value used for stage-1 only VMIDs.
+ */
+if (STAGE2_SUPPORTED(s)) {
+vmid = CMD_VMID(&cmd);
+}
+
 trace_smmuv3_cmdq_tlbi_nh_asid(asid);
 smmu_inv_notifiers_all(&s->smmu_state);
-smmu_iotlb_inv_asid_vmid(bs, asid, -1);
+smmu_iotlb_inv_asid_vmid(bs, asid, vmid);
 break;
 }
 case SMMU_CMD_TLBI_NH_ALL:
+{
+int vmid = -1;
+
 if (!STAGE1_SUPPORTED(s)) {
 cmd_error = SMMU_CERROR_ILL;
 break;
 }
+
+/*
+ * If stage-2 is supported, invalidate for this VMID only, 
otherwise
+ * invalidate the whole thing.
+ */
+if (STAGE2_SUPPORTED(s)) {
+vmid = CMD_VMID(&cmd);
+trace_smmuv3_cmdq_tlbi_nh(vmid);
+smmu_iotlb_inv_vmid_s1(bs, vmid);
+break;
+}
 QEMU_FALLTHROUGH;
+}
 case SMMU_CMD_TLBI_NSNH_ALL:
-trace_smmuv3_cmdq_tlbi_nh();
+trace_smmuv3_cmdq_tlbi_nsnh();
 smmu_inv_notifiers_all(&s->smmu_state);
 smmu_iotlb_inv_all(bs);
 break;
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index 7d9c1703da..593cc571da 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -11,8 +11,9 @@ smmu_ptw_page_pte(int stage, int level,  uint64_t iova, 
uint64_t baseaddr, uint6
 smmu_ptw_block_pte(int stage, int level, uint64_t baseaddr, uint64_t pteaddr, 
uint64_t pte, uint64_t iova, uint64_t gpa, int bsize_mb) "stage=%d level=%d 
base@=0x%"PRIx64" pte@=0x%"PRIx64" pte=0x%"PRIx64" iova=0x%"PRIx64" block 
address = 0x%"PRIx64" block size = %d MiB"
 smmu_get_pte(uint64_t baseaddr, int index, uint64_t pteaddr, uint64_t pte) 
"baseaddr=0x%"PRIx64" index=0x%x, pteaddr=0x%"PRIx64", pte=0x%"PRIx64
 smmu_iotlb_inv_all(void) "IOTLB invalidate all"
-smmu_iotlb_inv_asid(int asid) "IOTLB invalidate asid=%d"
+smmu_iotlb_inv_asid_vmid(int asid, int vmid) "IOTLB invalidate asid=%d vmid=%d"
 smmu_iotlb_inv_vmid(int vmid) "IOTLB invalidate vmid=%d"
+smmu_iotlb_inv_vmid_s1(int vmid) "IOTLB invalidate vmid=%d"
 smmu_iotlb_inv_iova(int asid, uint64_t addr) "IOTLB invalidate asid=%d 
addr=0x%"PRIx64
 smmu_inv_notifiers_mr(const char *name) "iommu mr=%s"
 smmu_iotlb

[PATCH v4 08/19] hw/arm/smmuv3: Translate CD and TT using stage-2 table

2024-07-01 Thread Mostafa Saleh
According to the ARM SMMU architecture specification (ARM IHI 0070 F.b),
in "5.2 Stream Table Entry":
 [51:6] S1ContextPtr
 If Config[1] == 1 (stage 2 enabled), this pointer is an IPA translated by
 stage 2 and the programmed value must be within the range of the IAS.

In "5.4.1 CD notes":
 The translation table walks performed from TTB0 or TTB1 are always performed
 in IPA space if stage 2 translations are enabled.

This patch implements translation of the S1 context descriptor pointer and
TTBx base addresses through the S2 stage (IPA -> PA).

smmuv3_do_translate() is updated to take one extra argument, the
translation class; this is useful to:
 - Decide whether a translation is stage-2 only or uses the STE config.
 - Populate the class in case of faults; WALK_EABT is left unchanged,
   as it is always triggered from TT access, so no need to use the
   input class.

In the case of stage-2-only translation, used in the context of nested
translation, the stage and ASID are saved and restored before and
after calling smmu_translate().

Translating CD or TTBx can fail for the following reasons:
1) Large address size: This is described in
   (3.4.3 Address sizes of SMMU-originated accesses)
   - For CD ptr larger than IAS, for SMMUv3.1, it can trigger either
 C_BAD_STE or Translation fault, we implement the latter as it
 requires no extra code.
   - For TTBx, if larger than the effective stage 1 output address size, it
 triggers C_BAD_CD.

2) Faults from PTWs (7.3 Event records)
   - F_ADDR_SIZE: large address size after first level causes stage 2 Address
 Size fault (Also in 3.4.3 Address sizes of SMMU-originated accesses)
   - F_PERMISSION: Same as an address translation. However, when
 CLASS == CD, the access is implicitly Data and a read.
   - F_ACCESS: Same as an address translation.
   - F_TRANSLATION: Same as an address translation.
   - F_WALK_EABT: Same as an address translation.
  These are already implemented in the PTW logic, so no extra handling
  required.

Reviewed-by: Eric Auger 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3.c | 91 +++--
 1 file changed, 80 insertions(+), 11 deletions(-)

diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 229b3c388c..86f95c1e40 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -337,14 +337,35 @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, 
STE *buf,
 
 }
 
+static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
+ SMMUTransCfg *cfg,
+ SMMUEventInfo *event,
+ IOMMUAccessFlags flag,
+ SMMUTLBEntry **out_entry,
+ SMMUTranslationClass class);
 /* @ssid > 0 not supported yet */
-static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
-   CD *buf, SMMUEventInfo *event)
+static int smmu_get_cd(SMMUv3State *s, STE *ste, SMMUTransCfg *cfg,
+   uint32_t ssid, CD *buf, SMMUEventInfo *event)
 {
 dma_addr_t addr = STE_CTXPTR(ste);
 int ret, i;
+SMMUTranslationStatus status;
+SMMUTLBEntry *entry;
 
 trace_smmuv3_get_cd(addr);
+
+if (cfg->stage == SMMU_NESTED) {
+status = smmuv3_do_translate(s, addr, cfg, event,
+ IOMMU_RO, &entry, SMMU_CLASS_CD);
+
+/* Same PTW faults are reported but with CLASS = CD. */
+if (status != SMMU_TRANS_SUCCESS) {
+return -EINVAL;
+}
+
+addr = CACHED_ENTRY_TO_ADDR(entry, addr);
+}
+
 /* TODO: guarantee 64-bit single-copy atomicity */
 ret = dma_memory_read(&address_space_memory, addr, buf, sizeof(*buf),
   MEMTXATTRS_UNSPECIFIED);
@@ -659,10 +680,13 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, 
STE *ste,
 return 0;
 }
 
-static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event)
+static int decode_cd(SMMUv3State *s, SMMUTransCfg *cfg,
+ CD *cd, SMMUEventInfo *event)
 {
 int ret = -EINVAL;
 int i;
+SMMUTranslationStatus status;
+SMMUTLBEntry *entry;
 
 if (!CD_VALID(cd) || !CD_AARCH64(cd)) {
 goto bad_cd;
@@ -713,9 +737,26 @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, 
SMMUEventInfo *event)
 
 tt->tsz = tsz;
 tt->ttb = CD_TTB(cd, i);
+
 if (tt->ttb & ~(MAKE_64BIT_MASK(0, cfg->oas))) {
 goto bad_cd;
 }
+
+/* Translate the TTBx, from IPA to PA if nesting is enabled. */
+if (cfg->stage == SMMU_NESTED) {
+status = smmuv3_do_translate(s, tt->ttb, cfg, event, IOMMU_RO,
+ &entry, SMMU_CLASS_TT);
+/*
+ * Same PTW faults are reported but with CLASS = TT.
+  

[PATCH v4 18/19] hw/arm/smmuv3: Advertise S2FWB

2024-07-01 Thread Mostafa Saleh
QEMU doesn't support memory attributes, so FWB is a NOP; this
might change in the future if memory attributes were supported.

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 807f26f2da..88378e83dd 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -287,6 +287,14 @@ static void smmuv3_init_regs(SMMUv3State *s)
 if (FIELD_EX32(s->idr[0], IDR0, S2P)) {
 /* XNX is a stage-2-specific feature */
 s->idr[3] = FIELD_DP32(s->idr[3], IDR3, XNX, 1);
+if (FIELD_EX32(s->idr[0], IDR0, S1P)) {
+/*
+ * QEMU doesn't support memory attributes, so FWB is a NOP; this
+ * might change in the future if memory attributes were
+ * supported.
+ */
+   s->idr[3] = FIELD_DP32(s->idr[3], IDR3, FWB, 1);
+}
 }
 s->idr[3] = FIELD_DP32(s->idr[3], IDR3, RIL, 1);
 s->idr[3] = FIELD_DP32(s->idr[3], IDR3, BBML, 2);
-- 
2.45.2.803.g4e1b14247a-goog




[PATCH v4 17/19] hw/arm/smmuv3: Support and advertise nesting

2024-07-01 Thread Mostafa Saleh
Everything is in place; consolidate parsing of the STE config and
setting of the translation stage.

Advertise nesting if stage requested is "nested".
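
The config decoding below treats the stage as a bitmask; a sketch of the
encoding this assumes (matching the enum used earlier in the series):

    typedef enum SMMUStage {
        SMMU_STAGE_1 = 1,
        SMMU_STAGE_2 = 2,
        SMMU_NESTED  = 3,   /* SMMU_STAGE_1 | SMMU_STAGE_2 */
    } SMMUStage;

so STE_CFG_S1_ENABLED and STE_CFG_S2_ENABLED naturally compose into
SMMU_NESTED.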

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3.c | 35 ++-
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 6c18dc0acf..807f26f2da 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -261,6 +261,9 @@ static void smmuv3_init_regs(SMMUv3State *s)
 /* Based on sys property, the stages supported in smmu will be 
advertised.*/
 if (s->stage && !strcmp("2", s->stage)) {
 s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S2P, 1);
+} else if (s->stage && !strcmp("nested", s->stage)) {
+s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S1P, 1);
+s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S2P, 1);
 } else {
 s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S1P, 1);
 }
@@ -425,8 +428,6 @@ static bool s2_pgtable_config_valid(uint8_t sl0, uint8_t 
t0sz, uint8_t gran)
 
 static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
 {
-cfg->stage = SMMU_STAGE_2;
-
 if (STE_S2AA64(ste) == 0x0) {
 qemu_log_mask(LOG_UNIMP,
   "SMMUv3 AArch32 tables not supported\n");
@@ -509,6 +510,27 @@ bad_ste:
 return -EINVAL;
 }
 
+static void decode_ste_config(SMMUTransCfg *cfg, uint32_t config)
+{
+
+if (STE_CFG_ABORT(config)) {
+cfg->aborted = true;
+return;
+}
+if (STE_CFG_BYPASS(config)) {
+cfg->bypassed = true;
+return;
+}
+
+if (STE_CFG_S1_ENABLED(config)) {
+cfg->stage = SMMU_STAGE_1;
+}
+
+if (STE_CFG_S2_ENABLED(config)) {
+cfg->stage |= SMMU_STAGE_2;
+}
+}
+
 /* Returns < 0 in case of invalid STE, 0 otherwise */
 static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
   STE *ste, SMMUEventInfo *event)
@@ -525,13 +547,9 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
 
 config = STE_CONFIG(ste);
 
-if (STE_CFG_ABORT(config)) {
-cfg->aborted = true;
-return 0;
-}
+decode_ste_config(cfg, config);
 
-if (STE_CFG_BYPASS(config)) {
-cfg->bypassed = true;
+if (cfg->aborted || cfg->bypassed) {
 return 0;
 }
 
@@ -704,7 +722,6 @@ static int decode_cd(SMMUv3State *s, SMMUTransCfg *cfg,
 
 /* we support only those at the moment */
 cfg->aa64 = true;
-cfg->stage = SMMU_STAGE_1;
 
 cfg->oas = oas2bits(CD_IPS(cd));
 cfg->oas = MIN(oas2bits(SMMU_IDR5_OAS), cfg->oas);
-- 
2.45.2.803.g4e1b14247a-goog




[PATCH v4 16/19] hw/arm/smmuv3: Handle translation faults according to SMMUPTWEventInfo

2024-07-01 Thread Mostafa Saleh
Previously, to check if faults are enabled, it was sufficient to check
the current stage of translation and check the corresponding
record_faults flag.

However, with nesting, it is possible for stage-1 (nested) translation
to trigger a stage-2 fault, so we check SMMUPTWEventInfo as it would
have the correct stage set from the page table walk.

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3.c | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 36eb6f514a..6c18dc0acf 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -34,9 +34,10 @@
 #include "smmuv3-internal.h"
 #include "smmu-internal.h"
 
-#define PTW_RECORD_FAULT(cfg)   (((cfg)->stage == SMMU_STAGE_1) ? \
- (cfg)->record_faults : \
- (cfg)->s2cfg.record_faults)
+#define PTW_RECORD_FAULT(ptw_info, cfg) (((ptw_info).stage == SMMU_STAGE_1 && \
+(cfg)->record_faults) || \
+((ptw_info).stage == SMMU_STAGE_2 && \
+(cfg)->s2cfg.record_faults))
 
 /**
  * smmuv3_trigger_irq - pulse @irq if enabled and update
@@ -919,7 +920,7 @@ static SMMUTranslationStatus 
smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
 event->u.f_walk_eabt.addr2 = ptw_info.addr;
 break;
 case SMMU_PTW_ERR_TRANSLATION:
-if (PTW_RECORD_FAULT(cfg)) {
+if (PTW_RECORD_FAULT(ptw_info, cfg)) {
 event->type = SMMU_EVT_F_TRANSLATION;
 event->u.f_translation.addr = addr;
 event->u.f_translation.addr2 = ptw_info.addr;
@@ -928,7 +929,7 @@ static SMMUTranslationStatus 
smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
 }
 break;
 case SMMU_PTW_ERR_ADDR_SIZE:
-if (PTW_RECORD_FAULT(cfg)) {
+if (PTW_RECORD_FAULT(ptw_info, cfg)) {
 event->type = SMMU_EVT_F_ADDR_SIZE;
 event->u.f_addr_size.addr = addr;
 event->u.f_addr_size.addr2 = ptw_info.addr;
@@ -937,7 +938,7 @@ static SMMUTranslationStatus 
smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
 }
 break;
 case SMMU_PTW_ERR_ACCESS:
-if (PTW_RECORD_FAULT(cfg)) {
+if (PTW_RECORD_FAULT(ptw_info, cfg)) {
 event->type = SMMU_EVT_F_ACCESS;
 event->u.f_access.addr = addr;
 event->u.f_access.addr2 = ptw_info.addr;
@@ -946,7 +947,7 @@ static SMMUTranslationStatus 
smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
 }
 break;
 case SMMU_PTW_ERR_PERMISSION:
-if (PTW_RECORD_FAULT(cfg)) {
+if (PTW_RECORD_FAULT(ptw_info, cfg)) {
 event->type = SMMU_EVT_F_PERMISSION;
 event->u.f_permission.addr = addr;
 event->u.f_permission.addr2 = ptw_info.addr;
-- 
2.45.2.803.g4e1b14247a-goog




[PATCH v4 10/19] hw/arm/smmu-common: Add support for nested TLB

2024-07-01 Thread Mostafa Saleh
This patch adds support for nested (combined) TLB entries.
The main function, combine_tlb(), is not used here but in the next
patches; to simplify the series it is introduced first.

Main changes:
1) A new field is added to the SMMUTLBEntry struct: parent_perm. For
   nested TLB entries it holds the stage-2 permission; this can be used
   to know the origin of a permission fault from a cached entry, as
   caching the “and” of the permissions loses this information.

   SMMUPTWEventInfo is used to hold information about PTW faults so
   the event can be populated. The value of stage used to be set
   based on the current stage for TLB permission faults; with
   parent_perm, it is now set based on which permission is missing.

   When nesting is not enabled, parent_perm has the same value as
   perm, which doesn't change the logic.

2) As combined TLB implementation is used, the combination logic
   chooses:
   - tg and level from the entry which has the smallest addr_mask.
   - Based on that the iova that would be cached is recalculated.
   - Translated_addr is chosen from stage-2.

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 37 
 include/hw/arm/smmu-common.h |  1 +
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 0840b5cffd..24b7d09e2b 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -426,7 +426,8 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
 tlbe->entry.translated_addr = gpa;
 tlbe->entry.iova = iova & ~mask;
 tlbe->entry.addr_mask = mask;
-tlbe->entry.perm = PTE_AP_TO_PERM(ap);
+tlbe->parent_perm = PTE_AP_TO_PERM(ap);
+tlbe->entry.perm = tlbe->parent_perm;
 tlbe->level = level;
 tlbe->granule = granule_sz;
 return 0;
@@ -547,7 +548,8 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
 tlbe->entry.translated_addr = gpa;
 tlbe->entry.iova = ipa & ~mask;
 tlbe->entry.addr_mask = mask;
-tlbe->entry.perm = s2ap;
+tlbe->parent_perm = s2ap;
+tlbe->entry.perm = tlbe->parent_perm;
 tlbe->level = level;
 tlbe->granule = granule_sz;
 return 0;
@@ -562,6 +564,30 @@ error:
 return -EINVAL;
 }
 
+/*
+ * combine S1 and S2 TLB entries into a single entry.
+ * As a result the S1 entry is overridden with combined data.
+ */
+static void __attribute__((unused)) combine_tlb(SMMUTLBEntry *tlbe,
+SMMUTLBEntry *tlbe_s2,
+dma_addr_t iova,
+SMMUTransCfg *cfg)
+{
+if (tlbe_s2->entry.addr_mask < tlbe->entry.addr_mask) {
+tlbe->entry.addr_mask = tlbe_s2->entry.addr_mask;
+tlbe->granule = tlbe_s2->granule;
+tlbe->level = tlbe_s2->level;
+}
+
+tlbe->entry.translated_addr = CACHED_ENTRY_TO_ADDR(tlbe_s2,
+tlbe->entry.translated_addr);
+
+tlbe->entry.iova = iova & ~tlbe->entry.addr_mask;
+/* parent_perm has s2 perm while perm keeps s1 perm. */
+tlbe->parent_perm = tlbe_s2->entry.perm;
+return;
+}
+
 /**
  * smmu_ptw - Walk the page tables for an IOVA, according to @cfg
  *
@@ -639,9 +665,12 @@ SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg 
*cfg, dma_addr_t addr,
 
 cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, aligned_addr);
 if (cached_entry) {
-if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) {
+if ((flag & IOMMU_WO) && !(cached_entry->entry.perm &
+cached_entry->parent_perm & IOMMU_WO)) {
 info->type = SMMU_PTW_ERR_PERMISSION;
-info->stage = cfg->stage;
+info->stage = !(cached_entry->entry.perm & IOMMU_WO) ?
+  SMMU_STAGE_1 :
+  SMMU_STAGE_2;
 return NULL;
 }
 return cached_entry;
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index 09d3b9e734..1db566d451 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -77,6 +77,7 @@ typedef struct SMMUTLBEntry {
 IOMMUTLBEntry entry;
 uint8_t level;
 uint8_t granule;
+IOMMUAccessFlags parent_perm;
 } SMMUTLBEntry;
 
 /* Stage-2 configuration. */
-- 
2.45.2.803.g4e1b14247a-goog




[PATCH v4 13/19] hw/arm/smmu: Introduce smmu_iotlb_inv_asid_vmid

2024-07-01 Thread Mostafa Saleh
Soon, instead of doing TLB invalidation by ASID only, VMID will
also be required.
Add smmu_iotlb_inv_asid_vmid() which invalidates by both ASID and VMID.

However, at the moment this function is only used in SMMU_CMD_TLBI_NH_ASID
which is a stage-1 command, so passing VMID = -1 keeps the original
behaviour.

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 20 +---
 hw/arm/smmuv3.c  |  2 +-
 include/hw/arm/smmu-common.h |  2 +-
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 5bf9eadeff..d0309a95b2 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -159,13 +159,14 @@ void smmu_iotlb_inv_all(SMMUState *s)
 g_hash_table_remove_all(s->iotlb);
 }
 
-static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value,
- gpointer user_data)
+static gboolean smmu_hash_remove_by_asid_vmid(gpointer key, gpointer value,
+  gpointer user_data)
 {
-int asid = *(int *)user_data;
+SMMUIOTLBPageInvInfo *info = (SMMUIOTLBPageInvInfo *)user_data;
 SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
 
-return SMMU_IOTLB_ASID(*iotlb_key) == asid;
+return (SMMU_IOTLB_ASID(*iotlb_key) == info->asid) &&
+   (SMMU_IOTLB_VMID(*iotlb_key) == info->vmid);
 }
 
 static gboolean smmu_hash_remove_by_vmid(gpointer key, gpointer value,
@@ -270,10 +271,15 @@ void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, 
dma_addr_t ipa, uint8_t tg,
 &info);
 }
 
-void smmu_iotlb_inv_asid(SMMUState *s, int asid)
+void smmu_iotlb_inv_asid_vmid(SMMUState *s, int asid, int vmid)
 {
-trace_smmu_iotlb_inv_asid(asid);
-g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid, &asid);
+SMMUIOTLBPageInvInfo info = {
+.asid = asid,
+.vmid = vmid,
+};
+
+trace_smmu_iotlb_inv_asid_vmid(asid, vmid);
+g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid_vmid, 
&info);
 }
 
 void smmu_iotlb_inv_vmid(SMMUState *s, int vmid)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index e5ecd93258..928f125523 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1329,7 +1329,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
 
 trace_smmuv3_cmdq_tlbi_nh_asid(asid);
 smmu_inv_notifiers_all(&s->smmu_state);
-smmu_iotlb_inv_asid(bs, asid);
+smmu_iotlb_inv_asid_vmid(bs, asid, -1);
 break;
 }
 case SMMU_CMD_TLBI_NH_ALL:
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index de032fdfd1..2bc9a03d47 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -212,7 +212,7 @@ void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, 
SMMUTLBEntry *entry);
 SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova,
 uint8_t tg, uint8_t level);
 void smmu_iotlb_inv_all(SMMUState *s);
-void smmu_iotlb_inv_asid(SMMUState *s, int asid);
+void smmu_iotlb_inv_asid_vmid(SMMUState *s, int asid, int vmid);
 void smmu_iotlb_inv_vmid(SMMUState *s, int vmid);
 void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova,
  uint8_t tg, uint64_t num_pages, uint8_t ttl);
-- 
2.45.2.803.g4e1b14247a-goog




[PATCH v4 15/19] hw/arm/smmuv3: Support nested SMMUs in smmuv3_notify_iova()

2024-07-01 Thread Mostafa Saleh
IOMMUTLBEvent only understands IOVA. For stage-1 or stage-2
SMMU instances we consider the input address as the IOVA, but when
nesting is used we can't mix stage-1 and stage-2 addresses, so for
nesting only the stage-1 address is considered the IOVA and is notified.

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3.c | 28 +++-
 hw/arm/trace-events |  2 +-
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index e9007af3cd..36eb6f514a 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1064,17 +1064,17 @@ epilogue:
  * @iova: iova
  * @tg: translation granule (if communicated through range invalidation)
  * @num_pages: number of @granule sized pages (if tg != 0), otherwise 1
+ * @stage: Which stage (1 or 2) is used
  */
 static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
IOMMUNotifier *n,
int asid, int vmid,
dma_addr_t iova, uint8_t tg,
-   uint64_t num_pages)
+   uint64_t num_pages, int stage)
 {
 SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu);
 IOMMUTLBEvent event;
 uint8_t granule;
-SMMUv3State *s = sdev->smmu;
 
 if (!tg) {
 SMMUEventInfo eventinfo = {.inval_ste_allowed = true};
@@ -1093,14 +1093,24 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
 return;
 }
 
-if (STAGE1_SUPPORTED(s)) {
+/*
+ * stage is passed from TLB invalidation commands and can be either
+ * stage-1 or stage-2.
+ * However, IOMMUTLBEvent only understands IOVA. For stage-1 or stage-2
+ * SMMU instances we consider the input address as the IOVA, but when
+ * nesting is used we can't mix stage-1 and stage-2 addresses, so for
+ * nesting only the stage-1 address is considered the IOVA and is notified.
+ */
+if (stage == SMMU_STAGE_1) {
 tt = select_tt(cfg, iova);
 if (!tt) {
 return;
 }
 granule = tt->granule_sz;
-} else {
+} else if ((stage == SMMU_STAGE_2) && (cfg->stage != SMMU_NESTED)) {
 granule = cfg->s2cfg.granule_sz;
+} else {
+return;
 }
 
 } else {
@@ -1119,7 +1129,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
 /* invalidate an asid/vmid/iova range tuple in all mr's */
 static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, int vmid,
   dma_addr_t iova, uint8_t tg,
-  uint64_t num_pages)
+  uint64_t num_pages, int stage)
 {
 SMMUDevice *sdev;
 
@@ -1128,10 +1138,10 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int 
asid, int vmid,
 IOMMUNotifier *n;
 
 trace_smmuv3_inv_notifiers_iova(mr->parent_obj.name, asid, vmid,
-iova, tg, num_pages);
+iova, tg, num_pages, stage);
 
 IOMMU_NOTIFIER_FOREACH(n, mr) {
-smmuv3_notify_iova(mr, n, asid, vmid, iova, tg, num_pages);
+smmuv3_notify_iova(mr, n, asid, vmid, iova, tg, num_pages, stage);
 }
 }
 }
@@ -1162,7 +1172,7 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd, 
SMMUStage stage)
 
 if (!tg) {
 trace_smmuv3_range_inval(vmid, asid, addr, tg, 1, ttl, leaf, stage);
-smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, 1);
+smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, 1, stage);
 if (stage == SMMU_STAGE_1) {
 smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, 1, ttl);
 } else {
@@ -1185,7 +1195,7 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd, 
SMMUStage stage)
 num_pages = (mask + 1) >> granule;
 trace_smmuv3_range_inval(vmid, asid, addr, tg, num_pages,
  ttl, leaf, stage);
-smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, num_pages);
+smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, num_pages, stage);
 if (stage == SMMU_STAGE_1) {
 smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, num_pages, ttl);
 } else {
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index 593cc571da..be6c8f720b 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -55,7 +55,7 @@ smmuv3_cmdq_tlbi_s12_vmid(int vmid) "vmid=%d"
 smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid=0x%x"
 smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu 
mr=%s"
 smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu 
mr=%s"
-smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t iova, 
uint8_t tg, uint64_t num_pages) "iommu mr=%s asid

[PATCH v4 05/19] hw/arm/smmu: Split smmuv3_translate()

2024-07-01 Thread Mostafa Saleh
smmuv3_translate() does everything from STE/CD parsing to TLB lookup
and PTW.

Soon, when nesting is supported, stage-1 data (tt, CD) needs to be
translated using stage-2.

Split smmuv3_translate() to 3 functions:

- smmu_translate(): in smmu-common.c, which does the TLB lookup, PTW,
  TLB insertion, all the functions are already there, this just puts
  them together.
  This also simplifies the code as it consolidates event generation
  in case of TLB lookup permission failure or in TT selection.

- smmuv3_do_translate(): in smmuv3.c, Calls smmu_translate() and does
  the event population in case of errors.

 - smmuv3_translate(), now calls smmuv3_do_translate() for
   translation while the rest is the same.

Also, add stage in trace_smmuv3_translate_success()

Reviewed-by: Eric Auger 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c |  59 +++
 hw/arm/smmuv3.c  | 191 +--
 hw/arm/trace-events  |   2 +-
 include/hw/arm/smmu-common.h |   8 ++
 4 files changed, 141 insertions(+), 119 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 8a5858f69f..d94db6b34f 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -566,6 +566,65 @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, 
IOMMUAccessFlags perm,
 g_assert_not_reached();
 }
 
+SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
+ IOMMUAccessFlags flag, SMMUPTWEventInfo *info)
+{
+uint64_t page_mask, aligned_addr;
+SMMUTLBEntry *cached_entry = NULL;
+SMMUTransTableInfo *tt;
+int status;
+
+/*
+ * Combined attributes used for TLB lookup, as only one stage is supported,
+ * it will hold attributes based on the enabled stage.
+ */
+SMMUTransTableInfo tt_combined;
+
+if (cfg->stage == SMMU_STAGE_1) {
+/* Select stage1 translation table. */
+tt = select_tt(cfg, addr);
+if (!tt) {
+info->type = SMMU_PTW_ERR_TRANSLATION;
+info->stage = SMMU_STAGE_1;
+return NULL;
+}
+tt_combined.granule_sz = tt->granule_sz;
+tt_combined.tsz = tt->tsz;
+
+} else {
+/* Stage2. */
+tt_combined.granule_sz = cfg->s2cfg.granule_sz;
+tt_combined.tsz = cfg->s2cfg.tsz;
+}
+
+/*
+ * TLB lookup looks for granule and input size for a translation stage,
+ * as only one stage is supported right now, choose the right values
+ * from the configuration.
+ */
+page_mask = (1ULL << tt_combined.granule_sz) - 1;
+aligned_addr = addr & ~page_mask;
+
+cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, aligned_addr);
+if (cached_entry) {
+if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) {
+info->type = SMMU_PTW_ERR_PERMISSION;
+info->stage = cfg->stage;
+return NULL;
+}
+return cached_entry;
+}
+
+cached_entry = g_new0(SMMUTLBEntry, 1);
+status = smmu_ptw(cfg, aligned_addr, flag, cached_entry, info);
+if (status) {
+g_free(cached_entry);
+return NULL;
+}
+smmu_iotlb_insert(bs, cfg, cached_entry);
+return cached_entry;
+}
+
 /**
  * The bus number is used for lookup when SID based invalidation occurs.
  * In that case we lazily populate the SMMUPciBus array from the bus hash
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index dab3ad2db9..f98c157221 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -827,6 +827,75 @@ static void smmuv3_flush_config(SMMUDevice *sdev)
 g_hash_table_remove(bc->configs, sdev);
 }
 
+/* Do translation with TLB lookup. */
+static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
+ SMMUTransCfg *cfg,
+ SMMUEventInfo *event,
+ IOMMUAccessFlags flag,
+ SMMUTLBEntry **out_entry)
+{
+SMMUPTWEventInfo ptw_info = {};
+SMMUState *bs = ARM_SMMU(s);
+SMMUTLBEntry *cached_entry = NULL;
+
+cached_entry = smmu_translate(bs, cfg, addr, flag, &ptw_info);
+if (!cached_entry) {
+/* All faults from PTW has S2 field. */
+event->u.f_walk_eabt.s2 = (ptw_info.stage == SMMU_STAGE_2);
+switch (ptw_info.type) {
+case SMMU_PTW_ERR_WALK_EABT:
+event->type = SMMU_EVT_F_WALK_EABT;
+event->u.f_walk_eabt.addr = addr;
+event->u.f_walk_eabt.rnw = flag & 0x1;
+event->u.f_walk_eabt.class = SMMU_CLASS_TT;
+event->u.f_walk_eabt.addr2 = ptw_info.addr;
+break;
+case SMMU_PTW_ERR_TRANSLATION:
+if (PTW_RECORD_FAULT(cfg)) {
+event->type = SMMU_EVT_F_TRANSLATION;
+  

[PATCH v4 02/19] hw/arm/smmu: Fix IPA for stage-2 events

2024-07-01 Thread Mostafa Saleh
For the following events (ARM IHI 0070 F.b - 7.3 Event records):
- F_TRANSLATION
- F_ACCESS
- F_PERMISSION
- F_ADDR_SIZE

If fault occurs at stage 2, S2 == 1 and:
  - If translating an IPA for a transaction (whether by input to
stage 2-only configuration, or after successful stage 1 translation),
CLASS == IN, and IPA is provided.

At the moment only CLASS == IN is used which indicates input
translation.

However, this was not implemented correctly, as for stage 2, the code
only sets the S2 bit but not the IPA.

This field has the same bits as FetchAddr in F_WALK_EABT which is
populated correctly, so we don’t change that.
The setting of this field should be done from the walker as the IPA address
wouldn't be known in case of nesting.

For stage 1, the spec says:
  If fault occurs at stage 1, S2 == 0 and:
  CLASS == IN, IPA is UNKNOWN.

So, no need to set it for stage 1, as ptw_info is zero-initialised in
smmuv3_translate().

Fixes: e703f7076a “hw/arm/smmuv3: Add page table walk for stage-2”
Reviewed-by: Eric Auger 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 10 ++
 hw/arm/smmuv3.c  |  4 
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index eb2356bc35..8a8c718e6b 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -448,7 +448,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
  */
 if (ipa >= (1ULL << inputsize)) {
 info->type = SMMU_PTW_ERR_TRANSLATION;
-goto error;
+goto error_ipa;
 }
 
 while (level < VMSA_LEVELS) {
@@ -494,13 +494,13 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
  */
 if (!PTE_AF(pte) && !cfg->s2cfg.affd) {
 info->type = SMMU_PTW_ERR_ACCESS;
-goto error;
+goto error_ipa;
 }
 
 s2ap = PTE_AP(pte);
 if (is_permission_fault_s2(s2ap, perm)) {
 info->type = SMMU_PTW_ERR_PERMISSION;
-goto error;
+goto error_ipa;
 }
 
 /*
@@ -509,7 +509,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
  */
 if (gpa >= (1ULL << cfg->s2cfg.eff_ps)) {
 info->type = SMMU_PTW_ERR_ADDR_SIZE;
-goto error;
+goto error_ipa;
 }
 
 tlbe->entry.translated_addr = gpa;
@@ -522,6 +522,8 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
 }
 info->type = SMMU_PTW_ERR_TRANSLATION;
 
+error_ipa:
+info->addr = ipa;
 error:
 info->stage = 2;
 tlbe->entry.perm = IOMMU_NONE;
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 2d1e0d55ec..9dd3ea48e4 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -949,6 +949,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 if (PTW_RECORD_FAULT(cfg)) {
 event.type = SMMU_EVT_F_TRANSLATION;
 event.u.f_translation.addr = addr;
+event.u.f_translation.addr2 = ptw_info.addr;
 event.u.f_translation.rnw = flag & 0x1;
 }
 break;
@@ -956,6 +957,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 if (PTW_RECORD_FAULT(cfg)) {
 event.type = SMMU_EVT_F_ADDR_SIZE;
 event.u.f_addr_size.addr = addr;
+event.u.f_addr_size.addr2 = ptw_info.addr;
 event.u.f_addr_size.rnw = flag & 0x1;
 }
 break;
@@ -963,6 +965,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 if (PTW_RECORD_FAULT(cfg)) {
 event.type = SMMU_EVT_F_ACCESS;
 event.u.f_access.addr = addr;
+event.u.f_access.addr2 = ptw_info.addr;
 event.u.f_access.rnw = flag & 0x1;
 }
 break;
@@ -970,6 +973,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 if (PTW_RECORD_FAULT(cfg)) {
 event.type = SMMU_EVT_F_PERMISSION;
 event.u.f_permission.addr = addr;
+event.u.f_permission.addr2 = ptw_info.addr;
 event.u.f_permission.rnw = flag & 0x1;
 }
 break;
-- 
2.45.2.803.g4e1b14247a-goog




[PATCH v4 11/19] hw/arm/smmu-common: Support nested translation

2024-07-01 Thread Mostafa Saleh
When nested translation is requested, do the following:

- Translate stage-1 table address IPA into PA through stage-2.
- Translate stage-1 table walk output (IPA) through stage-2.
- Create a single TLB entry from stage-1 and stage-2 translations
  using logic introduced before.

For stage-1 table translation, the spec (ARM IHI 0070 F.b) says in:
7.3.12 F_WALK_EABT:
Translation of an IPA for Stage 1 descriptor fetch:
S2 == 1 (stage 2), CLASS == T
So F_WALK_EABT is used, which propagates to CLASS == TT.

smmu_ptw() has a new argument, SMMUState, which includes the TLB, as
the stage-1 table address can be cached in there.

Also, in smmu_ptw() a separate path is used for nesting to simplify the
code, although some logic could be combined.

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 72 +++-
 include/hw/arm/smmu-common.h |  2 +-
 2 files changed, 64 insertions(+), 10 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 24b7d09e2b..71afd486ba 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -318,6 +318,38 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, 
dma_addr_t iova)
 return NULL;
 }
 
+/* Translate stage-1 table address using stage-2 page table. */
+static inline int translate_table_addr_ipa(dma_addr_t *table_addr,
+   SMMUTransCfg *cfg,
+   SMMUPTWEventInfo *info,
+   SMMUState *bs)
+{
+dma_addr_t addr = *table_addr;
+SMMUTLBEntry *cached_entry;
+int asid;
+
+/*
+ * The translation table walks performed from TTB0 or TTB1 are always
+ * performed in IPA space if stage 2 translations are enabled.
+ */
+asid = cfg->asid;
+cfg->stage = SMMU_STAGE_2;
+cfg->asid = -1;
+cached_entry = smmu_translate(bs, cfg, addr, IOMMU_RO, info);
+cfg->asid = asid;
+cfg->stage = SMMU_NESTED;
+
+if (cached_entry) {
+*table_addr = CACHED_ENTRY_TO_ADDR(cached_entry, addr);
+return 0;
+}
+
+info->stage = SMMU_STAGE_2;
+info->type = SMMU_PTW_ERR_WALK_EABT;
+info->addr = addr;
+return -EINVAL;
+}
+
 /**
  * smmu_ptw_64_s1 - VMSAv8-64 Walk of the page tables for a given IOVA
  * @cfg: translation config
@@ -333,7 +365,8 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t 
iova)
  */
 static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
   dma_addr_t iova, IOMMUAccessFlags perm,
-  SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
+  SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info,
+  SMMUState *bs)
 {
 dma_addr_t baseaddr, indexmask;
 SMMUStage stage = cfg->stage;
@@ -381,6 +414,11 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
 goto error;
 }
 baseaddr = get_table_pte_address(pte, granule_sz);
+if (cfg->stage == SMMU_NESTED) {
+if (translate_table_addr_ipa(&baseaddr, cfg, info, bs)) {
+goto error;
+}
+}
 level++;
 continue;
 } else if (is_page_pte(pte, level)) {
@@ -568,10 +606,8 @@ error:
  * combine S1 and S2 TLB entries into a single entry.
 * As a result the S1 entry is overridden with combined data.
  */
-static void __attribute__((unused)) combine_tlb(SMMUTLBEntry *tlbe,
-SMMUTLBEntry *tlbe_s2,
-dma_addr_t iova,
-SMMUTransCfg *cfg)
+static void combine_tlb(SMMUTLBEntry *tlbe, SMMUTLBEntry *tlbe_s2,
+dma_addr_t iova, SMMUTransCfg *cfg)
 {
 if (tlbe_s2->entry.addr_mask < tlbe->entry.addr_mask) {
 tlbe->entry.addr_mask = tlbe_s2->entry.addr_mask;
@@ -596,14 +632,19 @@ static void __attribute__((unused)) 
combine_tlb(SMMUTLBEntry *tlbe,
  * @perm: tentative access type
  * @tlbe: returned entry
  * @info: ptw event handle
+ * @bs: smmu state which includes TLB instance
  *
  * return 0 on success
  */
 int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
- SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
+ SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info, SMMUState *bs)
 {
+int ret;
+SMMUTLBEntry tlbe_s2;
+dma_addr_t ipa;
+
 if (cfg->stage == SMMU_STAGE_1) {
-return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info);
+return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info, bs);
 } else if (cfg->stage == SMMU_STAGE_2) {
 /*
  * If bypassing stage 1(or unimplemented), the input address is passed
@@ -621,7 +662,20 @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, 
IOMMUAccessFlags perm,
 return smmu_ptw_64_s2(cfg, iova, perm, tlbe, info);
 }
 
-g_assert_not_reach

[PATCH v4 09/19] hw/arm/smmu-common: Rework TLB lookup for nesting

2024-07-01 Thread Mostafa Saleh
In the next patch, combine_tlb() will be added, which combines 2 TLB
entries into one for nested translations, choosing the granule
and level from the smallest entry.

This means that with nested translation, an entry can be cached with
the granule of stage-2 and not stage-1.

However, currently, the lookup for an IOVA is done with the input-stage
granule, which is stage-1 for a nested configuration; this will not
work with the above logic.
This patch reworks the lookup in that case, so it falls back to the
stage-2 granule if no entry is found using the stage-1 granule.

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 36 ++--
 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 21982621c0..0840b5cffd 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -66,8 +66,10 @@ SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t 
iova,
 return key;
 }
 
-SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
-SMMUTransTableInfo *tt, hwaddr iova)
+static SMMUTLBEntry *smmu_iotlb_lookup_all_levels(SMMUState *bs,
+  SMMUTransCfg *cfg,
+  SMMUTransTableInfo *tt,
+  hwaddr iova)
 {
 uint8_t tg = (tt->granule_sz - 10) / 2;
 uint8_t inputsize = 64 - tt->tsz;
@@ -88,6 +90,36 @@ SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg 
*cfg,
 }
 level++;
 }
+return entry;
+}
+
+/**
+ * smmu_iotlb_lookup - Look up a TLB entry.
+ * @bs: SMMU state which includes the TLB instance
+ * @cfg: Configuration of the translation
+ * @tt: Translation table info (granule and tsz)
+ * @iova: IOVA address to lookup
+ *
+ * returns a valid entry on success, otherwise NULL.
+ * In case of nested translation, tt can be updated to include
+ * the granule of the found entry as it might differ from
+ * the IOVA granule.
+ */
+SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
+SMMUTransTableInfo *tt, hwaddr iova)
+{
+SMMUTLBEntry *entry = NULL;
+
+entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
+/*
+ * For nested translation also try the s2 granule, as the TLB will insert
+ * it if the size of s2 tlb entry was smaller.
+ */
+if (!entry && (cfg->stage == SMMU_NESTED) &&
+(cfg->s2cfg.granule_sz != tt->granule_sz)) {
+tt->granule_sz = cfg->s2cfg.granule_sz;
+entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
+}
 
 if (entry) {
 cfg->iotlb_hits++;
-- 
2.45.2.803.g4e1b14247a-goog




[PATCH v4 12/19] hw/arm/smmu: Support nesting in smmuv3_range_inval()

2024-07-01 Thread Mostafa Saleh
With nesting, we would need to invalidate IPAs without
over-invalidating stage-1 IOVAs. This can be done by
distinguishing IPAs in the TLBs by having ASID=-1.
To achieve that, rework the invalidation for IPAs to have a
separate function, while for IOVA invalidation ASID=-1 means
invalidate for all ASIDs.
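
To illustrate the tagging (a sketch, not part of the patch; it only uses
smmu_get_iotlb_key() as already defined in this series):

    /* IPA entries (stage-2-only translations) are keyed with ASID == -1;
     * stage-1/nested entries carry the real ASID. */
    SMMUIOTLBKey ipa_key  = smmu_get_iotlb_key(-1, vmid, ipa & ~mask,
                                               tg, level);
    SMMUIOTLBKey iova_key = smmu_get_iotlb_key(asid, vmid, iova & ~mask,
                                               tg, level);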

Reviewed-by: Eric Auger 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 47 
 hw/arm/smmuv3.c  | 23 --
 hw/arm/trace-events  |  2 +-
 include/hw/arm/smmu-common.h |  3 ++-
 4 files changed, 66 insertions(+), 9 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 71afd486ba..5bf9eadeff 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -195,6 +195,25 @@ static gboolean 
smmu_hash_remove_by_asid_vmid_iova(gpointer key, gpointer value,
((entry->iova & ~info->mask) == info->iova);
 }
 
+static gboolean smmu_hash_remove_by_vmid_ipa(gpointer key, gpointer value,
+ gpointer user_data)
+{
+SMMUTLBEntry *iter = (SMMUTLBEntry *)value;
+IOMMUTLBEntry *entry = &iter->entry;
+SMMUIOTLBPageInvInfo *info = (SMMUIOTLBPageInvInfo *)user_data;
+SMMUIOTLBKey iotlb_key = *(SMMUIOTLBKey *)key;
+
+if (info->asid >= 0) {
+/* This is a stage-1 address. */
+return false;
+}
+if (info->vmid != SMMU_IOTLB_VMID(iotlb_key)) {
+return false;
+}
+return ((info->iova & ~entry->addr_mask) == entry->iova) ||
+   ((entry->iova & ~info->mask) == info->iova);
+}
+
 void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova,
  uint8_t tg, uint64_t num_pages, uint8_t ttl)
 {
@@ -223,6 +242,34 @@ void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, 
dma_addr_t iova,
 &info);
 }
 
+/*
+ * Similar to smmu_iotlb_inv_iova(), but for Stage-2, where ASID is always
+ * -1; in Stage-1 invalidation, ASID = -1 means don't care.
+ */
+void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, dma_addr_t ipa, uint8_t tg,
+uint64_t num_pages, uint8_t ttl)
+{
+uint8_t granule = tg ? tg * 2 + 10 : 12;
+int asid = -1;
+
+   if (ttl && (num_pages == 1)) {
+SMMUIOTLBKey key = smmu_get_iotlb_key(asid, vmid, ipa, tg, ttl);
+
+if (g_hash_table_remove(s->iotlb, &key)) {
+return;
+}
+}
+
+SMMUIOTLBPageInvInfo info = {
+.iova = ipa,
+.vmid = vmid,
+.mask = (num_pages * 1 << granule) - 1};
+
+g_hash_table_foreach_remove(s->iotlb,
+smmu_hash_remove_by_vmid_ipa,
+&info);
+}
+
 void smmu_iotlb_inv_asid(SMMUState *s, int asid)
 {
 trace_smmu_iotlb_inv_asid(asid);
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 86f95c1e40..e5ecd93258 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1136,7 +1136,7 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int 
asid, int vmid,
 }
 }
 
-static void smmuv3_range_inval(SMMUState *s, Cmd *cmd)
+static void smmuv3_range_inval(SMMUState *s, Cmd *cmd, SMMUStage stage)
 {
 dma_addr_t end, addr = CMD_ADDR(cmd);
 uint8_t type = CMD_TYPE(cmd);
@@ -1161,9 +1161,13 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd)
 }
 
 if (!tg) {
-trace_smmuv3_range_inval(vmid, asid, addr, tg, 1, ttl, leaf);
+trace_smmuv3_range_inval(vmid, asid, addr, tg, 1, ttl, leaf, stage);
 smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, 1);
-smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, 1, ttl);
+if (stage == SMMU_STAGE_1) {
+smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, 1, ttl);
+} else {
+smmu_iotlb_inv_ipa(s, vmid, addr, tg, 1, ttl);
+}
 return;
 }
 
@@ -1179,9 +1183,14 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd)
 uint64_t mask = dma_aligned_pow2_mask(addr, end, 64);
 
 num_pages = (mask + 1) >> granule;
-trace_smmuv3_range_inval(vmid, asid, addr, tg, num_pages, ttl, leaf);
+trace_smmuv3_range_inval(vmid, asid, addr, tg, num_pages,
+ ttl, leaf, stage);
 smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, num_pages);
-smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, num_pages, ttl);
+if (stage == SMMU_STAGE_1) {
+smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, num_pages, ttl);
+} else {
+smmu_iotlb_inv_ipa(s, vmid, addr, tg, num_pages, ttl);
+}
 addr += mask + 1;
 }
 }
@@ -1340,7 +1349,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
 cmd_error = SMMU_CERROR_ILL;
 break;
 }
-smmuv3_range_inval(bs, &cmd);
+smmuv3_range_inval(bs, &cmd, SMMU_STAGE_

[PATCH v4 06/19] hw/arm/smmu: Consolidate ASID and VMID types

2024-07-01 Thread Mostafa Saleh
ASID and VMID used to be uint16_t in the translation config; however,
in other contexts they can be int, as -1 is used in TLB invalidation
to represent all (don’t care).
When stage-2 was added, asid was set to -1 in stage-2 configs and vmid
to -1 in stage-1 configs. However, that meant they were stored as 65535;
this was not an issue as nesting was not supported and no command or
lookup used both.

With nesting, it’s critical to get this right as translation must be
tagged correctly with ASID/VMID, and with ASID=-1 meaning stage-2.
Represent ASID/VMID everywhere as int.
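
For illustration, a minimal standalone program (editor's sketch, not part
of the patch) showing the truncation described above:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint16_t vmid_u16 = -1;  /* wraps to 65535; the sentinel is lost */
    int vmid_int = -1;       /* keeps the -1 sentinel */

    printf("%u vs %d\n", vmid_u16, vmid_int);  /* 65535 vs -1 */
    printf("%d\n", vmid_u16 == -1);            /* 0: the check fails */
    printf("%d\n", vmid_int == -1);            /* 1: the check works */
    return 0;
}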

Reviewed-by: Eric Auger 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 10 +-
 hw/arm/smmuv3.c  |  4 ++--
 hw/arm/trace-events  | 18 +-
 include/hw/arm/smmu-common.h | 14 +++---
 4 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index d94db6b34f..21982621c0 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -57,7 +57,7 @@ static gboolean smmu_iotlb_key_equal(gconstpointer v1, 
gconstpointer v2)
(k1->vmid == k2->vmid);
 }
 
-SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint16_t vmid, uint64_t iova,
+SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova,
 uint8_t tg, uint8_t level)
 {
 SMMUIOTLBKey key = {.asid = asid, .vmid = vmid, .iova = iova,
@@ -130,7 +130,7 @@ void smmu_iotlb_inv_all(SMMUState *s)
 static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value,
  gpointer user_data)
 {
-uint16_t asid = *(uint16_t *)user_data;
+int asid = *(int *)user_data;
 SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
 
 return SMMU_IOTLB_ASID(*iotlb_key) == asid;
@@ -139,7 +139,7 @@ static gboolean smmu_hash_remove_by_asid(gpointer key, 
gpointer value,
 static gboolean smmu_hash_remove_by_vmid(gpointer key, gpointer value,
  gpointer user_data)
 {
-uint16_t vmid = *(uint16_t *)user_data;
+int vmid = *(int *)user_data;
 SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
 
 return SMMU_IOTLB_VMID(*iotlb_key) == vmid;
@@ -191,13 +191,13 @@ void smmu_iotlb_inv_iova(SMMUState *s, int asid, int 
vmid, dma_addr_t iova,
 &info);
 }
 
-void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid)
+void smmu_iotlb_inv_asid(SMMUState *s, int asid)
 {
 trace_smmu_iotlb_inv_asid(asid);
 g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid, &asid);
 }
 
-void smmu_iotlb_inv_vmid(SMMUState *s, uint16_t vmid)
+void smmu_iotlb_inv_vmid(SMMUState *s, int vmid)
 {
 trace_smmu_iotlb_inv_vmid(vmid);
 g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid, &vmid);
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index f98c157221..cc61708160 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1243,7 +1243,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
 }
 case SMMU_CMD_TLBI_NH_ASID:
 {
-uint16_t asid = CMD_ASID(&cmd);
+int asid = CMD_ASID(&cmd);
 
 if (!STAGE1_SUPPORTED(s)) {
 cmd_error = SMMU_CERROR_ILL;
@@ -1276,7 +1276,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
 break;
 case SMMU_CMD_TLBI_S12_VMALL:
 {
-uint16_t vmid = CMD_VMID(&cmd);
+int vmid = CMD_VMID(&cmd);
 
 if (!STAGE2_SUPPORTED(s)) {
 cmd_error = SMMU_CERROR_ILL;
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index cc12924a84..09ccd39548 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -11,13 +11,13 @@ smmu_ptw_page_pte(int stage, int level,  uint64_t iova, 
uint64_t baseaddr, uint6
 smmu_ptw_block_pte(int stage, int level, uint64_t baseaddr, uint64_t pteaddr, 
uint64_t pte, uint64_t iova, uint64_t gpa, int bsize_mb) "stage=%d level=%d 
base@=0x%"PRIx64" pte@=0x%"PRIx64" pte=0x%"PRIx64" iova=0x%"PRIx64" block 
address = 0x%"PRIx64" block size = %d MiB"
 smmu_get_pte(uint64_t baseaddr, int index, uint64_t pteaddr, uint64_t pte) 
"baseaddr=0x%"PRIx64" index=0x%x, pteaddr=0x%"PRIx64", pte=0x%"PRIx64
 smmu_iotlb_inv_all(void) "IOTLB invalidate all"
-smmu_iotlb_inv_asid(uint16_t asid) "IOTLB invalidate asid=%d"
-smmu_iotlb_inv_vmid(uint16_t vmid) "IOTLB invalidate vmid=%d"
-smmu_iotlb_inv_iova(uint16_t asid, uint64_t addr) "IOTLB invalidate asid=%d 
addr=0x%"PRIx64
+smmu_iotlb_inv_asid(int asid) "IOTLB invalidate asid=%d"
+smmu_iotlb_inv_vmid(int vmid) "IOTLB invalidate vmid=%d"
+smmu_iotlb_inv_iova(int asid, uint64_t addr) "IOTLB invalidate asid=%d 
addr=0x%"PRIx64
 smmu_inv_notifiers_mr(const char *name) "iommu mr=%s"
-smmu_iotlb_lookup_hit(uint16_t

[PATCH v4 07/19] hw/arm/smmu: Introduce CACHED_ENTRY_TO_ADDR

2024-07-01 Thread Mostafa Saleh
Soon, smmuv3_do_translate() will be used to translate the CD and the
TTBx. Instead of rewriting the same logic to convert the returned
cached entry to an address, add a new macro, CACHED_ENTRY_TO_ADDR.
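
For reference, the macro simply adds the offset bits of the input address
to the cached page/block base; an equivalent function form (editor's
sketch, field names from this series):

static hwaddr cached_entry_to_addr(SMMUTLBEntry *ent, hwaddr addr)
{
    /* addr_mask covers the offset bits within the cached page/block. */
    return ent->entry.translated_addr + (addr & ent->entry.addr_mask);
}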

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3.c  | 3 +--
 include/hw/arm/smmu-common.h | 3 +++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index cc61708160..229b3c388c 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -950,8 +950,7 @@ epilogue:
 switch (status) {
 case SMMU_TRANS_SUCCESS:
 entry.perm = cached_entry->entry.perm;
-entry.translated_addr = cached_entry->entry.translated_addr +
-(addr & cached_entry->entry.addr_mask);
+entry.translated_addr = CACHED_ENTRY_TO_ADDR(cached_entry, addr);
 entry.addr_mask = cached_entry->entry.addr_mask;
 trace_smmuv3_translate_success(mr->parent_obj.name, sid, addr,
entry.translated_addr, entry.perm,
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index 96eb017e50..09d3b9e734 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -37,6 +37,9 @@
 #define VMSA_IDXMSK(isz, strd, lvl) ((1ULL << \
  VMSA_BIT_LVL(isz, strd, lvl)) - 1)
 
+#define CACHED_ENTRY_TO_ADDR(ent, addr)  ((ent)->entry.translated_addr + \
+ ((addr) & (ent)->entry.addr_mask))
+
 /*
  * Page table walk error types
  */
-- 
2.45.2.803.g4e1b14247a-goog




[PATCH v4 19/19] hw/arm/smmu: Refactor SMMU OAS

2024-07-01 Thread Mostafa Saleh
SMMUv3 OAS is currently hardcoded in the code to 44 bits, for nested
configurations that can be a problem, as stage-2 might be shared with
the CPU which might have different PARANGE, and according to SMMU manual
ARM IHI 0070F.b:
6.3.6 SMMU_IDR5, OAS must match the system physical address size.

This patch doesn't change the SMMU OAS, but refactors the code to
make it easier to do that:
- Rely everywhere on IDR5 for reading OAS instead of using the
  SMMU_IDR5_OAS macro, so it is easier to just change IDR5 and
  have it propagate correctly.
- Add additional checks for when OAS is greater than 48 bits.
- Remove unused functions/macros: pa_range/MAX_PA.
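
For context, IDR5.OAS is an encoding rather than a raw bit count; the
oas2bits() helper used in the diff below maps it to an address width. A
sketch of that mapping, with values taken from the SMMU spec (ARM IHI
0070); the 52-bit encoding is omitted since, per the cover letter, the
PTW here doesn't support it:

static inline int oas2bits(int oas_field)
{
    switch (oas_field) {
    case 0: return 32;
    case 1: return 36;
    case 2: return 40;
    case 3: return 42;
    case 4: return 44;
    case 5: return 48;
    }
    return -1;  /* reserved/unsupported encoding */
}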

Reviewed-by: Eric Auger 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c |  7 ---
 hw/arm/smmuv3-internal.h | 13 -
 hw/arm/smmuv3.c  | 35 ---
 3 files changed, 32 insertions(+), 23 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index faba4adc49..2cff80e5dd 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -452,7 +452,8 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
 inputsize = 64 - tt->tsz;
 level = 4 - (inputsize - 4) / stride;
 indexmask = VMSA_IDXMSK(inputsize, stride, level);
-baseaddr = extract64(tt->ttb, 0, 48);
+
+baseaddr = extract64(tt->ttb, 0, cfg->oas);
 baseaddr &= ~indexmask;
 
 while (level < VMSA_LEVELS) {
@@ -576,8 +577,8 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
  * Get the ttb from concatenated structure.
  * The offset is the idx * size of each ttb(number of ptes * (sizeof(pte))
  */
-uint64_t baseaddr = extract64(cfg->s2cfg.vttb, 0, 48) + (1 << stride) *
-  idx * sizeof(uint64_t);
+uint64_t baseaddr = extract64(cfg->s2cfg.vttb, 0, cfg->s2cfg.eff_ps) +
+  (1 << stride) * idx * sizeof(uint64_t);
 dma_addr_t indexmask = VMSA_IDXMSK(inputsize, stride, level);
 
 baseaddr &= ~indexmask;
diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
index 0f3ecec804..0ebf2eebcf 100644
--- a/hw/arm/smmuv3-internal.h
+++ b/hw/arm/smmuv3-internal.h
@@ -602,19 +602,6 @@ static inline int oas2bits(int oas_field)
 return -1;
 }
 
-static inline int pa_range(STE *ste)
-{
-int oas_field = MIN(STE_S2PS(ste), SMMU_IDR5_OAS);
-
-if (!STE_S2AA64(ste)) {
-return 40;
-}
-
-return oas2bits(oas_field);
-}
-
-#define MAX_PA(ste) ((1 << pa_range(ste)) - 1)
-
 /* CD fields */
 
 #define CD_VALID(x)   extract32((x)->word[0], 31, 1)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 88378e83dd..6954b385c7 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -410,10 +410,10 @@ static bool s2t0sz_valid(SMMUTransCfg *cfg)
 }
 
 if (cfg->s2cfg.granule_sz == 16) {
-return (cfg->s2cfg.tsz >= 64 - oas2bits(SMMU_IDR5_OAS));
+return (cfg->s2cfg.tsz >= 64 - cfg->s2cfg.eff_ps);
 }
 
-return (cfg->s2cfg.tsz >= MAX(64 - oas2bits(SMMU_IDR5_OAS), 16));
+return (cfg->s2cfg.tsz >= MAX(64 - cfg->s2cfg.eff_ps, 16));
 }
 
 /*
@@ -434,8 +434,11 @@ static bool s2_pgtable_config_valid(uint8_t sl0, uint8_t 
t0sz, uint8_t gran)
 return nr_concat <= VMSA_MAX_S2_CONCAT;
 }
 
-static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
+static int decode_ste_s2_cfg(SMMUv3State *s, SMMUTransCfg *cfg,
+ STE *ste)
 {
+uint8_t oas = FIELD_EX32(s->idr[5], IDR5, OAS);
+
 if (STE_S2AA64(ste) == 0x0) {
 qemu_log_mask(LOG_UNIMP,
   "SMMUv3 AArch32 tables not supported\n");
@@ -468,7 +471,15 @@ static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
 }
 
+/* For AA64, the effective S2PS size is capped to the OAS. */
-cfg->s2cfg.eff_ps = oas2bits(MIN(STE_S2PS(ste), SMMU_IDR5_OAS));
+cfg->s2cfg.eff_ps = oas2bits(MIN(STE_S2PS(ste), oas));
+/*
+ * For SMMUv3.1 and later, when OAS == IAS == 52, the stage 2 input
+ * range is further limited to 48 bits unless STE.S2TG indicates a
+ * 64KB granule.
+ */
+if (cfg->s2cfg.granule_sz != 16) {
+cfg->s2cfg.eff_ps = MIN(cfg->s2cfg.eff_ps, 48);
+}
 /*
  * It is ILLEGAL for the address in S2TTB to be outside the range
  * described by the effective S2PS value.
@@ -544,6 +555,7 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
   STE *ste, SMMUEventInfo *event)
 {
 uint32_t config;
+uint8_t oas = FIELD_EX32(s->idr[5], IDR5, OAS);
 int ret;
 
 if (!STE_VALID(ste)) {
@@ -587,8 +599,8 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
  * Stage-1 OAS defaults to OAS even if not enabled as it would be used
  * in input address check for stage-2.
  */
-cfg->oas = oas2bits(SMMU_IDR5_OAS);
-ret = decode_ste_s2_cfg(cfg, ste);

[PATCH v4 03/19] hw/arm/smmuv3: Fix encoding of CLASS in events

2024-07-01 Thread Mostafa Saleh
The SMMUv3 spec (ARM IHI 0070 F.b - 7.3 Event records) defines the
class of events faults as:

CLASS: The class of the operation that caused the fault:
- 0b00: CD, CD fetch.
- 0b01: TTD, Stage 1 translation table fetch.
- 0b10: IN, Input address

However, this value was not set and was left as 0, which means CD, not
IN (0b10).
While at it, add an enum for class as it would be used for nesting.
However, at the moment stage-1 and stage-2 use the same class values.

Fixes: 9bde7f0674 “hw/arm/smmuv3: Implement translate callback”
Reviewed-by: Eric Auger 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3-internal.h | 6 ++
 hw/arm/smmuv3.c  | 6 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
index e4dd11e1e6..0f3ecec804 100644
--- a/hw/arm/smmuv3-internal.h
+++ b/hw/arm/smmuv3-internal.h
@@ -32,6 +32,12 @@ typedef enum SMMUTranslationStatus {
 SMMU_TRANS_SUCCESS,
 } SMMUTranslationStatus;
 
+typedef enum SMMUTranslationClass {
+SMMU_CLASS_CD,
+SMMU_CLASS_TT,
+SMMU_CLASS_IN,
+} SMMUTranslationClass;
+
 /* MMIO Registers */
 
 REG32(IDR0,0x0)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 9dd3ea48e4..1eb5b160d2 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -942,7 +942,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 event.type = SMMU_EVT_F_WALK_EABT;
 event.u.f_walk_eabt.addr = addr;
 event.u.f_walk_eabt.rnw = flag & 0x1;
-event.u.f_walk_eabt.class = 0x1;
+event.u.f_walk_eabt.class = SMMU_CLASS_TT;
 event.u.f_walk_eabt.addr2 = ptw_info.addr;
 break;
 case SMMU_PTW_ERR_TRANSLATION:
@@ -950,6 +950,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 event.type = SMMU_EVT_F_TRANSLATION;
 event.u.f_translation.addr = addr;
 event.u.f_translation.addr2 = ptw_info.addr;
+event.u.f_translation.class = SMMU_CLASS_IN;
 event.u.f_translation.rnw = flag & 0x1;
 }
 break;
@@ -958,6 +959,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 event.type = SMMU_EVT_F_ADDR_SIZE;
 event.u.f_addr_size.addr = addr;
 event.u.f_addr_size.addr2 = ptw_info.addr;
+event.u.f_translation.class = SMMU_CLASS_IN;
 event.u.f_addr_size.rnw = flag & 0x1;
 }
 break;
@@ -966,6 +968,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 event.type = SMMU_EVT_F_ACCESS;
 event.u.f_access.addr = addr;
 event.u.f_access.addr2 = ptw_info.addr;
+event.u.f_translation.class = SMMU_CLASS_IN;
 event.u.f_access.rnw = flag & 0x1;
 }
 break;
@@ -974,6 +977,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 event.type = SMMU_EVT_F_PERMISSION;
 event.u.f_permission.addr = addr;
 event.u.f_permission.addr2 = ptw_info.addr;
+event.u.f_translation.class = SMMU_CLASS_IN;
 event.u.f_permission.rnw = flag & 0x1;
 }
 break;
-- 
2.45.2.803.g4e1b14247a-goog




[PATCH v4 04/19] hw/arm/smmu: Use enum for SMMU stage

2024-07-01 Thread Mostafa Saleh
Currently, the translation stage is represented as an int, where 1 is stage-1
and 2 is stage-2. When nesting is added, using 3 to represent it would be
confusing, so we use an enum instead.

While keeping the same values, this is useful for:
 - Doing tricks with bit masks, where BIT(0) is stage-1 and BIT(1) is
   stage-2, and both together mean nested (see the sketch below).
 - Tracing, as stage is printed as int.
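
A sketch of the resulting enum (values as described above, so existing
comparisons keep working):

typedef enum SMMUStage {
    SMMU_STAGE_1 = 1,  /* BIT(0) */
    SMMU_STAGE_2,      /* 2, BIT(1) */
    SMMU_NESTED,       /* 3 == SMMU_STAGE_1 | SMMU_STAGE_2 */
} SMMUStage;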

Reviewed-by: Eric Auger 
Reviewed-by: Alex Bennée 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 14 +++---
 hw/arm/smmuv3.c  | 15 ---
 include/hw/arm/smmu-common.h | 11 +--
 3 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 8a8c718e6b..8a5858f69f 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -304,7 +304,7 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
   SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
 {
 dma_addr_t baseaddr, indexmask;
-int stage = cfg->stage;
+SMMUStage stage = cfg->stage;
 SMMUTransTableInfo *tt = select_tt(cfg, iova);
 uint8_t level, granule_sz, inputsize, stride;
 
@@ -402,7 +402,7 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
 info->type = SMMU_PTW_ERR_TRANSLATION;
 
 error:
-info->stage = 1;
+info->stage = SMMU_STAGE_1;
 tlbe->entry.perm = IOMMU_NONE;
 return -EINVAL;
 }
@@ -425,7 +425,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
   dma_addr_t ipa, IOMMUAccessFlags perm,
   SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
 {
-const int stage = 2;
+const SMMUStage stage = SMMU_STAGE_2;
 int granule_sz = cfg->s2cfg.granule_sz;
 /* ARM DDI0487I.a: Table D8-7. */
 int inputsize = 64 - cfg->s2cfg.tsz;
@@ -525,7 +525,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
 error_ipa:
 info->addr = ipa;
 error:
-info->stage = 2;
+info->stage = SMMU_STAGE_2;
 tlbe->entry.perm = IOMMU_NONE;
 return -EINVAL;
 }
@@ -544,9 +544,9 @@ error:
 int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
  SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
 {
-if (cfg->stage == 1) {
+if (cfg->stage == SMMU_STAGE_1) {
 return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info);
-} else if (cfg->stage == 2) {
+} else if (cfg->stage == SMMU_STAGE_2) {
 /*
  * If bypassing stage 1(or unimplemented), the input address is passed
  * directly to stage 2 as IPA. If the input address of a transaction
@@ -555,7 +555,7 @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, 
IOMMUAccessFlags perm,
  */
 if (iova >= (1ULL << cfg->oas)) {
 info->type = SMMU_PTW_ERR_ADDR_SIZE;
-info->stage = 1;
+info->stage = SMMU_STAGE_1;
 tlbe->entry.perm = IOMMU_NONE;
 return -EINVAL;
 }
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 1eb5b160d2..dab3ad2db9 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -34,7 +34,8 @@
 #include "smmuv3-internal.h"
 #include "smmu-internal.h"
 
-#define PTW_RECORD_FAULT(cfg)   (((cfg)->stage == 1) ? (cfg)->record_faults : \
+#define PTW_RECORD_FAULT(cfg)   (((cfg)->stage == SMMU_STAGE_1) ? \
+ (cfg)->record_faults : \
  (cfg)->s2cfg.record_faults)
 
 /**
@@ -402,7 +403,7 @@ static bool s2_pgtable_config_valid(uint8_t sl0, uint8_t 
t0sz, uint8_t gran)
 
 static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
 {
-cfg->stage = 2;
+cfg->stage = SMMU_STAGE_2;
 
 if (STE_S2AA64(ste) == 0x0) {
 qemu_log_mask(LOG_UNIMP,
@@ -678,7 +679,7 @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, 
SMMUEventInfo *event)
 
 /* we support only those at the moment */
 cfg->aa64 = true;
-cfg->stage = 1;
+cfg->stage = SMMU_STAGE_1;
 
 cfg->oas = oas2bits(CD_IPS(cd));
 cfg->oas = MIN(oas2bits(SMMU_IDR5_OAS), cfg->oas);
@@ -762,7 +763,7 @@ static int smmuv3_decode_config(IOMMUMemoryRegion *mr, 
SMMUTransCfg *cfg,
 return ret;
 }
 
-if (cfg->aborted || cfg->bypassed || (cfg->stage == 2)) {
+if (cfg->aborted || cfg->bypassed || (cfg->stage == SMMU_STAGE_2)) {
 return 0;
 }
 
@@ -882,7 +883,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 goto epilogue;
 }
 
-if (cfg->stage == 1) {
+if (cfg->stage == SMMU_STAGE_1) {
 /* Select stage1 translation table. */
 tt = select_tt(cfg, addr);
 if (!tt) {
@@ -919,7 +920,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
  * nesting is not supported. So it is sufficient to check the
  * translation stage to know the TLB stage for now.
  */
-  

[PATCH v4 01/19] hw/arm/smmu-common: Add missing size check for stage-1

2024-07-01 Thread Mostafa Saleh
According to the SMMU architecture specification (ARM IHI 0070 F.b),
in “3.4 Address sizes”:
The address output from the translation causes a stage 1 Address Size
fault if it exceeds the range of the effective IPA size for the given CD.

However, this check was missing.

There is already a similar check for stage-2 against effective PA.

Reviewed-by: Eric Auger 
Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 1ce706bf94..eb2356bc35 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -381,6 +381,16 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
 goto error;
 }
 
+/*
+ * The address output from the translation causes a stage 1 Address
+ * Size fault if it exceeds the range of the effective IPA size for
+ * the given CD.
+ */
+if (gpa >= (1ULL << cfg->oas)) {
+info->type = SMMU_PTW_ERR_ADDR_SIZE;
+goto error;
+}
+
 tlbe->entry.translated_addr = gpa;
 tlbe->entry.iova = iova & ~mask;
 tlbe->entry.addr_mask = mask;
-- 
2.45.2.803.g4e1b14247a-goog




[PATCH v4 00/19] SMMUv3 nested translation support

2024-07-01 Thread Mostafa Saleh
0-1-smost...@google.com/
- Collected Eric Rbs.
- Rebased on master.
- Fix an existing bug in class encoding.
- Fix an existing bug in S2 events missing IPA.
- Fix nesting event population (missing class and wrong events)
- Remove CALL_FUNC_CFG_S2.
- Rework TLB combination logic to cache the largest possible entries.
- Refactor nested translation code to be more clear.
- Split patch 05 into 4 patches.
- Convert asid/vmid in trace events to int also.
- Remove some extra traces as they were not needed.
- Improve commit messages.

Changes in v2:
v1: 
https://lore.kernel.org/qemu-devel/20240325101442.1306300-1-smost...@google.com/
- Collected Eric Rbs
- Rework TLB to rely on VMID/ASID instead of an extra key.
- Fixed TLB issue with large stage-1 reported by Julian.
- Cap the OAS to 48 bits as PTW doesn’t support 52 bits.
- Fix ASID/VMID representation in some contexts as 16 bits while
  they can be -1
- Increase visibility in trace points


Mostafa Saleh (19):
  hw/arm/smmu-common: Add missing size check for stage-1
  hw/arm/smmu: Fix IPA for stage-2 events
  hw/arm/smmuv3: Fix encoding of CLASS in events
  hw/arm/smmu: Use enum for SMMU stage
  hw/arm/smmu: Split smmuv3_translate()
  hw/arm/smmu: Consolidate ASID and VMID types
  hw/arm/smmu: Introduce CACHED_ENTRY_TO_ADDR
  hw/arm/smmuv3: Translate CD and TT using stage-2 table
  hw/arm/smmu-common: Rework TLB lookup for nesting
  hw/arm/smmu-common: Add support for nested TLB
  hw/arm/smmu-common: Support nested translation
  hw/arm/smmu: Support nesting in smmuv3_range_inval()
  hw/arm/smmu: Introduce smmu_iotlb_inv_asid_vmid
  hw/arm/smmu: Support nesting in the rest of commands
  hw/arm/smmuv3: Support nested SMMUs in smmuv3_notify_iova()
  hw/arm/smmuv3: Handle translation faults according to SMMUPTWEventInfo
  hw/arm/smmuv3: Support and advertise nesting
  hw/arm/smmuv3: Advertise S2FWB
  hw/arm/smmu: Refactor SMMU OAS

 hw/arm/smmu-common.c | 320 +++---
 hw/arm/smmuv3-internal.h |  19 +-
 hw/arm/smmuv3.c  | 432 ++-
 hw/arm/trace-events  |  26 ++-
 include/hw/arm/smmu-common.h |  43 +++-
 5 files changed, 617 insertions(+), 223 deletions(-)

-- 
2.45.2.803.g4e1b14247a-goog




Re: [RFC PATCH v3 17/18] hw/arm/smmuv3: Add property for OAS

2024-06-27 Thread Mostafa Saleh
Hi Eric,

On Tue, May 21, 2024 at 11:32:48AM +0200, Eric Auger wrote:
> Hi Mostafa,
> 
> On 4/29/24 05:24, Mostafa Saleh wrote:
> > Add property that sets the OAS of the SMMU; this is not used in this
> > patch.
> >
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmuv3-internal.h |  3 ++-
> >  hw/arm/smmuv3.c  | 29 -
> >  include/hw/arm/smmuv3.h  |  1 +
> >  3 files changed, 31 insertions(+), 2 deletions(-)
> >
> > diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
> > index 0ebf2eebcf..dd91807624 100644
> > --- a/hw/arm/smmuv3-internal.h
> > +++ b/hw/arm/smmuv3-internal.h
> > @@ -111,7 +111,8 @@ REG32(IDR5,0x14)
> >   FIELD(IDR5, VAX,10, 2);
> >   FIELD(IDR5, STALL_MAX,  16, 16);
> >  
> > -#define SMMU_IDR5_OAS 4
> > +#define SMMU_IDR5_OAS_DEF 4 /* 44 bits. */
> > +#define SMMU_IDR5_OAS_MAX 5 /* 48 bits. */
> >  
> >  REG32(IIDR,0x18)
> >  REG32(AIDR,0x1c)
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> > index 4ac818cf7a..39d03e7e24 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -299,7 +299,9 @@ static void smmuv3_init_regs(SMMUv3State *s)
> >  s->idr[3] = FIELD_DP32(s->idr[3], IDR3, RIL, 1);
> >  s->idr[3] = FIELD_DP32(s->idr[3], IDR3, BBML, 2);
> >  
> > -s->idr[5] = FIELD_DP32(s->idr[5], IDR5, OAS, SMMU_IDR5_OAS); /* 44 
> > bits */
> > +/* PTW doesn't support 52 bits. */
> remove the point
> > +s->oas = MIN(s->oas, SMMU_IDR5_OAS_MAX);
> > +s->idr[5] = FIELD_DP32(s->idr[5], IDR5, OAS, s->oas);
> >  /* 4K, 16K and 64K granule support */
> >  s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN4K, 1);
> >  s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN16K, 1);
> > @@ -1901,11 +1903,34 @@ static const VMStateDescription vmstate_gbpa = {
> >  }
> >  };
> >  
> > +static const VMStateDescription vmstate_oas = {
> > +.name = "smmuv3/oas",
> > +.version_id = 1,
> > +.minimum_version_id = 1,
> don't you need a .needed function?
> 
> I tested backward migration and this fails
> qemu-system-aarch64: error while loading state for instance 0x0 of
> device 'smmuv3'
> qemu-system-aarch64: load of migration failed: No such file or directory
> post-processing ...

Yes, I think we need a .needed which returns whether oas is not the
default (see the sketch below); that should be backward compatible (at
least for this patch) and would break when oas changes.
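
Something like this sketch (untested, names from the patch):

static bool smmuv3_oas_needed(void *opaque)
{
    SMMUv3State *s = opaque;

    /* Only migrate the subsection when OAS differs from the old default. */
    return s->oas != SMMU_IDR5_OAS_DEF;
}

and then wire it up with ".needed = smmuv3_oas_needed" in vmstate_oas.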

Thanks,
Mostafa
> 
> Thanks
> 
> Eric
> > +.fields = (const VMStateField[]) {
> > +VMSTATE_INT32(oas, SMMUv3State),
> > +VMSTATE_END_OF_LIST()
> > +}
> > +};
> > +
> > +static int smmuv3_preload(void *opaque)
> > +{
> > +SMMUv3State *s = opaque;
> > +
> > +/*
> > + * In case it wasn't migrated, use the value used
> > + * by older QEMU.
> > + */
> > +s->oas = SMMU_IDR5_OAS_DEF;
> > +return 0;
> > +}
> > +
> >  static const VMStateDescription vmstate_smmuv3 = {
> >  .name = "smmuv3",
> >  .version_id = 1,
> >  .minimum_version_id = 1,
> >  .priority = MIG_PRI_IOMMU,
> > +.pre_load = smmuv3_preload,
> >  .fields = (const VMStateField[]) {
> >  VMSTATE_UINT32(features, SMMUv3State),
> >  VMSTATE_UINT8(sid_size, SMMUv3State),
> > @@ -1933,6 +1958,7 @@ static const VMStateDescription vmstate_smmuv3 = {
> >  },
> >  .subsections = (const VMStateDescription * const []) {
> >  &vmstate_gbpa,
> > +&vmstate_oas,
> >  NULL
> >  }
> >  };
> > @@ -1945,6 +1971,7 @@ static Property smmuv3_properties[] = {
> >   * Defaults to stage 1
> >   */
> >  DEFINE_PROP_STRING("stage", SMMUv3State, stage),
> > +DEFINE_PROP_INT32("oas", SMMUv3State, oas, SMMU_IDR5_OAS_DEF),
> >  DEFINE_PROP_END_OF_LIST()
> >  };
> >  
> > diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
> > index d183a62766..00a9eb4467 100644
> > --- a/include/hw/arm/smmuv3.h
> > +++ b/include/hw/arm/smmuv3.h
> > @@ -63,6 +63,7 @@ struct SMMUv3State {
> >  qemu_irq irq[4];
> >  QemuMutex mutex;
> >  char *stage;
> > +int32_t oas;
> >  };
> >  
> >  typedef enum {
> 



Re: [RFC PATCH v3 18/18] hw/arm/virt: Set SMMU OAS based on CPU PARANGE

2024-06-27 Thread Mostafa Saleh
Hi Julien,

On Fri, May 24, 2024 at 06:22:12PM +0100, Julien Grall wrote:
> Hi Mostafa,
> 
> On 29/04/2024 04:24, Mostafa Saleh wrote:
> > Use the new SMMU property to make the SMMU OAS match the CPU PARANGE.
> > That's according to SMMU manual ARM IHI 0070F.b: >  6.3.6 SMMU_IDR5, 
> > OAS must match the system physical address size.
> 
> > 
> > Signed-off-by: Mostafa Saleh 
> > ---
> >   hw/arm/virt.c  | 14 --
> >   target/arm/cpu.h   |  2 ++
> >   target/arm/cpu64.c |  5 +
> 
> When trying to build qemu-system-arm, I get the following error:
> 
> [1/3028] Generating subprojects/dtc/version_gen.h with a custom command
> [2/3028] Generating qemu-version.h with a custom command (wrapped by meson
> to capture output)
> [3/3021] Linking target qemu-system-aarch64
> [4/3021] Linking target qemu-system-arm
> FAILED: qemu-system-arm
> clang -m64 -mcx16 @qemu-system-arm.rsp
> libqemu-arm-softmmu.fa.p/hw_arm_virt.c.o: In function `get_system_oas':
> /home/jgrall/works/oss/qemu/build/../hw/arm/virt.c:259: undefined reference
> to `cpu_arm_get_oas'
> clang-11: error: linker command failed with exit code 1 (use -v to see
> invocation)
> ninja: build stopped: subcommand failed.
> make: *** [run-ninja] Error 1
> 
> I think you need to provide cpu_arm_get_oas() also for 32-bit arm (I guess
> it is implemented in target/arm/cpu.c).
> 
Ouch, thanks for testing that, I am currently reworking this and might drop
this change as Eric suggested, but I will make sure it also builds for arm.

Thanks,
Mostafa
> Cheers,
> 
> -- 
> Julien Grall



Re: [RFC PATCH v3 15/18] hw/arm/smmuv3: Advertise S2FWB

2024-06-17 Thread Mostafa Saleh
Hi Eric,

On Mon, May 20, 2024 at 03:30:58PM +0200, Eric Auger wrote:
> 
> 
> On 4/29/24 05:23, Mostafa Saleh wrote:
> > QEMU doesn's support memory attributes, so FWB is NOP, this
> > might change in the future if memory attributre would be supported.
> if mem attributes get supported
> >
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmuv3.c | 8 
> >  1 file changed, 8 insertions(+)
> >
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> > index 88f6473d33..8a11e41144 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -287,6 +287,14 @@ static void smmuv3_init_regs(SMMUv3State *s)
> >  if (FIELD_EX32(s->idr[0], IDR0, S2P)) {
> >  /* XNX is a stage-2-specific feature */
> >  s->idr[3] = FIELD_DP32(s->idr[3], IDR3, XNX, 1);
> > +if (FIELD_EX32(s->idr[0], IDR0, S1P)) {
> > +/*
> > + * QEMU doesn's support memory attributes, so FWB is NOP, this
> > + * might change in the future if memory attributre would be
> if mem attributes get supported
> > + * supported.
> > + */
> > +   s->idr[3] = FIELD_DP32(s->idr[3], IDR3, FWB, 1);
> spec says:
> 0b0    Stage 2 control of memory types and attributes is
> not supported and the STE.S2FWB bit is RES 0.

My understanding is that it is still OK for the SMMU to advertise this even
though, as the patch description indicates, it is a NOP. I thought it was
similar to the recently added XNX, but I can drop it if it is not useful.

Thanks,
Mostafa
> 
> 
> Thanks
> 
> Eric
> > +}
> >  }
> >  s->idr[3] = FIELD_DP32(s->idr[3], IDR3, RIL, 1);
> >  s->idr[3] = FIELD_DP32(s->idr[3], IDR3, BBML, 2);
> 



Re: [RFC PATCH v3 14/18] hw/arm/smmuv3: Support and advertise nesting

2024-06-17 Thread Mostafa Saleh
Hi Eric,

On Mon, May 20, 2024 at 03:16:40PM +0200, Eric Auger wrote:
> Hi Mostafa,
> 
> On 4/29/24 05:23, Mostafa Saleh wrote:
> > Everything is in place, add the last missing bits:
> > - Handle fault checking according to the actual PTW event and not the
> >   the translation stage.
> missing the "why". Can't it be moved in a separate patch?

Sure, I will split.

Thanks,
Mostafa

> > - Consolidate parsing of STE cfg and setting translation stage.
> >
> > Advertise nesting if stage requested is "nested".
> I would move the introduction of the nested option in a separate patch
> and in the associated commit msg properly document how the new option
> shall be used.
> >
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmuv3.c | 50 +
> >  1 file changed, 34 insertions(+), 16 deletions(-)
> >
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> > index 96d07234fe..88f6473d33 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -34,9 +34,10 @@
> >  #include "smmuv3-internal.h"
> >  #include "smmu-internal.h"
> >  
> > -#define PTW_RECORD_FAULT(cfg)   (((cfg)->stage == SMMU_STAGE_1) ? \
> > - (cfg)->record_faults : \
> > - (cfg)->s2cfg.record_faults)
> > +#define PTW_RECORD_FAULT(ptw_info, cfg) (((ptw_info).stage == SMMU_STAGE_1 
> > && \
> > +(cfg)->record_faults) || \
> > +((ptw_info).stage == SMMU_STAGE_2 
> > && \
> > +(cfg)->s2cfg.record_faults))
> >  
> >  /**
> >   * smmuv3_trigger_irq - pulse @irq if enabled and update
> > @@ -260,6 +261,9 @@ static void smmuv3_init_regs(SMMUv3State *s)
> >  /* Based on sys property, the stages supported in smmu will be 
> > advertised.*/
> >  if (s->stage && !strcmp("2", s->stage)) {
> >  s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S2P, 1);
> > +} else if (s->stage && !strcmp("nested", s->stage)) {
> > +s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S1P, 1);
> > +s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S2P, 1);
> >  } else {
> >  s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S1P, 1);
> >  }
> > @@ -422,8 +426,6 @@ static bool s2_pgtable_config_valid(uint8_t sl0, 
> > uint8_t t0sz, uint8_t gran)
> >  
> >  static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
> >  {
> > -cfg->stage = SMMU_STAGE_2;
> > -
> >  if (STE_S2AA64(ste) == 0x0) {
> >  qemu_log_mask(LOG_UNIMP,
> >"SMMUv3 AArch32 tables not supported\n");
> > @@ -506,6 +508,27 @@ bad_ste:
> >  return -EINVAL;
> >  }
> >  
> > +static void decode_ste_config(SMMUTransCfg *cfg, uint32_t config)
> > +{
> > +
> > +if (STE_CFG_ABORT(config)) {
> > +cfg->aborted = true;
> > +return;
> > +}
> > +if (STE_CFG_BYPASS(config)) {
> > +cfg->bypassed = true;
> > +return;
> > +}
> > +
> > +if (STE_CFG_S1_ENABLED(config)) {
> > +cfg->stage = SMMU_STAGE_1;
> > +}
> > +
> > +if (STE_CFG_S2_ENABLED(config)) {
> > +cfg->stage |= SMMU_STAGE_2;
> > +}
> > +}
> > +
> >  /* Returns < 0 in case of invalid STE, 0 otherwise */
> >  static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
> >STE *ste, SMMUEventInfo *event)
> > @@ -522,13 +545,9 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg 
> > *cfg,
> >  
> >  config = STE_CONFIG(ste);
> >  
> > -if (STE_CFG_ABORT(config)) {
> > -cfg->aborted = true;
> > -return 0;
> > -}
> > +decode_ste_config(cfg, config);
> >  
> > -if (STE_CFG_BYPASS(config)) {
> > -cfg->bypassed = true;
> > +if (cfg->aborted || cfg->bypassed) {
> >  return 0;
> >  }
> >  
> > @@ -701,7 +720,6 @@ static int decode_cd(SMMUv3State *s, SMMUTransCfg *cfg,
> >  
> >  /* we support only those at the moment */
> >  cfg->aa64 = true;
> > -cfg->stage = SMMU_STAGE_1;
> >  
> >  cfg->oas = oas2bits(CD_IPS(cd));
> >  cfg->oas = MIN(oas2bits(SMMU_IDR5_OAS), c

Re: [RFC PATCH v3 13/18] hw/arm/smmuv3: Support nested SMMUs in smmuv3_notify_iova()

2024-06-17 Thread Mostafa Saleh
Hi Eric,

On Mon, May 20, 2024 at 12:37:55PM +0200, Eric Auger wrote:
> Hi Mostafa,
> 
> On 4/29/24 05:23, Mostafa Saleh wrote:
> > IOMMUTLBEvent only understands IOVA, for stage-2 only SMMUs keep
> > the implementation, while only notify for stage-1 invalidation
> > in case of nesting.
> >
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmuv3.c | 23 +++
> >  hw/arm/trace-events |  2 +-
> >  2 files changed, 16 insertions(+), 9 deletions(-)
> >
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> > index e0fd494646..96d07234fe 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -1051,7 +1051,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
> > IOMMUNotifier *n,
> > int asid, int vmid,
> > dma_addr_t iova, uint8_t tg,
> > -   uint64_t num_pages)
> > +   uint64_t num_pages, int stage)
> add the new param in the doc comment above
> >  {
> >  SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu);
> >  IOMMUTLBEvent event;
> > @@ -1075,14 +1075,21 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion 
> > *mr,
> >  return;
> >  }
> >  
> > -if (STAGE1_SUPPORTED(s)) {
> > +/*
> > + * IOMMUTLBEvent only understands IOVA, for stage-2 only SMMUs
> > + * keep the implementation, while only notify for stage-1
> > + * invalidation in case of nesting.
> > + */
> > +if (stage == SMMU_STAGE_1) {
> >  tt = select_tt(cfg, iova);
> >  if (!tt) {
> >  return;
> >  }
> >  granule = tt->granule_sz;
> > -} else {
> > +} else if (!STAGE1_SUPPORTED(s)) {
> I don't get why you don't test stage == SMMU_STAGE_2 instead
> in each block shouldn't you test if the corresponding state of supported?
> >  granule = cfg->s2cfg.granule_sz;
> > +} else {
> I don't really understand the logic here. Please can you comment each case?

The current implementation will call memory_region_notify_iommu_one()
from smmuv3_notify_iova() for stage-1 or stage-2 based on which one is supported
and in each case this address is considered an “IOVA”.

However, with nested translation memory_region_notify_iommu_one() doesn’t
distinguish between stage-1 and stage-2, so only stage-1 is considered an
“IOVA”.

And the implementation is basically as follows:
1) If the translation was stage-1, it’s an IOVA and
   memory_region_notify_iommu_one() is called.

2) If stage-1 is not supported (this is an stage-2 only instance) maintain
   the old behaviour by calling memory_region_notify_iommu_one()

3) This leaves us with stage-1 being supported and this is a stage-2
   translation, where the notification would be ignored, I think in
   this case if the SW configured only for stage-2 it would expect
   it to behave as 2) :/

Not sure how to fix that, maybe only ignore stage-2 if it was in a nested STE,
or just always ignore stage-2?

Thanks,
Mostafa

> 
> Thanks
> 
> Eric
> > +return;
> >  }
> >  
> >  } else {
> > @@ -1101,7 +1108,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
> >  /* invalidate an asid/vmid/iova range tuple in all mr's */
> >  static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, int vmid,
> >dma_addr_t iova, uint8_t tg,
> > -  uint64_t num_pages)
> > +  uint64_t num_pages, int stage)
> >  {
> >  SMMUDevice *sdev;
> >  
> > @@ -1110,10 +1117,10 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, 
> > int asid, int vmid,
> >  IOMMUNotifier *n;
> >  
> >  trace_smmuv3_inv_notifiers_iova(mr->parent_obj.name, asid, vmid,
> > -iova, tg, num_pages);
> > +iova, tg, num_pages, stage);
> >  
> >  IOMMU_NOTIFIER_FOREACH(n, mr) {
> > -smmuv3_notify_iova(mr, n, asid, vmid, iova, tg, num_pages);
> > +smmuv3_notify_iova(mr, n, asid, vmid, iova, tg, num_pages, 
> > stage);
> >  }
> >  }
> >  }
> > @@ -1144,7 +1151,7 @@ static void smmuv3_range_inval(SMMUState *s, Cmd 
> > *cmd, SMMUStage stage)
> >  
> >  if (!tg) {
> >  trace_smmuv3_range_inval(vmid, a

Re: [RFC PATCH v3 12/18] hw/arm/smmu: Support nesting in the rest of commands

2024-06-17 Thread Mostafa Saleh
Hi Eric,

On Mon, May 20, 2024 at 12:24:22PM +0200, Eric Auger wrote:
> Hi Mostafa,
> 
> On 4/29/24 05:23, Mostafa Saleh wrote:
> > Some commands need rework for nesting, as they used to assume S1
> > and S2 are mutually exclusive:
> >
> > - CMD_TLBI_NH_ASID: Consider VMID if stage-2 is supported
> > - CMD_TLBI_NH_ALL: Consider VMID if stage-2 is supported, otherwise
> >   invalidate everything, this required a new vmid invalidation
> >   function for stage-1 only (ASID >= 0)
> >
> > Also, rework trace events to reflect the new implementation.
> >
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmu-common.c | 36 +---
> >  hw/arm/smmuv3.c  | 31 +--
> >  hw/arm/trace-events  |  6 --
> >  include/hw/arm/smmu-common.h |  3 ++-
> >  4 files changed, 64 insertions(+), 12 deletions(-)
> >
> > diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
> > index fa2460cf64..3ed0be05ef 100644
> > --- a/hw/arm/smmu-common.c
> > +++ b/hw/arm/smmu-common.c
> > @@ -147,13 +147,14 @@ void smmu_iotlb_inv_all(SMMUState *s)
> >  g_hash_table_remove_all(s->iotlb);
> >  }
> >  
> > -static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value,
> > - gpointer user_data)
> > +static gboolean smmu_hash_remove_by_asid_vmid(gpointer key, gpointer value,
> > +  gpointer user_data)
> Can't you introduce
> 
> smmu_hash_remove_by_asid_vmid() separately and replace the 
> smmu_iotlb_inv_asid() call in SMMU_CMD_TLBI_NH_ASID.
> Then you could focus on "if stage2 is supported" enhancements in this patch.
> 

Sure, will do.

Thanks,
Mostafa
> >  {
> > -int asid = *(int *)user_data;
> > +SMMUIOTLBPageInvInfo *info = (SMMUIOTLBPageInvInfo *)user_data;
> >  SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
> >  
> > -return SMMU_IOTLB_ASID(*iotlb_key) == asid;
> > +return (SMMU_IOTLB_ASID(*iotlb_key) == info->asid) &&
> > +   (SMMU_IOTLB_VMID(*iotlb_key) == info->vmid);
> >  }
> >  
> >  static gboolean smmu_hash_remove_by_vmid(gpointer key, gpointer value,
> > @@ -165,6 +166,16 @@ static gboolean smmu_hash_remove_by_vmid(gpointer key, 
> > gpointer value,
> >  return SMMU_IOTLB_VMID(*iotlb_key) == vmid;
> >  }
> >  
> > +static gboolean smmu_hash_remove_by_vmid_s1(gpointer key, gpointer value,
> > +gpointer user_data)
> > +{
> > +int vmid = *(int *)user_data;
> > +SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
> > +
> > +return (SMMU_IOTLB_VMID(*iotlb_key) == vmid) &&
> > +   (SMMU_IOTLB_ASID(*iotlb_key) >= 0);
> > +}
> > +
> >  static gboolean smmu_hash_remove_by_asid_vmid_iova(gpointer key, gpointer 
> > value,
> >gpointer user_data)
> >  {
> > @@ -258,10 +269,15 @@ void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, 
> > dma_addr_t ipa, uint8_t tg,
> >  &info);
> >  }
> >  
> > -void smmu_iotlb_inv_asid(SMMUState *s, int asid)
> > +void smmu_iotlb_inv_asid_vmid(SMMUState *s, int asid, int vmid)
> >  {
> > -trace_smmu_iotlb_inv_asid(asid);
> > -g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid, &asid);
> > +SMMUIOTLBPageInvInfo info = {
> > +.asid = asid,
> > +.vmid = vmid,
> > +};
> > +
> > +trace_smmu_iotlb_inv_asid_vmid(asid, vmid);
> > +g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid_vmid, 
> > &info);
> >  }
> >  
> >  void smmu_iotlb_inv_vmid(SMMUState *s, int vmid)
> > @@ -270,6 +286,12 @@ void smmu_iotlb_inv_vmid(SMMUState *s, int vmid)
> >  g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid, &vmid);
> >  }
> >  
> > +inline void smmu_iotlb_inv_vmid_s1(SMMUState *s, int vmid)
> > +{
> > +trace_smmu_iotlb_inv_vmid_s1(vmid);
> > +g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid_s1, 
> > &vmid);
> > +}
> > +
> >  /* VMSAv8-64 Translation */
> >  
> >  /**
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> > index 82d918d9b5..e0fd494646 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -1303,25 +1303,52 @@ static int smmuv3_c

Re: [RFC PATCH v3 10/18] hw/arm/smmu-common: Support nested translation

2024-06-17 Thread Mostafa Saleh
Hi Eric,

On Mon, May 20, 2024 at 11:48:18AM +0200, Eric Auger wrote:
> Hi Mostafa,
> 
> On 4/29/24 05:23, Mostafa Saleh wrote:
> > When nested translation is requested, do the following:
> >
> > - Translate stage-1 IPA using stage-2 to a physical address.
> stage-1 table address IPA into PA through S2 stage
> 
Will do.

> > - Translate stage-1 table walks using stage-2.
> output of S1 stage (IPA) through S2.
Will do.

> > - Combine both to create a single TLB entry using the logic
> >   introduced before.
> this applies to second only. First one is associated with an S2 TLB
> entry, right?
Yes, I will clarify that.

> >
> > For stage-1 table translation, the spec (ARM IHI 0070 F.b) says in:
> > 7.3.12 F_WALK_EABT:
> > Translation of an IPA for Stage 1 descriptor fetch:
> > S2 == 1 (stage 2), CLASS == T
> > So, F_WALK_EABT is used, which propagates to CLASS == TT.
> >
> > smmu_ptw() has a new argument SMMUState which include the TLB as
> > stage-1 table address can be cached in there.
> >
> > Also in smmu_ptw() a separate path used for nesting to simplify the
> > code, although some logic can be combined.
> >
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmu-common.c | 67 ++--
> >  include/hw/arm/smmu-common.h |  2 +-
> >  2 files changed, 58 insertions(+), 11 deletions(-)
> >
> > diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
> > index c67af3bc6d..d48ec08947 100644
> > --- a/hw/arm/smmu-common.c
> > +++ b/hw/arm/smmu-common.c
> > @@ -306,6 +306,32 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, 
> > dma_addr_t iova)
> >  return NULL;
> >  }
> >  
> > +/* Translate stage-1 table address using stage-2 page table. */
> > +static inline int translate_table_s1(dma_addr_t *table_addr, SMMUTransCfg 
> > *cfg,
> > + SMMUPTWEventInfo *info, SMMUState *bs)
> would suggest translate_table_addr_ipa().
Wil do.
> > +{
> > +dma_addr_t addr = *table_addr;
> > +SMMUTLBEntry *cached_entry;
> > +int asid;
> > +
> > +asid = cfg->asid;
> > +cfg->stage = SMMU_STAGE_2;
> > +cfg->asid = -1;
> > +cached_entry = smmu_translate(bs, cfg, addr, IOMMU_RO, info);
> so this is going to be cached as an S2 entry. Maybe worth adding a comment.

This is part of the IPA space and it is translated using a stage-2 page table,
so it would be cached as an S2; I will add a comment quoting the spec.

Thanks,
Mostafa
> > +cfg->asid = asid;
> > +cfg->stage = SMMU_NESTED;
> > +
> > +if (cached_entry) {
> > +*table_addr = CACHED_ENTRY_TO_ADDR(cached_entry, addr);
> > +return 0;
> > +}
> > +
> > +info->stage = SMMU_STAGE_2;
> > +info->type = SMMU_PTW_ERR_WALK_EABT;
> > +info->addr = addr;
> > +return -EINVAL;
> > +}
> > +
> >  /**
> >   * smmu_ptw_64_s1 - VMSAv8-64 Walk of the page tables for a given IOVA
> >   * @cfg: translation config
> > @@ -321,7 +347,8 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, 
> > dma_addr_t iova)
> >   */
> >  static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
> >dma_addr_t iova, IOMMUAccessFlags perm,
> > -  SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
> > +  SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info,
> > +  SMMUState *bs)
> >  {
> >  dma_addr_t baseaddr, indexmask;
> >  SMMUStage stage = cfg->stage;
> > @@ -369,6 +396,11 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
> >  goto error;
> >  }
> >  baseaddr = get_table_pte_address(pte, granule_sz);
> > +if (cfg->stage == SMMU_NESTED) {
> > +if (translate_table_s1(&baseaddr, cfg, info, bs)) {
> > +goto error;
> > +}
> > +}
> >  level++;
> >  continue;
> >  } else if (is_page_pte(pte, level)) {
> > @@ -551,10 +583,8 @@ error:
> >  }
> >  
> >  /* combine 2 TLB entries and return in tlbe in nested config. */
> > -static void __attribute__((unused)) combine_tlb(SMMUTLBEntry *tlbe,
> > -SMMUTLBEntry *tlbe_s2,
> > -dma_addr_t iova,
> > -SMMUTransCfg *cfg)
> >

Re: [RFC PATCH v3 08/18] hw/arm/smmu-common: Add support for nested TLB

2024-06-17 Thread Mostafa Saleh
On Wed, May 22, 2024 at 1:44 PM Mostafa Saleh  wrote:
>
> Hi Eric,
>
> On Mon, May 20, 2024 at 10:20:43AM +0200, Eric Auger wrote:
> > Hi Mostafa,
> > On 5/16/24 17:20, Mostafa Saleh wrote:
> > > Hi Eric,
> > >
> > > On Wed, May 15, 2024 at 03:48:05PM +0200, Eric Auger wrote:
> > >> Hi Mostafa,
> > >>
> > >> On 4/29/24 05:23, Mostafa Saleh wrote:
> > >>> This patch adds support for nested(combined) TLB entries.
> > >> space between nested and (.
> > > Will do.
> > >>> The main function combine_tlb() is not used here but in the next
> > >>> patches, but to simplify the patches it is introduced first.
> > >>>
> > >>> Main changes:
> > >>> 1) New entry added in the TLB, parent_perm, for nested TLB, holds the
> > >> s/entry/field, s/TLB/SMMUTLBEntry struct
> > > Will do.
> > >>>stage-2 permission, this can be used to know the origin of a
> > >>>permission fault from a cached entry as caching the “and” of the
> > >>>permissions loses this information.
> > >>>
> > >>>SMMUPTWEventInfo is used to hold information about PTW faults so
> > >>>the event can be populated, the value of stage (which maps to S2
> > >>>in the event) used to be set based on the current stage for TLB
> > >> I don't understand "(which maps to S2 in the event)". What do you mean?
> > >> This could be S1 or S2 depending on the active stage, no?
> > > Not really, if the IPA size is larger than S2 input size, this is
> > > considered stage-1 fault.
> > >
> > > For TLB permission fault, yes, that is how it is decided.
> > > However, with nesting, a permission fault from a cached entry can be
> > > from a stage-1 or stage-2, that’s why we now cache both and not just
> > > the combined permission, and the logic to set fault stage is modified
> > > accordingly.
> > I meant in smmu_translate() we initially had for permission fault
> > info->stage = cfg->stage whcih can be S1 or S2. Hence the fact I do not
> > understand the sentence
> >
> > the value of stage (which maps to S2 in the event)
> >
> > I understand that with nested this computation needs to change because the 
> > permission can be linked to either the S1 or S2 stage.
> > Maybe that's just a matter or rephrasing?
> >
>
> I see, that’s already how it is used now, I will rephrase it in case
> it is confusing.
>

After reading the mail again, I think I get the confusion here: the bit
that indicates whether the event is stage-1 or stage-2 is called “S2” in
the spec :)
and that’s what the commit is referring to, but I will remove this
sentence as it doesn’t add much in this context.
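
To make that concrete, roughly the logic with both permissions cached
(sketch only, field names from this series: perm holds the stage-1
permission, parent_perm the stage-2 one):

/* Sketch: attribute a permission fault on a cached entry to a stage. */
if (!(entry->entry.perm & entry->parent_perm & flag)) {
    info->type = SMMU_PTW_ERR_PERMISSION;
    /* If stage-1's own permission is missing, it is a stage-1 fault;
     * otherwise stage-2 denied the access. */
    info->stage = !(entry->entry.perm & flag) ? SMMU_STAGE_1 : SMMU_STAGE_2;
}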

> >
> > >>>permission faults, however with the parent_perm, it is now set
> > >>>based on which perm has the missing permission
> > >>>
> > >>>When nesting is not enabled it has the same value as perm which
> > >>>doesn't change the logic.
> > >>>
> > >>> 2) As combined TLB implementation is used, the combination logic
> > >>>chooses:
> > >>>- tg and level from the entry which has the smallest addr_mask.
> > >> tbh I am scared bout swapping s1/s2 tg and level. In smmu_iotlb_lookup()
> > >> I see tt->granule_sz being used which is s1 data. I mean it is not
> > >> obvious to me this is correct. Could you maybe give more explanations
> > >> detailing why/how this is guaranted to work.
> > > As you mentioned the next patch reworks the lookup logic, I can reorder
> > > the 2 patches if that is better, please let me know what you think?
> > Yes if you manage to reorder that may be more logical because otherwise
> > it looks incorrect.
>
> Will do.
> > >
> > >> Can you give additional details about what s1+s2 combinations were 
> > >> tested?
> > > I tested with S1 and S2 4K pages
> > > S1 level = 3 and S2 level = 3
> > > S1 level = 2 and S2 level = 3
> > > S1 level = 3 and S2 level = 2
> > > S1 level = 1 and S2 level = 2
> > >
> > > And also tested with with S1 64K granule and S2 4K.
> > OK, I would suggest you mention that in the coverletter because it is
> > reassuring and the combination is not totally obvious - at least to me ;-) -
>
> Will do.
>
> Thanks,
> Mostafa
> >
> > Eric
> > >
> >

Re: [RFC PATCH v3 06/18] hw/arm/smmu: Consolidate ASID and VMID types

2024-06-17 Thread Mostafa Saleh
Hi Eric,

Sorry for the lack of updates for a while; I have been away for some time,
but I am now back to working on this.

On Wed, May 15, 2024 at 02:41:42PM +0200, Eric Auger wrote:
> Hi Mostafa,
> On 4/29/24 05:23, Mostafa Saleh wrote:
> > ASID and VMID used to be uint16_t in the translation config, however,
> > in other contexts they can be int as -1 in case of TLB invalidation,
> other contexts = TLB invalidation, right?

Yes, although I was thinking this could also be used for global entry
lookups in case we support them in the future.

Thanks,
Mostafa
> > to represent all(don’t care).
> add space between all and (.
> > When stage-2 was added asid was set to -1 in stage-2 and vmid to -1
> > in stage-1 configs. However, that meant they were set as (65536),
> > this was not an issue as nesting was not supported and no
> > commands/lookup targets both.
> s/targets/uses
> >
> > With nesting, it’s critical to get this right as translation must be
> > tagged correctly with ASID/VMID, and with ASID=-1 meaning stage-2.
> > Represent ASID/VMID everywhere as int.
> >
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmu-common.c | 10 +-
> >  hw/arm/smmuv3.c  |  4 ++--
> >  hw/arm/trace-events  | 18 +-
> >  include/hw/arm/smmu-common.h | 14 +++---
> >  4 files changed, 23 insertions(+), 23 deletions(-)
> >
> > diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
> > index d94db6b34f..21982621c0 100644
> > --- a/hw/arm/smmu-common.c
> > +++ b/hw/arm/smmu-common.c
> > @@ -57,7 +57,7 @@ static gboolean smmu_iotlb_key_equal(gconstpointer v1, 
> > gconstpointer v2)
> > (k1->vmid == k2->vmid);
> >  }
> >  
> > -SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint16_t vmid, uint64_t 
> > iova,
> > +SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova,
> >  uint8_t tg, uint8_t level)
> >  {
> >  SMMUIOTLBKey key = {.asid = asid, .vmid = vmid, .iova = iova,
> > @@ -130,7 +130,7 @@ void smmu_iotlb_inv_all(SMMUState *s)
> >  static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value,
> >   gpointer user_data)
> >  {
> > -uint16_t asid = *(uint16_t *)user_data;
> > +int asid = *(int *)user_data;
> >  SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
> >  
> >  return SMMU_IOTLB_ASID(*iotlb_key) == asid;
> > @@ -139,7 +139,7 @@ static gboolean smmu_hash_remove_by_asid(gpointer key, 
> > gpointer value,
> >  static gboolean smmu_hash_remove_by_vmid(gpointer key, gpointer value,
> >   gpointer user_data)
> >  {
> > -uint16_t vmid = *(uint16_t *)user_data;
> > +int vmid = *(int *)user_data;
> >  SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
> >  
> >  return SMMU_IOTLB_VMID(*iotlb_key) == vmid;
> > @@ -191,13 +191,13 @@ void smmu_iotlb_inv_iova(SMMUState *s, int asid, int 
> > vmid, dma_addr_t iova,
> >  &info);
> >  }
> >  
> > -void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid)
> > +void smmu_iotlb_inv_asid(SMMUState *s, int asid)
> >  {
> >  trace_smmu_iotlb_inv_asid(asid);
> >  g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid, &asid);
> >  }
> >  
> > -void smmu_iotlb_inv_vmid(SMMUState *s, uint16_t vmid)
> > +void smmu_iotlb_inv_vmid(SMMUState *s, int vmid)
> >  {
> >  trace_smmu_iotlb_inv_vmid(vmid);
> >  g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid, &vmid);
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> > index f98c157221..cc61708160 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -1243,7 +1243,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
> >  }
> >  case SMMU_CMD_TLBI_NH_ASID:
> >  {
> > -uint16_t asid = CMD_ASID(&cmd);
> > +int asid = CMD_ASID(&cmd);
> >  
> >  if (!STAGE1_SUPPORTED(s)) {
> >  cmd_error = SMMU_CERROR_ILL;
> > @@ -1276,7 +1276,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
> >  break;
> >  case SMMU_CMD_TLBI_S12_VMALL:
> >  {
> > -uint16_t vmid = CMD_VMID(&cmd);
> > +int vmid = CMD_VMID(&cmd);
> >  
> >  if (!STAGE2_SUPPORTED(s)) {
> >  cmd_error = SMMU_CERROR_ILL;
> > diff --git a/hw

Re: [RFC PATCH v3 00/18] SMMUv3 nested translation support

2024-05-27 Thread Mostafa Saleh
Hi Eric,

On Tue, May 21, 2024 at 10:47 AM Eric Auger  wrote:
>
> Hi Mostafa,
>
> On 4/29/24 05:23, Mostafa Saleh wrote:
> > Currently, QEMU supports emulating either stage-1 or stage-2 SMMUs
> > but not nested instances.
> > This patch series adds support for nested translation in SMMUv3,
> > this is controlled by property “arm-smmuv3.stage=nested”, and
> > advertised to guests as (IDR0.S1P == 1 && IDR0.S2P == 1)
> >
> > Main changes(architecture):
> > 
> > 1) CDs are considered IPA and translated with stage-2.
> > 2) TTBx and tables for stage-1 are considered IPA and translated
> >with stage-2.
> > 3) Translate the IPA address with stage-2.
> >
> > TLBs:
> > ==
> > TLBs are the most tricky part.
> >
> > 1) General design
> >Unified(Combined) design is used, where entries with ASID=-1 are
> >IPAs(cached from stage-2 config)
> >
> >TLBs are also modified to cache 2 permissions, a new permission added
> >"parent_perm."
> >
> >For non-nested configuration, perm == parent_perm and nothing
> >changes. This is used to know which stage to use in case there is
> >a permission fault from a TLB entry.
> >
> > 2) Caching in TLB
> >Stage-1 and stage-2 are inserted in the TLB as is.
> >For nested translation, both entries are combined into one TLB
> >entry. The size (level and granule) are chosen from the smallest entries.
> >That means that a stage-1 translation can be cached with the stage-2
> >granule in the key; this is taken into account at lookup.
> >
> > 3) TLB Lookup
> >TLB lookup already uses ASID in key, so it can distinguish between
> >stage-1 and stage-2.
> >And as mentioned above, the granule for stage-1 can be different,
> >If stage-1 lookup failed, we try again with the stage-2 granule.
> >
> > 4) TLB invalidation
> >- Address invalidation is split, for IOVA(CMD_TLBI_NH_VA
> >  /CMD_TLBI_NH_VAA) and IPA(CMD_TLBI_S2_IPA) based on ASID value
> >- CMD_TLBI_NH_ASID/CMD_TLBI_NH_ALL: Consider VMID if stage-2 is
> >  supported, and invalidate stage-1 only by VMIDs
> >
> > As far as I understand, this is compliant with the ARM architecture:
> > - ARM ARM DDI 0487J.a: RLGSCG, RTVTYQ, RGNJPZ
> > - ARM IHI 0070F.b: 16.2 Caching
> >
> > An alternative approach would be to instantiate 2 TLBs, one per each
> > stage. I haven’t investigated that.
> >
> > Others
> > ===
> > - Advertise SMMUv3.2-S2FWB, it is NOP for QEMU as it doesn’t support
> >   attributes.
> >
> > - OAS: A typical setup with nesting is to share CPU stage-2 with the
> >   SMMU, and according to the user manual, SMMU OAS must match the
> >   system physical address.
> >
> >   This was discussed before in
> >   https://lore.kernel.org/all/20230226220650.1480786-11-smost...@google.com/
> >   The implementation here, follows the discussion, where migration is
> >   added and oas is set up from the board (virt). However, the OAS is
> >   chosen based on the CPU PARANGE as there is no fixed one.
> >
> > - For nested configuration, IOVA notifier only notifies for stage-1
> >   invalidations (as far as I understand this is the intended
> >   behaviour as it notifies for IOVA)
> >
> > - Stop ignoring VMID for stage-1 if stage-2 is also supported.
>
> I completed the review pass on my end. I strongly encourage you to move
> the series into a non-RFC series to attract more reviewers. Migration
> needs to be fixed, and compat handling as well I think, but overall the
> nested support looks mostly OK to me. Some of it is a bit hacky (like
> the config local patching to force S2 only) as I mentioned in my comments,
> but let's see if other reviewers find more elegant ways to handle things.
>

Thanks a lot, I really appreciate your thorough review.
I will send a v4 without RFC in ~2 weeks as I am off for a while.
I will probably drop the OAS changes (again) to avoid the migration
hassle; that can be a separate series after this one.

Thanks,
Mostafa

> Eric
> >
> >
> > Future improvements:
> > =
> > 1) One small improvement, which I don’t think is worth the extra
> >    complexity: in case of a stage-1 TLB miss for nested translation,
> >    we could do the stage-1 walk and look up the stage-2 TLBs, instead
> >    of doing the full walk.
> >
> > Testing
> > 
> > 1) IOMMUFD + VFIO
> >Kernel: 
> > https://lore.kernel.org/all/cover.1683688960.git.ni
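
An illustrative aside on the unified TLB design described in the cover
letter above: a minimal sketch (not code from the series; the *_sketch
names are invented) of an IOTLB key in which ASID == -1 tags entries
cached from the stage-2 config:

#include <stdbool.h>
#include <stdint.h>

typedef struct SMMUIOTLBKeySketch {
    uint64_t iova;   /* IOVA for stage-1/nested entries, IPA for stage-2 */
    int asid;        /* -1: entry caches an IPA (stage-2), >= 0: stage-1 */
    int vmid;
    uint8_t tg;      /* granule the entry was inserted with */
    uint8_t level;
} SMMUIOTLBKeySketch;

/* A stage-2-only entry is recognisable purely from its key. */
static bool key_is_ipa_sketch(const SMMUIOTLBKeySketch *k)
{
    return k->asid == -1;
}

Because the ASID is part of the hash key, stage-1 and stage-2 entries
never collide, which is what lets the invalidation commands target one
stage without over-invalidating the other.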

Re: [RFC PATCH v3 09/18] hw/arm/smmu-common: Rework TLB lookup for nesting

2024-05-22 Thread Mostafa Saleh
Hi Eric,

On Mon, May 20, 2024 at 10:27:50AM +0200, Eric Auger wrote:
> Hi Mostafa,
> 
> On 4/29/24 05:23, Mostafa Saleh wrote:
> > In the previous patch, combine_tlb() was added, which combines 2 TLB
> > entries into one, which chooses the granule and level from the
> > smallest entry.
> >
> > This means that with a nested translation, an entry can be cached with the
> > granule of stage-2 and not stage-1.
> >
> > However, the lookup for an IOVA in nested configuration is done with
> > stage-1 granule; this patch reworks lookup in that case, so it falls
> > back to stage-2 granule if no entry is found using stage-1 granule.
> >
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmu-common.c | 24 ++--
> >  1 file changed, 22 insertions(+), 2 deletions(-)
> >
> > diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
> > index 0d6945fa54..c67af3bc6d 100644
> > --- a/hw/arm/smmu-common.c
> > +++ b/hw/arm/smmu-common.c
> > @@ -66,8 +66,10 @@ SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, 
> > uint64_t iova,
> >  return key;
> >  }
> >  
> > -SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
> > -SMMUTransTableInfo *tt, hwaddr iova)
> > +static SMMUTLBEntry *smmu_iotlb_lookup_all_levels(SMMUState *bs,
> > +  SMMUTransCfg *cfg,
> > +  SMMUTransTableInfo *tt,
> > +  hwaddr iova)
> >  {
> >  uint8_t tg = (tt->granule_sz - 10) / 2;
> >  uint8_t inputsize = 64 - tt->tsz;
> > @@ -88,6 +90,24 @@ SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, 
> > SMMUTransCfg *cfg,
> >  }
> >  level++;
> >  }
> > +return entry;
> > +}
> > +
> > +SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
> > +SMMUTransTableInfo *tt, hwaddr iova)
> > +{
> > +SMMUTLBEntry *entry = NULL;
> > +
> > +entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
> > +/*
> > + * For nested translation also try the s2 granule, as the TLB will 
> > insert
> > + * it if the size of s2 tlb entry was smaller.
> > + */
> > +if (!entry && (cfg->stage == SMMU_NESTED) &&
> > +(cfg->s2cfg.granule_sz != tt->granule_sz)) {
> > +tt->granule_sz = cfg->s2cfg.granule_sz;
> is it safe to alter the tt->granule_sz without restoring it? In the
> positive I think this would deserve a comment.

It should be safe in the current usage; I will add a comment to
clarify how the function behaves (something like: the granule_sz
would be updated to the entry's tg if found).

Thanks,
Mostafa
> 
> Eric
> > +entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
> > +}
> >  
> >  if (entry) {
> >  cfg->iotlb_hits++;
> 
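
A worked example of why the retry above is needed (granule sizes
assumed): the lookup key encodes the granule as tg = (granule_sz - 10) / 2,
so a stage-1 config using a 64K granule (granule_sz = 16, tg = 3) builds
a different key than a 4K one (granule_sz = 12, tg = 1). If combine_tlb()
inserted the nested entry with the smaller stage-2 4K granule, the first
pass keyed on the stage-1 granule must miss, and only the retry keyed on
cfg->s2cfg.granule_sz can hit:

/* sketch only, not code from the series */
unsigned tg_s1 = (16 - 10) / 2;  /* 3: stage-1 64K granule, first pass */
unsigned tg_s2 = (12 - 10) / 2;  /* 1: stage-2 4K granule, retry pass */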



Re: [RFC PATCH v3 08/18] hw/arm/smmu-common: Add support for nested TLB

2024-05-22 Thread Mostafa Saleh
Hi Eric,

On Mon, May 20, 2024 at 10:20:43AM +0200, Eric Auger wrote:
> Hi Mostafa,
> On 5/16/24 17:20, Mostafa Saleh wrote:
> > Hi Eric,
> >
> > On Wed, May 15, 2024 at 03:48:05PM +0200, Eric Auger wrote:
> >> Hi Mostafa,
> >>
> >> On 4/29/24 05:23, Mostafa Saleh wrote:
> >>> This patch adds support for nested(combined) TLB entries.
> >> space between nested and (.
> > Will do.
> > The main function combine_tlb() is not used here but in the next
> > patches; to simplify the patches, it is introduced first.
> >>>
> >>> Main changes:
> >>> 1) New entry added in the TLB, parent_perm, for nested TLB, holds the
> >> s/entry/field, s/TLB/SMMUTLBEntry struct
> > Will do.
> >>>stage-2 permission, this can be used to know the origin of a
> >>>permission fault from a cached entry as caching the “and” of the
> >>>permissions loses this information.
> >>>
> >>>SMMUPTWEventInfo is used to hold information about PTW faults so
> >>>the event can be populated, the value of stage (which maps to S2
> >>>in the event) used to be set based on the current stage for TLB
> >> I don't understand "(which maps to S2 in the event)". What do you mean?
> >> This could be S1 or S2 depending on the active stage, no?
> > Not really, if the IPA size is larger than S2 input size, this is
> > considered a stage-1 fault.
> >
> > For TLB permission fault, yes, that is how it is decided.
> > However, with nesting, a permission fault from a cached entry can be
> > from a stage-1 or stage-2, that’s why we now cache both and not just
> > the combined permission, and the logic to set fault stage is modified
> > accordingly.
> I meant in smmu_translate() we initially had for permission fault
> info->stage = cfg->stage, which can be S1 or S2. Hence I do not
> understand the sentence
> 
> the value of stage (which maps to S2 in the event)
> 
> I understand that with nested this computation needs to change because the 
> permission can be linked to either the S1 or S2 stage.
> Maybe that's just a matter of rephrasing?
> 

I see; that’s already how it is used now. I will rephrase it in case
it is confusing.

> 
> >>>permission faults, however with the parent_perm, it is now set
> >>>based on which perm has the missing permission
> >>>
> >>>When nesting is not enabled it has the same value as perm which
> >>>doesn't change the logic.
> >>>
> >>> 2) As combined TLB implementation is used, the combination logic
> >>>chooses:
> >>>- tg and level from the entry which has the smallest addr_mask.
> >> tbh I am scared about swapping s1/s2 tg and level. In smmu_iotlb_lookup()
> >> I see tt->granule_sz being used which is s1 data. I mean it is not
> >> obvious to me this is correct. Could you maybe give more explanations
> >> detailing why/how this is guaranteed to work.
> > As you mentioned the next patch reworks the lookup logic, I can reorder
> > the 2 patches if that is better, please let me know what you think?
> Yes if you manage to reorder that may be more logical because otherwise
> it looks incorrect.

Will do.
> >
> >> Can you give additional details about what s1+s2 combinations were tested?
> > I tested with S1 and S2 4K pages
> > S1 level = 3 and S2 level = 3
> > S1 level = 2 and S2 level = 3
> > S1 level = 3 and S2 level = 2
> > S1 level = 1 and S2 level = 2
> >
> > And also tested with S1 64K granule and S2 4K.
> OK, I would suggest you mention that in the coverletter because it is
> reassuring and the combination is not totally obvious - at least to me ;-) -

Will do.

Thanks,
Mostafa
> 
> Eric
> >
> >>>- Based on that the iova that would be cached is recalculated.
> >>>- Translated_addr is chosen from stage-2.
> >>>
> >>> Signed-off-by: Mostafa Saleh 
> >>> ---
> >>>  hw/arm/smmu-common.c | 32 
> >>>  include/hw/arm/smmu-common.h |  1 +
> >>>  2 files changed, 29 insertions(+), 4 deletions(-)
> >>>
> >>> diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
> >>> index 21982621c0..0d6945fa54 100644
> >>> --- a/hw/arm/smmu-common.c
> >>> +++ b/hw/arm/smmu-common.c
> >>> @@ -394,7 +394,7 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
> >>>  tlbe->en
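
A worked example for the "smallest entry" rule discussed above (4K
granules assumed on both stages): with a 4K granule the stride is
granule_sz - 3 = 9 bits, so a block at level L spans
2^(12 + (3 - L) * 9) bytes: 4KiB at level 3, 2MiB at level 2, 1GiB at
level 1. In the tested "S1 level = 1 and S2 level = 2" case, the
stage-1 entry covers 1GiB but the stage-2 entry only 2MiB, so the
combined entry must keep the 2MiB mask (and the stage-2 tg/level);
otherwise it would claim to translate addresses the stage-2 walk never
verified.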

Re: [RFC PATCH v3 07/18] hw/arm/smmuv3: Translate CD and TT using stage-2 table

2024-05-16 Thread Mostafa Saleh
Hi Eric,

On Wed, May 15, 2024 at 03:15:02PM +0200, Eric Auger wrote:
> Hi Mostafa,
> 
> On 4/29/24 05:23, Mostafa Saleh wrote:
> > According to ARM SMMU architecture specification (ARM IHI 0070 F.b),
> > In "5.2 Stream Table Entry":
> >  [51:6] S1ContextPtr
> >  If Config[1] == 1 (stage 2 enabled), this pointer is an IPA translated by
> >  stage 2 and the programmed value must be within the range of the IAS.
> >
> > In "5.4.1 CD notes":
> >  The translation table walks performed from TTB0 or TTB1 are always 
> > performed
> >  in IPA space if stage 2 translations are enabled.
> >
> > This patch implements translation of the S1 context descriptor pointer and
> > TTBx base addresses through the S2 stage (IPA -> PA)
> >
> > smmuv3_do_translate() is updated to have one arg which is translation
> > class, this is useful for:
> s/for/to?
Will do.
> >  - Decide whether a translation is stage-2 only or use the STE config.
> >  - Populate the class in case of faults, WALK_EABT is lefat as it as
> left unchanged?
Yup, that's a typo.
> >it is always triggered from TT access so no need to use the input
> >class.
> >
> > In case for stage-2 only translation, which only used in nesting, the
> in case of S2 translation used in the contexted of a nested translation, ...
Will do.
> > stage and asid are saved and restored before and after calling
> > smmu_translate().
> >
> > Translating CD or TTBx can fail for the following reasons:
> > 1) Large address size: This is described in
> >(3.4.3 Address sizes of SMMU-originated accesses)
> >- For CD ptr larger than IAS, for SMMUv3.1, it can trigger either
> >  C_BAD_STE or Translation fault, we implement the latter as it
> >  requires no extra code.
> >- For TTBx, if larger than the effective stage 1 output address size, it
> >  triggers C_BAD_CD.
> >
> > 2) Faults from PTWs (7.3 Event records)
> >- F_ADDR_SIZE: large address size after first level causes stage 2 
> > Address
> >  Size fault (Also in 3.4.3 Address sizes of SMMU-originated accesses)
> >- F_PERMISSION: Same as an address translation. However, when
> >  CLASS == CD, the access is implicitly Data and a read.
> >- F_ACCESS: Same as an address translation.
> >- F_TRANSLATION: Same as an address translation.
> >- F_WALK_EABT: Same as an address translation.
> >   These are already implemented in the PTW logic, so no extra handling
> >   required.
> >
> > As there are multiple locations where the address is calculated from a
> > cached entry, a new macro CACHED_ENTRY_TO_ADDR is introduced.
> >
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmuv3.c  | 76 ++--
> >  include/hw/arm/smmu-common.h |  3 ++
> >  2 files changed, 66 insertions(+), 13 deletions(-)
> >
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> > index cc61708160..cc61c82321 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -337,14 +337,33 @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t 
> > addr, STE *buf,
> >  
> >  }
> >  
> > +static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr 
> > addr,
> > + SMMUTransCfg *cfg,
> > + SMMUEventInfo *event,
> > + IOMMUAccessFlags flag,
> > + SMMUTLBEntry **out_entry,
> > + SMMUTranslationClass 
> > class);
> >  /* @ssid > 0 not supported yet */
> > -static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
> > -   CD *buf, SMMUEventInfo *event)
> > +static int smmu_get_cd(SMMUv3State *s, STE *ste, SMMUTransCfg *cfg,
> > +   uint32_t ssid, CD *buf, SMMUEventInfo *event)
> >  {
> >  dma_addr_t addr = STE_CTXPTR(ste);
> >  int ret, i;
> > +SMMUTranslationStatus status;
> > +SMMUTLBEntry *entry;
> >  
> >  trace_smmuv3_get_cd(addr);
> > +
> > +if (cfg->stage == SMMU_NESTED) {
> > +status = smmuv3_do_translate(s, addr, cfg, event,
> > + IOMMU_RO, &entry, SMMU_CLASS_CD);
> > +if (status != SMMU_TRANS_SUCCESS) {
> So I guess you rely on event being populated by above CD S2 translate().
> it does not need to be patched, correct?
&

Re: [RFC PATCH v3 09/18] hw/arm/smmu-common: Rework TLB lookup for nesting

2024-05-16 Thread Mostafa Saleh
Hi Eric,

On Wed, May 15, 2024 at 03:54:36PM +0200, Eric Auger wrote:
> 
> 
> On 4/29/24 05:23, Mostafa Saleh wrote:
> > In the previous patch, comine_tlb() was added which combines 2 TLB
> combine
Will do.

> > entries into one, which chooses the granule and level from the
> > smallest entry.
> >
> > This means that a nested translation, an entry can be cached with the
> that with nested translation
Will do.

> > granule of stage-2 and not stage-1.
> >
> > However, the lookup for an IOVA in nested configuration is done with
> > stage-1 granule; this patch reworks lookup in that case, so it falls
> > back to stage-2 granule if no entry is found using stage-1 granule.
> I should have read that before commenting previous patch ;-)
> Anyway this shows that something is missing in previous patch, at least
> the above explanation ;-)

Yup, I can add a comment in the previous patch or reorder them, let me
know what you prefer.

Thanks,
Mostafa

> 
> Eric
> >
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmu-common.c | 24 ++--
> >  1 file changed, 22 insertions(+), 2 deletions(-)
> >
> > diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
> > index 0d6945fa54..c67af3bc6d 100644
> > --- a/hw/arm/smmu-common.c
> > +++ b/hw/arm/smmu-common.c
> > @@ -66,8 +66,10 @@ SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, 
> > uint64_t iova,
> >  return key;
> >  }
> >  
> > -SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
> > -SMMUTransTableInfo *tt, hwaddr iova)
> > +static SMMUTLBEntry *smmu_iotlb_lookup_all_levels(SMMUState *bs,
> > +  SMMUTransCfg *cfg,
> > +  SMMUTransTableInfo *tt,
> > +  hwaddr iova)
> >  {
> >  uint8_t tg = (tt->granule_sz - 10) / 2;
> >  uint8_t inputsize = 64 - tt->tsz;
> > @@ -88,6 +90,24 @@ SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, 
> > SMMUTransCfg *cfg,
> >  }
> >  level++;
> >  }
> > +return entry;
> > +}
> > +
> > +SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
> > +SMMUTransTableInfo *tt, hwaddr iova)
> > +{
> > +SMMUTLBEntry *entry = NULL;
> > +
> > +entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
> > +/*
> > + * For nested translation also try the s2 granule, as the TLB will 
> > insert
> > + * it if the size of s2 tlb entry was smaller.
> > + */
> > +if (!entry && (cfg->stage == SMMU_NESTED) &&
> > +(cfg->s2cfg.granule_sz != tt->granule_sz)) {
> > +tt->granule_sz = cfg->s2cfg.granule_sz;
> > +entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
> > +}
> >  
> >  if (entry) {
> >  cfg->iotlb_hits++;
> 



Re: [RFC PATCH v3 08/18] hw/arm/smmu-common: Add support for nested TLB

2024-05-16 Thread Mostafa Saleh
Hi Eric,

On Wed, May 15, 2024 at 03:48:05PM +0200, Eric Auger wrote:
> Hi Mostafa,
> 
> On 4/29/24 05:23, Mostafa Saleh wrote:
> > This patch adds support for nested(combined) TLB entries.
> space between nested and (.
Will do.
> > The main function combine_tlb() is not used here but in the next
> > patches; to simplify the patches, it is introduced first.
> >
> > Main changes:
> > 1) New entry added in the TLB, parent_perm, for nested TLB, holds the
> s/entry/field, s/TLB/SMMUTLBEntry struct
Will do.
> >stage-2 permission, this can be used to know the origin of a
> >permission fault from a cached entry as caching the “and” of the
> >permissions loses this information.
> >
> >SMMUPTWEventInfo is used to hold information about PTW faults so
> >the event can be populated, the value of stage (which maps to S2
> >in the event) used to be set based on the current stage for TLB
> I don't understand "(which maps to S2 in the event)". What do you mean?
> This could be S1 or S2 depending on the active stage, no?

Not really, if the IPA size is larger than S2 input size, this is
considered a stage-1 fault.

For TLB permission fault, yes, that is how it is decided.
However, with nesting, a permission fault from a cached entry can be
from a stage-1 or stage-2, that’s why we now cache both and not just
the combined permission, and the logic to set fault stage is modified
accordingly.

> >permission faults, however with the parent_perm, it is now set
> >based on which perm has the missing permission
> >
> >When nesting is not enabled it has the same value as perm which
> >doesn't change the logic.
> >
> > 2) As combined TLB implementation is used, the combination logic
> >chooses:
> >- tg and level from the entry which has the smallest addr_mask.
> tbh I am scared about swapping s1/s2 tg and level. In smmu_iotlb_lookup()
> I see tt->granule_sz being used which is s1 data. I mean it is not
> obvious to me this is correct. Could you maybe give more explanations
> detailing why/how this is guaranteed to work.

As you mentioned the next patch reworks the lookup logic, I can reorder
the 2 patches if that is better, please let me know what you think?

> 
> Can you give additional details about what s1+s2 combinations were tested?

I tested with S1 and S2 4K pages
S1 level = 3 and S2 level = 3
S1 level = 2 and S2 level = 3
S1 level = 3 and S2 level = 2
S1 level = 1 and S2 level = 2

And also tested with S1 64K granule and S2 4K.

> >- Based on that the iova that would be cached is recalculated.
> >- Translated_addr is chosen from stage-2.
> >
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmu-common.c | 32 
> >  include/hw/arm/smmu-common.h |  1 +
> >  2 files changed, 29 insertions(+), 4 deletions(-)
> >
> > diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
> > index 21982621c0..0d6945fa54 100644
> > --- a/hw/arm/smmu-common.c
> > +++ b/hw/arm/smmu-common.c
> > @@ -394,7 +394,7 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
> >  tlbe->entry.translated_addr = gpa;
> >  tlbe->entry.iova = iova & ~mask;
> >  tlbe->entry.addr_mask = mask;
> > -tlbe->entry.perm = PTE_AP_TO_PERM(ap);
> > +tlbe->parent_perm = tlbe->entry.perm = PTE_AP_TO_PERM(ap);
> nit: I would prefer on separate lines.
Will do.

> >  tlbe->level = level;
> >  tlbe->granule = granule_sz;
> >  return 0;
> > @@ -515,7 +515,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
> >  tlbe->entry.translated_addr = gpa;
> >  tlbe->entry.iova = ipa & ~mask;
> >  tlbe->entry.addr_mask = mask;
> > -tlbe->entry.perm = s2ap;
> > +tlbe->parent_perm = tlbe->entry.perm = s2ap;
> >  tlbe->level = level;
> >  tlbe->granule = granule_sz;
> >  return 0;
> > @@ -530,6 +530,27 @@ error:
> >  return -EINVAL;
> >  }
> >  
> > +/* combine 2 TLB entries and return in tlbe in nested config. */
> suggestion: combine S1 and S2 TLB entries into a single entry. As a
> result the S1 entry is overriden with combined data.
Will do.

> > +static void __attribute__((unused)) combine_tlb(SMMUTLBEntry *tlbe,
> > +SMMUTLBEntry *tlbe_s2,
> > +dma_addr_t iova,
> > +SMMUTransCfg *cfg)
> > +{
> > +if (tlbe_s2->entr

Re: [RFC PATCH v3 03/18] hw/arm/smmuv3: Fix encoding of CLASS in events

2024-05-16 Thread Mostafa Saleh
Hi Eric,

On Wed, May 15, 2024 at 02:27:45PM +0200, Eric Auger wrote:
> Hi Mostafa,
> On 4/29/24 05:23, Mostafa Saleh wrote:
> > The SMMUv3 spec (ARM IHI 0070 F.b - 7.3 Event records) defines the
> > class of events faults as:
> >
> > CLASS: The class of the operation that caused the fault:
> > - 0b00: CD, CD fetch.
> > - 0b01: TTD, Stage 1 translation table fetch.
> > - 0b10: IN, Input address
> >
> > However, this value was not set and left as 0 which means CD and not
> > IN (0b10).
> > While at it, add an enum for class as it would be used for nesting.
> If this fixes something, please add a Fixes: tag.
> 
> Also you may add that until nested gets implemented, CLASS values are
> the same for stage 1 and stage 2. This will change later on.
Will do.

Thanks,
Mostafa

> 
> Besides
> Reviewed-by: Eric Auger 
> 
> Eric
> 
> >
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmuv3-internal.h | 6 ++
> >  hw/arm/smmuv3.c  | 6 +-
> >  2 files changed, 11 insertions(+), 1 deletion(-)
> >
> > diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
> > index e4dd11e1e6..0f3ecec804 100644
> > --- a/hw/arm/smmuv3-internal.h
> > +++ b/hw/arm/smmuv3-internal.h
> > @@ -32,6 +32,12 @@ typedef enum SMMUTranslationStatus {
> >  SMMU_TRANS_SUCCESS,
> >  } SMMUTranslationStatus;
> >  
> > +typedef enum SMMUTranslationClass {
> > +SMMU_CLASS_CD,
> > +SMMU_CLASS_TT,
> > +SMMU_CLASS_IN,
> > +} SMMUTranslationClass;
> > +
> >  /* MMIO Registers */
> >  
> >  REG32(IDR0,0x0)
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> > index 9dd3ea48e4..1eb5b160d2 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -942,7 +942,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
> > *mr, hwaddr addr,
> >  event.type = SMMU_EVT_F_WALK_EABT;
> >  event.u.f_walk_eabt.addr = addr;
> >  event.u.f_walk_eabt.rnw = flag & 0x1;
> > -event.u.f_walk_eabt.class = 0x1;
> > +event.u.f_walk_eabt.class = SMMU_CLASS_TT;
> >  event.u.f_walk_eabt.addr2 = ptw_info.addr;
> >  break;
> >  case SMMU_PTW_ERR_TRANSLATION:
> > @@ -950,6 +950,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
> > *mr, hwaddr addr,
> >  event.type = SMMU_EVT_F_TRANSLATION;
> >  event.u.f_translation.addr = addr;
> >  event.u.f_translation.addr2 = ptw_info.addr;
> > +event.u.f_translation.class = SMMU_CLASS_IN;
> >  event.u.f_translation.rnw = flag & 0x1;
> >  }
> >  break;
> > @@ -958,6 +959,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
> > *mr, hwaddr addr,
> >  event.type = SMMU_EVT_F_ADDR_SIZE;
> >  event.u.f_addr_size.addr = addr;
> >  event.u.f_addr_size.addr2 = ptw_info.addr;
> > +event.u.f_translation.class = SMMU_CLASS_IN;
> >  event.u.f_addr_size.rnw = flag & 0x1;
> >  }
> >  break;
> > @@ -966,6 +968,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
> > *mr, hwaddr addr,
> >  event.type = SMMU_EVT_F_ACCESS;
> >  event.u.f_access.addr = addr;
> >  event.u.f_access.addr2 = ptw_info.addr;
> > +event.u.f_translation.class = SMMU_CLASS_IN;
> >  event.u.f_access.rnw = flag & 0x1;
> >  }
> >  break;
> > @@ -974,6 +977,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
> > *mr, hwaddr addr,
> >  event.type = SMMU_EVT_F_PERMISSION;
> >  event.u.f_permission.addr = addr;
> >  event.u.f_permission.addr2 = ptw_info.addr;
> > +event.u.f_translation.class = SMMU_CLASS_IN;
> >  event.u.f_permission.rnw = flag & 0x1;
> >  }
> >  break;
> 



Re: [RFC PATCH v3 02/18] hw/arm/smmu: Fix IPA for stage-2 events

2024-05-16 Thread Mostafa Saleh
Hi Eric,

On Mon, May 13, 2024 at 01:47:44PM +0200, Eric Auger wrote:
> Hi Mostafa,
> 
> On 4/29/24 05:23, Mostafa Saleh wrote:
> > For the following events (ARM IHI 0070 F.b - 7.3 Event records):
> > - F_TRANSLATION
> > - F_ACCESS
> > - F_PERMISSION
> > - F_ADDR_SIZE
> >
> > If fault occurs at stage 2, S2 == 1 and:
> >   - If translating an IPA for a transaction (whether by input to
> > stage 2-only configuration, or after successful stage 1 translation),
> > CLASS == IN, and IPA is provided.
> CLASS == IN sounds a bit confusing here since the class value depends on
> what is being translated and class is not handled in that patch.
At this point only CLASS IN is used as nesting is not supported,
I will clarify that in the commit message.

> >
> > However, this was not implemented correctly, as for stage 2, we Qemu
> s/we QEMU/ the code
Will do.

> > only sets the  S2 bit but not the IPA.
> If this is a fix, please add the "Fixes:" tag and fixed commit sha1.
Will do.

> >
> > This field has the same bits as FetchAddr in F_WALK_EABT which is
> > populated correctly, so we don’t change that.
> > The population of this field should be done from the walker as the IPA 
> > address
> s/population/setting? I am not a native english speaker though
Me neither :), I will change it.

Thanks,
Mostafa
> > wouldn't be known in case of nesting.
> >
> > For stage 1, the spec says:
> >   If fault occurs at stage 1, S2 == 0 and:
> >   CLASS == IN, IPA is UNKNOWN.
> >
> > So, no need to set it for stage 1, as ptw_info is initialised to zero in
> > smmuv3_translate().
> >
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmu-common.c | 10 ++
> >  hw/arm/smmuv3.c  |  4 
> >  2 files changed, 10 insertions(+), 4 deletions(-)
> >
> > diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
> > index eb2356bc35..8a8c718e6b 100644
> > --- a/hw/arm/smmu-common.c
> > +++ b/hw/arm/smmu-common.c
> > @@ -448,7 +448,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
> >   */
> >  if (ipa >= (1ULL << inputsize)) {
> >  info->type = SMMU_PTW_ERR_TRANSLATION;
> > -goto error;
> > +goto error_ipa;
> >  }
> >  
> >  while (level < VMSA_LEVELS) {
> > @@ -494,13 +494,13 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
> >   */
> >  if (!PTE_AF(pte) && !cfg->s2cfg.affd) {
> >  info->type = SMMU_PTW_ERR_ACCESS;
> > -goto error;
> > +goto error_ipa;
> >  }
> >  
> >  s2ap = PTE_AP(pte);
> >  if (is_permission_fault_s2(s2ap, perm)) {
> >  info->type = SMMU_PTW_ERR_PERMISSION;
> > -goto error;
> > +goto error_ipa;
> >  }
> >  
> >  /*
> > @@ -509,7 +509,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
> >   */
> >  if (gpa >= (1ULL << cfg->s2cfg.eff_ps)) {
> >  info->type = SMMU_PTW_ERR_ADDR_SIZE;
> > -goto error;
> > +goto error_ipa;
> >  }
> >  
> >  tlbe->entry.translated_addr = gpa;
> > @@ -522,6 +522,8 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
> >  }
> >  info->type = SMMU_PTW_ERR_TRANSLATION;
> >  
> > +error_ipa:
> > +info->addr = ipa;
> >  error:
> >  info->stage = 2;
> >  tlbe->entry.perm = IOMMU_NONE;
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> > index 2d1e0d55ec..9dd3ea48e4 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -949,6 +949,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
> > *mr, hwaddr addr,
> >  if (PTW_RECORD_FAULT(cfg)) {
> >  event.type = SMMU_EVT_F_TRANSLATION;
> >  event.u.f_translation.addr = addr;
> > +event.u.f_translation.addr2 = ptw_info.addr;
> >  event.u.f_translation.rnw = flag & 0x1;
> >  }
> >  break;
> > @@ -956,6 +957,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
> > *mr, hwaddr addr,
> >  if (PTW_RECORD_FAULT(cfg)) {
> >  event.type = SMMU_EVT_F_ADDR_SIZE;
> >  event.u.f_addr_size.addr = addr;
> > +event.u.f_addr_size.addr2 = ptw_info.addr;
> >  event.u.f_addr_size.rnw = flag & 0x1;
> &

[RFC PATCH v3 07/18] hw/arm/smmuv3: Translate CD and TT using stage-2 table

2024-04-28 Thread Mostafa Saleh
According to ARM SMMU architecture specification (ARM IHI 0070 F.b),
In "5.2 Stream Table Entry":
 [51:6] S1ContextPtr
 If Config[1] == 1 (stage 2 enabled), this pointer is an IPA translated by
 stage 2 and the programmed value must be within the range of the IAS.

In "5.4.1 CD notes":
 The translation table walks performed from TTB0 or TTB1 are always performed
 in IPA space if stage 2 translations are enabled.

This patch implements translation of the S1 context descriptor pointer and
TTBx base addresses through the S2 stage (IPA -> PA)

smmuv3_do_translate() is updated to have one arg which is translation
class, this is useful for:
 - Decide whether a translation is stage-2 only or use the STE config.
 - Populate the class in case of faults, WALK_EABT is lefat as it as
   it is always triggered from TT access so no need to use the input
   class.

In case for stage-2 only translation, which only used in nesting, the
stage and asid are saved and restored before and after calling
smmu_translate().

Translating CD or TTBx can fail for the following reasons:
1) Large address size: This is described in
   (3.4.3 Address sizes of SMMU-originated accesses)
   - For CD ptr larger than IAS, for SMMUv3.1, it can trigger either
 C_BAD_STE or Translation fault, we implement the latter as it
 requires no extra code.
   - For TTBx, if larger than the effective stage 1 output address size, it
 triggers C_BAD_CD.

2) Faults from PTWs (7.3 Event records)
   - F_ADDR_SIZE: large address size after first level causes stage 2 Address
 Size fault (Also in 3.4.3 Address sizes of SMMU-originated accesses)
   - F_PERMISSION: Same as an address translation. However, when
 CLASS == CD, the access is implicitly Data and a read.
   - F_ACCESS: Same as an address translation.
   - F_TRANSLATION: Same as an address translation.
   - F_WALK_EABT: Same as an address translation.
  These are already implemented in the PTW logic, so no extra handling
  required.

As there are multiple locations where the address is calculated from a
cached entry, a new macro CACHED_ENTRY_TO_ADDR is introduced.
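
For reference, a plausible shape for such a macro, consistent with how
it is used in the hunks below (a sketch; the series defines the real
one in include/hw/arm/smmu-common.h):

/* Translated page base from the cached entry, plus the in-page offset. */
#define CACHED_ENTRY_TO_ADDR(ent, addr) \
    ((ent)->entry.translated_addr + ((addr) & (ent)->entry.addr_mask))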

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3.c  | 76 ++--
 include/hw/arm/smmu-common.h |  3 ++
 2 files changed, 66 insertions(+), 13 deletions(-)

diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index cc61708160..cc61c82321 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -337,14 +337,33 @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, 
STE *buf,
 
 }
 
+static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
+ SMMUTransCfg *cfg,
+ SMMUEventInfo *event,
+ IOMMUAccessFlags flag,
+ SMMUTLBEntry **out_entry,
+ SMMUTranslationClass class);
 /* @ssid > 0 not supported yet */
-static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
-   CD *buf, SMMUEventInfo *event)
+static int smmu_get_cd(SMMUv3State *s, STE *ste, SMMUTransCfg *cfg,
+   uint32_t ssid, CD *buf, SMMUEventInfo *event)
 {
 dma_addr_t addr = STE_CTXPTR(ste);
 int ret, i;
+SMMUTranslationStatus status;
+SMMUTLBEntry *entry;
 
 trace_smmuv3_get_cd(addr);
+
+if (cfg->stage == SMMU_NESTED) {
+status = smmuv3_do_translate(s, addr, cfg, event,
+ IOMMU_RO, &entry, SMMU_CLASS_CD);
+if (status != SMMU_TRANS_SUCCESS) {
+return -EINVAL;
+}
+
+addr = CACHED_ENTRY_TO_ADDR(entry, addr);
+}
+
 /* TODO: guarantee 64-bit single-copy atomicity */
 ret = dma_memory_read(&address_space_memory, addr, buf, sizeof(*buf),
   MEMTXATTRS_UNSPECIFIED);
@@ -659,10 +678,13 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, 
STE *ste,
 return 0;
 }
 
-static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event)
+static int decode_cd(SMMUv3State *s, SMMUTransCfg *cfg,
+ CD *cd, SMMUEventInfo *event)
 {
 int ret = -EINVAL;
 int i;
+SMMUTranslationStatus status;
+SMMUTLBEntry *entry;
 
 if (!CD_VALID(cd) || !CD_AARCH64(cd)) {
 goto bad_cd;
@@ -713,9 +735,21 @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, 
SMMUEventInfo *event)
 
 tt->tsz = tsz;
 tt->ttb = CD_TTB(cd, i);
+
 if (tt->ttb & ~(MAKE_64BIT_MASK(0, cfg->oas))) {
 goto bad_cd;
 }
+
+/* Translate the TTBx, from IPA to PA if nesting is enabled. */
+if (cfg->stage == SMMU_NESTED) {
+status = smmuv3_do_translate(s, tt->ttb, cfg, event, IOMMU_RO,
+ &entry, SMMU_CLASS_TT);
+if (

[RFC PATCH v3 01/18] hw/arm/smmu-common: Add missing size check for stage-1

2024-04-28 Thread Mostafa Saleh
According to the SMMU architecture specification (ARM IHI 0070 F.b),
in “3.4 Address sizes”
The address output from the translation causes a stage 1 Address Size
fault if it exceeds the range of the effective IPA size for the given CD.

However, this check was missing.

There is already a similar check for stage-2 against effective PA.

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 1ce706bf94..eb2356bc35 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -381,6 +381,16 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
 goto error;
 }
 
+/*
+ * The address output from the translation causes a stage 1 Address
+ * Size fault if it exceeds the range of the effective IPA size for
+ * the given CD.
+ */
+if (gpa >= (1ULL << cfg->oas)) {
+info->type = SMMU_PTW_ERR_ADDR_SIZE;
+goto error;
+}
+
 tlbe->entry.translated_addr = gpa;
 tlbe->entry.iova = iova & ~mask;
 tlbe->entry.addr_mask = mask;
-- 
2.44.0.769.g3c40516874-goog
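
A quick worked example of the new check (values assumed): with
cfg->oas = 44, the limit is 1ULL << 44 = 0x100000000000 (16 TiB), so a
stage-1 walk producing gpa = 0x100000000000 or above now reports
SMMU_PTW_ERR_ADDR_SIZE instead of silently returning an out-of-range
output address.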




[RFC PATCH v3 06/18] hw/arm/smmu: Consolidate ASID and VMID types

2024-04-28 Thread Mostafa Saleh
ASID and VMID used to be uint16_t in the translation config; however,
in other contexts they can be int, as -1 in case of TLB invalidation
represents all (don't care).
When stage-2 was added, asid was set to -1 in stage-2 configs and vmid
to -1 in stage-1 configs. However, that meant they were actually stored
as 65535; this was not an issue as nesting was not supported and no
command/lookup targets both.

With nesting, it’s critical to get this right as translation must be
tagged correctly with ASID/VMID, and with ASID=-1 meaning stage-2.
Represent ASID/VMID everywhere as int.
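
To make the truncation pitfall concrete, a minimal standalone demo
(illustrative only, not part of the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint16_t asid_u16 = -1;  /* wraps to 65535; the "all" marker is lost */
    int asid_int = -1;       /* keeps -1 as a distinguishable marker */

    printf("%u %d\n", asid_u16, asid_int);  /* prints: 65535 -1 */
    printf("%d\n", (int)asid_u16 == -1);    /* prints: 0, compare fails */
    return 0;
}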

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 10 +-
 hw/arm/smmuv3.c  |  4 ++--
 hw/arm/trace-events  | 18 +-
 include/hw/arm/smmu-common.h | 14 +++---
 4 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index d94db6b34f..21982621c0 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -57,7 +57,7 @@ static gboolean smmu_iotlb_key_equal(gconstpointer v1, 
gconstpointer v2)
(k1->vmid == k2->vmid);
 }
 
-SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint16_t vmid, uint64_t iova,
+SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova,
 uint8_t tg, uint8_t level)
 {
 SMMUIOTLBKey key = {.asid = asid, .vmid = vmid, .iova = iova,
@@ -130,7 +130,7 @@ void smmu_iotlb_inv_all(SMMUState *s)
 static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value,
  gpointer user_data)
 {
-uint16_t asid = *(uint16_t *)user_data;
+int asid = *(int *)user_data;
 SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
 
 return SMMU_IOTLB_ASID(*iotlb_key) == asid;
@@ -139,7 +139,7 @@ static gboolean smmu_hash_remove_by_asid(gpointer key, 
gpointer value,
 static gboolean smmu_hash_remove_by_vmid(gpointer key, gpointer value,
  gpointer user_data)
 {
-uint16_t vmid = *(uint16_t *)user_data;
+int vmid = *(int *)user_data;
 SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
 
 return SMMU_IOTLB_VMID(*iotlb_key) == vmid;
@@ -191,13 +191,13 @@ void smmu_iotlb_inv_iova(SMMUState *s, int asid, int 
vmid, dma_addr_t iova,
 &info);
 }
 
-void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid)
+void smmu_iotlb_inv_asid(SMMUState *s, int asid)
 {
 trace_smmu_iotlb_inv_asid(asid);
 g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid, &asid);
 }
 
-void smmu_iotlb_inv_vmid(SMMUState *s, uint16_t vmid)
+void smmu_iotlb_inv_vmid(SMMUState *s, int vmid)
 {
 trace_smmu_iotlb_inv_vmid(vmid);
 g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid, &vmid);
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index f98c157221..cc61708160 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1243,7 +1243,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
 }
 case SMMU_CMD_TLBI_NH_ASID:
 {
-uint16_t asid = CMD_ASID(&cmd);
+int asid = CMD_ASID(&cmd);
 
 if (!STAGE1_SUPPORTED(s)) {
 cmd_error = SMMU_CERROR_ILL;
@@ -1276,7 +1276,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
 break;
 case SMMU_CMD_TLBI_S12_VMALL:
 {
-uint16_t vmid = CMD_VMID(&cmd);
+int vmid = CMD_VMID(&cmd);
 
 if (!STAGE2_SUPPORTED(s)) {
 cmd_error = SMMU_CERROR_ILL;
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index cc12924a84..09ccd39548 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -11,13 +11,13 @@ smmu_ptw_page_pte(int stage, int level,  uint64_t iova, 
uint64_t baseaddr, uint6
 smmu_ptw_block_pte(int stage, int level, uint64_t baseaddr, uint64_t pteaddr, 
uint64_t pte, uint64_t iova, uint64_t gpa, int bsize_mb) "stage=%d level=%d 
base@=0x%"PRIx64" pte@=0x%"PRIx64" pte=0x%"PRIx64" iova=0x%"PRIx64" block 
address = 0x%"PRIx64" block size = %d MiB"
 smmu_get_pte(uint64_t baseaddr, int index, uint64_t pteaddr, uint64_t pte) 
"baseaddr=0x%"PRIx64" index=0x%x, pteaddr=0x%"PRIx64", pte=0x%"PRIx64
 smmu_iotlb_inv_all(void) "IOTLB invalidate all"
-smmu_iotlb_inv_asid(uint16_t asid) "IOTLB invalidate asid=%d"
-smmu_iotlb_inv_vmid(uint16_t vmid) "IOTLB invalidate vmid=%d"
-smmu_iotlb_inv_iova(uint16_t asid, uint64_t addr) "IOTLB invalidate asid=%d 
addr=0x%"PRIx64
+smmu_iotlb_inv_asid(int asid) "IOTLB invalidate asid=%d"
+smmu_iotlb_inv_vmid(int vmid) "IOTLB invalidate vmid=%d"
+smmu_iotlb_inv_iova(int asid, uint64_t addr) "IOTLB invalidate asid=%d 
addr=0x%"PRIx64
 smmu_inv_notifiers_mr(const char *name) "iommu mr=%s"
-smmu_iotlb_lookup_hit(uint16_t asid, uint16_t vmid, uint64_t

[RFC PATCH v3 12/18] hw/arm/smmu: Support nesting in the rest of commands

2024-04-28 Thread Mostafa Saleh
Some commands need rework for nesting, as they used to assume S1
and S2 are mutually exclusive:

- CMD_TLBI_NH_ASID: Consider VMID if stage-2 is supported
- CMD_TLBI_NH_ALL: Consider VMID if stage-2 is supported; otherwise
  invalidate everything. This required a new VMID invalidation
  function for stage-1 only (ASID >= 0)

Also, rework trace events to reflect the new implementation.

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 36 +---
 hw/arm/smmuv3.c  | 31 +--
 hw/arm/trace-events  |  6 --
 include/hw/arm/smmu-common.h |  3 ++-
 4 files changed, 64 insertions(+), 12 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index fa2460cf64..3ed0be05ef 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -147,13 +147,14 @@ void smmu_iotlb_inv_all(SMMUState *s)
 g_hash_table_remove_all(s->iotlb);
 }
 
-static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value,
- gpointer user_data)
+static gboolean smmu_hash_remove_by_asid_vmid(gpointer key, gpointer value,
+  gpointer user_data)
 {
-int asid = *(int *)user_data;
+SMMUIOTLBPageInvInfo *info = (SMMUIOTLBPageInvInfo *)user_data;
 SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
 
-return SMMU_IOTLB_ASID(*iotlb_key) == asid;
+return (SMMU_IOTLB_ASID(*iotlb_key) == info->asid) &&
+   (SMMU_IOTLB_VMID(*iotlb_key) == info->vmid);
 }
 
 static gboolean smmu_hash_remove_by_vmid(gpointer key, gpointer value,
@@ -165,6 +166,16 @@ static gboolean smmu_hash_remove_by_vmid(gpointer key, 
gpointer value,
 return SMMU_IOTLB_VMID(*iotlb_key) == vmid;
 }
 
+static gboolean smmu_hash_remove_by_vmid_s1(gpointer key, gpointer value,
+gpointer user_data)
+{
+int vmid = *(int *)user_data;
+SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
+
+return (SMMU_IOTLB_VMID(*iotlb_key) == vmid) &&
+   (SMMU_IOTLB_ASID(*iotlb_key) >= 0);
+}
+
 static gboolean smmu_hash_remove_by_asid_vmid_iova(gpointer key, gpointer 
value,
   gpointer user_data)
 {
@@ -258,10 +269,15 @@ void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, 
dma_addr_t ipa, uint8_t tg,
 &info);
 }
 
-void smmu_iotlb_inv_asid(SMMUState *s, int asid)
+void smmu_iotlb_inv_asid_vmid(SMMUState *s, int asid, int vmid)
 {
-trace_smmu_iotlb_inv_asid(asid);
-g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid, &asid);
+SMMUIOTLBPageInvInfo info = {
+.asid = asid,
+.vmid = vmid,
+};
+
+trace_smmu_iotlb_inv_asid_vmid(asid, vmid);
+g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid_vmid, 
&info);
 }
 
 void smmu_iotlb_inv_vmid(SMMUState *s, int vmid)
@@ -270,6 +286,12 @@ void smmu_iotlb_inv_vmid(SMMUState *s, int vmid)
 g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid, &vmid);
 }
 
+inline void smmu_iotlb_inv_vmid_s1(SMMUState *s, int vmid)
+{
+trace_smmu_iotlb_inv_vmid_s1(vmid);
+g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid_s1, &vmid);
+}
+
 /* VMSAv8-64 Translation */
 
 /**
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 82d918d9b5..e0fd494646 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1303,25 +1303,52 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
 case SMMU_CMD_TLBI_NH_ASID:
 {
 int asid = CMD_ASID(&cmd);
+int vmid = -1;
 
 if (!STAGE1_SUPPORTED(s)) {
 cmd_error = SMMU_CERROR_ILL;
 break;
 }
 
+/*
+ * VMID is only matched when stage 2 is supported for the Security
+ * state corresponding to the command queue that the command was
+ * issued in.
+ * QEMU ignores the field by setting to -1, similarly to what STE
+ * decoding does. And invalidation commands ignore VMID < 0.
+ */
+if (STAGE2_SUPPORTED(s)) {
+vmid = CMD_VMID(&cmd);
+}
+
 trace_smmuv3_cmdq_tlbi_nh_asid(asid);
 smmu_inv_notifiers_all(&s->smmu_state);
-smmu_iotlb_inv_asid(bs, asid);
+smmu_iotlb_inv_asid_vmid(bs, asid, vmid);
 break;
 }
 case SMMU_CMD_TLBI_NH_ALL:
+{
+int vmid = -1;
+
 if (!STAGE1_SUPPORTED(s)) {
 cmd_error = SMMU_CERROR_ILL;
 break;
 }
+
+/*
+ * If stage-2 is supported, invalidate for this VMID only, 
otherwise
+ * invalidate the whole thing, see SMMU_CMD_TLBI_NH_ASID()
+ */
+if (STAGE2_SUPPORTED(s)) {
+ 
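
To summarize the matching rules this patch ends up with (derived from
the hunks above; a comment-style recap, not code from the series):

/*
 * smmu_hash_remove_by_vmid(vmid):      VMID matches; removes both
 *                                      stage-1 and stage-2 entries
 * smmu_hash_remove_by_vmid_s1(vmid):   VMID matches and ASID >= 0;
 *                                      removes stage-1 entries only
 * smmu_hash_remove_by_asid_vmid(a, v): both ASID and VMID must match
 */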

[RFC PATCH v3 16/18] hw/arm/smmu: Refactor SMMU OAS

2024-04-28 Thread Mostafa Saleh
SMMUv3 OAS is hardcoded to 44 bits. For nested configurations that
can be a problem, as stage-2 might be shared with the CPU, which might
have a different PARANGE; and according to the SMMU manual ARM IHI
0070F.b, 6.3.6 SMMU_IDR5, OAS must match the system physical address
size.

This patch doesn't change the SMMU OAS, but refactors the code to
make it easier to do that:
- Rely everywhere on IDR5 for reading OAS instead of using the macro, so
  it is easier to just change IDR5 and have it propagate correctly (the
  IDR5.OAS encoding is sketched below).
- Remove unused functions/macros: pa_range/MAX_PA
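
For context, a sketch of the IDR5.OAS encoding that oas2bits() decodes
(per ARM IHI 0070; the helper's exact body in the tree may differ):

static inline int oas2bits_sketch(int oas_field)
{
    switch (oas_field) {
    case 0: return 32;
    case 1: return 36;
    case 2: return 40;
    case 3: return 42;
    case 4: return 44;
    case 5: return 48;
    case 6: return 52;
    }
    return -1; /* reserved encoding */
}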

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c |  7 ---
 hw/arm/smmuv3-internal.h | 13 -
 hw/arm/smmuv3.c  | 35 ---
 3 files changed, 32 insertions(+), 23 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 3ed0be05ef..b559878aef 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -434,7 +434,8 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
 inputsize = 64 - tt->tsz;
 level = 4 - (inputsize - 4) / stride;
 indexmask = VMSA_IDXMSK(inputsize, stride, level);
-baseaddr = extract64(tt->ttb, 0, 48);
+
+baseaddr = extract64(tt->ttb, 0, cfg->oas);
 baseaddr &= ~indexmask;
 
 while (level < VMSA_LEVELS) {
@@ -557,8 +558,8 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
  * Get the ttb from concatenated structure.
  * The offset is the idx * size of each ttb(number of ptes * (sizeof(pte))
  */
-uint64_t baseaddr = extract64(cfg->s2cfg.vttb, 0, 48) + (1 << stride) *
-  idx * sizeof(uint64_t);
+uint64_t baseaddr = extract64(cfg->s2cfg.vttb, 0, cfg->s2cfg.eff_ps) +
+  (1 << stride) * idx * sizeof(uint64_t);
 dma_addr_t indexmask = VMSA_IDXMSK(inputsize, stride, level);
 
 baseaddr &= ~indexmask;
diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
index 0f3ecec804..0ebf2eebcf 100644
--- a/hw/arm/smmuv3-internal.h
+++ b/hw/arm/smmuv3-internal.h
@@ -602,19 +602,6 @@ static inline int oas2bits(int oas_field)
 return -1;
 }
 
-static inline int pa_range(STE *ste)
-{
-int oas_field = MIN(STE_S2PS(ste), SMMU_IDR5_OAS);
-
-if (!STE_S2AA64(ste)) {
-return 40;
-}
-
-return oas2bits(oas_field);
-}
-
-#define MAX_PA(ste) ((1 << pa_range(ste)) - 1)
-
 /* CD fields */
 
 #define CD_VALID(x)   extract32((x)->word[0], 31, 1)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 8a11e41144..4ac818cf7a 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -408,10 +408,10 @@ static bool s2t0sz_valid(SMMUTransCfg *cfg)
 }
 
 if (cfg->s2cfg.granule_sz == 16) {
-return (cfg->s2cfg.tsz >= 64 - oas2bits(SMMU_IDR5_OAS));
+return (cfg->s2cfg.tsz >= 64 - cfg->s2cfg.eff_ps);
 }
 
-return (cfg->s2cfg.tsz >= MAX(64 - oas2bits(SMMU_IDR5_OAS), 16));
+return (cfg->s2cfg.tsz >= MAX(64 - cfg->s2cfg.eff_ps, 16));
 }
 
 /*
@@ -432,8 +432,11 @@ static bool s2_pgtable_config_valid(uint8_t sl0, uint8_t 
t0sz, uint8_t gran)
 return nr_concat <= VMSA_MAX_S2_CONCAT;
 }
 
-static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
+static int decode_ste_s2_cfg(SMMUv3State *s, SMMUTransCfg *cfg,
+ STE *ste)
 {
+uint8_t oas = FIELD_EX32(s->idr[5], IDR5, OAS);
+
 if (STE_S2AA64(ste) == 0x0) {
 qemu_log_mask(LOG_UNIMP,
   "SMMUv3 AArch32 tables not supported\n");
@@ -466,7 +469,15 @@ static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
 }
 
 /* For AA64, The effective S2PS size is capped to the OAS. */
-cfg->s2cfg.eff_ps = oas2bits(MIN(STE_S2PS(ste), SMMU_IDR5_OAS));
+cfg->s2cfg.eff_ps = oas2bits(MIN(STE_S2PS(ste), oas));
+/*
+ * For SMMUv3.1 and later, when OAS == IAS == 52, the stage 2 input
+ * range is further limited to 48 bits unless STE.S2TG indicates a
+ * 64KB granule.
+ */
+if (cfg->s2cfg.granule_sz != 16) {
+cfg->s2cfg.eff_ps = MIN(cfg->s2cfg.eff_ps, 48);
+}
 /*
  * It is ILLEGAL for the address in S2TTB to be outside the range
  * described by the effective S2PS value.
@@ -542,6 +553,7 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
   STE *ste, SMMUEventInfo *event)
 {
 uint32_t config;
+uint8_t oas = FIELD_EX32(s->idr[5], IDR5, OAS);
 int ret;
 
 if (!STE_VALID(ste)) {
@@ -585,8 +597,8 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
  * Stage-1 OAS defaults to OAS even if not enabled as it would be used
  * in input address check for stage-2.
  */
-cfg->oas = oas2bits(SMMU_IDR5_OAS);
-ret = decode_ste_s2_cfg(cfg, ste);
+cfg->oas = oas2bits(oas);
+ret = decode_ste_s2_cfg(s, cfg, ste);
 if (ret) {
 goto bad_ste;
 

[RFC PATCH v3 17/18] hw/arm/smmuv3: Add property for OAS

2024-04-28 Thread Mostafa Saleh
Add a property that sets the OAS of the SMMU; it is not used in this
patch.

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3-internal.h |  3 ++-
 hw/arm/smmuv3.c  | 29 -
 include/hw/arm/smmuv3.h  |  1 +
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
index 0ebf2eebcf..dd91807624 100644
--- a/hw/arm/smmuv3-internal.h
+++ b/hw/arm/smmuv3-internal.h
@@ -111,7 +111,8 @@ REG32(IDR5,0x14)
  FIELD(IDR5, VAX,10, 2);
  FIELD(IDR5, STALL_MAX,  16, 16);
 
-#define SMMU_IDR5_OAS 4
+#define SMMU_IDR5_OAS_DEF 4 /* 44 bits. */
+#define SMMU_IDR5_OAS_MAX 5 /* 48 bits. */
 
 REG32(IIDR,0x18)
 REG32(AIDR,0x1c)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 4ac818cf7a..39d03e7e24 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -299,7 +299,9 @@ static void smmuv3_init_regs(SMMUv3State *s)
 s->idr[3] = FIELD_DP32(s->idr[3], IDR3, RIL, 1);
 s->idr[3] = FIELD_DP32(s->idr[3], IDR3, BBML, 2);
 
-s->idr[5] = FIELD_DP32(s->idr[5], IDR5, OAS, SMMU_IDR5_OAS); /* 44 bits */
+/* PTW doesn't support 52 bits. */
+s->oas = MIN(s->oas, SMMU_IDR5_OAS_MAX);
+s->idr[5] = FIELD_DP32(s->idr[5], IDR5, OAS, s->oas);
 /* 4K, 16K and 64K granule support */
 s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN4K, 1);
 s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN16K, 1);
@@ -1901,11 +1903,34 @@ static const VMStateDescription vmstate_gbpa = {
 }
 };
 
+static const VMStateDescription vmstate_oas = {
+.name = "smmuv3/oas",
+.version_id = 1,
+.minimum_version_id = 1,
+.fields = (const VMStateField[]) {
+VMSTATE_INT32(oas, SMMUv3State),
+VMSTATE_END_OF_LIST()
+}
+};
+
+static int smmuv3_preload(void *opaque)
+{
+SMMUv3State *s = opaque;
+
+/*
+ * In case it wasn't migrated, use the value used
+ * by older QEMU.
+ */
+s->oas = SMMU_IDR5_OAS_DEF;
+return 0;
+}
+
 static const VMStateDescription vmstate_smmuv3 = {
 .name = "smmuv3",
 .version_id = 1,
 .minimum_version_id = 1,
 .priority = MIG_PRI_IOMMU,
+.pre_load = smmuv3_preload,
 .fields = (const VMStateField[]) {
 VMSTATE_UINT32(features, SMMUv3State),
 VMSTATE_UINT8(sid_size, SMMUv3State),
@@ -1933,6 +1958,7 @@ static const VMStateDescription vmstate_smmuv3 = {
 },
 .subsections = (const VMStateDescription * const []) {
 &vmstate_gbpa,
+&vmstate_oas,
 NULL
 }
 };
@@ -1945,6 +1971,7 @@ static Property smmuv3_properties[] = {
  * Defaults to stage 1
  */
 DEFINE_PROP_STRING("stage", SMMUv3State, stage),
+DEFINE_PROP_INT32("oas", SMMUv3State, oas, SMMU_IDR5_OAS_DEF),
 DEFINE_PROP_END_OF_LIST()
 };
 
diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
index d183a62766..00a9eb4467 100644
--- a/include/hw/arm/smmuv3.h
+++ b/include/hw/arm/smmuv3.h
@@ -63,6 +63,7 @@ struct SMMUv3State {
 qemu_irq irq[4];
 QemuMutex mutex;
 char *stage;
+int32_t oas;
 };
 
 typedef enum {
-- 
2.44.0.769.g3c40516874-goog
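
Assuming the property lands as defined above, it could be set through
QEMU's generic -global mechanism; a hypothetical invocation (5 encodes
48 bits, the maximum the PTW supports here):

qemu-system-aarch64 -machine virt,iommu=smmuv3 \
    -global arm-smmuv3.oas=5 ...

Per the cover letter, though, the intended consumer is the virt board,
which sets OAS based on the CPU PARANGE rather than leaving it to the
user.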




[RFC PATCH v3 04/18] hw/arm/smmu: Use enum for SMMU stage

2024-04-28 Thread Mostafa Saleh
Currently, the translation stage is represented as an int, where 1 is
stage-1 and 2 is stage-2. When nesting is added, using 3 to represent
it would be confusing, so we use an enum instead.

While keeping the same values, this is useful for:
 - Doing tricks with bit masks, where BIT(0) is stage-1, BIT(1) is
   stage-2, and both set means nested (see the sketch below).
 - Tracing, as stage is printed as int.
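
A sketch of the resulting enum (values match the existing ints; per the
diffstat the real definition lives in include/hw/arm/smmu-common.h):

typedef enum SMMUStage {
    SMMU_STAGE_1 = 1,  /* BIT(0) */
    SMMU_STAGE_2,      /* BIT(1) */
    SMMU_NESTED,       /* BIT(0) | BIT(1) */
} SMMUStage;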

Signed-off-by: Mostafa Saleh 
Reviewed-by: Eric Auger 
---
 hw/arm/smmu-common.c | 14 +++---
 hw/arm/smmuv3.c  | 15 ---
 include/hw/arm/smmu-common.h | 11 +--
 3 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 8a8c718e6b..8a5858f69f 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -304,7 +304,7 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
   SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
 {
 dma_addr_t baseaddr, indexmask;
-int stage = cfg->stage;
+SMMUStage stage = cfg->stage;
 SMMUTransTableInfo *tt = select_tt(cfg, iova);
 uint8_t level, granule_sz, inputsize, stride;
 
@@ -402,7 +402,7 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
 info->type = SMMU_PTW_ERR_TRANSLATION;
 
 error:
-info->stage = 1;
+info->stage = SMMU_STAGE_1;
 tlbe->entry.perm = IOMMU_NONE;
 return -EINVAL;
 }
@@ -425,7 +425,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
   dma_addr_t ipa, IOMMUAccessFlags perm,
   SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
 {
-const int stage = 2;
+const SMMUStage stage = SMMU_STAGE_2;
 int granule_sz = cfg->s2cfg.granule_sz;
 /* ARM DDI0487I.a: Table D8-7. */
 int inputsize = 64 - cfg->s2cfg.tsz;
@@ -525,7 +525,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
 error_ipa:
 info->addr = ipa;
 error:
-info->stage = 2;
+info->stage = SMMU_STAGE_2;
 tlbe->entry.perm = IOMMU_NONE;
 return -EINVAL;
 }
@@ -544,9 +544,9 @@ error:
 int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
  SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
 {
-if (cfg->stage == 1) {
+if (cfg->stage == SMMU_STAGE_1) {
 return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info);
-} else if (cfg->stage == 2) {
+} else if (cfg->stage == SMMU_STAGE_2) {
 /*
  * If bypassing stage 1(or unimplemented), the input address is passed
  * directly to stage 2 as IPA. If the input address of a transaction
@@ -555,7 +555,7 @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, 
IOMMUAccessFlags perm,
  */
 if (iova >= (1ULL << cfg->oas)) {
 info->type = SMMU_PTW_ERR_ADDR_SIZE;
-info->stage = 1;
+info->stage = SMMU_STAGE_1;
 tlbe->entry.perm = IOMMU_NONE;
 return -EINVAL;
 }
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 1eb5b160d2..dab3ad2db9 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -34,7 +34,8 @@
 #include "smmuv3-internal.h"
 #include "smmu-internal.h"
 
-#define PTW_RECORD_FAULT(cfg)   (((cfg)->stage == 1) ? (cfg)->record_faults : \
+#define PTW_RECORD_FAULT(cfg)   (((cfg)->stage == SMMU_STAGE_1) ? \
+ (cfg)->record_faults : \
  (cfg)->s2cfg.record_faults)
 
 /**
@@ -402,7 +403,7 @@ static bool s2_pgtable_config_valid(uint8_t sl0, uint8_t 
t0sz, uint8_t gran)
 
 static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
 {
-cfg->stage = 2;
+cfg->stage = SMMU_STAGE_2;
 
 if (STE_S2AA64(ste) == 0x0) {
 qemu_log_mask(LOG_UNIMP,
@@ -678,7 +679,7 @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, 
SMMUEventInfo *event)
 
 /* we support only those at the moment */
 cfg->aa64 = true;
-cfg->stage = 1;
+cfg->stage = SMMU_STAGE_1;
 
 cfg->oas = oas2bits(CD_IPS(cd));
 cfg->oas = MIN(oas2bits(SMMU_IDR5_OAS), cfg->oas);
@@ -762,7 +763,7 @@ static int smmuv3_decode_config(IOMMUMemoryRegion *mr, 
SMMUTransCfg *cfg,
 return ret;
 }
 
-if (cfg->aborted || cfg->bypassed || (cfg->stage == 2)) {
+if (cfg->aborted || cfg->bypassed || (cfg->stage == SMMU_STAGE_2)) {
 return 0;
 }
 
@@ -882,7 +883,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
 goto epilogue;
 }
 
-if (cfg->stage == 1) {
+if (cfg->stage == SMMU_STAGE_1) {
 /* Select stage1 translation table. */
 tt = select_tt(cfg, addr);
 if (!tt) {
@@ -919,7 +920,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
*mr, hwaddr addr,
  * nesting is not supported. So it is sufficient to check the
  * translation stage to know the TLB stage for now.
  */
-event.u.f_walk_eabt.s2 = (

[RFC PATCH v3 14/18] hw/arm/smmuv3: Support and advertise nesting

2024-04-28 Thread Mostafa Saleh
Everything is in place; add the last missing bits:
- Handle fault checking according to the actual PTW event and not
  the translation stage (see the example below).
- Consolidate parsing of STE cfg and setting translation stage.

Advertise nesting if stage requested is "nested".
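
A concrete case the reworked macro gets right (configuration assumed):
under a nested config, cfg->stage is SMMU_NESTED, so the old
stage-based macro always fell through to s2cfg.record_faults, even for
a fault taken during the stage-1 walk. Keying off ptw_info.stage
instead means a stage-1 fault consults cfg->record_faults and a
stage-2 fault consults cfg->s2cfg.record_faults.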

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3.c | 50 +
 1 file changed, 34 insertions(+), 16 deletions(-)

diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 96d07234fe..88f6473d33 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -34,9 +34,10 @@
 #include "smmuv3-internal.h"
 #include "smmu-internal.h"
 
-#define PTW_RECORD_FAULT(cfg)   (((cfg)->stage == SMMU_STAGE_1) ? \
- (cfg)->record_faults : \
- (cfg)->s2cfg.record_faults)
+#define PTW_RECORD_FAULT(ptw_info, cfg) (((ptw_info).stage == SMMU_STAGE_1 && \
+(cfg)->record_faults) || \
+((ptw_info).stage == SMMU_STAGE_2 && \
+(cfg)->s2cfg.record_faults))
 
 /**
  * smmuv3_trigger_irq - pulse @irq if enabled and update
@@ -260,6 +261,9 @@ static void smmuv3_init_regs(SMMUv3State *s)
 /* Based on sys property, the stages supported in smmu will be 
advertised.*/
 if (s->stage && !strcmp("2", s->stage)) {
 s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S2P, 1);
+} else if (s->stage && !strcmp("nested", s->stage)) {
+s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S1P, 1);
+s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S2P, 1);
 } else {
 s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S1P, 1);
 }
@@ -422,8 +426,6 @@ static bool s2_pgtable_config_valid(uint8_t sl0, uint8_t 
t0sz, uint8_t gran)
 
 static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
 {
-cfg->stage = SMMU_STAGE_2;
-
 if (STE_S2AA64(ste) == 0x0) {
 qemu_log_mask(LOG_UNIMP,
   "SMMUv3 AArch32 tables not supported\n");
@@ -506,6 +508,27 @@ bad_ste:
 return -EINVAL;
 }
 
+static void decode_ste_config(SMMUTransCfg *cfg, uint32_t config)
+{
+
+if (STE_CFG_ABORT(config)) {
+cfg->aborted = true;
+return;
+}
+if (STE_CFG_BYPASS(config)) {
+cfg->bypassed = true;
+return;
+}
+
+if (STE_CFG_S1_ENABLED(config)) {
+cfg->stage = SMMU_STAGE_1;
+}
+
+if (STE_CFG_S2_ENABLED(config)) {
+cfg->stage |= SMMU_STAGE_2;
+}
+}
+
 /* Returns < 0 in case of invalid STE, 0 otherwise */
 static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
   STE *ste, SMMUEventInfo *event)
@@ -522,13 +545,9 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
 
 config = STE_CONFIG(ste);
 
-if (STE_CFG_ABORT(config)) {
-cfg->aborted = true;
-return 0;
-}
+decode_ste_config(cfg, config);
 
-if (STE_CFG_BYPASS(config)) {
-cfg->bypassed = true;
+if (cfg->aborted || cfg->bypassed) {
 return 0;
 }
 
@@ -701,7 +720,6 @@ static int decode_cd(SMMUv3State *s, SMMUTransCfg *cfg,
 
 /* we support only those at the moment */
 cfg->aa64 = true;
-cfg->stage = SMMU_STAGE_1;
 
 cfg->oas = oas2bits(CD_IPS(cd));
 cfg->oas = MIN(oas2bits(SMMU_IDR5_OAS), cfg->oas);
@@ -901,7 +919,7 @@ static SMMUTranslationStatus 
smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
 event->u.f_walk_eabt.addr2 = ptw_info.addr;
 break;
 case SMMU_PTW_ERR_TRANSLATION:
-if (PTW_RECORD_FAULT(cfg)) {
+if (PTW_RECORD_FAULT(ptw_info, cfg)) {
 event->type = SMMU_EVT_F_TRANSLATION;
 event->u.f_translation.addr = addr;
 event->u.f_translation.addr2 = ptw_info.addr;
@@ -910,7 +928,7 @@ static SMMUTranslationStatus 
smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
 }
 break;
 case SMMU_PTW_ERR_ADDR_SIZE:
-if (PTW_RECORD_FAULT(cfg)) {
+if (PTW_RECORD_FAULT(ptw_info, cfg)) {
 event->type = SMMU_EVT_F_ADDR_SIZE;
 event->u.f_addr_size.addr = addr;
 event->u.f_addr_size.addr2 = ptw_info.addr;
@@ -919,7 +937,7 @@ static SMMUTranslationStatus 
smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
 }
 break;
 case SMMU_PTW_ERR_ACCESS:
-if (PTW_RECORD_FAULT(cfg)) {
+if (PTW_RECORD_FAULT(ptw_info, cfg)) {
 event->type = SMMU_EVT_F_ACCESS;
 event->u.f_access.addr = addr;
 event->u.f_access.addr2 = ptw_info.addr;
@@ -928,7 +946,7 @@ static SMMUTranslationStatus 
smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
 }
   

[RFC PATCH v3 11/18] hw/arm/smmu: Support nesting in smmuv3_range_inval()

2024-04-28 Thread Mostafa Saleh
With nesting, we would need to invalidate IPAs without
over-invalidating stage-1 IOVAs. This can be done by
distinguishing IPAs in the TLBs by having ASID=-1.
To achieve that, rework the invalidation for IPAs to have a
separate function, while for IOVA invalidation ASID=-1 means
invalidate for all ASIDs.
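
The resulting tagging, summarized (a recap of the scheme above, not
code from the series):

/*
 * stage-1 / nested entry: key = { asid >= 0, vmid, iova, ... }
 * stage-2-only entry:     key = { asid == -1, vmid, ipa, ... }
 *
 * smmu_iotlb_inv_ipa() matches only asid == -1 entries for its VMID,
 * so stage-1 IOVAs are never over-invalidated by an IPA invalidation.
 */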

Signed-off-by: Mostafa Saleh 
Reviewed-by: Eric Auger 
---
 hw/arm/smmu-common.c | 47 
 hw/arm/smmuv3.c  | 23 --
 hw/arm/trace-events  |  2 +-
 include/hw/arm/smmu-common.h |  3 ++-
 4 files changed, 66 insertions(+), 9 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index d48ec08947..fa2460cf64 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -183,6 +183,25 @@ static gboolean smmu_hash_remove_by_asid_vmid_iova(gpointer key, gpointer value,
((entry->iova & ~info->mask) == info->iova);
 }
 
+static gboolean smmu_hash_remove_by_vmid_ipa(gpointer key, gpointer value,
+ gpointer user_data)
+{
+SMMUTLBEntry *iter = (SMMUTLBEntry *)value;
+IOMMUTLBEntry *entry = &iter->entry;
+SMMUIOTLBPageInvInfo *info = (SMMUIOTLBPageInvInfo *)user_data;
+SMMUIOTLBKey iotlb_key = *(SMMUIOTLBKey *)key;
+
+if (info->asid >= 0) {
+/* This is a stage-1 address. */
+return false;
+}
+if (info->vmid != SMMU_IOTLB_VMID(iotlb_key)) {
+return false;
+}
+return ((info->iova & ~entry->addr_mask) == entry->iova) ||
+   ((entry->iova & ~info->mask) == info->iova);
+}
+
 void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova,
  uint8_t tg, uint64_t num_pages, uint8_t ttl)
 {
@@ -211,6 +230,34 @@ void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova,
 &info);
 }
 
+/*
+ * Similar to smmu_iotlb_inv_iova(), but for Stage-2, ASID is always -1,
+ * in Stage-1 invalidation ASID = -1, means don't care.
+ */
+void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, dma_addr_t ipa, uint8_t tg,
+uint64_t num_pages, uint8_t ttl)
+{
+uint8_t granule = tg ? tg * 2 + 10 : 12;
+int asid = -1;
+
+   if (ttl && (num_pages == 1)) {
+SMMUIOTLBKey key = smmu_get_iotlb_key(asid, vmid, ipa, tg, ttl);
+
+if (g_hash_table_remove(s->iotlb, &key)) {
+return;
+}
+}
+
+SMMUIOTLBPageInvInfo info = {
+.iova = ipa,
+.vmid = vmid,
+.mask = (num_pages * 1 << granule) - 1};
+
+g_hash_table_foreach_remove(s->iotlb,
+smmu_hash_remove_by_vmid_ipa,
+&info);
+}
+
 void smmu_iotlb_inv_asid(SMMUState *s, int asid)
 {
 trace_smmu_iotlb_inv_asid(asid);
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index cc61c82321..82d918d9b5 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1118,7 +1118,7 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, int vmid,
 }
 }
 
-static void smmuv3_range_inval(SMMUState *s, Cmd *cmd)
+static void smmuv3_range_inval(SMMUState *s, Cmd *cmd, SMMUStage stage)
 {
 dma_addr_t end, addr = CMD_ADDR(cmd);
 uint8_t type = CMD_TYPE(cmd);
@@ -1143,9 +1143,13 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd)
 }
 
 if (!tg) {
-trace_smmuv3_range_inval(vmid, asid, addr, tg, 1, ttl, leaf);
+trace_smmuv3_range_inval(vmid, asid, addr, tg, 1, ttl, leaf, stage);
 smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, 1);
-smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, 1, ttl);
+if (stage == SMMU_STAGE_1) {
+smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, 1, ttl);
+} else {
+smmu_iotlb_inv_ipa(s, vmid, addr, tg, 1, ttl);
+}
 return;
 }
 
@@ -1161,9 +1165,14 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd)
 uint64_t mask = dma_aligned_pow2_mask(addr, end, 64);
 
 num_pages = (mask + 1) >> granule;
-trace_smmuv3_range_inval(vmid, asid, addr, tg, num_pages, ttl, leaf);
+trace_smmuv3_range_inval(vmid, asid, addr, tg, num_pages,
+ ttl, leaf, stage);
 smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, num_pages);
-smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, num_pages, ttl);
+if (stage == SMMU_STAGE_1) {
+smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, num_pages, ttl);
+} else {
+smmu_iotlb_inv_ipa(s, vmid, addr, tg, num_pages, ttl);
+}
 addr += mask + 1;
 }
 }
@@ -1322,7 +1331,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
 cmd_error = SMMU_CERROR_ILL;
 break;
 }
-smmuv3_range_inval(bs, &cmd);
+smmuv3_range_inval(bs, &cmd, SMMU_STAGE_

[RFC PATCH v3 10/18] hw/arm/smmu-common: Support nested translation

2024-04-28 Thread Mostafa Saleh
When nested translation is requested, do the following:

- Translate stage-1 IPA using stage-2 to a physical address.
- Translate stage-1 table walks using stage-2.
- Combine both to create a single TLB entry using the logic
  introduced before.

For stage-1 table translation, the spec (ARM IHI 0070 F.b) says in:
7.3.12 F_WALK_EABT:
Translation of an IPA for Stage 1 descriptor fetch:
S2 == 1 (stage 2), CLASS == T
So, F_WALK_EABT is used, which propagates to CLASS == TT.

smmu_ptw() has a new argument, SMMUState, which includes the TLB, as
the stage-1 table address can be cached in there.

Also, in smmu_ptw() a separate path is used for nesting to simplify
the code, although some logic could be combined; the resulting flow is
sketched below.
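
For reference, the control flow of a nested walk is then roughly (the
names below are the ones used in this patch):

    smmu_ptw()                      /* cfg->stage == SMMU_NESTED */
      -> smmu_ptw_64_s1()           /* walk the stage-1 tables */
           -> translate_table_s1()  /* each fetched table address is an
                                       IPA, translate it via stage-2 */
      -> CACHED_ENTRY_TO_ADDR()     /* stage-1 output is an IPA */
      -> smmu_ptw_64_s2()           /* translate that IPA to a PA */
      -> combine_tlb()              /* merge both into one TLB entry */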

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 67 ++--
 include/hw/arm/smmu-common.h |  2 +-
 2 files changed, 58 insertions(+), 11 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index c67af3bc6d..d48ec08947 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -306,6 +306,32 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova)
 return NULL;
 }
 
+/* Translate stage-1 table address using stage-2 page table. */
+static inline int translate_table_s1(dma_addr_t *table_addr, SMMUTransCfg *cfg,
+ SMMUPTWEventInfo *info, SMMUState *bs)
+{
+dma_addr_t addr = *table_addr;
+SMMUTLBEntry *cached_entry;
+int asid;
+
+asid = cfg->asid;
+cfg->stage = SMMU_STAGE_2;
+cfg->asid = -1;
+cached_entry = smmu_translate(bs, cfg, addr, IOMMU_RO, info);
+cfg->asid = asid;
+cfg->stage = SMMU_NESTED;
+
+if (cached_entry) {
+*table_addr = CACHED_ENTRY_TO_ADDR(cached_entry, addr);
+return 0;
+}
+
+info->stage = SMMU_STAGE_2;
+info->type = SMMU_PTW_ERR_WALK_EABT;
+info->addr = addr;
+return -EINVAL;
+}
+
 /**
  * smmu_ptw_64_s1 - VMSAv8-64 Walk of the page tables for a given IOVA
  * @cfg: translation config
@@ -321,7 +347,8 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova)
  */
 static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
   dma_addr_t iova, IOMMUAccessFlags perm,
-  SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
+  SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info,
+  SMMUState *bs)
 {
 dma_addr_t baseaddr, indexmask;
 SMMUStage stage = cfg->stage;
@@ -369,6 +396,11 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
 goto error;
 }
 baseaddr = get_table_pte_address(pte, granule_sz);
+if (cfg->stage == SMMU_NESTED) {
+if (translate_table_s1(&baseaddr, cfg, info, bs)) {
+goto error;
+}
+}
 level++;
 continue;
 } else if (is_page_pte(pte, level)) {
@@ -551,10 +583,8 @@ error:
 }
 
 /* combine 2 TLB entries and return in tlbe in nested config. */
-static void __attribute__((unused)) combine_tlb(SMMUTLBEntry *tlbe,
-SMMUTLBEntry *tlbe_s2,
-dma_addr_t iova,
-SMMUTransCfg *cfg)
+static void combine_tlb(SMMUTLBEntry *tlbe, SMMUTLBEntry *tlbe_s2,
+dma_addr_t iova, SMMUTransCfg *cfg)
 {
 if (tlbe_s2->entry.addr_mask < tlbe->entry.addr_mask) {
 tlbe->entry.addr_mask = tlbe_s2->entry.addr_mask;
@@ -579,14 +609,19 @@ static void __attribute__((unused)) combine_tlb(SMMUTLBEntry *tlbe,
  * @perm: tentative access type
  * @tlbe: returned entry
  * @info: ptw event handle
+ * @bs: smmu state which includes TLB instance
  *
  * return 0 on success
  */
 int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
- SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
+ SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info, SMMUState *bs)
 {
+int ret;
+SMMUTLBEntry tlbe_s2;
+dma_addr_t ipa;
+
 if (cfg->stage == SMMU_STAGE_1) {
-return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info);
+return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info, bs);
 } else if (cfg->stage == SMMU_STAGE_2) {
 /*
  * If bypassing stage 1(or unimplemented), the input address is passed
@@ -600,11 +635,23 @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
 tlbe->entry.perm = IOMMU_NONE;
 return -EINVAL;
 }
-
 return smmu_ptw_64_s2(cfg, iova, perm, tlbe, info);
 }
 
-g_assert_not_reached();
+/* SMMU_NESTED. */
+ret = smmu_ptw_64_s1(cfg, iova, perm, tlbe, info, bs);
+if (ret) {
+return ret;
+}
+
+ipa = CACHED_ENTRY_TO_ADDR(tlbe, iova);
+ret = smmu_ptw_64_s2(cfg, ipa, perm, &tlbe_s2, info);
+if (ret) {

[RFC PATCH v3 13/18] hw/arm/smmuv3: Support nested SMMUs in smmuv3_notify_iova()

2024-04-28 Thread Mostafa Saleh
IOMMUTLBEvent only understands IOVA; for stage-2-only SMMUs keep the
existing implementation, while in case of nesting only notify for
stage-1 invalidations.

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3.c | 23 +++
 hw/arm/trace-events |  2 +-
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index e0fd494646..96d07234fe 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1051,7 +1051,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
IOMMUNotifier *n,
int asid, int vmid,
dma_addr_t iova, uint8_t tg,
-   uint64_t num_pages)
+   uint64_t num_pages, int stage)
 {
 SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu);
 IOMMUTLBEvent event;
@@ -1075,14 +1075,21 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
 return;
 }
 
-if (STAGE1_SUPPORTED(s)) {
+/*
+ * IOMMUTLBEvent only understands IOVA, for stage-2 only SMMUs
+ * keep the implementation, while only notify for stage-1
+ * invalidation in case of nesting.
+ */
+if (stage == SMMU_STAGE_1) {
 tt = select_tt(cfg, iova);
 if (!tt) {
 return;
 }
 granule = tt->granule_sz;
-} else {
+} else if (!STAGE1_SUPPORTED(s)) {
 granule = cfg->s2cfg.granule_sz;
+} else {
+return;
 }
 
 } else {
@@ -1101,7 +1108,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
 /* invalidate an asid/vmid/iova range tuple in all mr's */
 static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, int vmid,
   dma_addr_t iova, uint8_t tg,
-  uint64_t num_pages)
+  uint64_t num_pages, int stage)
 {
 SMMUDevice *sdev;
 
@@ -1110,10 +1117,10 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, int vmid,
 IOMMUNotifier *n;
 
 trace_smmuv3_inv_notifiers_iova(mr->parent_obj.name, asid, vmid,
-iova, tg, num_pages);
+iova, tg, num_pages, stage);
 
 IOMMU_NOTIFIER_FOREACH(n, mr) {
-smmuv3_notify_iova(mr, n, asid, vmid, iova, tg, num_pages);
+smmuv3_notify_iova(mr, n, asid, vmid, iova, tg, num_pages, stage);
 }
 }
 }
@@ -1144,7 +1151,7 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd, SMMUStage stage)
 
 if (!tg) {
 trace_smmuv3_range_inval(vmid, asid, addr, tg, 1, ttl, leaf, stage);
-smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, 1);
+smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, 1, stage);
 if (stage == SMMU_STAGE_1) {
 smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, 1, ttl);
 } else {
@@ -1167,7 +1174,7 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd, SMMUStage stage)
 num_pages = (mask + 1) >> granule;
 trace_smmuv3_range_inval(vmid, asid, addr, tg, num_pages,
  ttl, leaf, stage);
-smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, num_pages);
+smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, num_pages, stage);
 if (stage == SMMU_STAGE_1) {
 smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, num_pages, ttl);
 } else {
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index 593cc571da..be6c8f720b 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -55,7 +55,7 @@ smmuv3_cmdq_tlbi_s12_vmid(int vmid) "vmid=%d"
 smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid=0x%x"
 smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s"
 smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s"
-smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t iova, uint8_t tg, uint64_t num_pages) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64
+smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t iova, uint8_t tg, uint64_t num_pages, int stage) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" stage=%d"
 
 # strongarm.c
 strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d"
-- 
2.44.0.769.g3c40516874-goog




[RFC PATCH v3 09/18] hw/arm/smmu-common: Rework TLB lookup for nesting

2024-04-28 Thread Mostafa Saleh
In the previous patch, combine_tlb() was added, which combines 2 TLB
entries into one, choosing the granule and level from the smallest
entry.

This means that for a nested translation, an entry can be cached with
the granule of stage-2 and not stage-1.

However, the lookup for an IOVA in a nested configuration is done with
the stage-1 granule; this patch reworks the lookup for that case so it
falls back to the stage-2 granule if no entry is found using the
stage-1 granule.
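
As a concrete example of why the fallback is needed: the lookup key
encodes the granule as tg = (granule_sz - 10) / 2, so

    granule_sz = 12 (4KB)  -> tg = 1
    granule_sz = 14 (16KB) -> tg = 2
    granule_sz = 16 (64KB) -> tg = 3

If stage-1 uses 4KB pages and stage-2 uses 64KB pages, a combined entry
can be inserted with tg = 3, and a lookup keyed on the stage-1 granule
(tg = 1) would always miss without the retry below.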

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 24 ++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 0d6945fa54..c67af3bc6d 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -66,8 +66,10 @@ SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova,
 return key;
 }
 
-SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
-SMMUTransTableInfo *tt, hwaddr iova)
+static SMMUTLBEntry *smmu_iotlb_lookup_all_levels(SMMUState *bs,
+  SMMUTransCfg *cfg,
+  SMMUTransTableInfo *tt,
+  hwaddr iova)
 {
 uint8_t tg = (tt->granule_sz - 10) / 2;
 uint8_t inputsize = 64 - tt->tsz;
@@ -88,6 +90,24 @@ SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
 }
 level++;
 }
+return entry;
+}
+
+SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
+SMMUTransTableInfo *tt, hwaddr iova)
+{
+SMMUTLBEntry *entry = NULL;
+
+entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
+/*
+ * For nested translation also try the s2 granule, as the TLB will insert
+ * it if the size of s2 tlb entry was smaller.
+ */
+if (!entry && (cfg->stage == SMMU_NESTED) &&
+(cfg->s2cfg.granule_sz != tt->granule_sz)) {
+tt->granule_sz = cfg->s2cfg.granule_sz;
+entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
+}
 
 if (entry) {
 cfg->iotlb_hits++;
-- 
2.44.0.769.g3c40516874-goog




[RFC PATCH v3 15/18] hw/arm/smmuv3: Advertise S2FWB

2024-04-28 Thread Mostafa Saleh
QEMU doesn't support memory attributes, so FWB is a NOP; this
might change in the future if memory attributes are supported.

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 88f6473d33..8a11e41144 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -287,6 +287,14 @@ static void smmuv3_init_regs(SMMUv3State *s)
 if (FIELD_EX32(s->idr[0], IDR0, S2P)) {
 /* XNX is a stage-2-specific feature */
 s->idr[3] = FIELD_DP32(s->idr[3], IDR3, XNX, 1);
+if (FIELD_EX32(s->idr[0], IDR0, S1P)) {
+/*
+ * QEMU doesn't support memory attributes, so FWB is a NOP; this
+ * might change in the future if memory attributes are
+ * supported.
+ */
+   s->idr[3] = FIELD_DP32(s->idr[3], IDR3, FWB, 1);
+}
 }
 s->idr[3] = FIELD_DP32(s->idr[3], IDR3, RIL, 1);
 s->idr[3] = FIELD_DP32(s->idr[3], IDR3, BBML, 2);
-- 
2.44.0.769.g3c40516874-goog




[RFC PATCH v3 03/18] hw/arm/smmuv3: Fix encoding of CLASS in events

2024-04-28 Thread Mostafa Saleh
The SMMUv3 spec (ARM IHI 0070 F.b - 7.3 Event records) defines the
class of event faults as:

CLASS: The class of the operation that caused the fault:
- 0b00: CD, CD fetch.
- 0b01: TTD, Stage 1 translation table fetch.
- 0b10: IN, Input address

However, this value was not set and left as 0, which means CD and not
IN (0b10).
While at it, add an enum for CLASS, as it will be used for nesting.

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3-internal.h | 6 ++
 hw/arm/smmuv3.c  | 6 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
index e4dd11e1e6..0f3ecec804 100644
--- a/hw/arm/smmuv3-internal.h
+++ b/hw/arm/smmuv3-internal.h
@@ -32,6 +32,12 @@ typedef enum SMMUTranslationStatus {
 SMMU_TRANS_SUCCESS,
 } SMMUTranslationStatus;
 
+typedef enum SMMUTranslationClass {
+SMMU_CLASS_CD,
+SMMU_CLASS_TT,
+SMMU_CLASS_IN,
+} SMMUTranslationClass;
+
 /* MMIO Registers */
 
 REG32(IDR0,0x0)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 9dd3ea48e4..1eb5b160d2 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -942,7 +942,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
 event.type = SMMU_EVT_F_WALK_EABT;
 event.u.f_walk_eabt.addr = addr;
 event.u.f_walk_eabt.rnw = flag & 0x1;
-event.u.f_walk_eabt.class = 0x1;
+event.u.f_walk_eabt.class = SMMU_CLASS_TT;
 event.u.f_walk_eabt.addr2 = ptw_info.addr;
 break;
 case SMMU_PTW_ERR_TRANSLATION:
@@ -950,6 +950,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
 event.type = SMMU_EVT_F_TRANSLATION;
 event.u.f_translation.addr = addr;
 event.u.f_translation.addr2 = ptw_info.addr;
+event.u.f_translation.class = SMMU_CLASS_IN;
 event.u.f_translation.rnw = flag & 0x1;
 }
 break;
@@ -958,6 +959,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
 event.type = SMMU_EVT_F_ADDR_SIZE;
 event.u.f_addr_size.addr = addr;
 event.u.f_addr_size.addr2 = ptw_info.addr;
+event.u.f_translation.class = SMMU_CLASS_IN;
 event.u.f_addr_size.rnw = flag & 0x1;
 }
 break;
@@ -966,6 +968,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
 event.type = SMMU_EVT_F_ACCESS;
 event.u.f_access.addr = addr;
 event.u.f_access.addr2 = ptw_info.addr;
+event.u.f_translation.class = SMMU_CLASS_IN;
 event.u.f_access.rnw = flag & 0x1;
 }
 break;
@@ -974,6 +977,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
 event.type = SMMU_EVT_F_PERMISSION;
 event.u.f_permission.addr = addr;
 event.u.f_permission.addr2 = ptw_info.addr;
+event.u.f_translation.class = SMMU_CLASS_IN;
 event.u.f_permission.rnw = flag & 0x1;
 }
 break;
-- 
2.44.0.769.g3c40516874-goog




[RFC PATCH v3 05/18] hw/arm/smmu: Split smmuv3_translate()

2024-04-28 Thread Mostafa Saleh
smmuv3_translate() does everything from STE/CD parsing to TLB lookup
and PTW.

Soon, when nesting is supported, stage-1 data (tt, CD) needs to be
translated using stage-2.

Split smmuv3_translate() into 3 functions:

- smmu_translate(): in smmu-common.c, which does the TLB lookup, PTW and
  TLB insertion; all the functions are already there, this just puts
  them together.
  This also simplifies the code, as it consolidates event generation
  in case of TLB lookup permission failure or in TT selection.

- smmuv3_do_translate(): in smmuv3.c, calls smmu_translate() and does
  the event population in case of errors.

 - smmuv3_translate(), now calls smmuv3_do_translate() for
   translation while the rest is the same.

Also, add the stage to trace_smmuv3_translate_success().
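
The resulting call flow is roughly:

    smmuv3_translate()             /* same external behaviour as before */
      -> smmuv3_do_translate()     /* populates the event on error */
           -> smmu_translate()     /* TLB lookup, PTW, TLB insertion */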

Signed-off-by: Mostafa Saleh 
Reviewed-by: Eric Auger 
---
 hw/arm/smmu-common.c |  59 +++
 hw/arm/smmuv3.c  | 191 +--
 hw/arm/trace-events  |   2 +-
 include/hw/arm/smmu-common.h |   8 ++
 4 files changed, 141 insertions(+), 119 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 8a5858f69f..d94db6b34f 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -566,6 +566,65 @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
 g_assert_not_reached();
 }
 
+SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
+ IOMMUAccessFlags flag, SMMUPTWEventInfo *info)
+{
+uint64_t page_mask, aligned_addr;
+SMMUTLBEntry *cached_entry = NULL;
+SMMUTransTableInfo *tt;
+int status;
+
+/*
+ * Combined attributes used for TLB lookup, as only one stage is supported,
+ * it will hold attributes based on the enabled stage.
+ */
+SMMUTransTableInfo tt_combined;
+
+if (cfg->stage == SMMU_STAGE_1) {
+/* Select stage1 translation table. */
+tt = select_tt(cfg, addr);
+if (!tt) {
+info->type = SMMU_PTW_ERR_TRANSLATION;
+info->stage = SMMU_STAGE_1;
+return NULL;
+}
+tt_combined.granule_sz = tt->granule_sz;
+tt_combined.tsz = tt->tsz;
+
+} else {
+/* Stage2. */
+tt_combined.granule_sz = cfg->s2cfg.granule_sz;
+tt_combined.tsz = cfg->s2cfg.tsz;
+}
+
+/*
+ * TLB lookup looks for granule and input size for a translation stage,
+ * as only one stage is supported right now, choose the right values
+ * from the configuration.
+ */
+page_mask = (1ULL << tt_combined.granule_sz) - 1;
+aligned_addr = addr & ~page_mask;
+
+cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, aligned_addr);
+if (cached_entry) {
+if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) {
+info->type = SMMU_PTW_ERR_PERMISSION;
+info->stage = cfg->stage;
+return NULL;
+}
+return cached_entry;
+}
+
+cached_entry = g_new0(SMMUTLBEntry, 1);
+status = smmu_ptw(cfg, aligned_addr, flag, cached_entry, info);
+if (status) {
+g_free(cached_entry);
+return NULL;
+}
+smmu_iotlb_insert(bs, cfg, cached_entry);
+return cached_entry;
+}
+
 /**
  * The bus number is used for lookup when SID based invalidation occurs.
  * In that case we lazily populate the SMMUPciBus array from the bus hash
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index dab3ad2db9..f98c157221 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -827,6 +827,75 @@ static void smmuv3_flush_config(SMMUDevice *sdev)
 g_hash_table_remove(bc->configs, sdev);
 }
 
+/* Do translation with TLB lookup. */
+static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
+ SMMUTransCfg *cfg,
+ SMMUEventInfo *event,
+ IOMMUAccessFlags flag,
+ SMMUTLBEntry **out_entry)
+{
+SMMUPTWEventInfo ptw_info = {};
+SMMUState *bs = ARM_SMMU(s);
+SMMUTLBEntry *cached_entry = NULL;
+
+cached_entry = smmu_translate(bs, cfg, addr, flag, &ptw_info);
+if (!cached_entry) {
+/* All faults from PTW has S2 field. */
+event->u.f_walk_eabt.s2 = (ptw_info.stage == SMMU_STAGE_2);
+switch (ptw_info.type) {
+case SMMU_PTW_ERR_WALK_EABT:
+event->type = SMMU_EVT_F_WALK_EABT;
+event->u.f_walk_eabt.addr = addr;
+event->u.f_walk_eabt.rnw = flag & 0x1;
+event->u.f_walk_eabt.class = SMMU_CLASS_TT;
+event->u.f_walk_eabt.addr2 = ptw_info.addr;
+break;
+case SMMU_PTW_ERR_TRANSLATION:
+if (PTW_RECORD_FAULT(cfg)) {
+event->type = SMMU_EVT_F_TRANSLATION;
+  

[RFC PATCH v3 18/18] hw/arm/virt: Set SMMU OAS based on CPU PARANGE

2024-04-28 Thread Mostafa Saleh
Use the new SMMU property to make the SMMU OAS match the CPU PARANGE.
That's according to SMMU manual ARM IHI 0070F.b:
6.3.6 SMMU_IDR5, OAS must match the system physical address size.

Signed-off-by: Mostafa Saleh 
---
 hw/arm/virt.c  | 14 --
 target/arm/cpu.h   |  2 ++
 target/arm/cpu64.c |  5 +
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 3c93c0c0a6..f203b1f8e1 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -252,6 +252,13 @@ static bool ns_el2_virt_timer_present(void)
 arm_feature(env, ARM_FEATURE_EL2) && cpu_isar_feature(aa64_vh, cpu);
 }
 
+/* We rely on CPU to define system OAS. */
+static int32_t get_system_oas(void)
+{
+ARMCPU *cpu = ARM_CPU(qemu_get_cpu(0));
+return cpu_arm_get_oas(cpu);
+}
+
 static void create_fdt(VirtMachineState *vms)
 {
 MachineState *ms = MACHINE(vms);
@@ -1384,7 +1391,7 @@ static void create_pcie_irq_map(const MachineState *ms,
 }
 
 static void create_smmu(const VirtMachineState *vms,
-PCIBus *bus)
+PCIBus *bus, int32_t oas)
 {
 char *node;
 const char compat[] = "arm,smmu-v3";
@@ -1404,6 +1411,9 @@ static void create_smmu(const VirtMachineState *vms,
 
 object_property_set_link(OBJECT(dev), "primary-bus", OBJECT(bus),
  &error_abort);
+
+qdev_prop_set_uint64(dev, "oas", oas);
+
 sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
 sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base);
 for (i = 0; i < NUM_SMMU_IRQS; i++) {
@@ -1578,7 +1588,7 @@ static void create_pcie(VirtMachineState *vms)
 
 switch (vms->iommu) {
 case VIRT_IOMMU_SMMUV3:
-create_smmu(vms, vms->bus);
+create_smmu(vms, vms->bus, get_system_oas());
 qemu_fdt_setprop_cells(ms->fdt, nodename, "iommu-map",
0x0, vms->iommu_phandle, 0x0, 0x1);
 break;
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 17efc5d565..68261ffbf9 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3287,4 +3287,6 @@ static inline target_ulong cpu_untagged_addr(CPUState *cs, target_ulong x)
 }
 #endif
 
+int32_t cpu_arm_get_oas(ARMCPU *cpu);
+
 #endif
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 985b1efe16..08da83c082 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -787,6 +787,11 @@ static const gchar *aarch64_gdb_arch_name(CPUState *cs)
 return "aarch64";
 }
 
+int32_t cpu_arm_get_oas(ARMCPU *cpu)
+{
+return FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE);
+}
+
 static void aarch64_cpu_class_init(ObjectClass *oc, void *data)
 {
 CPUClass *cc = CPU_CLASS(oc);
-- 
2.44.0.769.g3c40516874-goog




[RFC PATCH v3 02/18] hw/arm/smmu: Fix IPA for stage-2 events

2024-04-28 Thread Mostafa Saleh
For the following events (ARM IHI 0070 F.b - 7.3 Event records):
- F_TRANSLATION
- F_ACCESS
- F_PERMISSION
- F_ADDR_SIZE

If fault occurs at stage 2, S2 == 1 and:
  - If translating an IPA for a transaction (whether by input to
stage 2-only configuration, or after successful stage 1 translation),
CLASS == IN, and IPA is provided.

However, this was not implemented correctly, as for stage 2, QEMU
only sets the S2 bit but not the IPA.

This field has the same bits as FetchAddr in F_WALK_EABT which is
populated correctly, so we don’t change that.
The population of this field should be done from the walker as the IPA address
wouldn't be known in case of nesting.

For stage 1, the spec says:
  If fault occurs at stage 1, S2 == 0 and:
  CLASS == IN, IPA is UNKNOWN.

So, no need to set it for stage 1, as ptw_info is zero-initialised in
smmuv3_translate().

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 10 ++
 hw/arm/smmuv3.c  |  4 
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index eb2356bc35..8a8c718e6b 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -448,7 +448,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
  */
 if (ipa >= (1ULL << inputsize)) {
 info->type = SMMU_PTW_ERR_TRANSLATION;
-goto error;
+goto error_ipa;
 }
 
 while (level < VMSA_LEVELS) {
@@ -494,13 +494,13 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
  */
 if (!PTE_AF(pte) && !cfg->s2cfg.affd) {
 info->type = SMMU_PTW_ERR_ACCESS;
-goto error;
+goto error_ipa;
 }
 
 s2ap = PTE_AP(pte);
 if (is_permission_fault_s2(s2ap, perm)) {
 info->type = SMMU_PTW_ERR_PERMISSION;
-goto error;
+goto error_ipa;
 }
 
 /*
@@ -509,7 +509,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
  */
 if (gpa >= (1ULL << cfg->s2cfg.eff_ps)) {
 info->type = SMMU_PTW_ERR_ADDR_SIZE;
-goto error;
+goto error_ipa;
 }
 
 tlbe->entry.translated_addr = gpa;
@@ -522,6 +522,8 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
 }
 info->type = SMMU_PTW_ERR_TRANSLATION;
 
+error_ipa:
+info->addr = ipa;
 error:
 info->stage = 2;
 tlbe->entry.perm = IOMMU_NONE;
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 2d1e0d55ec..9dd3ea48e4 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -949,6 +949,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
 if (PTW_RECORD_FAULT(cfg)) {
 event.type = SMMU_EVT_F_TRANSLATION;
 event.u.f_translation.addr = addr;
+event.u.f_translation.addr2 = ptw_info.addr;
 event.u.f_translation.rnw = flag & 0x1;
 }
 break;
@@ -956,6 +957,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
 if (PTW_RECORD_FAULT(cfg)) {
 event.type = SMMU_EVT_F_ADDR_SIZE;
 event.u.f_addr_size.addr = addr;
+event.u.f_addr_size.addr2 = ptw_info.addr;
 event.u.f_addr_size.rnw = flag & 0x1;
 }
 break;
@@ -963,6 +965,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
 if (PTW_RECORD_FAULT(cfg)) {
 event.type = SMMU_EVT_F_ACCESS;
 event.u.f_access.addr = addr;
+event.u.f_access.addr2 = ptw_info.addr;
 event.u.f_access.rnw = flag & 0x1;
 }
 break;
@@ -970,6 +973,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
 if (PTW_RECORD_FAULT(cfg)) {
 event.type = SMMU_EVT_F_PERMISSION;
 event.u.f_permission.addr = addr;
+event.u.f_permission.addr2 = ptw_info.addr;
 event.u.f_permission.rnw = flag & 0x1;
 }
 break;
-- 
2.44.0.769.g3c40516874-goog




[RFC PATCH v3 00/18] SMMUv3 nested translation support

2024-04-28 Thread Mostafa Saleh
its while
  they can be -1
- Increase visibility in trace points

Mostafa Saleh (18):
  hw/arm/smmu-common: Add missing size check for stage-1
  hw/arm/smmu: Fix IPA for stage-2 events
  hw/arm/smmuv3: Fix encoding of CLASS in events
  hw/arm/smmu: Use enum for SMMU stage
  hw/arm/smmu: Split smmuv3_translate()
  hw/arm/smmu: Consolidate ASID and VMID types
  hw/arm/smmuv3: Translate CD and TT using stage-2 table
  hw/arm/smmu-common: Add support for nested TLB
  hw/arm/smmu-common: Rework TLB lookup for nesting
  hw/arm/smmu-common: Support nested translation
  hw/arm/smmu: Support nesting in smmuv3_range_inval()
  hw/arm/smmu: Support nesting in the rest of commands
  hw/arm/smmuv3: Support nested SMMUs in smmuv3_notify_iova()
  hw/arm/smmuv3: Support and advertise nesting
  hw/arm/smmuv3: Advertise S2FWB
  hw/arm/smmu: Refactor SMMU OAS
  hw/arm/smmuv3: Add property for OAS
  hw/arm/virt: Set SMMU OAS based on CPU PARANGE

 hw/arm/smmu-common.c | 298 ---
 hw/arm/smmuv3-internal.h |  22 +-
 hw/arm/smmuv3.c  | 441 +++
 hw/arm/trace-events  |  26 ++-
 hw/arm/virt.c|  14 +-
 include/hw/arm/smmu-common.h |  43 +++-
 include/hw/arm/smmuv3.h  |   1 +
 target/arm/cpu.h |   2 +
 target/arm/cpu64.c   |   5 +
 9 files changed, 625 insertions(+), 227 deletions(-)

-- 
2.44.0.769.g3c40516874-goog




[RFC PATCH v3 08/18] hw/arm/smmu-common: Add support for nested TLB

2024-04-28 Thread Mostafa Saleh
This patch adds support for nested (combined) TLB entries.
The main function, combine_tlb(), is not used here but in the next
patches; to simplify the series it is introduced first.

Main changes:
1) A new entry is added in the TLB, parent_perm; for nested TLB entries
   it holds the stage-2 permission. This can be used to know the origin
   of a permission fault from a cached entry, as caching the "and" of
   the permissions loses this information (see the sketch after this
   list).

   SMMUPTWEventInfo is used to hold information about PTW faults so
   the event can be populated. The value of stage (which maps to S2
   in the event) used to be set based on the current stage for TLB
   permission faults; with parent_perm, it is now set based on which
   perm is missing the required permission.

   When nesting is not enabled, it has the same value as perm, which
   doesn't change the logic.

2) As a combined TLB implementation is used, the combination logic
   chooses:
   - tg and level from the entry which has the smallest addr_mask.
   - Based on that, the iova that would be cached is recalculated.
   - translated_addr is chosen from stage-2.
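
A standalone sketch of the permission handling described in (1); the
type and flag values are simplified stand-ins for QEMU's SMMUTLBEntry
and IOMMU_* flags:

    enum { PERM_R = 1, PERM_W = 2 };  /* stand-ins for IOMMU_RO/IOMMU_WO */

    typedef struct {
        int perm;         /* stage-1 permission */
        int parent_perm;  /* stage-2 permission; == perm when not nested */
    } EntrySketch;

    /* A write is allowed only if both stages allow it; on a fault, the
     * faulting stage (the S2 bit of the event) is stage-1 when stage-1
     * lacks write permission, otherwise stage-2. */
    static int write_fault_stage(EntrySketch e)  /* 0 means no fault */
    {
        if (e.perm & e.parent_perm & PERM_W) {
            return 0;
        }
        return (e.perm & PERM_W) ? 2 : 1;
    }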

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 32 
 include/hw/arm/smmu-common.h |  1 +
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 21982621c0..0d6945fa54 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -394,7 +394,7 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
 tlbe->entry.translated_addr = gpa;
 tlbe->entry.iova = iova & ~mask;
 tlbe->entry.addr_mask = mask;
-tlbe->entry.perm = PTE_AP_TO_PERM(ap);
+tlbe->parent_perm = tlbe->entry.perm = PTE_AP_TO_PERM(ap);
 tlbe->level = level;
 tlbe->granule = granule_sz;
 return 0;
@@ -515,7 +515,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
 tlbe->entry.translated_addr = gpa;
 tlbe->entry.iova = ipa & ~mask;
 tlbe->entry.addr_mask = mask;
-tlbe->entry.perm = s2ap;
+tlbe->parent_perm = tlbe->entry.perm = s2ap;
 tlbe->level = level;
 tlbe->granule = granule_sz;
 return 0;
@@ -530,6 +530,27 @@ error:
 return -EINVAL;
 }
 
+/* combine 2 TLB entries and return in tlbe in nested config. */
+static void __attribute__((unused)) combine_tlb(SMMUTLBEntry *tlbe,
+SMMUTLBEntry *tlbe_s2,
+dma_addr_t iova,
+SMMUTransCfg *cfg)
+{
+if (tlbe_s2->entry.addr_mask < tlbe->entry.addr_mask) {
+tlbe->entry.addr_mask = tlbe_s2->entry.addr_mask;
+tlbe->granule = tlbe_s2->granule;
+tlbe->level = tlbe_s2->level;
+}
+
+tlbe->entry.translated_addr = CACHED_ENTRY_TO_ADDR(tlbe_s2,
+tlbe->entry.translated_addr);
+
+tlbe->entry.iova = iova & ~tlbe->entry.addr_mask;
+/* parent_perm has s2 perm while perm has s1 perm. */
+tlbe->parent_perm = tlbe_s2->entry.perm;
+return;
+}
+
 /**
  * smmu_ptw - Walk the page tables for an IOVA, according to @cfg
  *
@@ -607,9 +628,12 @@ SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
 
 cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, aligned_addr);
 if (cached_entry) {
-if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) {
+if ((flag & IOMMU_WO) && !(cached_entry->entry.perm &
+cached_entry->parent_perm & IOMMU_WO)) {
 info->type = SMMU_PTW_ERR_PERMISSION;
-info->stage = cfg->stage;
+info->stage = !(cached_entry->entry.perm & IOMMU_WO) ?
+  SMMU_STAGE_1 :
+  SMMU_STAGE_2;
 return NULL;
 }
 return cached_entry;
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index 09d3b9e734..1db566d451 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -77,6 +77,7 @@ typedef struct SMMUTLBEntry {
 IOMMUTLBEntry entry;
 uint8_t level;
 uint8_t granule;
+IOMMUAccessFlags parent_perm;
 } SMMUTLBEntry;
 
 /* Stage-2 configuration. */
-- 
2.44.0.769.g3c40516874-goog




Re: [RFC PATCH v2 07/13] hw/arm/smmu: Support nesting in the rest of commands

2024-04-19 Thread Mostafa Saleh
Hi Eric,

On Thu, Apr 18, 2024 at 04:48:39PM +0200, Eric Auger wrote:
> Hi Mostafa,
> 
> On 4/8/24 16:08, Mostafa Saleh wrote:
> > Some commands need rework for nesting, as they used to assume S1
> > and S2 are mutually exclusive:
> >
> > - CMD_TLBI_NH_ASID: Consider VMID if stage-2 is supported
> > - CMD_TLBI_NH_ALL: Consider VMID if stage-2 is supported, otherwise
> >   invalidate everything, this required a new vmid invalidation
> >   function for stage-1 only (ASID >= 0)
> >
> > Also, rework trace events to reflect the new implementation.
> 
> This does not apply for me. Could you share a branch or respin?

Oh, Sorry about that. I will address the previous comments and respin.

Thanks,
Mostafa

> 
> Thank you in advance
> 
> Eric
> >
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmu-common.c | 36 +---
> >  hw/arm/smmuv3.c  | 31 +--
> >  hw/arm/trace-events  |  6 --
> >  include/hw/arm/smmu-common.h |  3 ++-
> >  4 files changed, 64 insertions(+), 12 deletions(-)
> >
> > diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
> > index 8b9e59b24b..b1cf1303c6 100644
> > --- a/hw/arm/smmu-common.c
> > +++ b/hw/arm/smmu-common.c
> > @@ -148,13 +148,14 @@ void smmu_iotlb_inv_all(SMMUState *s)
> >  g_hash_table_remove_all(s->iotlb);
> >  }
> >  
> > -static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value,
> > - gpointer user_data)
> > +static gboolean smmu_hash_remove_by_asid_vmid(gpointer key, gpointer value,
> > +  gpointer user_data)
> >  {
> > -int asid = *(int *)user_data;
> > +SMMUIOTLBPageInvInfo *info = (SMMUIOTLBPageInvInfo *)user_data;
> >  SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
> >  
> > -return SMMU_IOTLB_ASID(*iotlb_key) == asid;
> > +return (SMMU_IOTLB_ASID(*iotlb_key) == info->asid) &&
> > +   (SMMU_IOTLB_VMID(*iotlb_key) == info->vmid);
> >  }
> >  
> >  static gboolean smmu_hash_remove_by_vmid(gpointer key, gpointer value,
> > @@ -166,6 +167,16 @@ static gboolean smmu_hash_remove_by_vmid(gpointer key, gpointer value,
> >  return SMMU_IOTLB_VMID(*iotlb_key) == vmid;
> >  }
> >  
> > +static gboolean smmu_hash_remove_by_vmid_s1(gpointer key, gpointer value,
> > +gpointer user_data)
> > +{
> > +int vmid = *(int *)user_data;
> > +SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
> > +
> > +return (SMMU_IOTLB_VMID(*iotlb_key) == vmid) &&
> > +   (SMMU_IOTLB_ASID(*iotlb_key) >= 0);
> > +}
> > +
> >  static gboolean smmu_hash_remove_by_asid_vmid_iova(gpointer key, gpointer value,
> >gpointer user_data)
> >  {
> > @@ -259,10 +270,15 @@ void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, dma_addr_t ipa, uint8_t tg,
> >  &info);
> >  }
> >  
> > -void smmu_iotlb_inv_asid(SMMUState *s, int asid)
> > +void smmu_iotlb_inv_asid_vmid(SMMUState *s, int asid, int vmid)
> >  {
> > -trace_smmu_iotlb_inv_asid(asid);
> > -g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid, &asid);
> > +SMMUIOTLBPageInvInfo info = {
> > +.asid = asid,
> > +.vmid = vmid,
> > +};
> > +
> > +trace_smmu_iotlb_inv_asid_vmid(asid, vmid);
> > +g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid_vmid, &info);
> >  }
> >  
> >  inline void smmu_iotlb_inv_vmid(SMMUState *s, int vmid)
> > @@ -271,6 +287,12 @@ inline void smmu_iotlb_inv_vmid(SMMUState *s, int vmid)
> >  g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid, &vmid);
> >  }
> >  
> > +inline void smmu_iotlb_inv_vmid_s1(SMMUState *s, int vmid)
> > +{
> > +trace_smmu_iotlb_inv_vmid_s1(vmid);
> > +g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid_s1, 
> > &vmid);
> > +}
> > +
> >  /* VMSAv8-64 Translation */
> >  
> >  /**
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> > index 17bbd43c13..ece647b8bf 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -1280,25 +1280,52 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
> >  case SMMU_CMD_TLBI_NH_ASID:

Re: [RFC PATCH v2 05/13] hw/arm/smmu-common: Support nested translation

2024-04-19 Thread Mostafa Saleh
Hi Eric,

On Thu, Apr 18, 2024 at 03:54:01PM +0200, Eric Auger wrote:
> Hi Mostafa,
> 
> On 4/8/24 16:08, Mostafa Saleh wrote:
> > When nested translation is requested, do the following:
> >
> > - Translate stage-1 IPA using stage-2 to a physical address.
> > - Translate stage-1 PTW walks using stage-2.
> > - Combine both to create a single TLB entry, for that we choose
> >   the smallest entry to cache, which means that if the smallest
> >   entry comes from stage-2, and stage-2 use different granule,
> >   TLB lookup for stage-1 (in nested config) will always miss.
> >   Lookup logic is modified for nesting to lookup using stage-2
> >   granule if stage-1 granule missed and they are different.
> >
> > Also, add more visibility in trace points, to make it easier to debug.
> >
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmu-common.c | 153 ---
> >  hw/arm/trace-events  |   6 +-
> >  include/hw/arm/smmu-common.h |   3 +-
> >  3 files changed, 131 insertions(+), 31 deletions(-)
> >
> > diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
> > index 771b9c79a3..2cf27b490b 100644
> > --- a/hw/arm/smmu-common.c
> > +++ b/hw/arm/smmu-common.c
> > @@ -66,8 +66,10 @@ SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova,
> >  return key;
> >  }
> >  
> > -SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
> > -SMMUTransTableInfo *tt, hwaddr iova)
> > +static SMMUTLBEntry *smmu_iotlb_lookup_all_levels(SMMUState *bs,
> > +  SMMUTransCfg *cfg,
> > +  SMMUTransTableInfo *tt,
> > +  hwaddr iova)
> this helper can be introduced in a separate patch to ease the code review

Will do.

> >  {
> >  uint8_t tg = (tt->granule_sz - 10) / 2;
> >  uint8_t inputsize = 64 - tt->tsz;
> > @@ -88,10 +90,29 @@ SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
> >  }
> >  level++;
> >  }
> > +return entry;
> > +}
> > +
> > +SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
> > +SMMUTransTableInfo *tt, hwaddr iova)
> > +{
> > +SMMUTLBEntry *entry = NULL;
> > +
> > +entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
> > +/*
> > + * For nested translation also use the s2 granule, as the TLB will insert
> > + * the smallest of both, so the entry can be cached with the s2 granule.
> > + */
> > +if (!entry && (cfg->stage == SMMU_NESTED) &&
> > +(cfg->s2cfg.granule_sz != tt->granule_sz)) {
> > +tt->granule_sz = cfg->s2cfg.granule_sz;
> > +entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
> this is also the kind of stuff that can be introduced and reviewed
> separately without taking any risk while NESTED is not supported. In this
> new patch you could also document the TLB strategy.

Will do.

> > +}
> >  
> >  if (entry) {
> >  cfg->iotlb_hits++;
> >  trace_smmu_iotlb_lookup_hit(cfg->asid, cfg->s2cfg.vmid, iova,
> > +entry->entry.addr_mask,
> can be moved to a separate fix. same for the trace point changes

Will do.

> >  cfg->iotlb_hits, cfg->iotlb_misses,
> >  100 * cfg->iotlb_hits /
> >  (cfg->iotlb_hits + cfg->iotlb_misses));
> > @@ -117,7 +138,7 @@ void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *new)
> >  *key = smmu_get_iotlb_key(cfg->asid, cfg->s2cfg.vmid, new->entry.iova,
> >tg, new->level);
> >  trace_smmu_iotlb_insert(cfg->asid, cfg->s2cfg.vmid, new->entry.iova,
> > -tg, new->level);
> > +tg, new->level, new->entry.translated_addr);
> >  g_hash_table_insert(bs->iotlb, key, new);
> >  }
> >  
> > @@ -286,6 +307,27 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova)
> >  return NULL;
> >  }
> >  
> > +/* Return the correct table address based on configuration. */
> does the S2 translation for a PTE table?

The intention was to abstract that, 

Re: [RFC PATCH v2 04/13] hw/arm/smmuv3: Translate CD and TT using stage-2 table

2024-04-19 Thread Mostafa Saleh
Hi Eric,

On Thu, Apr 18, 2024 at 02:51:59PM +0200, Eric Auger wrote:
> Hi Mostafa,
> 
> On 4/8/24 16:08, Mostafa Saleh wrote:
> > According to the user manual (ARM IHI 0070 F.b),
> s/user manual/ARM SMMU architecture specification
> > In "5.2 Stream Table Entry":
> >  [51:6] S1ContextPtr
> >  If Config[1] == 1 (stage 2 enabled), this pointer is an IPA translated by
> >  stage 2 and the programmed value must be within the range of the IAS.
> >
> > In "5.4.1 CD notes":
> >  The translation table walks performed from TTB0 or TTB1 are always 
> > performed
> >  in IPA space if stage 2 translations are enabled.
> >
> > So translate both the CD and the TTBx in this patch if nested
> translate the S1 context descriptor pointer and TTBx base addresses
> through the S2 stage (IPA -> PA)
> 
> You may describe what you put in place to do the translation in the
> commit msg, new functions, macro, ...

Will do.

> > translation is requested.
> >
> > Signed-off-by: Mostafa Saleh 
> > ---
> >  hw/arm/smmuv3.c  | 49 ++--
> >  include/hw/arm/smmu-common.h | 17 +
> >  2 files changed, 59 insertions(+), 7 deletions(-)
> >
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> > index 897f8fe085..a7cf543acc 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -337,14 +337,36 @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf,
> >  
> >  }
> >  
> > +static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
> > + SMMUTransCfg *cfg,
> > + SMMUEventInfo *event,
> > + IOMMUAccessFlags flag,
> > + SMMUTLBEntry **out_entry);
> >  /* @ssid > 0 not supported yet */
> > -static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
> > -   CD *buf, SMMUEventInfo *event)
> > +static int smmu_get_cd(SMMUv3State *s, STE *ste, SMMUTransCfg *cfg,
> > +   uint32_t ssid, CD *buf, SMMUEventInfo *event)
> >  {
> >  dma_addr_t addr = STE_CTXPTR(ste);
> >  int ret, i;
> > +SMMUTranslationStatus status;
> > +SMMUTLBEntry *entry;
> >  
> >  trace_smmuv3_get_cd(addr);
> > +
> > +if (cfg->stage == SMMU_NESTED) {
> > +CALL_FUNC_CFG_S2(cfg, status, smmuv3_do_translate, s, addr,
> > + cfg, event, IOMMU_RO, &entry);
> the fact we pass 2 times cfg looks pretty weird from a caller pov. See
> my comment below.

Yes, I don’t like it also as I mentioned in the cover letter,
(see me comment below also)

> 
> do we somewhere check addr is within the proper addr range, IAS if S2,
> OAS if S1. This was missing for S1 but I think it is worth improving now.
> see 3.4.3
Yes, this was added in the next patch.

> > +/*
> > + * It is not clear what should happen if this fails, so we return here
> > + * which gets propagated as a translation error.
> but the error event might be different, no?

But the event is passed to the translate function so the right translation
error will be in the event (addr size, permission…).
It isn't clear to me from the specs if this should be a translation error
or some F_CD_FETCH/C_BAD_CD, and hence the comment, but I thought the
translation error info would be more useful for SW, that's why I used it.

> > + */
> > +if (status != SMMU_TRANS_SUCCESS) {
> > +return -EINVAL;
> > +}
> > +
> > +addr = CACHED_ENTRY_TO_ADDR(entry, addr);
> > +}
> > +
> >  /* TODO: guarantee 64-bit single-copy atomicity */
> >  ret = dma_memory_read(&address_space_memory, addr, buf, sizeof(*buf),
> >MEMTXATTRS_UNSPECIFIED);
> > @@ -659,10 +681,13 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
> >  return 0;
> >  }
> >  
> > -static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event)
> > +static int decode_cd(SMMUv3State *s, SMMUTransCfg *cfg,
> > + CD *cd, SMMUEventInfo *event)
> >  {
> >  int ret = -EINVAL;
> >  int i;
> > +SMMUTranslationStatus status;
> > +SMMUTLBEntry *entry;
> >  
> >  if (!CD_VALID(cd) || !CD_AARCH64(cd)) {
> >  goto bad_cd;
> > @@ -713,6 +738,17 @@ static int 

Re: [RFC PATCH v2 00/13] SMMUv3 nested translation support

2024-04-19 Thread Mostafa Saleh
Hi Eric,

On Thu, Apr 18, 2024 at 08:11:06PM +0200, Eric Auger wrote:
> Hi Mostafa,
> 
> On 4/8/24 16:08, Mostafa Saleh wrote:
> > Currently, QEMU supports emulating either stage-1 or stage-2 SMMUs
> > but not nested instances.
> > This patch series adds support for nested translation in SMMUv3,
> > this is controlled by property “arm-smmuv3.stage=nested”, and
> > advertised to guests as (IDR0.S1P == 1 && IDR0.S2P == 1)
> >
> > Main changes(architecture):
> > 
> > 1) CDs are considered IPA and translated with stage-2.
> > 2) TTBx and tables for stage-1 are considered IPA and translated
> >with stage-2.
> > 3) Translate the IPA address with stage-2.
> >
> > TLBs:
> > ==
> > TLBs are the most tricky part.
> >
> > 1) General design
> >Unified(Combined) design is used, where entries with ASID=-1 are
> >IPAs(cached from stage-2 config)
> >
> >TLBs are also modified to cache 2 permissions, a new permission added
> >"parent_perm."
> >
> >For non-nested configuration, perm == parent_perm and nothing
> >changes. This is used to know which stage to use in case there is
> >a permission fault from a TLB entry.
> >
> > 2) Caching in TLB
> >Stage-1 and stage-2 are inserted in the TLB as is.
> >For nested translation, both entries are combined into one TLB
> >entry. The size (level and granule) are chosen from the smallest entries.
> >That means that a stage-1 translation can be cached with the stage-2
> >granule in the key; this is taken into account in lookup.
> is that a correct understanding that with the current implementation, in
> nested mode, you end up with combined S1 + S2 entries (IOVA -> PA) and
> S2 entries (IPA -> PA)?
Yes, that’s correct.

> Out of curiosity, how did you end up with that choice? Have you made
> some perf assessment compared to separate S1 and S2 entries? I guess it
> is a complex topic and choice.
> 

I didn’t do any perf, but from my simplistic understanding, combined
TLBs should be faster as they use only one lookup for a full translation;
also, I guess having a single TLB would be better for HW area.
(However my knowledge in this “area” is almost null)
Although in SW, we don’t have tough memory constraints and having more
(or separate) TLBs isn’t a problem.

While implementing this, at some point I thought it’s getting too
complicated and a separate one might have been better, but the
grass is always greener on the other side, and I believe it would
also have its challenges.

One other thing I like about combined TLBs (which I am not sure is
important for qemu) is that it is more relaxed which means it would
catch more SW bugs. For example if the SW only changes an IPA and
only invalidates by IPA, it would have issues with combined TLBs.

I am open to try other designs if you have something else in mind.

Thanks,
Mostafa


> Thanks
> 
> Eric
> >
> > 3) TLB Lookup
> >TLB lookup already uses ASID in key, so it can distinguish between
> >stage-1 and stage-2.
> >And as mentioned above, the granule for stage-1 can be different,
> >If stage-1 lookup failed, we try again with the stage-2 granule.
> >
> > 4) TLB invalidation
> >- Address invalidation is split, for IOVA(CMD_TLBI_NH_VA
> >  /CMD_TLBI_NH_VAA) and IPA(CMD_TLBI_S2_IPA) based on ASID value
> >- CMD_TLBI_NH_ASID/CMD_TLBI_NH_ALL: Consider VMID if stage-2 is
> >  supported, and invalidate stage-1 only by VMIDs
> >
> > As far as I understand, this is compliant with the ARM architecture:
> > - ARM ARM DDI 0487J.a: RLGSCG, RTVTYQ, RGNJPZ
> > - ARM IHI 0070F.b: 16.2 Caching
> >
> > An alternative approach would be to instantiate 2 TLBs, one per each
> > stage. I haven’t investigated that.
> >
> > Others
> > ===
> > - Advertise SMMUv3.2-S2FWB, it is NOP for QEMU as it doesn’t support
> >   attributes.
> >
> > - OAS: A typical setup with nesting is to share CPU stage-2 with the
> >   SMMU, and according to the user manual, SMMU OAS must match the
> >   system physical address.
> >
> >   This was discussed before in
> >   https://lore.kernel.org/all/20230226220650.1480786-11-smost...@google.com/
> >   The implementation here, follows the discussion, where migration is
> >   added and oas is set up from the board (virt). However, the OAS is
> >   chosen based on the CPU PARANGE as there is no fixed one.
> >
> > - For nested configuration, IOVA notifier only notifies for stage-1
> >   invalidations (as far as I understand this is the intended

[RFC PATCH v2 06/13] hw/arm/smmu: Support nesting in smmuv3_range_inval()

2024-04-08 Thread Mostafa Saleh
With nesting, we would need to invalidate IPAs without
over-invalidating stage-1 IOVAs. This can be done by
distinguishing IPAs in the TLBs by having ASID=-1.
To achieve that, rework the invalidation for IPAs to have a
separate function, while for IOVA invalidation ASID=-1 means
invalidate for all ASIDs.

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c | 47 
 hw/arm/smmuv3.c  | 23 --
 hw/arm/trace-events  |  2 +-
 include/hw/arm/smmu-common.h |  3 ++-
 4 files changed, 66 insertions(+), 9 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 2cf27b490b..8b9e59b24b 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -184,6 +184,25 @@ static gboolean smmu_hash_remove_by_asid_vmid_iova(gpointer key, gpointer value,
((entry->iova & ~info->mask) == info->iova);
 }
 
+static gboolean smmu_hash_remove_by_vmid_ipa(gpointer key, gpointer value,
+ gpointer user_data)
+{
+SMMUTLBEntry *iter = (SMMUTLBEntry *)value;
+IOMMUTLBEntry *entry = &iter->entry;
+SMMUIOTLBPageInvInfo *info = (SMMUIOTLBPageInvInfo *)user_data;
+SMMUIOTLBKey iotlb_key = *(SMMUIOTLBKey *)key;
+
+/* This is a stage-1 address. */
+if (info->asid >= 0) {
+return false;
+}
+if (info->vmid != SMMU_IOTLB_VMID(iotlb_key)) {
+return false;
+}
+return ((info->iova & ~entry->addr_mask) == entry->iova) ||
+   ((entry->iova & ~info->mask) == info->iova);
+}
+
 void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova,
  uint8_t tg, uint64_t num_pages, uint8_t ttl)
 {
@@ -212,6 +231,34 @@ void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova,
 &info);
 }
 
+/*
+ * Similar to smmu_iotlb_inv_iova(), but for Stage-2, ASID is always -1,
+ * in Stage-1 invalidation ASID = -1, means don't care.
+ */
+void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, dma_addr_t ipa, uint8_t tg,
+uint64_t num_pages, uint8_t ttl)
+{
+uint8_t granule = tg ? tg * 2 + 10 : 12;
+int asid = -1;
+
+   if (ttl && (num_pages == 1)) {
+SMMUIOTLBKey key = smmu_get_iotlb_key(asid, vmid, ipa, tg, ttl);
+
+if (g_hash_table_remove(s->iotlb, &key)) {
+return;
+}
+}
+
+SMMUIOTLBPageInvInfo info = {
+.iova = ipa,
+.vmid = vmid,
+.mask = (num_pages * 1 << granule) - 1};
+
+g_hash_table_foreach_remove(s->iotlb,
+smmu_hash_remove_by_vmid_ipa,
+&info);
+}
+
 void smmu_iotlb_inv_asid(SMMUState *s, int asid)
 {
 trace_smmu_iotlb_inv_asid(asid);
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index a7cf543acc..17bbd43c13 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1095,7 +1095,7 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, int vmid,
 }
 }
 
-static void smmuv3_range_inval(SMMUState *s, Cmd *cmd)
+static void smmuv3_range_inval(SMMUState *s, Cmd *cmd, SMMUStage stage)
 {
 dma_addr_t end, addr = CMD_ADDR(cmd);
 uint8_t type = CMD_TYPE(cmd);
@@ -1120,9 +1120,13 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd)
 }
 
 if (!tg) {
-trace_smmuv3_range_inval(vmid, asid, addr, tg, 1, ttl, leaf);
+trace_smmuv3_range_inval(vmid, asid, addr, tg, 1, ttl, leaf, stage);
 smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, 1);
-smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, 1, ttl);
+if (stage == SMMU_STAGE_1) {
+smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, 1, ttl);
+} else {
+smmu_iotlb_inv_ipa(s, vmid, addr, tg, 1, ttl);
+}
 return;
 }
 
@@ -1138,9 +1142,14 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd)
 uint64_t mask = dma_aligned_pow2_mask(addr, end, 64);
 
 num_pages = (mask + 1) >> granule;
-trace_smmuv3_range_inval(vmid, asid, addr, tg, num_pages, ttl, leaf);
+trace_smmuv3_range_inval(vmid, asid, addr, tg, num_pages,
+ ttl, leaf, stage);
 smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, num_pages);
-smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, num_pages, ttl);
+if (stage == SMMU_STAGE_1) {
+smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, num_pages, ttl);
+} else {
+smmu_iotlb_inv_ipa(s, vmid, addr, tg, num_pages, ttl);
+}
 addr += mask + 1;
 }
 }
@@ -1299,7 +1308,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
 cmd_error = SMMU_CERROR_ILL;
 break;
 }
-smmuv3_range_inval(bs, &cmd);
+smmuv3_range_inval(bs, &cmd, SMMU_STAGE_1);
 bre

[RFC PATCH v2 09/13] hw/arm/smmuv3: Support and advertise nesting

2024-04-08 Thread Mostafa Saleh
Everything is in place, add the last missing bits:
- Handle fault checking according to the actual PTW event and not the
  translation stage.
- Consolidate parsing of STE cfg and setting translation stage.

Advertise nesting if stage requested is "nested".

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmuv3.c | 50 +
 1 file changed, 34 insertions(+), 16 deletions(-)

diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 85b3ac6a9c..da47411410 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -34,9 +34,10 @@
 #include "smmuv3-internal.h"
 #include "smmu-internal.h"
 
-#define PTW_RECORD_FAULT(cfg)   (((cfg)->stage == SMMU_STAGE_1) ? \
- (cfg)->record_faults : \
- (cfg)->s2cfg.record_faults)
+#define PTW_RECORD_FAULT(ptw_info, cfg) (((ptw_info).stage == SMMU_STAGE_1 && \
+(cfg)->record_faults) || \
+((ptw_info).stage == SMMU_STAGE_2 && \
+(cfg)->s2cfg.record_faults))
 
 /**
  * smmuv3_trigger_irq - pulse @irq if enabled and update
@@ -260,6 +261,9 @@ static void smmuv3_init_regs(SMMUv3State *s)
 /* Based on sys property, the stages supported in smmu will be advertised.*/
 if (s->stage && !strcmp("2", s->stage)) {
 s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S2P, 1);
+} else if (s->stage && !strcmp("nested", s->stage)) {
+s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S1P, 1);
+s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S2P, 1);
 } else {
 s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S1P, 1);
 }
@@ -425,8 +429,6 @@ static bool s2_pgtable_config_valid(uint8_t sl0, uint8_t t0sz, uint8_t gran)
 
 static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
 {
-cfg->stage = SMMU_STAGE_2;
-
 if (STE_S2AA64(ste) == 0x0) {
 qemu_log_mask(LOG_UNIMP,
   "SMMUv3 AArch32 tables not supported\n");
@@ -509,6 +511,27 @@ bad_ste:
 return -EINVAL;
 }
 
+static void decode_ste_config(SMMUTransCfg *cfg, uint32_t config)
+{
+
+if (STE_CFG_ABORT(config)) {
+cfg->aborted = true;
+return;
+}
+if (STE_CFG_BYPASS(config)) {
+cfg->bypassed = true;
+return;
+}
+
+if (STE_CFG_S1_ENABLED(config)) {
+cfg->stage = SMMU_STAGE_1;
+}
+
+if (STE_CFG_S2_ENABLED(config)) {
+cfg->stage |= SMMU_STAGE_2;
+}
+}
+
 /* Returns < 0 in case of invalid STE, 0 otherwise */
 static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
   STE *ste, SMMUEventInfo *event)
@@ -525,13 +548,9 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
 
 config = STE_CONFIG(ste);
 
-if (STE_CFG_ABORT(config)) {
-cfg->aborted = true;
-return 0;
-}
+decode_ste_config(cfg, config);
 
-if (STE_CFG_BYPASS(config)) {
-cfg->bypassed = true;
+if (cfg->aborted || cfg->bypassed) {
 return 0;
 }
 
@@ -704,7 +723,6 @@ static int decode_cd(SMMUv3State *s, SMMUTransCfg *cfg,
 
 /* we support only those at the moment */
 cfg->aa64 = true;
-cfg->stage = SMMU_STAGE_1;
 
 cfg->oas = oas2bits(CD_IPS(cd));
 cfg->oas = MIN(oas2bits(SMMU_IDR5_OAS), cfg->oas);
@@ -887,28 +905,28 @@ static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
 event->u.f_walk_eabt.addr2 = ptw_info.addr;
 break;
 case SMMU_PTW_ERR_TRANSLATION:
-if (PTW_RECORD_FAULT(cfg)) {
+if (PTW_RECORD_FAULT(ptw_info, cfg)) {
 event->type = SMMU_EVT_F_TRANSLATION;
 event->u.f_translation.addr = addr;
 event->u.f_translation.rnw = flag & 0x1;
 }
 break;
 case SMMU_PTW_ERR_ADDR_SIZE:
-if (PTW_RECORD_FAULT(cfg)) {
+if (PTW_RECORD_FAULT(ptw_info, cfg)) {
 event->type = SMMU_EVT_F_ADDR_SIZE;
 event->u.f_addr_size.addr = addr;
 event->u.f_addr_size.rnw = flag & 0x1;
 }
 break;
 case SMMU_PTW_ERR_ACCESS:
-if (PTW_RECORD_FAULT(cfg)) {
+if (PTW_RECORD_FAULT(ptw_info, cfg)) {
 event->type = SMMU_EVT_F_ACCESS;
 event->u.f_access.addr = addr;
 event->u.f_access.rnw = flag & 0x1;
 }
 break;
 case SMMU_PTW_ERR_PERMISSION:
-if (PTW_RECORD_FAULT(cfg)) {
+if (PTW_RECORD_FAULT(ptw_info, cfg)) {
 event->type = SMMU_EVT_F_PERMISSION;
 event->u.f_permission.addr = addr;
 event->u.f_permission.rnw = flag & 0x1;
-- 
2.44.0.478.gd926399ef9-goog
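
A minimal sketch of what the reworked PTW_RECORD_FAULT buys (the types
below are trimmed stand-ins, not the QEMU definitions; encoding
SMMU_NESTED as the OR of both stages matches decode_ste_config() above):
with nesting enabled, cfg->stage holds both stages at once, so whether a
fault is recorded must depend on the stage at which the walk actually
faulted, as reported in ptw_info, not on the configured stage.

#include <stdbool.h>
#include <stdio.h>

typedef enum SMMUStage {
    SMMU_STAGE_1 = 1,
    SMMU_STAGE_2 = 2,
    SMMU_NESTED  = SMMU_STAGE_1 | SMMU_STAGE_2,
} SMMUStage;

/* Trimmed stand-ins for SMMUPTWEventInfo and SMMUTransCfg. */
typedef struct { SMMUStage stage; } PTWInfo;
typedef struct {
    SMMUStage stage;        /* SMMU_NESTED when both stages are on */
    bool s1_record_faults;  /* from the CD */
    bool s2_record_faults;  /* from the STE */
} TransCfg;

/* Same shape as the reworked macro: keyed on the faulting stage. */
static bool ptw_record_fault(PTWInfo info, const TransCfg *cfg)
{
    return (info.stage == SMMU_STAGE_1 && cfg->s1_record_faults) ||
           (info.stage == SMMU_STAGE_2 && cfg->s2_record_faults);
}

int main(void)
{
    /* Nested config: stage 1 records faults, stage 2 does not. */
    TransCfg cfg = { SMMU_NESTED, true, false };
    PTWInfo fault_s2 = { SMMU_STAGE_2 };

    /* The old cfg->stage-based macro could not make this distinction. */
    printf("record stage-2 fault? %s\n",
           ptw_record_fault(fault_s2, &cfg) ? "yes" : "no");
    return 0;
}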




[RFC PATCH v2 11/13] hw/arm/smmu: Refactor SMMU OAS

2024-04-08 Thread Mostafa Saleh
SMMUv3 OAS is hardcoded to 44 bits, which is a problem for nested
configurations, as stage-2 might be shared with the CPU, which might
have a different PARANGE; and according to the SMMU manual (ARM IHI
0070F.b, 6.3.6 SMMU_IDR5), OAS must match the system physical address
size.

This patch doesn't change the SMMU OAS, but refactors the code to
make that easier:
- Rely everywhere on IDR5 for reading the OAS instead of using the
  macro, so that changing IDR5 alone propagates correctly.
- Remove the now-unused functions/macros: pa_range/MAX_PA.

Signed-off-by: Mostafa Saleh 
---
 hw/arm/smmu-common.c |  7 ---
 hw/arm/smmuv3-internal.h | 13 -
 hw/arm/smmuv3.c  | 35 ---
 3 files changed, 32 insertions(+), 23 deletions(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index b1cf1303c6..0710ee6b7d 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -430,7 +430,8 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
 inputsize = 64 - tt->tsz;
 level = 4 - (inputsize - 4) / stride;
 indexmask = VMSA_IDXMSK(inputsize, stride, level);
-baseaddr = extract64(tt->ttb, 0, 48);
+
+baseaddr = extract64(tt->ttb, 0, cfg->oas);
 baseaddr &= ~indexmask;
 
 while (level < VMSA_LEVELS) {
@@ -543,8 +544,8 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
  * Get the ttb from concatenated structure.
  * The offset is the idx * size of each ttb(number of ptes * (sizeof(pte))
  */
-uint64_t baseaddr = extract64(cfg->s2cfg.vttb, 0, 48) + (1 << stride) *
-  idx * sizeof(uint64_t);
+uint64_t baseaddr = extract64(cfg->s2cfg.vttb, 0, cfg->s2cfg.eff_ps) +
+  (1 << stride) * idx * sizeof(uint64_t);
 dma_addr_t indexmask = VMSA_IDXMSK(inputsize, stride, level);
 
 baseaddr &= ~indexmask;
diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
index e4dd11e1e6..b0d7ad6da3 100644
--- a/hw/arm/smmuv3-internal.h
+++ b/hw/arm/smmuv3-internal.h
@@ -596,19 +596,6 @@ static inline int oas2bits(int oas_field)
 return -1;
 }
 
-static inline int pa_range(STE *ste)
-{
-int oas_field = MIN(STE_S2PS(ste), SMMU_IDR5_OAS);
-
-if (!STE_S2AA64(ste)) {
-return 40;
-}
-
-return oas2bits(oas_field);
-}
-
-#define MAX_PA(ste) ((1 << pa_range(ste)) - 1)
-
 /* CD fields */
 
 #define CD_VALID(x)   extract32((x)->word[0], 31, 1)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 0e367c70ad..c377c05379 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -411,10 +411,10 @@ static bool s2t0sz_valid(SMMUTransCfg *cfg)
 }
 
 if (cfg->s2cfg.granule_sz == 16) {
-return (cfg->s2cfg.tsz >= 64 - oas2bits(SMMU_IDR5_OAS));
+return (cfg->s2cfg.tsz >= 64 - cfg->s2cfg.eff_ps);
 }
 
-return (cfg->s2cfg.tsz >= MAX(64 - oas2bits(SMMU_IDR5_OAS), 16));
+return (cfg->s2cfg.tsz >= MAX(64 - cfg->s2cfg.eff_ps, 16));
 }
 
 /*
@@ -435,8 +435,11 @@ static bool s2_pgtable_config_valid(uint8_t sl0, uint8_t t0sz, uint8_t gran)
 return nr_concat <= VMSA_MAX_S2_CONCAT;
 }
 
-static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
+static int decode_ste_s2_cfg(SMMUv3State *s, SMMUTransCfg *cfg,
+ STE *ste)
 {
+uint8_t oas = FIELD_EX32(s->idr[5], IDR5, OAS);
+
 if (STE_S2AA64(ste) == 0x0) {
 qemu_log_mask(LOG_UNIMP,
   "SMMUv3 AArch32 tables not supported\n");
@@ -469,7 +472,15 @@ static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
 }
 
 /* For AA64, The effective S2PS size is capped to the OAS. */
-cfg->s2cfg.eff_ps = oas2bits(MIN(STE_S2PS(ste), SMMU_IDR5_OAS));
+cfg->s2cfg.eff_ps = oas2bits(MIN(STE_S2PS(ste), oas));
+/*
+ * For SMMUv3.1 and later, when OAS == IAS == 52, the stage 2 input
+ * range is further limited to 48 bits unless STE.S2TG indicates a
+ * 64KB granule.
+ */
+if (cfg->s2cfg.granule_sz != 16) {
+cfg->s2cfg.eff_ps = MIN(cfg->s2cfg.eff_ps, 48);
+}
 /*
  * It is ILLEGAL for the address in S2TTB to be outside the range
  * described by the effective S2PS value.
@@ -545,6 +556,7 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
   STE *ste, SMMUEventInfo *event)
 {
 uint32_t config;
+uint8_t oas = FIELD_EX32(s->idr[5], IDR5, OAS);
 int ret;
 
 if (!STE_VALID(ste)) {
@@ -588,8 +600,8 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
  * Stage-1 OAS defaults to OAS even if not enabled as it would be used
  * in input address check for stage-2.
  */
-cfg->oas = oas2bits(SMMU_IDR5_OAS);
-ret = decode_ste_s2_cfg(cfg, ste);
+cfg->oas = oas2bits(oas);
+ret = decode_ste_s2_cfg(s, cfg, ste);
 if (ret) {
 goto bad_ste;
 
