Re: [PATCH v3 07/13] iommu/arm-smmu-v3: Add support for Substream IDs

2019-12-18 Thread Auger Eric
Hi Jean,

On 12/9/19 7:05 PM, Jean-Philippe Brucker wrote:
> At the moment, the SMMUv3 driver implements only one stage-1 or stage-2
> page directory per device. However SMMUv3 allows more than one address
> space for some devices, by providing multiple stage-1 page directories. In
> addition to the Stream ID (SID), that identifies a device, we can now have
> Substream IDs (SSID) identifying an address space. In PCIe, SID is called
> Requester ID (RID) and SSID is called Process Address-Space ID (PASID).
> A complete stage-1 walk goes through the context descriptor table:
> 
>       Stream tables       Ctx. Desc. tables       Page tables
>         +--------+   ,------->+-------+   ,------->+-------+
>         :        :   |        :       :   |        :       :
>         +--------+   |        +-------+   |        +-------+
>    SID->|  STE   |---'  SSID->|  CD   |---'  IOVA->|  PTE  |--> IPA
>         +--------+            +-------+            +-------+
>         :        :            :       :            :       :
>         +--------+            +-------+            +-------+
> 
> Rewrite arm_smmu_write_ctx_desc() to modify context descriptor table
> entries. To keep things simple we only implement one level of context
> descriptor tables here, but as with stream and page tables, an SSID can
> be split to index multiple levels of tables.
> 
> Reviewed-by: Jonathan Cameron 
> Signed-off-by: Jean-Philippe Brucker 
Reviewed-by: Eric Auger 

Thanks

Eric

> ---
>  drivers/iommu/arm-smmu-v3.c | 125 +---
>  1 file changed, 102 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 43d6a7ded6e4..a01071123c34 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -227,6 +227,11 @@
>  #define STRTAB_STE_0_S1CTXPTR_MASK   GENMASK_ULL(51, 6)
>  #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)
>  
> +#define STRTAB_STE_1_S1DSS           GENMASK_ULL(1, 0)
> +#define STRTAB_STE_1_S1DSS_TERMINATE 0x0
> +#define STRTAB_STE_1_S1DSS_BYPASS    0x1
> +#define STRTAB_STE_1_S1DSS_SSID0     0x2
> +
>  #define STRTAB_STE_1_S1C_CACHE_NC    0UL
>  #define STRTAB_STE_1_S1C_CACHE_WBRA  1UL
>  #define STRTAB_STE_1_S1C_CACHE_WT    2UL
> @@ -329,6 +334,7 @@
>  #define CMDQ_PREFETCH_1_SIZE         GENMASK_ULL(4, 0)
>  #define CMDQ_PREFETCH_1_ADDR_MASK    GENMASK_ULL(63, 12)
>  
> +#define CMDQ_CFGI_0_SSID             GENMASK_ULL(31, 12)
>  #define CMDQ_CFGI_0_SID              GENMASK_ULL(63, 32)
>  #define CMDQ_CFGI_1_LEAF             (1UL << 0)
>  #define CMDQ_CFGI_1_RANGE            GENMASK_ULL(4, 0)
> @@ -446,8 +452,11 @@ struct arm_smmu_cmdq_ent {
>  
>   #define CMDQ_OP_CFGI_STE0x3
>   #define CMDQ_OP_CFGI_ALL0x4
> + #define CMDQ_OP_CFGI_CD 0x5
> + #define CMDQ_OP_CFGI_CD_ALL 0x6
>   struct {
>   u32 sid;
> + u32 ssid;
>   union {
>   boolleaf;
>   u8  span;
> @@ -568,6 +577,7 @@ struct arm_smmu_cd_table {
>  struct arm_smmu_s1_cfg {
>   struct arm_smmu_cd_tabletable;
>   struct arm_smmu_ctx_desccd;
> + u8  s1fmt;
>   u8  s1cdmax;
>  };
>  
> @@ -860,10 +870,16 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct 
> arm_smmu_cmdq_ent *ent)
>   cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
>   cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
>   break;
> + case CMDQ_OP_CFGI_CD:
> + cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
> + /* Fallthrough */
>   case CMDQ_OP_CFGI_STE:
>   cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
>   cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
>   break;
> + case CMDQ_OP_CFGI_CD_ALL:
> + cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
> + break;
>   case CMDQ_OP_CFGI_ALL:
>   /* Cover the entire SID range */
>   cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
> @@ -1456,6 +1472,33 @@ static int arm_smmu_cmdq_issue_sync(struct 
> arm_smmu_device *smmu)
>  }
>  
>  /* Context descriptor manipulation functions */
> +static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
> +  int ssid, bool leaf)
> +{
> + size_t i;
> + unsigned long flags;
> + struct arm_smmu_master *master;
> + struct arm_smmu_device *smmu = smmu_domain->smmu;
> + struct arm_smmu_cmdq_ent cmd = {
> + .opcode = CMDQ_OP_CFGI_CD,
> + .cfgi   = {
> + .ssid   = ssid,
> + .leaf   = leaf,
> + },
> + };
> +
> + 

Re: [PATCH v3 07/13] iommu/arm-smmu-v3: Add support for Substream IDs

2019-12-18 Thread Auger Eric
Hi Jean,

On 12/18/19 5:07 PM, Jean-Philippe Brucker wrote:
> On Tue, Dec 17, 2019 at 05:43:59PM +0100, Auger Eric wrote:
>>> -static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
>>> -   struct arm_smmu_s1_cfg *cfg)
>>> +static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
>>> +  int ssid, struct arm_smmu_ctx_desc *cd)
>>>  {
>>> -   u64 val;
>>> -   __le64 *cdptr = cfg->table.ptr;
>>> -
>>> /*
>>> -* We don't need to issue any invalidation here, as we'll invalidate
>>> -* the STE when installing the new entry anyway.
>>> +* This function handles the following cases:
>>> +*
>>> +* (1) Install primary CD, for normal DMA traffic (SSID = 0).
>>> +* (2) Install a secondary CD, for SID+SSID traffic.
>>> +* (3) Update ASID of a CD. Atomically write the first 64 bits of the
>>> +* CD, then invalidate the old entry and mappings.
>>> +* (4) Remove a secondary CD.
>> I see arm_smmu_write_ctx_desc getting called with non null cd in
>> arm_smmu_attach_dev but I do not see it removed in the detach path?
> 
> No we don't have to remove the primary CD, since detach clears the STE and
> frees the CD tables.
OK
> 
>>>  */
>>> -   val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
>>> -#ifdef __BIG_ENDIAN
>>> - CTXDESC_CD_0_ENDI |
>>> -#endif
>>> - CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
>>> - CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
>>> - CTXDESC_CD_0_V;
>>> +   u64 val;
>>> +   bool cd_live;
>>> +   struct arm_smmu_device *smmu = smmu_domain->smmu;
>>> +   __le64 *cdptr = smmu_domain->s1_cfg.table.ptr + ssid *
>>> +   CTXDESC_CD_DWORDS;
>>>  
>>> -   /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
>>> -   if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
>>> -   val |= CTXDESC_CD_0_S;
>>> +   val = le64_to_cpu(cdptr[0]);
>>> +   cd_live = !!(val & CTXDESC_CD_0_V);
>>>  
>>> -   cdptr[0] = cpu_to_le64(val);
>>> +   if (!cd) { /* (4) */
>>> +   val = 0;
>>> +   } else if (cd_live) { /* (3) */
>>> +   val &= ~CTXDESC_CD_0_ASID;
>>> +   val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
>>> +   /*
>>> +* Until CD+TLB invalidation, both ASIDs may be used for tagging
>>> +* this substream's traffic
>>> +*/
>>> +   } else { /* (1) and (2) */
>>> +   cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
>>> +   cdptr[2] = 0;
>>> +   cdptr[3] = cpu_to_le64(cd->mair);
>>> +
>>> +   /*
>>> +* STE is live, and the SMMU might fetch this CD at any
>>> +* time. Ensure that it observes the rest of the CD before we
>>> +* enable it.
>> Mostly, don't you want the invalid state to be seen?
Sorry, what I meant is that you mostly want to enforce the INVALID bit (that
is what I understand from your explanation below). I was confused by the
wording "Ensure that it observes the rest of the CD before we enable it".
> 
> Sorry I didn't get your question. Without the sync, the SMMU could read an
> invalid address in CD[1], read V=1 in CD[0] and proceed with dereferencing
> the bogus pointer before seeing our update to CD[1].
> 
> To prevent this we follow the update procedure described in IHI0070Ca
> 3.21.3.1 (Configuration structure update procedure):
> 
> Because the SMMU can read any reachable structure at any time, and is not
> required to read the double-words of the structure in order, Arm
> recommends that the following procedure is used to initialize structures:
> 1. Structure starts invalid, having V == 0.
> 2. Fill in all fields, leaving V == 0, then perform a DSB operation to
>    ensure written data is observable from the SMMU.
> 3. Issue a CMD_CFGI_<x>, as appropriate.
> 4. Issue a CMD_SYNC, and wait for completion.
> 5. Set V to 1, then perform a DSB operation to ensure write is
>    observable by the SMMU.
> 6. Issue CMD_CFGI_<x>, as appropriate.
> 7. Optionally issue a CMD_SYNC, and wait for completion. This must be
>    done if a subsequent software operation, such as enabling device
>    DMA, depends on the SMMU using the new structure.
> 
>>> @@ -1664,6 +1739,7 @@ static void arm_smmu_write_strtab_ent(struct 
>>> arm_smmu_master *master, u32 sid,
>>> if (s1_cfg) {
>>> BUG_ON(ste_live);
>>> dst[1] = cpu_to_le64(
>>> +FIELD_PREP(STRTAB_STE_1_S1DSS, 
>>> STRTAB_STE_1_S1DSS_SSID0) |
>>>  FIELD_PREP(STRTAB_STE_1_S1CIR, 
>>> STRTAB_STE_1_S1C_CACHE_WBRA) |
>>>  FIELD_PREP(STRTAB_STE_1_S1COR, 
>>> STRTAB_STE_1_S1C_CACHE_WBRA) |
>>>  FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
>>> @@ -1674,7 +1750,9 @@ static void arm_smmu_write_strtab_ent(struct 
>>> arm_smmu_master *master, u32 sid,
>>> dst[1] |= 

Re: [PATCH v3 07/13] iommu/arm-smmu-v3: Add support for Substream IDs

2019-12-18 Thread Jean-Philippe Brucker
On Tue, Dec 17, 2019 at 05:43:59PM +0100, Auger Eric wrote:
> > -static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
> > -   struct arm_smmu_s1_cfg *cfg)
> > +static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
> > +  int ssid, struct arm_smmu_ctx_desc *cd)
> >  {
> > -   u64 val;
> > -   __le64 *cdptr = cfg->table.ptr;
> > -
> > /*
> > -* We don't need to issue any invalidation here, as we'll invalidate
> > -* the STE when installing the new entry anyway.
> > +* This function handles the following cases:
> > +*
> > +* (1) Install primary CD, for normal DMA traffic (SSID = 0).
> > +* (2) Install a secondary CD, for SID+SSID traffic.
> > +* (3) Update ASID of a CD. Atomically write the first 64 bits of the
> > +* CD, then invalidate the old entry and mappings.
> > +* (4) Remove a secondary CD.
> I see arm_smmu_write_ctx_desc getting called with non null cd in
> arm_smmu_attach_dev but I do not see it removed in the detach path?

No we don't have to remove the primary CD, since detach clears the STE and
frees the CD tables.

> >  */
> > -   val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
> > -#ifdef __BIG_ENDIAN
> > - CTXDESC_CD_0_ENDI |
> > -#endif
> > - CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
> > - CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
> > - CTXDESC_CD_0_V;
> > +   u64 val;
> > +   bool cd_live;
> > +   struct arm_smmu_device *smmu = smmu_domain->smmu;
> > +   __le64 *cdptr = smmu_domain->s1_cfg.table.ptr + ssid *
> > +   CTXDESC_CD_DWORDS;
> >  
> > -   /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
> > -   if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
> > -   val |= CTXDESC_CD_0_S;
> > +   val = le64_to_cpu(cdptr[0]);
> > +   cd_live = !!(val & CTXDESC_CD_0_V);
> >  
> > -   cdptr[0] = cpu_to_le64(val);
> > +   if (!cd) { /* (4) */
> > +   val = 0;
> > +   } else if (cd_live) { /* (3) */
> > +   val &= ~CTXDESC_CD_0_ASID;
> > +   val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
> > +   /*
> > +* Until CD+TLB invalidation, both ASIDs may be used for tagging
> > +* this substream's traffic
> > +*/
> > +   } else { /* (1) and (2) */
> > +   cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
> > +   cdptr[2] = 0;
> > +   cdptr[3] = cpu_to_le64(cd->mair);
> > +
> > +   /*
> > +* STE is live, and the SMMU might fetch this CD at any
> > +* time. Ensure that it observes the rest of the CD before we
> > +* enable it.
> Mostly, don't you want the invalid state to be seen?

Sorry I didn't get your question. Without the sync, the SMMU could read an
invalid address in CD[1], read V=1 in CD[0] and proceed with dereferencing
the bogus pointer before seeing our update to CD[1].

To prevent this we follow the update procedure described in IHI0070Ca
3.21.3.1 (Configuration structure update procedure):

Because the SMMU can read any reachable structure at any time, and is not
required to read the double-words of the structure in order, Arm
recommends that the following procedure is used to initialize structures:
1. Structure starts invalid, having V == 0.
2. Fill in all fields, leaving V == 0, then perform a DSB operation to
   ensure written data is observable from the SMMU.
3. Issue a CMD_CFGI_<x>, as appropriate.
4. Issue a CMD_SYNC, and wait for completion.
5. Set V to 1, then perform a DSB operation to ensure write is
   observable by the SMMU.
6. Issue CMD_CFGI_<x>, as appropriate.
7. Optionally issue a CMD_SYNC, and wait for completion. This must be
   done if a subsequent software operation, such as enabling device
   DMA, depends on the SMMU using the new structure.

> > @@ -1664,6 +1739,7 @@ static void arm_smmu_write_strtab_ent(struct 
> > arm_smmu_master *master, u32 sid,
> > if (s1_cfg) {
> > BUG_ON(ste_live);
> > dst[1] = cpu_to_le64(
> > +FIELD_PREP(STRTAB_STE_1_S1DSS, 
> > STRTAB_STE_1_S1DSS_SSID0) |
> >  FIELD_PREP(STRTAB_STE_1_S1CIR, 
> > STRTAB_STE_1_S1C_CACHE_WBRA) |
> >  FIELD_PREP(STRTAB_STE_1_S1COR, 
> > STRTAB_STE_1_S1C_CACHE_WBRA) |
> >  FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
> > @@ -1674,7 +1750,9 @@ static void arm_smmu_write_strtab_ent(struct 
> > arm_smmu_master *master, u32 sid,
> > dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
> >  
> > val |= (s1_cfg->table.ptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> > -   FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
> > +   FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) 
> > |
> > +   FIELD_PREP(STRTAB_STE_0_S1CDMAX, 

Re: [PATCH v3 07/13] iommu/arm-smmu-v3: Add support for Substream IDs

2019-12-17 Thread Auger Eric
Hi Jean,

On 12/9/19 7:05 PM, Jean-Philippe Brucker wrote:
> At the moment, the SMMUv3 driver implements only one stage-1 or stage-2
> page directory per device. However SMMUv3 allows more than one address
> space for some devices, by providing multiple stage-1 page directories. In
> addition to the Stream ID (SID), that identifies a device, we can now have
> Substream IDs (SSID) identifying an address space. In PCIe, SID is called
> Requester ID (RID) and SSID is called Process Address-Space ID (PASID).
> A complete stage-1 walk goes through the context descriptor table:
> 
>       Stream tables       Ctx. Desc. tables       Page tables
>         +--------+   ,------->+-------+   ,------->+-------+
>         :        :   |        :       :   |        :       :
>         +--------+   |        +-------+   |        +-------+
>    SID->|  STE   |---'  SSID->|  CD   |---'  IOVA->|  PTE  |--> IPA
>         +--------+            +-------+            +-------+
>         :        :            :       :            :       :
>         +--------+            +-------+            +-------+
> 
> Rewrite arm_smmu_write_ctx_desc() to modify context descriptor table
> entries. To keep things simple we only implement one level of context
> descriptor tables here, but as with stream and page tables, an SSID can
> be split to index multiple levels of tables.
> 
> Reviewed-by: Jonathan Cameron 
> Signed-off-by: Jean-Philippe Brucker 
> ---
>  drivers/iommu/arm-smmu-v3.c | 125 +---
>  1 file changed, 102 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 43d6a7ded6e4..a01071123c34 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -227,6 +227,11 @@
>  #define STRTAB_STE_0_S1CTXPTR_MASK   GENMASK_ULL(51, 6)
>  #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)
>  
> +#define STRTAB_STE_1_S1DSS           GENMASK_ULL(1, 0)
> +#define STRTAB_STE_1_S1DSS_TERMINATE 0x0
> +#define STRTAB_STE_1_S1DSS_BYPASS    0x1
> +#define STRTAB_STE_1_S1DSS_SSID0     0x2
> +
>  #define STRTAB_STE_1_S1C_CACHE_NC    0UL
>  #define STRTAB_STE_1_S1C_CACHE_WBRA  1UL
>  #define STRTAB_STE_1_S1C_CACHE_WT    2UL
> @@ -329,6 +334,7 @@
>  #define CMDQ_PREFETCH_1_SIZE         GENMASK_ULL(4, 0)
>  #define CMDQ_PREFETCH_1_ADDR_MASK    GENMASK_ULL(63, 12)
>  
> +#define CMDQ_CFGI_0_SSID             GENMASK_ULL(31, 12)
>  #define CMDQ_CFGI_0_SID              GENMASK_ULL(63, 32)
>  #define CMDQ_CFGI_1_LEAF             (1UL << 0)
>  #define CMDQ_CFGI_1_RANGE            GENMASK_ULL(4, 0)
> @@ -446,8 +452,11 @@ struct arm_smmu_cmdq_ent {
>  
>   #define CMDQ_OP_CFGI_STE0x3
>   #define CMDQ_OP_CFGI_ALL0x4
> + #define CMDQ_OP_CFGI_CD 0x5
> + #define CMDQ_OP_CFGI_CD_ALL 0x6
>   struct {
>   u32 sid;
> + u32 ssid;
>   union {
>   boolleaf;
>   u8  span;
> @@ -568,6 +577,7 @@ struct arm_smmu_cd_table {
>  struct arm_smmu_s1_cfg {
>   struct arm_smmu_cd_tabletable;
>   struct arm_smmu_ctx_desccd;
> + u8  s1fmt;
>   u8  s1cdmax;
>  };
>  
> @@ -860,10 +870,16 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct 
> arm_smmu_cmdq_ent *ent)
>   cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
>   cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
>   break;
> + case CMDQ_OP_CFGI_CD:
> + cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
> + /* Fallthrough */
>   case CMDQ_OP_CFGI_STE:
>   cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
>   cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
>   break;
> + case CMDQ_OP_CFGI_CD_ALL:
> + cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
> + break;
>   case CMDQ_OP_CFGI_ALL:
>   /* Cover the entire SID range */
>   cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
> @@ -1456,6 +1472,33 @@ static int arm_smmu_cmdq_issue_sync(struct 
> arm_smmu_device *smmu)
>  }
>  
>  /* Context descriptor manipulation functions */
> +static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
> +  int ssid, bool leaf)
> +{
> + size_t i;
> + unsigned long flags;
> + struct arm_smmu_master *master;
> + struct arm_smmu_device *smmu = smmu_domain->smmu;
> + struct arm_smmu_cmdq_ent cmd = {
> + .opcode = CMDQ_OP_CFGI_CD,
> + .cfgi   = {
> + .ssid   = ssid,
> + .leaf   = leaf,
> + },
> + };
> +
> +